Skip to content

Commit

Permalink
@allow-large-files [javafoundations] Support incremental jar append o…
Browse files Browse the repository at this point in the history
…peration

Summary:
**Goal: Reduce local build time by up to ~95%**

**Problem:** Rebuilding large JAR files (__unstamped or shaded_jar), like fat jars, can be slow and wasteful when only the project's source code has changed. This is because the entire build process extracts all files from every dependency and repackages them, even if the dependencies themselves haven't been modified. This inefficiency becomes especially costly when the final JAR file reaches gigabytes in size.

**Proposal:** Introduce a cacheable Buck action that stores a previously built JAR with all project dependencies. This cached JAR serves as the foundation for subsequent builds, where only your project's source code changes are appended.

**How:** Add a feature that allows the CustomZipOutputStream to append files to a previously cached jar file, incorporating the additional files without the need to decompress the entire pre-existing content.
The implementation transfers the data block from the previous jar, appends the new files to it, and updates its header to include the newly added files.

With this feature, Buck can reuse a cached jar containing the dependencies instead of repackaging them on every change. As a result, the build time for java_binary targets is significantly reduced, improving overall efficiency in the development process.

Differential Revision: D57572666

fbshipit-source-id: 8ba240821b7b193e876ac97c7061da77f3eb257a
  • Loading branch information
Adolfo Santos authored and facebook-github-bot committed Jul 15, 2024
1 parent 800cfee commit 4c937f1
Show file tree
Hide file tree
Showing 2 changed files with 138 additions and 53 deletions.
89 changes: 76 additions & 13 deletions prelude/java/java_binary.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,46 @@ load(
"get_java_packaging_info",
)

def _should_use_incremental_build(ctx: AnalysisContext):
    # Decide whether the fat jar for this target is assembled incrementally.
    # Incremental packaging is limited to __unstamped jars (which includes inner.jar).
    if not ctx.label.name.startswith("__unstamped"):
        return False

    # Opt-in per target through the `incremental_build` label ...
    if "incremental_build" in ctx.attrs.labels:
        return True

    # ... or globally through the `java.inc_build` config value (case-insensitive "true").
    return read_config("java", "inc_build", "false").lower() == "true"

def _is_nested_package(ctx: AnalysisContext, pkg: str) -> bool:
    # True when `pkg` is the package of the target being analyzed, or a
    # package located underneath it (i.e. prefixed by "<own package>/").
    own_package = ctx.label.package
    if pkg == own_package:
        return True
    return pkg.startswith(own_package + "/")

def _get_dependencies_jars(ctx: AnalysisContext, package_deps: typing.Any) -> cmd_args:
    # Gather the jars of all packaging deps whose package is NOT this target's
    # package or one nested under it. Deps without a jar are ignored.
    external_jars = cmd_args()
    for packaging_dep in package_deps.transitive_set.traverse():
        if not packaging_dep.jar:
            continue
        if _is_nested_package(ctx, packaging_dep.label.package):
            continue
        external_jars.add(packaging_dep.jar)
    return external_jars

def _get_incremental_jars(ctx: AnalysisContext, package_deps: typing.Any) -> cmd_args:
    # Gather the jars of all packaging deps whose package IS this target's
    # package or one nested under it. Deps without a jar are ignored.
    nested_jars = cmd_args()
    for packaging_dep in package_deps.transitive_set.traverse():
        if not packaging_dep.jar:
            continue
        if not _is_nested_package(ctx, packaging_dep.label.package):
            continue
        nested_jars.add(packaging_dep.jar)
    return nested_jars

def _generate_script(generate_wrapper: bool, native_libs: list[SharedLibrary]) -> bool:
    # A wrapper script is the result only when `generate_wrapper` is set and
    # there are no native libs. In every other case a fat jar is generated
    # (the inner jar or script is then included inside the final fat jar).
    if not generate_wrapper:
        return False
    return len(native_libs) == 0

def _create_fat_jar(
ctx: AnalysisContext,
java_toolchain: JavaToolchainInfo,
jars: cmd_args,
native_libs: list[SharedLibrary],
do_not_create_inner_jar: bool,
generate_wrapper: bool) -> list[Artifact]:
name_prefix: str = "",
do_not_create_inner_jar: bool = True,
generate_wrapper: bool = False,
main_class: [str, None] = None,
append_jar: [Artifact, None] = None) -> list[Artifact]:
java_toolchain = ctx.attrs._java_toolchain[JavaToolchainInfo]
extension = "sh" if _generate_script(generate_wrapper, native_libs) else "jar"
output = ctx.actions.declare_output("{}.{}".format(ctx.label.name, extension))
output = ctx.actions.declare_output("{}{}.{}".format(name_prefix, ctx.label.name, extension))

args = [
java_toolchain.fat_jar[RunInfo],
Expand All @@ -46,17 +72,20 @@ def _create_fat_jar(
"--output",
output.as_output(),
"--jars_file",
ctx.actions.write("jars_file", jars),
ctx.actions.write("{}jars_file".format(name_prefix), jars),
]

if append_jar:
args += ["--append_jar", append_jar]

if native_libs:
expect(
java_toolchain.is_bootstrap_toolchain == False,
"Bootstrap java toolchain could not be used for java_binary() with native code.",
)
args += [
"--native_libs_file",
ctx.actions.write("native_libs", [cmd_args([native_lib.soname.ensure_str(), native_lib.lib.output], delimiter = " ") for native_lib in native_libs]),
ctx.actions.write("{}native_libs".format(name_prefix), [cmd_args([native_lib.soname.ensure_str(), native_lib.lib.output], delimiter = " ") for native_lib in native_libs]),
]
if do_not_create_inner_jar:
args += [
Expand All @@ -74,7 +103,6 @@ def _create_fat_jar(
"nativelibs",
]

main_class = ctx.attrs.main_class
if main_class:
args += ["--main_class", main_class]

Expand All @@ -84,7 +112,7 @@ def _create_fat_jar(

blocklist = ctx.attrs.blacklist
if blocklist:
args += ["--blocklist", ctx.actions.write("blocklist_args", blocklist)]
args += ["--blocklist", ctx.actions.write("{}blocklist_args".format(name_prefix), blocklist)]

if ctx.attrs.meta_inf_directory:
args += ["--meta_inf_directory", ctx.attrs.meta_inf_directory]
Expand All @@ -111,7 +139,7 @@ def _create_fat_jar(
ctx.actions.run(
fat_jar_cmd,
local_only = False,
category = "fat_jar",
category = "{}fat_jar".format(name_prefix),
allow_cache_upload = True,
)

Expand Down Expand Up @@ -170,15 +198,50 @@ def java_binary_impl(ctx: AnalysisContext) -> list[Provider]:
need_to_generate_wrapper = ctx.attrs.generate_wrapper == True
do_not_create_inner_jar = ctx.attrs.do_not_create_inner_jar == True
packaging_jar_args = packaging_info.packaging_deps.project_as_args("full_jar_args")
outputs = _create_fat_jar(ctx, java_toolchain, cmd_args(packaging_jar_args), native_deps, do_not_create_inner_jar, need_to_generate_wrapper)
main_class = ctx.attrs.main_class

main_artifact = outputs[0]
other_outputs = []

if _should_use_incremental_build(ctx):
# collect all dependencies
dependencies_jars = _get_dependencies_jars(ctx, packaging_jar_args)

# collect nested targets
incremental_jars = _get_incremental_jars(ctx, packaging_jar_args)

# generate intermediary jar only with dependencies
deps_outputs = _create_fat_jar(
ctx,
dependencies_jars,
native_deps,
name_prefix = "deps_",
)
other_outputs = [deps_outputs[0]]

# generate final jar appending modules to the dependencies jar
outputs = _create_fat_jar(
ctx,
incremental_jars,
native_deps,
do_not_create_inner_jar = do_not_create_inner_jar,
generate_wrapper = need_to_generate_wrapper,
main_class = main_class,
append_jar = deps_outputs[0],
)
else:
outputs = _create_fat_jar(
ctx,
cmd_args(packaging_jar_args),
native_deps,
do_not_create_inner_jar = do_not_create_inner_jar,
generate_wrapper = need_to_generate_wrapper,
main_class = main_class,
)

run_cmd = _get_run_cmd(
attrs = ctx.attrs,
script_mode = _generate_script(need_to_generate_wrapper, native_deps),
main_artifact = main_artifact,
main_artifact = outputs[0],
java_toolchain = java_toolchain,
)

Expand All @@ -200,7 +263,7 @@ def java_binary_impl(ctx: AnalysisContext) -> list[Provider]:
)

return [
DefaultInfo(default_output = main_artifact, other_outputs = other_outputs, sub_targets = sub_targets),
DefaultInfo(default_output = outputs[0], other_outputs = other_outputs, sub_targets = sub_targets),
RunInfo(args = run_cmd),
create_template_info(ctx, packaging_info, first_order_libs),
class_to_src_map,
Expand Down
102 changes: 62 additions & 40 deletions prelude/java/tools/fat_jar.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import zipfile
from shutil import copy, copytree
from tempfile import TemporaryDirectory
from typing import Dict, List
from typing import Optional

import utils

Expand Down Expand Up @@ -109,19 +109,45 @@ def _parse_args():
action="store_true",
help="Whether to create an inner jar if native libraries are present.",
)
parser.add_argument(
"--append_jar",
required=False,
type=pathlib.Path,
help="path to a jar used as base of the new jar, which new files will be added",
)

return parser.parse_args()


def _merge_dictionaries(dict1: Dict[str, str], dict2: Dict[str, str]) -> Dict[str, str]:
return {**dict1, **dict2}


def _shlex_split(cmd: str) -> List[str]:
if platform.system() == "Windows":
return cmd.split()
else:
return shlex.split(cmd)
def _fat_jar(
    jar_builder_tool: str,
    output_path: str,
    append_jar: Optional[str] = None,
    main_class: Optional[str] = None,
    entries_to_jar_file: Optional[str] = None,
    override_entries_to_jar_file: Optional[str] = None,
    manifest_file: Optional[str] = None,
    blocklist_file: Optional[str] = None,
) -> None:
    """Build the jar builder command line, log it, and execute it.

    The command starts with the tokens of ``jar_builder_tool``. Each optional
    argument contributes its flag only when it is truthy. ``--merge-manifests``
    and ``--output <output_path>`` are always appended at the end.
    """
    # (flag, value) pairs in the order they must appear on the command line.
    optional_flags = (
        ("--append-jar", append_jar),
        ("--main-class", main_class),
        ("--entries-to-jar", entries_to_jar_file),
        ("--override-entries-to-jar", override_entries_to_jar_file),
        ("--manifest-file", manifest_file),
    )

    cmd = list(utils.shlex_split(jar_builder_tool))
    for flag, value in optional_flags:
        if value:
            cmd += [flag, value]

    # A blocklist always comes with the "substring" matcher.
    if blocklist_file:
        cmd += [
            "--blocklist-patterns",
            blocklist_file,
            "--blocklist-patterns-matcher",
            "substring",
        ]

    cmd += ["--merge-manifests", "--output", output_path]

    utils.log_message("fat_jar_cmd: {}".format(cmd))
    utils.execute_command(cmd)


# Reads a list of files from native_libs_file and symlinks each as files in native_libs_dir.
Expand Down Expand Up @@ -151,6 +177,7 @@ def main():
manifest = args.manifest
blocklist_file = args.blocklist
meta_inf_directory = args.meta_inf_directory
append_jar = args.append_jar

generate_wrapper = args.generate_wrapper
classpath_args_output = args.classpath_args_output
Expand Down Expand Up @@ -191,6 +218,8 @@ def main():
utils.log_message("classpath_args_output: {}".format(classpath_args_output))
utils.log_message("java_tool: {}".format(java_tool))
utils.log_message("script_marker_file_name: {}".format(script_marker_file_name))
if append_jar:
utils.log_message("append_jar = {}".format(append_jar))

need_to_process_native_libs = native_libs_file is not None
if need_to_process_native_libs and not do_not_create_inner_jar:
Expand Down Expand Up @@ -258,8 +287,8 @@ def main():

else: # generate fat jar

jar_cmd = []
jar_cmd.extend(utils.shlex_split(jar_builder_tool))
entries_to_jar_file = jars_file
override_entries_to_jar = None

if need_to_process_native_libs and do_not_create_inner_jar:
# symlink native libs to `nativelibs` directory
Expand All @@ -283,11 +312,7 @@ def main():
f.write(str(f2.read()) + "\n")
f.write(str(native_libs_staging))

jar_cmd.extend(
["--entries-to-jar", jars_and_native_libs_directory_file]
)
else:
jar_cmd.extend(["--entries-to-jar", jars_file])
entries_to_jar_file = jars_and_native_libs_directory_file

if meta_inf_directory:
meta_inf_staging = pathlib.Path(temp_dir) / "meta_inf_staging"
Expand All @@ -305,28 +330,26 @@ def main():
with open(meta_inf_directory_file, "w") as f:
f.write(str(meta_inf_staging))

jar_cmd.extend(["--override-entries-to-jar", meta_inf_directory_file])

if main_class:
jar_cmd.extend(["--main-class", main_class])

if blocklist_file:
jar_cmd.extend(["--blocklist-patterns", blocklist_file])
jar_cmd.extend(["--blocklist-patterns-matcher", "substring"])

if manifest:
jar_cmd.extend(["--manifest-file", manifest])

jar_cmd.append("--merge-manifests")
override_entries_to_jar = meta_inf_directory_file

jar_output = (
os.path.join(temp_dir, "inner.jar")
if need_to_process_native_libs and not do_not_create_inner_jar
else output_path
)
jar_cmd.extend(["--output", jar_output])
utils.log_message("jar_cmd: {}".format(jar_cmd))
utils.execute_command(jar_cmd)

utils.log_message("jar_output: {}".format(jar_output))

_fat_jar(
jar_builder_tool=jar_builder_tool,
output_path=jar_output,
main_class=main_class,
entries_to_jar_file=entries_to_jar_file,
override_entries_to_jar_file=override_entries_to_jar,
manifest_file=manifest,
blocklist_file=blocklist_file,
append_jar=append_jar,
)

if need_to_process_native_libs and not do_not_create_inner_jar:
fat_jar_content_dir = os.path.join(temp_dir, "fat_jar_content_dir")
Expand Down Expand Up @@ -385,13 +408,12 @@ def main():
with open(entries_to_jar_file, "w") as f:
f.write("\n".join([contents_zip_path, str(fat_jar_lib)]))

fat_jar_cmd = []
fat_jar_cmd.extend(utils.shlex_split(jar_builder_tool))
fat_jar_cmd.extend(["--main-class", fat_jar_main_class])
fat_jar_cmd.extend(["--output", output_path])
fat_jar_cmd.extend(["--entries-to-jar", entries_to_jar_file])
fat_jar_cmd.append("--merge-manifests")
utils.execute_command(fat_jar_cmd)
_fat_jar(
jar_builder_tool=jar_builder_tool,
output_path=output_path,
main_class=fat_jar_main_class,
entries_to_jar_file=entries_to_jar_file,
)


if __name__ == "__main__":
Expand Down

0 comments on commit 4c937f1

Please sign in to comment.