From 4c937f14f9ba62c72e7b67e5b17976f1135abc0c Mon Sep 17 00:00:00 2001 From: Adolfo Santos Date: Mon, 15 Jul 2024 16:31:01 -0700 Subject: [PATCH] @allow-large-files [javafoundations] Support incremental jar append operation Summary: **Goal: Reduce local build time by up to ~95%** **Problem:** Rebuilding large JAR files (__unstamped or shaded_jar), like fat jars, can be slow and wasteful when only the project's source code has changed. This is because the entire build process extracts all files from every dependency and repackages them, even if the dependencies themselves haven't been modified. This inefficiency becomes especially costly when the final JAR file reaches gigabytes in size. **Proposal:** Introduce a cacheable Buck action that stores a previously built JAR with all project dependencies. This cached JAR serves as the foundation for subsequent builds, where only the project's source code changes are appended. **How:** Add a feature that allows the CustomZipOutputStream to append files to a previously cached JAR file, incorporating additional files without the need to decompress the entire pre-existing content. The implementation transfers the data block from the previous JAR, appends the new files to it, and updates its header to include the newly added files. By implementing this feature, Buck can now utilize a cached JAR of the dependencies instead of repacking them on every change. As a result, the build time for java_binary targets will be significantly reduced, leading to improved overall efficiency in the development process. 
Differential Revision: D57572666 fbshipit-source-id: 8ba240821b7b193e876ac97c7061da77f3eb257a --- prelude/java/java_binary.bzl | 89 ++++++++++++++++++++++++----- prelude/java/tools/fat_jar.py | 102 +++++++++++++++++++++------------- 2 files changed, 138 insertions(+), 53 deletions(-) diff --git a/prelude/java/java_binary.bzl b/prelude/java/java_binary.bzl index 4e0832cd948a..a03b9156e771 100644 --- a/prelude/java/java_binary.bzl +++ b/prelude/java/java_binary.bzl @@ -22,6 +22,29 @@ load( "get_java_packaging_info", ) +def _should_use_incremental_build(ctx: AnalysisContext): + # use incremental build only for __unstamped jars (which includes inner.jar) + return ctx.label.name.startswith("__unstamped") and ( + "incremental_build" in ctx.attrs.labels or read_config("java", "inc_build", "false").lower() == "true" + ) + +def _is_nested_package(ctx: AnalysisContext, pkg: str) -> bool: + return pkg == ctx.label.package or pkg.startswith(ctx.label.package + "/") + +def _get_dependencies_jars(ctx: AnalysisContext, package_deps: typing.Any) -> cmd_args: + jars = cmd_args() + for dep in package_deps.transitive_set.traverse(): + if dep.jar and not _is_nested_package(ctx, dep.label.package): + jars.add(dep.jar) + return jars + +def _get_incremental_jars(ctx: AnalysisContext, package_deps: typing.Any) -> cmd_args: + jars = cmd_args() + for dep in package_deps.transitive_set.traverse(): + if dep.jar and _is_nested_package(ctx, dep.label.package): + jars.add(dep.jar) + return jars + def _generate_script(generate_wrapper: bool, native_libs: list[SharedLibrary]) -> bool: # if `generate_wrapper` is set and no native libs then it should be a wrapper script as result, # otherwise fat jar will be generated (inner jar or script will be included inside a final fat jar) @@ -29,13 +52,16 @@ def _generate_script(generate_wrapper: bool, native_libs: list[SharedLibrary]) - def _create_fat_jar( ctx: AnalysisContext, - java_toolchain: JavaToolchainInfo, jars: cmd_args, native_libs: 
list[SharedLibrary], - do_not_create_inner_jar: bool, - generate_wrapper: bool) -> list[Artifact]: + name_prefix: str = "", + do_not_create_inner_jar: bool = True, + generate_wrapper: bool = False, + main_class: [str, None] = None, + append_jar: [Artifact, None] = None) -> list[Artifact]: + java_toolchain = ctx.attrs._java_toolchain[JavaToolchainInfo] extension = "sh" if _generate_script(generate_wrapper, native_libs) else "jar" - output = ctx.actions.declare_output("{}.{}".format(ctx.label.name, extension)) + output = ctx.actions.declare_output("{}{}.{}".format(name_prefix, ctx.label.name, extension)) args = [ java_toolchain.fat_jar[RunInfo], @@ -46,9 +72,12 @@ def _create_fat_jar( "--output", output.as_output(), "--jars_file", - ctx.actions.write("jars_file", jars), + ctx.actions.write("{}jars_file".format(name_prefix), jars), ] + if append_jar: + args += ["--append_jar", append_jar] + if native_libs: expect( java_toolchain.is_bootstrap_toolchain == False, @@ -56,7 +85,7 @@ def _create_fat_jar( ) args += [ "--native_libs_file", - ctx.actions.write("native_libs", [cmd_args([native_lib.soname.ensure_str(), native_lib.lib.output], delimiter = " ") for native_lib in native_libs]), + ctx.actions.write("{}native_libs".format(name_prefix), [cmd_args([native_lib.soname.ensure_str(), native_lib.lib.output], delimiter = " ") for native_lib in native_libs]), ] if do_not_create_inner_jar: args += [ @@ -74,7 +103,6 @@ def _create_fat_jar( "nativelibs", ] - main_class = ctx.attrs.main_class if main_class: args += ["--main_class", main_class] @@ -84,7 +112,7 @@ def _create_fat_jar( blocklist = ctx.attrs.blacklist if blocklist: - args += ["--blocklist", ctx.actions.write("blocklist_args", blocklist)] + args += ["--blocklist", ctx.actions.write("{}blocklist_args".format(name_prefix), blocklist)] if ctx.attrs.meta_inf_directory: args += ["--meta_inf_directory", ctx.attrs.meta_inf_directory] @@ -111,7 +139,7 @@ def _create_fat_jar( ctx.actions.run( fat_jar_cmd, local_only = False, 
- category = "fat_jar", + category = "{}fat_jar".format(name_prefix), allow_cache_upload = True, ) @@ -170,15 +198,50 @@ def java_binary_impl(ctx: AnalysisContext) -> list[Provider]: need_to_generate_wrapper = ctx.attrs.generate_wrapper == True do_not_create_inner_jar = ctx.attrs.do_not_create_inner_jar == True packaging_jar_args = packaging_info.packaging_deps.project_as_args("full_jar_args") - outputs = _create_fat_jar(ctx, java_toolchain, cmd_args(packaging_jar_args), native_deps, do_not_create_inner_jar, need_to_generate_wrapper) + main_class = ctx.attrs.main_class - main_artifact = outputs[0] other_outputs = [] + if _should_use_incremental_build(ctx): + # collect all dependencies + dependencies_jars = _get_dependencies_jars(ctx, packaging_jar_args) + + # collect nested targets + incremental_jars = _get_incremental_jars(ctx, packaging_jar_args) + + # generate intermediary jar only with dependencies + deps_outputs = _create_fat_jar( + ctx, + dependencies_jars, + native_deps, + name_prefix = "deps_", + ) + other_outputs = [deps_outputs[0]] + + # generate final jar appending modules to the dependencies jar + outputs = _create_fat_jar( + ctx, + incremental_jars, + native_deps, + do_not_create_inner_jar = do_not_create_inner_jar, + generate_wrapper = need_to_generate_wrapper, + main_class = main_class, + append_jar = deps_outputs[0], + ) + else: + outputs = _create_fat_jar( + ctx, + cmd_args(packaging_jar_args), + native_deps, + do_not_create_inner_jar = do_not_create_inner_jar, + generate_wrapper = need_to_generate_wrapper, + main_class = main_class, + ) + run_cmd = _get_run_cmd( attrs = ctx.attrs, script_mode = _generate_script(need_to_generate_wrapper, native_deps), - main_artifact = main_artifact, + main_artifact = outputs[0], java_toolchain = java_toolchain, ) @@ -200,7 +263,7 @@ def java_binary_impl(ctx: AnalysisContext) -> list[Provider]: ) return [ - DefaultInfo(default_output = main_artifact, other_outputs = other_outputs, sub_targets = sub_targets), + 
DefaultInfo(default_output = outputs[0], other_outputs = other_outputs, sub_targets = sub_targets), RunInfo(args = run_cmd), create_template_info(ctx, packaging_info, first_order_libs), class_to_src_map, diff --git a/prelude/java/tools/fat_jar.py b/prelude/java/tools/fat_jar.py index 5e5c5320737a..3c9f9b52c034 100644 --- a/prelude/java/tools/fat_jar.py +++ b/prelude/java/tools/fat_jar.py @@ -14,7 +14,7 @@ import zipfile from shutil import copy, copytree from tempfile import TemporaryDirectory -from typing import Dict, List +from typing import Optional import utils @@ -109,19 +109,45 @@ def _parse_args(): action="store_true", help="Whether to create an inner jar if native libraries are present.", ) + parser.add_argument( + "--append_jar", + required=False, + type=pathlib.Path, + help="path to a jar used as base of the new jar, which new files will be added", + ) return parser.parse_args() -def _merge_dictionaries(dict1: Dict[str, str], dict2: Dict[str, str]) -> Dict[str, str]: - return {**dict1, **dict2} - - -def _shlex_split(cmd: str) -> List[str]: - if platform.system() == "Windows": - return cmd.split() - else: - return shlex.split(cmd) +def _fat_jar( + jar_builder_tool: str, + output_path: str, + append_jar: Optional[str] = None, + main_class: Optional[str] = None, + entries_to_jar_file: Optional[str] = None, + override_entries_to_jar_file: Optional[str] = None, + manifest_file: Optional[str] = None, + blocklist_file: Optional[str] = None, +) -> None: + cmd = [] + cmd.extend(utils.shlex_split(jar_builder_tool)) + if append_jar: + cmd.extend(["--append-jar", append_jar]) + if main_class: + cmd.extend(["--main-class", main_class]) + if entries_to_jar_file: + cmd.extend(["--entries-to-jar", entries_to_jar_file]) + if override_entries_to_jar_file: + cmd.extend(["--override-entries-to-jar", override_entries_to_jar_file]) + if manifest_file: + cmd.extend(["--manifest-file", manifest_file]) + if blocklist_file: + cmd.extend(["--blocklist-patterns", blocklist_file]) + 
cmd.extend(["--blocklist-patterns-matcher", "substring"]) + cmd.append("--merge-manifests") + cmd.extend(["--output", output_path]) + utils.log_message("fat_jar_cmd: {}".format(cmd)) + utils.execute_command(cmd) # Reads a list of files from native_libs_file and symlinks each as files in native_libs_dir. @@ -151,6 +177,7 @@ def main(): manifest = args.manifest blocklist_file = args.blocklist meta_inf_directory = args.meta_inf_directory + append_jar = args.append_jar generate_wrapper = args.generate_wrapper classpath_args_output = args.classpath_args_output @@ -191,6 +218,8 @@ def main(): utils.log_message("classpath_args_output: {}".format(classpath_args_output)) utils.log_message("java_tool: {}".format(java_tool)) utils.log_message("script_marker_file_name: {}".format(script_marker_file_name)) + if append_jar: + utils.log_message("append_jar = {}".format(append_jar)) need_to_process_native_libs = native_libs_file is not None if need_to_process_native_libs and not do_not_create_inner_jar: @@ -258,8 +287,8 @@ def main(): else: # generate fat jar - jar_cmd = [] - jar_cmd.extend(utils.shlex_split(jar_builder_tool)) + entries_to_jar_file = jars_file + override_entries_to_jar = None if need_to_process_native_libs and do_not_create_inner_jar: # symlink native libs to `nativelibs` directory @@ -283,11 +312,7 @@ def main(): f.write(str(f2.read()) + "\n") f.write(str(native_libs_staging)) - jar_cmd.extend( - ["--entries-to-jar", jars_and_native_libs_directory_file] - ) - else: - jar_cmd.extend(["--entries-to-jar", jars_file]) + entries_to_jar_file = jars_and_native_libs_directory_file if meta_inf_directory: meta_inf_staging = pathlib.Path(temp_dir) / "meta_inf_staging" @@ -305,28 +330,26 @@ def main(): with open(meta_inf_directory_file, "w") as f: f.write(str(meta_inf_staging)) - jar_cmd.extend(["--override-entries-to-jar", meta_inf_directory_file]) - - if main_class: - jar_cmd.extend(["--main-class", main_class]) - - if blocklist_file: - 
jar_cmd.extend(["--blocklist-patterns", blocklist_file]) - jar_cmd.extend(["--blocklist-patterns-matcher", "substring"]) - - if manifest: - jar_cmd.extend(["--manifest-file", manifest]) - - jar_cmd.append("--merge-manifests") + override_entries_to_jar = meta_inf_directory_file jar_output = ( os.path.join(temp_dir, "inner.jar") if need_to_process_native_libs and not do_not_create_inner_jar else output_path ) - jar_cmd.extend(["--output", jar_output]) - utils.log_message("jar_cmd: {}".format(jar_cmd)) - utils.execute_command(jar_cmd) + + utils.log_message("jar_output: {}".format(jar_output)) + + _fat_jar( + jar_builder_tool=jar_builder_tool, + output_path=jar_output, + main_class=main_class, + entries_to_jar_file=entries_to_jar_file, + override_entries_to_jar_file=override_entries_to_jar, + manifest_file=manifest, + blocklist_file=blocklist_file, + append_jar=append_jar, + ) if need_to_process_native_libs and not do_not_create_inner_jar: fat_jar_content_dir = os.path.join(temp_dir, "fat_jar_content_dir") @@ -385,13 +408,12 @@ def main(): with open(entries_to_jar_file, "w") as f: f.write("\n".join([contents_zip_path, str(fat_jar_lib)])) - fat_jar_cmd = [] - fat_jar_cmd.extend(utils.shlex_split(jar_builder_tool)) - fat_jar_cmd.extend(["--main-class", fat_jar_main_class]) - fat_jar_cmd.extend(["--output", output_path]) - fat_jar_cmd.extend(["--entries-to-jar", entries_to_jar_file]) - fat_jar_cmd.append("--merge-manifests") - utils.execute_command(fat_jar_cmd) + _fat_jar( + jar_builder_tool=jar_builder_tool, + output_path=output_path, + main_class=fat_jar_main_class, + entries_to_jar_file=entries_to_jar_file, + ) if __name__ == "__main__":