From 58b58c5f79961b3b04b0071cbc22ecd42ab62607 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 15:39:06 -0400 Subject: [PATCH 01/14] switch from iostream to stdio --- print_args.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/print_args.cpp b/print_args.cpp index b672329b..65601031 100644 --- a/print_args.cpp +++ b/print_args.cpp @@ -1,13 +1,13 @@ // Prints the arguments passed to the script -#include +#include int main(int argc, char *argv[]) { - std::cout << "===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS===\n"; - for (int i = 1; i < argc; ++i) { - std::cout << argv[i] << "\n"; - } - std::cout << "===HEDRON_COMPILE_COMMANDS_END_ARGS===\n"; - // We purposely return a non-zero exit code to have the emcc process exit after running this fake clang wrapper. - return 1; + printf("===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS===\n"); + for (int i = 1; i < argc; ++i) { + printf("%s\n",argv[i]); + } + printf("===HEDRON_COMPILE_COMMANDS_END_ARGS===\n"); + // We purposely return a non-zero exit code to have the emcc process exit after running this fake clang wrapper. + return 1; } From 2ddd9741ab52f2216df52a6842b422dccdd43849 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 19:09:38 -0400 Subject: [PATCH 02/14] attempt --- refresh.template.py | 8 +++++--- refresh_compile_commands.bzl | 2 ++ 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 194f365e..189a77d0 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -47,6 +47,8 @@ class SGR(enum.Enum): FG_YELLOW = '\033[0;33m' FG_BLUE = '\033[0;34m' +def _bazel(): + return {bazel_command} def _log_with_sgr(sgr, colored_message, uncolored_message=''): """Log a message to stderr wrapped in an SGR context.""" @@ -104,7 +106,7 @@ def _get_bazel_version(): If the version can't be determined, returns (0, 0, 0). """ bazel_version_process = subprocess.run( - ['bazel', 'version'], + [_bazel(), 'version'], # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -130,7 +132,7 @@ def _get_bazel_version(): def _get_bazel_cached_action_keys(): """Gets the set of actionKeys cached in bazel-out.""" action_cache_process = subprocess.run( - ['bazel', 'dump', '--action_cache'], + [_bazel, 'dump', '--action_cache'], # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, @@ -1200,7 +1202,7 @@ def _get_commands(target: str, flags: str): # For efficiency, have bazel filter out external targets (and therefore actions) before they even get turned into actions or serialized and sent to us. Note: this is a different mechanism than is used for excluding just external headers. target_statment = f"filter('^(//|@//)',{target_statment})" aquery_args = [ - 'bazel', + _bazel(), 'aquery', # Aquery docs if you need em: https://docs.bazel.build/versions/master/aquery.html # Aquery output proto reference: https://github.com/bazelbuild/bazel/blob/master/src/main/protobuf/analysis_v2.proto diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 0210d42b..2346e21a 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -115,8 +115,10 @@ def _expand_template_impl(ctx): "{exclude_headers}": repr(ctx.attr.exclude_headers), "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), + "{bazel_command}": ctx.configuration.default_shell_env.get("BAZEL_WRAPPER_PATH", "bazel"), }, ) + return DefaultInfo(files = depset([script])) _expand_template = rule( From 43b9be408b6010be8a3a9d31ee60d7ca1a514f60 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 19:30:17 -0400 Subject: [PATCH 03/14] repr --- refresh_compile_commands.bzl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 2346e21a..987a1bc2 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -115,7 +115,7 @@ def _expand_template_impl(ctx): "{exclude_headers}": repr(ctx.attr.exclude_headers), "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), - "{bazel_command}": ctx.configuration.default_shell_env.get("BAZEL_WRAPPER_PATH", "bazel"), + "{bazel_command}": repr(ctx.configuration.default_shell_env.get("BAZEL_WRAPPER_PATH", "bazel")), }, ) From 3b2ca795743f6a5174bd49d29f7f2eee9b5e0ce9 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 20:54:32 -0400 Subject: [PATCH 04/14] bazel_command as a parameter --- refresh_compile_commands.bzl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 987a1bc2..8d82f971 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -64,6 +64,7 @@ def refresh_compile_commands( targets = None, exclude_headers = None, exclude_external_sources = False, + bazel_command = "bazel", **kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes. # Convert the various, acceptable target shorthands into the dictionary format # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type. @@ -89,7 +90,7 @@ def refresh_compile_commands( # Generate the core, runnable python script from refresh.template.py script_name = name + ".py" - _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs) + _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, bazel_command = bazel_command, **kwargs) # Combine them so the wrapper calls the main script native.py_binary( @@ -115,7 +116,7 @@ def _expand_template_impl(ctx): "{exclude_headers}": repr(ctx.attr.exclude_headers), "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), - "{bazel_command}": repr(ctx.configuration.default_shell_env.get("BAZEL_WRAPPER_PATH", "bazel")), + "{bazel_command}": repr(ctx.attr.bazel_command), }, ) @@ -131,6 +132,7 @@ _expand_template = rule( # For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330 # Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the sucessor to 6.0 "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"), + "bazel_command": attr.string(default = "bazel"), }, toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], # Needed for find_cpp_toolchain with --incompatible_enable_cc_toolchain_resolution implementation = _expand_template_impl, From c47ca76091fb07703b1c38f0872e76fa961098a6 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 22:19:04 -0400 Subject: [PATCH 05/14] debugging --- refresh.template.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 189a77d0..115b98f8 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -48,7 +48,9 @@ class SGR(enum.Enum): FG_BLUE = '\033[0;34m' def _bazel(): - return {bazel_command} + bazelcmd = {bazel_cmd} + print(bazelcmd) + return bazelcmd def _log_with_sgr(sgr, colored_message, uncolored_message=''): """Log a message to stderr wrapped in an SGR context.""" From 46719c99671fce57aa978a67100b0130422d5d58 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 22:21:07 -0400 Subject: [PATCH 06/14] bug: --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 115b98f8..21689fc0 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -48,7 +48,7 @@ class SGR(enum.Enum): FG_BLUE = '\033[0;34m' def _bazel(): - bazelcmd = {bazel_cmd} + bazelcmd = {bazel_command} print(bazelcmd) return bazelcmd From 86c23c91c81f23699579ce1256f2cc84e0c73c7c Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 22:22:52 -0400 Subject: [PATCH 07/14] fix application --- refresh.template.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index 21689fc0..9743285e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -49,7 +49,6 @@ class SGR(enum.Enum): def _bazel(): bazelcmd = {bazel_command} - print(bazelcmd) return bazelcmd def _log_with_sgr(sgr, colored_message, uncolored_message=''): @@ -134,7 +133,7 @@ def _get_bazel_version(): def _get_bazel_cached_action_keys(): """Gets the set of actionKeys cached in bazel-out.""" action_cache_process = subprocess.run( - [_bazel, 'dump', '--action_cache'], + [_bazel(), 'dump', '--action_cache'], # MIN_PY=3.7: Replace PIPEs with capture_output. stdout=subprocess.PIPE, stderr=subprocess.PIPE, From 3539e2602c331bb0e23005adf20f49d1a0444203 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 22:45:27 -0400 Subject: [PATCH 08/14] don't check exit --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 9743285e..d327b334 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -138,7 +138,7 @@ def _get_bazel_cached_action_keys(): stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), - check=True, # Should always succeed. +# check=True, # Should always succeed. ) action_keys = set() From d49e9b93177b232b0380635ae49adbf8d93c1e7c Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Wed, 28 Aug 2024 22:49:51 -0400 Subject: [PATCH 09/14] do check --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index d327b334..9743285e 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -138,7 +138,7 @@ def _get_bazel_cached_action_keys(): stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding=locale.getpreferredencoding(), -# check=True, # Should always succeed. + check=True, # Should always succeed. ) action_keys = set() From a9d3b034e6a27b8dfb12d92ee168c97c022cf0ad Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Thu, 29 Aug 2024 23:03:27 -0400 Subject: [PATCH 10/14] switch bazel command to a build-time --define --- refresh_compile_commands.bzl | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 8d82f971..f447e924 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -64,7 +64,6 @@ def refresh_compile_commands( targets = None, exclude_headers = None, exclude_external_sources = False, - bazel_command = "bazel", **kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes. # Convert the various, acceptable target shorthands into the dictionary format # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type. @@ -90,7 +89,7 @@ def refresh_compile_commands( # Generate the core, runnable python script from refresh.template.py script_name = name + ".py" - _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, bazel_command = bazel_command, **kwargs) + _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs) # Combine them so the wrapper calls the main script native.py_binary( @@ -116,7 +115,7 @@ def _expand_template_impl(ctx): "{exclude_headers}": repr(ctx.attr.exclude_headers), "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), - "{bazel_command}": repr(ctx.attr.bazel_command), + "{bazel_command}": repr(ctx.var.get("BAZEL_COMMAND", "bazel")), }, ) From 0a1c9af4b2f3fcb6ec0963913b992a4007f9f48b Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Thu, 29 Aug 2024 23:51:21 -0400 Subject: [PATCH 11/14] try with only one thread --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 9743285e..001b19ba 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1146,7 +1146,7 @@ def _convert_compile_commands(aquery_output): # Process each action from Bazelisms -> file paths and their clang commands # Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved with concurrent.futures.ThreadPoolExecutor( - max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor + max_workers=1) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor ) as threadpool: outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) From 46e326fb1495879c812335b5935427c20750eb0f Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Thu, 29 Aug 2024 23:53:19 -0400 Subject: [PATCH 12/14] try with only one thread --- refresh.template.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/refresh.template.py b/refresh.template.py index 001b19ba..b6441955 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -1146,7 +1146,7 @@ def _convert_compile_commands(aquery_output): # Process each action from Bazelisms -> file paths and their clang commands # Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved with concurrent.futures.ThreadPoolExecutor( - max_workers=1) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor + max_workers=1 # trying with only one thread ) as threadpool: outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) From 8c3ab5365da401b223ee787d157a27b0dd6d64e8 Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Fri, 30 Aug 2024 00:16:23 -0400 Subject: [PATCH 13/14] added thread parameter that we can set to 1 --- refresh.template.py | 6 +++++- refresh_compile_commands.bzl | 5 ++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index b6441955..b956e243 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -51,6 +51,10 @@ def _bazel(): bazelcmd = {bazel_command} return bazelcmd +def _threads(): + threads = {threads} + return threads + def _log_with_sgr(sgr, colored_message, uncolored_message=''): """Log a message to stderr wrapped in an SGR context.""" print(sgr.value, colored_message, SGR.RESET.value, uncolored_message, sep='', file=sys.stderr, flush=True) @@ -1146,7 +1150,7 @@ def _convert_compile_commands(aquery_output): # Process each action from Bazelisms -> file paths and their clang commands # Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 resolved with concurrent.futures.ThreadPoolExecutor( - max_workers=1 # trying with only one thread + max_workers=_threads() ) as threadpool: outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions) diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index f447e924..77e0bb7e 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -64,6 +64,7 @@ def refresh_compile_commands( targets = None, exclude_headers = None, exclude_external_sources = False, + threads = "min(32, (os.cpu_count() or 1) + 4)", **kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes. # Convert the various, acceptable target shorthands into the dictionary format # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type. @@ -89,7 +90,7 @@ def refresh_compile_commands( # Generate the core, runnable python script from refresh.template.py script_name = name + ".py" - _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, **kwargs) + _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, threads = threads, **kwargs) # Combine them so the wrapper calls the main script native.py_binary( @@ -116,6 +117,7 @@ def _expand_template_impl(ctx): "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), "{bazel_command}": repr(ctx.var.get("BAZEL_COMMAND", "bazel")), + "{threads}": ctx.attr.threads, }, ) @@ -132,6 +134,7 @@ _expand_template = rule( # Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the sucessor to 6.0 "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"), "bazel_command": attr.string(default = "bazel"), + "threads": attr.string(default = "min(32, (os.cpu_count() or 1) + 4)"), }, toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], # Needed for find_cpp_toolchain with --incompatible_enable_cc_toolchain_resolution implementation = _expand_template_impl, From a9a14c8fdf474b11dcc443c4e1690e2360e4211d Mon Sep 17 00:00:00 2001 From: Simon Thornington Date: Fri, 30 Aug 2024 09:56:38 -0400 Subject: [PATCH 14/14] switch threads param to an int instead of code block, and move the defaulting behavior to the run time --- refresh.template.py | 3 ++- refresh_compile_commands.bzl | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/refresh.template.py b/refresh.template.py index b956e243..27a1f4b3 100644 --- a/refresh.template.py +++ b/refresh.template.py @@ -52,7 +52,8 @@ def _bazel(): return bazelcmd def _threads(): - threads = {threads} + user_max_threads = {max_threads} + threads = user_max_threads if user_max_threads else min(32, (os.cpu_count() or 1) + 4) return threads def _log_with_sgr(sgr, colored_message, uncolored_message=''): diff --git a/refresh_compile_commands.bzl b/refresh_compile_commands.bzl index 77e0bb7e..016f8586 100644 --- a/refresh_compile_commands.bzl +++ b/refresh_compile_commands.bzl @@ -64,7 +64,7 @@ def refresh_compile_commands( targets = None, exclude_headers = None, exclude_external_sources = False, - threads = "min(32, (os.cpu_count() or 1) + 4)", + max_threads = None, **kwargs): # For the other common attributes. Tags, compatible_with, etc. https://docs.bazel.build/versions/main/be/common-definitions.html#common-attributes. # Convert the various, acceptable target shorthands into the dictionary format # In Python, `type(x) == y` is an antipattern, but [Starlark doesn't support inheritance](https://bazel.build/rules/language), so `isinstance` doesn't exist, and this is the correct way to switch on type. @@ -90,7 +90,7 @@ def refresh_compile_commands( # Generate the core, runnable python script from refresh.template.py script_name = name + ".py" - _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, threads = threads, **kwargs) + _expand_template(name = script_name, labels_to_flags = targets, exclude_headers = exclude_headers, exclude_external_sources = exclude_external_sources, max_threads = max_threads, **kwargs) # Combine them so the wrapper calls the main script native.py_binary( @@ -117,7 +117,7 @@ def _expand_template_impl(ctx): "{exclude_external_sources}": repr(ctx.attr.exclude_external_sources), "{print_args_executable}": repr(ctx.executable._print_args_executable.path), "{bazel_command}": repr(ctx.var.get("BAZEL_COMMAND", "bazel")), - "{threads}": ctx.attr.threads, + "{max_threads}": repr(ctx.attr.max_threads), }, ) @@ -134,7 +134,7 @@ _expand_template = rule( # Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the sucessor to 6.0 "_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"), "bazel_command": attr.string(default = "bazel"), - "threads": attr.string(default = "min(32, (os.cpu_count() or 1) + 4)"), + "max_threads": attr.int(), }, toolchains = ["@bazel_tools//tools/cpp:toolchain_type"], # Needed for find_cpp_toolchain with --incompatible_enable_cc_toolchain_resolution implementation = _expand_template_impl,