From 4ded913c25e802c5ecff3adfab15302978ef4b6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20Such=C3=BD?= Date: Sat, 18 Jan 2025 14:36:37 +0100 Subject: [PATCH 1/2] change default value of --processes to (number of CPUs)-1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Related: #2980 Signed-off-by: Miroslav Suchý --- CHANGELOG.rst | 4 ++++ docs/source/cli-reference/help-text-options.rst | 2 +- docs/source/rst_snippets/core_options.rst | 2 +- src/scancode/cli.py | 11 +++++++++-- tests/scancode/data/help/help.txt | 2 +- tests/scancode/data/help/help_linux.txt | 2 +- 6 files changed, 17 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 91521e9c6e8..ec33d9ad025 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -30,6 +30,10 @@ v33.0.0 (next next, roadmap) license detection and reduce false positives. See https://github.com/nexB/scancode-toolkit/issues/3300 +- The default value for `--processes` was previously 1. It was changed + to (number of CPUs)-1, with a minimum of 1. + See https://github.com/aboutcode-org/scancode-toolkit/issues/2980 + - File categorization support added, a post scan plugin tagging files with priority levels for review, and also take advantage of these in other summary plugins. diff --git a/docs/source/cli-reference/help-text-options.rst b/docs/source/cli-reference/help-text-options.rst index 5e7a7694601..58f0fe2e88a 100644 --- a/docs/source/cli-reference/help-text-options.rst +++ b/docs/source/cli-reference/help-text-options.rst @@ -165,7 +165,7 @@ The Following Help Text is displayed, i.e. This is the help text for Scancode Ve seconds. [default: 120 seconds] -n, --processes INT Set the number of parallel processes to use. Disable parallel processing if 0. Also disable threading if - -1. [default: 1] + -1. [default: (number of CPUs)-1] -q, --quiet Do not print summary or progress. -v, --verbose Print progress as file-by-file path instead of a progress bar. Print verbose scan counters. 
diff --git a/docs/source/rst_snippets/core_options.rst b/docs/source/rst_snippets/core_options.rst index 277e9570d1e..453763ca57d 100644 --- a/docs/source/rst_snippets/core_options.rst +++ b/docs/source/rst_snippets/core_options.rst @@ -2,7 +2,7 @@ All "Core" Scan Options ----------------------- -n, --processes INTEGER Scan ```` using n parallel processes. - [Default: 1] + [Default: (number of CPUs)-1] -v, --verbose Print verbose file-by-file progress messages. diff --git a/src/scancode/cli.py b/src/scancode/cli.py index add654f2ec4..9369fe7d91c 100644 --- a/src/scancode/cli.py +++ b/src/scancode/cli.py @@ -201,6 +201,13 @@ def validate_input_path(ctx, param, value): return value +def default_processes(): + """Return the default for --processes: (number of CPUs)-1, at least 1; a CPU count that cannot be determined is treated as 1.""" + cpu_count = os.cpu_count() or 1 + if cpu_count > 1: + return cpu_count-1 + else: + return 1 @click.command(name='scancode', epilog=epilog_text, @@ -230,10 +237,10 @@ def validate_input_path(ctx, param, value): @click.option('-n', '--processes', type=int, - default=1, + default=default_processes(), metavar='INT', help='Set the number of parallel processes to use. ' - 'Disable parallel processing if 0. Also disable threading if -1. [default: 1]', + 'Disable parallel processing if 0. Also disable threading if -1. [default: (number of CPUs)-1]', help_group=cliutils.CORE_GROUP, sort_order=10, cls=PluggableCommandLineOption) @click.option('--timeout', diff --git a/tests/scancode/data/help/help.txt b/tests/scancode/data/help/help.txt index 6def63b8893..8a486871b5d 100644 --- a/tests/scancode/data/help/help.txt +++ b/tests/scancode/data/help/help.txt @@ -138,7 +138,7 @@ Options: seconds. [default: 120 seconds] -n, --processes INT Set the number of parallel processes to use. Disable parallel processing if 0. Also disable threading if - -1. [default: 1] + -1. [default: (number of CPUs)-1] -q, --quiet Do not print summary or progress. 
-v, --verbose Print progress as file-by-file path instead of a progress bar. Print verbose scan counters. diff --git a/tests/scancode/data/help/help_linux.txt b/tests/scancode/data/help/help_linux.txt index a6527a51f3d..9ca1d26d68a 100644 --- a/tests/scancode/data/help/help_linux.txt +++ b/tests/scancode/data/help/help_linux.txt @@ -140,7 +140,7 @@ Options: seconds. [default: 120 seconds] -n, --processes INT Set the number of parallel processes to use. Disable parallel processing if 0. Also disable threading if - -1. [default: 1] + -1. [default: (number of CPUs)-1] -q, --quiet Do not print summary or progress. -v, --verbose Print progress as file-by-file path instead of a progress bar. Print verbose scan counters. From 55de77ff96eb33d731c2ad0116b402f44b943397 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miroslav=20Such=C3=BD?= Date: Thu, 16 Jan 2025 11:49:53 +0100 Subject: [PATCH 2/2] add length to progress bar MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit so progress bar shows real progress Signed-off-by: Miroslav Suchý --- CHANGELOG.rst | 3 +++ src/scancode/cli.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ec33d9ad025..3041aa0e8ea 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -42,6 +42,9 @@ v33.0.0 (next next, roadmap) - Update Dockerfile and test container build. See https://github.com/aboutcode-org/scancode-toolkit/issues/3955 +- The progress bar now has a length, so it shows real progress and lets you estimate the remaining duration. 
+ See https://github.com/aboutcode-org/scancode-toolkit/issues/3342 + v32.3.2 - 2024-01-20 -------------------- diff --git a/src/scancode/cli.py b/src/scancode/cli.py index 9369fe7d91c..293932aca90 100644 --- a/src/scancode/cli.py +++ b/src/scancode/cli.py @@ -1252,6 +1252,8 @@ def scan_codebase( # NOTE: we never scan directories resources = ((r.location, r.path) for r in codebase.walk() if r.is_file) + if progress_manager: + resources = list(resources) use_threading = processes >= 0 runner = partial( @@ -1287,6 +1289,8 @@ def scan_codebase( if progress_manager: scans = progress_manager(scans) + # times two because of #3344 + scans.length = len(resources)*2 # hack to avoid using a context manager if hasattr(scans, '__enter__'): scans.__enter__()