From 61d3755c6f4f7c05aa60e3b692a2806db056edcc Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Tue, 27 May 2025 20:36:48 +0300
Subject: [PATCH 1/4] Intentional errors

---
 .github/workflows/analysis_workflow.yml | 33 +-------------
 .github/workflows/build.yml             | 45 +------------------
 .github/workflows/build_with_conda.yml  | 29 +----------
 .github/workflows/static_analysis.yml   |  7 +++
 .../version/local_versioned_engine.cpp   | 24 ++++++++++
 cpp/arcticdb/version/version_core.cpp    | 18 ++++++++
 6 files changed, 52 insertions(+), 104 deletions(-)

diff --git a/.github/workflows/analysis_workflow.yml b/.github/workflows/analysis_workflow.yml
index 02f7c80a86..42d9e29b70 100644
--- a/.github/workflows/analysis_workflow.yml
+++ b/.github/workflows/analysis_workflow.yml
@@ -1,36 +1,5 @@
 name: Build with analysis tools
-on:
-  workflow_dispatch:
-    inputs:
-      run_all_benchmarks:
-        description: Run all benchmarks
-        type: boolean
-        default: false
-      dev_image_tag:
-        description: Tag of the ArcticDB development image to use for benchmark and code coverage flows
-        type: string
-        default: latest
-      suite_to_run:
-        description: Run LMDB suite or REAL storage (or both - ALL)
-        type: choice
-        options:
-          - 'LMDB'
-          - 'REAL'
-          - 'ALL'
-        default: 'LMDB'
-      suite_overwrite:
-        description: Specify regular expression for specific tests to be executed
-        type: string
-        default: ''
-
-
-  schedule: # Schedule the job to run at 12 a.m. daily
-    - cron: '0 0 * * *'
-
-  pull_request_target:
-    paths-ignore:
-      - "**/*.md"
-
+on: workflow_call
 
 jobs:
   get_commits_to_benchmark:
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index ed66f82711..f03a75a333 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -1,48 +1,5 @@
 name: Build and Test
-on:
-  push:
-    # On push only local storage tests get executed
-    tags: ["v**"]
-    branches: ["master"]
-  pull_request:
-    # On pull requests only local storage tests get executed
-    branches: ["**"]
-  schedule:
-    # IMPORTANT: For scheduled job we execute AWS_S3
-    - cron: '0 23 * * 0,1,2,3,4' # Start previous dat at 23:00 to finish next day
-  workflow_dispatch:
-    inputs:
-      persistent_storage:
-        description: Run the persistent storage tests?
-        type: choice
-        options:
-          - 'no'
-          - 'AWS_S3'
-          - 'GCPXML'
-        default: 'no'
-
-      pypi_publish:
-        type: boolean
-      publish_env:
-        description: Environment to publish to
-        type: environment
-      cmake_preset_type:
-        description: Override CMAKE preset type
-        type: choice
-        options: ["-", debug, release]
-      dev_image_tag:
-        description: Tag of the ArcticDB development image to use for the Linux C++ tests build
-        type: string
-        default: arcticdb-dev-clang:latest
-      pytest_args:
-        description: Rewrite what tests will run or do your own pytest line if string starts with pytest ...
-          (Example -- pytest -n auto -v --count=50 -x python/tests/compat)
-        type: string
-        default: ""
-      version_cache_full_test:
-        description: 'Run tests with both version cache 0 and 2000000000'
-        required: false
-        default: false
-        type: boolean
+on: workflow_call
 run-name: Building ${{github.ref_name}} on ${{github.event_name}} by ${{github.actor}}
 concurrency:
   group: ${{github.ref}}
diff --git a/.github/workflows/build_with_conda.yml b/.github/workflows/build_with_conda.yml
index ed8da7c17b..fd92f1b837 100644
--- a/.github/workflows/build_with_conda.yml
+++ b/.github/workflows/build_with_conda.yml
@@ -1,32 +1,5 @@
 name: Build with conda
-on:
-  push:
-    branches:
-      - master
-  # For Pull-Requests, this runs the CI on merge commit
-  # of HEAD with the target branch instead on HEAD, allowing
-  # testing against potential new states which might have
-  # been introduced in the target branch last commits.
-  # See: https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#pull_request
-  pull_request:
-
-  workflow_dispatch:
-    inputs:
-      run_on_arm_mac:
-        description: 'Run on arm macos'
-        type: boolean
-        required: false
-        default: false
-      run_cpp_tests:
-        description: 'Run C++ tests'
-        type: boolean
-        required: true
-        default: true
-      run_custom_pytest_command:
-        description: 'Run custom pytest command (curdir is project root). Or pass additional arguments to default command'
-        type: string
-        required: false
-        default: ""
+on: workflow_call
 
 
 jobs:
diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml
index abb3a810ac..e4f40b666f 100644
--- a/.github/workflows/static_analysis.yml
+++ b/.github/workflows/static_analysis.yml
@@ -4,6 +4,12 @@
   workflow_dispatch:
   schedule:
     - cron: "0 3 * * *"
+  pull_request:
+
+permissions:
+  pull-requests: write
+  contents: read
+  discussions: write
 
 jobs:
   polaris-scan:
@@ -73,3 +79,4 @@
           polaris_reports_sarif_groupSCAIssues: true
           polaris_upload_sarif_report: true
           polaris_prComment_severities: "high,critical,medium,low"
+
diff --git a/cpp/arcticdb/version/local_versioned_engine.cpp b/cpp/arcticdb/version/local_versioned_engine.cpp
index ffffcf71df..7f100d2002 100644
--- a/cpp/arcticdb/version/local_versioned_engine.cpp
+++ b/cpp/arcticdb/version/local_versioned_engine.cpp
@@ -723,6 +723,30 @@ VersionedItem LocalVersionedEngine::write_versioned_dataframe_internal(
     bool allow_sparse,
     bool validate_index
 ) {
+
+    // =======================================================
+    // INTENTIONAL ERROR
+    std::vector<int> v(1000);
+    for (int i = 0; i < 1000; i++) {
+        v[i] = i;
+    }
+    std::vector<int> v1 = std::move(v);
+    for (size_t i = 0; i < v.size(); ++i) {
+        std::cout << v[i] << std::endl;
+    }
+    // =======================================================
diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp
--- a/cpp/arcticdb/version/version_core.cpp
+++ b/cpp/arcticdb/version/version_core.cpp
     missing_data::check<ErrorCode::E_NO_STAGED_SEGMENTS>(
         has_incomplete_segments, "Finalizing staged data is not allowed with empty staging area"

From 233bfe70a45bf03aadef7dd348cd3fd351624eee Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Tue, 8 Jul 2025 15:43:16 +0300
Subject: [PATCH 2/4] Add new errors

---
 cpp/arcticdb/async/async_store.hpp | 19 ++++++++++++
 python/dedup.py                    | 49 ++++++++++++++++++++++++++++++
 2 files changed, 68 insertions(+)
 create mode 100644 python/dedup.py

diff --git a/cpp/arcticdb/async/async_store.hpp b/cpp/arcticdb/async/async_store.hpp
index 4773179c01..47ccf20520 100644
--- a/cpp/arcticdb/async/async_store.hpp
+++ b/cpp/arcticdb/async/async_store.hpp
@@ -71,6 +71,25 @@ class AsyncStore : public Store {
     util::check(segment.descriptor().id() == stream_id,
                 "Descriptor id mismatch in atom key {} != {}",
                 stream_id, segment.descriptor().id());
+    // ================== INTENTIONAL ERRORS =============================
+    // 1. Use After Free
+    char* data = new char[100];
+    std::strcpy(data, "Coverity test - use after free");
+    delete[] data;
+    std::cout << "Data (after delete): " << data << std::endl; // USE_AFTER_FREE
+
+    // 2. Null Pointer Dereference
+    int* ptr = nullptr;
+    *ptr = 42; // NULL_POINTER_DEREFERENCE
+
+    // 3. Resource Leak
+    FILE* file = fopen("temp.txt", "w");
+    if (file != nullptr) {
+        fprintf(file, "Testing resource leak\n");
+        // Missing fclose(file); // RESOURCE_LEAK
+    }
+    // ======================================================================
+
     return async::submit_cpu_task(EncodeAtomTask{
         key_type, version_id, stream_id, start_index, end_index, current_timestamp(),
         std::move(segment), codec_, encoding_version_
diff --git a/python/dedup.py b/python/dedup.py
new file mode 100644
index 0000000000..8c046a72d0
--- /dev/null
+++ b/python/dedup.py
@@ -0,0 +1,49 @@
+import arcticdb
+import numpy as np
+import pandas as pd
+from arcticdb.options import LibraryOptions
+from arcticdb.version_store.library import Library
+from arcticdb_ext.storage import KeyType
+
+
+def count_key_types_for_symbol(lib, symbol):
+    # Get the library tool
+    lib_tool = lib._nvs.library_tool()
+
+    # Define all possible key types to check
+    key_types = [
+        KeyType.VERSION,
+        KeyType.TABLE_DATA,
+        KeyType.TABLE_INDEX,
+        KeyType.SNAPSHOT_REF,
+        KeyType.MULTI_KEY,
+        KeyType.APPEND_DATA,
+        KeyType.VERSION_REF
+    ]
+
+    # Count keys for each type for the specific symbol
+    symbol_key_counts = {}
+
+    for key_type in key_types:
+        keys = list(lib_tool.find_keys_for_symbol(key_type, symbol))
+        symbol_key_counts[key_type.name] = len(keys)
+    return symbol_key_counts
+
+
+if __name__ == "__main__":
+    library_name = "test_lib"
+    ac = arcticdb.Arctic("lmdb://test")
+    lib_opts = LibraryOptions(rows_per_segment=2, dedup=True)
+    if ac.has_library(library_name):
+        ac.delete_library(library_name)
+    lib = ac.get_library(library_name, library_options=lib_opts, create_if_missing=True)
+
+    df1 = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=pd.date_range(start="2023-01-01", periods=5, freq="D"))
+    lib.write("test_df", df1)
+    print(count_key_types_for_symbol(lib, "test_df"))
+    df2 = pd.DataFrame({"a": [3, 4, 5]}, index=pd.date_range(start="2023-01-03", periods=3, freq="D"))
+    lib.write("test_df", df2)
+    print(count_key_types_for_symbol(lib, "test_df"))
+
+
+    
\ No newline at end of file

From 2fd2164dd4b572f910e0b3e254bf5286485d9bb3 Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Sun, 10 Aug 2025 22:43:06 +0300
Subject: [PATCH 3/4] Change action version

---
 .github/workflows/static_analysis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml
index e4f40b666f..91af933405 100644
--- a/.github/workflows/static_analysis.yml
+++ b/.github/workflows/static_analysis.yml
@@ -61,7 +61,7 @@
         run: cp ${{github.workspace}}/coverity.yaml ${{github.workspace}}/cpp/out/linux-release-build
 
       - name: Polaris PR Scan
-        uses: blackduck-inc/black-duck-security-scan@v2.0.0
+        uses: blackduck-inc/black-duck-security-scan@v2
         with:
           polaris_server_url: ${{ vars.POLARIS_SERVER_URL }}
           polaris_access_token: ${{ secrets.POLARIS_ACCESS_TOKEN }}

From 5e51c7d0380cc6ecb13e7d5005e1041acc7e2285 Mon Sep 17 00:00:00 2001
From: Vasil Pashov
Date: Sun, 10 Aug 2025 22:44:13 +0300
Subject: [PATCH 4/4] Change action version

---
 .github/workflows/static_analysis.yml |  2 +-
 python/dedup.py                       | 49 ---------------------------
 2 files changed, 1 insertion(+), 50 deletions(-)
 delete mode 100644 python/dedup.py

diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml
index 91af933405..cde22305c3 100644
--- a/.github/workflows/static_analysis.yml
+++ b/.github/workflows/static_analysis.yml
@@ -61,7 +61,7 @@
         run: cp ${{github.workspace}}/coverity.yaml ${{github.workspace}}/cpp/out/linux-release-build
 
       - name: Polaris PR Scan
-        uses: blackduck-inc/black-duck-security-scan@v2
+        uses: blackduck-inc/black-duck-security-scan@latest
         with:
           polaris_server_url: ${{ vars.POLARIS_SERVER_URL }}
           polaris_access_token: ${{ secrets.POLARIS_ACCESS_TOKEN }}
diff --git a/python/dedup.py b/python/dedup.py
deleted file mode 100644
index 8c046a72d0..0000000000
--- a/python/dedup.py
+++ /dev/null
@@ -1,49 +0,0 @@
-import arcticdb
-import numpy as np
-import pandas as pd
-from arcticdb.options import LibraryOptions
-from arcticdb.version_store.library import Library
-from arcticdb_ext.storage import KeyType
-
-
-def count_key_types_for_symbol(lib, symbol):
-    # Get the library tool
-    lib_tool = lib._nvs.library_tool()
-
-    # Define all possible key types to check
-    key_types = [
-        KeyType.VERSION,
-        KeyType.TABLE_DATA,
-        KeyType.TABLE_INDEX,
-        KeyType.SNAPSHOT_REF,
-        KeyType.MULTI_KEY,
-        KeyType.APPEND_DATA,
-        KeyType.VERSION_REF
-    ]
-
-    # Count keys for each type for the specific symbol
-    symbol_key_counts = {}
-
-    for key_type in key_types:
-        keys = list(lib_tool.find_keys_for_symbol(key_type, symbol))
-        symbol_key_counts[key_type.name] = len(keys)
-    return symbol_key_counts
-
-
-if __name__ == "__main__":
-    library_name = "test_lib"
-    ac = arcticdb.Arctic("lmdb://test")
-    lib_opts = LibraryOptions(rows_per_segment=2, dedup=True)
-    if ac.has_library(library_name):
-        ac.delete_library(library_name)
-    lib = ac.get_library(library_name, library_options=lib_opts, create_if_missing=True)
-
-    df1 = pd.DataFrame({"a": [1, 2, 3, 4, 5]}, index=pd.date_range(start="2023-01-01", periods=5, freq="D"))
-    lib.write("test_df", df1)
-    print(count_key_types_for_symbol(lib, "test_df"))
-    df2 = pd.DataFrame({"a": [3, 4, 5]}, index=pd.date_range(start="2023-01-03", periods=3, freq="D"))
-    lib.write("test_df", df2)
-    print(count_key_types_for_symbol(lib, "test_df"))
-
-
-    
\ No newline at end of file