From 1c8b20e4e3a24a47fec3dd0102c2b53c8ff9cc61 Mon Sep 17 00:00:00 2001 From: Douglas Coburn Date: Mon, 24 Feb 2025 11:30:13 -0700 Subject: [PATCH 1/5] Fixes for diff logic --- Pipfile.lock | 20 ++++++ socketsecurity/core/__init__.py | 110 ++++++++++++++++++-------------- socketsecurity/core/classes.py | 8 ++- 3 files changed, 90 insertions(+), 48 deletions(-) create mode 100644 Pipfile.lock diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..b6df5da --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,20 @@ +{ + "_meta": { + "hash": { + "sha256": "702ad05de9bc9de99a4807c8dde1686f31e0041d7b5f6f6b74861195a52110f5" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.12" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": {}, + "develop": {} +} diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py index 4452072..b872096 100644 --- a/socketsecurity/core/__init__.py +++ b/socketsecurity/core/__init__.py @@ -1,17 +1,13 @@ -import base64 -import json import logging import time from dataclasses import asdict from glob import glob from pathlib import PurePath -from typing import BinaryIO, Dict, List, Optional, Tuple - +from typing import BinaryIO, Dict, List, Tuple from socketdev import socketdev from socketdev.fullscans import ( FullScanParams, - SocketArtifact, - DiffArtifact, + SocketArtifact ) from socketdev.org import Organization from socketdev.repos import RepositoryInfo @@ -27,8 +23,9 @@ Purl, ) from socketsecurity.core.exceptions import ( - APIResourceNotFound, + APIResourceNotFound ) +from socketdev.exceptions import APIFailure from socketsecurity.core.licenses import Licenses from .socket_config import SocketConfig @@ -216,7 +213,7 @@ def load_files_for_sending(files: List[str], workspace: str) -> List[Tuple[str, return send_files - def create_full_scan(self, files: List[str], params: FullScanParams) -> FullScan: + def create_full_scan(self, files: List[str], params: FullScanParams, has_head_scan: bool = False) -> FullScan: """ Creates a new full scan via the Socket API. @@ -236,10 +233,10 @@ def create_full_scan(self, files: List[str], params: FullScanParams) -> FullScan raise Exception(f"Error creating full scan: {res.message}, status: {res.status}") full_scan = FullScan(**asdict(res.data)) - - full_scan_artifacts_dict = self.get_sbom_data(full_scan.id) - full_scan.sbom_artifacts = self.get_sbom_data_list(full_scan_artifacts_dict) - full_scan.packages = self.create_packages_dict(full_scan.sbom_artifacts) + if not has_head_scan: + full_scan_artifacts_dict = self.get_sbom_data(full_scan.id) + full_scan.sbom_artifacts = self.get_sbom_data_list(full_scan_artifacts_dict) + full_scan.packages = self.create_packages_dict(full_scan.sbom_artifacts) create_full_end = time.time() total_time = create_full_end - create_full_start @@ -317,12 +314,13 @@ def get_package_license_text(self, package: Package) -> str: return "" - def get_repo_info(self, repo_slug: str) -> RepositoryInfo: + def get_repo_info(self, repo_slug: str, default_branch: str = "socket-default-branch") -> RepositoryInfo: """ Gets repository information from the Socket API. Args: repo_slug: Repository slug to get info for + default_branch: Default branch string to use if the repo doesn't exist Returns: RepositoryInfo object @@ -330,11 +328,23 @@ def get_repo_info(self, repo_slug: str) -> RepositoryInfo: Raises: Exception: If API request fails """ - response = self.sdk.repos.repo(self.config.org_slug, repo_slug) - if not response.success: - log.error(f"Failed to get repository: {response.status}") - log.error(response.message) - raise Exception(f"Failed to get repository info: {response.status}, message: {response.message}") + try: + response = self.sdk.repos.repo(self.config.org_slug, repo_slug) + if not response.success: + log.error(f"Failed to get repository: {response.status}") + log.error(response.message) + # raise Exception(f"Failed to get repository info: {response.status}, message: {response.message}") + except APIFailure: + log.warning(f"Failed to get repository {repo_slug}, attempting to create it") + create_response = self.sdk.repos.post(self.config.org_slug, name=repo_slug, default_branch=default_branch) + if not create_response.success: + log.error(f"Failed to create repository: {create_response.status}") + log.error(create_response.message) + raise Exception( + f"Failed to create repository: {create_response.status}, message: {create_response.message}" + ) + else: + return create_response.data return response.data def get_head_scan_for_repo(self, repo_slug: str) -> str: @@ -350,24 +360,36 @@ def get_head_scan_for_repo(self, repo_slug: str) -> str: repo_info = self.get_repo_info(repo_slug) return repo_info.head_full_scan_id if repo_info.head_full_scan_id else None - def get_added_and_removed_packages(self, head_full_scan: Optional[FullScan], new_full_scan: FullScan) -> Tuple[Dict[str, Package], Dict[str, Package]]: + @staticmethod + def update_package_values(pkg: Package) -> Package: + pkg.purl = f"{pkg.name}@{pkg.version}" + pkg.url = f"https://socket.dev/{pkg.type}/package" + if pkg.namespace: + pkg.purl = f"{pkg.namespace}/{pkg.purl}" + pkg.url += f"/{pkg.namespace}" + pkg.url += f"/{pkg.name}/overview/{pkg.version}" + return pkg + + def get_added_and_removed_packages(self, head_full_scan_id: str, new_full_scan: FullScan) -> Tuple[Dict[str, Package], Dict[str, Package]]: """ Get packages that were added and removed between scans. Args: head_full_scan: Previous scan (may be None if first scan) - new_full_scan: New scan just created + head_full_scan_id: New scan just created Returns: Tuple of (added_packages, removed_packages) dictionaries """ - if head_full_scan is None: + if head_full_scan_id is None: log.info(f"No head scan found. New scan ID: {new_full_scan.id}") return new_full_scan.packages, {} - log.info(f"Comparing scans - Head scan ID: {head_full_scan.id}, New scan ID: {new_full_scan.id}") - diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan.id, new_full_scan.id).data - + log.info(f"Comparing scans - Head scan ID: {head_full_scan_id}, New scan ID: {new_full_scan.id}") + diff_start = time.time() + diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan_id, new_full_scan.id).data + diff_end = time.time() + log.info(f"Diff Report Gathered in {diff_end - diff_start:.2f} seconds") log.info(f"Diff report artifact counts:") log.info(f"Added: {len(diff_report.artifacts.added)}") log.info(f"Removed: {len(diff_report.artifacts.removed)}") @@ -384,32 +406,24 @@ def get_added_and_removed_packages(self, head_full_scan: Optional[FullScan], new for artifact in added_artifacts: try: pkg = Package.from_diff_artifact(asdict(artifact)) + pkg = Core.update_package_values(pkg) added_packages[artifact.id] = pkg except KeyError: log.error(f"KeyError: Could not create package from added artifact {artifact.id}") log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}") - matches = [p for p in new_full_scan.packages.values() if p.name == artifact.name and p.version == artifact.version] - if matches: - log.error(f"Found {len(matches)} packages with matching name/version:") - for m in matches: - log.error(f" ID: {m.id}, name: {m.name}, version: {m.version}") - else: - log.error("No matching packages found in new_full_scan") + log.error("No matching packages found in new_full_scan") for artifact in removed_artifacts: try: pkg = Package.from_diff_artifact(asdict(artifact)) + pkg = Core.update_package_values(pkg) + if pkg.namespace: + pkg.purl += f"{pkg.namespace}/{pkg.purl}" removed_packages[artifact.id] = pkg except KeyError: log.error(f"KeyError: Could not create package from removed artifact {artifact.id}") log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}") - matches = [p for p in head_full_scan.packages.values() if p.name == artifact.name and p.version == artifact.version] - if matches: - log.error(f"Found {len(matches)} packages with matching name/version:") - for m in matches: - log.error(f" ID: {m.id}, name: {m.name}, version: {m.version}") - else: - log.error("No matching packages found in head_full_scan") + log.error("No matching packages found in head_full_scan") return added_packages, removed_packages @@ -439,32 +453,33 @@ def create_new_diff( if not files: return Diff(id="no_diff_id") - head_full_scan_id = None - try: # Get head scan ID head_full_scan_id = self.get_head_scan_for_repo(params.repo) + has_head_scan = True except APIResourceNotFound: head_full_scan_id = None + has_head_scan = False # Create new scan + params.include_license_details = False new_scan_start = time.time() - new_full_scan = self.create_full_scan(files_for_sending, params) + new_full_scan = self.create_full_scan(files_for_sending, params, has_head_scan) new_scan_end = time.time() log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}") - head_full_scan = None - if head_full_scan_id: - head_full_scan = self.get_full_scan(head_full_scan_id) + # head_full_scan = None + # if head_full_scan_id: + # head_full_scan = self.get_full_scan(head_full_scan_id) - added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan, new_full_scan) + added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan_id, new_full_scan) diff = self.create_diff_report(added_packages, removed_packages) base_socket = "https://socket.dev/dashboard/org" diff.id = new_full_scan.id - diff.report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}" + diff.report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}?include_license_details=false" if head_full_scan_id is not None: diff.diff_url = f"{base_socket}/{self.config.org_slug}/diff/{diff.id}/{head_full_scan_id}" else: @@ -609,7 +624,8 @@ def get_source_data(package: Package, packages: dict) -> list: source = (top_purl, manifests) introduced_by.append(source) else: - log.debug(f"Unable to get top level package info for {top_id}") + pass + # log.debug(f"Unable to get top level package info for {top_id}") return introduced_by @staticmethod diff --git a/socketsecurity/core/classes.py b/socketsecurity/core/classes.py index 31b529e..006bb0c 100644 --- a/socketsecurity/core/classes.py +++ b/socketsecurity/core/classes.py @@ -115,6 +115,7 @@ class Package(SocketArtifactLink): author: List[str] = field(default_factory=list) size: Optional[int] = None license: Optional[str] = None + namespace: Optional[str] = None # Package-specific fields license_text: str = "" @@ -122,6 +123,10 @@ class Package(SocketArtifactLink): transitives: int = 0 url: str = "" + # Artifact-specific fields + licenseDetails: Optional[list] = None + + @classmethod def from_socket_artifact(cls, data: dict) -> "Package": """ @@ -187,7 +192,8 @@ def from_diff_artifact(cls, data: dict) -> "Package": direct=ref.get("direct", False), manifestFiles=ref.get("manifestFiles", []), dependencies=ref.get("dependencies"), - artifact=ref.get("artifact") + artifact=ref.get("artifact"), + namespace=data.get('namespace', None) ) class Issue: From 8979844907e0a4fd6ab2c6865f5c48b54090c4e5 Mon Sep 17 00:00:00 2001 From: Eric Hibbs Date: Mon, 24 Feb 2025 12:48:34 -0800 Subject: [PATCH 2/5] cleaned up license_details hack --- README.md | 2 ++ socketsecurity/__init__.py | 2 +- socketsecurity/config.py | 8 ++++++++ socketsecurity/core/__init__.py | 8 ++++++-- socketsecurity/socketcli.py | 2 ++ 5 files changed, 19 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b56ef0f..017544c 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ socketcli [-h] [--api-token API_TOKEN] [--repo REPO] [--integration {api,github, [--target-path TARGET_PATH] [--sbom-file SBOM_FILE] [--files FILES] [--default-branch] [--pending-head] [--generate-license] [--enable-debug] [--enable-json] [--enable-sarif] [--disable-overview] [--disable-security-issue] [--allow-unverified] [--ignore-commit-files] [--disable-blocking] [--scm SCM] [--timeout TIMEOUT] + [--exclude-license-details] ```` If you don't want to provide the Socket API Token every time then you can use the environment variable `SOCKET_SECURITY_API_KEY` @@ -58,6 +59,7 @@ If you don't want to provide the Socket API Token every time then you can use th | --enable-json | False | False | Output in JSON format | | --enable-sarif | False | False | Enable SARIF output of results instead of table or JSON format| | --disable-overview | False | False | Disable overview output | +| --exclude-license-details | False | False | Exclude license details from the diff report (boosts performance for large repos) | #### Security Configuration | Parameter | Required | Default | Description | diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py index 2f4f50d..c2faa62 100644 --- a/socketsecurity/__init__.py +++ b/socketsecurity/__init__.py @@ -1,2 +1,2 @@ __author__ = 'socket.dev' -__version__ = '2.0.6' +__version__ = '2.0.7' diff --git a/socketsecurity/config.py b/socketsecurity/config.py index ae4e169..24a9eca 100644 --- a/socketsecurity/config.py +++ b/socketsecurity/config.py @@ -33,6 +33,7 @@ class CliConfig: integration_org_slug: Optional[str] = None pending_head: bool = False timeout: Optional[int] = 1200 + exclude_license_details: bool = False @classmethod def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig': parser = create_argument_parser() @@ -71,6 +72,7 @@ def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig': 'integration_type': args.integration, 'pending_head': args.pending_head, 'timeout': args.timeout, + 'exclude_license_details': args.exclude_license_details, } if args.owner: @@ -283,6 +285,12 @@ def create_argument_parser() -> argparse.ArgumentParser: action="store_true", help=argparse.SUPPRESS ) + output_group.add_argument( + "--exclude-license-details", + dest="exclude_license_details", + action="store_true", + help="Exclude license details from the diff report (boosts performance for large repos)" + ) # Security Configuration security_group = parser.add_argument_group('Security Configuration') diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py index b872096..02a96d9 100644 --- a/socketsecurity/core/__init__.py +++ b/socketsecurity/core/__init__.py @@ -462,7 +462,6 @@ def create_new_diff( has_head_scan = False # Create new scan - params.include_license_details = False new_scan_start = time.time() new_full_scan = self.create_full_scan(files_for_sending, params, has_head_scan) new_scan_end = time.time() @@ -479,7 +478,12 @@ def create_new_diff( base_socket = "https://socket.dev/dashboard/org" diff.id = new_full_scan.id - diff.report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}?include_license_details=false" + + report_url = f"{base_socket}/{self.config.org_slug}/sbom/{diff.id}" + if not params.include_license_details: + report_url += "?include_license_details=false" + diff.report_url = report_url + if head_full_scan_id is not None: diff.diff_url = f"{base_socket}/{self.config.org_slug}/diff/{diff.id}/{head_full_scan_id}" else: diff --git a/socketsecurity/socketcli.py b/socketsecurity/socketcli.py index cf4f6f5..1ae4338 100644 --- a/socketsecurity/socketcli.py +++ b/socketsecurity/socketcli.py @@ -160,6 +160,8 @@ def main_code(): set_as_pending_head=True ) + params.include_license_details = not config.exclude_license_details + # Initialize diff diff = Diff() diff.id = "NO_DIFF_RAN" From 814ed183d90e1f0638833bca460bb964e0e875ee Mon Sep 17 00:00:00 2001 From: Eric Hibbs Date: Mon, 24 Feb 2025 19:44:40 -0800 Subject: [PATCH 3/5] some cleanup, added an arg and .net globs --- .gitignore | 3 ++- pyproject.toml | 2 +- socketsecurity/core/__init__.py | 44 ++++++++++++++++++++----------- socketsecurity/core/scm/github.py | 4 ++- socketsecurity/core/utils.py | 23 ++++++++++++++++ 5 files changed, 57 insertions(+), 19 deletions(-) diff --git a/.gitignore b/.gitignore index 0962665..c481ee5 100644 --- a/.gitignore +++ b/.gitignore @@ -23,4 +23,5 @@ file_generator.py .coverage .env.local Pipfile -test/ \ No newline at end of file +test/ +logs \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 58a630a..629f7cb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,7 +13,7 @@ dependencies = [ 'GitPython', 'packaging', 'python-dotenv', - 'socket-sdk-python>=2.0.5' + 'socket-sdk-python>=2.0.6' ] readme = "README.md" description = "Socket Security CLI for CI/CD" diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py index 02a96d9..5236a99 100644 --- a/socketsecurity/core/__init__.py +++ b/socketsecurity/core/__init__.py @@ -1,5 +1,6 @@ import logging import time +import sys from dataclasses import asdict from glob import glob from pathlib import PurePath @@ -145,7 +146,7 @@ def find_files(path: str) -> List[str]: for file_name in patterns: pattern = Core.to_case_insensitive_regex(patterns[file_name]["pattern"]) file_path = f"{path}/**/{pattern}" - log.debug(f"Globbing {file_path}") + #log.debug(f"Globbing {file_path}") glob_start = time.time() glob_files = glob(file_path, recursive=True) for glob_file in glob_files: @@ -153,13 +154,16 @@ def find_files(path: str) -> List[str]: files.add(glob_file) glob_end = time.time() glob_total_time = glob_end - glob_start - log.debug(f"Glob for pattern {file_path} took {glob_total_time:.2f} seconds") + #log.debug(f"Glob for pattern {file_path} took {glob_total_time:.2f} seconds") log.debug("Finished Find Files") end_time = time.time() total_time = end_time - start_time - log.info(f"Found {len(files)} in {total_time:.2f} seconds") - log.debug(f"Files found: {list(files)}") + files_list = list(files) + if len(files_list) > 5: + log.debug(f"{len(files_list)} Files found ({total_time:.2f}s): {', '.join(files_list[:5])}, ...") + else: + log.debug(f"{len(files_list)} Files found ({total_time:.2f}s): {', '.join(files_list)}") return list(files) @staticmethod @@ -449,7 +453,6 @@ def create_new_diff( files = self.find_files(path) files_for_sending = self.load_files_for_sending(files, path) - log.debug(f"files: {files} found at path {path}") if not files: return Diff(id="no_diff_id") @@ -461,18 +464,27 @@ def create_new_diff( head_full_scan_id = None has_head_scan = False - # Create new scan - new_scan_start = time.time() - new_full_scan = self.create_full_scan(files_for_sending, params, has_head_scan) - new_scan_end = time.time() - log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}") - - - # head_full_scan = None - # if head_full_scan_id: - # head_full_scan = self.get_full_scan(head_full_scan_id) + # Create new scan + try: + new_scan_start = time.time() + new_full_scan = self.create_full_scan(files_for_sending, params, has_head_scan) + new_scan_end = time.time() + log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}") + except APIFailure as e: + log.error(f"API Error: {e}") + sys.exit(1) + except Exception as e: + log.error(f"Unexpected error while creating new scan: {e}") + sys.exit(1) - added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan_id, new_full_scan) + try: + added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan_id, new_full_scan) + except APIFailure as e: + log.error(f"API Error: {e}") + sys.exit(1) + except Exception as e: + log.error(f"Unexpected error while comparing packages: {e}") + sys.exit(1) diff = self.create_diff_report(added_packages, removed_packages) diff --git a/socketsecurity/core/scm/github.py b/socketsecurity/core/scm/github.py index b958bbd..fa40afb 100644 --- a/socketsecurity/core/scm/github.py +++ b/socketsecurity/core/scm/github.py @@ -54,7 +54,9 @@ def from_env(cls, pr_number: Optional[str] = None) -> 'GithubConfig': owner = repository.split('/')[0] repository = repository.split('/')[1] - is_default = os.getenv('DEFAULT_BRANCH', '').lower() == 'true' + default_branch_env = os.getenv('DEFAULT_BRANCH') + # Consider the variable truthy if it exists and isn't explicitly 'false' + is_default = default_branch_env is not None and default_branch_env.lower() != 'false' return cls( sha=os.getenv('GITHUB_SHA', ''), api_url=os.getenv('GITHUB_API_URL', ''), diff --git a/socketsecurity/core/utils.py b/socketsecurity/core/utils.py index c7a45b0..6e9fb09 100644 --- a/socketsecurity/core/utils.py +++ b/socketsecurity/core/utils.py @@ -81,5 +81,28 @@ "pom.xml": { "pattern": "pom.xml" } + }, + ".net": { + "proj": { + "pattern": "*.*proj" + }, + "props": { + "pattern": "*.props" + }, + "targets": { + "pattern": "*.targets" + }, + "nuspec": { + "pattern": "*.nuspec" + }, + "nugetConfig": { + "pattern": "nuget.config" + }, + "packagesConfig": { + "pattern": "packages.config" + }, + "packagesLock": { + "pattern": "packages.lock.json" + } } } \ No newline at end of file From 8928ac77b582889a3c23b152ddf9e18363f2b5ed Mon Sep 17 00:00:00 2001 From: Eric Hibbs Date: Tue, 25 Feb 2025 13:14:44 -0800 Subject: [PATCH 4/5] gracefully handling no api token --- socketsecurity/socketcli.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/socketsecurity/socketcli.py b/socketsecurity/socketcli.py index 1ae4338..5a75438 100644 --- a/socketsecurity/socketcli.py +++ b/socketsecurity/socketcli.py @@ -48,6 +48,13 @@ def main_code(): log.debug(f"config: {config.to_dict()}") output_handler = OutputHandler(config) + # Validate API token + if not config.api_token: + log.info("Socket API Token not found. Please set it using either:\n" + "1. Command line: --api-token YOUR_TOKEN\n" + "2. Environment variable: SOCKET_SECURITY_API_KEY") + sys.exit(3) + sdk = socketdev(token=config.api_token) log.debug("sdk loaded") @@ -55,10 +62,6 @@ def main_code(): set_debug_mode(True) log.debug("Debug logging enabled") - # Validate API token - if not config.api_token: - log.info("Unable to find Socket API Token") - sys.exit(3) # Initialize Socket core components socket_config = SocketConfig( From 12a810711143f4d7795f3c733ee58f9edc28cf98 Mon Sep 17 00:00:00 2001 From: Eric Hibbs Date: Tue, 25 Feb 2025 16:03:12 -0800 Subject: [PATCH 5/5] bumped sdk version --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 629f7cb..e95fdf9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,8 +12,8 @@ dependencies = [ 'prettytable', 'GitPython', 'packaging', - 'python-dotenv', - 'socket-sdk-python>=2.0.6' + 'python-dotenv', + 'socket-sdk-python>=2.0.7' ] readme = "README.md" description = "Socket Security CLI for CI/CD"