diff --git a/pyproject.toml b/pyproject.toml
index 58a630a..f0d2cdc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -42,6 +42,7 @@ test = [
 dev = [
     "ruff>=0.3.0",
     "pip-tools>=7.4.0", # for pip-compile
+    "twine"
 ]

 [project.scripts]
diff --git a/socketsecurity/__init__.py b/socketsecurity/__init__.py
index 59b063c..872fb53 100644
--- a/socketsecurity/__init__.py
+++ b/socketsecurity/__init__.py
@@ -1,2 +1,2 @@
 __author__ = 'socket.dev'
-__version__ = '2.0.4'
+__version__ = '2.0.5'
diff --git a/socketsecurity/config.py b/socketsecurity/config.py
index ae4e169..c2e8bc8 100644
--- a/socketsecurity/config.py
+++ b/socketsecurity/config.py
@@ -33,6 +33,7 @@ class CliConfig:
     integration_org_slug: Optional[str] = None
     pending_head: bool = False
+    timeout: Optional[int] = 1200

     @classmethod
     def from_args(cls, args_list: Optional[List[str]] = None) -> 'CliConfig':
         parser = create_argument_parser()
@@ -265,6 +266,12 @@ def create_argument_parser() -> argparse.ArgumentParser:
         action="store_true",
         help="Output in JSON format"
     )
+    output_group.add_argument(
+        "--enable_json",
+        dest="enable_json",
+        action="store_true",
+        help=argparse.SUPPRESS
+    )
     output_group.add_argument(
         "--enable-sarif",
         dest="enable_sarif",
diff --git a/socketsecurity/core/__init__.py b/socketsecurity/core/__init__.py
index 18d92d7..5f2e674 100644
--- a/socketsecurity/core/__init__.py
+++ b/socketsecurity/core/__init__.py
@@ -6,6 +6,7 @@
 from glob import glob
 from pathlib import PurePath
 from typing import BinaryIO, Dict, List, Optional, Tuple
+from itertools import chain

 from socketdev import socketdev
 from socketdev.fullscans import (
@@ -148,7 +149,7 @@ def find_files(path: str) -> List[str]:
         for file_name in patterns:
             pattern = Core.to_case_insensitive_regex(patterns[file_name]["pattern"])
             file_path = f"{path}/**/{pattern}"
-            log.debug(f"Globbing {file_path}")
+            # log.debug(f"Globbing {file_path}")
             glob_start = time.time()
             glob_files = glob(file_path, recursive=True)
             for glob_file in glob_files:
@@ -156,13 +157,17 @@ def find_files(path: str) -> List[str]:
                 files.add(glob_file)
             glob_end = time.time()
             glob_total_time = glob_end - glob_start
-            log.debug(f"Glob for pattern {file_path} took {glob_total_time:.2f} seconds")
+            # log.debug(f"Glob for pattern {file_path} took {glob_total_time:.2f} seconds")

         log.debug("Finished Find Files")
         end_time = time.time()
         total_time = end_time - start_time
         log.info(f"Found {len(files)} in {total_time:.2f} seconds")
-        log.debug(f"Files found: {list(files)}")
+        files_list = list(files)
+        if len(files_list) > 5:
+            log.debug(f"{len(files_list)} Files found: {', '.join(files_list[:5])}, ...")
+        else:
+            log.debug(f"{len(files_list)} Files found: {', '.join(files_list)}")
         return list(files)

     @staticmethod
@@ -216,30 +221,89 @@ def load_files_for_sending(files: List[str], workspace: str) -> List[Tuple[str,
         return send_files

-    def create_full_scan(self, files: List[str], params: FullScanParams) -> FullScan:
-        """
-        Creates a new full scan via the Socket API.
-
-        Args:
-            files: List of files to scan
-            params: Parameters for the full scan
-
-        Returns:
-            FullScan object with scan results
-        """
+    def create_full_scan(self, files: List[str], params: FullScanParams, store_results: bool = True) -> FullScan:
+        """Creates a new full scan via the Socket API."""
         log.debug("Creating new full scan")
         create_full_start = time.time()

+        # Time the post API call
+        post_start = time.time()
+        res = self.sdk.fullscans.post(files, params)
+        post_end = time.time()
+        log.debug(f"API fullscans.post took {post_end - post_start:.2f} seconds")
+
         if not res.success:
             log.error(f"Error creating full scan: {res.message}, status: {res.status}")
             raise Exception(f"Error creating full scan: {res.message}, status: {res.status}")

         full_scan = FullScan(**asdict(res.data))
+
+        if not store_results:
+            log.debug("Skipping results storage as requested")
+            full_scan.sbom_artifacts = []
+            full_scan.packages = {}
+            return full_scan
+
+        # Add extensive debug logging
+        log.debug(f"Full scan created with ID: {full_scan.id}")
+        log.debug(f"Organization slug: {self.config.org_slug}")
+        log.debug(f"store_results is {store_results}")
+        log.debug(f"Params used for scan: {params}")
+
+        # Time the stream API call
+        stream_start = time.time()
+        log.debug(f"Initiating stream request for full scan {full_scan.id}")
+        try:
+            artifacts_response = self.sdk.fullscans.stream(self.config.org_slug, full_scan.id)
+            log.debug(f"Stream response received: success={artifacts_response.success}")
+            if hasattr(artifacts_response, 'status'):
+                log.debug(f"Stream response status: {artifacts_response.status}")
+            if hasattr(artifacts_response, 'message'):
+                log.debug(f"Stream response message: {artifacts_response.message}")
+        except Exception as e:
+            log.error(f"Exception during stream request: {str(e)}")
+            log.error(f"Exception type: {type(e)}")
+            raise

-        full_scan_artifacts_dict = self.get_sbom_data(full_scan.id)
-        full_scan.sbom_artifacts = self.get_sbom_data_list(full_scan_artifacts_dict)
-        full_scan.packages = self.create_packages_dict(full_scan.sbom_artifacts)
+        stream_end = time.time()
+        log.debug(f"API fullscans.stream took {stream_end - stream_start:.2f} seconds")
+
+        if not artifacts_response.success:
+            log.error(f"Failed to get SBOM data for full-scan {full_scan.id}")
+            log.error(artifacts_response.message)
+            full_scan.sbom_artifacts = []
+            full_scan.packages = {}
+            return full_scan
+
+        # Store the original SocketArtifact objects
+        full_scan.sbom_artifacts = list(artifacts_response.artifacts.values())
+        log.debug(f"Retrieved {len(full_scan.sbom_artifacts)} artifacts")
+
+        # Create packages dictionary directly from the artifacts
+        packages = {}
+        top_level_count = {}
+
+        log.debug("Starting package processing from artifacts")
+        for artifact in artifacts_response.artifacts.values():
+            package = Package.from_socket_artifact(artifact)
+            if package.id not in packages:
+                package.license_text = self.get_package_license_text(package)
+                packages[package.id] = package
+
+                # Count top-level ancestors in the same pass
+                if package.topLevelAncestors:
+                    for top_id in package.topLevelAncestors:
+                        top_level_count[top_id] = top_level_count.get(top_id, 0) + 1
+
+        # Update transitive counts
+        for package in packages.values():
+            package.transitives = top_level_count.get(package.id, 0)
+
+        full_scan.packages = packages
+        log.debug(f"Processed {len(packages)} packages")

         create_full_end = time.time()
         total_time = create_full_end - create_full_start
@@ -350,23 +414,19 @@ def get_head_scan_for_repo(self, repo_slug: str) -> str:
         repo_info = self.get_repo_info(repo_slug)
         return repo_info.head_full_scan_id if repo_info.head_full_scan_id else None

-    def get_added_and_removed_packages(self, head_full_scan: Optional[FullScan], new_full_scan: FullScan) -> Tuple[Dict[str, Package], Dict[str, Package]]:
-        """
-        Get packages that were added and removed between scans.
-
-        Args:
-            head_full_scan: Previous scan (may be None if first scan)
-            new_full_scan: New scan just created
-
-        Returns:
-            Tuple of (added_packages, removed_packages) dictionaries
-        """
-        if head_full_scan is None:
+    def get_added_and_removed_packages(self, head_full_scan_id: Optional[str], new_full_scan: FullScan) -> Tuple[Dict[str, Package], Dict[str, Package]]:
+        """Get packages that were added and removed between scans."""
+        if head_full_scan_id is None:
             log.info(f"No head scan found. New scan ID: {new_full_scan.id}")
             return new_full_scan.packages, {}

-        log.info(f"Comparing scans - Head scan ID: {head_full_scan.id}, New scan ID: {new_full_scan.id}")
-        diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan.id, new_full_scan.id).data
+        log.info(f"Comparing scans - Head scan ID: {head_full_scan_id}, New scan ID: {new_full_scan.id}")
+
+        # Time the stream_diff API call
+        diff_start = time.time()
+        diff_report = self.sdk.fullscans.stream_diff(self.config.org_slug, head_full_scan_id, new_full_scan.id).data
+        diff_end = time.time()
+        log.debug(f"API fullscans.stream_diff took {diff_end - diff_start:.2f} seconds")

         log.info(f"Diff report artifact counts:")
         log.info(f"Added: {len(diff_report.artifacts.added)}")
@@ -375,41 +435,26 @@ def get_added_and_removed_packages(self, head_full_scan: Optional[FullScan], new
         log.info(f"Replaced: {len(diff_report.artifacts.replaced)}")
         log.info(f"Updated: {len(diff_report.artifacts.updated)}")

-        added_artifacts = diff_report.artifacts.added + diff_report.artifacts.updated
-        removed_artifacts = diff_report.artifacts.removed + diff_report.artifacts.replaced
-
         added_packages: Dict[str, Package] = {}
         removed_packages: Dict[str, Package] = {}

-        for artifact in added_artifacts:
+        # Process added and updated artifacts
+        for artifact in chain(diff_report.artifacts.added, diff_report.artifacts.updated):
             try:
-                pkg = Package.from_diff_artifact(asdict(artifact))
+                pkg = Package.from_socket_artifact(artifact)
                 added_packages[artifact.id] = pkg
-            except KeyError:
-                log.error(f"KeyError: Could not create package from added artifact {artifact.id}")
-                log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
-                matches = [p for p in new_full_scan.packages.values() if p.name == artifact.name and p.version == artifact.version]
-                if matches:
-                    log.error(f"Found {len(matches)} packages with matching name/version:")
-                    for m in matches:
-                        log.error(f"  ID: {m.id}, name: {m.name}, version: {m.version}")
-                else:
-                    log.error("No matching packages found in new_full_scan")
+            except KeyError as e:
+                log.error(f"KeyError creating package from added artifact {artifact.id}: {e}")
+                log.error(f"Artifact: name={artifact.name}, version={artifact.version}")

-        for artifact in removed_artifacts:
+        # Process removed and replaced artifacts
+        for artifact in chain(diff_report.artifacts.removed, diff_report.artifacts.replaced):
             try:
-                pkg = Package.from_diff_artifact(asdict(artifact))
+                pkg = Package.from_diff_artifact(artifact)
                 removed_packages[artifact.id] = pkg
-            except KeyError:
-                log.error(f"KeyError: Could not create package from removed artifact {artifact.id}")
-                log.error(f"Artifact details - name: {artifact.name}, version: {artifact.version}")
-                matches = [p for p in head_full_scan.packages.values() if p.name == artifact.name and p.version == artifact.version]
-                if matches:
-                    log.error(f"Found {len(matches)} packages with matching name/version:")
-                    for m in matches:
-                        log.error(f"  ID: {m.id}, name: {m.name}, version: {m.version}")
-                else:
-                    log.error("No matching packages found in head_full_scan")
+            except KeyError as e:
+                log.error(f"KeyError creating package from removed artifact {artifact.id}: {e}")
+                log.error(f"Artifact: name={artifact.name}, version={artifact.version}")

         return added_packages, removed_packages
@@ -419,46 +464,33 @@ def create_new_diff(
         params: FullScanParams,
         no_change: bool = False
     ) -> Diff:
-        """Create a new diff using the Socket SDK.
-
-        Args:
-            path: Path to look for manifest files
-            params: Query params for the Full Scan endpoint
-
-            no_change: If True, return empty diff
-        """
-        log.debug(f"starting create_new_diff with no_change: {no_change}")
+        """Create a new diff using the Socket SDK."""
         if no_change:
+            log.debug(f"starting create_new_diff with no_change: {no_change}")
             return Diff(id="no_diff_id")

         # Find manifest files
         files = self.find_files(path)
         files_for_sending = self.load_files_for_sending(files, path)
-        log.debug(f"files: {files} found at path {path}")
+
         if not files:
             return Diff(id="no_diff_id")

+        # Initialize head scan ID
         head_full_scan_id = None
-
         try:
             # Get head scan ID
             head_full_scan_id = self.get_head_scan_for_repo(params.repo)
         except APIResourceNotFound:
-            head_full_scan_id = None
+            pass

-        # Create new scan
-        new_scan_start = time.time()
-        new_full_scan = self.create_full_scan(files_for_sending, params)
-        new_scan_end = time.time()
-        log.info(f"Total time to create new full scan: {new_scan_end - new_scan_start:.2f}")
-
-
-        head_full_scan = None
-        if head_full_scan_id:
-            head_full_scan = self.get_full_scan(head_full_scan_id)
+        # Create new scan - only store results if we don't have a head scan to diff against
+        if head_full_scan_id is None:
+            log.debug("No head scan found to diff against")
+        new_full_scan = self.create_full_scan(files_for_sending, params, store_results=head_full_scan_id is None)

-        added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan, new_full_scan)
+        added_packages, removed_packages = self.get_added_and_removed_packages(head_full_scan_id, new_full_scan)

         diff = self.create_diff_report(added_packages, removed_packages)
@@ -503,32 +535,38 @@
         seen_new_packages = set()
         seen_removed_packages = set()

+        # Process added packages
         for package_id, package in added_packages.items():
-            purl = Core.create_purl(package_id, added_packages)
-            base_purl = f"{purl.ecosystem}/{purl.name}@{purl.version}"
-
-            if (not direct_only or package.direct) and base_purl not in seen_new_packages:
-                diff.new_packages.append(purl)
-                seen_new_packages.add(base_purl)
+            # Calculate source data once per package
+            package.introduced_by = self.get_source_data(package, added_packages)
+
+            if not direct_only or package.direct:
+                base_purl = f"{package.type}/{package.name}@{package.version}"
+                if base_purl not in seen_new_packages:
+                    purl = Core.create_purl(package_id, added_packages)
+                    diff.new_packages.append(purl)
+                    seen_new_packages.add(base_purl)

             self.add_package_alerts_to_collection(
                 package=package,
-                alerts_collection=alerts_in_added_packages,
-                packages=added_packages
+                alerts_collection=alerts_in_added_packages
             )

+        # Process removed packages
         for package_id, package in removed_packages.items():
-            purl = Core.create_purl(package_id, removed_packages)
-            base_purl = f"{purl.ecosystem}/{purl.name}@{purl.version}"
-
-            if (not direct_only or package.direct) and base_purl not in seen_removed_packages:
-                diff.removed_packages.append(purl)
-                seen_removed_packages.add(base_purl)
+            # Calculate source data once per package
+            package.introduced_by = self.get_source_data(package, removed_packages)
+
+            if not direct_only or package.direct:
+                base_purl = f"{package.type}/{package.name}@{package.version}"
+                if base_purl not in seen_removed_packages:
+                    purl = Core.create_purl(package_id, removed_packages)
+                    diff.removed_packages.append(purl)
+                    seen_removed_packages.add(base_purl)

             self.add_package_alerts_to_collection(
                 package=package,
-                alerts_collection=alerts_in_removed_packages,
-                packages=removed_packages
+                alerts_collection=alerts_in_removed_packages
             )

         diff.new_alerts = Core.get_new_alerts(
@@ -537,7 +575,6 @@ create_diff_report(
         )

         diff.new_capabilities = Core.get_capabilities_for_added_packages(added_packages)
-        Core.add_purl_capabilities(diff)

         return diff

@@ -589,26 +626,31 @@ def get_source_data(package: Package, packages: dict) -> list:
         introduced_by = []
         if package.direct:
             manifests = ""
-            for manifest_data in package.manifestFiles:
-                manifest_file = manifest_data.get("file")
-                manifests += f"{manifest_file};"
-            manifests = manifests.rstrip(";")
+            if package.manifestFiles:
+                for manifest_data in package.manifestFiles:
+                    manifest_file = manifest_data.get("file")  # Safer dictionary access
+                    if manifest_file:
+                        manifests += f"{manifest_file};"
+                manifests = manifests.rstrip(";")
             source = ("direct", manifests)
             introduced_by.append(source)
         else:
-            for top_id in package.topLevelAncestors:
+            for top_id in package.topLevelAncestors or []:
                 top_package = packages.get(top_id)
                 if top_package:
                     manifests = ""
                     top_purl = f"{top_package.type}/{top_package.name}@{top_package.version}"
-                    for manifest_data in top_package.manifestFiles:
-                        manifest_file = manifest_data.get("file")
-                        manifests += f"{manifest_file};"
-                    manifests = manifests.rstrip(";")
+                    if top_package.manifestFiles:
+                        for manifest_data in top_package.manifestFiles:
+                            manifest_file = manifest_data.get("file")  # Safer dictionary access
+                            if manifest_file:
+                                manifests += f"{manifest_file};"
+                        manifests = manifests.rstrip(";")
                     source = (top_purl, manifests)
                     introduced_by.append(source)
                 else:
                     log.debug(f"Unable to get top level package info for {top_id}")
+
         return introduced_by

     @staticmethod
@@ -632,18 +674,8 @@ def add_purl_capabilities(diff: Diff) -> None:

         diff.new_packages = new_packages

-    def add_package_alerts_to_collection(self, package: Package, alerts_collection: dict, packages: dict) -> dict:
-        """
-        Processes alerts from a package and adds them to a shared alerts collection.
-
-        Args:
-            package: Package to process alerts from
-            alerts_collection: Dictionary to store processed alerts
-            packages: Dictionary of all packages for dependency lookup
-
-        Returns:
-            Updated alerts collection dictionary
-        """
+    def add_package_alerts_to_collection(self, package: Package, alerts_collection: dict) -> None:
+        """Processes alerts from a package and adds them to a shared alerts collection."""

         default_props = type('EmptyProps', (), {
             'description': "",
             'title': "",
@@ -651,10 +683,11 @@ def add_package_alerts_to_collection(self, package: Package, alerts_collection:
             'nextStepTitle': ""
         })()

-        for alert_item in package.alerts:
-            alert = Alert(**alert_item)
+        for alert in package.alerts:
+            if alert.type == 'licenseSpdxDisj':
+                continue
+
             props = getattr(self.config.all_issues, alert.type, default_props)
-            introduced_by = self.get_source_data(package, packages)

             issue_alert = Issue(
                 pkg_type=package.type,
@@ -669,7 +702,7 @@ def add_package_alerts_to_collection(self, package: Package, alerts_collection:
                 title=props.title,
                 suggestion=props.suggestion,
                 next_step_title=props.nextStepTitle,
-                introduced_by=introduced_by,
+                introduced_by=package.introduced_by,
                 purl=package.purl,
                 url=package.url
             )
@@ -678,13 +711,10 @@ def add_package_alerts_to_collection(self, package: Package, alerts_collection:
                 action = self.config.security_policy[alert.type]['action']
                 setattr(issue_alert, action, True)

-            if issue_alert.type != 'licenseSpdxDisj':
-                if issue_alert.key not in alerts_collection:
-                    alerts_collection[issue_alert.key] = [issue_alert]
-                else:
-                    alerts_collection[issue_alert.key].append(issue_alert)
-
-        return alerts_collection
+            if alert.key not in alerts_collection:
+                alerts_collection[alert.key] = [issue_alert]
+            else:
+                alerts_collection[alert.key].append(issue_alert)

     @staticmethod
     def save_file(file_name: str, content: str) -> None:
diff --git a/socketsecurity/core/classes.py b/socketsecurity/core/classes.py
index 31b529e..2e6be66 100644
--- a/socketsecurity/core/classes.py
+++ b/socketsecurity/core/classes.py
@@ -1,8 +1,8 @@
 import json
 from dataclasses import dataclass, field
-from typing import Dict, List, TypedDict, Any, Optional
+from typing import Dict, List, TypedDict, Any, Optional, Tuple

-from socketdev.fullscans import FullScanMetadata, SocketArtifact, SocketArtifactLink, DiffType, SocketManifestReference, SocketScore, SocketAlert
+from socketdev.fullscans import FullScanMetadata, SocketArtifact, SocketArtifactLink, SocketScore, SocketAlert, DiffArtifact

 __all__ = [
     "Report",
@@ -121,42 +121,43 @@ class Package(SocketArtifactLink):
     purl: str = ""
     transitives: int = 0
     url: str = ""
+    introduced_by: List[Tuple[str, str]] = field(default_factory=list)

     @classmethod
-    def from_socket_artifact(cls, data: dict) -> "Package":
+    def from_socket_artifact(cls, artifact: SocketArtifact) -> "Package":
         """
-        Create a Package from a SocketArtifact dictionary.
+        Create a Package from a SocketArtifact.

         Args:
-            data: Dictionary containing SocketArtifact data
+            artifact: SocketArtifact instance from scan results

         Returns:
             New Package instance
         """
         return cls(
-            id=data["id"],
-            name=data["name"],
-            version=data["version"],
-            type=data["type"],
-            score=data["score"],
-            alerts=data["alerts"],
-            author=data.get("author", []),
-            size=data.get("size"),
-            license=data.get("license"),
-            topLevelAncestors=data["topLevelAncestors"],
-            direct=data.get("direct", False),
-            manifestFiles=data.get("manifestFiles", []),
-            dependencies=data.get("dependencies"),
-            artifact=data.get("artifact")
+            id=artifact.id,
+            name=artifact.name,
+            version=artifact.version,
+            type=artifact.type,
+            score=artifact.score,
+            alerts=artifact.alerts,
+            author=artifact.author or [],
+            size=artifact.size,
+            license=artifact.license,
+            topLevelAncestors=artifact.topLevelAncestors,
+            direct=artifact.direct,
+            manifestFiles=artifact.manifestFiles,
+            dependencies=artifact.dependencies,
+            artifact=artifact.artifact
         )

     @classmethod
-    def from_diff_artifact(cls, data: dict) -> "Package":
+    def from_diff_artifact(cls, artifact: DiffArtifact) -> "Package":
         """
-        Create a Package from a DiffArtifact dictionary.
+        Create a Package from a DiffArtifact.

         Args:
-            data: Dictionary containing DiffArtifact data
+            artifact: DiffArtifact instance from diff results

         Returns:
             New Package instance
@@ -165,29 +166,29 @@ def from_diff_artifact(cls, data: dict) -> "Package":
             ValueError: If reference data cannot be found in DiffArtifact
         """
         ref = None
-        if data["diffType"] in ["added", "updated"] and data.get("head"):
-            ref = data["head"][0]
-        elif data["diffType"] in ["removed", "replaced"] and data.get("base"):
-            ref = data["base"][0]
+        if artifact.diffType in ["added", "updated"] and artifact.head:
+            ref = artifact.head[0]
+        elif artifact.diffType in ["removed", "replaced"] and artifact.base:
+            ref = artifact.base[0]

         if not ref:
             raise ValueError("Could not find reference data in DiffArtifact")

         return cls(
-            id=data["id"],
-            name=data["name"],
-            version=data["version"],
-            type=data["type"],
-            score=data["score"],
-            alerts=data["alerts"],
-            author=data.get("author", []),
-            size=data.get("size"),
-            license=data.get("license"),
-            topLevelAncestors=ref["topLevelAncestors"],
-            direct=ref.get("direct", False),
-            manifestFiles=ref.get("manifestFiles", []),
-            dependencies=ref.get("dependencies"),
-            artifact=ref.get("artifact")
+            id=artifact.id,
+            name=artifact.name,
+            version=artifact.version,
+            type=artifact.type,
+            score=artifact.score,
+            alerts=artifact.alerts,
+            author=artifact.author or [],
+            size=artifact.size,
+            license=artifact.license,
+            topLevelAncestors=ref.topLevelAncestors,
+            direct=ref.direct,
+            manifestFiles=ref.manifestFiles,
+            dependencies=ref.dependencies,
+            artifact=ref.artifact
         )

 class Issue:
diff --git a/socketsecurity/core/scm/github.py b/socketsecurity/core/scm/github.py
index b958bbd..c34e2b4 100644
--- a/socketsecurity/core/scm/github.py
+++ b/socketsecurity/core/scm/github.py
@@ -54,7 +54,10 @@ def from_env(cls, pr_number: Optional[str] = None) -> 'GithubConfig':
         owner = repository.split('/')[0]
         repository = repository.split('/')[1]

-        is_default = os.getenv('DEFAULT_BRANCH', '').lower() == 'true'
+        default_branch_env = os.getenv('DEFAULT_BRANCH')
+        # Consider the variable truthy if it exists and isn't explicitly 'false'
+        is_default = default_branch_env is not None and default_branch_env.lower() != 'false'
+
         return cls(
             sha=os.getenv('GITHUB_SHA', ''),
             api_url=os.getenv('GITHUB_API_URL', ''),
diff --git a/socketsecurity/core/utils.py b/socketsecurity/core/utils.py
index c7a45b0..6e9fb09 100644
--- a/socketsecurity/core/utils.py
+++ b/socketsecurity/core/utils.py
@@ -81,5 +81,28 @@
         "pom.xml": {
             "pattern": "pom.xml"
         }
+    },
+    ".net": {
+        "proj": {
+            "pattern": "*.*proj"
+        },
+        "props": {
+            "pattern": "*.props"
+        },
+        "targets": {
+            "pattern": "*.targets"
+        },
+        "nuspec": {
+            "pattern": "*.nuspec"
+        },
+        "nugetConfig": {
+            "pattern": "nuget.config"
+        },
+        "packagesConfig": {
+            "pattern": "packages.config"
+        },
+        "packagesLock": {
+            "pattern": "packages.lock.json"
+        }
     }
 }
\ No newline at end of file
diff --git a/socketsecurity/socketcli.py b/socketsecurity/socketcli.py
index cf4f6f5..f171010 100644
--- a/socketsecurity/socketcli.py
+++ b/socketsecurity/socketcli.py
@@ -48,7 +48,7 @@ def main_code():
     log.debug(f"config: {config.to_dict()}")

     output_handler = OutputHandler(config)
-    sdk = socketdev(token=config.api_token)
+    sdk = socketdev(token=config.api_token, timeout=config.timeout)
     log.debug("sdk loaded")

     if config.enable_debug:
@@ -146,6 +146,7 @@ def main_code():
     integration_type = config.integration_type
     integration_org_slug = config.integration_org_slug or org_slug

+    log.debug(f"config: {config.to_dict()}")
     params = FullScanParams(
         org_slug=org_slug,
         integration_type=integration_type,
@@ -159,6 +160,7 @@ def main_code():
         make_default_branch=config.default_branch,
         set_as_pending_head=True
     )
+    log.debug(f"Params initially set to: {params}")

     # Initialize diff
     diff = Diff()