From 26b1de9ccb85626967b26ca87b281bbcad558b79 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sun, 8 Dec 2024 09:31:22 +0000 Subject: [PATCH 01/26] Bump django from 4.2.16 to 4.2.17 Bumps [django](https://github.com/django/django) from 4.2.16 to 4.2.17. - [Commits](https://github.com/django/django/compare/4.2.16...4.2.17) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 84ea22538..e05816191 100644 --- a/requirements.txt +++ b/requirements.txt @@ -27,7 +27,7 @@ dateparser==1.1.1 decorator==5.1.1 defusedxml==0.7.1 distro==1.7.0 -Django==4.2.16 +Django==4.2.17 django-crispy-forms==2.3 django-environ==0.11.2 django-filter==24.3 From 9fa9514c3a3a0d3bf0e5cbbc4eb3561cb8c58ae9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 7 Jan 2025 15:47:17 +0000 Subject: [PATCH 02/26] Bump jinja2 from 3.1.4 to 3.1.5 Bumps [jinja2](https://github.com/pallets/jinja) from 3.1.4 to 3.1.5. - [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.1.4...3.1.5) --- updated-dependencies: - dependency-name: jinja2 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index e05816191..347259791 100644 --- a/requirements.txt +++ b/requirements.txt @@ -53,7 +53,7 @@ ipython==8.10.0 isort==5.10.1 itypes==1.2.0 jedi==0.18.1 -Jinja2==3.1.4 +Jinja2==3.1.5 jsonschema==3.2.0 license-expression==30.3.1 lxml==4.9.1 From 87bde21a3d0f4d974609d32b042b1cfb5d302aa4 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 16:17:37 +0530 Subject: [PATCH 03/26] Add postgresql conf in docker compose Signed-off-by: Tushar Goel --- docker-compose.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index 865be14e1..afbe9f337 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,10 +3,12 @@ version: "3" services: db: image: postgres:13 + command: -c config_file=/etc/postgresql/postgresql.conf env_file: - docker.env volumes: - db_data:/var/lib/postgresql/data/ + - ./etc/postgresql/postgresql.conf:/etc/postgresql/postgresql.conf vulnerablecode: build: . 
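The docker-compose change above starts the db service with -c config_file=/etc/postgresql/postgresql.conf and bind-mounts ./etc/postgresql/postgresql.conf from the repository into the container. The contents of that file are not shown in this patch series, so the following is only a minimal sketch of what such a custom postgresql.conf could contain; the specific values are illustrative assumptions, not the project's actual tuning:

    # Needed when overriding the image's default configuration so that other
    # containers on the compose network (e.g. the vulnerablecode service) can
    # reach the database; PostgreSQL's built-in default is localhost only.
    listen_addresses = '*'

    # Illustrative resource settings; adjust to the host running the stack.
    max_connections = 100
    shared_buffers = 256MB
    work_mem = 16MB

When a custom config_file is passed to the postgres image, the configuration written into the data directory at init time is no longer read, so settings such as listen_addresses typically need to be restated explicitly in the mounted file.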
From 8519b0d2daa0d73d5d6313e30c50c269f10d27f2 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 20 Dec 2024 23:00:19 +0530 Subject: [PATCH 04/26] Add models for CodeFix Signed-off-by: Tushar Goel --- vulnerabilities/migrations/0085_codefix.py | 60 ++++++++++++++++++++++ vulnerabilities/models.py | 29 +++++++++++ 2 files changed, 89 insertions(+) create mode 100644 vulnerabilities/migrations/0085_codefix.py diff --git a/vulnerabilities/migrations/0085_codefix.py b/vulnerabilities/migrations/0085_codefix.py new file mode 100644 index 000000000..cbe162845 --- /dev/null +++ b/vulnerabilities/migrations/0085_codefix.py @@ -0,0 +1,60 @@ +# Generated by Django 4.2.16 on 2024-12-20 17:29 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0084_alter_package_options_package_version_rank"), + ] + + operations = [ + migrations.CreateModel( + name="CodeFix", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ("commits", models.JSONField(blank=True, default=list)), + ("pulls", models.JSONField(blank=True, default=list)), + ("downloads", models.JSONField(blank=True, default=list)), + ("patch", models.TextField(blank=True, null=True)), + ("notes", models.TextField(blank=True, null=True)), + ("references", models.JSONField(blank=True, default=list)), + ("status_reviewed", models.BooleanField(default=False)), + ("base_commit", models.CharField(blank=True, max_length=255, null=True)), + ("created_at", models.DateTimeField(auto_now_add=True)), + ("updated_at", models.DateTimeField(auto_now=True)), + ( + "applies_to_versions", + models.ManyToManyField( + blank=True, related_name="fixes", to="vulnerabilities.package" + ), + ), + ( + "base_version", + models.ForeignKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="base_version_changes", + to="vulnerabilities.package", + ), + ), + ( + "vulnerabilities", + models.ManyToManyField( + blank=True, related_name="codefixes", to="vulnerabilities.vulnerability" + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6248e1e47..610d35c5f 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1581,3 +1581,32 @@ class Exploit(models.Model): @property def get_known_ransomware_campaign_use_type(self): return "Known" if self.known_ransomware_campaign_use else "Unknown" + + +class CodeChange(models.Model): + commits = models.JSONField(blank=True, default=list) + pulls = models.JSONField(blank=True, default=list) + downloads = models.JSONField(blank=True, default=list) + patch = models.TextField(blank=True, null=True) + notes = models.TextField(blank=True, null=True) + references = models.JSONField(blank=True, default=list) + status_reviewed = models.BooleanField(default=False) + base_version = models.ForeignKey( + "Package", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="base_version_changes", + ) + base_commit = models.CharField(max_length=255, blank=True, null=True) + + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True) + + class Meta: + abstract = True + + +class CodeFix(CodeChange): + vulnerabilities = models.ManyToManyField("Vulnerability", related_name="codefixes", blank=True) + applies_to_versions = models.ManyToManyField("Package", related_name="fixes", 
blank=True) From 6f984c3e7c17b53a0b22168692fc740686460b60 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Fri, 20 Dec 2024 23:33:10 +0530 Subject: [PATCH 05/26] Add pipeline to collect fix commit Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/collect_commits.py | 112 +++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 vulnerabilities/pipelines/collect_commits.py diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py new file mode 100644 index 000000000..61f60b2a2 --- /dev/null +++ b/vulnerabilities/pipelines/collect_commits.py @@ -0,0 +1,112 @@ +# +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# + +from aboutcode.pipeline import LoopProgress + +from vulnerabilities.models import CodeFix +from vulnerabilities.models import Package +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.utils import normalize_purl + + +class CollectFixCommitsPipeline(VulnerableCodePipeline): + """ + Improver pipeline to scout References and create CodeFix entries. + """ + + pipeline_id = "collect_fix_commits" + license_expression = None + + @classmethod + def steps(cls): + return (cls.collect_and_store_fix_commits,) + + def collect_and_store_fix_commits(self): + references = VulnerabilityReference.objects.prefetch_related("vulnerabilities").distinct() + + self.log(f"Processing {references.count():,d} references to collect fix commits.") + + created_fix_count = 0 + progress = LoopProgress(total_iterations=references.count(), logger=self.log) + for reference in progress.iter(references.paginated(per_page=500)): + for vulnerability in reference.vulnerabilities.all(): + package_urls = self.extract_package_urls(reference) + commit_id = self.extract_commit_id(reference.url) + + if commit_id and package_urls: + for purl in package_urls: + normalized_purl = normalize_purl(purl) + package = self.get_or_create_package(normalized_purl) + codefix = self.create_codefix_entry( + vulnerability=vulnerability, + package=package, + commit_id=commit_id, + reference=reference.url, + ) + if codefix: + created_fix_count += 1 + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + + def extract_package_urls(self, reference): + """ + Extract Package URLs from a reference. + Returns a list of Package URLs inferred from the reference. + """ + urls = [] + if "github" in reference.url: + parts = reference.url.split("/") + if len(parts) >= 5: + namespace = parts[-3] + name = parts[-2] + commit = parts[-1] + if commit: + urls.append(f"pkg:github/{namespace}/{name}@{commit}") + return urls + + def extract_commit_id(self, url): + """ + Extract a commit ID from a URL, if available. + """ + if "github" in url: + parts = url.split("/") + return parts[-1] if len(parts) > 0 else None + return None + + def get_or_create_package(self, purl): + """ + Get or create a Package object from a Package URL. 
+ """ + try: + package, _ = Package.objects.get_or_create_from_purl(purl) + return package + except Exception as e: + self.log(f"Error creating package from purl {purl}: {e}") + return None + + def create_codefix_entry(self, vulnerability, package, commit_id, reference): + """ + Create a CodeFix entry associated with the given vulnerability and package. + """ + try: + codefix, created = CodeFix.objects.get_or_create( + base_version=package, + defaults={ + "commits": [commit_id], + "references": [reference], + }, + ) + if created: + codefix.vulnerabilities.add(vulnerability) + codefix.save() + return codefix + except Exception as e: + self.log(f"Error creating CodeFix entry: {e}") + return None From bcdc572515bbe437a36415b9e5d0314559a0c66c Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 24 Dec 2024 00:54:57 +0530 Subject: [PATCH 06/26] Address review comments Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 61 ++++-- vulnerabilities/pipelines/collect_commits.py | 187 ++++++++++++++----- 2 files changed, 193 insertions(+), 55 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 610d35c5f..ab93084d9 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1584,29 +1584,64 @@ def get_known_ransomware_campaign_use_type(self): class CodeChange(models.Model): - commits = models.JSONField(blank=True, default=list) - pulls = models.JSONField(blank=True, default=list) - downloads = models.JSONField(blank=True, default=list) - patch = models.TextField(blank=True, null=True) - notes = models.TextField(blank=True, null=True) - references = models.JSONField(blank=True, default=list) - status_reviewed = models.BooleanField(default=False) + """ + Abstract base model representing a change in code, either introducing or fixing a vulnerability. + This includes details about commits, patches, and related metadata. + """ + + commits = models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers associated with the code change.", + ) + pulls = models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ) + downloads = models.JSONField( + blank=True, default=list, help_text="List of download URLs for the patched code." + ) + patch = models.TextField( + blank=True, null=True, help_text="The code change in patch format (e.g., git diff)." + ) + notes = models.TextField( + blank=True, null=True, help_text="Additional notes or instructions about the code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="External references related to this code change." + ) + status_reviewed = models.BooleanField( + default=False, help_text="Indicates if the code change has been reviewed." + ) base_version = models.ForeignKey( "Package", null=True, blank=True, on_delete=models.SET_NULL, related_name="base_version_changes", + help_text="The base version of the package to which this code change applies.", + ) + base_commit = models.CharField( + max_length=255, + blank=True, + null=True, + help_text="The commit ID representing the state of the code before applying the fix or change.", + ) + created_at = models.DateTimeField( + auto_now_add=True, help_text="Timestamp indicating when the code change was created." + ) + updated_at = models.DateTimeField( + auto_now=True, help_text="Timestamp indicating when the code change was last updated." 
) - base_commit = models.CharField(max_length=255, blank=True, null=True) - - created_at = models.DateTimeField(auto_now_add=True) - updated_at = models.DateTimeField(auto_now=True) class Meta: abstract = True class CodeFix(CodeChange): - vulnerabilities = models.ManyToManyField("Vulnerability", related_name="codefixes", blank=True) - applies_to_versions = models.ManyToManyField("Package", related_name="fixes", blank=True) + package_vulnerabilities = models.ManyToManyField( + "AffectedByPackageRelatedVulnerability", + related_name="code_fixes", + help_text="The vulnerabilities fixed by this code change.", + ) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 61f60b2a2..44e91be31 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -8,12 +8,36 @@ # from aboutcode.pipeline import LoopProgress +from packageurl.contrib.url2purl import url2purl from vulnerabilities.models import CodeFix from vulnerabilities.models import Package from vulnerabilities.models import VulnerabilityReference from vulnerabilities.pipelines import VulnerableCodePipeline -from vulnerabilities.utils import normalize_purl + + +def extract_commit_id(url): + """ + Extract a commit ID from a URL, if available. + Supports different URL structures for commit references. + + >>> extract_commit_id("https://github.com/hedgedoc/hedgedoc/commit/c1789474020a6d668d616464cb2da5e90e123f65") + 'c1789474020a6d668d616464cb2da5e90e123f65' + """ + if "/commit/" in url: + parts = url.split("/") + if len(parts) > 1 and parts[-2] == "commit": + return parts[-1] + return None + + +def is_reference_already_processed(reference_url, commit_id): + """ + Check if a reference and commit ID pair already exists in a CodeFix entry. + """ + return CodeFix.objects.filter( + references__contains=[reference_url], commits__contains=[commit_id] + ).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -37,48 +61,33 @@ def collect_and_store_fix_commits(self): progress = LoopProgress(total_iterations=references.count(), logger=self.log) for reference in progress.iter(references.paginated(per_page=500)): for vulnerability in reference.vulnerabilities.all(): - package_urls = self.extract_package_urls(reference) - commit_id = self.extract_commit_id(reference.url) - - if commit_id and package_urls: - for purl in package_urls: - normalized_purl = normalize_purl(purl) - package = self.get_or_create_package(normalized_purl) - codefix = self.create_codefix_entry( - vulnerability=vulnerability, - package=package, - commit_id=commit_id, - reference=reference.url, - ) - if codefix: - created_fix_count += 1 + vcs_url = normalize_vcs_url(reference.url) + commit_id = extract_commit_id(reference.url) - self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") + if not commit_id or not vcs_url: + continue - def extract_package_urls(self, reference): - """ - Extract Package URLs from a reference. - Returns a list of Package URLs inferred from the reference. - """ - urls = [] - if "github" in reference.url: - parts = reference.url.split("/") - if len(parts) >= 5: - namespace = parts[-3] - name = parts[-2] - commit = parts[-1] - if commit: - urls.append(f"pkg:github/{namespace}/{name}@{commit}") - return urls - - def extract_commit_id(self, url): - """ - Extract a commit ID from a URL, if available. 
- """ - if "github" in url: - parts = url.split("/") - return parts[-1] if len(parts) > 0 else None - return None + # Skip if already processed + if is_reference_already_processed(reference.url, commit_id): + self.log( + f"Skipping already processed reference: {reference.url} with commit {commit_id}" + ) + continue + purl = url2purl(vcs_url) + if not purl: + self.log(f"Could not create purl from url: {vcs_url}") + continue + package = self.get_or_create_package(purl) + codefix = self.create_codefix_entry( + vulnerability=vulnerability, + package=package, + commit_id=commit_id, + reference=reference.url, + ) + if codefix: + created_fix_count += 1 + + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") def get_or_create_package(self, purl): """ @@ -109,4 +118,98 @@ def create_codefix_entry(self, vulnerability, package, commit_id, reference): return codefix except Exception as e: self.log(f"Error creating CodeFix entry: {e}") - return None + return + + +PLAIN_URLS = ( + "https://", + "http://", +) + +VCS_URLS = ( + "git://", + "git+git://", + "git+https://", + "git+http://", + "hg://", + "hg+http://", + "hg+https://", + "svn://", + "svn+https://", + "svn+http://", +) + + +def normalize_vcs_url(repo_url, vcs_tool=None): + """ + Return a normalized vcs_url version control URL given some `repo_url` and an + optional `vcs_tool` hint (such as 'git', 'hg', etc. + + Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories + and more using the same approach as npm install: + + See https://docs.npmjs.com/files/package.json#repository + or https://getcomposer.org/doc/05-repositories.md + + This is done here in npm: + https://github.com/npm/npm/blob/d3c858ce4cfb3aee515bb299eb034fe1b5e44344/node_modules/hosted-git-info/git-host-info.js + + These should be resolved: + npm/npm + gist:11081aaa281 + bitbucket:example/repo + gitlab:another/repo + expressjs/serve-static + git://github.com/angular/di.js.git + git://github.com/hapijs/boom + git@github.com:balderdashy/waterline-criteria.git + http://github.com/ariya/esprima.git + http://github.com/isaacs/nopt + https://github.com/chaijs/chai + https://github.com/christkv/kerberos.git + https://gitlab.com/foo/private.git + git@gitlab.com:foo/private.git + """ + if not repo_url or not isinstance(repo_url, str): + return + + repo_url = repo_url.strip() + if not repo_url: + return + + # TODO: If we match http and https, we may should add more check in + # case if the url is not a repo one. For example, check the domain + # name in the url... + if repo_url.startswith(VCS_URLS + PLAIN_URLS): + return repo_url + + if repo_url.startswith("git@"): + tool, _, right = repo_url.partition("@") + if ":" in repo_url: + host, _, repo = right.partition(":") + else: + # git@github.com/Filirom1/npm2aur.git + host, _, repo = right.partition("/") + + if any(r in host for r in ("bitbucket", "gitlab", "github")): + scheme = "https" + else: + scheme = "git" + + return f"{scheme}://{host}/{repo}" + + # FIXME: where these URL schemes come from?? + if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")): + hoster_urls = { + "bitbucket": f"https://bitbucket.org/{repo}", + "github": f"https://github.com/{repo}", + "gitlab": f"https://gitlab.com/{repo}", + "gist": f"https://gist.github.com/{repo}", + } + hoster, _, repo = repo_url.partition(":") + return hoster_urls[hoster] % locals() + + if len(repo_url.split("/")) == 2: + # implicit github, but that's only on NPM? 
+ return f"https://github.com/{repo_url}" + return repo_url From b3c0ef260594caebb294e9433012792a85ce112a Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 24 Dec 2024 01:02:54 +0530 Subject: [PATCH 07/26] Fix Signed-off-by: Tushar Goel --- vulnerabilities/migrations/0085_codefix.py | 60 --------- vulnerabilities/migrations/0086_codefix.py | 124 +++++++++++++++++++ vulnerabilities/models.py | 2 +- vulnerabilities/pipelines/collect_commits.py | 28 +---- 4 files changed, 131 insertions(+), 83 deletions(-) delete mode 100644 vulnerabilities/migrations/0085_codefix.py create mode 100644 vulnerabilities/migrations/0086_codefix.py diff --git a/vulnerabilities/migrations/0085_codefix.py b/vulnerabilities/migrations/0085_codefix.py deleted file mode 100644 index cbe162845..000000000 --- a/vulnerabilities/migrations/0085_codefix.py +++ /dev/null @@ -1,60 +0,0 @@ -# Generated by Django 4.2.16 on 2024-12-20 17:29 - -from django.db import migrations, models -import django.db.models.deletion - - -class Migration(migrations.Migration): - - dependencies = [ - ("vulnerabilities", "0084_alter_package_options_package_version_rank"), - ] - - operations = [ - migrations.CreateModel( - name="CodeFix", - fields=[ - ( - "id", - models.AutoField( - auto_created=True, primary_key=True, serialize=False, verbose_name="ID" - ), - ), - ("commits", models.JSONField(blank=True, default=list)), - ("pulls", models.JSONField(blank=True, default=list)), - ("downloads", models.JSONField(blank=True, default=list)), - ("patch", models.TextField(blank=True, null=True)), - ("notes", models.TextField(blank=True, null=True)), - ("references", models.JSONField(blank=True, default=list)), - ("status_reviewed", models.BooleanField(default=False)), - ("base_commit", models.CharField(blank=True, max_length=255, null=True)), - ("created_at", models.DateTimeField(auto_now_add=True)), - ("updated_at", models.DateTimeField(auto_now=True)), - ( - "applies_to_versions", - models.ManyToManyField( - blank=True, related_name="fixes", to="vulnerabilities.package" - ), - ), - ( - "base_version", - models.ForeignKey( - blank=True, - null=True, - on_delete=django.db.models.deletion.SET_NULL, - related_name="base_version_changes", - to="vulnerabilities.package", - ), - ), - ( - "vulnerabilities", - models.ManyToManyField( - blank=True, related_name="codefixes", to="vulnerabilities.vulnerability" - ), - ), - ], - options={ - "abstract": False, - }, - ), - ] diff --git a/vulnerabilities/migrations/0086_codefix.py b/vulnerabilities/migrations/0086_codefix.py new file mode 100644 index 000000000..64ea35fe0 --- /dev/null +++ b/vulnerabilities/migrations/0086_codefix.py @@ -0,0 +1,124 @@ +# Generated by Django 4.2.16 on 2024-12-23 19:32 + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0085_alter_package_is_ghost_alter_package_version_rank_and_more"), + ] + + operations = [ + migrations.CreateModel( + name="CodeFix", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, primary_key=True, serialize=False, verbose_name="ID" + ), + ), + ( + "commits", + models.JSONField( + blank=True, + default=list, + help_text="List of commit identifiers associated with the code change.", + ), + ), + ( + "pulls", + models.JSONField( + blank=True, + default=list, + help_text="List of pull request URLs associated with the code change.", + ), + ), + ( + "downloads", + models.JSONField( + blank=True, + default=list, + help_text="List of download 
URLs for the patched code.", + ), + ), + ( + "patch", + models.TextField( + blank=True, + help_text="The code change in patch format (e.g., git diff).", + null=True, + ), + ), + ( + "notes", + models.TextField( + blank=True, + help_text="Additional notes or instructions about the code change.", + null=True, + ), + ), + ( + "references", + models.JSONField( + blank=True, + default=list, + help_text="External references related to this code change.", + ), + ), + ( + "status_reviewed", + models.BooleanField( + default=False, help_text="Indicates if the code change has been reviewed." + ), + ), + ( + "base_commit", + models.CharField( + blank=True, + help_text="The commit ID representing the state of the code before applying the fix or change.", + max_length=255, + null=True, + ), + ), + ( + "created_at", + models.DateTimeField( + auto_now_add=True, + help_text="Timestamp indicating when the code change was created.", + ), + ), + ( + "updated_at", + models.DateTimeField( + auto_now=True, + help_text="Timestamp indicating when the code change was last updated.", + ), + ), + ( + "base_version", + models.ForeignKey( + blank=True, + help_text="The base version of the package to which this code change applies.", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="base_version_codechanges", + to="vulnerabilities.package", + ), + ), + ( + "package_vulnerabilities", + models.ManyToManyField( + help_text="The vulnerabilities fixed by this code change.", + related_name="code_fixes", + to="vulnerabilities.affectedbypackagerelatedvulnerability", + ), + ), + ], + options={ + "abstract": False, + }, + ), + ] diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index ab93084d9..7da4ec2c4 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1619,7 +1619,7 @@ class CodeChange(models.Model): null=True, blank=True, on_delete=models.SET_NULL, - related_name="base_version_changes", + related_name="base_version_codechanges", help_text="The base version of the package to which this code change applies.", ) base_commit = models.CharField( diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 44e91be31..564988d34 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -16,21 +16,6 @@ from vulnerabilities.pipelines import VulnerableCodePipeline -def extract_commit_id(url): - """ - Extract a commit ID from a URL, if available. - Supports different URL structures for commit references. - - >>> extract_commit_id("https://github.com/hedgedoc/hedgedoc/commit/c1789474020a6d668d616464cb2da5e90e123f65") - 'c1789474020a6d668d616464cb2da5e90e123f65' - """ - if "/commit/" in url: - parts = url.split("/") - if len(parts) > 1 and parts[-2] == "commit": - return parts[-1] - return None - - def is_reference_already_processed(reference_url, commit_id): """ Check if a reference and commit ID pair already exists in a CodeFix entry. 
@@ -62,15 +47,14 @@ def collect_and_store_fix_commits(self): for reference in progress.iter(references.paginated(per_page=500)): for vulnerability in reference.vulnerabilities.all(): vcs_url = normalize_vcs_url(reference.url) - commit_id = extract_commit_id(reference.url) - if not commit_id or not vcs_url: + if not vcs_url: continue # Skip if already processed - if is_reference_already_processed(reference.url, commit_id): + if is_reference_already_processed(reference.url, vcs_url): self.log( - f"Skipping already processed reference: {reference.url} with commit {commit_id}" + f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) continue purl = url2purl(vcs_url) @@ -81,7 +65,7 @@ def collect_and_store_fix_commits(self): codefix = self.create_codefix_entry( vulnerability=vulnerability, package=package, - commit_id=commit_id, + vcs_url=vcs_url, reference=reference.url, ) if codefix: @@ -100,7 +84,7 @@ def get_or_create_package(self, purl): self.log(f"Error creating package from purl {purl}: {e}") return None - def create_codefix_entry(self, vulnerability, package, commit_id, reference): + def create_codefix_entry(self, vulnerability, package, vcs_url, reference): """ Create a CodeFix entry associated with the given vulnerability and package. """ @@ -108,7 +92,7 @@ def create_codefix_entry(self, vulnerability, package, commit_id, reference): codefix, created = CodeFix.objects.get_or_create( base_version=package, defaults={ - "commits": [commit_id], + "commits": [vcs_url], "references": [reference], }, ) From c01f6ec81cba6c617063d2fcdbd7be3c253f2d78 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 31 Dec 2024 17:20:42 +0530 Subject: [PATCH 08/26] Model changes Signed-off-by: Tushar Goel --- vulnerabilities/models.py | 62 +++++---- vulnerabilities/pipelines/collect_commits.py | 22 ++- vulnerabilities/tests/test_collect_commits.py | 129 ++++++++++++++++++ 3 files changed, 185 insertions(+), 28 deletions(-) create mode 100644 vulnerabilities/tests/test_collect_commits.py diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 7da4ec2c4..6af4db6ae 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1587,12 +1587,16 @@ class CodeChange(models.Model): """ Abstract base model representing a change in code, either introducing or fixing a vulnerability. This includes details about commits, patches, and related metadata. + + We are tracking commits, pulls and downloads as references to the code change. The goal is to + keep track and store the actual code patch in the ``patch`` field. When not available the patch + will be inferred from these references using improvers. """ commits = models.JSONField( blank=True, default=list, - help_text="List of commit identifiers associated with the code change.", + help_text="List of commit identifiers using VCS URLs associated with the code change.", ) pulls = models.JSONField( blank=True, @@ -1603,36 +1607,30 @@ class CodeChange(models.Model): blank=True, default=list, help_text="List of download URLs for the patched code." ) patch = models.TextField( - blank=True, null=True, help_text="The code change in patch format (e.g., git diff)." - ) - notes = models.TextField( - blank=True, null=True, help_text="Additional notes or instructions about the code change." - ) - references = models.JSONField( - blank=True, default=list, help_text="External references related to this code change." 
- ) - status_reviewed = models.BooleanField( - default=False, help_text="Indicates if the code change has been reviewed." + blank=True, null=True, help_text="The code change as a patch in unified diff format." ) - base_version = models.ForeignKey( + base_package_version = models.ForeignKey( "Package", null=True, blank=True, on_delete=models.SET_NULL, - related_name="base_version_codechanges", - help_text="The base version of the package to which this code change applies.", + related_name="codechanges", + help_text="The base package version to which this code change applies.", ) - base_commit = models.CharField( - max_length=255, - blank=True, - null=True, - help_text="The commit ID representing the state of the code before applying the fix or change.", + notes = models.TextField( + blank=True, null=True, help_text="Notes or instructions about this code change." + ) + references = models.JSONField( + blank=True, default=list, help_text="URL references related to this code change." + ) + is_reviewed = models.BooleanField( + default=False, help_text="Indicates if this code change has been reviewed." ) created_at = models.DateTimeField( - auto_now_add=True, help_text="Timestamp indicating when the code change was created." + auto_now_add=True, help_text="Timestamp indicating when this code change was created." ) updated_at = models.DateTimeField( - auto_now=True, help_text="Timestamp indicating when the code change was last updated." + auto_now=True, help_text="Timestamp indicating when this code change was last updated." ) class Meta: @@ -1640,8 +1638,24 @@ class Meta: class CodeFix(CodeChange): - package_vulnerabilities = models.ManyToManyField( + """ + A code fix is a code change that addresses a vulnerability and is associated: + - with a specific affected package version + - optionally with a specific fixing package version when it is known + """ + + affected_package_vulnerability = models.ForeignKey( "AffectedByPackageRelatedVulnerability", - related_name="code_fixes", - help_text="The vulnerabilities fixed by this code change.", + on_delete=models.CASCADE, + related_name="code_fix", + help_text="The affected package version to which this code fix applies.", + ) + + fixed_package_vulnerability = models.ForeignKey( + "FixingPackageRelatedVulnerability", + null=True, + blank=True, + on_delete=models.SET_NULL, + related_name="code_fix", + help_text="The fixing package version with this code fix", ) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 564988d34..690789b83 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -44,15 +44,25 @@ def collect_and_store_fix_commits(self): created_fix_count = 0 progress = LoopProgress(total_iterations=references.count(), logger=self.log) + + Reference + AffectedByPackageRelatedVulnerability + # FixingPackageRelatedVulnerability + + + for apv in AffectedByPackageRelatedVulnerability.objects.all(): + vuln = apv.vulnerability + for ref in vuln.references: + for reference in progress.iter(references.paginated(per_page=500)): for vulnerability in reference.vulnerabilities.all(): - vcs_url = normalize_vcs_url(reference.url) + vcs_url = normalize_vcs_url(repo_url=reference.url) if not vcs_url: continue # Skip if already processed - if is_reference_already_processed(reference.url, vcs_url): + if is_reference_already_processed(reference_url=reference.url, commit_id=vcs_url): self.log( f"Skipping already processed reference: {reference.url} with 
VCS URL {vcs_url}" ) @@ -97,7 +107,8 @@ def create_codefix_entry(self, vulnerability, package, vcs_url, reference): }, ) if created: - codefix.vulnerabilities.add(vulnerability) + AffectedByPackageRelatedVulnerability.objects.get + codefix.package_vulnerabilities.add(vulnerability) codefix.save() return codefix except Exception as e: @@ -124,10 +135,13 @@ def create_codefix_entry(self, vulnerability, package, vcs_url, reference): ) +# TODO: This function was borrowed from scancode-toolkit. We need to create a shared library for that. def normalize_vcs_url(repo_url, vcs_tool=None): """ Return a normalized vcs_url version control URL given some `repo_url` and an - optional `vcs_tool` hint (such as 'git', 'hg', etc. + optional `vcs_tool` hint (such as 'git', 'hg', etc.) + + Return None if repo_url is not recognized as a VCS URL. Handles shortcuts for GitHub, GitHub gist, Bitbucket, or GitLab repositories and more using the same approach as npm install: diff --git a/vulnerabilities/tests/test_collect_commits.py b/vulnerabilities/tests/test_collect_commits.py new file mode 100644 index 000000000..ad6aa1ba2 --- /dev/null +++ b/vulnerabilities/tests/test_collect_commits.py @@ -0,0 +1,129 @@ +from unittest.mock import patch + +from vulnerabilities.models import CodeFix +from vulnerabilities.pipelines.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.collect_commits import is_reference_already_processed +from vulnerabilities.pipelines.collect_commits import normalize_vcs_url + + +# --- Mocked Dependencies --- +class MockVulnerability: + def __init__(self, id): + self.id = id + + +class MockReference: + def __init__(self, url, vulnerabilities): + self.url = url + self.vulnerabilities = vulnerabilities + + +class MockPackage: + def __init__(self, purl): + self.purl = purl + + +# --- Tests for Utility Functions --- +@patch("vulnerabilities.models.CodeFix.objects.filter") +def test_reference_already_processed_true(mock_filter): + mock_filter.return_value.exists.return_value = True + result = is_reference_already_processed("http://example.com", "commit123") + assert result is True + mock_filter.assert_called_once_with( + references__contains=["http://example.com"], commits__contains=["commit123"] + ) + + +@patch("vulnerabilities.models.CodeFix.objects.filter") +def test_reference_already_processed_false(mock_filter): + mock_filter.return_value.exists.return_value = False + result = is_reference_already_processed("http://example.com", "commit123") + assert result is False + + +# --- Tests for normalize_vcs_url --- +def test_normalize_plain_url(): + url = normalize_vcs_url("https://github.com/user/repo.git") + assert url == "https://github.com/user/repo.git" + + +def test_normalize_git_ssh_url(): + url = normalize_vcs_url("git@github.com:user/repo.git") + assert url == "https://github.com/user/repo.git" + + +def test_normalize_implicit_github(): + url = normalize_vcs_url("user/repo") + assert url == "https://github.com/user/repo" + + +# --- Tests for CollectFixCommitsPipeline --- +@patch("vulnerabilities.models.VulnerabilityReference.objects.prefetch_related") +@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") +@patch("vulnerabilities.pipelines.collect_commits.is_reference_already_processed") +@patch("vulnerabilities.pipelines.collect_commits.url2purl") +def test_collect_and_store_fix_commits( + mock_url2purl, mock_is_processed, mock_get_package, mock_prefetch +): + mock_vuln = MockVulnerability(id=1) + mock_reference = 
MockReference(url="http://example.com", vulnerabilities=[mock_vuln]) + mock_prefetch.return_value.distinct.return_value.paginated.return_value = [mock_reference] + mock_url2purl.return_value = "pkg:example/package@1.0.0" + mock_is_processed.return_value = False + mock_get_package.return_value = MockPackage(purl="pkg:example/package@1.0.0") + + pipeline = CollectFixCommitsPipeline() + pipeline.log = lambda msg: None + pipeline.collect_and_store_fix_commits() + + mock_is_processed.assert_called_once_with("http://example.com", "pkg:example/package@1.0.0") + mock_get_package.assert_called_once_with("pkg:example/package@1.0.0") + + +@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") +def test_get_or_create_package_success(mock_get_or_create): + mock_get_or_create.return_value = (MockPackage(purl="pkg:example/package@1.0.0"), True) + pipeline = CollectFixCommitsPipeline() + package = pipeline.get_or_create_package("pkg:example/package@1.0.0") + assert package.purl == "pkg:example/package@1.0.0" + + +@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") +def test_get_or_create_package_failure(mock_get_or_create): + mock_get_or_create.side_effect = Exception("Error") + pipeline = CollectFixCommitsPipeline() + logs = [] + pipeline.log = lambda msg: logs.append(msg) + result = pipeline.get_or_create_package("pkg:example/package@1.0.0") + assert result is None + assert len(logs) == 1 + + +@patch("vulnerabilities.models.CodeFix.objects.get_or_create") +def test_create_codefix_entry_success(mock_get_or_create): + mock_get_or_create.return_value = (CodeFix(), True) + pipeline = CollectFixCommitsPipeline() + result = pipeline.create_codefix_entry( + MockVulnerability(1), + MockPackage("pkg:example/package@1.0.0"), + "http://example.com", + "http://reference", + ) + assert result is not None + mock_get_or_create.assert_called_once() + + +@patch("vulnerabilities.models.CodeFix.objects.get_or_create") +def test_create_codefix_entry_failure(mock_get_or_create): + mock_get_or_create.side_effect = Exception("Error") + pipeline = CollectFixCommitsPipeline() + logs = [] + pipeline.log = lambda msg: logs.append(msg) + result = pipeline.create_codefix_entry( + MockVulnerability(1), + MockPackage("pkg:example/package@1.0.0"), + "http://example.com", + "http://reference", + ) + assert result is None + assert len(logs) == 1 From 48d2144b8c0cf1145928f6c543f0604622a6144f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Tue, 7 Jan 2025 20:20:46 +0530 Subject: [PATCH 09/26] Refactor the collect fix commit pipeline Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/collect_commits.py | 164 +++++++++++------- vulnerabilities/tests/test_collect_commits.py | 6 +- 2 files changed, 106 insertions(+), 64 deletions(-) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 690789b83..93bcce205 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -7,22 +7,24 @@ # See https://aboutcode.org for more information about nexB OSS projects. 
# +import re + from aboutcode.pipeline import LoopProgress from packageurl.contrib.url2purl import url2purl +from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import CodeFix +from vulnerabilities.models import FixingPackageRelatedVulnerability from vulnerabilities.models import Package from vulnerabilities.models import VulnerabilityReference from vulnerabilities.pipelines import VulnerableCodePipeline -def is_reference_already_processed(reference_url, commit_id): +def is_vcs_url_already_processed(commit_id): """ - Check if a reference and commit ID pair already exists in a CodeFix entry. + Check if a VCS URL exists in a CodeFix entry. """ - return CodeFix.objects.filter( - references__contains=[reference_url], commits__contains=[commit_id] - ).exists() + return CodeFix.objects.filter(commits__contains=[commit_id]).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -38,83 +40,54 @@ def steps(cls): return (cls.collect_and_store_fix_commits,) def collect_and_store_fix_commits(self): - references = VulnerabilityReference.objects.prefetch_related("vulnerabilities").distinct() + affected_by_package_related_vulnerabilities = ( + AffectedByPackageRelatedVulnerability.objects.all().prefetch_related( + "vulnerability", "vulnerability__references" + ) + ) - self.log(f"Processing {references.count():,d} references to collect fix commits.") + self.log( + f"Processing {affected_by_package_related_vulnerabilities.count():,d} references to collect fix commits." + ) created_fix_count = 0 - progress = LoopProgress(total_iterations=references.count(), logger=self.log) - - Reference - AffectedByPackageRelatedVulnerability - # FixingPackageRelatedVulnerability + progress = LoopProgress( + total_iterations=affected_by_package_related_vulnerabilities.count(), logger=self.log + ) + for apv in progress.iter( + affected_by_package_related_vulnerabilities.paginated(per_page=500) + ): + vulnerability = apv.vulnerability + for reference in vulnerability.references: - for apv in AffectedByPackageRelatedVulnerability.objects.all(): - vuln = apv.vulnerability - for ref in vuln.references: + if not is_vcs_url(reference.url): + continue - for reference in progress.iter(references.paginated(per_page=500)): - for vulnerability in reference.vulnerabilities.all(): vcs_url = normalize_vcs_url(repo_url=reference.url) if not vcs_url: continue # Skip if already processed - if is_reference_already_processed(reference_url=reference.url, commit_id=vcs_url): + if is_vcs_url_already_processed(commit_id=vcs_url): self.log( f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) continue - purl = url2purl(vcs_url) - if not purl: - self.log(f"Could not create purl from url: {vcs_url}") - continue - package = self.get_or_create_package(purl) - codefix = self.create_codefix_entry( - vulnerability=vulnerability, - package=package, - vcs_url=vcs_url, - reference=reference.url, + code_fix, created = CodeFix.objects.get_or_create( + commits=[vcs_url], + affected_package_vulnerability=apv, ) - if codefix: + + if created: created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") - def get_or_create_package(self, purl): - """ - Get or create a Package object from a Package URL. 
- """ - try: - package, _ = Package.objects.get_or_create_from_purl(purl) - return package - except Exception as e: - self.log(f"Error creating package from purl {purl}: {e}") - return None - - def create_codefix_entry(self, vulnerability, package, vcs_url, reference): - """ - Create a CodeFix entry associated with the given vulnerability and package. - """ - try: - codefix, created = CodeFix.objects.get_or_create( - base_version=package, - defaults={ - "commits": [vcs_url], - "references": [reference], - }, - ) - if created: - AffectedByPackageRelatedVulnerability.objects.get - codefix.package_vulnerabilities.add(vulnerability) - codefix.save() - return codefix - except Exception as e: - self.log(f"Error creating CodeFix entry: {e}") - return - PLAIN_URLS = ( "https://", @@ -211,3 +184,72 @@ def normalize_vcs_url(repo_url, vcs_tool=None): # implicit github, but that's only on NPM? return f"https://github.com/{repo_url}" return repo_url + + +def is_vcs_url(repo_url): + """ + Check if a given URL or string matches a valid VCS (Version Control System) URL. + + Supports: + - Standard VCS URL protocols (git, http, https, ssh) + - Shortcut syntax (e.g., github:user/repo, gitlab:group/repo) + - GitHub shortcut (e.g., user/repo) + + Args: + repo_url (str): The repository URL or shortcut to validate. + + Returns: + bool: True if the string is a valid VCS URL, False otherwise. + + Examples: + >>> is_vcs_url("git://github.com/angular/di.js.git") + True + >>> is_vcs_url("github:user/repo") + True + >>> is_vcs_url("user/repo") + True + >>> is_vcs_url("https://github.com/user/repo.git") + True + >>> is_vcs_url("git@github.com:user/repo.git") + True + >>> is_vcs_url("http://github.com/isaacs/nopt") + True + >>> is_vcs_url("https://gitlab.com/foo/private.git") + True + >>> is_vcs_url("git@gitlab.com:foo/private.git") + True + >>> is_vcs_url("bitbucket:example/repo") + True + >>> is_vcs_url("gist:11081aaa281") + True + >>> is_vcs_url("ftp://example.com/not-a-repo") + False + >>> is_vcs_url("random-string") + False + >>> is_vcs_url("https://example.com/not-a-repo") + False + """ + if not repo_url or not isinstance(repo_url, str): + return False + + repo_url = repo_url.strip() + if not repo_url: + return False + + # 1. Match URLs with standard protocols + if re.match(r"^(git|ssh|http|https)://", repo_url): + return True + + # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) + if re.match(r"^git@\w+\.\w+:[\w\-./]+$", repo_url): + return True + + # 3. Match shortcut syntax (e.g., github:user/repo) + if re.match(r"^(github|gitlab|bitbucket|gist):[\w\-./]+$", repo_url): + return True + + # 4. 
Match implicit GitHub shortcut (e.g., user/repo) + if re.match(r"^[\w\-]+/[\w\-]+$", repo_url): + return True + + return False diff --git a/vulnerabilities/tests/test_collect_commits.py b/vulnerabilities/tests/test_collect_commits.py index ad6aa1ba2..6749fc54e 100644 --- a/vulnerabilities/tests/test_collect_commits.py +++ b/vulnerabilities/tests/test_collect_commits.py @@ -2,7 +2,7 @@ from vulnerabilities.models import CodeFix from vulnerabilities.pipelines.collect_commits import CollectFixCommitsPipeline -from vulnerabilities.pipelines.collect_commits import is_reference_already_processed +from vulnerabilities.pipelines.collect_commits import is_vcs_url_already_processed from vulnerabilities.pipelines.collect_commits import normalize_vcs_url @@ -27,7 +27,7 @@ def __init__(self, purl): @patch("vulnerabilities.models.CodeFix.objects.filter") def test_reference_already_processed_true(mock_filter): mock_filter.return_value.exists.return_value = True - result = is_reference_already_processed("http://example.com", "commit123") + result = is_vcs_url_already_processed("http://example.com", "commit123") assert result is True mock_filter.assert_called_once_with( references__contains=["http://example.com"], commits__contains=["commit123"] @@ -37,7 +37,7 @@ def test_reference_already_processed_true(mock_filter): @patch("vulnerabilities.models.CodeFix.objects.filter") def test_reference_already_processed_false(mock_filter): mock_filter.return_value.exists.return_value = False - result = is_reference_already_processed("http://example.com", "commit123") + result = is_vcs_url_already_processed("http://example.com", "commit123") assert result is False From 991fbeb3aefdd1f2ef3f517c82860522f2edeb9f Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 18:58:48 +0530 Subject: [PATCH 10/26] Add tests Signed-off-by: Tushar Goel --- vulnerabilities/migrations/0086_codefix.py | 55 ++-- vulnerabilities/pipelines/collect_commits.py | 17 +- vulnerabilities/tests/test_collect_commits.py | 281 ++++++++++-------- 3 files changed, 196 insertions(+), 157 deletions(-) diff --git a/vulnerabilities/migrations/0086_codefix.py b/vulnerabilities/migrations/0086_codefix.py index 64ea35fe0..df67c3ae8 100644 --- a/vulnerabilities/migrations/0086_codefix.py +++ b/vulnerabilities/migrations/0086_codefix.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.16 on 2024-12-23 19:32 +# Generated by Django 4.2.16 on 2025-01-08 13:28 from django.db import migrations, models import django.db.models.deletion @@ -25,7 +25,7 @@ class Migration(migrations.Migration): models.JSONField( blank=True, default=list, - help_text="List of commit identifiers associated with the code change.", + help_text="List of commit identifiers using VCS URLs associated with the code change.", ), ), ( @@ -48,7 +48,7 @@ class Migration(migrations.Migration): "patch", models.TextField( blank=True, - help_text="The code change in patch format (e.g., git diff).", + help_text="The code change as a patch in unified diff format.", null=True, ), ), @@ -56,7 +56,7 @@ class Migration(migrations.Migration): "notes", models.TextField( blank=True, - help_text="Additional notes or instructions about the code change.", + help_text="Notes or instructions about this code change.", null=True, ), ), @@ -65,55 +65,58 @@ class Migration(migrations.Migration): models.JSONField( blank=True, default=list, - help_text="External references related to this code change.", + help_text="URL references related to this code change.", ), ), ( - "status_reviewed", + "is_reviewed", 
models.BooleanField( - default=False, help_text="Indicates if the code change has been reviewed." - ), - ), - ( - "base_commit", - models.CharField( - blank=True, - help_text="The commit ID representing the state of the code before applying the fix or change.", - max_length=255, - null=True, + default=False, help_text="Indicates if this code change has been reviewed." ), ), ( "created_at", models.DateTimeField( auto_now_add=True, - help_text="Timestamp indicating when the code change was created.", + help_text="Timestamp indicating when this code change was created.", ), ), ( "updated_at", models.DateTimeField( auto_now=True, - help_text="Timestamp indicating when the code change was last updated.", + help_text="Timestamp indicating when this code change was last updated.", ), ), ( - "base_version", + "affected_package_vulnerability", + models.ForeignKey( + help_text="The affected package version to which this code fix applies.", + on_delete=django.db.models.deletion.CASCADE, + related_name="code_fix", + to="vulnerabilities.affectedbypackagerelatedvulnerability", + ), + ), + ( + "base_package_version", models.ForeignKey( blank=True, - help_text="The base version of the package to which this code change applies.", + help_text="The base package version to which this code change applies.", null=True, on_delete=django.db.models.deletion.SET_NULL, - related_name="base_version_codechanges", + related_name="codechanges", to="vulnerabilities.package", ), ), ( - "package_vulnerabilities", - models.ManyToManyField( - help_text="The vulnerabilities fixed by this code change.", - related_name="code_fixes", - to="vulnerabilities.affectedbypackagerelatedvulnerability", + "fixed_package_vulnerability", + models.ForeignKey( + blank=True, + help_text="The fixing package version with this code fix", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="code_fix", + to="vulnerabilities.fixingpackagerelatedvulnerability", ), ), ], diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 93bcce205..8806fb4fb 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -10,13 +10,9 @@ import re from aboutcode.pipeline import LoopProgress -from packageurl.contrib.url2purl import url2purl from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import CodeFix -from vulnerabilities.models import FixingPackageRelatedVulnerability -from vulnerabilities.models import Package -from vulnerabilities.models import VulnerabilityReference from vulnerabilities.pipelines import VulnerableCodePipeline @@ -59,8 +55,7 @@ def collect_and_store_fix_commits(self): affected_by_package_related_vulnerabilities.paginated(per_page=500) ): vulnerability = apv.vulnerability - for reference in vulnerability.references: - + for reference in vulnerability.references.all(): if not is_vcs_url(reference.url): continue @@ -171,6 +166,7 @@ def normalize_vcs_url(repo_url, vcs_tool=None): # FIXME: where these URL schemes come from?? if repo_url.startswith(("bitbucket:", "gitlab:", "github:", "gist:")): + repo = repo_url.split(":")[1] hoster_urls = { "bitbucket": f"https://bitbucket.org/{repo}", "github": f"https://github.com/{repo}", @@ -236,12 +232,15 @@ def is_vcs_url(repo_url): if not repo_url: return False - # 1. 
Match URLs with standard protocols - if re.match(r"^(git|ssh|http|https)://", repo_url): + # Define valid VCS domains + vcs_domains = r"(github\.com|gitlab\.com|bitbucket\.org|gist\.github\.com)" + + # 1. Match URLs with standard protocols pointing to VCS domains + if re.match(rf"^(git|ssh|http|https)://{vcs_domains}/[\w\-.]+/[\w\-.]+", repo_url): return True # 2. Match SSH URLs (e.g., git@github.com:user/repo.git) - if re.match(r"^git@\w+\.\w+:[\w\-./]+$", repo_url): + if re.match(rf"^git@{vcs_domains}:[\w\-.]+/[\w\-.]+(\.git)?$", repo_url): return True # 3. Match shortcut syntax (e.g., github:user/repo) diff --git a/vulnerabilities/tests/test_collect_commits.py b/vulnerabilities/tests/test_collect_commits.py index 6749fc54e..c478244e1 100644 --- a/vulnerabilities/tests/test_collect_commits.py +++ b/vulnerabilities/tests/test_collect_commits.py @@ -1,129 +1,166 @@ -from unittest.mock import patch +from django.test import TestCase +from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import CodeFix +from vulnerabilities.models import Package +from vulnerabilities.models import Vulnerability +from vulnerabilities.models import VulnerabilityReference +from vulnerabilities.models import VulnerabilityRelatedReference from vulnerabilities.pipelines.collect_commits import CollectFixCommitsPipeline +from vulnerabilities.pipelines.collect_commits import is_vcs_url from vulnerabilities.pipelines.collect_commits import is_vcs_url_already_processed from vulnerabilities.pipelines.collect_commits import normalize_vcs_url -# --- Mocked Dependencies --- -class MockVulnerability: - def __init__(self, id): - self.id = id - - -class MockReference: - def __init__(self, url, vulnerabilities): - self.url = url - self.vulnerabilities = vulnerabilities - - -class MockPackage: - def __init__(self, purl): - self.purl = purl - - -# --- Tests for Utility Functions --- -@patch("vulnerabilities.models.CodeFix.objects.filter") -def test_reference_already_processed_true(mock_filter): - mock_filter.return_value.exists.return_value = True - result = is_vcs_url_already_processed("http://example.com", "commit123") - assert result is True - mock_filter.assert_called_once_with( - references__contains=["http://example.com"], commits__contains=["commit123"] - ) - - -@patch("vulnerabilities.models.CodeFix.objects.filter") -def test_reference_already_processed_false(mock_filter): - mock_filter.return_value.exists.return_value = False - result = is_vcs_url_already_processed("http://example.com", "commit123") - assert result is False - - -# --- Tests for normalize_vcs_url --- -def test_normalize_plain_url(): - url = normalize_vcs_url("https://github.com/user/repo.git") - assert url == "https://github.com/user/repo.git" - - -def test_normalize_git_ssh_url(): - url = normalize_vcs_url("git@github.com:user/repo.git") - assert url == "https://github.com/user/repo.git" - - -def test_normalize_implicit_github(): - url = normalize_vcs_url("user/repo") - assert url == "https://github.com/user/repo" - - -# --- Tests for CollectFixCommitsPipeline --- -@patch("vulnerabilities.models.VulnerabilityReference.objects.prefetch_related") -@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") -@patch("vulnerabilities.pipelines.collect_commits.is_reference_already_processed") -@patch("vulnerabilities.pipelines.collect_commits.url2purl") -def test_collect_and_store_fix_commits( - mock_url2purl, mock_is_processed, mock_get_package, mock_prefetch -): - 
mock_vuln = MockVulnerability(id=1) - mock_reference = MockReference(url="http://example.com", vulnerabilities=[mock_vuln]) - mock_prefetch.return_value.distinct.return_value.paginated.return_value = [mock_reference] - mock_url2purl.return_value = "pkg:example/package@1.0.0" - mock_is_processed.return_value = False - mock_get_package.return_value = MockPackage(purl="pkg:example/package@1.0.0") - - pipeline = CollectFixCommitsPipeline() - pipeline.log = lambda msg: None - pipeline.collect_and_store_fix_commits() - - mock_is_processed.assert_called_once_with("http://example.com", "pkg:example/package@1.0.0") - mock_get_package.assert_called_once_with("pkg:example/package@1.0.0") - - -@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") -def test_get_or_create_package_success(mock_get_or_create): - mock_get_or_create.return_value = (MockPackage(purl="pkg:example/package@1.0.0"), True) - pipeline = CollectFixCommitsPipeline() - package = pipeline.get_or_create_package("pkg:example/package@1.0.0") - assert package.purl == "pkg:example/package@1.0.0" - - -@patch("vulnerabilities.pipelines.collect_commits.CollectFixCommitsPipeline.get_or_create_package") -def test_get_or_create_package_failure(mock_get_or_create): - mock_get_or_create.side_effect = Exception("Error") - pipeline = CollectFixCommitsPipeline() - logs = [] - pipeline.log = lambda msg: logs.append(msg) - result = pipeline.get_or_create_package("pkg:example/package@1.0.0") - assert result is None - assert len(logs) == 1 - - -@patch("vulnerabilities.models.CodeFix.objects.get_or_create") -def test_create_codefix_entry_success(mock_get_or_create): - mock_get_or_create.return_value = (CodeFix(), True) - pipeline = CollectFixCommitsPipeline() - result = pipeline.create_codefix_entry( - MockVulnerability(1), - MockPackage("pkg:example/package@1.0.0"), - "http://example.com", - "http://reference", - ) - assert result is not None - mock_get_or_create.assert_called_once() - - -@patch("vulnerabilities.models.CodeFix.objects.get_or_create") -def test_create_codefix_entry_failure(mock_get_or_create): - mock_get_or_create.side_effect = Exception("Error") - pipeline = CollectFixCommitsPipeline() - logs = [] - pipeline.log = lambda msg: logs.append(msg) - result = pipeline.create_codefix_entry( - MockVulnerability(1), - MockPackage("pkg:example/package@1.0.0"), - "http://example.com", - "http://reference", - ) - assert result is None - assert len(logs) == 1 +class CollectFixCommitsPipelineTests(TestCase): + def setUp(self): + self.vulnerability = Vulnerability.objects.create( + vulnerability_id="VCID-1234", summary="Test vulnerability" + ) + + package = Package.objects.create(type="npm", namespace="abc", name="def", version="1") + + self.affected_by_vuln = AffectedByPackageRelatedVulnerability.objects.create( + package=package, vulnerability=self.vulnerability + ) + + self.reference1 = VulnerabilityReference.objects.create( + url="https://github.com/example/repo/commit/abcd1234" + ) + + self.reference2 = VulnerabilityReference.objects.create( + url="https://gitlab.com/example/repo/commit/efgh5678" + ) + VulnerabilityRelatedReference.objects.create( + vulnerability=self.vulnerability, reference=self.reference2 + ) + VulnerabilityRelatedReference.objects.create( + vulnerability=self.vulnerability, reference=self.reference1 + ) + + def test_is_vcs_url(self): + valid_urls = [ + "git://github.com/angular/di.js.git", + "https://github.com/user/repo.git", + "git@gitlab.com:user/repo.git", + ] + invalid_urls = 
[
+            "ftp://example.com/not-a-repo",
+            "random-string",
+            "https://example.com/not-a-repo",
+        ]
+        for url in valid_urls:
+            assert is_vcs_url(url) is True
+
+        for url in invalid_urls:
+            assert is_vcs_url(url) is False
+
+    def test_normalize_vcs_url(self):
+
+        assert (
+            normalize_vcs_url("git@github.com:user/repo.git") == "https://github.com/user/repo.git"
+        )
+        assert normalize_vcs_url("github:user/repo") == "https://github.com/user/repo"
+        assert normalize_vcs_url(
+            "https://github.com/user/repo.git"
+        ) == "https://github.com/user/repo.git"
+
+    def test_is_vcs_url_already_processed(self):
+        CodeFix.objects.create(
+            commits=["https://github.com/example/repo/commit/abcd1234"],
+            affected_package_vulnerability=self.affected_by_vuln,
+        )
+        assert (
+            is_vcs_url_already_processed("https://github.com/example/repo/commit/abcd1234") is True
+        )
+        assert (
+            is_vcs_url_already_processed("https://github.com/example/repo/commit/unknown") is False
+        )
+
+    def test_collect_and_store_fix_commits(self):
+        pipeline = CollectFixCommitsPipeline()
+        pipeline.collect_and_store_fix_commits()
+
+        assert (
+            CodeFix.objects.filter(
+                commits__contains=["https://github.com/example/repo/commit/abcd1234"]
+            ).exists()
+            is True
+        )
+        assert (
+            CodeFix.objects.filter(
+                commits__contains=["https://gitlab.com/example/repo/commit/efgh5678"]
+            ).exists()
+            is True
+        )
+
+    def test_skip_already_processed_commit(self):
+        CodeFix.objects.create(
+            commits=["https://github.com/example/repo/commit/abcd1234"],
+            affected_package_vulnerability=self.affected_by_vuln,
+        )
+
+        pipeline = CollectFixCommitsPipeline()
+        pipeline.collect_and_store_fix_commits()
+
+        # Ensure duplicate entry was not created
+        self.assertEqual(
+            CodeFix.objects.filter(
+                commits__contains=["https://github.com/example/repo/commit/abcd1234"]
+            ).count(),
+            1,
+        )
+
+
+class IsVCSURLTests(TestCase):
+    def test_valid_vcs_urls(self):
+        valid_urls = [
+            "git://github.com/example/repo.git",
+            "https://github.com/example/repo.git",
+            "git@github.com:example/repo.git",
+            "github:user/repo",
+        ]
+        for url in valid_urls:
+            with self.subTest(url=url):
+                self.assertTrue(is_vcs_url(url))
+
+    def test_invalid_vcs_urls(self):
+        invalid_urls = ["http://example.com", "ftp://example.com/repo", "random-string"]
+        for url in invalid_urls:
+            with self.subTest(url=url):
+                self.assertFalse(is_vcs_url(url))
+
+
+class NormalizeVCSURLTests(TestCase):
+    def test_normalize_valid_vcs_urls(self):
+        self.assertEqual(
+            normalize_vcs_url("git@github.com:user/repo.git"), "https://github.com/user/repo.git"
+        )
+        self.assertEqual(normalize_vcs_url("github:user/repo"), "https://github.com/user/repo")
+        self.assertEqual(
+            normalize_vcs_url("https://github.com/user/repo.git"),
+            "https://github.com/user/repo.git",
+        )
+
+
+class IsVCSURLAlreadyProcessedTests(TestCase):
+    def setUp(self):
+        self.vulnerability = Vulnerability.objects.create(vulnerability_id="VCID-5678")
+        package = Package.objects.create(type="npm", namespace="abc", name="def", version="1")
+        self.affected_by_vuln = AffectedByPackageRelatedVulnerability.objects.create(
+            package=package, vulnerability=self.vulnerability
+        )
+        self.code_fix = CodeFix.objects.create(
+            commits=["https://github.com/example/repo/commit/commit1"],
+            affected_package_vulnerability=self.affected_by_vuln,
+        )
+
+    def test_commit_already_processed(self):
+        self.assertTrue(
+            is_vcs_url_already_processed("https://github.com/example/repo/commit/commit1")
+        )
+
+    def test_commit_not_processed(self):
+        self.assertFalse(
+            
is_vcs_url_already_processed("https://github.com/example/repo/commit/commit2") + ) From 7bb44be8bd2bd5a8bc157c2ad3a5d70e64b92a53 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 20:21:54 +0530 Subject: [PATCH 11/26] Add CodeFix in API Signed-off-by: Tushar Goel --- vulnerabilities/api_v2.py | 90 ++++++++++++++++++++ vulnerabilities/improvers/__init__.py | 2 + vulnerabilities/models.py | 2 + vulnerabilities/pipelines/collect_commits.py | 23 ++--- vulnerabilities/tests/test_api_v2.py | 30 +++++-- vulnerablecode/urls.py | 3 + 6 files changed, 133 insertions(+), 17 deletions(-) diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index b570570ed..d3f1d714c 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -21,6 +21,7 @@ from rest_framework.response import Response from rest_framework.reverse import reverse +from vulnerabilities.models import CodeFix from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability from vulnerabilities.models import VulnerabilityReference @@ -198,14 +199,25 @@ def get_affected_by_vulnerabilities(self, obj): Return a dictionary with vulnerabilities as keys and their details, including fixed_by_packages. """ result = {} + request = self.context.get("request") for vuln in getattr(obj, "prefetched_affected_vulnerabilities", []): fixed_by_package = vuln.fixed_by_packages.first() purl = None if fixed_by_package: purl = fixed_by_package.package_url + # Get code fixed for a vulnerability + code_fixes = CodeFix.objects.filter( + affected_package_vulnerability__vulnerability=vuln + ).distinct() + code_fix_urls = [ + reverse("codefix-detail", args=[code_fix.id], request=request) + for code_fix in code_fixes + ] + result[vuln.vulnerability_id] = { "vulnerability_id": vuln.vulnerability_id, "fixed_by_packages": purl, + "code_fixes": code_fix_urls, } return result @@ -521,3 +533,81 @@ def lookup(self, request): qs = self.get_queryset().for_purls([purl]).with_is_vulnerable() return Response(PackageV2Serializer(qs, many=True, context={"request": request}).data) + + +from rest_framework import serializers + +from vulnerabilities.models import CodeFix + + +class CodeFixSerializer(serializers.ModelSerializer): + """ + Serializer for the CodeFix model. + Provides detailed information about a code fix. 
+ """ + + affected_vulnerability_id = serializers.CharField( + source="affected_package_vulnerability.vulnerability.vulnerability_id", + read_only=True, + help_text="ID of the affected vulnerability.", + ) + affected_package_purl = serializers.CharField( + source="affected_package_vulnerability.package.package_url", + read_only=True, + help_text="PURL of the affected package.", + ) + fixed_package_purl = serializers.CharField( + source="fixed_package_vulnerability.package.package_url", + read_only=True, + help_text="PURL of the fixing package (if available).", + ) + created_at = serializers.DateTimeField( + format="%Y-%m-%dT%H:%M:%SZ", + read_only=True, + help_text="Timestamp when the code fix was created.", + ) + updated_at = serializers.DateTimeField( + format="%Y-%m-%dT%H:%M:%SZ", + read_only=True, + help_text="Timestamp when the code fix was last updated.", + ) + + class Meta: + model = CodeFix + fields = [ + "id", + "commits", + "pulls", + "downloads", + "patch", + "affected_vulnerability_id", + "affected_package_purl", + "fixed_package_purl", + "notes", + "references", + "is_reviewed", + "created_at", + "updated_at", + ] + read_only_fields = ["created_at", "updated_at"] + + +class CodeFixViewSet(viewsets.ReadOnlyModelViewSet): + """ + API endpoint that allows viewing CodeFix entries. + """ + + queryset = CodeFix.objects.all() + serializer_class = CodeFixSerializer + + def get_queryset(self): + """ + Optionally filter by vulnerability ID. + """ + queryset = super().get_queryset() + vulnerability_id = self.request.query_params.get("vulnerability_id") + if vulnerability_id: + queryset = queryset.filter( + affected_package_vulnerability__vulnerability__vulnerability_id=vulnerability_id + ) + return queryset diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index dd73eb02d..44a65df47 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,6 +10,7 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status from vulnerabilities.pipelines import VulnerableCodePipeline +from vulnerabilities.pipelines import collect_commits from vulnerabilities.pipelines import compute_package_risk from vulnerabilities.pipelines import compute_package_version_rank from vulnerabilities.pipelines import enhance_with_exploitdb @@ -41,6 +42,7 @@ enhance_with_exploitdb.ExploitDBImproverPipeline, compute_package_risk.ComputePackageRiskPipeline, compute_package_version_rank.ComputeVersionRankPipeline, + collect_commits.CollectFixCommitsPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 6af4db6ae..1a58ec4dc 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -1101,6 +1101,8 @@ class AffectedByPackageRelatedVulnerability(PackageRelatedVulnerabilityBase): related_name="affected_package_vulnerability_relations", ) + objects = BaseQuerySet.as_manager() + class Meta(PackageRelatedVulnerabilityBase.Meta): verbose_name_plural = "Affected By Package Related Vulnerabilities" diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index 8806fb4fb..bf94b755d 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -20,7 +20,8 @@ def is_vcs_url_already_processed(commit_id): """ Check if a VCS URL exists in a CodeFix entry. 
""" - return CodeFix.objects.filter(commits__contains=[commit_id]).exists() + if "commit" in commit_id: + return CodeFix.objects.filter(commits__contains=[commit_id]).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -70,17 +71,19 @@ def collect_and_store_fix_commits(self): f"Skipping already processed reference: {reference.url} with VCS URL {vcs_url}" ) continue - code_fix, created = CodeFix.objects.get_or_create( - commits=[vcs_url], - affected_package_vulnerability=apv, - ) - - if created: - created_fix_count += 1 - self.log( - f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + # check if vcs_url has commit + if "/commit/" in vcs_url: + code_fix, created = CodeFix.objects.get_or_create( + commits=[vcs_url], + affected_package_vulnerability=apv, ) + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) + self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") diff --git a/vulnerabilities/tests/test_api_v2.py b/vulnerabilities/tests/test_api_v2.py index af4dc47c8..e3434c6a9 100644 --- a/vulnerabilities/tests/test_api_v2.py +++ b/vulnerabilities/tests/test_api_v2.py @@ -216,7 +216,7 @@ def test_list_packages(self): Should return a list of packages with their details and associated vulnerabilities. """ url = reverse("package-v2-list") - with self.assertNumQueries(31): + with self.assertNumQueries(32): response = self.client.get(url, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIn("results", response.data) @@ -238,7 +238,7 @@ def test_filter_packages_by_purl(self): Test filtering packages by one or more PURLs. """ url = reverse("package-v2-list") - with self.assertNumQueries(19): + with self.assertNumQueries(20): response = self.client.get(url, {"purl": "pkg:pypi/django@3.2"}, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(len(response.data["results"]["packages"]), 1) @@ -249,7 +249,7 @@ def test_filter_packages_by_affected_vulnerability(self): Test filtering packages by affected_by_vulnerability. 
""" url = reverse("package-v2-list") - with self.assertNumQueries(19): + with self.assertNumQueries(20): response = self.client.get( url, {"affected_by_vulnerability": "VCID-1234"}, format="json" ) @@ -308,7 +308,11 @@ def test_package_serializer_fields(self): # Verify affected_by_vulnerabilities structure expected_affected_by_vulnerabilities = { - "VCID-1234": {"vulnerability_id": "VCID-1234", "fixed_by_packages": None} + "VCID-1234": { + "code_fixes": [], + "vulnerability_id": "VCID-1234", + "fixed_by_packages": None, + } } self.assertEqual(data["affected_by_vulnerabilities"], expected_affected_by_vulnerabilities) @@ -387,7 +391,13 @@ def test_get_affected_by_vulnerabilities(self): vulnerabilities = serializer.get_affected_by_vulnerabilities(package) self.assertEqual( vulnerabilities, - {"VCID-1234": {"vulnerability_id": "VCID-1234", "fixed_by_packages": None}}, + { + "VCID-1234": { + "code_fixes": [], + "vulnerability_id": "VCID-1234", + "fixed_by_packages": None, + } + }, ) def test_get_fixing_vulnerabilities(self): @@ -591,7 +601,7 @@ def test_lookup_with_valid_purl(self): """ url = reverse("package-v2-lookup") data = {"purl": "pkg:pypi/django@3.2"} - with self.assertNumQueries(12): + with self.assertNumQueries(13): response = self.client.post(url, data, format="json") self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(1, len(response.data)) @@ -603,7 +613,13 @@ def test_lookup_with_valid_purl(self): self.assertEqual(response.data[0]["purl"], "pkg:pypi/django@3.2") self.assertEqual( response.data[0]["affected_by_vulnerabilities"], - {"VCID-1234": {"vulnerability_id": "VCID-1234", "fixed_by_packages": None}}, + { + "VCID-1234": { + "code_fixes": [], + "vulnerability_id": "VCID-1234", + "fixed_by_packages": None, + } + }, ) self.assertEqual(response.data[0]["fixing_vulnerabilities"], []) diff --git a/vulnerablecode/urls.py b/vulnerablecode/urls.py index 10f7db13f..54540a66d 100644 --- a/vulnerablecode/urls.py +++ b/vulnerablecode/urls.py @@ -20,6 +20,7 @@ from vulnerabilities.api import CPEViewSet from vulnerabilities.api import PackageViewSet from vulnerabilities.api import VulnerabilityViewSet +from vulnerabilities.api_v2 import CodeFixViewSet from vulnerabilities.api_v2 import PackageV2ViewSet from vulnerabilities.api_v2 import VulnerabilityV2ViewSet from vulnerabilities.views import ApiUserCreateView @@ -48,6 +49,8 @@ def __init__(self, *args, **kwargs): api_v2_router = OptionalSlashRouter() api_v2_router.register("packages", PackageV2ViewSet, basename="package-v2") api_v2_router.register("vulnerabilities", VulnerabilityV2ViewSet, basename="vulnerability-v2") +api_v2_router.register("codefixes", CodeFixViewSet, basename="codefix") + urlpatterns = [ path("api/v2/", include(api_v2_router.urls)), From 805590b89052a99465be5f68d9ce5cdda2b2c883 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 20:26:21 +0530 Subject: [PATCH 12/26] Fix code Signed-off-by: Tushar Goel --- vulnerabilities/api_v2.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vulnerabilities/api_v2.py b/vulnerabilities/api_v2.py index d3f1d714c..10ffb6d98 100644 --- a/vulnerabilities/api_v2.py +++ b/vulnerabilities/api_v2.py @@ -535,11 +535,6 @@ def lookup(self, request): return Response(PackageV2Serializer(qs, many=True, context={"request": request}).data) -from rest_framework import serializers - -from vulnerabilities.models import CodeFix - - class CodeFixSerializer(serializers.ModelSerializer): """ Serializer for the CodeFix model. 
From 3d7c209395bd99e7e33aa30bd314a18fac98f033 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Wed, 8 Jan 2025 20:31:02 +0530 Subject: [PATCH 13/26] Minor Fix Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/collect_commits.py | 24 ++++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/vulnerabilities/pipelines/collect_commits.py b/vulnerabilities/pipelines/collect_commits.py index bf94b755d..92145c051 100644 --- a/vulnerabilities/pipelines/collect_commits.py +++ b/vulnerabilities/pipelines/collect_commits.py @@ -20,8 +20,7 @@ def is_vcs_url_already_processed(commit_id): """ Check if a VCS URL exists in a CodeFix entry. """ - if "commit" in commit_id: - return CodeFix.objects.filter(commits__contains=[commit_id]).exists() + return CodeFix.objects.filter(commits__contains=[commit_id]).exists() class CollectFixCommitsPipeline(VulnerableCodePipeline): @@ -57,6 +56,8 @@ def collect_and_store_fix_commits(self): ): vulnerability = apv.vulnerability for reference in vulnerability.references.all(): + if not "/commit/" in reference.url: + continue if not is_vcs_url(reference.url): continue @@ -72,17 +73,16 @@ def collect_and_store_fix_commits(self): ) continue # check if vcs_url has commit - if "/commit/" in vcs_url: - code_fix, created = CodeFix.objects.get_or_create( - commits=[vcs_url], - affected_package_vulnerability=apv, - ) + code_fix, created = CodeFix.objects.get_or_create( + commits=[vcs_url], + affected_package_vulnerability=apv, + ) - if created: - created_fix_count += 1 - self.log( - f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" - ) + if created: + created_fix_count += 1 + self.log( + f"Created CodeFix entry for reference: {reference.url} with VCS URL {vcs_url}" + ) self.log(f"Successfully created {created_fix_count:,d} CodeFix entries.") From c132094166b74acd1ee21c1f6623fc34863a49fd Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Thu, 9 Jan 2025 12:25:01 +0530 Subject: [PATCH 14/26] Add default postgresql.conf for local docker build Signed-off-by: Keshav Priyadarshi --- etc/postgresql/postgresql.conf | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 etc/postgresql/postgresql.conf diff --git a/etc/postgresql/postgresql.conf b/etc/postgresql/postgresql.conf new file mode 100644 index 000000000..57ea6700a --- /dev/null +++ b/etc/postgresql/postgresql.conf @@ -0,0 +1,12 @@ +# Default configuration for development build +# DB Version: 13 +# OS Type: linux +# DB Type: development +# Data Storage: local + +listen_addresses = '*' +max_connections = 100 +shared_buffers = 128MB +dynamic_shared_memory_type = posix +max_wal_size = 1GB +min_wal_size = 80MB From ea98eeb7afe9bea40e7c53fe44d8810024af1beb Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Jan 2025 13:53:48 +0530 Subject: [PATCH 15/26] Migrate alpine importer to aboutcode pipeline Signed-off-by: Keshav Priyadarshi --- vulnerabilities/importers/__init__.py | 4 +- .../alpine_linux_importer.py} | 118 ++++++++++++++---- .../test_alpine_linux_importer_pipeline.py} | 87 ++++++++----- 3 files changed, 146 insertions(+), 63 deletions(-) rename vulnerabilities/{importers/alpine_linux.py => pipelines/alpine_linux_importer.py} (66%) rename vulnerabilities/tests/{test_alpine.py => pipelines/test_alpine_linux_importer_pipeline.py} (90%) diff --git a/vulnerabilities/importers/__init__.py b/vulnerabilities/importers/__init__.py index 3394dd989..3f429f669 100644 --- a/vulnerabilities/importers/__init__.py +++ 
b/vulnerabilities/importers/__init__.py @@ -7,7 +7,6 @@ # See https://aboutcode.org for more information about nexB OSS projects. # -from vulnerabilities.importers import alpine_linux from vulnerabilities.importers import apache_httpd from vulnerabilities.importers import apache_kafka from vulnerabilities.importers import apache_tomcat @@ -35,6 +34,7 @@ from vulnerabilities.importers import vulnrichment from vulnerabilities.importers import xen from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline +from vulnerabilities.pipelines import alpine_linux_importer from vulnerabilities.pipelines import github_importer from vulnerabilities.pipelines import gitlab_importer from vulnerabilities.pipelines import nginx_importer @@ -44,7 +44,6 @@ from vulnerabilities.pipelines import pysec_importer IMPORTERS_REGISTRY = [ - alpine_linux.AlpineImporter, openssl.OpensslImporter, redhat.RedhatImporter, debian.DebianImporter, @@ -78,6 +77,7 @@ github_importer.GitHubAPIImporterPipeline, nvd_importer.NVDImporterPipeline, pysec_importer.PyPIImporterPipeline, + alpine_linux_importer.AlpineLinuxImporterPipeline, ] IMPORTERS_REGISTRY = { diff --git a/vulnerabilities/importers/alpine_linux.py b/vulnerabilities/pipelines/alpine_linux_importer.py similarity index 66% rename from vulnerabilities/importers/alpine_linux.py rename to vulnerabilities/pipelines/alpine_linux_importer.py index db169184e..d29f9bc9b 100644 --- a/vulnerabilities/importers/alpine_linux.py +++ b/vulnerabilities/pipelines/alpine_linux_importer.py @@ -1,5 +1,4 @@ # -# # Copyright (c) nexB Inc. and others. All rights reserved. # VulnerableCode is a trademark of nexB Inc. # SPDX-License-Identifier: Apache-2.0 @@ -21,40 +20,61 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importer import Importer +from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipeline from vulnerabilities.references import WireSharkReference from vulnerabilities.references import XsaReference from vulnerabilities.references import ZbxReference from vulnerabilities.utils import fetch_response from vulnerabilities.utils import is_cve -LOGGER = logging.getLogger(__name__) -BASE_URL = "https://secdb.alpinelinux.org/" +class AlpineLinuxImporterPipeline(VulnerableCodeBaseImporterPipeline): + """Collect Alpine Linux advisories.""" + + pipeline_id = "alpine_linux_importer" -class AlpineImporter(Importer): spdx_license_expression = "CC-BY-SA-4.0" license_url = "https://secdb.alpinelinux.org/license.txt" + url = "https://secdb.alpinelinux.org/" importer_name = "Alpine Linux Importer" - def advisory_data(self) -> Iterable[AdvisoryData]: - page_response_content = fetch_response(BASE_URL).content - advisory_directory_links = fetch_advisory_directory_links(page_response_content) + @classmethod + def steps(cls): + return ( + cls.collect_and_store_advisories, + cls.import_new_advisories, + ) + + def advisories_count(self) -> int: + return 0 + + def collect_advisories(self) -> Iterable[AdvisoryData]: + page_response_content = fetch_response(self.url).content + advisory_directory_links = fetch_advisory_directory_links( + page_response_content, self.url, self.log + ) advisory_links = [] for advisory_directory_link in advisory_directory_links: advisory_directory_page = fetch_response(advisory_directory_link).content advisory_links.extend( - fetch_advisory_links(advisory_directory_page, advisory_directory_link) + fetch_advisory_links(advisory_directory_page, advisory_directory_link, 
self.log) ) for link in advisory_links: record = fetch_response(link).json() if not record["packages"]: - LOGGER.error(f'"packages" not found in {link!r}') + self.log( + f'"packages" not found in {link!r}', + level=logging.DEBUG, + ) continue - yield from process_record(record=record, url=link) + yield from process_record(record=record, url=link, logger=self.log) -def fetch_advisory_directory_links(page_response_content: str) -> List[str]: +def fetch_advisory_directory_links( + page_response_content: str, + base_url: str, + logger: callable = None, +) -> List[str]: """ Return a list of advisory directory links present in `page_response_content` html string """ @@ -66,16 +86,22 @@ def fetch_advisory_directory_links(page_response_content: str) -> List[str]: ] if not alpine_versions: - LOGGER.error(f"No versions found in {BASE_URL!r}") + if logger: + logger( + f"No versions found in {base_url!r}", + level=logging.DEBUG, + ) return [] - advisory_directory_links = [urljoin(BASE_URL, version) for version in alpine_versions] + advisory_directory_links = [urljoin(base_url, version) for version in alpine_versions] return advisory_directory_links def fetch_advisory_links( - advisory_directory_page: str, advisory_directory_link: str + advisory_directory_page: str, + advisory_directory_link: str, + logger: callable = None, ) -> Iterable[str]: """ Yield json file urls present in `advisory_directory_page` @@ -83,36 +109,52 @@ def fetch_advisory_links( advisory_directory_page = BeautifulSoup(advisory_directory_page, features="lxml") anchor_tags = advisory_directory_page.find_all("a") if not anchor_tags: - LOGGER.error(f"No anchor tags found in {advisory_directory_link!r}") + if logger: + logger( + f"No anchor tags found in {advisory_directory_link!r}", + level=logging.DEBUG, + ) return iter([]) for anchor_tag in anchor_tags: if anchor_tag.text.endswith("json"): yield urljoin(advisory_directory_link, anchor_tag.text) -def check_for_attributes(record) -> bool: +def check_for_attributes(record, logger) -> bool: attributes = ["distroversion", "reponame", "archs"] for attribute in attributes: if attribute not in record: - LOGGER.error(f'"{attribute!r}" not found in {record!r}') + if logger: + logger( + f'"{attribute!r}" not found in {record!r}', + level=logging.DEBUG, + ) return False return True -def process_record(record: dict, url: str) -> Iterable[AdvisoryData]: +def process_record(record: dict, url: str, logger: callable = None) -> Iterable[AdvisoryData]: """ Return a list of AdvisoryData objects by processing data present in that `record` """ if not record.get("packages"): - LOGGER.error(f'"packages" not found in this record {record!r}') + if logger: + logger( + f'"packages" not found in this record {record!r}', + level=logging.DEBUG, + ) return [] for package in record["packages"]: if not package["pkg"]: - LOGGER.error(f'"pkg" not found in this package {package!r}') + if logger: + logger( + f'"pkg" not found in this package {package!r}', + level=logging.DEBUG, + ) continue - if not check_for_attributes(record): + if not check_for_attributes(record, logger): continue yield from load_advisories( pkg_infos=package["pkg"], @@ -120,6 +162,7 @@ def process_record(record: dict, url: str) -> Iterable[AdvisoryData]: reponame=record["reponame"], archs=record["archs"], url=url, + logger=logger, ) @@ -129,6 +172,7 @@ def load_advisories( reponame: str, archs: List[str], url: str, + logger: callable = None, ) -> Iterable[AdvisoryData]: """ Yield AdvisoryData by mapping data from `pkg_infos` @@ -136,17 +180,29 @@ 
def load_advisories( `distroversion`, `reponame`, `archs` """ if not pkg_infos.get("name"): - LOGGER.error(f'"name" is not available in package {pkg_infos!r}') + if logger: + logger( + f'"name" is not available in package {pkg_infos!r}', + level=logging.DEBUG, + ) return [] for version, fixed_vulns in pkg_infos["secfixes"].items(): if not fixed_vulns: - LOGGER.error(f"No fixed vulnerabilities in version {version!r}") + if logger: + logger( + f"No fixed vulnerabilities in version {version!r}", + level=logging.DEBUG, + ) continue for vuln_ids in fixed_vulns: if not isinstance(vuln_ids, str): - LOGGER.error(f"{vuln_ids!r} is not of `str` instance") + if logger: + logger( + f"{vuln_ids!r} is not of `str` instance", + level=logging.DEBUG, + ) continue vuln_ids = vuln_ids.split() aliases = [] @@ -179,10 +235,18 @@ def load_advisories( try: fixed_version = AlpineLinuxVersion(version) except Exception as e: - LOGGER.error(f"{version!r} is not a valid AlpineVersion {e!r}") + if logger: + logger( + f"{version!r} is not a valid AlpineVersion {e!r}", + level=logging.DEBUG, + ) continue if not isinstance(archs, List): - LOGGER.error(f"{archs!r} is not of `List` instance") + if logger: + logger( + f"{archs!r} is not of `List` instance", + level=logging.DEBUG, + ) continue if archs: for arch in archs: diff --git a/vulnerabilities/tests/test_alpine.py b/vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py similarity index 90% rename from vulnerabilities/tests/test_alpine.py rename to vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py index 1ab74a89a..386f239d8 100644 --- a/vulnerabilities/tests/test_alpine.py +++ b/vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py @@ -9,6 +9,7 @@ import json import os +from pathlib import Path import pytest from packageurl import PackageURL @@ -16,17 +17,18 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage -from vulnerabilities.importers.alpine_linux import fetch_advisory_directory_links -from vulnerabilities.importers.alpine_linux import fetch_advisory_links -from vulnerabilities.importers.alpine_linux import load_advisories -from vulnerabilities.importers.alpine_linux import process_record +from vulnerabilities.pipelines.alpine_linux_importer import fetch_advisory_directory_links +from vulnerabilities.pipelines.alpine_linux_importer import fetch_advisory_links +from vulnerabilities.pipelines.alpine_linux_importer import load_advisories +from vulnerabilities.pipelines.alpine_linux_importer import process_record from vulnerabilities.references import XsaReference +from vulnerabilities.tests.pipelines import TestLogger -BASE_DIR = os.path.dirname(os.path.abspath(__file__)) -TEST_DATA = os.path.join(BASE_DIR, "test_data", "alpine") +TEST_DATA = Path(__file__).parent.parent / "test_data" / "alpine" -def test_process_record(caplog): +def test_process_record(): + logger = TestLogger() expected_advisories = [ AdvisoryData( aliases=[], @@ -445,14 +447,18 @@ def test_process_record(caplog): url="https://secdb.alpinelinux.org/v3.11/", ), ] - with open(os.path.join(TEST_DATA, os.path.join(TEST_DATA, "v3.11", "main.json"))) as f: + with open(TEST_DATA / "v3.11/main.json") as f: found_advisories = list( - process_record(json.loads(f.read()), "https://secdb.alpinelinux.org/v3.11/") + process_record( + json.loads(f.read()), + "https://secdb.alpinelinux.org/v3.11/", + logger=logger.write, + ) ) assert found_advisories == expected_advisories assert ( "'4.10-1-r1' is not a 
valid AlpineVersion InvalidVersion(\"'4.10-1-r1' is not a valid \")" - in caplog.text + in logger.getvalue() ) @@ -474,14 +480,22 @@ def test_fetch_advisory_directory_links(): "https://secdb.alpinelinux.org/v3.8/", "https://secdb.alpinelinux.org/v3.9/", ] - with open(os.path.join(TEST_DATA, "web_pages", "directory.html")) as f: - assert fetch_advisory_directory_links(f.read()) == expected + with open(TEST_DATA / "web_pages/directory.html") as f: + assert ( + fetch_advisory_directory_links(f.read(), "https://secdb.alpinelinux.org/") == expected + ) -def test_fetch_advisory_directory_links_failure(caplog): - with open(os.path.join(TEST_DATA, "web_pages", "fail_directory.html")) as f: - assert fetch_advisory_directory_links(f.read()) == [] - assert "No versions found in 'https://secdb.alpinelinux.org/'" in caplog.text +def test_fetch_advisory_directory_links_failure(): + logger = TestLogger() + with open(TEST_DATA / "web_pages/fail_directory.html") as f: + assert ( + fetch_advisory_directory_links( + f.read(), "https://secdb.alpinelinux.org/", logger=logger.write + ) + == [] + ) + assert "No versions found in 'https://secdb.alpinelinux.org/'" in logger.getvalue() def test_fetch_advisory_links(): @@ -489,45 +503,49 @@ def test_fetch_advisory_links(): "https://secdb.alpinelinux.org/v3.11/community.json", "https://secdb.alpinelinux.org/v3.11/main.json", ] - with open(os.path.join(TEST_DATA, "web_pages", "v3.11.html")) as f: + with open(TEST_DATA / "web_pages/v3.11.html") as f: assert ( list(fetch_advisory_links(f.read(), "https://secdb.alpinelinux.org/v3.11/")) == expected ) -def test_fetch_advisory_links_failure(caplog): - with open(os.path.join(TEST_DATA, "web_pages", "fail_directory.html")) as f: - assert list(fetch_advisory_links(f.read(), "v3.11")) == [] - assert "No anchor tags found in 'v3.11'" in caplog.text +def test_fetch_advisory_links_failure(): + logger = TestLogger() + with open(TEST_DATA / "web_pages/fail_directory.html") as f: + assert list(fetch_advisory_links(f.read(), "v3.11", logger=logger.write)) == [] + assert "No anchor tags found in 'v3.11'" in logger.getvalue() -def test_process_record_without_packages(caplog): - with open(os.path.join(TEST_DATA, os.path.join(TEST_DATA, "v3.3", "community.json"))) as f: - assert list(process_record(json.loads(f.read()), "")) == [] +def test_process_record_without_packages(): + logger = TestLogger() + with open(TEST_DATA / TEST_DATA / "v3.3/community.json") as f: + assert list(process_record(json.loads(f.read()), "", logger=logger.write)) == [] assert ( "\"packages\" not found in this record {'apkurl': '{{urlprefix}}/{{distroversion}}/{{reponame}}/{{arch}}/{{pkg.name}}-{{pkg.ver}}.apk', 'archs': ['armhf', 'x86', 'x86_64'], 'reponame': 'community', 'urlprefix': 'https://dl-cdn.alpinelinux.org/alpine', 'distroversion': 'v3.3', 'packages': []}" - in caplog.text + in logger.getvalue() ) -def test_load_advisories_package_without_name(caplog): +def test_load_advisories_package_without_name(): + logger = TestLogger() package = { "secfixes": {"4.10.0-r1": ["XSA-248"], "4.10.0-r2": ["CVE-2018-7540 XSA-252"]}, } - list(load_advisories(package, "v3.11", "main", archs=[], url="")) + list(load_advisories(package, "v3.11", "main", archs=[], url="", logger=logger.write)) assert ( "\"name\" is not available in package {'secfixes': {'4.10.0-r1': ['XSA-248'], '4.10.0-r2': ['CVE-2018-7540 XSA-252']}}" - in caplog.text + in logger.getvalue() ) -def test_load_advisories_package_without_secfixes(caplog): +def test_load_advisories_package_without_secfixes(): + 
logger = TestLogger() package = { "name": "xen", "secfixes": {"4.10.0-r1": []}, } - list(load_advisories(package, "v3.11", "main", archs=[], url="")) - assert "No fixed vulnerabilities in version '4.10.0-r1'" in caplog.text + list(load_advisories(package, "v3.11", "main", archs=[], url="", logger=logger.write)) + assert "No fixed vulnerabilities in version '4.10.0-r1'" in logger.getvalue() @pytest.mark.parametrize( @@ -542,13 +560,14 @@ def test_load_advisories_package_without_secfixes(caplog): "4.10-1-r1", ], ) -def test_load_advisories_package_with_invalid_alpine_version(test_case, caplog): +def test_load_advisories_package_with_invalid_alpine_version(test_case): + logger = TestLogger() package = { "name": "xen", "secfixes": {f"{test_case}": ["XSA-248"]}, } - list(load_advisories(package, "v3.11", "main", archs=[], url="")) + list(load_advisories(package, "v3.11", "main", archs=[], url="", logger=logger.write)) assert ( f"{test_case!r} is not a valid AlpineVersion InvalidVersion(\"{test_case!r} is not a valid \")" - in caplog.text + in logger.getvalue() ) From 1b834c3e0faabfa38c686bce17e75111ec3eb095 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Jan 2025 14:34:20 +0530 Subject: [PATCH 16/26] Add data migration for old alpine advisory Signed-off-by: Keshav Priyadarshi --- .../0086_update_alpine_advisory_created_by.py | 36 ++++++++++++++++ vulnerabilities/tests/test_data_migrations.py | 42 +++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py diff --git a/vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py b/vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py new file mode 100644 index 000000000..b7e5394b1 --- /dev/null +++ b/vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py @@ -0,0 +1,36 @@ + +from django.db import migrations + +""" +Update the created_by field on Advisory from the old qualified_name +to the new pipeline_id. 
+""" + + +def update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.alpine_linux_importer import AlpineLinuxImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by="vulnerabilities.importers.alpine_linux.AlpineImporter").update( + created_by=AlpineLinuxImporterPipeline.pipeline_id + ) + + +def reverse_update_created_by(apps, schema_editor): + from vulnerabilities.pipelines.alpine_linux_importer import AlpineLinuxImporterPipeline + + Advisory = apps.get_model("vulnerabilities", "Advisory") + Advisory.objects.filter(created_by=AlpineLinuxImporterPipeline.pipeline_id).update( + created_by="vulnerabilities.importers.alpine_linux.AlpineImporter" + ) + + +class Migration(migrations.Migration): + + dependencies = [ + ("vulnerabilities", "0085_alter_package_is_ghost_alter_package_version_rank_and_more"), + ] + + operations = [ + migrations.RunPython(update_created_by, reverse_code=reverse_update_created_by), + ] \ No newline at end of file diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 046c86ce5..02cb1d489 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -880,3 +880,45 @@ def test_update_pysec_created_by_field(self): assert adv.filter(created_by="vulnerabilities.importers.pysec.PyPIImporter").count() == 0 assert adv.filter(created_by="pysec_importer").count() == 1 + + +class TestUpdateAlpineAdvisoryCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0085_alter_package_is_ghost_alter_package_version_rank_and_more" + migrate_to = "0086_update_alpine_advisory_created_by" + + advisory_data1 = AdvisoryData( + aliases=["CVE-2020-13371337"], + summary="vulnerability description here", + affected_packages=[ + AffectedPackage( + package=PackageURL(type="pypi", name="foobar"), + affected_version_range=VersionRange.from_string("vers:pypi/>=1.0.0|<=2.0.0"), + ) + ], + references=[Reference(url="https://example.com/with/more/info/CVE-2020-13371337")], + date_published=timezone.now(), + url="https://test.com", + ) + + def setUpBeforeMigration(self, apps): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv1 = Advisory.objects.create( + aliases=self.advisory_data1.aliases, + summary=self.advisory_data1.summary, + affected_packages=[pkg.to_dict() for pkg in self.advisory_data1.affected_packages], + references=[ref.to_dict() for ref in self.advisory_data1.references], + url=self.advisory_data1.url, + created_by="vulnerabilities.importers.alpine_linux.AlpineImporter", + date_collected=timezone.now(), + ) + + def test_update_pysec_created_by_field(self): + Advisory = apps.get_model("vulnerabilities", "Advisory") + adv = Advisory.objects.all() + + assert ( + adv.filter(created_by="vulnerabilities.importers.alpine_linux.AlpineImporter").count() + == 0 + ) + assert adv.filter(created_by="alpine_linux_importer").count() == 1 From 11c417af82204d7726811b154870365cf7adc01e Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Fri, 10 Jan 2025 16:15:22 +0530 Subject: [PATCH 17/26] Resolve migration conflict Signed-off-by: Keshav Priyadarshi --- ...reated_by.py => 0087_update_alpine_advisory_created_by.py} | 2 +- vulnerabilities/tests/test_data_migrations.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) rename vulnerabilities/migrations/{0086_update_alpine_advisory_created_by.py => 0087_update_alpine_advisory_created_by.py} (91%) diff --git 
a/vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py b/vulnerabilities/migrations/0087_update_alpine_advisory_created_by.py similarity index 91% rename from vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py rename to vulnerabilities/migrations/0087_update_alpine_advisory_created_by.py index b7e5394b1..9b162b09d 100644 --- a/vulnerabilities/migrations/0086_update_alpine_advisory_created_by.py +++ b/vulnerabilities/migrations/0087_update_alpine_advisory_created_by.py @@ -28,7 +28,7 @@ def reverse_update_created_by(apps, schema_editor): class Migration(migrations.Migration): dependencies = [ - ("vulnerabilities", "0085_alter_package_is_ghost_alter_package_version_rank_and_more"), + ("vulnerabilities", "0086_codefix"), ] operations = [ diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 02cb1d489..38bf9417f 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -884,8 +884,8 @@ def test_update_pysec_created_by_field(self): class TestUpdateAlpineAdvisoryCreatedByField(TestMigrations): app_name = "vulnerabilities" - migrate_from = "0085_alter_package_is_ghost_alter_package_version_rank_and_more" - migrate_to = "0086_update_alpine_advisory_created_by" + migrate_from = "0086_codefix" + migrate_to = "0087_update_alpine_advisory_created_by" advisory_data1 = AdvisoryData( aliases=["CVE-2020-13371337"], From fd9cf47d44440f12c6b6103d43a9f9c5919e36be Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Sun, 12 Jan 2025 21:40:09 +0530 Subject: [PATCH 18/26] Allow CVSS3.1 Severities in NVD Signed-off-by: Tushar Goel --- vulnerabilities/pipelines/nvd_importer.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vulnerabilities/pipelines/nvd_importer.py b/vulnerabilities/pipelines/nvd_importer.py index bd6f33cf9..645b9f442 100644 --- a/vulnerabilities/pipelines/nvd_importer.py +++ b/vulnerabilities/pipelines/nvd_importer.py @@ -210,8 +210,14 @@ def severities(self): base_metric_v3 = impact.get("baseMetricV3") or {} if base_metric_v3: cvss_v3 = get_item(base_metric_v3, "cvssV3") + version = cvss_v3.get("version") + system = None + if version == "3.1": + system = severity_systems.CVSSV31 + else: + system = severity_systems.CVSSV3 vs = VulnerabilitySeverity( - system=severity_systems.CVSSV3, + system=system, value=str(cvss_v3.get("baseScore") or ""), scoring_elements=str(cvss_v3.get("vectorString") or ""), ) From c80c7a3944c87d463d90171c0f92c4bcdb354a8e Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 13 Jan 2025 20:04:52 +0530 Subject: [PATCH 19/26] Add pipeline to add CVSSv3.1 score for CVEs Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 2 + vulnerabilities/models.py | 2 + .../pipelines/add_cvss31_to_CVEs.py | 105 ++++++++++++++++++ vulnerabilities/tests/test_add_cvsssv31.py | 56 ++++++++++ 4 files changed, 165 insertions(+) create mode 100644 vulnerabilities/pipelines/add_cvss31_to_CVEs.py create mode 100644 vulnerabilities/tests/test_add_cvsssv31.py diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index 44a65df47..d7bb3c288 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -17,6 +17,7 @@ from vulnerabilities.pipelines import enhance_with_kev from vulnerabilities.pipelines import enhance_with_metasploit from vulnerabilities.pipelines import flag_ghost_packages +from vulnerabilities.pipelines import add_cvss31_to_CVEs 
IMPROVERS_REGISTRY = [ valid_versions.GitHubBasicImprover, @@ -43,6 +44,7 @@ compute_package_risk.ComputePackageRiskPipeline, compute_package_version_rank.ComputeVersionRankPipeline, collect_commits.CollectFixCommitsPipeline, + add_cvss31_to_CVEs.CVEAdvisoryMappingPipeline, ] IMPROVERS_REGISTRY = { diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 1a58ec4dc..21b1129a2 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -189,6 +189,8 @@ class VulnerabilitySeverity(models.Model): blank=True, null=True, help_text="UTC Date of publication of the vulnerability severity" ) + objects = BaseQuerySet.as_manager() + class Meta: ordering = ["url", "scoring_system", "value"] diff --git a/vulnerabilities/pipelines/add_cvss31_to_CVEs.py b/vulnerabilities/pipelines/add_cvss31_to_CVEs.py new file mode 100644 index 000000000..acda42b52 --- /dev/null +++ b/vulnerabilities/pipelines/add_cvss31_to_CVEs.py @@ -0,0 +1,105 @@ +# Copyright (c) nexB Inc. and others. All rights reserved. +# VulnerableCode is a trademark of nexB Inc. +# SPDX-License-Identifier: Apache-2.0 +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. +# See https://github.com/aboutcode-org/vulnerablecode for support or download. +# See https://aboutcode.org for more information about nexB OSS projects. +# +import re + +from aboutcode.pipeline import LoopProgress +from django.db import transaction + +from vulnerabilities import severity_systems +from vulnerabilities.models import Advisory +from vulnerabilities.models import VulnerabilitySeverity +from vulnerabilities.pipelines import VulnerableCodePipeline + + +class CVEAdvisoryMappingPipeline(VulnerableCodePipeline): + """ + Pipeline to map CVEs from VulnerabilitySeverity to corresponding Advisories with CVSS3.1 scores. + """ + + pipeline_id = "add_cvssv3.1_to_CVEs" + + @classmethod + def steps(cls): + return (cls.process_cve_advisory_mapping,) + + def process_cve_advisory_mapping(self): + nvd_severities = ( + VulnerabilitySeverity.objects.filter( + url__startswith="https://nvd.nist.gov/vuln/detail/CVE-", scoring_system="cvssv3" + ) + .prefetch_related("vulnerabilities") + .distinct() + ) + + self.log(f"Processing {nvd_severities.count():,d} CVE severity records") + + progress = LoopProgress( + total_iterations=nvd_severities.count(), + logger=self.log, + progress_step=5, + ) + + batch_size = 1000 + results = [] + + for severity in progress.iter(nvd_severities.paginated(per_page=batch_size)): + print(severity.url) + cve_pattern = re.compile(r"(CVE-\d{4}-\d{4,7})").search + cve_match = cve_pattern(severity.url) + if cve_match: + cve_id = cve_match.group() + else: + self.log(f"Could not find CVE ID in URL: {severity.url}") + continue + + matching_advisories = Advisory.objects.filter( + aliases=[cve_id], + created_by="nvd_importer", + ) + + for advisory in matching_advisories: + for reference in advisory.references: + for sev in reference.get("severities", []): + if sev.get("system") == "cvssv3.1": + results.append( + { + "cve_id": cve_id, + "cvss31_score": sev.get("value"), + "cvss31_vector": sev.get("scoring_elements"), + "vulnerabilities": severity.vulnerabilities.all(), + } + ) + + if results: + print(results) + self._process_batch(results) + + self.log(f"Completed processing CVE to Advisory mappings") + + def _process_batch(self, results): + """ + Process a batch of results. Transactions are used to ensure data consistency. 
+ """ + self.log(f"Processing batch of {len(results)} mappings") + + with transaction.atomic(): + for result in results: + self.log( + f"CVE: {result['cve_id']}, " + f"CVSS3.1: {result['cvss31_score']}, " + f"Vector: {result['cvss31_vector']}" + ) + + for vulnerability in result["vulnerabilities"]: + vuln_severity, _ = VulnerabilitySeverity.objects.update_or_create( + scoring_system=severity_systems.CVSSV31.identifier, + url=f"https://nvd.nist.gov/vuln/detail/{result['cve_id']}", + value=result["cvss31_score"], + scoring_elements=result["cvss31_vector"], + ) + vulnerability.severities.add(vuln_severity) diff --git a/vulnerabilities/tests/test_add_cvsssv31.py b/vulnerabilities/tests/test_add_cvsssv31.py new file mode 100644 index 000000000..c79b51879 --- /dev/null +++ b/vulnerabilities/tests/test_add_cvsssv31.py @@ -0,0 +1,56 @@ +import unittest +from unittest.mock import Mock +from unittest.mock import patch + +from django.test import TestCase + +from vulnerabilities.models import Advisory +from vulnerabilities.models import Alias +from vulnerabilities.models import Vulnerability +from vulnerabilities.models import VulnerabilitySeverity +from vulnerabilities.pipelines.add_cvss31_to_CVEs import CVEAdvisoryMappingPipeline +from vulnerabilities.severity_systems import CVSSV3 +from vulnerabilities.severity_systems import CVSSV31 + + +class TestCVEAdvisoryMappingPipeline(TestCase): + def setUp(self): + self.pipeline = CVEAdvisoryMappingPipeline() + Advisory.objects.create( + created_by="nvd_importer", + aliases=["CVE-2024-1234"], + references=[ + { + "severities": [ + { + "system": "cvssv3.1", + "value": "7.5", + "scoring_elements": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", + } + ], + "url": "https://nvd.nist.gov/vuln/detail/CVE-2024-1234", + } + ], + date_collected="2024-09-27T19:38:00Z", + ) + vuln = Vulnerability.objects.create(vulnerability_id="CVE-2024-1234") + sev = VulnerabilitySeverity.objects.create( + scoring_system=CVSSV3.identifier, + url="https://nvd.nist.gov/vuln/detail/CVE-2024-1234", + value="7.5", + scoring_elements="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", + ) + vuln.severities.add(sev) + + def test_process_cve_advisory_mapping_single_record(self): + self.pipeline.process_cve_advisory_mapping() + self.assertEqual(VulnerabilitySeverity.objects.count(), 2) + # check if severity with cvssv3.1 is created + sev = VulnerabilitySeverity.objects.get(scoring_system=CVSSV31.identifier) + self.assertEqual(sev.url, "https://nvd.nist.gov/vuln/detail/CVE-2024-1234") + self.assertEqual(sev.value, "7.5") + self.assertEqual(sev.scoring_elements, "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N") + # check if severity is added to existing vulnerability + vuln = Vulnerability.objects.get(vulnerability_id="CVE-2024-1234") + self.assertEqual(vuln.severities.count(), 2) + self.assertIn(sev, vuln.severities.all()) From 5f2d228323dda854a387bdf5408e2c16d0d54a88 Mon Sep 17 00:00:00 2001 From: Tushar Goel Date: Mon, 13 Jan 2025 20:05:02 +0530 Subject: [PATCH 20/26] Add pipeline to add CVSSv3.1 score for CVEs Signed-off-by: Tushar Goel --- vulnerabilities/improvers/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vulnerabilities/improvers/__init__.py b/vulnerabilities/improvers/__init__.py index d7bb3c288..9b11c7920 100644 --- a/vulnerabilities/improvers/__init__.py +++ b/vulnerabilities/improvers/__init__.py @@ -10,6 +10,7 @@ from vulnerabilities.improvers import valid_versions from vulnerabilities.improvers import vulnerability_status from vulnerabilities.pipelines 
import VulnerableCodePipeline +from vulnerabilities.pipelines import add_cvss31_to_CVEs from vulnerabilities.pipelines import collect_commits from vulnerabilities.pipelines import compute_package_risk from vulnerabilities.pipelines import compute_package_version_rank @@ -17,7 +18,6 @@ from vulnerabilities.pipelines import enhance_with_kev from vulnerabilities.pipelines import enhance_with_metasploit from vulnerabilities.pipelines import flag_ghost_packages -from vulnerabilities.pipelines import add_cvss31_to_CVEs IMPROVERS_REGISTRY = [ valid_versions.GitHubBasicImprover, From 8fe43f7623d6b86e75b632669ee137d095222aab Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 13 Jan 2025 11:22:21 +0530 Subject: [PATCH 21/26] Fix secret generation on mac Signed-off-by: Keshav Priyadarshi --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index 067cb419f..47ce9fcd7 100644 --- a/Makefile +++ b/Makefile @@ -42,6 +42,10 @@ else SUDO_POSTGRES= endif +ifeq ($(UNAME), Darwin) + GET_SECRET_KEY=`head /dev/urandom | base64 | head -c50` +endif + virtualenv: @echo "-> Bootstrap the virtualenv with PYTHON_EXE=${PYTHON_EXE}" @${PYTHON_EXE} ${VIRTUALENV_PYZ} --never-download --no-periodic-update ${VENV} From cc6d3006847514aee950b2462d9013714c0dc4fe Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 13 Jan 2025 17:18:34 +0530 Subject: [PATCH 22/26] Add migration to fix alpine PURLs Signed-off-by: Keshav Priyadarshi --- .../migrations/0088_fix_alpine_purl_type.py | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 vulnerabilities/migrations/0088_fix_alpine_purl_type.py diff --git a/vulnerabilities/migrations/0088_fix_alpine_purl_type.py b/vulnerabilities/migrations/0088_fix_alpine_purl_type.py new file mode 100644 index 000000000..29339cfd4 --- /dev/null +++ b/vulnerabilities/migrations/0088_fix_alpine_purl_type.py @@ -0,0 +1,103 @@ +from datetime import datetime +from datetime import timezone + +from aboutcode.pipeline import LoopProgress +from django.db import migrations +from packageurl import PackageURL + +CHUNK_SIZE = 50000 +BATCH_SIZE = 500 + + +class Migration(migrations.Migration): + def fix_alpine_purl_type(apps, schema_editor): + """Use proper apk package type for Alpine""" + + Package = apps.get_model("vulnerabilities", "Package") + batch = [] + alpine_packages_query = Package.objects.filter(type="alpine") + + log(f"\nFixing PURL for {alpine_packages_query.count():,d} alpine packages") + progress = LoopProgress( + total_iterations=alpine_packages_query.count(), + progress_step=10, + logger=log, + ) + for package in progress.iter(alpine_packages_query.iterator(chunk_size=CHUNK_SIZE)): + package.type = "apk" + package.namespace = "alpine" + + package.package_url = update_alpine_purl(package.package_url, "apk", "alpine") + package.plain_package_url = update_alpine_purl( + package.plain_package_url, "apk", "alpine" + ) + + batch.append(package) + if len(batch) >= BATCH_SIZE: + bulk_update_package(Package, batch) + batch.clear() + + bulk_update_package(Package, batch) + + def reverse_fix_alpine_purl_type(apps, schema_editor): + Package = apps.get_model("vulnerabilities", "Package") + batch = [] + alpine_packages_query = Package.objects.filter(type="apk", namespace="alpine") + + log(f"\nREVERSE: Fix for {alpine_packages_query.count():,d} alpine packages") + progress = LoopProgress( + total_iterations=alpine_packages_query.count(), + progress_step=10, + logger=log, + ) + for package in 
progress.iter(alpine_packages_query.iterator(chunk_size=CHUNK_SIZE)): + package.type = "alpine" + package.namespace = "" + + package.package_url = update_alpine_purl(package.package_url, "alpine", "") + package.plain_package_url = update_alpine_purl(package.plain_package_url, "alpine", "") + + batch.append(package) + if len(batch) >= BATCH_SIZE: + bulk_update_package(Package, batch) + batch.clear() + + bulk_update_package(Package, batch) + + dependencies = [ + ("vulnerabilities", "0087_update_alpine_advisory_created_by"), + ] + + operations = [ + migrations.RunPython( + code=fix_alpine_purl_type, + reverse_code=reverse_fix_alpine_purl_type, + ), + ] + + +def bulk_update_package(package, batch): + if batch: + package.objects.bulk_update( + objs=batch, + fields=[ + "type", + "namespace", + "package_url", + "plain_package_url", + ], + ) + + +def update_alpine_purl(purl, purl_type, purl_namespace): + package_url = PackageURL.from_string(purl).to_dict() + package_url["type"] = purl_type + package_url["namespace"] = purl_namespace + return str(PackageURL(**package_url)) + + +def log(message): + now_local = datetime.now(timezone.utc).astimezone() + timestamp = now_local.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3] + message = f"{timestamp} {message}" + print(message) From 8a55549b7f440c70363a2942c554e5febf5b8488 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 13 Jan 2025 17:50:19 +0530 Subject: [PATCH 23/26] Add test for Alpine data migration Signed-off-by: Keshav Priyadarshi --- vulnerabilities/tests/test_data_migrations.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/vulnerabilities/tests/test_data_migrations.py b/vulnerabilities/tests/test_data_migrations.py index 38bf9417f..55bbb71ef 100644 --- a/vulnerabilities/tests/test_data_migrations.py +++ b/vulnerabilities/tests/test_data_migrations.py @@ -19,6 +19,7 @@ from vulnerabilities.importer import AdvisoryData from vulnerabilities.importer import AffectedPackage from vulnerabilities.importer import Reference +from vulnerabilities.utils import purl_to_dict class TestMigrations(TestCase): @@ -922,3 +923,32 @@ def test_update_pysec_created_by_field(self): == 0 ) assert adv.filter(created_by="alpine_linux_importer").count() == 1 + + +class TestFixAlpinePURLCreatedByField(TestMigrations): + app_name = "vulnerabilities" + migrate_from = "0087_update_alpine_advisory_created_by" + migrate_to = "0088_fix_alpine_purl_type" + + def setUpBeforeMigration(self, apps): + Package = apps.get_model("vulnerabilities", "Package") + purl = str( + PackageURL( + type="alpine", + namespace="", + name="curl", + version="7.83.0-r0", + qualifiers="arch=x86", + ) + ) + package1 = Package.objects.create( + **purl_to_dict(purl=purl), package_url=purl, plain_package_url=purl + ) + + def test_fix_alpine_purl(self): + Package = apps.get_model("vulnerabilities", "Package") + package = Package.objects.all() + print(package) + + assert package.filter(type="alpine").count() == 0 + assert package.filter(type="apk").count() == 1 From 3cb9d3f9b33e5812472d0972867ffcb97e1af2c2 Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 13 Jan 2025 21:32:05 +0530 Subject: [PATCH 24/26] Use proper purl type for Alpine in pipeline, models, and views Signed-off-by: Keshav Priyadarshi --- vulnerabilities/models.py | 4 +- .../pipelines/alpine_linux_importer.py | 6 +- .../test_alpine_linux_importer_pipeline.py | 112 +++++++++--------- .../default_improver/alpine-expected.json | 28 ++--- .../default_improver/alpine-input.json | 28 ++--- 
vulnerabilities/tests/test_models.py | 12 +- vulnerabilities/tests/test_view.py | 5 +- vulnerabilities/views.py | 2 +- 8 files changed, 95 insertions(+), 102 deletions(-) diff --git a/vulnerabilities/models.py b/vulnerabilities/models.py index 21b1129a2..4db674e3e 100644 --- a/vulnerabilities/models.py +++ b/vulnerabilities/models.py @@ -10,10 +10,8 @@ import hashlib import json import logging -import typing from contextlib import suppress from functools import cached_property -from typing import Optional from typing import Union from cwe2.database import Database @@ -56,7 +54,7 @@ models.CharField.register_lookup(Trim) # patch univers for missing entry -RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange +RANGE_CLASS_BY_SCHEMES["apk"] = AlpineLinuxVersionRange class BaseQuerySet(models.QuerySet): diff --git a/vulnerabilities/pipelines/alpine_linux_importer.py b/vulnerabilities/pipelines/alpine_linux_importer.py index d29f9bc9b..28736e507 100644 --- a/vulnerabilities/pipelines/alpine_linux_importer.py +++ b/vulnerabilities/pipelines/alpine_linux_importer.py @@ -254,7 +254,8 @@ def load_advisories( affected_packages.append( AffectedPackage( package=PackageURL( - type="alpine", + type="apk", + namespace="alpine", name=pkg_infos["name"], qualifiers=qualifiers, ), @@ -266,7 +267,8 @@ def load_advisories( affected_packages.append( AffectedPackage( package=PackageURL( - type="alpine", + type="apk", + namespace="alpine", name=pkg_infos["name"], qualifiers=qualifiers, ), diff --git a/vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py b/vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py index 386f239d8..49182b287 100644 --- a/vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py +++ b/vulnerabilities/tests/pipelines/test_alpine_linux_importer_pipeline.py @@ -36,8 +36,8 @@ def test_process_record(): affected_packages=[ AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={ @@ -52,8 +52,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "armhf", "distroversion": "v3.11", "reponame": "main"}, @@ -64,8 +64,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "armv7", "distroversion": "v3.11", "reponame": "main"}, @@ -76,8 +76,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={ @@ -92,8 +92,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "s390x", "distroversion": "v3.11", "reponame": "main"}, @@ -104,8 +104,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "x86", "distroversion": "v3.11", "reponame": "main"}, @@ -116,8 +116,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "x86_64", "distroversion": "v3.11", "reponame": "main"}, @@ -143,8 
+143,8 @@ def test_process_record(): affected_packages=[ AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={ @@ -159,8 +159,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "armhf", "distroversion": "v3.11", "reponame": "main"}, @@ -171,8 +171,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "armv7", "distroversion": "v3.11", "reponame": "main"}, @@ -183,8 +183,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={ @@ -199,8 +199,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "s390x", "distroversion": "v3.11", "reponame": "main"}, @@ -211,8 +211,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "x86", "distroversion": "v3.11", "reponame": "main"}, @@ -223,8 +223,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="xen", version=None, qualifiers={"arch": "x86_64", "distroversion": "v3.11", "reponame": "main"}, @@ -250,8 +250,8 @@ def test_process_record(): affected_packages=[ AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={ @@ -266,8 +266,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "armhf", "distroversion": "v3.11", "reponame": "main"}, @@ -278,8 +278,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "armv7", "distroversion": "v3.11", "reponame": "main"}, @@ -290,8 +290,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={ @@ -306,8 +306,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "s390x", "distroversion": "v3.11", "reponame": "main"}, @@ -318,8 +318,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "x86", "distroversion": "v3.11", "reponame": "main"}, @@ -330,8 +330,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "x86_64", "distroversion": "v3.11", "reponame": "main"}, @@ -351,8 +351,8 @@ def test_process_record(): affected_packages=[ AffectedPackage( 
package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={ @@ -367,8 +367,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "armhf", "distroversion": "v3.11", "reponame": "main"}, @@ -379,8 +379,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "armv7", "distroversion": "v3.11", "reponame": "main"}, @@ -391,8 +391,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={ @@ -407,8 +407,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "s390x", "distroversion": "v3.11", "reponame": "main"}, @@ -419,8 +419,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "x86", "distroversion": "v3.11", "reponame": "main"}, @@ -431,8 +431,8 @@ def test_process_record(): ), AffectedPackage( package=PackageURL( - type="alpine", - namespace=None, + type="apk", + namespace="alpine", name="apk-tools", version=None, qualifiers={"arch": "x86_64", "distroversion": "v3.11", "reponame": "main"}, diff --git a/vulnerabilities/tests/test_data/default_improver/alpine-expected.json b/vulnerabilities/tests/test_data/default_improver/alpine-expected.json index 5d8a84930..f9d3caf16 100644 --- a/vulnerabilities/tests/test_data/default_improver/alpine-expected.json +++ b/vulnerabilities/tests/test_data/default_improver/alpine-expected.json @@ -6,8 +6,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=aarch64&distroversion=v3.11&reponame=main", @@ -30,8 +30,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=armhf&distroversion=v3.11&reponame=main", @@ -54,8 +54,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=armv7&distroversion=v3.11&reponame=main", @@ -78,8 +78,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=ppc64le&distroversion=v3.11&reponame=main", @@ -102,8 +102,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=s390x&distroversion=v3.11&reponame=main", @@ -126,8 +126,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=x86&distroversion=v3.11&reponame=main", @@ -150,8 
+150,8 @@ "summary": null, "affected_purls": [], "fixed_purl": { - "type": "alpine", - "namespace": "", + "type": "apk", + "namespace": "alpine", "name": "xen", "version": "4.10.0-r1", "qualifiers": "arch=x86_64&distroversion=v3.11&reponame=main", diff --git a/vulnerabilities/tests/test_data/default_improver/alpine-input.json b/vulnerabilities/tests/test_data/default_improver/alpine-input.json index 9ff37ecb8..f2143b32b 100644 --- a/vulnerabilities/tests/test_data/default_improver/alpine-input.json +++ b/vulnerabilities/tests/test_data/default_improver/alpine-input.json @@ -4,8 +4,8 @@ "affected_packages": [ { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { @@ -20,8 +20,8 @@ }, { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { @@ -36,8 +36,8 @@ }, { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { @@ -52,8 +52,8 @@ }, { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { @@ -68,8 +68,8 @@ }, { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { @@ -84,8 +84,8 @@ }, { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { @@ -100,8 +100,8 @@ }, { "package": { - "type": "alpine", - "namespace": null, + "type": "apk", + "namespace": "alpine", "name": "xen", "version": null, "qualifiers": { diff --git a/vulnerabilities/tests/test_models.py b/vulnerabilities/tests/test_models.py index 014754786..a5f8e251c 100644 --- a/vulnerabilities/tests/test_models.py +++ b/vulnerabilities/tests/test_models.py @@ -8,25 +8,17 @@ # import urllib.parse -from datetime import datetime from unittest import TestCase -from unittest import mock import pytest -from django.db import transaction -from django.db.models.query import QuerySet -from django.db.utils import IntegrityError -from freezegun import freeze_time from packageurl import PackageURL from univers import versions from univers.version_range import RANGE_CLASS_BY_SCHEMES -from univers.version_range import AlpineLinuxVersionRange from vulnerabilities import models from vulnerabilities.models import Alias from vulnerabilities.models import Package from vulnerabilities.models import Vulnerability -from vulnerabilities.models import VulnerabilityQuerySet class TestVulnerabilityModel(TestCase): @@ -397,7 +389,9 @@ def test_univers_version_class(self): pypi_package_version = RANGE_CLASS_BY_SCHEMES[pypi_package.type].version_class assert pypi_package_version == versions.PypiVersion - alpine_package = models.Package.objects.create(type="alpine", name="lxml", version="0.9") + alpine_package = models.Package.objects.create( + type="apk", namespace="alpine", name="lxml", version="0.9" + ) alpine_version = RANGE_CLASS_BY_SCHEMES[alpine_package.type].version_class assert alpine_version == versions.AlpineLinuxVersion diff --git a/vulnerabilities/tests/test_view.py b/vulnerabilities/tests/test_view.py index fd62e94a1..98a555294 100644 --- a/vulnerabilities/tests/test_view.py +++ b/vulnerabilities/tests/test_view.py @@ -16,7 +16,6 @@ from packageurl import PackageURL from univers import versions -from vulnerabilities 
import models from vulnerabilities.models import AffectedByPackageRelatedVulnerability from vulnerabilities.models import Alias from vulnerabilities.models import FixingPackageRelatedVulnerability @@ -249,8 +248,8 @@ class TestCustomFilters: "pkg%3Arpm/redhat/katello-client-bootstrap%401.1.0-2%3Farch%3Del6sat", ), ( - "pkg:alpine/nginx@1.10.3-r1?arch=armhf&distroversion=v3.5&reponame=main", - "pkg%3Aalpine/nginx%401.10.3-r1%3Farch%3Darmhf%26distroversion%3Dv3.5%26reponame%3Dmain", + "pkg:apk/alpine/nginx@1.10.3-r1?arch=armhf&distroversion=v3.5&reponame=main", + "pkg%3Aapk/alpine/nginx%401.10.3-r1%3Farch%3Darmhf%26distroversion%3Dv3.5%26reponame%3Dmain", ), ("pkg:nginx/nginx@0.9.0?os=windows", "pkg%3Anginx/nginx%400.9.0%3Fos%3Dwindows"), ( diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py index fd57acea5..7d0911c64 100644 --- a/vulnerabilities/views.py +++ b/vulnerabilities/views.py @@ -54,7 +54,7 @@ def purl_sort_key(purl: models.Package): def get_purl_version_class(purl: models.Package): - RANGE_CLASS_BY_SCHEMES["alpine"] = AlpineLinuxVersionRange + RANGE_CLASS_BY_SCHEMES["apk"] = AlpineLinuxVersionRange purl_version_class = None check_version_class = RANGE_CLASS_BY_SCHEMES.get(purl.type, None) if check_version_class: From a342879c4878b750a43ed8f7c624b6dd6b3438cf Mon Sep 17 00:00:00 2001 From: Keshav Priyadarshi Date: Mon, 13 Jan 2025 14:30:32 +0530 Subject: [PATCH 25/26] Add description and link to latest release in UI Signed-off-by: Keshav Priyadarshi --- vulnerabilities/templates/index.html | 35 ++++++++++++++++++++++++---- vulnerabilities/views.py | 3 ++- 2 files changed, 32 insertions(+), 6 deletions(-) diff --git a/vulnerabilities/templates/index.html b/vulnerabilities/templates/index.html index 8321efd83..eaa7a55b9 100644 --- a/vulnerabilities/templates/index.html +++ b/vulnerabilities/templates/index.html @@ -6,8 +6,33 @@ {% endblock %} {% block content %} -
-    {% include "package_search_box.html" %}
-    {% include "vulnerability_search_box.html" %}
-
-{% endblock %}
+
+
+
+

+ VulnerableCode aggregates software
+ vulnerabilities from multiple public advisory sources
+ and presents their details along with their affected
+ packages and fixed-by packages identified by
+ Package URLs (PURLs).
+

+

+ What's new in this release:
+
+ Check out the latest updates here!
+
+

+
+
+
+ {% include "vulnerability_search_box.html" %} +
+
+
+
+ {% include "package_search_box.html" %} +
+
+
+
+
+{% endblock %}
\ No newline at end of file
diff --git a/vulnerabilities/views.py b/vulnerabilities/views.py
index fd57acea5..93cff0628 100644
--- a/vulnerabilities/views.py
+++ b/vulnerabilities/views.py
@@ -7,7 +7,6 @@
 # See https://aboutcode.org for more information about nexB OSS projects.
 #
 import logging
-from datetime import datetime

 from cvss.exceptions import CVSS2MalformedError
 from cvss.exceptions import CVSS3MalformedError
@@ -34,6 +33,7 @@
 from vulnerabilities.severity_systems import EPSS
 from vulnerabilities.severity_systems import SCORING_SYSTEMS
 from vulnerabilities.utils import get_severity_range
+from vulnerablecode import __version__ as VULNERABLECODE_VERSION
 from vulnerablecode.settings import env

 PAGE_SIZE = 20
@@ -256,6 +256,7 @@ def get(self, request):
         context = {
             "vulnerability_search_form": VulnerabilitySearchForm(request_query),
             "package_search_form": PackageSearchForm(request_query),
+            "release_url": f"https://github.com/aboutcode-org/vulnerablecode/releases/tag/v{VULNERABLECODE_VERSION}",
         }
         return render(request=request, template_name=self.template_name, context=context)

From 656fd7d1969aff5bd95144bf68bd769cdbf6e0c2 Mon Sep 17 00:00:00 2001
From: Keshav Priyadarshi
Date: Mon, 13 Jan 2025 15:22:24 +0530
Subject: [PATCH 26/26] Move description down

Signed-off-by: Keshav Priyadarshi
---
 vulnerabilities/templates/index.html | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/vulnerabilities/templates/index.html b/vulnerabilities/templates/index.html
index eaa7a55b9..cdc9212ed 100644
--- a/vulnerabilities/templates/index.html
+++ b/vulnerabilities/templates/index.html
@@ -8,8 +8,18 @@
 {% block content %}
+
+
+ {% include "vulnerability_search_box.html" %} +
+
+
+
+ {% include "package_search_box.html" %} +
+
-

+

 VulnerableCode aggregates software
 vulnerabilities from multiple public advisory sources
 and presents their details along with their affected
 packages and fixed-by packages identified by
 Package URLs (PURLs).
@@ -23,16 +33,6 @@

-
-
- {% include "vulnerability_search_box.html" %} -
-
-
-
- {% include "package_search_box.html" %} -
-
{% endblock %} \ No newline at end of file
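
The patches above converge on one convention: Alpine packages are stored with the PURL type "apk" and the namespace "alpine" instead of the non-standard type "alpine". The short Python sketch below illustrates that rewrite outside of Django; the helper name to_apk_purl and the example purl are illustrative and not part of the patches, but the field updates mirror update_alpine_purl() in migration 0088_fix_alpine_purl_type and the PackageURL(type="apk", namespace="alpine", ...) calls in the importer pipeline.

    from packageurl import PackageURL


    def to_apk_purl(purl: str) -> str:
        """Rewrite a legacy `pkg:alpine/...` purl as `pkg:apk/alpine/...`."""
        # Decode the purl into its fields, swap type/namespace, re-encode.
        fields = PackageURL.from_string(purl).to_dict()
        fields["type"] = "apk"
        fields["namespace"] = "alpine"
        return str(PackageURL(**fields))


    # A purl written the old way ...
    old_purl = "pkg:alpine/curl@7.83.0-r0?arch=x86"
    # ... becomes the standard apk form used after the migration.
    assert to_apk_purl(old_purl) == "pkg:apk/alpine/curl@7.83.0-r0?arch=x86"

The reverse migration applies the same rewrite in the other direction, restoring type "alpine" with an empty namespace.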