Skip to content

Commit e33e85a

Browse files
committed
Change API design
Signed-off-by: Tushar Goel <[email protected]>
1 parent 10188d2 commit e33e85a

19 files changed

+1457
-133
lines changed

vulnerabilities/api_v2.py

Lines changed: 382 additions & 0 deletions
Large diffs are not rendered by default.

vulnerabilities/improvers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from vulnerabilities.pipelines import flag_ghost_packages
2121
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2222
from vulnerabilities.pipelines import remove_duplicate_advisories
23+
from vulnerabilities.pipelines.v2_improvers import collect_commits as collect_commits_v2
2324
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2425
from vulnerabilities.pipelines.v2_improvers import (
2526
computer_package_version_rank as compute_version_rank_v2,
@@ -65,6 +66,7 @@
6566
enhance_with_metasploit_v2.MetasploitImproverPipeline,
6667
compute_package_risk_v2.ComputePackageRiskPipeline,
6768
compute_version_rank_v2.ComputeVersionRankPipeline,
69+
collect_commits_v2.CollectFixCommitsPipeline,
6870
]
6971

7072
IMPROVERS_REGISTRY = {

vulnerabilities/migrations/0093_advisoryalias_advisoryreference_advisoryseverity_and_more.py

Lines changed: 215 additions & 87 deletions
Large diffs are not rendered by default.

vulnerabilities/models.py

Lines changed: 131 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1323,7 +1323,7 @@ def url(self):
13231323
return f"https://github.com/nodejs/security-wg/blob/main/vuln/npm/{id}.json"
13241324

13251325

1326-
class AdvisoryQuerySet(BaseQuerySet):
1326+
class AdvisoryV2QuerySet(BaseQuerySet):
13271327
def search(query):
13281328
"""
13291329
This function will take a string as an input, the string could be an alias or an advisory ID or
@@ -1337,6 +1337,20 @@ def search(query):
13371337
).distinct()
13381338

13391339

1340+
class AdvisoryQuerySet(BaseQuerySet):
    """Search helpers for ``Advisory`` records."""

    @staticmethod
    def search(query):
        """
        Return the distinct ``Advisory`` records matching the ``query`` string.

        ``query`` is matched case-insensitively as a substring of the advisory ID, of any
        alias, of the summary or of any reference URL.

        This is a static method because the signature takes no ``self`` and the lookup always
        starts from ``Advisory.objects``: as a plain function it could only be called on the
        class itself and raised a ``TypeError`` when called on a queryset instance or on a
        manager created with ``as_manager()``.
        """
        return Advisory.objects.filter(
            Q(advisory_id__icontains=query)
            | Q(aliases__alias__icontains=query)
            | Q(summary__icontains=query)
            | Q(references__url__icontains=query)
        ).distinct()
1352+
1353+
13401354
# FIXME: Remove when migration from Vulnerability to Advisory is completed
13411355
class Advisory(models.Model):
13421356
"""
@@ -1820,6 +1834,60 @@ class Meta:
18201834
abstract = True
18211835

18221836

1837+
class CodeChangeV2(models.Model):
    """
    Abstract base model for a code change that either introduces or fixes a vulnerability.

    A code change is tracked through references: commits, pull requests and downloads.
    The actual code patch is meant to be stored in the ``patch`` field; when it is not
    available, it is to be inferred from these references by improvers.
    """

    # References pointing at the code change.
    commits = models.JSONField(
        blank=True,
        default=list,
        help_text="List of commit identifiers using VCS URLs associated with the code change.",
    )
    pulls = models.JSONField(
        blank=True,
        default=list,
        help_text="List of pull request URLs associated with the code change.",
    )
    downloads = models.JSONField(
        blank=True,
        default=list,
        help_text="List of download URLs for the patched code.",
    )

    # The change itself, when known.
    patch = models.TextField(
        blank=True,
        null=True,
        help_text="The code change as a patch in unified diff format.",
    )

    # Optional: the link is cleared (not cascaded) when the package is deleted.
    base_package_version = models.ForeignKey(
        "PackageV2",
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="codechanges_v2",
        help_text="The base package version to which this code change applies.",
    )

    # Free-form context for the change.
    notes = models.TextField(
        blank=True,
        null=True,
        help_text="Notes or instructions about this code change.",
    )
    references = models.JSONField(
        blank=True,
        default=list,
        help_text="URL references related to this code change.",
    )

    # Review state and bookkeeping timestamps.
    is_reviewed = models.BooleanField(
        default=False,
        help_text="Indicates if this code change has been reviewed.",
    )
    created_at = models.DateTimeField(
        auto_now_add=True,
        help_text="Timestamp indicating when this code change was created.",
    )
    updated_at = models.DateTimeField(
        auto_now=True,
        help_text="Timestamp indicating when this code change was last updated.",
    )

    class Meta:
        abstract = True
1889+
1890+
18231891
class CodeFix(CodeChange):
18241892
"""
18251893
A code fix is a code change that addresses a vulnerability and is associated:
@@ -1844,6 +1912,35 @@ class CodeFix(CodeChange):
18441912
)
18451913

18461914

1915+
class CodeFixV2(CodeChangeV2):
    """
    A code fix is a code change that addresses a vulnerability and is associated:
    - with a specific advisory
    - with the package that has been affected
    - optionally with a specific fixing package version when it is known
    """

    # Required: deleting the advisory deletes its code fixes.
    advisory = models.ForeignKey(
        "AdvisoryV2",
        on_delete=models.CASCADE,
        related_name="code_fix_v2",
        help_text="The advisory that this code fix addresses.",
    )

    # Required: deleting the affected package deletes its code fixes.
    affected_package = models.ForeignKey(
        "PackageV2",
        on_delete=models.CASCADE,
        related_name="code_fix_v2_affected",
        help_text="The affected package version to which this code fix applies.",
    )

    # Optional: the link is cleared (not cascaded) when the fixing package is deleted.
    fixed_package = models.ForeignKey(
        "PackageV2",
        null=True,
        blank=True,
        on_delete=models.SET_NULL,
        related_name="code_fix_v2_fixed",
        help_text="The fixing package version with this code fix",
    )
1942+
1943+
18471944
class PipelineRun(models.Model):
18481945
"""The Database representation of a pipeline execution."""
18491946

@@ -2451,6 +2548,23 @@ class AdvisoryV2(models.Model):
24512548
into structured data
24522549
"""
24532550

2551+
# This is similar to a type or a namespace
2552+
datasource_id = models.CharField(
    max_length=100,
    blank=False,
    null=False,
    # Single literal: the previous two adjacent literals were joined without a separator.
    help_text="Unique ID for the datasource used for this advisory. e.g.: nginx_importer_v2",
)
2558+
2559+
avid = models.CharField(
    max_length=500,
    blank=False,
    null=False,
    # NOTE(review): wording derived from the example value, which has the shape
    # "<datasource_id>/<advisory_id>" -- confirm this is the intended definition.
    help_text="Unique ID for this advisory, prefixed with the ID of its datasource. "
    "e.g.: pysec_importer_v2/PYSEC-2020-2233",
)
2566+
2567+
# This is similar to a name
24542568
advisory_id = models.CharField(
24552569
max_length=50,
24562570
blank=False,
@@ -2460,13 +2574,27 @@ class AdvisoryV2(models.Model):
24602574
"such as PYSEC-2020-2233",
24612575
)
24622576

2577+
# This is similar to a version
24632578
unique_content_id = models.CharField(
24642579
max_length=64,
24652580
blank=False,
24662581
null=False,
24672582
unique=True,
24682583
help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
24692584
)
2585+
url = models.URLField(
2586+
blank=False,
2587+
null=False,
2588+
help_text="Link to the advisory on the upstream website",
2589+
)
2590+
2591+
# TODO: Have a mapping that gives datasource class by datasource ID
2592+
# Get label from datasource class
2593+
# Remove this from model
2594+
# In the UI - Use label
2595+
# In the API - Use datasource_id
2596+
# Have an API endpoint for all info for datasources - show license, label
2597+
24702598
summary = models.TextField(
24712599
blank=True,
24722600
)
@@ -2497,18 +2625,6 @@ class AdvisoryV2(models.Model):
24972625
date_imported = models.DateTimeField(
24982626
blank=True, null=True, help_text="UTC Date on which the advisory was imported"
24992627
)
2500-
# TODO: Rename to datasource ID
2501-
datasource_ID = models.CharField(
2502-
max_length=100,
2503-
help_text="Fully qualified name of the importer prefixed with the"
2504-
"module name importing the advisory. Eg:"
2505-
"nginx_importer_v2",
2506-
)
2507-
url = models.URLField(
2508-
blank=False,
2509-
null=False,
2510-
help_text="Link to the advisory on the upstream website",
2511-
)
25122628

25132629
affecting_packages = models.ManyToManyField(
25142630
"PackageV2",
@@ -2558,7 +2674,8 @@ def risk_score(self):
25582674
objects = AdvisoryQuerySet.as_manager()
25592675

25602676
class Meta:
2561-
ordering = ["date_published", "unique_content_id"]
2677+
unique_together = ["datasource_id", "advisory_id", "unique_content_id"]
2678+
ordering = ["datasource_id", "advisory_id", "date_published", "unique_content_id"]
25622679

25632680
def save(self, *args, **kwargs):
25642681
self.full_clean()

vulnerabilities/pipelines/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ class VulnerableCodeBaseImporterPipelineV2(VulnerableCodePipeline):
273273

274274
pipeline_id = None # Unique Pipeline ID, this should be the name of pipeline module.
275275
license_url = None
276+
label = None
276277
spdx_license_expression = None
277278
repo_url = None
278279
importer_name = None

vulnerabilities/pipelines/v2_importers/apache_httpd_importer.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,12 @@ def get_weaknesses(cve_data):
140140

141141
class ApacheHTTPDImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
142142
pipeline_id = "apache_httpd_importer_v2"
143+
label = "Apache-Httpd"
143144
spdx_license_expression = "Apache-2.0"
144145
license_url = "https://www.apache.org/licenses/LICENSE-2.0"
145146
importer_name = "Apache HTTPD Importer"
146147
base_url = "https://httpd.apache.org/security/json/"
148+
unfurl_version_ranges = True
147149

148150
links = []
149151

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from pathlib import Path
11+
from typing import Iterable
12+
13+
from dateutil import parser as dateparser
14+
from fetchcode.vcs import fetch_via_vcs
15+
from packageurl import PackageURL
16+
from univers.version_constraint import VersionConstraint
17+
from univers.version_range import HexVersionRange
18+
19+
from vulnerabilities.importer import AdvisoryData
20+
from vulnerabilities.importer import AffectedPackage
21+
from vulnerabilities.importer import Reference
22+
from vulnerabilities.pipelines import VulnerableCodeBaseImporterPipelineV2
23+
from vulnerabilities.utils import is_cve
24+
from vulnerabilities.utils import load_yaml
25+
26+
27+
class ElixirSecurityImporterPipeline(VulnerableCodeBaseImporterPipelineV2):
    """
    Import advisories for Hex packages from the YAML files of the
    dependabot/elixir-security-advisories git repository.
    """

    pipeline_id = "elixir_security_importer_v2"
    label = "Elixir Security"
    repo_url = "git+https://github.com/dependabot/elixir-security-advisories"
    license_url = "https://github.com/dependabot/elixir-security-advisories/blob/master/LICENSE.txt"
    spdx_license_expression = "CC0-1.0"
    importer_name = "Elixir Security Importer"

    # Set by ``clone``. The class-level default lets the cleanup in ``collect_advisories``
    # test it safely even when no clone is available.
    vcs_response = None

    @classmethod
    def steps(cls):
        """
        Return the pipeline steps in execution order.

        ``clone`` must run first: the advisory count and collection both read the
        checkout stored in ``self.vcs_response``.
        """
        return (
            cls.clone,
            cls.collect_and_store_advisories,
        )

    def clone(self):
        """Fetch the advisories repository and keep the response in ``self.vcs_response``."""
        self.log(f"Cloning `{self.repo_url}`")
        self.vcs_response = fetch_via_vcs(self.repo_url)

    def advisories_count(self) -> int:
        """Return the number of ``.yml`` files found under the ``packages`` directory."""
        base_path = Path(self.vcs_response.dest_dir)
        return sum(1 for _ in (base_path / "packages").glob("**/*.yml"))

    def collect_advisories(self) -> Iterable[AdvisoryData]:
        """
        Yield AdvisoryData for each ``.yml`` file under the ``packages`` directory.

        The fetched repository is deleted once iteration ends, including on error.
        """
        try:
            base_path = Path(self.vcs_response.dest_dir)
            for file in (base_path / "packages").glob("**/*.yml"):
                yield from self.process_file(file, base_path)
        finally:
            if self.vcs_response:
                self.vcs_response.delete()

    def process_file(self, file, base_path) -> Iterable[AdvisoryData]:
        """
        Yield at most one AdvisoryData built from the advisory YAML ``file``.

        ``base_path`` is the root of the checkout and is used to compute the advisory URL.
        Nothing is yielded when the file has no valid CVE identifier.
        """
        # Always use "/" separators since this path becomes part of a URL.
        relative_path = file.relative_to(base_path).as_posix()
        advisory_url = (
            f"https://github.com/dependabot/elixir-security-advisories/blob/master/{relative_path}"
        )
        # Guard against an empty document: fall back to an empty mapping so that the
        # lookups below are safe.
        yaml_file = load_yaml(str(file)) or {}

        # The "cve" value may come with or without its "CVE-" prefix.
        cve = yaml_file.get("cve") or ""
        if not cve:
            return
        cve_id = cve if cve.startswith("CVE-") else f"CVE-{cve}"
        if not is_cve(cve_id):
            return

        summary = yaml_file.get("description") or ""
        pkg_name = yaml_file.get("package") or ""

        link = yaml_file.get("link") or ""
        references = [Reference(url=link)] if link else []

        vrc = HexVersionRange.version_class
        unaffected_versions = yaml_file.get("unaffected_versions") or []
        patched_versions = yaml_file.get("patched_versions") or []

        # NOTE(review): unaffected versions are added to the affected range as-is while
        # patched versions are inverted -- confirm that this is intended.
        constraints = [
            VersionConstraint.from_string(version_class=vrc, string=version)
            for version in unaffected_versions
        ]
        for version in patched_versions:
            # Drop the "~>" operator and keep only the version that follows it.
            if version.startswith("~>"):
                version = version[2:]
            constraints.append(
                VersionConstraint.from_string(version_class=vrc, string=version).invert()
            )

        affected_packages = []
        if pkg_name:
            affected_packages.append(
                AffectedPackage(
                    package=PackageURL(type="hex", name=pkg_name),
                    affected_version_range=HexVersionRange(constraints=constraints),
                )
            )

        disclosure_date = yaml_file.get("disclosure_date")
        date_published = dateparser.parse(disclosure_date) if disclosure_date else None

        yield AdvisoryData(
            advisory_id=cve_id,
            aliases=[],
            summary=summary,
            references_v2=references,
            affected_packages=affected_packages,
            url=advisory_url,
            date_published=date_published,
        )

0 commit comments

Comments
 (0)