Skip to content

Commit b7c0a28

Browse files
committed
Fix incorrect default starting year in NVD importer
The function fetch_cve_data_1_1 used starting_year=2025 by default, but NVD JSON CVE feeds are available starting from 2002. This caused older CVE data (2002–2024) to be skipped when no starting year was provided. This updates the default starting_year to 2002 so all available NVD CVE data is fetched by default, matching documented behavior. Fixes: #2079 Signed-off-by: Aditya Kumar Singh <[email protected]>
1 parent be89117 commit b7c0a28

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+2663
-419
lines changed

aboutcode/federated/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1687,7 +1687,7 @@ def package_path_elements(
16871687
_pkg, _, core_path = core_purl.partition(":")
16881688
purl_hash = _compute_hash(core_purl=core_purl, max_value=max_value)
16891689

1690-
version = normalize_version(purl.version)
1690+
version = normalize_version(purl.version, purl.type)
16911691
if version:
16921692
version = percent_quote_more(version)
16931693

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ MarkupSafe==2.1.1
6464
matplotlib-inline==0.1.3
6565
multidict==6.0.2
6666
mypy-extensions==0.4.3
67-
packageurl-python==0.15.6
67+
packageurl-python==0.17.6
6868
packaging==21.3
6969
paramiko==3.4.0
7070
parso==0.8.3

setup.cfg

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ install_requires =
7171
drf-spectacular[sidecar]>=0.24.2
7272

7373
#essentials
74-
packageurl-python>=0.15
74+
packageurl-python>=0.17
7575
univers>=30.12.0
7676
license-expression>=30.0.0
7777

vulnerabilities/api_v2.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,31 @@ class AdvisoryV2Serializer(serializers.ModelSerializer):
146146
references = AdvisoryReferenceSerializer(many=True)
147147
severities = AdvisorySeveritySerializer(many=True)
148148
advisory_id = serializers.CharField(source="avid", read_only=True)
149+
related_ssvc_trees = serializers.SerializerMethodField()
150+
151+
def get_related_ssvc_trees(self, obj):
    """
    Return the de-duplicated SSVC trees attached to the advisory ``obj``.

    Trees are gathered from ``obj.related_ssvcs`` first and then from
    ``obj.source_ssvcs``. Two trees are treated as the same entry when they
    share both ``vector`` and ``source_advisory_id``; the first one seen wins.
    Each entry is a mapping with the keys ``vector``, ``decision``,
    ``options`` and ``source_url``.
    """
    related_qs = obj.related_ssvcs.all().select_related("source_advisory")
    source_qs = obj.source_ssvcs.all().select_related("source_advisory")

    # A dict keeps insertion order, so it serves as both the "seen" set and
    # the ordered collection of results.
    trees_by_key = {}
    for tree in [*related_qs, *source_qs]:
        dedup_key = (tree.vector, tree.source_advisory_id)
        if dedup_key not in trees_by_key:
            trees_by_key[dedup_key] = {
                "vector": tree.vector,
                "decision": tree.decision,
                "options": tree.options,
                "source_url": tree.source_advisory.url,
            }

    return list(trees_by_key.values())
149174

150175
class Meta:
151176
model = AdvisoryV2
@@ -160,6 +185,7 @@ class Meta:
160185
"exploitability",
161186
"weighted_severity",
162187
"risk_score",
188+
"related_ssvc_trees",
163189
]
164190

165191
def get_aliases(self, obj):

vulnerabilities/importer.py

Lines changed: 133 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,9 @@
3636
from vulnerabilities.severity_systems import SCORING_SYSTEMS
3737
from vulnerabilities.severity_systems import ScoringSystem
3838
from vulnerabilities.utils import classproperty
39+
from vulnerabilities.utils import compute_patch_checksum
3940
from vulnerabilities.utils import get_reference_id
41+
from vulnerabilities.utils import is_commit
4042
from vulnerabilities.utils import is_cve
4143
from vulnerabilities.utils import nearest_patched_package
4244
from vulnerabilities.utils import purl_to_dict
@@ -194,6 +196,103 @@ def from_url(cls, url):
194196
return cls(url=url)
195197

196198

199+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PackageCommitPatchData:
    """
    A VCS commit for a package, optionally carrying the text of its patch.

    ``patch_checksum`` is not an ``__init__`` argument: it is set in
    ``__post_init__`` from ``patch_text`` and stays ``None`` when no patch text
    is provided.
    """

    vcs_url: str
    commit_hash: str
    patch_text: Optional[str] = None
    patch_checksum: Optional[str] = dataclasses.field(init=False, default=None)

    def __post_init__(self):
        """
        Validate the commit fields and derive ``patch_checksum``.

        Raise ValueError when ``commit_hash`` is empty or rejected by
        ``is_commit``, or when ``vcs_url`` is empty.
        """
        if not self.commit_hash:
            raise ValueError("Commit must have a non-empty commit_hash.")

        if not is_commit(self.commit_hash):
            raise ValueError(f"Commit must be a valid commit_hash: {self.commit_hash}.")

        if not self.vcs_url:
            raise ValueError("Commit must have a non-empty vcs_url.")

        if self.patch_text:
            self.patch_checksum = compute_patch_checksum(self.patch_text)

    def __lt__(self, other):
        """Order commits by ``_cmp_key``; other types are not comparable."""
        if not isinstance(other, PackageCommitPatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    # TODO: Add cache
    def _cmp_key(self):
        """
        Return the tuple used to order commits.

        The optional fields are wrapped as ``(is_set, value)`` pairs so that a
        missing value sorts before any string instead of raising TypeError on
        a ``None < str`` comparison.
        """
        return (
            self.vcs_url,
            self.commit_hash,
            (self.patch_text is not None, self.patch_text or ""),
            (self.patch_checksum is not None, self.patch_checksum or ""),
        )

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the commit."""
        return {
            "vcs_url": self.vcs_url,
            "commit_hash": self.commit_hash,
            "patch_text": self.patch_text,
            "patch_checksum": self.patch_checksum,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PackageCommitPatchData instance from a dictionary.

        Only ``vcs_url``, ``commit_hash`` and ``patch_text`` are read; any
        ``patch_checksum`` in ``data`` is ignored because the checksum is
        recomputed in ``__post_init__``.
        """
        return cls(
            vcs_url=data.get("vcs_url"),
            commit_hash=data.get("commit_hash"),
            patch_text=data.get("patch_text"),
        )
251+
252+
253+
@dataclasses.dataclass(eq=True)
@functools.total_ordering
class PatchData:
    """
    A patch identified by its URL, its text, or both.

    ``patch_checksum`` is not an ``__init__`` argument: it is set in
    ``__post_init__`` from ``patch_text`` and stays ``None`` when no patch text
    is provided.
    """

    patch_url: Optional[str] = None
    patch_text: Optional[str] = None
    patch_checksum: Optional[str] = dataclasses.field(init=False, default=None)

    def __post_init__(self):
        """
        Validate the patch and derive ``patch_checksum``.

        Raise ValueError when both ``patch_url`` and ``patch_text`` are empty.
        """
        if not self.patch_url and not self.patch_text:
            raise ValueError("A patch must include either patch_url or patch_text")

        if self.patch_text:
            self.patch_checksum = compute_patch_checksum(self.patch_text)

    def __lt__(self, other):
        """Order patches by ``_cmp_key``; other types are not comparable."""
        if not isinstance(other, PatchData):
            return NotImplemented
        return self._cmp_key() < other._cmp_key()

    def _cmp_key(self):
        """
        Return the tuple used to order patches.

        Every field is optional, so each one is wrapped as an
        ``(is_set, value)`` pair: a missing value sorts before any string
        instead of raising TypeError on a ``None < str`` comparison (for
        example a URL-only patch compared with a text-only patch).
        """
        return (
            (self.patch_url is not None, self.patch_url or ""),
            (self.patch_text is not None, self.patch_text or ""),
            (self.patch_checksum is not None, self.patch_checksum or ""),
        )

    def to_dict(self) -> dict:
        """Return a normalized dictionary representation of the patch."""
        return {
            "patch_url": self.patch_url,
            "patch_text": self.patch_text,
            "patch_checksum": self.patch_checksum,
        }

    @classmethod
    def from_dict(cls, data: dict):
        """
        Create a PatchData instance from a dictionary.

        Only ``patch_url`` and ``patch_text`` are read; any ``patch_checksum``
        in ``data`` is ignored because the checksum is recomputed in
        ``__post_init__``.
        """
        return cls(
            patch_url=data.get("patch_url"),
            patch_text=data.get("patch_text"),
        )
294+
295+
197296
class UnMergeablePackageError(Exception):
198297
"""
199298
Raised when a package cannot be merged with another one.
@@ -344,21 +443,30 @@ class AffectedPackageV2:
344443
"""
345444
Relate a Package URL with a range of affected versions and fixed versions.
346445
The Package URL must *not* have a version.
347-
AffectedPackage must contain either ``affected_version_range`` or ``fixed_version_range``.
446+
AffectedPackage must contain either ``affected_version_range`` or ``fixed_version_range`` or ``introduced_by_commits`` or ``fixed_by_commits``.
348447
"""
349448

350449
package: PackageURL
351450
affected_version_range: Optional[VersionRange] = None
352451
fixed_version_range: Optional[VersionRange] = None
452+
introduced_by_commit_patches: List[PackageCommitPatchData] = dataclasses.field(
453+
default_factory=list
454+
)
455+
fixed_by_commit_patches: List[PackageCommitPatchData] = dataclasses.field(default_factory=list)
353456

354457
def __post_init__(self):
355458
if self.package.version:
356459
raise ValueError(f"Affected Package URL {self.package!r} cannot have a version.")
357460

358-
if not (self.affected_version_range or self.fixed_version_range):
461+
if not (
462+
self.affected_version_range
463+
or self.fixed_version_range
464+
or self.introduced_by_commit_patches
465+
or self.fixed_by_commit_patches
466+
):
359467
raise ValueError(
360-
f"Affected Package {self.package!r} should have either fixed version range or an "
361-
"affected version range."
468+
f"Affected package {self.package!r} must have either a fixed version range, "
469+
"an affected version range, introduced commit patches, or fixed commit patches."
362470
)
363471

364472
def __lt__(self, other):
@@ -372,6 +480,8 @@ def _cmp_key(self):
372480
str(self.package),
373481
str(self.affected_version_range or ""),
374482
str(self.fixed_version_range or ""),
483+
str(self.introduced_by_commit_patches or []),
484+
str(self.fixed_by_commit_patches or []),
375485
)
376486

377487
def to_dict(self):
@@ -385,6 +495,12 @@ def to_dict(self):
385495
"package": purl_to_dict(self.package),
386496
"affected_version_range": affected_version_range,
387497
"fixed_version_range": fixed_version_range,
498+
"introduced_by_commit_patches": [
499+
commit.to_dict() for commit in self.introduced_by_commit_patches
500+
],
501+
"fixed_by_commit_patches": [
502+
commit.to_dict() for commit in self.fixed_by_commit_patches
503+
],
388504
}
389505

390506
@classmethod
@@ -396,6 +512,10 @@ def from_dict(cls, affected_pkg: dict):
396512
fixed_version_range = None
397513
affected_range = affected_pkg["affected_version_range"]
398514
fixed_range = affected_pkg["fixed_version_range"]
515+
introduced_by_commit_patches = (
516+
affected_pkg.get("introduced_by_package_commit_patches") or []
517+
)
518+
fixed_by_commit_patches = affected_pkg.get("fixed_by_package_commit_patches") or []
399519

400520
try:
401521
affected_version_range = VersionRange.from_string(affected_range)
@@ -417,6 +537,12 @@ def from_dict(cls, affected_pkg: dict):
417537
package=package,
418538
affected_version_range=affected_version_range,
419539
fixed_version_range=fixed_version_range,
540+
introduced_by_commit_patches=[
541+
PackageCommitPatchData.from_dict(commit) for commit in introduced_by_commit_patches
542+
],
543+
fixed_by_commit_patches=[
544+
PackageCommitPatchData.from_dict(commit) for commit in fixed_by_commit_patches
545+
],
420546
)
421547

422548

@@ -441,6 +567,7 @@ class AdvisoryData:
441567
)
442568
references: List[Reference] = dataclasses.field(default_factory=list)
443569
references_v2: List[ReferenceV2] = dataclasses.field(default_factory=list)
570+
patches: List[PatchData] = dataclasses.field(default_factory=list)
444571
date_published: Optional[datetime.datetime] = None
445572
weaknesses: List[int] = dataclasses.field(default_factory=list)
446573
severities: List[VulnerabilitySeverity] = dataclasses.field(default_factory=list)
@@ -473,6 +600,7 @@ def to_dict(self):
473600
"summary": self.summary,
474601
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
475602
"references_v2": [ref.to_dict() for ref in self.references_v2],
603+
"patches": [patch.to_dict() for patch in self.patches],
476604
"severities": [sev.to_dict() for sev in self.severities],
477605
"date_published": self.date_published.isoformat() if self.date_published else None,
478606
"weaknesses": self.weaknesses,
@@ -505,74 +633,7 @@ def from_dict(cls, advisory_data):
505633
"affected_packages": [
506634
affected_package_cls.from_dict(pkg) for pkg in affected_packages if pkg is not None
507635
],
508-
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
509-
"date_published": datetime.datetime.fromisoformat(date_published)
510-
if date_published
511-
else None,
512-
"weaknesses": advisory_data["weaknesses"],
513-
"url": advisory_data.get("url") or None,
514-
}
515-
return cls(**transformed)
516-
517-
518-
@dataclasses.dataclass(order=True)
519-
class AdvisoryDataV2:
520-
"""
521-
This data class expresses the contract between data sources and the import runner.
522-
523-
If a vulnerability_id is present then:
524-
summary or affected_packages or references must be present
525-
otherwise
526-
either affected_package or references should be present
527-
528-
date_published must be aware datetime
529-
"""
530-
531-
advisory_id: str = ""
532-
aliases: List[str] = dataclasses.field(default_factory=list)
533-
summary: Optional[str] = ""
534-
affected_packages: List[AffectedPackage] = dataclasses.field(default_factory=list)
535-
references: List[ReferenceV2] = dataclasses.field(default_factory=list)
536-
date_published: Optional[datetime.datetime] = None
537-
weaknesses: List[int] = dataclasses.field(default_factory=list)
538-
url: Optional[str] = None
539-
540-
def __post_init__(self):
541-
if self.date_published and not self.date_published.tzinfo:
542-
logger.warning(f"AdvisoryData with no tzinfo: {self!r}")
543-
if self.summary:
544-
self.summary = self.clean_summary(self.summary)
545-
546-
def clean_summary(self, summary):
547-
# https://nvd.nist.gov/vuln/detail/CVE-2013-4314
548-
# https://github.com/cms-dev/cms/issues/888#issuecomment-516977572
549-
summary = summary.strip()
550-
if summary:
551-
summary = summary.replace("\x00", "\uFFFD")
552-
return summary
553-
554-
def to_dict(self):
555-
return {
556-
"aliases": self.aliases,
557-
"summary": self.summary,
558-
"affected_packages": [pkg.to_dict() for pkg in self.affected_packages],
559-
"references": [ref.to_dict() for ref in self.references],
560-
"date_published": self.date_published.isoformat() if self.date_published else None,
561-
"weaknesses": self.weaknesses,
562-
"url": self.url if self.url else "",
563-
}
564-
565-
@classmethod
566-
def from_dict(cls, advisory_data):
567-
date_published = advisory_data["date_published"]
568-
transformed = {
569-
"aliases": advisory_data["aliases"],
570-
"summary": advisory_data["summary"],
571-
"affected_packages": [
572-
AffectedPackage.from_dict(pkg)
573-
for pkg in advisory_data["affected_packages"]
574-
if pkg is not None
575-
],
636+
"patches": [PatchData.from_dict(patch) for patch in advisory_data.get("patches", [])],
576637
"references": [Reference.from_dict(ref) for ref in advisory_data["references"]],
577638
"date_published": datetime.datetime.fromisoformat(date_published)
578639
if date_published

vulnerabilities/importers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
from vulnerabilities.pipelines import nvd_importer
4242
from vulnerabilities.pipelines import pypa_importer
4343
from vulnerabilities.pipelines import pysec_importer
44+
from vulnerabilities.pipelines.v2_importers import aosp_importer as aosp_importer_v2
4445
from vulnerabilities.pipelines.v2_importers import apache_httpd_importer as apache_httpd_v2
4546
from vulnerabilities.pipelines.v2_importers import archlinux_importer as archlinux_importer_v2
4647
from vulnerabilities.pipelines.v2_importers import curl_importer as curl_importer_v2
@@ -81,6 +82,7 @@
8182
mozilla_importer_v2.MozillaImporterPipeline,
8283
github_osv_importer_v2.GithubOSVImporterPipeline,
8384
redhat_importer_v2.RedHatImporterPipeline,
85+
aosp_importer_v2.AospImporterPipeline,
8486
nvd_importer.NVDImporterPipeline,
8587
github_importer.GitHubAPIImporterPipeline,
8688
gitlab_importer.GitLabImporterPipeline,

vulnerabilities/importers/curl.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ def parse_advisory_data(raw_data) -> AdvisoryData:
9797
... ]
9898
... }
9999
>>> parse_advisory_data(raw_data)
100-
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
100+
AdvisoryData(advisory_id='', aliases=['CVE-2024-2379'], summary='QUIC certificate check bypass with wolfSSL', affected_packages=[AffectedPackage(package=PackageURL(type='generic', namespace='curl.se', name='curl', version=None, qualifiers={}, subpath=None), affected_version_range=GenericVersionRange(constraints=(VersionConstraint(comparator='=', version=SemverVersion(string='8.6.0')),)), fixed_version=SemverVersion(string='8.7.0'))], references=[Reference(reference_id='', reference_type='', url='https://curl.se/docs/CVE-2024-2379.html', severities=[VulnerabilitySeverity(system=Cvssv3ScoringSystem(identifier='cvssv3.1', name='CVSSv3.1 Base Score', url='https://www.first.org/cvss/v3-1/', notes='CVSSv3.1 base score and vector'), value='Low', scoring_elements='', published_at=None, url=None)]), Reference(reference_id='', reference_type='', url='https://hackerone.com/reports/2410774', severities=[])], references_v2=[], patches=[], date_published=datetime.datetime(2024, 3, 27, 8, 0, tzinfo=datetime.timezone.utc), weaknesses=[297], severities=[], url='https://curl.se/docs/CVE-2024-2379.json', original_advisory_text=None)
101101
"""
102102

103103
affected = get_item(raw_data, "affected")[0] if len(get_item(raw_data, "affected")) > 0 else []

vulnerabilities/improvers/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
from vulnerabilities.pipelines import flag_ghost_packages
2020
from vulnerabilities.pipelines import populate_vulnerability_summary_pipeline
2121
from vulnerabilities.pipelines import remove_duplicate_advisories
22+
from vulnerabilities.pipelines.v2_improvers import collect_ssvc_trees
2223
from vulnerabilities.pipelines.v2_improvers import compute_advisory_todo as compute_advisory_todo_v2
2324
from vulnerabilities.pipelines.v2_improvers import compute_package_risk as compute_package_risk_v2
2425
from vulnerabilities.pipelines.v2_improvers import (
@@ -70,5 +71,6 @@
7071
compute_advisory_todo_v2.ComputeToDo,
7172
unfurl_version_range_v2.UnfurlVersionRangePipeline,
7273
compute_advisory_todo.ComputeToDo,
74+
collect_ssvc_trees.CollectSSVCPipeline,
7375
]
7476
)

0 commit comments

Comments
 (0)