Skip to content

Commit b34649b

Browse files
committed
Add new content ID function
Signed-off-by: Tushar Goel <[email protected]>
1 parent 56eb442 commit b34649b

File tree

2 files changed

+47
-1
lines changed

2 files changed

+47
-1
lines changed

vulnerabilities/models.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from aboutcode import hashid
4545
from vulnerabilities import utils
4646
from vulnerabilities.severity_systems import SCORING_SYSTEMS
47-
from vulnerabilities.utils import normalize_purl
47+
from vulnerabilities.utils import compute_content_id, normalize_purl
4848
from vulnerabilities.utils import purl_to_dict
4949
from vulnerablecode import __version__ as VULNERABLECODE_VERSION
5050

@@ -1230,6 +1230,11 @@ def save(self, *args, **kwargs):
12301230
checksum.update(value)
12311231
self.unique_content_id = checksum.hexdigest()
12321232
super().save(*args, **kwargs)
1233+
1234+
def save(self, *args, **kwargs):
    """
    Refresh ``unique_content_id`` from this record's advisory data, then save.

    The content id is computed by ``compute_content_id`` over the result of
    ``self.to_advisory_data()`` with ``include_metadata=False``. All positional
    and keyword arguments are forwarded unchanged to the parent ``save``.
    """
    # NOTE(review): another ``save`` with the same signature appears directly
    # above this one in the diff; if both live in the same class body, Python
    # keeps only this later definition -- confirm the earlier one can go.
    self.unique_content_id = compute_content_id(
        self.to_advisory_data(),
        include_metadata=False,
    )
    super().save(*args, **kwargs)
12331238

12341239
def to_advisory_data(self) -> "AdvisoryData":
12351240
from vulnerabilities.importer import AdvisoryData

vulnerabilities/utils.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import bisect
1111
import csv
1212
import dataclasses
13+
import hashlib
1314
import json
1415
import logging
1516
import os
@@ -536,3 +537,43 @@ def normalize_purl(purl: Union[PackageURL, str]):
536537
if isinstance(purl, PackageURL):
537538
purl = str(purl)
538539
return PackageURL.from_string(purl)
540+
541+
542+
543+
def compute_content_id(advisory_data, include_metadata=False):
    """
    Compute a unique content_id for an advisory by normalizing its data and hashing it.

    The ``summary``, ``affected_packages``, ``references`` and ``weaknesses``
    attributes of ``advisory_data`` are normalized, serialized as compact JSON
    with sorted keys, and hashed. The order of entries inside the three list
    attributes does not influence the result.

    :param advisory_data: An AdvisoryData object (any object exposing the
        attributes named above, plus ``created_by`` and ``url`` when
        ``include_metadata`` is true)
    :param include_metadata: Boolean indicating whether to include `created_by` and `url`
    :return: SHA-256 hex digest (64 characters) as content_id
    :raises TypeError: if an entry cannot be converted to JSON
    """

    def normalize_text(text):
        """Normalize text by removing spaces and converting to lowercase."""
        return text.replace(" ", "").lower() if text else ""

    def canonical_json(value):
        """Serialize ``value`` as compact JSON with sorted keys."""
        return json.dumps(value, separators=(",", ":"), sort_keys=True)

    def to_serializable(item):
        """Turn rich entries into plain data that ``json`` can encode."""
        to_dict = getattr(item, "to_dict", None)
        if callable(to_dict):
            return to_dict()
        # is_dataclass() is also true for dataclass *types*; only instances
        # can be converted with asdict().
        if dataclasses.is_dataclass(item) and not isinstance(item, type):
            return dataclasses.asdict(item)
        return item

    def normalize_list(lst):
        """
        Return the entries of ``lst`` as plain data in a deterministic order.

        Entries are ordered by their canonical JSON text: dicts do not support
        ``<``, so a bare ``sorted()`` would fail on them, while the JSON text
        gives every serializable entry a total order.
        """
        return sorted((to_serializable(item) for item in lst or []), key=canonical_json)

    # Normalize fields
    normalized_data = {
        "summary": normalize_text(advisory_data.summary),
        "affected_packages": normalize_list(advisory_data.affected_packages),
        "references": normalize_list(advisory_data.references),
        "weaknesses": normalize_list(advisory_data.weaknesses),
    }

    if include_metadata:
        normalized_data["created_by"] = advisory_data.created_by
        normalized_data["url"] = advisory_data.url

    # SHA-256, as documented: the hex digest is 64 characters long.
    return hashlib.sha256(canonical_json(normalized_data).encode("utf-8")).hexdigest()

0 commit comments

Comments
 (0)