|
10 | 10 | import bisect
|
11 | 11 | import csv
|
12 | 12 | import dataclasses
|
| 13 | +import hashlib |
13 | 14 | import json
|
14 | 15 | import logging
|
15 | 16 | import os
|
@@ -536,3 +537,43 @@ def normalize_purl(purl: Union[PackageURL, str]):
|
536 | 537 | if isinstance(purl, PackageURL):
|
537 | 538 | purl = str(purl)
|
538 | 539 | return PackageURL.from_string(purl)
|
| 540 | + |
| 541 | + |
| 542 | + |
def compute_content_id(advisory_data, include_metadata=False):
    """
    Compute a deterministic content_id for an advisory by normalizing its data and hashing it.

    The summary, affected packages, references and weaknesses are normalized,
    serialized as compact JSON with sorted keys, and hashed with SHA-512.

    :param advisory_data: object exposing ``summary``, ``affected_packages``,
        ``references`` and ``weaknesses`` attributes (plus ``created_by`` and
        ``url`` when ``include_metadata`` is true)
    :param include_metadata: Boolean indicating whether to include `created_by` and `url`
    :return: SHA-512 hexadecimal digest (128 characters) as content_id
    :raises TypeError: if a value is still not JSON-serializable after normalization
    """

    def normalize_text(text):
        """Remove space characters and lowercase; falsy text becomes ""."""
        return text.replace(" ", "").lower() if text else ""

    def to_plain(item):
        """Return ``item.to_dict()`` when the item has a callable ``to_dict``, else the item."""
        # NOTE(review): assumes advisory sub-objects expose `to_dict()` returning
        # JSON-friendly data -- confirm against the advisory data classes.
        to_dict = getattr(item, "to_dict", None)
        return to_dict() if callable(to_dict) else item

    def canonical(item):
        """Serialize an item to compact, key-sorted JSON text used as a sort key."""
        return json.dumps(item, separators=(",", ":"), sort_keys=True)

    def normalize_list(lst):
        """Return the items in a consistent order; a falsy list becomes []."""
        if not lst:
            return []
        items = [to_plain(item) for item in lst]
        try:
            # Natural ordering keeps the result identical for inputs that were
            # already sortable (strings, numbers).
            return sorted(items)
        except TypeError:
            # Dicts and other unorderable items: order by their canonical JSON
            # text so the result does not depend on the input order.
            return sorted(items, key=canonical)

    # Normalize fields
    normalized_data = {
        "summary": normalize_text(advisory_data.summary),
        "affected_packages": normalize_list(advisory_data.affected_packages),
        "references": normalize_list(advisory_data.references),
        "weaknesses": normalize_list(advisory_data.weaknesses),
    }

    if include_metadata:
        normalized_data["created_by"] = advisory_data.created_by
        normalized_data["url"] = advisory_data.url

    # Compact separators and sorted keys make the serialized form canonical.
    normalized_json = json.dumps(normalized_data, separators=(",", ":"), sort_keys=True)
    return hashlib.sha512(normalized_json.encode("utf-8")).hexdigest()
0 commit comments