Skip to content

Commit 89770c8

Browse files
authored
Merge pull request #1784 from aboutcode-org/convert-advisory-alias-to-concrete-relation
Migrate Advisory aliases field to M2M relationship
2 parents d84d5dc + d86e3c2 commit 89770c8

24 files changed

+2587
-3171
lines changed

vulnerabilities/import_runner.py

+30-25
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
from django.core.exceptions import ValidationError
1717
from django.db import transaction
18+
from django.db.models.query import QuerySet
1819

1920
from vulnerabilities.importer import AdvisoryData
2021
from vulnerabilities.importer import Importer
@@ -96,23 +97,29 @@ def process_advisories(
9697
Insert advisories into the database
9798
Return the number of inserted advisories.
9899
"""
100+
from vulnerabilities.pipes.advisory import get_or_create_aliases
101+
from vulnerabilities.utils import compute_content_id
102+
99103
count = 0
100104
advisories = []
101105
for data in advisory_datas:
106+
content_id = compute_content_id(advisory_data=data)
102107
try:
108+
aliases = get_or_create_aliases(aliases=data.aliases)
103109
obj, created = Advisory.objects.get_or_create(
104-
aliases=data.aliases,
105-
summary=data.summary,
106-
affected_packages=[pkg.to_dict() for pkg in data.affected_packages],
107-
references=[ref.to_dict() for ref in data.references],
108-
date_published=data.date_published,
109-
weaknesses=data.weaknesses,
110+
unique_content_id=content_id,
111+
url=data.url,
110112
defaults={
113+
"summary": data.summary,
114+
"affected_packages": [pkg.to_dict() for pkg in data.affected_packages],
115+
"references": [ref.to_dict() for ref in data.references],
116+
"date_published": data.date_published,
117+
"weaknesses": data.weaknesses,
111118
"created_by": importer_name,
112119
"date_collected": datetime.datetime.now(tz=datetime.timezone.utc),
113120
},
114-
url=data.url,
115121
)
122+
obj.aliases.add(*aliases)
116123
if not obj.date_imported:
117124
advisories.append(obj)
118125
except Exception as e:
@@ -148,6 +155,8 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver
148155
erroneous. Also, the atomic transaction for every advisory and its
149156
inferences makes sure that date_imported of advisory is consistent.
150157
"""
158+
from vulnerabilities.pipes.advisory import get_or_create_aliases
159+
151160
inferences_processed_count = 0
152161

153162
if not inferences:
@@ -157,9 +166,10 @@ def process_inferences(inferences: List[Inference], advisory: Advisory, improver
157166
logger.info(f"Improving advisory id: {advisory.id}")
158167

159168
for inference in inferences:
169+
aliases = get_or_create_aliases(inference.aliases)
160170
vulnerability = get_or_create_vulnerability_and_aliases(
161171
vulnerability_id=inference.vulnerability_id,
162-
aliases=inference.aliases,
172+
aliases=aliases,
163173
summary=inference.summary,
164174
advisory=advisory,
165175
)
@@ -265,14 +275,13 @@ def create_valid_vulnerability_reference(url, reference_id=None):
265275

266276

267277
def get_or_create_vulnerability_and_aliases(
268-
aliases: List[str], vulnerability_id=None, summary=None, advisory=None
278+
aliases: QuerySet, vulnerability_id=None, summary=None, advisory=None
269279
):
270280
"""
271281
Get or create vulnerability and aliases such that all existing and new
272282
aliases point to the same vulnerability
273283
"""
274-
aliases = set(alias.strip() for alias in aliases if alias and alias.strip())
275-
new_alias_names, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases)
284+
new_aliases, existing_vulns = get_vulns_for_aliases_and_get_new_aliases(aliases)
276285

277286
# All aliases must point to the same vulnerability
278287
vulnerability = None
@@ -310,11 +319,11 @@ def get_or_create_vulnerability_and_aliases(
310319
# f"Inconsistent summary for {vulnerability.vulnerability_id}. "
311320
# f"Existing: {vulnerability.summary!r}, provided: {summary!r}"
312321
# )
313-
associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_alias_names)
322+
associate_vulnerability_with_aliases(vulnerability=vulnerability, aliases=new_aliases)
314323
else:
315324
try:
316325
vulnerability = create_vulnerability_and_add_aliases(
317-
aliases=new_alias_names, summary=summary
326+
aliases=new_aliases, summary=summary
318327
)
319328
importer_name = get_importer_name(advisory)
320329
VulnerabilityChangeLog.log_import(
@@ -324,24 +333,22 @@ def get_or_create_vulnerability_and_aliases(
324333
)
325334
except Exception as e:
326335
logger.error(
327-
f"Cannot create vulnerability with summary {summary!r} and {new_alias_names!r} {e!r}.\n{traceback_format_exc()}."
336+
f"Cannot create vulnerability with summary {summary!r} and {new_aliases!r} {e!r}.\n{traceback_format_exc()}."
328337
)
329338
return
330339

331340
return vulnerability
332341

333342

334-
def get_vulns_for_aliases_and_get_new_aliases(aliases: QuerySet):
    """
    Split the ``aliases`` queryset of Alias rows by whether each alias is
    already linked to a vulnerability.

    Return a ``(new_aliases, existing_vulns)`` tuple where:

    - ``new_aliases`` is a lazy queryset of the given aliases that have no
      vulnerability yet. It is not evaluated here, so callers can still
      call queryset methods such as ``update()`` on it.
    - ``existing_vulns`` is a list of the distinct vulnerabilities that the
      remaining aliases point to.
    """
    new_aliases = aliases.filter(vulnerability__isnull=True)

    # Fetch the related vulnerability in the same query instead of issuing
    # one extra query per alias when ``alias.vulnerability`` is accessed.
    linked_aliases = aliases.filter(vulnerability__isnull=False).select_related("vulnerability")

    # Several aliases commonly point to the same vulnerability: deduplicate.
    existing_vulns = {alias.vulnerability for alias in linked_aliases}

    return new_aliases, list(existing_vulns)
345352

346353

347354
@transaction.atomic
@@ -360,7 +367,5 @@ def create_vulnerability_and_add_aliases(aliases, summary):
360367

361368

362369
def associate_vulnerability_with_aliases(aliases, vulnerability):
    """
    Point every Alias row of the ``aliases`` queryset to ``vulnerability``
    and log the names of the aliases that were linked.

    ``aliases`` must be a queryset of Alias rows; nothing is updated or
    logged when it is empty.
    """
    # ``aliases`` is typically a lazy queryset filtered on
    # ``vulnerability__isnull=True``. Once the update below has run, that
    # filter no longer matches the updated rows, so formatting the queryset
    # afterwards would log an empty result. Capture the names beforehand.
    alias_names = list(aliases.values_list("alias", flat=True))
    if not alias_names:
        return

    aliases.update(vulnerability=vulnerability)
    logger.info(f"New alias for {vulnerability!r}: {alias_names}")

vulnerabilities/importer.py

+2
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ class Reference:
103103
def __post_init__(self):
    """
    Post-initialization hook: validate ``url`` and normalize
    ``reference_id`` to a string.

    Raise a TypeError when ``url`` is empty or missing.
    """
    if not self.url:
        raise TypeError("Reference must have a url")

    # Compare against None instead of relying on truthiness so that falsy
    # non-string identifiers (such as the integer 0) are converted too.
    # None and strings (including the empty string) are left untouched.
    if self.reference_id is not None and not isinstance(self.reference_id, str):
        self.reference_id = str(self.reference_id)
106108

107109
def __lt__(self, other):
108110
if not isinstance(other, Reference):

vulnerabilities/improvers/vulnerability_status.py

+2-5
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from typing import Iterable
1111
from urllib.parse import urljoin
1212

13-
from django.db.models import Q
1413
from django.db.models.query import QuerySet
1514

1615
from vulnerabilities.importer import AdvisoryData
@@ -37,10 +36,8 @@ class VulnerabilityStatusImprover(Improver):
3736

3837
@property
def interesting_advisories(self) -> Iterable[Advisory]:
    """
    Return an iterator over the advisories created by the NVD importer
    pipeline.

    The rows are streamed with ``QuerySet.iterator()`` in chunks of 5000.
    The returned object is therefore a plain iterator that can be consumed
    only once, not a QuerySet that can be filtered further.
    """
    nvd_advisories = Advisory.objects.filter(created_by=NVDImporterPipeline.pipeline_id)
    return nvd_advisories.iterator(chunk_size=5000)
4542

4643
def get_inferences(self, advisory_data: AdvisoryData) -> Iterable[Inference]:

0 commit comments

Comments
 (0)