Skip to content

Commit e5c598b

Browse files
authored
Remove duplicated changelogs (#1400)
* Remove duplicated changelogs Signed-off-by: Tushar Goel <[email protected]> * Fix migrations Signed-off-by: Tushar Goel <[email protected]> * Fix migrations Signed-off-by: Tushar Goel <[email protected]> --------- Signed-off-by: Tushar Goel <[email protected]>
1 parent 2471c87 commit e5c598b

4 files changed

+124
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# VulnerableCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/vulnerablecode for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from django.db import migrations
11+
from django.db import models
12+
13+
14+
class Migration(migrations.Migration):
    """
    Data migration that removes duplicated change log rows.

    Rows of ``PackageChangeLog`` and ``VulnerabilityChangeLog`` that share the
    same ``actor_name``, ``action_type`` and ``source_url`` are treated as one
    duplicate group. For each group only the row with the lowest
    ``software_version`` is kept; every other row of the group is deleted.
    The reverse operation is a no-op: deleted rows are not restored.
    """

    def remove_duped_changelogs(apps, schema_editor):
        """
        Delete duplicated change logs, keeping one row per duplicate group.

        ``apps`` is the historical app registry passed in by
        ``migrations.RunPython``; ``schema_editor`` is accepted because
        ``RunPython`` supplies it, but it is not used.
        """
        PackageChangeLog = apps.get_model("vulnerabilities", "PackageChangeLog")
        VulnerabilityChangeLog = apps.get_model("vulnerabilities", "VulnerabilityChangeLog")

        # Fields whose combined value defines a duplicate group.
        # NOTE(review): the owning package / vulnerability is not part of this
        # key, so logs of different objects with the same actor, action and
        # source are merged as well -- confirm this is intended.
        group_fields = ("actor_name", "action_type", "source_url")

        for model in (PackageChangeLog, VulnerabilityChangeLog):
            # Clear any ordering first so that only ``group_fields`` take part
            # in the GROUP BY, then keep the groups holding more than one row.
            duplicate_groups = (
                model.objects.order_by()
                .values(*group_fields)
                .annotate(count=models.Count("id"))
                .filter(count__gt=1)
            )

            ids_to_delete = []
            for group in duplicate_groups:
                same_data = model.objects.filter(
                    **{field: group[field] for field in group_fields}
                )

                # Keep the row with the older software version. The ``id``
                # tie-breaker makes the choice deterministic when several rows
                # carry the same version.
                # NOTE(review): the comparison is done by the database on the
                # stored value; if the column is text, "10" sorts before "9".
                keeper_id = (
                    same_data.order_by("software_version", "id")
                    .values_list("id", flat=True)
                    .first()
                )

                # Only primary keys are fetched: there is no need to build a
                # model instance for every row that is about to be deleted.
                ids_to_delete.extend(
                    same_data.exclude(id=keeper_id).values_list("id", flat=True)
                )

            model.objects.filter(id__in=ids_to_delete).delete()

    dependencies = [
        ("vulnerabilities", "0054_alter_packagechangelog_software_version_and_more"),
    ]

    operations = [
        migrations.RunPython(remove_duped_changelogs, reverse_code=migrations.RunPython.noop),
    ]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Generated by Django 4.1.13 on 2024-01-22 09:42
2+
3+
from django.db import migrations
4+
5+
6+
class Migration(migrations.Migration):
    """
    Schema migration declaring a unique-together set on both change log models.

    ``packagechangelog`` and ``vulnerabilitychangelog`` each get the same
    ``unique_together`` group:
    ``("action_time", "actor_name", "action_type", "source_url")``.
    It depends on migration 0055.
    """

    dependencies = [
        ("vulnerabilities", "0055_remove_changelogs_with_same_data_different_software_version"),
    ]

    # Both models receive exactly the same operation, so build one per model
    # name instead of spelling the operation out twice.
    operations = [
        migrations.AlterUniqueTogether(
            name=changelog_model,
            unique_together={("action_time", "actor_name", "action_type", "source_url")},
        )
        for changelog_model in ("packagechangelog", "vulnerabilitychangelog")
    ]

vulnerabilities/models.py

+1
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,7 @@ def get_iso_time(self):
11451145
class Meta:
    # Model options for the enclosing model.

    # Abstract base: Django creates no database table for this class itself.
    abstract = True
    # Default ordering: descending ``action_time``, i.e. latest action first.
    ordering = ("-action_time",)
    # One uniqueness group: two rows may not share all four of these values.
    unique_together = ("action_time", "actor_name", "action_type", "source_url")
11481149

11491150

11501151
class VulnerabilityHistoryManager(models.Manager):

vulnerabilities/tests/test_data_migrations.py

+50
Original file line numberDiff line numberDiff line change
@@ -610,3 +610,53 @@ def setUpBeforeMigration(self, apps):
610610
def test_removal_of_duped_purls(self):
    """Check that exactly one ``Package`` row exists."""
    package_model = apps.get_model("vulnerabilities", "Package")
    remaining = package_model.objects.count()
    assert remaining == 1
613+
614+
615+
class TestRemoveDupedChangeLogWithSameData(TestMigrations):
    """
    Test data migration 0055, which removes duplicated change logs.

    Two logs that differ only by ``software_version`` are created for each
    change log model before migrating. After migrating, a single log must
    remain per model, and it must be the one with the older version.
    """

    app_name = "vulnerabilities"
    migrate_from = "0054_alter_packagechangelog_software_version_and_more"
    migrate_to = "0055_remove_changelogs_with_same_data_different_software_version"

    def setUpBeforeMigration(self, apps):
        """Create one package, one vulnerability and two duplicated logs per model."""
        PackageChangeLog = apps.get_model("vulnerabilities", "PackageChangeLog")
        VulnerabilityChangeLog = apps.get_model("vulnerabilities", "VulnerabilityChangeLog")
        Package = apps.get_model("vulnerabilities", "Package")
        Vulnerability = apps.get_model("vulnerabilities", "Vulnerability")

        pkg1 = Package.objects.create(type="nginx", name="nginx", qualifiers={"os": "windows"})
        vuln = Vulnerability.objects.create(summary="NEW")

        # Values shared by every log: these are what make the rows duplicates.
        shared = {"actor_name": "Nginx", "action_type": 1, "source_url": "test"}

        # The two models are filled in opposite version order on purpose, so
        # the row that survives cannot be explained by insertion order alone.
        for version in ("1", "2"):
            PackageChangeLog.objects.create(
                software_version=version,
                package=pkg1,
                related_vulnerability=vuln,
                **shared,
            )
        for version in ("2", "1"):
            VulnerabilityChangeLog.objects.create(
                software_version=version,
                vulnerability=vuln,
                **shared,
            )

    def test_removal_of_changelog(self):
        """Only the log with the older software version survives, per model."""
        PackageChangeLog = apps.get_model("vulnerabilities", "PackageChangeLog")
        VulnerabilityChangeLog = apps.get_model("vulnerabilities", "VulnerabilityChangeLog")

        assert PackageChangeLog.objects.all().count() == 1
        assert VulnerabilityChangeLog.objects.all().count() == 1

        # Counting rows is not enough: also check which row was kept.
        assert PackageChangeLog.objects.get().software_version == "1"
        assert VulnerabilityChangeLog.objects.get().software_version == "1"

0 commit comments

Comments
 (0)