From 544fa6c679bb587f6791d1e2bac068f922b6e287 Mon Sep 17 00:00:00 2001
From: Anton M
Date: Fri, 1 Mar 2024 02:18:43 +0500
Subject: [PATCH] introduce sync_in_migration

---
 validity/migrations/0004_netbox35_scripts.py |  7 +++--
 validity/migrations/0008_script_change.py    |  7 +++--
 validity/models/data.py                      | 30 +++++++++++++-------
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/validity/migrations/0004_netbox35_scripts.py b/validity/migrations/0004_netbox35_scripts.py
index 32450cd..bab1f28 100644
--- a/validity/migrations/0004_netbox35_scripts.py
+++ b/validity/migrations/0004_netbox35_scripts.py
@@ -15,14 +15,15 @@ def forward_func(apps, schema_editor):
     if config.netbox_version < "3.5.0":
         return
 
-    from core.models import DataSource
+    from validity.models import VDataSource
     from extras.models import ScriptModule
 
     db_alias = schema_editor.connection.alias
-    data_source = DataSource.objects.using(db_alias).create(
+    data_source = VDataSource.objects.using(db_alias).create(
         name=DATASOURCE_NAME, type="local", source_url="file://" + SCRIPTS_FOLDER, description=_("Required by Validity")
     )
-    data_source.sync()
+    DataFile = apps.get_model("core", "DataFile")
+    data_source.sync_in_migration(DataFile)
     for data_file in data_source.datafiles.using(db_alias).all():
         if data_file.path.endswith("__init__.py") or data_file.path.endswith(".pyc"):
             continue
diff --git a/validity/migrations/0008_script_change.py b/validity/migrations/0008_script_change.py
index 3a88371..a99c4ac 100644
--- a/validity/migrations/0008_script_change.py
+++ b/validity/migrations/0008_script_change.py
@@ -11,10 +11,11 @@
 
 
 def forward_func(apps, schema_editor):
-    from core.models import DataSource
+    from validity.models import VDataSource
     from extras.models import ScriptModule
 
-    datasource, _ = DataSource.objects.get_or_create(
+    DataFile = apps.get_model("core", "DataFile")
+    datasource, _ = VDataSource.objects.get_or_create(
         name=DATASOURCE_NAME,
         type="local",
         defaults={"source_url": f"file://{SCRIPTS_INSTALL_FOLDER.parent}", "description": __("Required by Validity")},
@@ -23,7 +24,7 @@ def forward_func(apps, schema_editor):
     ScriptModule.objects.filter(data_source=datasource).delete()
     datasource.source_url = f"file://{SCRIPTS_INSTALL_FOLDER}"
     datasource.save()
-    datasource.sync()
+    datasource.sync_in_migration(DataFile)
     module = ScriptModule(
         data_source=datasource,
         data_file=datasource.datafiles.get(path=SCRIPT_NAME),
diff --git a/validity/models/data.py b/validity/models/data.py
index 9396a94..af97eb2 100644
--- a/validity/models/data.py
+++ b/validity/models/data.py
@@ -72,12 +72,12 @@ def _sync_status(self):
         DataSource.objects.filter(pk=self.pk).update(status=self.status, last_synced=self.last_synced)
         post_sync.send(sender=self.__class__, instance=self)
 
-    def partial_sync(self, device_filter: Q, batch_size: int = 1000) -> None:
+    def partial_sync(self, device_filter: Q, batch_size: int = 1000) -> set[str]:
         def update_batch(batch):
             for datafile in self.datafiles.filter(path__in=batch).iterator():
                 if datafile.refresh_from_disk(local_path):
                     yield datafile
-                    paths.discard(datafile.path)
+                    updated_paths.add(datafile.path)
 
         def new_data_file(path):
             df = DataFile(source=self, path=path)
@@ -85,22 +85,30 @@ def new_data_file(path):
             df.full_clean()
             return df
 
-        if self.type != "device_polling":
-            raise SyncError("Partial sync is available only for Data Source with type Device Polling")
         backend = self.get_backend()
-        with backend.fetch(device_filter) as local_path, self._sync_status():
-            paths = self._walk(local_path)
+        fetch = backend.fetch(device_filter) if self.type == "device_polling" else backend.fetch()
+        with fetch as local_path, self._sync_status():
+            all_new_paths = self._walk(local_path)
+            updated_paths = set()
             datafiles_to_update = chain.from_iterable(
-                update_batch(path_batch) for path_batch in batched(paths, batch_size)
+                update_batch(path_batch) for path_batch in batched(all_new_paths, batch_size)
             )
             updated = DataFile.objects.bulk_update(
                 datafiles_to_update, batch_size=batch_size, fields=("last_updated", "size", "hash", "data")
             )
-            new_datafiles = (new_data_file(path) for path in paths)
+            new_datafiles = (new_data_file(path) for path in all_new_paths - updated_paths)
             created = len(DataFile.objects.bulk_create(new_datafiles, batch_size=batch_size))
             logger.debug("%s new files were created and %s existing files were updated during sync", created, updated)
+            return all_new_paths
 
     def sync(self, device_filter: Q | None = None):
-        if device_filter is not None and self.type == "device_polling":
-            return self.partial_sync(device_filter)
-        return super().sync()
+        if device_filter is None or self.type != "device_polling":
+            return super().sync()
+        self.partial_sync(device_filter)
+
+    def sync_in_migration(self, datafile_model: type):
+        """
+        This method performs sync and avoids problems with historical models which have reference to DataFile
+        """
+        new_paths = self.partial_sync(Q())
+        datafile_model.objects.exclude(path__in=new_paths).delete()
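
Note (not part of the patch): a minimal sketch of how a follow-up data migration would be expected to call the new sync_in_migration() helper, mirroring the pattern used in 0004 and 0008 above. The data source name, source_url and migration dependency are illustrative assumptions, not values taken from the commit.

from django.db import migrations


def forward_func(apps, schema_editor):
    # Runtime import of the concrete model, as the patched migrations do,
    # so the instance exposes the new sync_in_migration() helper.
    from validity.models import VDataSource

    # Historical DataFile model: sync_in_migration() syncs via partial_sync(Q())
    # and then deletes every DataFile row whose path was not (re)created,
    # keeping references seen by historical models consistent.
    DataFile = apps.get_model("core", "DataFile")

    data_source, _ = VDataSource.objects.get_or_create(
        name="example_scripts",  # illustrative name only
        type="local",
        defaults={"source_url": "file:///opt/example_scripts"},  # illustrative URL
    )
    data_source.sync_in_migration(DataFile)


class Migration(migrations.Migration):
    dependencies = [("validity", "0008_script_change")]  # hypothetical dependency
    operations = [migrations.RunPython(forward_func, migrations.RunPython.noop)]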