From 544fa6c679bb587f6791d1e2bac068f922b6e287 Mon Sep 17 00:00:00 2001
From: Anton M
Date: Fri, 1 Mar 2024 02:18:43 +0500
Subject: [PATCH] introduce sync_in_migration

---
 validity/migrations/0004_netbox35_scripts.py |  7 +++--
 validity/migrations/0008_script_change.py    |  7 +++--
 validity/models/data.py                      | 30 +++++++++++++-------
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/validity/migrations/0004_netbox35_scripts.py b/validity/migrations/0004_netbox35_scripts.py
index 32450cd..bab1f28 100644
--- a/validity/migrations/0004_netbox35_scripts.py
+++ b/validity/migrations/0004_netbox35_scripts.py
@@ -15,14 +15,15 @@ def forward_func(apps, schema_editor):
     if config.netbox_version < "3.5.0":
         return
 
-    from core.models import DataSource
+    from validity.models import VDataSource
     from extras.models import ScriptModule
 
     db_alias = schema_editor.connection.alias
-    data_source = DataSource.objects.using(db_alias).create(
+    data_source = VDataSource.objects.using(db_alias).create(
         name=DATASOURCE_NAME, type="local", source_url="file://" + SCRIPTS_FOLDER, description=_("Required by Validity")
     )
-    data_source.sync()
+    DataFile = apps.get_model("core", "DataFile")
+    data_source.sync_in_migration(DataFile)
     for data_file in data_source.datafiles.using(db_alias).all():
         if data_file.path.endswith("__init__.py") or data_file.path.endswith(".pyc"):
             continue
diff --git a/validity/migrations/0008_script_change.py b/validity/migrations/0008_script_change.py
index 3a88371..a99c4ac 100644
--- a/validity/migrations/0008_script_change.py
+++ b/validity/migrations/0008_script_change.py
@@ -11,10 +11,11 @@
 
 
 def forward_func(apps, schema_editor):
-    from core.models import DataSource
+    from validity.models import VDataSource
     from extras.models import ScriptModule
 
-    datasource, _ = DataSource.objects.get_or_create(
+    DataFile = apps.get_model("core", "DataFile")
+    datasource, _ = VDataSource.objects.get_or_create(
         name=DATASOURCE_NAME,
         type="local",
         defaults={"source_url": f"file://{SCRIPTS_INSTALL_FOLDER.parent}", "description": __("Required by Validity")},
@@ -23,7 +24,7 @@ def forward_func(apps, schema_editor):
     ScriptModule.objects.filter(data_source=datasource).delete()
     datasource.source_url = f"file://{SCRIPTS_INSTALL_FOLDER}"
     datasource.save()
-    datasource.sync()
+    datasource.sync_in_migration(DataFile)
     module = ScriptModule(
         data_source=datasource,
         data_file=datasource.datafiles.get(path=SCRIPT_NAME),
diff --git a/validity/models/data.py b/validity/models/data.py
index 9396a94..af97eb2 100644
--- a/validity/models/data.py
+++ b/validity/models/data.py
@@ -72,12 +72,12 @@ def _sync_status(self):
         DataSource.objects.filter(pk=self.pk).update(status=self.status, last_synced=self.last_synced)
         post_sync.send(sender=self.__class__, instance=self)
 
-    def partial_sync(self, device_filter: Q, batch_size: int = 1000) -> None:
+    def partial_sync(self, device_filter: Q, batch_size: int = 1000) -> set[str]:
         def update_batch(batch):
             for datafile in self.datafiles.filter(path__in=batch).iterator():
                 if datafile.refresh_from_disk(local_path):
                     yield datafile
-                    paths.discard(datafile.path)
+                    updated_paths.add(datafile.path)
 
         def new_data_file(path):
             df = DataFile(source=self, path=path)
@@ -85,22 +85,30 @@ def new_data_file(path):
             df.full_clean()
             return df
 
-        if self.type != "device_polling":
-            raise SyncError("Partial sync is available only for Data Source with type Device Polling")
         backend = self.get_backend()
-        with backend.fetch(device_filter) as local_path, self._sync_status():
-            paths = self._walk(local_path)
+        fetch = backend.fetch(device_filter) if self.type == "device_polling" else backend.fetch()
+        with fetch as local_path, self._sync_status():
+            all_new_paths = self._walk(local_path)
+            updated_paths = set()
             datafiles_to_update = chain.from_iterable(
-                update_batch(path_batch) for path_batch in batched(paths, batch_size)
+                update_batch(path_batch) for path_batch in batched(all_new_paths, batch_size)
             )
             updated = DataFile.objects.bulk_update(
                 datafiles_to_update, batch_size=batch_size, fields=("last_updated", "size", "hash", "data")
             )
-            new_datafiles = (new_data_file(path) for path in paths)
+            new_datafiles = (new_data_file(path) for path in all_new_paths - updated_paths)
             created = len(DataFile.objects.bulk_create(new_datafiles, batch_size=batch_size))
             logger.debug("%s new files were created and %s existing files were updated during sync", created, updated)
+            return all_new_paths
 
     def sync(self, device_filter: Q | None = None):
-        if device_filter is not None and self.type == "device_polling":
-            return self.partial_sync(device_filter)
-        return super().sync()
+        if device_filter is None or self.type != "device_polling":
+            return super().sync()
+        self.partial_sync(device_filter)
+
+    def sync_in_migration(self, datafile_model: type):
+        """
+        This method performs sync and avoids problems with historical models which have reference to DataFile
+        """
+        new_paths = self.partial_sync(Q())
+        datafile_model.objects.exclude(path__in=new_paths).delete()
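
Note (not part of the patch): a minimal sketch of how a follow-up data migration would be expected to call the new sync_in_migration() helper, mirroring the pattern used in 0004 and 0008 above. The data source name, source_url and migration dependency are illustrative assumptions, not values taken from the commit.

from django.db import migrations


def forward_func(apps, schema_editor):
    # Runtime import of the concrete model, as the patched migrations do,
    # so the instance exposes the new sync_in_migration() helper.
    from validity.models import VDataSource

    # Historical DataFile model: sync_in_migration() syncs via partial_sync(Q())
    # and then deletes every DataFile row whose path was not (re)created,
    # keeping references seen by historical models consistent.
    DataFile = apps.get_model("core", "DataFile")

    data_source, _ = VDataSource.objects.get_or_create(
        name="example_scripts",  # illustrative name only
        type="local",
        defaults={"source_url": "file:///opt/example_scripts"},  # illustrative URL
    )
    data_source.sync_in_migration(DataFile)


class Migration(migrations.Migration):
    dependencies = [("validity", "0008_script_change")]  # hypothetical dependency
    operations = [migrations.RunPython(forward_func, migrations.RunPython.noop)]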