introduce sync_in_migration
amyasnikov committed Feb 29, 2024
1 parent ee00199 commit 544fa6c
Showing 3 changed files with 27 additions and 17 deletions.
7 changes: 4 additions & 3 deletions validity/migrations/0004_netbox35_scripts.py
@@ -15,14 +15,15 @@ def forward_func(apps, schema_editor):
     if config.netbox_version < "3.5.0":
         return
 
-    from core.models import DataSource
+    from validity.models import VDataSource
     from extras.models import ScriptModule
 
     db_alias = schema_editor.connection.alias
-    data_source = DataSource.objects.using(db_alias).create(
+    data_source = VDataSource.objects.using(db_alias).create(
         name=DATASOURCE_NAME, type="local", source_url="file://" + SCRIPTS_FOLDER, description=_("Required by Validity")
     )
-    data_source.sync()
+    DataFile = apps.get_model("core", "DataFile")
+    data_source.sync_in_migration(DataFile)
     for data_file in data_source.datafiles.using(db_alias).all():
         if data_file.path.endswith("__init__.py") or data_file.path.endswith(".pyc"):
             continue
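For orientation (not part of this hunk): forward_func is the forward callable of a Django data migration. A minimal sketch of how such a function is typically registered, with a hypothetical dependency name, since the Migration class sits outside the diff:

from django.db import migrations


class Migration(migrations.Migration):
    dependencies = [
        ("validity", "0003_previous"),  # hypothetical predecessor migration
    ]

    operations = [
        # RunPython supplies the (apps, schema_editor) pair seen in forward_func
        migrations.RunPython(forward_func, migrations.RunPython.noop),
    ]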
7 changes: 4 additions & 3 deletions validity/migrations/0008_script_change.py
@@ -11,10 +11,11 @@


 def forward_func(apps, schema_editor):
-    from core.models import DataSource
+    from validity.models import VDataSource
     from extras.models import ScriptModule
 
-    datasource, _ = DataSource.objects.get_or_create(
+    DataFile = apps.get_model("core", "DataFile")
+    datasource, _ = VDataSource.objects.get_or_create(
         name=DATASOURCE_NAME,
         type="local",
         defaults={"source_url": f"file://{SCRIPTS_INSTALL_FOLDER.parent}", "description": __("Required by Validity")},
@@ -23,7 +24,7 @@ def forward_func(apps, schema_editor):
     ScriptModule.objects.filter(data_source=datasource).delete()
     datasource.source_url = f"file://{SCRIPTS_INSTALL_FOLDER}"
     datasource.save()
-    datasource.sync()
+    datasource.sync_in_migration(DataFile)
     module = ScriptModule(
         data_source=datasource,
         data_file=datasource.datafiles.get(path=SCRIPT_NAME),
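Both migrations now obtain DataFile via apps.get_model instead of importing it from core.models. The distinction matters inside a migration: apps.get_model returns the historical model, frozen at this point in the migration graph, while a direct import returns the live model, whose fields may be ahead of the actual database schema. An illustrative contrast (not from the commit):

def forward_func(apps, schema_editor):
    # Historical model: its field set matches the migration state being applied.
    DataFile = apps.get_model("core", "DataFile")

    # Live model: reflects current code, which can diverge from the DB schema
    # mid-migration, so it is avoided here:
    # from core.models import DataFile
    ...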
30 changes: 19 additions & 11 deletions validity/models/data.py
@@ -72,35 +72,43 @@ def _sync_status(self):
         DataSource.objects.filter(pk=self.pk).update(status=self.status, last_synced=self.last_synced)
         post_sync.send(sender=self.__class__, instance=self)
 
-    def partial_sync(self, device_filter: Q, batch_size: int = 1000) -> None:
+    def partial_sync(self, device_filter: Q, batch_size: int = 1000) -> set[str]:
         def update_batch(batch):
             for datafile in self.datafiles.filter(path__in=batch).iterator():
                 if datafile.refresh_from_disk(local_path):
                     yield datafile
-                paths.discard(datafile.path)
+                updated_paths.add(datafile.path)
 
         def new_data_file(path):
             df = DataFile(source=self, path=path)
             df.refresh_from_disk(local_path)
             df.full_clean()
             return df
 
-        if self.type != "device_polling":
-            raise SyncError("Partial sync is available only for Data Source with type Device Polling")
         backend = self.get_backend()
-        with backend.fetch(device_filter) as local_path, self._sync_status():
-            paths = self._walk(local_path)
+        fetch = backend.fetch(device_filter) if self.type == "device_polling" else backend.fetch()
+        with fetch as local_path, self._sync_status():
+            all_new_paths = self._walk(local_path)
+            updated_paths = set()
             datafiles_to_update = chain.from_iterable(
-                update_batch(path_batch) for path_batch in batched(paths, batch_size)
+                update_batch(path_batch) for path_batch in batched(all_new_paths, batch_size)
             )
             updated = DataFile.objects.bulk_update(
                 datafiles_to_update, batch_size=batch_size, fields=("last_updated", "size", "hash", "data")
             )
-            new_datafiles = (new_data_file(path) for path in paths)
+            new_datafiles = (new_data_file(path) for path in all_new_paths - updated_paths)
             created = len(DataFile.objects.bulk_create(new_datafiles, batch_size=batch_size))
             logger.debug("%s new files were created and %s existing files were updated during sync", created, updated)
+            return all_new_paths
 
     def sync(self, device_filter: Q | None = None):
-        if device_filter is not None and self.type == "device_polling":
-            return self.partial_sync(device_filter)
-        return super().sync()
+        if device_filter is None or self.type != "device_polling":
+            return super().sync()
+        self.partial_sync(device_filter)
+
+    def sync_in_migration(self, datafile_model: type):
+        """
+        This method performs sync and avoids problems with historical models which have reference to DataFile
+        """
+        new_paths = self.partial_sync(Q())
+        datafile_model.objects.exclude(path__in=new_paths).delete()
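Two details in the rewritten partial_sync do the heavy lifting for this commit: the SyncError guard is gone, so the empty Q() passed by sync_in_migration turns it into a full sync for any source type, and the method now returns the set of walked paths so the caller can purge stale rows through its own (historical) DataFile model. The update path is also streamed: update_batch yields only files whose refresh_from_disk reported a change, and chain.from_iterable flattens the per-batch generators into a single stream for bulk_update. A standalone sketch of that batching pattern, assuming the batched helper behaves like Python 3.12's itertools.batched:

from itertools import chain, islice


def batched(iterable, n):
    # Assumed semantics of the helper used above (itertools.batched in 3.12+).
    it = iter(iterable)
    while chunk := tuple(islice(it, n)):
        yield chunk


def update_batch(batch):
    # Stand-in for the real update_batch: yield only the items that "changed".
    return (item for item in batch if item % 2 == 0)


paths = range(10)
stream = chain.from_iterable(update_batch(b) for b in batched(paths, 4))
print(list(stream))  # [0, 2, 4, 6, 8] -- one flat stream, built batch by batch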