
Commit fa41661

feat: support for restoring component static files (#401)
This commit also adds optional arguments to the create_next_*_version API functions to better support the backup/restore use case, where versions may be non-contiguous. For example, the backup archive might include only a published version 4 and a draft version 6 of a given entity, and not versions 1-3 or 5 (a sketch of this scenario follows the file summary below).
1 parent: 38dc6c6 · commit: fa41661

12 files changed (+320, -34 lines)

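To make the non-contiguous example above concrete, here is a rough sketch (not part of the commit) of how a restore flow can recreate published version 4 with the new force_version_num argument; the id, bytes, and the "block.xml" key are placeholders for illustration, and the full signature appears in the components/api.py diff at the bottom of this page:

from datetime import datetime, timezone

from openedx_learning.apps.authoring.components import api as components_api

component_entity_id = 42                               # placeholder: PublishableEntity id of the restored Component
published_olx_bytes = b"<html>published body</html>"   # placeholder content read from the backup archive

# Recreate published version 4 directly, even though versions 1-3 are not in the backup.
components_api.create_next_component_version(
    component_entity_id,
    content_to_replace={"block.xml": published_olx_bytes},  # illustrative file key
    created=datetime.now(tz=timezone.utc),
    title="Restored published version",
    force_version_num=4,   # keep the original version number instead of auto-incrementing
)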

openedx_learning/apps/authoring/backup_restore/api.py (1 addition, 1 deletion)

@@ -22,4 +22,4 @@ def load_dump_zip_file(path: str) -> None:
     Loads a zip file derived from create_zip_file
     """
     with zipfile.ZipFile(path, "r") as zipf:
-        LearningPackageUnzipper().load(zipf)
+        LearningPackageUnzipper(zipf).load()
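The call-site change above boils down to the following usage sketch: the unzipper now receives the open ZipFile up front so its helper methods can read archive members on demand ("backup.zip" is a placeholder path):

import zipfile

from openedx_learning.apps.authoring.backup_restore.zipper import LearningPackageUnzipper

with zipfile.ZipFile("backup.zip", "r") as zipf:   # placeholder path
    # The unzipper holds the open ZipFile, so helpers like _read_file_from_zip() can use it lazily.
    summary = LearningPackageUnzipper(zipf).load()
print(summary["learning_package"])                 # key of the restored learning package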

openedx_learning/apps/authoring/backup_restore/serializers.py (5 additions, 3 deletions)

@@ -1,6 +1,8 @@
 """
 The serializers module for restoration of authoring data.
 """
+from datetime import timezone
+
 from rest_framework import serializers

 from openedx_learning.apps.authoring.components import api as components_api
@@ -12,7 +14,7 @@ class EntitySerializer(serializers.Serializer):  # pylint: disable=abstract-method
     """
     can_stand_alone = serializers.BooleanField(required=True)
     key = serializers.CharField(required=True)
-    created = serializers.DateTimeField(required=True)
+    created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
     created_by = serializers.CharField(required=True, allow_null=True)


@@ -22,8 +24,9 @@ class EntityVersionSerializer(serializers.Serializer):  # pylint: disable=abstract-method
     """
     title = serializers.CharField(required=True)
     entity_key = serializers.CharField(required=True)
-    created = serializers.DateTimeField(required=True)
+    created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)
     created_by = serializers.CharField(required=True, allow_null=True)
+    version_num = serializers.IntegerField(required=True)


 class ComponentSerializer(EntitySerializer):  # pylint: disable=abstract-method
@@ -51,7 +54,6 @@ class ComponentVersionSerializer(EntityVersionSerializer):  # pylint: disable=abstract-method
     """
     Serializer for component versions.
     """
-    content_to_replace = serializers.DictField(child=serializers.CharField(), required=True)


 class ContainerSerializer(EntitySerializer):  # pylint: disable=abstract-method
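The default_timezone change matters because timestamps in the backup TOML may be naive; passing default_timezone=timezone.utc makes DRF attach UTC rather than the server's current timezone. A minimal standalone sketch, not taken from this repo (the inline settings and the ExampleSerializer exist only so the snippet runs outside a Django project):

import django
from django.conf import settings

if not settings.configured:
    settings.configure(USE_TZ=True)   # minimal settings so DRF fields work outside a project
    django.setup()

from datetime import timezone

from rest_framework import serializers


class ExampleSerializer(serializers.Serializer):  # hypothetical, for illustration only
    created = serializers.DateTimeField(required=True, default_timezone=timezone.utc)


s = ExampleSerializer(data={"created": "2024-01-01T12:00:00"})  # naive timestamp from a TOML-like source
assert s.is_valid(), s.errors
print(s.validated_data["created"].tzinfo)  # UTC, regardless of the server's local timezone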

openedx_learning/apps/authoring/backup_restore/zipper.py (83 additions, 30 deletions)

@@ -406,8 +406,9 @@ class LearningPackageUnzipper:
         summary = unzipper.load("/path/to/backup.zip")
     """

-    def __init__(self) -> None:
-        self.utc_now: datetime = datetime.now(tz=timezone.utc)
+    def __init__(self, zipf: zipfile.ZipFile) -> None:
+        self.zipf = zipf
+        self.utc_now: datetime = datetime.now(timezone.utc)
         self.component_types_cache: dict[tuple[str, str], ComponentType] = {}
         self.errors: list[dict[str, Any]] = []
         # Maps for resolving relationships
@@ -422,28 +423,34 @@ def __init__(self) -> None:
     # --------------------------

     @transaction.atomic
-    def load(self, zipf: zipfile.ZipFile) -> dict[str, Any]:
+    def load(self) -> dict[str, Any]:
         """Extracts and restores all objects from the ZIP archive in an atomic transaction."""
-        organized_files = self._get_organized_file_list(zipf.namelist())
+        organized_files = self._get_organized_file_list(self.zipf.namelist())

         if not organized_files["learning_package"]:
             raise FileNotFoundError(f"Missing required {TOML_PACKAGE_NAME} in archive.")

-        learning_package = self._load_learning_package(zipf, organized_files["learning_package"])
+        learning_package = self._load_learning_package(organized_files["learning_package"])
         components_validated = self._extract_entities(
-            zipf, organized_files["components"], ComponentSerializer, ComponentVersionSerializer
+            organized_files["components"], ComponentSerializer, ComponentVersionSerializer
         )
         containers_validated = self._extract_entities(
-            zipf, organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
+            organized_files["containers"], ContainerSerializer, ContainerVersionSerializer
         )

         collections_validated = self._extract_collections(
-            zipf, organized_files["collections"]
+            organized_files["collections"]
         )

         self._write_errors()
         if not self.errors:
-            self._save(learning_package, components_validated, containers_validated, collections_validated)
+            self._save(
+                learning_package,
+                components_validated,
+                containers_validated,
+                collections_validated,
+                component_static_files=organized_files["component_static_files"]
+            )

         return {
             "learning_package": learning_package.key,
@@ -458,7 +465,6 @@ def load(self, zipf: zipfile.ZipFile) -> dict[str, Any]:

     def _extract_entities(
         self,
-        zipf: zipfile.ZipFile,
         entity_files: list[str],
         entity_serializer: type[serializers.Serializer],
         version_serializer: type[serializers.Serializer],
@@ -471,7 +477,7 @@ def _extract_entities(
                 # Skip non-TOML files
                 continue

-            entity_data, draft_version, published_version = self._load_entity_data(zipf, file)
+            entity_data, draft_version, published_version = self._load_entity_data(file)
             serializer = entity_serializer(
                 data={"created": self.utc_now, "created_by": None, **entity_data}
             )
@@ -501,7 +507,6 @@ def _extract_entities(

     def _extract_collections(
         self,
-        zipf: zipfile.ZipFile,
         collection_files: list[str],
     ) -> dict[str, Any]:
         """Extraction + validation pipeline for collections."""
@@ -511,7 +516,7 @@ def _extract_collections(
             if not file.endswith(".toml"):
                 # Skip non-TOML files
                 continue
-            toml_content = self._read_file_from_zip(zipf, file)
+            toml_content = self._read_file_from_zip(file)
             collection_data = parse_collection_toml(toml_content)
             serializer = CollectionSerializer(data={"created_by": None, **collection_data})
             if not serializer.is_valid():
@@ -538,20 +543,22 @@ def _save(
         learning_package: LearningPackage,
         components: dict[str, Any],
         containers: dict[str, Any],
-        collections: dict[str, Any]
+        collections: dict[str, Any],
+        *,
+        component_static_files: dict[str, List[str]]
     ) -> None:
         """Persist all validated entities in two phases: published then drafts."""

         with publishing_api.bulk_draft_changes_for(learning_package.id):
-            self._save_components(learning_package, components)
+            self._save_components(learning_package, components, component_static_files)
             self._save_units(learning_package, containers)
             self._save_subsections(learning_package, containers)
             self._save_sections(learning_package, containers)
             self._save_collections(learning_package, collections)
         publishing_api.publish_all_drafts(learning_package.id)

         with publishing_api.bulk_draft_changes_for(learning_package.id):
-            self._save_draft_versions(components, containers)
+            self._save_draft_versions(components, containers, component_static_files)

     def _save_collections(self, learning_package, collections):
         """Save collections and their entities."""
@@ -564,7 +571,7 @@ def _save_collections(self, learning_package, collections):
                 entities_qset=publishing_api.get_publishable_entities(learning_package.id).filter(key__in=entities)
             )

-    def _save_components(self, learning_package, components):
+    def _save_components(self, learning_package, components, component_static_files):
         """Save components and published component versions."""
         for valid_component in components.get("components", []):
             entity_key = valid_component.pop("key")
@@ -573,8 +580,12 @@ def _save_components(self, learning_package, components):

         for valid_published in components.get("components_published", []):
             entity_key = valid_published.pop("entity_key")
+            version_num = valid_published["version_num"]  # Should exist, validated earlier
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
             components_api.create_next_component_version(
                 self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_published.pop("version_num", None),
                 **valid_published
             )

@@ -620,34 +631,50 @@ def _save_sections(self, learning_package, containers):
                 self.sections_map_by_key[entity_key], subsections=children, **valid_published
             )

-    def _save_draft_versions(self, components, containers):
+    def _save_draft_versions(self, components, containers, component_static_files):
         """Save draft versions for all entity types."""
         for valid_draft in components.get("components_drafts", []):
             entity_key = valid_draft.pop("entity_key")
+            version_num = valid_draft["version_num"]  # Should exist, validated earlier
+            content_to_replace = self._resolve_static_files(version_num, entity_key, component_static_files)
             components_api.create_next_component_version(
                 self.components_map_by_key[entity_key].publishable_entity.id,
+                content_to_replace=content_to_replace,
+                force_version_num=valid_draft.pop("version_num", None),
+                # Drafts can diverge from published, so we allow ignoring previous content
+                # Use case: published v1 had files A, B; draft v2 only has file A
+                ignore_previous_content=True,
                 **valid_draft
             )

         for valid_draft in containers.get("unit_drafts", []):
             entity_key = valid_draft.pop("entity_key")
             children = self._resolve_children(valid_draft, self.components_map_by_key)
             units_api.create_next_unit_version(
-                self.units_map_by_key[entity_key], components=children, **valid_draft
+                self.units_map_by_key[entity_key],
+                components=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
             )

         for valid_draft in containers.get("subsection_drafts", []):
             entity_key = valid_draft.pop("entity_key")
             children = self._resolve_children(valid_draft, self.units_map_by_key)
             subsections_api.create_next_subsection_version(
-                self.subsections_map_by_key[entity_key], units=children, **valid_draft
+                self.subsections_map_by_key[entity_key],
+                units=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
             )

         for valid_draft in containers.get("section_drafts", []):
             entity_key = valid_draft.pop("entity_key")
             children = self._resolve_children(valid_draft, self.subsections_map_by_key)
             sections_api.create_next_section_version(
-                self.sections_map_by_key[entity_key], subsections=children, **valid_draft
+                self.sections_map_by_key[entity_key],
+                subsections=children,
+                force_version_num=valid_draft.pop("version_num", None),
+                **valid_draft
             )

     # --------------------------
@@ -680,14 +707,31 @@ def _write_errors(self) -> str | None:

         return log_filename

+    def _resolve_static_files(
+        self,
+        num_version: int,
+        entity_key: str,
+        static_files_map: dict[str, List[str]]
+    ) -> dict[str, bytes]:
+        """Resolve static file paths into their binary content."""
+        resolved_files: dict[str, bytes] = {}
+
+        static_file_key = f"{entity_key}:v{num_version}"  # e.g., "my_component:123:v1"
+        static_files = static_files_map.get(static_file_key, [])
+        for static_file in static_files:
+            local_key = static_file.split(f"v{num_version}/")[-1]
+            with self.zipf.open(static_file, "r") as f:
+                resolved_files[local_key] = f.read()
+        return resolved_files
+
     def _resolve_children(self, entity_data: dict[str, Any], lookup_map: dict[str, Any]) -> list[Any]:
         """Resolve child entity keys into model instances."""
         children_keys = entity_data.pop("children", [])
         return [lookup_map[key] for key in children_keys if key in lookup_map]

-    def _load_learning_package(self, zipf: zipfile.ZipFile, package_file: str) -> LearningPackage:
+    def _load_learning_package(self, package_file: str) -> LearningPackage:
         """Load and persist the learning package TOML file."""
-        toml_content = self._read_file_from_zip(zipf, package_file)
+        toml_content = self._read_file_from_zip(package_file)
         data = parse_learning_package_toml(toml_content)
         return publishing_api.create_learning_package(
             key=data["key"],
@@ -696,10 +740,10 @@ def _load_learning_package(self, zipf: zipfile.ZipFile, package_file: str) -> LearningPackage:
         )

     def _load_entity_data(
-        self, zipf: zipfile.ZipFile, entity_file: str
+        self, entity_file: str
     ) -> tuple[dict[str, Any], dict[str, Any] | None, dict[str, Any] | None]:
         """Load entity data and its versions from TOML."""
-        content = self._read_file_from_zip(zipf, entity_file)
+        content = self._read_file_from_zip(entity_file)
         entity_data, version_data = parse_publishable_entity_toml(content)
         return entity_data, *self._get_versions_to_write(version_data, entity_data)

@@ -712,7 +756,6 @@ def _validate_versions(self, entity_data, draft, published, serializer_cls, *, f
             serializer = serializer_cls(
                 data={
                     "entity_key": entity_data["key"],
-                    "content_to_replace": {},
                     "created": self.utc_now,
                     "created_by": None,
                     **version
@@ -724,9 +767,9 @@ def _validate_versions(self, entity_data, draft, published, serializer_cls, *, f
                 self.errors.append({"file": file, "errors": serializer.errors})
         return valid

-    def _read_file_from_zip(self, zipf: zipfile.ZipFile, filename: str) -> str:
+    def _read_file_from_zip(self, filename: str) -> str:
         """Read and decode a UTF-8 file from the zip archive."""
-        with zipf.open(filename) as f:
+        with self.zipf.open(filename) as f:
             return f.read().decode("utf-8")

     def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
@@ -735,6 +778,7 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
             "learning_package": None,
             "containers": [],
             "components": [],
+            "component_static_files": defaultdict(list),
             "collections": [],
         }

@@ -746,10 +790,19 @@ def _get_organized_file_list(self, file_paths: list[str]) -> dict[str, Any]:
             elif path.startswith("entities/") and str(Path(path).parent) == "entities":
                 organized["containers"].append(path)
             elif path.startswith("entities/"):
-                organized["components"].append(path)
+                if path.endswith(".toml"):
+                    organized["components"].append(path)
+                else:
+                    component_key = Path(path).parts[1:4]  # e.g., ['xblock.v1', 'html', 'my_component_123456']
+                    num_version = Path(path).parts[5] if len(Path(path).parts) > 5 else "v1"  # e.g., 'v1'
+                    if len(component_key) == 3:
+                        component_identifier = ":".join(component_key)
+                        component_identifier += f":{num_version}"
+                        organized["component_static_files"][component_identifier].append(path)
+                    else:
+                        self.errors.append({"file": path, "errors": "Invalid component static file path structure."})
             elif path.startswith("collections/"):
                 organized["collections"].append(path)
-
         return organized

     def _get_versions_to_write(
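To illustrate how the static-file handling above fits together: _get_organized_file_list() buckets every non-TOML path under entities/ by a "<namespace>:<type>:<component>:v<N>" key, and _resolve_static_files() later turns those paths into the bytes passed as content_to_replace. A standalone sketch of that bucketing, where the archive layout shown is an assumption inferred from the parsing code rather than a documented format:

from collections import defaultdict
from pathlib import Path

# Hypothetical member paths following the layout the parser expects:
# entities/<namespace>/<block type>/<component>/component_versions/v<N>/static/<file>
paths = [
    "entities/xblock.v1/html/my_component_123456/component_versions/v1/static/img/logo.png",
    "entities/xblock.v1/html/my_component_123456/component_versions/v2/static/notes.txt",
]

component_static_files: dict[str, list[str]] = defaultdict(list)
for path in paths:
    parts = Path(path).parts
    component_key = parts[1:4]                           # ('xblock.v1', 'html', 'my_component_123456')
    num_version = parts[5] if len(parts) > 5 else "v1"   # 'v1', 'v2', ...
    component_static_files[":".join(component_key) + f":{num_version}"].append(path)

print(dict(component_static_files))
# (paths abbreviated)
# {'xblock.v1:html:my_component_123456:v1': ['entities/.../v1/static/img/logo.png'],
#  'xblock.v1:html:my_component_123456:v2': ['entities/.../v2/static/notes.txt']}

_resolve_static_files() then strips everything up to "v<N>/" to recover the local key (e.g. "static/img/logo.png") and reads the member bytes from the open ZipFile, which is what create_next_component_version() ultimately receives as content_to_replace.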

openedx_learning/apps/authoring/components/api.py (33 additions, 0 deletions)

@@ -159,10 +159,28 @@ def create_next_component_version(
     created: datetime,
     title: str | None = None,
     created_by: int | None = None,
+    *,
+    force_version_num: int | None = None,
+    ignore_previous_content: bool = False,
 ) -> ComponentVersion:
     """
     Create a new ComponentVersion based on the most recent version.

+    Args:
+        component_pk (int): The primary key of the Component to version.
+        content_to_replace (dict): Mapping of file keys to Content IDs,
+            None (for deletion), or bytes (for new file content).
+        created (datetime): The creation timestamp for the new version.
+        title (str, optional): Title for the new version. If None, uses the previous version's title.
+        created_by (int, optional): User ID of the creator.
+        force_version_num (int, optional): If provided, overrides the automatic version number increment and sets
+            this version's number explicitly. Use this if you need to restore or import a version with a specific
+            version number, such as during data migration or when synchronizing with external systems.
+        ignore_previous_content (bool): If True, do not copy over content from the previous version.
+
+    Returns:
+        ComponentVersion: The newly created ComponentVersion instance.
+
     A very common pattern for making a new ComponentVersion is going to be "make
     it just like the last version, except changing these one or two things".
     Before calling this, you should create any new contents via the contents
@@ -183,6 +201,14 @@
     convenient to remove paths (e.g. due to deprecation) without having to
     always check for its existence first.

+    Why use force_version_num?
+    Normally, the version number is incremented automatically from the latest version. If you need to set a specific
+    version number (for example, when restoring from backup, importing legacy data, or synchronizing with another
+    system), use force_version_num to override the default behavior.
+
+    Why not use create_component_version?
+    The main reason is that we want to reuse the logic to create a static file component from a dictionary.
+
     TODO: Have to add learning_downloadable info to this when it comes time to
     support static asset download.
     """
@@ -202,6 +228,9 @@
     if title is None:
         title = last_version.title

+    if force_version_num is not None:
+        next_version_num = force_version_num
+
     with atomic():
         publishable_entity_version = publishing_api.create_publishable_entity_version(
             component_pk,
@@ -241,6 +270,10 @@
                 component_version=component_version,
                 key=key,
             )
+
+        if ignore_previous_content:
+            return component_version
+
         # Now copy any old associations that existed, as long as they aren't
         # in conflict with the new stuff or marked for deletion.
         last_version_content_mapping = ComponentVersionContent.objects \
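A hedged usage sketch of the updated API for the draft side of the restore described in the commit message (version 6 when version 5 never existed); the id, byte payloads, and file keys are placeholders, not values from this repo:

from datetime import datetime, timezone

from openedx_learning.apps.authoring.components import api as components_api

component_entity_id = 42                        # placeholder: PublishableEntity id of the Component
draft_olx_bytes = b"<html>draft body</html>"    # placeholder bytes read from the backup archive
logo_bytes = b"\x89PNG..."                      # placeholder static asset bytes

components_api.create_next_component_version(
    component_entity_id,
    content_to_replace={
        # keys map to bytes (new content), an existing Content id, or None to delete a file
        "block.xml": draft_olx_bytes,
        "static/img/logo.png": logo_bytes,
    },
    created=datetime.now(tz=timezone.utc),
    title="Restored draft",
    force_version_num=6,            # the backup only holds published v4 and draft v6
    ignore_previous_content=True,   # draft diverged: don't copy the published version's files forward
)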
