Skip to content

Commit df709cc

Browse files
authored
Changes for path resolver (#23)
* Changes for path resolver * Moved the path resolver to Util and updated the test case * Added Abstraction for file provider and refactored the code accordingly * updated file provider test cases * renamed file provider and updated its reference * File Provider changes * Fixed the Stage Object file function and formatted the files * Changes to reflect the locref as relative path * Removed unused reference * Added fileprovider dependency to test_store_resource * Update poetry.lock * Update poetry.lock * removed commented resolve
1 parent d80696c commit df709cc

18 files changed

+485
-213
lines changed

Diff for: dor/providers/file_provider.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
from abc import ABC, abstractmethod
2+
from pathlib import Path
3+
4+
5+
class FileProvider(ABC):
    """Abstract interface for the path operations needed to locate files.

    Implementations decide how a base path and a relative reference are
    combined; callers depend only on this interface.
    """

    @abstractmethod
    def apply_relative_path(self, base_path: Path, path_to_apply: str) -> Path:
        """Apply ``path_to_apply`` to ``base_path`` and return the resulting path.

        What the returned path is relative to is defined by the concrete
        implementation.
        """

    @abstractmethod
    def get_descriptor_dir(self, file_path: Path) -> Path:
        """Return the directory to use as the base for the file at ``file_path``."""

    @abstractmethod
    def get_norm_path(self, base_path: Path, path_to_apply: str) -> Path:
        """Return ``path_to_apply`` combined with ``base_path`` in normalised form."""

Diff for: dor/providers/file_system_file_provider.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import os
2+
from pathlib import Path
3+
from dor.providers.file_provider import FileProvider
4+
5+
6+
class FilesystemFileProvider(FileProvider):
    """``FileProvider`` built on ``os.path`` / ``pathlib`` path arithmetic.

    All operations here are lexical: paths are joined and normalised as
    strings, and nothing in this class reads the filesystem.
    """

    def apply_relative_path(self, base_path: Path, path_to_apply: str) -> Path:
        """Return ``path_to_apply`` re-expressed relative to ``base_path.parent``.

        ``path_to_apply`` is first joined onto ``base_path`` and normalised
        (collapsing ``..`` and ``.`` segments), then made relative to the
        parent directory of ``base_path``.

        Raises:
            ValueError: if the normalised target does not lie under
                ``base_path.parent``.
        """
        normalized_target = self.get_norm_path(base_path, path_to_apply)
        # Normalise the anchor the same way as the target; otherwise a
        # base path that itself contains ".." segments never matches the
        # already-collapsed target and relative_to() raises ValueError.
        anchor = Path(os.path.normpath(base_path.parent))
        return normalized_target.relative_to(anchor)

    def get_descriptor_dir(self, file_path: Path) -> Path:
        """Return the directory containing ``file_path``."""
        return file_path.parent

    def get_norm_path(self, base_path: Path, path_to_apply: str) -> Path:
        """Join ``path_to_apply`` onto ``base_path`` and normalise the result.

        Note that ``os.path.join`` discards ``base_path`` when
        ``path_to_apply`` is absolute.
        """
        joined = os.path.join(base_path, path_to_apply)
        return Path(os.path.normpath(joined))

Diff for: dor/providers/models.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -33,17 +33,17 @@ class PreservationEvent:
3333
@dataclass
3434
class FileReference:
3535
locref: str
36-
mdtype: Optional[str] = None
37-
mimetype: Optional[str] = None
36+
mdtype: str | None = None
37+
mimetype: str | None = None
3838

3939

4040
@dataclass
4141
class FileMetadata:
4242
id: str
4343
use: str
44-
mdid: Optional[str] = None
45-
groupid: Optional[str] = None
46-
ref: FileReference = None
44+
ref: FileReference
45+
mdid: str | None = None
46+
groupid: str | None = None
4747

4848

4949
class StructMapType(Enum):
@@ -56,7 +56,7 @@ class StructMapItem:
5656
order: int
5757
label: str
5858
asset_id: str
59-
type: Optional[str] = None
59+
type: str | None = None
6060

6161

6262
@dataclass
@@ -77,7 +77,7 @@ class PackageResource:
7777
struct_maps: list[StructMap] = field(default_factory=list)
7878

7979
def get_entries(self) -> list[Path]:
80-
entries = []
80+
entries: list[Path] = []
8181
for file_metadata in self.metadata_files:
8282
if not file_metadata.ref.locref.startswith("https://"):
8383
entries.append(Path(file_metadata.ref.locref))

Diff for: dor/providers/package_resource_provider.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
11
from pathlib import Path
22

3-
from .parsers import DescriptorFileParser
3+
from dor.providers.file_provider import FileProvider
44

5+
from .parsers import DescriptorFileParser
56

67
class PackageResourceProvider:
78

8-
def __init__(self, data_path: Path):
9+
def __init__(self, data_path: Path, file_provider: FileProvider):
910
self.data_path = data_path
11+
self.file_provider = file_provider
1012

1113
@property
1214
def descriptor_files(self):
@@ -15,5 +17,6 @@ def descriptor_files(self):
1517

1618
def get_resources(self):
1719
return [
18-
DescriptorFileParser(file).get_resource() for file in self.descriptor_files
20+
DescriptorFileParser(file, self.file_provider).get_resource() for file in self.descriptor_files
1921
]
22+

Diff for: dor/providers/parsers.py

+57-30
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,20 @@
11
from datetime import datetime
22
import uuid
3+
from pathlib import Path
34

5+
from dor.providers.file_provider import FileProvider
46
from utils.element_adapter import ElementAdapter
5-
from .models import *
7+
from .models import (
8+
Agent,
9+
AlternateIdentifier,
10+
FileMetadata,
11+
FileReference,
12+
PackageResource,
13+
PreservationEvent,
14+
StructMap,
15+
StructMapItem,
16+
StructMapType,
17+
)
618

719

820
class DescriptorFileParser:
@@ -11,9 +23,13 @@ class DescriptorFileParser:
1123
"PREMIS": "http://www.loc.gov/premis/v3",
1224
}
1325

14-
def __init__(self, descriptor_path):
15-
text = descriptor_path.read_text()
26+
def __init__(self, descriptor_file_path: Path, file_provider: FileProvider):
27+
text = descriptor_file_path.read_text()
1628
self.tree: ElementAdapter = ElementAdapter.from_string(text, self.namespaces)
29+
self.file_provider = file_provider
30+
self.descriptor_path: Path = file_provider.get_descriptor_dir(
31+
descriptor_file_path
32+
)
1733

1834
def get_id(self):
1935
return uuid.UUID(self.tree.get("OBJID"))
@@ -22,17 +38,16 @@ def get_type(self):
2238
hdr = self.tree.find("METS:metsHdr")
2339
return hdr.get("TYPE")
2440

25-
def get_alternate_identifier(self):
41+
def get_alternate_identifier(self) -> AlternateIdentifier:
2642
alt_record_id = self.tree.find("METS:metsHdr/METS:altRecordID")
27-
if alt_record_id:
28-
return AlternateIdentifier(
29-
type=alt_record_id.get("TYPE"), id=alt_record_id.text
30-
)
43+
return AlternateIdentifier(
44+
type=alt_record_id.get("TYPE"), id=alt_record_id.text
45+
)
3146

3247
def get_preservation_events(self) -> list[PreservationEvent]:
3348
return [self.get_event(elem) for elem in self.tree.findall(".//PREMIS:event")]
3449

35-
def get_event(self, elem) -> PreservationEvent:
50+
def get_event(self, elem: ElementAdapter) -> PreservationEvent:
3651
event_identifier = elem.find(".//PREMIS:eventIdentifierValue").text
3752
event_type = elem.find(".//PREMIS:eventType").text
3853
event_datetime = elem.find(".//PREMIS:eventDateTime").text
@@ -59,56 +74,68 @@ def get_data_files(self) -> list[FileMetadata]:
5974
for elem in self.tree.findall(".//METS:file")
6075
]
6176

62-
def get_md_file_metadatum(self, elem):
77+
def get_md_file_metadatum(self, elem: ElementAdapter):
6378
id_ = elem.get("ID")
6479
use = elem.get("USE")
65-
locref = elem.find("METS:mdRef").get_optional("LOCREF")
66-
mdtype = elem.find("METS:mdRef").get_optional("MDTYPE")
67-
mimetype = elem.find("METS:mdRef").get_optional("MIMETYPE")
80+
md_ref_element = elem.find("METS:mdRef")
81+
locref = md_ref_element.get("LOCREF")
82+
if not locref.startswith("https"):
83+
locref = self.file_provider.apply_relative_path(
84+
self.descriptor_path, locref
85+
)
86+
mdtype = md_ref_element.get_optional("MDTYPE")
87+
mimetype = md_ref_element.get_optional("MIMETYPE")
6888

6989
return FileMetadata(
7090
id=id_,
7191
use=use,
72-
ref=FileReference(locref=locref, mdtype=mdtype, mimetype=mimetype),
92+
ref=FileReference(locref=str(locref), mdtype=mdtype, mimetype=mimetype),
7393
)
7494

75-
def get_filesec_file_metadatum(self, elem):
95+
def get_filesec_file_metadatum(self, elem: ElementAdapter):
7696
id_ = elem.get("ID")
7797
use = elem.get("USE")
78-
mdid = elem.get_optional("MDID", None)
79-
groupid = elem.get_optional("GROUPID", None)
80-
mimetype = elem.get('MIMETYPE')
98+
mdid = elem.get_optional("MDID")
99+
groupid = elem.get_optional("GROUPID")
100+
mimetype = elem.get_optional("MIMETYPE")
81101
mdtype = None
82-
locref = elem.find("METS:FLocat").get_optional("LOCREF")
102+
flocat_element = elem.find("METS:FLocat")
103+
locref = flocat_element.get("LOCREF")
104+
if not locref.startswith("https"):
105+
locref = self.file_provider.apply_relative_path(
106+
self.descriptor_path, locref
107+
)
83108

84109
return FileMetadata(
85110
id=id_,
86111
use=use,
87112
mdid=mdid,
88113
groupid=groupid,
89-
ref=FileReference(locref=locref, mdtype=mdtype, mimetype=mimetype),
114+
ref=FileReference(locref=str(locref), mdtype=mdtype, mimetype=mimetype),
90115
)
91116

92-
def get_struct_maps(self):
93-
struct_maps = []
117+
def get_struct_maps(self) -> list[StructMap]:
118+
struct_maps: list[StructMap] = []
94119
for struct_map_elem in self.tree.findall(".//METS:structMap"):
95120
struct_map_id = struct_map_elem.get("ID")
96121
struct_map_type = struct_map_elem.get("TYPE")
97122

98123
order_elems = struct_map_elem.findall(".//METS:div[@ORDER]")
99124

100-
struct_map_items = []
125+
struct_map_items: list[StructMapItem] = []
101126
for order_elem in order_elems:
102127
order_number = int(order_elem.get("ORDER"))
103128
label = order_elem.get("LABEL")
104129
asset_id = order_elem.get("ID")
105-
order_elem_type = order_elem.get_optional("TYPE", None)
106-
struct_map_items.append(StructMapItem(
107-
order=order_number,
108-
label=label,
109-
asset_id=asset_id,
110-
type=order_elem_type,
111-
))
130+
order_elem_type = order_elem.get_optional("TYPE")
131+
struct_map_items.append(
132+
StructMapItem(
133+
order=order_number,
134+
label=label,
135+
asset_id=asset_id,
136+
type=order_elem_type,
137+
)
138+
)
112139

113140
struct_maps.append(
114141
StructMap(

Diff for: dor/providers/translocator.py

+1-10
Original file line numberDiff line numberDiff line change
@@ -37,20 +37,11 @@ def object_data_directory(self) -> Path:
3737
if self.root_identifier is None:
3838
raise Exception()
3939
return self.package_directory() / "data" / self.root_identifier
40-
41-
def _apply_relative_path(self, path: Path, path_to_apply: Path) -> Path:
42-
return (path / path_to_apply).resolve().relative_to(self.object_data_directory().resolve())
4340

4441
def get_bundle(self, entries: list[Path]) -> Bundle:
45-
resolved_entries = []
46-
for entry in entries:
47-
resolved_entries.append(
48-
self._apply_relative_path(self.object_data_directory() / "descriptor", entry)
49-
)
50-
5142
return Bundle(
5243
root_path=self.object_data_directory(),
53-
entries=resolved_entries
44+
entries=entries
5445
)
5546

5647

Diff for: dor/service_layer/handlers/store_files.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1+
from pathlib import Path
12
from dor.domain.events import PackageStored, PackageUnpacked
23
from dor.service_layer.unit_of_work import UnitOfWork
34

45

56
def store_files(event: PackageUnpacked, uow: UnitOfWork, workspace_class: type) -> None:
67
workspace = workspace_class(event.workspace_identifier, event.identifier)
78

8-
entries = []
9+
entries: list[Path] = []
910
for resource in event.resources:
1011
entries.extend(resource.get_entries())
1112

@@ -19,11 +20,10 @@ def store_files(event: PackageUnpacked, uow: UnitOfWork, workspace_class: type)
1920
uow.gateway.commit_object_changes(
2021
id=event.identifier,
2122
coordinator=event.version_info.coordinator,
22-
message=event.version_info.message
23+
message=event.version_info.message,
2324
)
2425

2526
stored_event = PackageStored(
26-
identifier=event.identifier,
27-
tracking_identifier=event.tracking_identifier
27+
identifier=event.identifier, tracking_identifier=event.tracking_identifier
2828
)
29-
uow.add_event(stored_event)
29+
uow.add_event(stored_event)

Diff for: dor/service_layer/handlers/unpack_package.py

+16-9
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,41 @@
11
from dor.domain.events import PackageUnpacked, PackageVerified
22
from dor.domain.models import VersionInfo
3+
from dor.providers.file_provider import FileProvider
34
from dor.service_layer.unit_of_work import UnitOfWork
45
from gateway.coordinator import Coordinator
56

7+
68
def unpack_package(
79
event: PackageVerified,
810
uow: UnitOfWork,
911
bag_adapter_class: type,
1012
package_resource_provider_class: type,
11-
workspace_class: type
13+
workspace_class: type,
14+
file_provider: FileProvider,
1215
) -> None:
1316
workspace = workspace_class(event.workspace_identifier)
1417
bag_adapter = bag_adapter_class(workspace.package_directory())
1518

1619
info = bag_adapter.dor_info
1720
workspace.root_identifier = info["Root-Identifier"]
18-
resources = package_resource_provider_class(workspace.object_data_directory()).get_resources()
21+
resources = package_resource_provider_class(
22+
workspace.object_data_directory(), file_provider
23+
).get_resources()
1924

20-
root_resource = [ r for r in resources if str(r.id) == info['Root-Identifier'] ][0]
21-
preservation_event = [e for e in root_resource.events if e.type == 'ingest'][0]
25+
root_resource = [r for r in resources if str(r.id) == info["Root-Identifier"]][0]
26+
preservation_event = [e for e in root_resource.events if e.type == "ingest"][0]
2227

2328
unpacked_event = PackageUnpacked(
24-
identifier=info['Root-Identifier'],
29+
identifier=info["Root-Identifier"],
2530
tracking_identifier=event.tracking_identifier,
2631
package_identifier=event.package_identifier,
2732
workspace_identifier=event.workspace_identifier,
2833
resources=resources,
2934
version_info=VersionInfo(
30-
coordinator=Coordinator(preservation_event.agent.address, preservation_event.agent.address),
31-
message=preservation_event.detail
32-
)
35+
coordinator=Coordinator(
36+
preservation_event.agent.address, preservation_event.agent.address
37+
),
38+
message=preservation_event.detail,
39+
),
3340
)
34-
uow.add_event(unpacked_event)
41+
uow.add_event(unpacked_event)

Diff for: features/scratch/storage/.keep

Whitespace-only changes.

0 commit comments

Comments
 (0)