Skip to content

Commit 56cf8f1

Browse files
committed
nested zip adapter support
1 parent f57f430 commit 56cf8f1

File tree

2 files changed

+496
-0
lines changed

2 files changed

+496
-0
lines changed

src/zarr/storage/_builtin_adapters.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,13 +453,19 @@ async def from_url_segment(
453453
- Local paths: /path/to/file.zip
454454
- File URLs: file:/path/to/file.zip
455455
- Remote URLs: s3://bucket/file.zip, https://example.com/file.zip, gs://bucket/file.zip
456+
- Nested URLs: file:outer.zip|zip:inner.zip (recursively resolved)
456457
"""
457458
# Determine read-only mode
458459
read_only = kwargs.get("storage_options", {}).get("read_only", True)
459460
if "mode" in kwargs:
460461
mode = kwargs["mode"]
461462
read_only = mode == "r"
462463

464+
# Check if preceding_url is itself a nested ZEP 8 URL
465+
if "|" in preceding_url:
466+
# Nested URL - need to recursively resolve it
467+
return await cls._create_nested_zip_store(preceding_url, segment, read_only, kwargs)
468+
463469
# Handle different URL types
464470
if cls._is_remote_url(preceding_url):
465471
# For remote URLs, we need to create a custom ZipStore that can handle remote files
@@ -541,6 +547,68 @@ async def _create_remote_zip_store(
541547

542548
return zip_store
543549

550+
@classmethod
async def _create_nested_zip_store(
    cls,
    nested_url: str,
    segment: URLSegment,
    read_only: bool,
    kwargs: dict[str, Any],
) -> Store:
    """Create a read-only ZipStore from a nested ZEP 8 URL.

    For a nested URL such as ``"file:outer.zip|zip:inner.zip"`` this:

    1. Resolves ``nested_url`` through ``URLStoreResolver`` to obtain the
       parent store and the path of the inner archive within it.
    2. Reads the inner archive's bytes from the parent store.
    3. Wraps those bytes in a ``BytesIO`` and opens a ``ZipStore`` on it.

    Parameters
    ----------
    nested_url
        The preceding URL, itself containing a ``|`` pipe, that locates the
        inner ZIP file.
    segment
        The URL segment being resolved. Not consulted by this method; it is
        accepted so the caller can forward it unchanged.
    read_only
        Requested read-only flag. Not consulted by this method: the returned
        store is always opened with ``mode="r"`` and ``read_only=True``.
    kwargs
        Keyword arguments of the original call; only ``"storage_options"``
        is read and forwarded to the resolver.

    Returns
    -------
    Store
        An opened, read-only ``ZipStore`` backed by an in-memory copy of the
        inner archive.

    Raises
    ------
    FileNotFoundError
        If the inner archive cannot be found in the parent store.
    """
    from io import BytesIO

    from zarr.core.buffer import default_buffer_prototype

    # For "file:outer.zip|zip:inner.zip" the resolver yields the store for
    # outer.zip and "inner.zip" as the path inside that store.
    resolver = URLStoreResolver()
    parent_store, parent_path, _ = await resolver.resolve_url_with_path(
        nested_url, storage_options=kwargs.get("storage_options")
    )
    # NOTE(review): parent_store is not closed here after the bytes are read;
    # confirm whether the resolver owns its lifetime.

    not_found = f"Could not find file '{parent_path}' in nested store from URL: {nested_url}"

    try:
        archive_buffer = await parent_store.get(
            parent_path, prototype=default_buffer_prototype()
        )
    except KeyError as e:
        raise FileNotFoundError(not_found) from e

    # Store.get is documented to return None when the key is absent, so a
    # None result means "missing", not "exists but empty".
    if archive_buffer is None:
        raise FileNotFoundError(not_found)

    # The whole inner archive is copied into memory so ZipStore can seek in it.
    zip_store = ZipStore(
        path=BytesIO(archive_buffer.to_bytes()),
        mode="r",
        read_only=True,  # Nested ZIPs are always read-only
    )

    await zip_store._open()

    return zip_store
611+
544612
@classmethod
545613
def _get_fsspec_protocol(cls, url: str) -> str:
546614
"""Get the fsspec protocol name for installation hints."""

0 commit comments

Comments
 (0)