Skip to content

Commit 141bf07

Browse files
committed
Fix inline CTable TreeStore edge cases:
- TreeStore.values() now collapses object roots.
- Parent subtree deletion removes nested inline CTable objects.
- Missing object registry fallback now hides/protects inline CTable internals.
- Inline CTable indexes work in .b2d and .b2z.
1 parent bbd9823 commit 141bf07

4 files changed

Lines changed: 138 additions & 19 deletions

File tree

src/blosc2/ctable_storage.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -504,13 +504,12 @@ def discard(self) -> None:
504504
@staticmethod
505505
def _walk_descriptor_paths(descriptor: dict):
506506
"""Yield (obj, key) for every string value that looks like a file path."""
507-
_PATH_KEYS = {"path", "values_path", "positions_path", "l1_path", "l2_path"}
508507
stack = [descriptor]
509508
while stack:
510509
obj = stack.pop()
511510
if isinstance(obj, dict):
512511
for k, v in obj.items():
513-
if k in _PATH_KEYS and isinstance(v, str):
512+
if (k == "path" or k.endswith("_path")) and isinstance(v, str):
514513
yield obj, k
515514
elif isinstance(v, (dict, list)):
516515
stack.append(v)
@@ -521,22 +520,22 @@ def _walk_descriptor_paths(descriptor: dict):
521520

522521
@staticmethod
523522
def _relativize_descriptor(descriptor: dict, working_dir: str) -> dict:
524-
"""Replace absolute paths inside *working_dir* with ``_indexes/…`` relative paths."""
525-
prefix = working_dir.rstrip("/") + "/"
523+
"""Replace absolute paths inside *working_dir* with working-dir relative paths."""
524+
prefix = working_dir.rstrip(os.sep) + os.sep
526525
d = copy.deepcopy(descriptor)
527526
for obj, key in FileTableStorage._walk_descriptor_paths(d):
528527
v = obj[key]
529-
if v.startswith(prefix):
530-
obj[key] = v[len(prefix) :]
528+
if os.path.isabs(v) and v.startswith(prefix):
529+
obj[key] = v[len(prefix) :].replace(os.sep, "/")
531530
return d
532531

533532
@staticmethod
534533
def _absolutize_descriptor(descriptor: dict, working_dir: str) -> dict:
535-
"""Expand ``_indexes/…`` relative paths back to absolute using *working_dir*."""
534+
"""Expand working-dir relative paths back to absolute paths."""
536535
d = copy.deepcopy(descriptor)
537536
for obj, key in FileTableStorage._walk_descriptor_paths(d):
538537
v = obj[key]
539-
if v.startswith(_INDEXES_DIR + "/") or v.startswith(_INDEXES_DIR + os.sep):
538+
if not os.path.isabs(v):
540539
obj[key] = os.path.join(working_dir, v)
541540
return d
542541

src/blosc2/indexing.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5889,7 +5889,12 @@ def _bucket_batch_result_dtype(where_x) -> np.dtype:
58895889

58905890
def _bucket_worker_source(where_x):
58915891
if _supports_block_reads(where_x) and getattr(where_x, "urlpath", None) is not None:
5892-
return blosc2.open(str(where_x.urlpath), mode="r", mmap_mode=_INDEX_MMAP_MODE)
5892+
urlpath = str(where_x.urlpath)
5893+
# Arrays opened from a b2z TreeStore/CTable are offset-backed leaves whose
5894+
# urlpath points at the outer bundle, not at a standalone .b2nd file.
5895+
# Reopening that path would materialize the whole TreeStore/CTable.
5896+
if not urlpath.endswith(".b2z"):
5897+
return blosc2.open(urlpath, mode="r", mmap_mode=_INDEX_MMAP_MODE)
58935898
return where_x
58945899

58955900

src/blosc2/tree_store.py

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -229,9 +229,29 @@ def _object_roots(self) -> set:
229229
"""Return all registered object-root full keys."""
230230
return set(self._objects_registry().keys())
231231

232+
def _probed_object_roots(self) -> set:
233+
"""Return object roots discovered from physical CTable manifests."""
234+
roots = set()
235+
candidates = set(self.map_tree.keys()) | set(self._estore.keys())
236+
for key in candidates:
237+
if not key.endswith("/_meta"):
238+
continue
239+
root = key[: -len("/_meta")]
240+
if not root:
241+
# A manifest at /_meta marks the TreeStore itself as a CTable
242+
# backing store; it is not an inline object root to collapse.
243+
continue
244+
if self._probe_object_info(root) is not None:
245+
roots.add(root)
246+
return roots
247+
248+
def _known_object_roots(self) -> set:
249+
"""Return registered plus physically probed object-root full keys."""
250+
return self._object_roots() | self._probed_object_roots()
251+
232252
def _effective_object_roots(self) -> set:
233253
"""Object root keys relative to the current view (subtree or root)."""
234-
all_roots = self._object_roots()
254+
all_roots = self._known_object_roots()
235255
if not self.subtree_path:
236256
return all_roots
237257
result = set()
@@ -386,7 +406,7 @@ def __setitem__(
386406

387407
# Block overwriting an existing object root with a plain value
388408
full_key = self._translate_key_to_full(key)
389-
if self._object_info(full_key) is not None:
409+
if (self._object_info(full_key) or self._probe_object_info(full_key)) is not None:
390410
raise ValueError(
391411
f"'{key}' is an object root (e.g. CTable). "
392412
f"Delete it first with `del ts['{key}']` before assigning a new value."
@@ -421,7 +441,7 @@ def _set_ctable_object(self, key: str, value: blosc2.CTable) -> None:
421441
full_key = self._translate_key_to_full(key)
422442

423443
# Raise if already exists as object root (no silent replace)
424-
if self._object_info(full_key) is not None:
444+
if (self._object_info(full_key) or self._probe_object_info(full_key)) is not None:
425445
raise ValueError(
426446
f"'{key}' already exists as an object root. Delete it first with `del ts['{key}']`."
427447
)
@@ -531,7 +551,7 @@ def __delitem__(self, key: str) -> None:
531551
full_key = self._translate_key_to_full(key)
532552

533553
# --- Object root deletion ---
534-
if self._object_info(full_key) is not None:
554+
if (self._object_info(full_key) or self._probe_object_info(full_key)) is not None:
535555
self._delete_object_subtree(full_key)
536556
return
537557

@@ -545,8 +565,14 @@ def __delitem__(self, key: str) -> None:
545565
# Regular node / subtree deletion
546566
key_exists_as_data = super().__contains__(full_key)
547567
descendants = self.get_descendants(key)
548-
549-
if not key_exists_as_data and not descendants:
568+
prefix = full_key + "/" if full_key != "/" else "/"
569+
object_roots_to_delete = sorted(
570+
[root for root in self._known_object_roots() if root.startswith(prefix)],
571+
key=len,
572+
reverse=True,
573+
)
574+
575+
if not key_exists_as_data and not descendants and not object_roots_to_delete:
550576
raise KeyError(f"Key '{key}' not found")
551577

552578
keys_to_delete = []
@@ -557,8 +583,18 @@ def __delitem__(self, key: str) -> None:
557583
if super().__contains__(full_desc):
558584
keys_to_delete.append(descendant)
559585

586+
for object_root in object_roots_to_delete:
587+
self._delete_object_subtree(object_root)
588+
560589
for k in keys_to_delete:
561-
super().__delitem__(self._translate_key_to_full(k))
590+
full_desc = self._translate_key_to_full(k)
591+
if super().__contains__(full_desc):
592+
super().__delitem__(full_desc)
593+
594+
# Remove stale registry entries for any nested objects that were deleted as plain descendants.
595+
for root in list(self._object_roots()):
596+
if root.startswith(prefix):
597+
self._unregister_object(root)
562598

563599
def _delete_object_subtree(self, full_key: str) -> None:
564600
"""Delete all physical leaves under *full_key* and unregister it."""
@@ -614,7 +650,11 @@ def __contains__(self, key: str) -> bool:
614650
if self._is_object_internal_key(key):
615651
return False
616652
full_key = self._translate_key_to_full(key)
617-
return super().__contains__(full_key) or self._object_info(full_key) is not None
653+
return (
654+
super().__contains__(full_key)
655+
or self._object_info(full_key) is not None
656+
or self._probe_object_info(full_key) is not None
657+
)
618658
except ValueError:
619659
return False
620660

@@ -679,6 +719,15 @@ def items(self) -> Iterator[tuple[str, NDArray | C2Array | SChunk | TreeStore]]:
679719
for key in self.keys():
680720
yield key, self[key]
681721

722+
def values(
723+
self,
724+
) -> Iterator[
725+
NDArray | C2Array | SChunk | blosc2.ObjectArray | blosc2.BatchArray | blosc2.CTable | TreeStore
726+
]:
727+
"""Return values in the current subtree view, with object roots collapsed."""
728+
for key in self.keys():
729+
yield self[key]
730+
682731
def get_children(self, path: str) -> list[str]:
683732
"""Get direct children of a given path.
684733
@@ -815,7 +864,11 @@ def walk(self, path: str = "/", topdown: bool = True) -> Iterator[tuple[str, lis
815864
child_rel_path = path + "/" + name if path != "/" else "/" + name
816865
# Translate to full key in the backing store and verify it's a data node or object root
817866
full_key = self._translate_key_to_full(child_rel_path)
818-
if super().__contains__(full_key) or self._object_info(full_key) is not None:
867+
if (
868+
super().__contains__(full_key)
869+
or self._object_info(full_key) is not None
870+
or self._probe_object_info(full_key) is not None
871+
):
819872
valid_leaf_nodes.append(name)
820873
leaf_nodes = valid_leaf_nodes
821874

@@ -865,7 +918,7 @@ def get_subtree(self, path: str) -> TreeStore:
865918
full_path = self._translate_key_to_full(path)
866919

867920
# Object roots cannot be navigated as subtrees
868-
if self._object_info(full_path) is not None:
921+
if (self._object_info(full_path) or self._probe_object_info(full_path)) is not None:
869922
raise ValueError(
870923
f"'{path}' is an object root (e.g. CTable), not a TreeStore subtree. "
871924
f"Use ts['{path}'] to access the object."

tests/test_tree_store.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1432,3 +1432,65 @@ def test_ctable_context_manager_auto_close(tmp_path, storage_type):
14321432
t2 = ts["/table"]
14331433
assert len(t2) == 3
14341434
assert list(t2["x"][:])[-1] == 100
1435+
1436+
1437+
@pytest.mark.parametrize("storage_type", ["b2d", "b2z"])
1438+
def test_ctable_values_collapses_object_roots(tmp_path, storage_type):
1439+
"""values() yields the CTable object, not its internal leaves."""
1440+
path = str(tmp_path / f"bundle.{storage_type}")
1441+
with blosc2.TreeStore(path, mode="w") as ts:
1442+
ts["/table"] = _make_ctable(n=2)
1443+
values = list(ts.values())
1444+
1445+
assert len(values) == 1
1446+
assert isinstance(values[0], blosc2.CTable)
1447+
1448+
1449+
@pytest.mark.parametrize("storage_type", ["b2d", "b2z"])
1450+
def test_ctable_delete_parent_subtree_removes_nested_object(tmp_path, storage_type):
1451+
"""Deleting a normal subtree also deletes nested object roots and physical leaves."""
1452+
path = str(tmp_path / f"bundle.{storage_type}")
1453+
with blosc2.TreeStore(path, mode="w") as ts:
1454+
ts["/grp/table"] = _make_ctable(n=2)
1455+
del ts["/grp"]
1456+
assert sorted(ts.keys()) == []
1457+
1458+
with blosc2.open(path, mode="r") as ts:
1459+
assert sorted(ts.keys()) == []
1460+
assert "/grp/table" not in ts
1461+
1462+
1463+
@pytest.mark.parametrize("storage_type", ["b2d", "b2z"])
1464+
def test_ctable_inline_index_roundtrip(tmp_path, storage_type):
1465+
"""Index catalogs and sidecars work for inline CTable objects."""
1466+
path = str(tmp_path / f"bundle.{storage_type}")
1467+
with blosc2.TreeStore(path, mode="w") as ts:
1468+
ts["/table"] = _make_ctable(n=100)
1469+
1470+
with blosc2.TreeStore(path, mode="a") as ts:
1471+
table = ts["/table"]
1472+
table.create_index("x")
1473+
np.testing.assert_array_equal(list(table.where(table["x"] > 95)["x"][:]), [96, 97, 98, 99])
1474+
1475+
with blosc2.open(path, mode="r") as ts:
1476+
table = ts["/table"]
1477+
assert len(table.indexes) == 1
1478+
np.testing.assert_array_equal(list(table.where(table["x"] > 95)["x"][:]), [96, 97, 98, 99])
1479+
1480+
1481+
@pytest.mark.parametrize("storage_type", ["b2d", "b2z"])
1482+
def test_ctable_registry_missing_fallback_hides_and_protects_internals(tmp_path, storage_type):
1483+
"""Physical CTable manifests are enough to detect object roots if registry is missing."""
1484+
path = str(tmp_path / f"bundle.{storage_type}")
1485+
with blosc2.TreeStore(path, mode="w") as ts:
1486+
ts["/table"] = _make_ctable(n=2)
1487+
1488+
with blosc2.TreeStore(path, mode="a") as ts:
1489+
del ts._estore._store.vlmeta["_object_registry"]
1490+
1491+
with blosc2.open(path, mode="r") as ts:
1492+
assert sorted(ts.keys()) == ["/table"]
1493+
assert isinstance(ts["/table"], blosc2.CTable)
1494+
assert "/table/_meta" not in ts
1495+
with pytest.raises(ValueError, match="object root"):
1496+
ts.get_subtree("/table")

0 commit comments

Comments
 (0)