Skip to content

Commit 7b21b3e

Browse files
committed
Add new urlpath support for CTable.copy(). Fixes #632.
1 parent 5acf7ac commit 7b21b3e

2 files changed

Lines changed: 58 additions & 3 deletions

File tree

src/blosc2/ctable.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4775,8 +4775,14 @@ def sort_by(
47754775
result._last_pos = n
47764776
return result
47774777

4778-
def copy(self, compact: bool = True) -> CTable:
4779-
"""Return a new standalone in-memory copy of this table.
4778+
def copy(
4779+
self,
4780+
compact: bool = True,
4781+
*,
4782+
urlpath: str | os.PathLike[str] | None = None,
4783+
overwrite: bool = False,
4784+
) -> CTable:
4785+
"""Return a new standalone copy of this table.
47804786
47814787
Parameters
47824788
----------
@@ -4785,8 +4791,26 @@ def copy(self, compact: bool = True) -> CTable:
47854791
The result is a dense table with no tombstones and no parent
47864792
dependency — ideal for materialising a filtered view.
47874793
If ``False``, all physical slots are copied including deleted gaps,
4788-
preserving the tombstone state exactly.
4794+
preserving the tombstone state exactly for in-memory copies.
4795+
urlpath:
4796+
Destination path for a persistent copy. The extension selects the
4797+
on-disk format: ``.b2z`` for a compact zip-backed store or ``.b2d``
4798+
for a directory-backed store. If ``None`` (default), return an
4799+
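in-memory copy. Any other extension raises ``ValueError``. A persistent copy is reopened with ``mode="r"`` and that table is returned.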
4800+
overwrite:
4801+
If ``True``, replace an existing persistent destination. Only used when ``urlpath`` is given.
47894802
"""
4803+
if urlpath is not None:
4804+
urlpath = os.fspath(urlpath)
4805+
ext = os.path.splitext(urlpath)[1]
4806+
if ext == ".b2z":
4807+
self.to_b2z(urlpath, overwrite=overwrite, compact=compact)
4808+
elif ext == ".b2d":
4809+
self.to_b2d(urlpath, overwrite=overwrite, compact=compact)
4810+
else:
4811+
raise ValueError("urlpath must have a .b2z or .b2d extension")
4812+
return CTable.open(urlpath, mode="r")
4813+
47904814
valid_np = self._valid_rows[:]
47914815
live_pos = np.where(valid_np)[0]
47924816
n_live = len(live_pos)

tests/ctable/test_table_persistency.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,18 @@ def test_to_b2z_materializes_view():
148148
assert list(opened["id"][:]) == [2, 3]
149149

150150

151+
def test_copy_to_b2z_uses_urlpath_extension():
152+
dest = table_path("copy_dst.b2z")
153+
t = CTable(Row, new_data=[(10, 50.0, True), (20, 60.0, False)])
154+
155+
copied = t.copy(urlpath=dest)
156+
157+
assert isinstance(copied, CTable)
158+
assert os.path.exists(dest)
159+
assert len(copied) == 2
160+
assert list(copied["id"][:]) == [10, 20]
161+
162+
151163
def test_to_b2d_unpacks_persistent_b2z():
152164
src_b2d = table_path("to_b2d_src.b2d")
153165
src_b2z = table_path("to_b2d_src.b2z")
@@ -179,6 +191,25 @@ def test_to_b2d_materializes_view():
179191
assert list(opened["id"][:]) == [2, 3]
180192

181193

194+
def test_copy_to_b2d_uses_urlpath_extension():
195+
dest = table_path("copy_dst.b2d")
196+
t = CTable(Row, new_data=[(10, 50.0, True), (20, 60.0, False)])
197+
198+
copied = t.copy(urlpath=dest)
199+
200+
assert isinstance(copied, CTable)
201+
assert os.path.isdir(dest)
202+
assert len(copied) == 2
203+
assert list(copied["id"][:]) == [10, 20]
204+
205+
206+
def test_copy_rejects_unknown_urlpath_extension():
207+
t = CTable(Row, new_data=[(10, 50.0, True)])
208+
209+
with pytest.raises(ValueError, match=r"\.b2z or \.b2d"):
210+
t.copy(urlpath=table_path("copy_dst.b2nd"))
211+
212+
182213
def test_column_order_preserved_after_reopen():
183214
"""Column order from the schema JSON is respected on reopen."""
184215
path = table_path("order")

0 commit comments

Comments
 (0)