Skip to content

Commit f17a1cc

Browse files
nguyenvihnorton
authored and committed
Add Consolidation and Vacuuming For Group Metadata
1 parent 819ef07 commit f17a1cc

File tree

3 files changed

+153
-64
lines changed

3 files changed

+153
-64
lines changed

tiledb/cc/group.cc

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,6 +126,7 @@ void init_group(py::module &m) {
126126
.def("_add", &Group::add_member, py::arg("uri"),
127127
py::arg("relative") = false, py::arg("name") = std::nullopt)
128128
.def("_remove", &Group::remove_member)
129+
.def("_delete_group", &Group::delete_group)
129130
.def("_member_count", &Group::member_count)
130131
.def("_member",
131132
static_cast<Object (Group::*)(uint64_t) const>(&Group::member))
@@ -136,8 +137,12 @@ void init_group(py::module &m) {
136137
.def("_dump", &Group::dump)
137138

138139
/* static methods */
139-
.def("_create", &Group::create)
140-
.def("_delete_group", &Group::delete_group);
140+
.def_static("_create", &Group::create)
141+
.def_static("_consolidate_metadata", &Group::consolidate_metadata,
142+
py::arg("ctx"), py::arg("uri"),
143+
py::arg("config") = (Config *)nullptr)
144+
.def_static("_vacuum_metadata", &Group::vacuum_metadata, py::arg("ctx"),
145+
py::arg("uri"), py::arg("config") = (Config *)nullptr);
141146
}
142147

143148
} // namespace libtiledbcpp

tiledb/group.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -461,3 +461,39 @@ def set_config(self, cfg: Config):
461461
"Use `group.close()` or Group(.., closed=True)"
462462
)
463463
self._set_config(cfg)
464+
465+
@staticmethod
466+
def consolidate_metadata(
467+
uri: str, config: Config = None, ctx: Optional[Ctx] = None
468+
):
469+
"""
470+
Consolidate the group metadata.
471+
472+
:param uri: The URI of the TileDB group to be consolidated
473+
:type uri: str
474+
:param config: Optional configuration parameters for the consolidation
475+
:type config: Config
476+
:param ctx: Optional TileDB context
477+
:type ctx: Ctx
478+
"""
479+
if ctx is None:
480+
ctx = default_ctx()
481+
482+
lt.Group._consolidate_metadata(ctx, uri, config)
483+
484+
@staticmethod
485+
def vacuum_metadata(uri: str, config: Config = None, ctx: Optional[Ctx] = None):
486+
"""
487+
Vacuum the group metadata.
488+
489+
:param uri: The URI of the TileDB group to be vacuumed
490+
:type uri: str
491+
:param config: Optional configuration parameters for the vacuuming
492+
:type config: Config
493+
:param ctx: Optional TileDB context
494+
:type ctx: Ctx
495+
"""
496+
if ctx is None:
497+
ctx = default_ctx()
498+
499+
lt.Group._vacuum_metadata(ctx, uri, config)

tiledb/tests/test_group.py

Lines changed: 110 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
import os
2+
import pathlib
23

34
import numpy as np
45
import pytest
@@ -265,6 +266,82 @@ def test_group_named_members(self):
265266
assert len(grp) == 0
266267
grp.close()
267268

269+
def test_pass_context(self):
270+
foo = self.path("foo")
271+
bar = self.path("foo/bar")
272+
273+
tiledb.group_create(foo)
274+
tiledb.group_create(bar)
275+
276+
ctx = tiledb.Ctx()
277+
with tiledb.Group(foo, mode="w", ctx=ctx) as G:
278+
G.add(bar, name="bar")
279+
280+
with tiledb.Group(foo, mode="r", ctx=ctx) as G:
281+
assert "bar" in G
282+
283+
def test_relative(self):
284+
group1 = self.path("group1")
285+
group2_1 = self.path("group1/group2_1")
286+
group2_2 = self.path("group1/group2_2")
287+
288+
tiledb.group_create(group2_1)
289+
tiledb.group_create(group2_2)
290+
291+
with tiledb.Group(group1, mode="w") as G:
292+
G.add(group2_1, name="group2_1", relative=False)
293+
G.add("group2_2", name="group2_2", relative=True)
294+
295+
with tiledb.Group(group1, mode="r") as G:
296+
assert G.is_relative("group2_1") is False
297+
assert G.is_relative("group2_2") is True
298+
299+
def test_set_config(self):
300+
group_uri = self.path("foo")
301+
array_uri_1 = self.path("foo/a")
302+
array_uri_2 = self.path("foo/b")
303+
304+
tiledb.group_create(group_uri)
305+
306+
dom = tiledb.Domain(tiledb.Dim("id", dtype="ascii"))
307+
attr = tiledb.Attr("value", dtype=np.int64)
308+
sch = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True)
309+
310+
tiledb.Array.create(array_uri_1, sch)
311+
tiledb.Array.create(array_uri_2, sch)
312+
313+
cfg = tiledb.Config({"sm.group.timestamp_end": 2000})
314+
with tiledb.Group(group_uri, "w", cfg) as G:
315+
G.add(name="a", uri="a", relative=True)
316+
317+
cfg = tiledb.Config({"sm.group.timestamp_end": 3000})
318+
with tiledb.Group(group_uri, "w", cfg) as G:
319+
G.add(name="b", uri="b", relative=True)
320+
321+
ms = np.arange(1000, 4000, 1000, dtype=np.int64)
322+
323+
for sz, m in enumerate(ms):
324+
cfg = tiledb.Config({"sm.group.timestamp_end": m})
325+
326+
G = tiledb.Group(group_uri)
327+
328+
# Cannot set config on open group
329+
with self.assertRaises(ValueError):
330+
G.set_config(cfg)
331+
332+
G.close()
333+
G.set_config(cfg)
334+
335+
G.open()
336+
assert len(G) == sz
337+
G.close()
338+
339+
for sz, m in enumerate(ms):
340+
cfg = tiledb.Config({"sm.group.timestamp_end": m})
341+
342+
with tiledb.Group(group_uri, config=cfg) as G:
343+
assert len(G) == sz
344+
268345

269346
class GroupMetadataTest(GroupTestCase):
270347
@pytest.mark.parametrize(
@@ -489,78 +566,49 @@ def test_basic(self, test_vals):
489566
self.assert_metadata_roundtrip(grp.meta, test_vals)
490567
grp.close()
491568

492-
def test_pass_context(self):
493-
foo = self.path("foo")
494-
bar = self.path("foo/bar")
495-
496-
tiledb.group_create(foo)
497-
tiledb.group_create(bar)
498-
499-
ctx = tiledb.Ctx()
500-
with tiledb.Group(foo, mode="w", ctx=ctx) as G:
501-
G.add(bar, name="bar")
502-
503-
with tiledb.Group(foo, mode="r", ctx=ctx) as G:
504-
assert "bar" in G
505-
506-
def test_relative(self):
507-
group1 = self.path("group1")
508-
group2_1 = self.path("group1/group2_1")
509-
group2_2 = self.path("group1/group2_2")
510-
511-
tiledb.group_create(group2_1)
512-
tiledb.group_create(group2_2)
513-
514-
with tiledb.Group(group1, mode="w") as G:
515-
G.add(group2_1, name="group2_1", relative=False)
516-
G.add("group2_2", name="group2_2", relative=True)
517-
518-
with tiledb.Group(group1, mode="r") as G:
519-
assert G.is_relative("group2_1") is False
520-
assert G.is_relative("group2_2") is True
521-
522-
def test_set_config(self):
523-
group_uri = self.path("foo")
524-
array_uri_1 = self.path("foo/a")
525-
array_uri_2 = self.path("foo/b")
569+
def test_consolidation_and_vac(self):
570+
vfs = tiledb.VFS()
571+
path = self.path("test_consolidation_and_vac")
572+
tiledb.Group.create(path)
526573

527-
tiledb.group_create(group_uri)
574+
cfg = tiledb.Config({"sm.group.timestamp_end": 1})
575+
with tiledb.Group(path, "w", cfg) as grp:
576+
grp.meta["meta"] = 1
528577

529-
dom = tiledb.Domain(tiledb.Dim("id", dtype="ascii"))
530-
attr = tiledb.Attr("value", dtype=np.int64)
531-
sch = tiledb.ArraySchema(domain=dom, attrs=(attr,), sparse=True)
578+
cfg = tiledb.Config({"sm.group.timestamp_end": 2})
579+
with tiledb.Group(path, "w", cfg) as grp:
580+
grp.meta["meta"] = 2
532581

533-
tiledb.Array.create(array_uri_1, sch)
534-
tiledb.Array.create(array_uri_2, sch)
582+
cfg = tiledb.Config({"sm.group.timestamp_end": 3})
583+
with tiledb.Group(path, "w", cfg) as grp:
584+
grp.meta["meta"] = 3
535585

536-
cfg = tiledb.Config({"sm.group.timestamp_end": 2000})
537-
with tiledb.Group(group_uri, "w", cfg) as G:
538-
G.add(name="a", uri="a", relative=True)
586+
meta_path = pathlib.Path(path) / "__meta"
587+
assert len(vfs.ls(meta_path)) == 3
539588

540-
cfg = tiledb.Config({"sm.group.timestamp_end": 3000})
541-
with tiledb.Group(group_uri, "w", cfg) as G:
542-
G.add(name="b", uri="b", relative=True)
589+
tiledb.Group.consolidate_metadata(path, cfg)
590+
tiledb.Group.vacuum_metadata(path, cfg)
543591

544-
ms = np.arange(1000, 4000, 1000, dtype=np.int64)
592+
assert len(vfs.ls(meta_path)) == 1
545593

546-
for sz, m in enumerate(ms):
547-
cfg = tiledb.Config({"sm.group.timestamp_end": m})
594+
def test_consolidation_and_vac_no_config(self):
595+
vfs = tiledb.VFS()
596+
path = self.path("test_consolidation_and_vac")
597+
tiledb.Group.create(path)
548598

549-
G = tiledb.Group(group_uri)
599+
with tiledb.Group(path, "w") as grp:
600+
grp.meta["meta"] = 1
550601

551-
# Cannot set config on open group
552-
with self.assertRaises(ValueError):
553-
G.set_config(cfg)
602+
with tiledb.Group(path, "w") as grp:
603+
grp.meta["meta"] = 2
554604

555-
G.close()
556-
G.set_config(cfg)
605+
with tiledb.Group(path, "w") as grp:
606+
grp.meta["meta"] = 3
557607

558-
G.open()
559-
assert len(G) == sz
560-
G.close()
608+
meta_path = pathlib.Path(path) / "__meta"
609+
assert len(vfs.ls(meta_path)) == 3
561610

562-
for sz, m in enumerate(ms):
563-
cfg = tiledb.Config({"sm.group.timestamp_end": m})
611+
tiledb.Group.consolidate_metadata(path)
612+
tiledb.Group.vacuum_metadata(path)
564613

565-
with tiledb.Group(group_uri, config=cfg) as G:
566-
assert len(G) == sz
614+
assert len(vfs.ls(meta_path)) == 1

0 commit comments

Comments
 (0)