From a793c9b2d360fcc31a5fcde6b8a72bc04640695d Mon Sep 17 00:00:00 2001 From: Vivian Nguyen Date: Thu, 30 Sep 2021 12:35:45 -0500 Subject: [PATCH] Addition of `Ctx.get_stats` and `Query.get_stats` (#698) * Addition of `Ctx.get_stats` and `Query.get_stats` * Update HISTORY.md * Update HISTORY.md --- HISTORY.md | 7 ++++++- tiledb/core.cc | 3 +++ tiledb/libtiledb.pxd | 10 ++++++++++ tiledb/libtiledb.pyx | 22 +++++++++++++++++++++- tiledb/tests/test_libtiledb.py | 34 ++++++++++++++++++++++++++++++++++ 5 files changed, 74 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 537677f099..37a5a01c25 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -7,9 +7,14 @@ # TileDB-Py 0.10.1 Release Notes ## API Changes -* Do not require `domain=(None, None)` for string dimensions [#662](https://github.com/TileDB-Inc/TileDB-Py/pull/662) * Deprecate sparse writes to dense arrays [#681](https://github.com/TileDB-Inc/TileDB-Py/pull/681) * Addition of `Attr.isascii` [#681](https://github.com/TileDB-Inc/TileDB-Py/pull/681) +* Addition of `Ctx.get_stats` and `Query.get_stats` [#698](https://github.com/TileDB-Inc/TileDB-Py/pull/698) + +# TileDB-Py 0.10.1 Release Notes + +## API Changes +* Do not require `domain=(None, None)` for string dimensions [#662](https://github.com/TileDB-Inc/TileDB-Py/pull/662) ## Improvements * Print a warning about ContextVar bug when running under ipykernel < 6.0.
[#665](https://github.com/TileDB-Inc/TileDB-Py/pull/665) diff --git a/tiledb/core.cc b/tiledb/core.cc index 59055c2941..25f6f98d0d 100644 --- a/tiledb/core.cc +++ b/tiledb/core.cc @@ -1359,6 +1359,8 @@ class PyQuery { // test helper to get the configured init_buffer_bytes return alloc_max_bytes_; } + + std::string get_stats() { return query_->stats(); } }; void init_stats() { @@ -1462,6 +1464,7 @@ void init_core(py::module &m) { .def("submit", &PyQuery::submit) .def("unpack_buffer", &PyQuery::unpack_buffer) .def("estimated_result_sizes", &PyQuery::estimated_result_sizes) + .def("get_stats", &PyQuery::get_stats) .def("_allocate_buffers", &PyQuery::allocate_buffers) .def("_get_buffers", &PyQuery::get_buffers) .def("_buffer_to_pa", &PyQuery::buffer_to_pa) diff --git a/tiledb/libtiledb.pxd b/tiledb/libtiledb.pxd index 013bbcdb09..7b4b0e6d54 100644 --- a/tiledb/libtiledb.pxd +++ b/tiledb/libtiledb.pxd @@ -239,6 +239,10 @@ cdef extern from "tiledb/tiledb.h": int tiledb_ctx_get_last_error( tiledb_ctx_t* ctx, tiledb_error_t** error) + + int tiledb_ctx_get_stats( + tiledb_ctx_t* ctx, + char** stats_json); int tiledb_ctx_is_supported_fs( tiledb_ctx_t* ctx, @@ -848,6 +852,11 @@ cdef extern from "tiledb/tiledb.h": const char* attr_name, uint64_t* size_off, uint64_t* size_val) + + int tiledb_query_get_stats( + tiledb_ctx_t* ctx, + tiledb_query_t* query, + char** stats_json); # Array int tiledb_array_alloc( @@ -1292,6 +1301,7 @@ cdef class Array(object): cdef object df cdef Metadata meta cdef object last_fragment_info + cdef object pyquery cdef _ndarray_is_varlen(self, np.ndarray array) diff --git a/tiledb/libtiledb.pyx b/tiledb/libtiledb.pyx index 035da46e3b..e4e61a4e1b 100644 --- a/tiledb/libtiledb.pyx +++ b/tiledb/libtiledb.pyx @@ -1150,6 +1150,18 @@ cdef class Ctx(object): self.set_tag('x-tiledb-api-language', 'python') self.set_tag('x-tiledb-api-language-version', '{}.{}.{}'.format(*sys.version_info)) self.set_tag('x-tiledb-api-sys-platform', sys.platform) + + def 
get_stats(self): + """Retrieves the stats from a TileDB context.""" + import json + cdef tiledb_ctx_t* ctx_ptr = self.ptr + cdef int rc = TILEDB_OK + cdef char* stats_json + rc = tiledb_ctx_get_stats(ctx_ptr, &stats_json) + if rc != TILEDB_OK: + _raise_ctx_err(ctx_ptr, rc) + cdef unicode stats = stats_json.decode('UTF-8', 'strict') + return stats def _tiledb_datetime_extent(begin, end): @@ -4665,6 +4677,13 @@ cdef class Query(object): # Delayed to avoid circular import from .multirange_indexing import DataFrameIndexer return DataFrameIndexer(self.array, query=self, use_arrow=self.use_arrow) + + def get_stats(self): + """Retrieves the stats from a TileDB query.""" + pyquery = self.array.pyquery + if pyquery is None: + return "" + return self.array.pyquery.get_stats() cdef class DenseArrayImpl(Array): @@ -4908,9 +4927,9 @@ cdef class DenseArrayImpl(Array): from tiledb.main import PyQuery q = PyQuery(self._ctx_(), self, tuple(attr_names), tuple(), layout, False) + self.pyquery = q q.set_ranges([list([x]) for x in subarray]) q.submit() - cdef object results = OrderedDict() results = q.results() @@ -5566,6 +5585,7 @@ cdef class SparseArrayImpl(Array): from tiledb.main import PyQuery q = PyQuery(self._ctx_(), self, tuple(attr_names), tuple(), layout, False) + self.pyquery = q q.set_ranges([list([x]) for x in subarray]) q.submit() diff --git a/tiledb/tests/test_libtiledb.py b/tiledb/tests/test_libtiledb.py index 1177c9a852..bd6d484c41 100644 --- a/tiledb/tests/test_libtiledb.py +++ b/tiledb/tests/test_libtiledb.py @@ -3926,6 +3926,40 @@ def test_init_config(self): self.assertEqual(3, init_test_wrapper({"sm.io_concurrency_level": 3})) +class GetStatsTest(DiskTestCase): + def test_ctx(self): + tiledb.libtiledb.stats_enable() + ctx = tiledb.default_ctx() + uri = self.path("test_ctx") + dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) + att = tiledb.Attr(dtype=np.int64) + schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) + tiledb.Array.create(uri, schema) + 
+ with tiledb.open(uri, mode="w", ctx=ctx) as T: + T[:] = np.random.randint(10, size=3) + + assert "Context.StorageManager.write_store" in ctx.get_stats() + + def test_query(self): + tiledb.libtiledb.stats_enable() + uri = self.path("test_ctx") + dom = tiledb.Domain(tiledb.Dim(domain=(0, 2), dtype=np.int64)) + att = tiledb.Attr(dtype=np.int64) + schema = tiledb.ArraySchema(domain=dom, attrs=(att,)) + tiledb.Array.create(uri, schema) + + with tiledb.open(uri, mode="w") as T: + T[:] = np.random.randint(10, size=3) + + with tiledb.open(uri, mode="r") as T: + q = T.query() + assert "" == q.get_stats() + + q[:] + assert "Context.StorageManager.Query" in q.get_stats() + + class ReprTest(DiskTestCase): def test_attr_repr(self): attr = tiledb.Attr(name="itsanattr", dtype=np.float64)