Skip to content

Commit ccc6bca

Browse files
committed
Improve multi_index query/coords behavior
- Don't return coords for dense multi_index by default (#347)
- Fix and test coords exclusion for sparse array queries
1 parent 35b5d30 commit ccc6bca

File tree

5 files changed

+21
-15
lines changed

5 files changed

+21
-15
lines changed

Diff for: tiledb/core.cc

+7-8
Original file line numberDiff line numberDiff line change
@@ -214,21 +214,22 @@ class PyQuery {
214214
array_ = std::shared_ptr<tiledb::Array>(new Array(ctx_, c_array_, false),
215215
[](Array *p) {} /* no deleter*/);
216216

217+
bool issparse = array_->schema().array_type() == TILEDB_SPARSE;
218+
217219
query_ = std::shared_ptr<tiledb::Query>(
218220
new Query(ctx_, *array_, TILEDB_READ));
219221
// [](Query* p){} /* note: no deleter*/);
220222

221223
tiledb_layout_t layout = (tiledb_layout_t)py_layout.cast<int32_t>();
222-
if (array_->schema().array_type() == TILEDB_DENSE &&
223-
layout == TILEDB_UNORDERED) {
224+
if (issparse && layout == TILEDB_UNORDERED) {
224225
TPY_ERROR_LOC("TILEDB_UNORDERED read is not supported for dense arrays")
225226
}
226227
query_->set_layout(layout);
227228

228-
if (coords.is(py::none())) {
229-
include_coords_ = true;
230-
} else {
229+
if (!coords.is(py::none())) {
231230
include_coords_ = coords.cast<bool>();
231+
} else {
232+
include_coords_ = issparse;
232233
}
233234

234235
for (auto a : attrs) {
@@ -553,10 +554,8 @@ class PyQuery {
553554

554555
void submit_read() {
555556
auto schema = array_->schema();
556-
auto issparse = schema.array_type() == TILEDB_SPARSE;
557-
auto need_dim_buffers = include_coords_ || issparse;
558557

559-
if (need_dim_buffers) {
558+
if (include_coords_) {
560559
auto domain = schema.domain();
561560
for (auto dim : domain.dimensions()) {
562561
alloc_buffer(dim.name());

Diff for: tiledb/multirange_indexing.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,7 @@ def sel_to_subranges(dim_sel):
5151

5252
class MultiRangeIndexer(object):
5353
"""
54-
Implements multi-range / outer / orthogonal indexing.
55-
54+
Implements multi-range indexing.
5655
"""
5756

5857
def __init__(self, array, query = None):
@@ -111,7 +110,6 @@ def __getitem__(self, idx):
111110
schema = self.schema
112111
dom = self.schema.domain
113112
attr_names = tuple(self.schema.attr(i)._internal_name for i in range(self.schema.nattr))
114-
115113
coords = None
116114
order = 'C' # TILEDB_ROW_MAJOR
117115
if self.query is not None:

Diff for: tiledb/tests/test_libtiledb.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2853,7 +2853,9 @@ def test_tiledb_py_0_6_anon_attr(self):
28532853
self.assertEqual(A[0], 1)
28542854
mres = A.multi_index[0]
28552855
self.assertEqual(mres[''], 1)
2856-
self.assertEqual(mres['d'], 0)
2856+
2857+
qres = A.query(coords=True).multi_index[0]
2858+
self.assertEqual(qres['d'], 0)
28572859

28582860
class MemoryTest(DiskTestCase):
28592861
# sanity check that memory usage doesn't increase more than 2x when reading 40MB 100x

Diff for: tiledb/tests/test_multi_index.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,7 @@ def test_multirange_1d_dense_int64(self):
299299
with tiledb.open(path) as A:
300300
# stepped ranges are not supported
301301
with self.assertRaises(ValueError):
302-
A.multi_index[ 1::2 ]
302+
A.query(coords=True).multi_index[ 1::2 ]
303303

304304
assert_array_equal(
305305
orig_array[ [0,-1] ],
@@ -311,7 +311,7 @@ def test_multirange_1d_dense_int64(self):
311311
)
312312
self.assertEqual(
313313
-10,
314-
A.multi_index[-10]['coords'].view('i8')
314+
A.query(coords=True).multi_index[-10]['coords'].view('i8')
315315
)
316316
assert_array_equal(
317317
orig_array[0:],
@@ -576,6 +576,14 @@ def test_multirange_1d_sparse_query(self):
576576
res[k]
577577
)
578578

579+
with tiledb.open(path) as A:
580+
Q = A.query(coords=False, attrs=["U"])
581+
res = Q.multi_index[:]
582+
self.assertTrue("U" in res)
583+
self.assertTrue("V" not in res)
584+
self.assertTrue("coords" not in res)
585+
assert_array_equal(res["U"], data["U"])
586+
579587
def test_multirange_1d_dense_vectorized(self):
580588
ctx = tiledb.Ctx()
581589
path = self.path('mr_1d_dense_vectorized')

Diff for: tiledb/tests/test_pandas_dataframe.py

-1
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,6 @@ def test_csv_chunked(self):
467467
ned = A.nonempty_domain()[0]
468468
# TODO should support numpy scalar here
469469
res = A.multi_index[int(ned[0]):int(ned[1])]
470-
res.pop('rows')
471470
df_bk = pd.DataFrame(res)
472471

473472
tm.assert_frame_equal(df_bk, df)

0 commit comments

Comments
 (0)