Skip to content

Commit 3abea77

Browse files
jp-dark, perrygeo, kounelisagis
authored
Add support for creating WKB/WKT attributes (#1912)
* Add support for WKB/WKT attributes
* Add tests

---------

Co-authored-by: Matthew Perry <[email protected]>
Co-authored-by: Agis Kounelis <[email protected]>
1 parent a1a7137 commit 3abea77

File tree

8 files changed

+103
-15
lines changed

8 files changed

+103
-15
lines changed

tiledb/attribute.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def __init__(
4040
:raises tiledb.TileDBError:
4141
"""
4242
dt = DataType.from_numpy(
43-
np.dtype(dtype) if dtype not in ("ascii", "blob") else dtype
43+
np.dtype(dtype) if dtype not in ("ascii", "blob", "wkb", "wkt") else dtype
4444
)
4545

4646
# ensure that all strings are var-length
@@ -274,6 +274,16 @@ def __repr__(self):
274274
attr_dtype = "ascii"
275275
elif self._tiledb_dtype == lt.DataType.BLOB:
276276
attr_dtype = "blob"
277+
elif (
278+
hasattr(lt.DataType, "GEOM_WKB")
279+
and self._tiledb_dtype == lt.DataType.GEOM_WKB
280+
):
281+
attr_dtype = "wkb"
282+
elif (
283+
hasattr(lt.DataType, "GEOM_WKT")
284+
and self._tiledb_dtype == lt.DataType.GEOM_WKT
285+
):
286+
attr_dtype = "wkt"
277287
else:
278288
attr_dtype = self.dtype
279289

tiledb/cc/attribute.cc

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -19,28 +19,42 @@ void set_fill_value(Attribute &attr, py::array value) {
1919
}
2020

2121
py::array get_fill_value(Attribute &attr) {
22+
// Get the fill value from the C++ API as a void* value.
2223
const void *value;
2324
uint64_t size;
24-
2525
attr.get_fill_value(&value, &size);
2626

27-
auto value_num = attr.cell_val_num();
28-
auto value_type = tdb_to_np_dtype(attr.type(), value_num);
29-
27+
// If this is a string type, we want to return each value as a single cell.
3028
if (is_tdb_str(attr.type())) {
31-
value_type = py::dtype("|S1");
32-
value_num = size;
29+
auto value_type = py::dtype("|S1");
30+
return py::array(value_type, size, value);
3331
}
3432

35-
// record type
33+
// If this is a record type (void), return a single cell.
34+
// If this is a blob-like type, we want to return each value as a single byte
35+
// cell.
36+
auto tdb_type = attr.type();
37+
if (tdb_type == TILEDB_BLOB
38+
#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 21
39+
|| tdb_type == TILEDB_GEOM_WKB || tdb_type == TILEDB_GEOM_WKT
40+
#endif
41+
) {
42+
auto value_type = py::dtype("S");
43+
return py::array(value_type, size, value);
44+
}
45+
46+
// Get the number of values in a cell and the Python datatype.
47+
auto value_num = attr.cell_val_num();
48+
auto value_type = tdb_to_np_dtype(attr.type(), value_num);
49+
3650
if (py::str(value_type.attr("kind")) == py::str("V")) {
37-
value_num = 1;
51+
return py::array(value_type, 1, value);
3852
}
3953

40-
// complex type - both cell values fit in a single complex element
54+
// If this is a complex type both cell values fit in a single complex element.
4155
if (value_type == py::dtype("complex64") ||
4256
value_type == py::dtype("complex128")) {
43-
value_num = 1;
57+
return py::array(value_type, 1, value);
4458
}
4559

4660
return py::array(value_type, value_num, value);

tiledb/cc/common.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ std::unordered_map<tiledb_datatype_t, std::string> _tdb_to_np_name_dtype = {
4343
{TILEDB_BOOL, "bool"},
4444
#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 21
4545
{TILEDB_GEOM_WKB, "byte"},
46-
{TILEDB_GEOM_WKT, "S1"},
46+
{TILEDB_GEOM_WKT, "S"},
4747
#endif
4848
};
4949

tiledb/core.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ py::dtype tiledb_dtype(tiledb_datatype_t type, uint32_t cell_val_num) {
218218
case TILEDB_GEOM_WKB:
219219
return py::dtype("byte");
220220
case TILEDB_GEOM_WKT:
221-
return py::dtype("S1");
221+
return py::dtype("S");
222222
#endif
223223

224224
case TILEDB_ANY:

tiledb/datatypes.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,12 @@ def from_numpy(cls, dtype: np.dtype) -> DataType:
2424
if dtype == "blob":
2525
return cls(np.dtype("S"), lt.DataType.BLOB, 1)
2626

27+
if hasattr(lt.DataType, "GEOM_WKB") and dtype == "wkb":
28+
return cls(np.dtype("S"), lt.DataType.GEOM_WKB, 1)
29+
30+
if hasattr(lt.DataType, "GEOM_WKT") and dtype == "wkt":
31+
return cls(np.dtype("S"), lt.DataType.GEOM_WKT, 1)
32+
2733
dtype = np.dtype(dtype)
2834
if dtype.kind == "V":
2935
# fixed-size record dtypes
@@ -179,10 +185,13 @@ def uncast_tile_extent(self, tile_extent: Any) -> np.generic:
179185
_NUMPY_TO_TILEDB[np.dtype("complex64")] = lt.DataType.FLOAT32
180186
_NUMPY_TO_TILEDB[np.dtype("complex128")] = lt.DataType.FLOAT64
181187

182-
# tiledb has STRING_ASCII and BLOB, numpy doesn't
188+
# tiledb has STRING_ASCII, BLOB, WKB and WKT types, numpy doesn't
183189
_TILEDB_TO_NUMPY = {t: n for n, t in _COMMON_DATATYPES}
184190
_TILEDB_TO_NUMPY[lt.DataType.STRING_ASCII] = np.dtype("S")
185191
_TILEDB_TO_NUMPY[lt.DataType.BLOB] = np.dtype("S")
192+
if hasattr(lt.DataType, "GEOM_WKB"):
193+
_TILEDB_TO_NUMPY[lt.DataType.GEOM_WKB] = np.dtype("S")
194+
_TILEDB_TO_NUMPY[lt.DataType.GEOM_WKT] = np.dtype("S")
186195

187196
# pre-populate the LRU caches with all ncell=1 datatypes
188197
list(map(DataType.from_numpy, _NUMPY_TO_TILEDB.keys()))

tiledb/libtiledb.pxd

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ cdef extern from "tiledb/tiledb.h":
6060
TILEDB_FLOAT32
6161
TILEDB_FLOAT64
6262
TILEDB_BLOB
63+
TILEDB_GEOM_WKB
64+
TILEDB_GEOM_WKT
6365
TILEDB_CHAR
6466
TILEDB_INT8
6567
TILEDB_UINT8

tiledb/py_arrow_io_impl.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,10 @@ ArrowInfo tiledb_buffer_arrow_fmt(BufferInfo bufferinfo, bool use_list = true) {
199199
case TILEDB_FLOAT64:
200200
return ArrowInfo("g");
201201
case TILEDB_BLOB:
202+
#if TILEDB_VERSION_MAJOR >= 2 && TILEDB_VERSION_MINOR >= 21
203+
case TILEDB_GEOM_WKB:
204+
case TILEDB_GEOM_WKT:
205+
#endif
202206
return ArrowInfo("B");
203207
case TILEDB_INT8:
204208
return ArrowInfo("c");

tiledb/tests/test_attribute.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ def test_minimal_attribute(self):
2525
except:
2626
pytest.fail(f"Could not parse attr._repr_html_(). Saw {attr._repr_html_()}")
2727

28-
def test_attribute(self, capfd):
28+
def test_attribute_name_only(self, capfd):
2929
attr = tiledb.Attr("foo")
3030

3131
attr.dump()
@@ -252,3 +252,52 @@ def test_modify_attribute_in_schema(self):
252252
assert "can't set attribute" in str(exc.value)
253253
else:
254254
assert "object has no setter" in str(exc.value)
255+
256+
def test_wkt_attribute(self):
257+
A = np.array(
258+
["POINT (30 10)", "POLYGON ((3 1, 4 5, 2 2, 1 2, 3 1))"],
259+
dtype="S",
260+
)
261+
262+
dom = tiledb.Domain(tiledb.Dim(domain=(0, 1), tile=2))
263+
att = tiledb.Attr(dtype="wkt", var=True)
264+
265+
schema = tiledb.ArraySchema(dom, (att,))
266+
267+
tiledb.DenseArray.create(self.path("foo"), schema)
268+
with tiledb.DenseArray(self.path("foo"), mode="w") as T:
269+
T[:] = A
270+
271+
# read back the data
272+
with tiledb.DenseArray(self.path("foo"), mode="r") as T:
273+
for i in range(2):
274+
assert_array_equal(T[i], A[i])
275+
276+
def test_wkb_attribute(self):
277+
A = np.array(
278+
[
279+
# representation of POINT (30 10)
280+
b"\x01\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00>@\x00\x00\x00\x00\x00\x00$@",
281+
# representation of POLYGON ((3 1, 4 5, 2 2, 1 2, 3 1))
282+
(
283+
b"\x01\x03\x00\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08@"
284+
b"\x00\x00\x00\x00\x00\x00\xf0?\x00\x00\x00\x00\x00\x00\x10@\x00\x00\x00\x00\x00\x00\x14@"
285+
b"\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\xf0?"
286+
b"\x00\x00\x00\x00\x00\x00\x00@\x00\x00\x00\x00\x00\x00\x08@\x00\x00\x00\x00\x00\x00\xf0?"
287+
),
288+
],
289+
)
290+
291+
dom = tiledb.Domain(tiledb.Dim(domain=(0, 1), tile=2))
292+
att = tiledb.Attr(dtype="wkb", var=True)
293+
294+
schema = tiledb.ArraySchema(dom, (att,))
295+
296+
tiledb.DenseArray.create(self.path("foo"), schema)
297+
with tiledb.DenseArray(self.path("foo"), mode="w") as T:
298+
T[:] = A
299+
300+
# read back the data
301+
with tiledb.DenseArray(self.path("foo"), mode="r") as T:
302+
for i in range(2):
303+
assert_array_equal(T[:][i].tobytes(), A[i])

0 commit comments

Comments
 (0)