Skip to content

Commit caa9906

Browse files
authored
Add support for numpy2 (#1969)
1 parent 5b1f60e commit caa9906

File tree

10 files changed

+46
-44
lines changed

10 files changed

+46
-44
lines changed

.github/workflows/daily-test-build-numpy.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,16 @@ jobs:
2121
# https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
2222
- python-version: "3.12"
2323
numpy-version: "1.26.4"
24+
- python-version: "3.12"
25+
numpy-version: "2.0.0rc2"
2426
- python-version: "3.11"
2527
numpy-version: "1.23.2"
28+
- python-version: "3.11"
29+
numpy-version: "2.0.0rc2"
2630
- python-version: "3.10"
2731
numpy-version: "1.21.6"
32+
- python-version: "3.10"
33+
numpy-version: "2.0.0rc2"
2834
- python-version: "3.9"
2935
numpy-version: "1.19.3"
3036
- python-version: "3.8"

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,6 @@ repos:
44
hooks:
55
- id: black
66
- repo: https://github.com/charliermarsh/ruff-pre-commit
7-
rev: v0.0.284
7+
rev: v0.4.4
88
hooks:
99
- id: ruff

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,5 +72,8 @@ extend-select = ["I001"]
7272
extend-exclude = ["doc"]
7373
fix = true
7474

75+
[tool.ruff.lint]
76+
select = ["NPY201"]
77+
7578
[tool.ruff.per-file-ignores]
7679
"tiledb/__init__.py" = ["F401"]

setup.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import sys
77
from ctypes import CDLL, POINTER, Structure, byref, c_char_p, c_int, c_void_p
88

9-
from pkg_resources import resource_filename
9+
import numpy as np
1010
from pybind11.setup_helpers import Pybind11Extension
1111
from setuptools import Extension, find_packages, setup
1212

@@ -478,16 +478,8 @@ class build_ext(cython_build_ext):
478478
"""
479479

480480
def build_extensions(self):
481-
"""
482-
Lazily append numpy's include directory to Extension includes.
483-
484-
This is done here rather than at module scope because setup.py
485-
may be run before numpy has been installed, in which case
486-
importing numpy and calling `numpy.get_include()` will fail.
487-
"""
488-
numpy_incl = resource_filename("numpy", "core/include")
489481
for ext in self.extensions:
490-
ext.include_dirs.append(numpy_incl)
482+
ext.include_dirs.append(np.get_include())
491483

492484
find_or_install_libtiledb(self)
493485

tiledb/highlevel.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ def is_ndarray_like(arr):
224224
elif shape and dtype:
225225
if np.issubdtype(np.bytes_, dtype):
226226
dtype = np.dtype("S")
227-
elif np.issubdtype(dtype, np.unicode_):
227+
elif np.issubdtype(dtype, np.str_):
228228
dtype = np.dtype("U")
229229

230230
ndim = len(shape)

tiledb/libtiledb.pyx

Lines changed: 17 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@ import io
1212
import warnings
1313
import collections.abc
1414
from collections import OrderedDict
15-
from json import dumps as json_dumps
16-
from json import loads as json_loads
15+
from json import dumps as json_dumps, loads as json_loads
1716

1817
from ._generated_version import version_tuple as tiledbpy_version
1918
from .array_schema import ArraySchema
@@ -35,10 +34,7 @@ np.import_array()
3534

3635
# Integer types supported by Python / System
3736
_inttypes = (int, np.integer)
38-
39-
# Numpy initialization code (critical)
40-
# https://docs.scipy.org/doc/numpy/reference/c-api.array.html#c.import_array
41-
np.import_array()
37+
np.set_printoptions(legacy='1.21') # use unified numpy printing
4238

4339

4440
cdef tiledb_ctx_t* safe_ctx_ptr(object ctx):
@@ -145,8 +141,7 @@ cdef _write_array(
145141
if attr.isvar:
146142
try:
147143
if attr.isnullable:
148-
if(np.issubdtype(attr.dtype, np.unicode_)
149-
or np.issubdtype(attr.dtype, np.string_)
144+
if(np.issubdtype(attr.dtype, np.str_)
150145
or np.issubdtype(attr.dtype, np.bytes_)):
151146
attr_val = np.array(["" if v is None else v for v in values[i]])
152147
else:
@@ -601,7 +596,7 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
601596
dim = dom.dim(r)
602597
dim_dtype = dim.dtype
603598

604-
if array.mode == 'r' and (np.issubdtype(dim_dtype, np.unicode_) or np.issubdtype(dim_dtype, np.bytes_)):
599+
if array.mode == 'r' and (np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_)):
605600
# NED can only be retrieved in read mode
606601
ned = array.nonempty_domain()
607602
(dim_lb, dim_ub) = ned[r] if ned else (None, None)
@@ -612,7 +607,11 @@ def index_domain_subarray(array: Array, dom, idx: tuple):
612607
if not isinstance(dim_slice, slice):
613608
raise IndexError("invalid index type: {!r}".format(type(dim_slice)))
614609

610+
# numpy2 doesn't allow addition between int and np.int64 - NEP 50
615611
start, stop, step = dim_slice.start, dim_slice.stop, dim_slice.step
612+
start = np.int64(start) if isinstance(start, int) else start
613+
stop = np.int64(stop) if isinstance(stop, int) else stop
614+
step = np.int64(step) if isinstance(step, int) else step
616615

617616
if np.issubdtype(dim_dtype, np.str_) or np.issubdtype(dim_dtype, np.bytes_):
618617
if start is None or stop is None:
@@ -1503,7 +1502,7 @@ cdef class Array(object):
15031502

15041503
cdef _ndarray_is_varlen(self, np.ndarray array):
15051504
return (np.issubdtype(array.dtype, np.bytes_) or
1506-
np.issubdtype(array.dtype, np.unicode_) or
1505+
np.issubdtype(array.dtype, np.str_) or
15071506
array.dtype == object)
15081507

15091508
@property
@@ -2526,8 +2525,8 @@ cdef class DenseArrayImpl(Array):
25262525
dtype=np.uint8
25272526
)
25282527
else:
2529-
if (np.issubdtype(attr.dtype, np.string_) and not
2530-
(np.issubdtype(attr_val.dtype, np.string_) or attr_val.dtype == np.dtype('O'))):
2528+
if (np.issubdtype(attr.dtype, np.bytes_) and not
2529+
(np.issubdtype(attr_val.dtype, np.bytes_) or attr_val.dtype == np.dtype('O'))):
25312530
raise ValueError("Cannot write a string value to non-string "
25322531
"typed attribute '{}'!".format(name))
25332532

@@ -2541,7 +2540,7 @@ cdef class DenseArrayImpl(Array):
25412540
dtype=np.uint8
25422541
)
25432542

2544-
if np.issubdtype(attr.dtype, np.string_):
2543+
if np.issubdtype(attr.dtype, np.bytes_):
25452544
attr_val = np.array(
25462545
["" if v is None else v for v in attr_val])
25472546
else:
@@ -2575,8 +2574,8 @@ cdef class DenseArrayImpl(Array):
25752574
if attr.isnullable and name not in nullmaps:
25762575
nullmaps[name] = np.array([int(v is None) for v in val], dtype=np.uint8)
25772576
else:
2578-
if (np.issubdtype(attr.dtype, np.string_) and not
2579-
(np.issubdtype(val.dtype, np.string_) or val.dtype == np.dtype('O'))):
2577+
if (np.issubdtype(attr.dtype, np.bytes_) and not
2578+
(np.issubdtype(val.dtype, np.bytes_) or val.dtype == np.dtype('O'))):
25802579
raise ValueError("Cannot write a string value to non-string "
25812580
"typed attribute '{}'!".format(name))
25822581

@@ -3063,8 +3062,8 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
30633062
nullmaps[name] = np.array(
30643063
[int(v is not None) for v in attr_val], dtype=np.uint8)
30653064
else:
3066-
if (np.issubdtype(attr.dtype, np.string_)
3067-
and not (np.issubdtype(attr_val.dtype, np.string_)
3065+
if (np.issubdtype(attr.dtype, np.bytes_)
3066+
and not (np.issubdtype(attr_val.dtype, np.bytes_)
30683067
or attr_val.dtype == np.dtype('O'))):
30693068
raise ValueError("Cannot write a string value to non-string "
30703069
"typed attribute '{}'!".format(name))
@@ -3076,7 +3075,7 @@ def _setitem_impl_sparse(self: Array, selection, val, dict nullmaps):
30763075
nullmaps[name] = np.array(
30773076
[int(v is not None) for v in attr_val], dtype=np.uint8)
30783077

3079-
if np.issubdtype(attr.dtype, np.string_):
3078+
if np.issubdtype(attr.dtype, np.bytes_):
30803079
attr_val = np.array(["" if v is None else v for v in attr_val])
30813080
else:
30823081
attr_val = np.nan_to_num(attr_val)

tiledb/multirange_indexing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -422,7 +422,7 @@ def __init__(
422422
# Until list attributes are supported in core, error with a clear message.
423423
if use_arrow and any(
424424
(attr.isvar or len(attr.dtype) > 1)
425-
and attr.dtype not in (np.unicode_, np.bytes_)
425+
and attr.dtype not in (np.str_, np.bytes_)
426426
for attr in map(array.attr, query.attrs or ())
427427
):
428428
raise TileDBError(

tiledb/tests/test_attribute.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ def test_minimal_attribute(self):
1616
self.assertEqual(attr, attr)
1717
self.assertTrue(attr.isanon)
1818
self.assertEqual(attr.name, "")
19-
self.assertEqual(attr.dtype, np.float_)
19+
self.assertEqual(attr.dtype, np.float64)
2020
self.assertFalse(attr.isvar)
2121
self.assertFalse(attr.isnullable)
2222

tiledb/tests/test_libtiledb.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,10 +1230,10 @@ def test_reopen_dense_array(self, use_timestamps):
12301230

12311231
def test_data_begins_with_null_chars(self):
12321232
path = self.path("test_data_begins_with_null_chars")
1233-
data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.unicode_)
1233+
data = np.array(["", "", "", "a", "", "", "", "", "", "b"], dtype=np.str_)
12341234

12351235
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data)))
1236-
att = tiledb.Attr(dtype=np.unicode_, var=True)
1236+
att = tiledb.Attr(dtype=np.str_, var=True)
12371237
schema = tiledb.ArraySchema(dom, (att,))
12381238
tiledb.Array.create(path, schema)
12391239

@@ -1325,12 +1325,12 @@ def test_varlen_write_unicode(self):
13251325
"",
13261326
"hhhhhhhhhh",
13271327
],
1328-
dtype=np.unicode_,
1328+
dtype=np.str_,
13291329
)
13301330

13311331
# basic write
13321332
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A)))
1333-
att = tiledb.Attr(dtype=np.unicode_, var=True)
1333+
att = tiledb.Attr(dtype=np.str_, var=True)
13341334

13351335
schema = tiledb.ArraySchema(dom, (att,))
13361336

@@ -1487,7 +1487,7 @@ def test_varlen_write_fixedunicode(self):
14871487

14881488
# basic write
14891489
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(A)), tile=len(A)))
1490-
att = tiledb.Attr(dtype=np.unicode_)
1490+
att = tiledb.Attr(dtype=np.str_)
14911491

14921492
schema = tiledb.ArraySchema(dom, (att,))
14931493

@@ -1991,7 +1991,7 @@ def test_sparse_bytes(self, fx_sparse_cell_order):
19911991

19921992
def test_sparse_unicode(self, fx_sparse_cell_order):
19931993
dom = tiledb.Domain(tiledb.Dim("x", domain=(1, 10000), tile=100, dtype=int))
1994-
att = tiledb.Attr("", var=True, dtype=np.unicode_)
1994+
att = tiledb.Attr("", var=True, dtype=np.str_)
19951995
schema = tiledb.ArraySchema(
19961996
domain=dom, attrs=(att,), sparse=True, cell_order=fx_sparse_cell_order
19971997
)
@@ -3514,11 +3514,11 @@ def test_incomplete_dense_varlen(self, non_overlapping_ranges):
35143514
ncells = 10
35153515
path = self.path("incomplete_dense_varlen")
35163516
str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)]
3517-
data = np.array(str_data, dtype=np.unicode_)
3517+
data = np.array(str_data, dtype=np.str_)
35183518

35193519
# basic write
35203520
dom = tiledb.Domain(tiledb.Dim(domain=(1, len(data)), tile=len(data)))
3521-
att = tiledb.Attr(dtype=np.unicode_, var=True)
3521+
att = tiledb.Attr(dtype=np.str_, var=True)
35223522

35233523
schema = tiledb.ArraySchema(dom, (att,))
35243524

@@ -3556,12 +3556,12 @@ def test_incomplete_sparse_varlen(self, allows_duplicates, non_overlapping_range
35563556

35573557
path = self.path("incomplete_sparse_varlen")
35583558
str_data = [rand_utf8(random.randint(0, n)) for n in range(ncells)]
3559-
data = np.array(str_data, dtype=np.unicode_)
3559+
data = np.array(str_data, dtype=np.str_)
35603560
coords = np.arange(ncells)
35613561

35623562
# basic write
35633563
dom = tiledb.Domain(tiledb.Dim(domain=(0, len(data) + 100), tile=len(data)))
3564-
att = tiledb.Attr(dtype=np.unicode_, var=True)
3564+
att = tiledb.Attr(dtype=np.str_, var=True)
35653565

35663566
schema = tiledb.ArraySchema(
35673567
dom, (att,), sparse=True, allows_duplicates=allows_duplicates

tiledb/tests/test_pandas_dataframe.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1324,8 +1324,10 @@ def test_incomplete_df(self, allows_duplicates, non_overlapping_ranges):
13241324
data[validity_idx] = None
13251325

13261326
# TODO - not supported
1327-
# str_data = np.array([rand_utf8(random.randint(0, n)) for n in range(ncells)],
1328-
# dtype=np.unicode_)
1327+
# str_data = np.array(
1328+
# [rand_utf8(random.randint(0, n)) for n in range(ncells)],
1329+
# dtype=np.str_,
1330+
# )
13291331
# str_data[validity_idx] = None
13301332

13311333
df = pd.DataFrame({"int64": pd.Series(data, dtype=pd.Int64Dtype())})

0 commit comments

Comments
 (0)