Skip to content

support for datetime and timedelta dtypes (#2616) #2884

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/2616.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
NumPy's ``datetime64`` (``M8``) and ``timedelta64`` (``m8``) dtypes are supported for Zarr arrays, as long as the time unit is specified (for example ``M8[D]``).
24 changes: 17 additions & 7 deletions docs/user-guide/arrays.rst
Original file line number Diff line number Diff line change
Expand Up @@ -619,6 +619,23 @@ In this example a shard shape of (1000, 1000) and a chunk shape of (100, 100) is
This means that 10*10 chunks are stored in each shard, and there are 10*10 shards in total.
Without the ``shards`` argument, there would be 10,000 chunks stored as individual files.

.. _user-guide-datetime:

Datetime and Timedelta arrays
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
NumPy's ``datetime64`` (``M8``) and ``timedelta64`` (``m8``) dtypes are supported for Zarr arrays, as long as the time unit is specified (for example ``M8[D]``). For example:

.. code-block:: python

    >>> data = np.array(['2007-07-13', '2006-01-13', '2010-08-13'], dtype='M8[D]')
    >>> z = zarr.create_array(store='data/example-datetime.zarr', shape=data.shape, dtype=data.dtype)
    >>> z[:] = data
    >>> z[:]
    array(['2007-07-13', '2006-01-13', '2010-08-13'], dtype='datetime64[D]')
    >>> z[0] = '1999-12-31'
    >>> z[:]
    array(['1999-12-31', '2006-01-13', '2010-08-13'], dtype='datetime64[D]')


Missing features in 3.0
-----------------------

Expand All @@ -639,13 +656,6 @@ Fixed-length string arrays

See the Zarr-Python 2 documentation on `Fixed-length string arrays <https://zarr.readthedocs.io/en/support-v2/tutorial.html#string-arrays>`_ for more details.

.. _user-guide-datetime:

Datetime and Timedelta arrays
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

See the Zarr-Python 2 documentation on `Datetime and Timedelta <https://zarr.readthedocs.io/en/support-v2/tutorial.html#datetimes-and-timedeltas>`_ for more details.

.. _user-guide-copy:

Copying and migrating data
Expand Down
137 changes: 129 additions & 8 deletions src/zarr/core/metadata/v3.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import re
import warnings
from typing import TYPE_CHECKING, TypedDict, overload

Expand Down Expand Up @@ -174,11 +175,13 @@
return str(o)
if np.isscalar(o):
out: Any
if hasattr(o, "dtype") and o.dtype.kind == "M" and hasattr(o, "view"):
if hasattr(o, "dtype") and o.dtype.kind in "Mm" and hasattr(o, "view"):
# https://github.com/zarr-developers/zarr-python/issues/2119
# `.item()` on a datetime type might or might not return an
# integer, depending on the value.
# Explicitly cast to an int first, and then grab .item()
if np.isnat(o):
return "NaT"
out = o.view("i8").item()
else:
# convert numpy scalar to python type, and pass
Expand Down Expand Up @@ -440,12 +443,25 @@
FLOAT = np.float16 | np.float32 | np.float64
COMPLEX_DTYPE = Literal["complex64", "complex128"]
COMPLEX = np.complex64 | np.complex128
DATETIME_DTYPE = Literal["datetime64"]
DATETIME = np.datetime64
TIMEDELTA_DTYPE = Literal["timedelta64"]
TIMEDELTA = np.timedelta64
STRING_DTYPE = Literal["string"]
STRING = np.str_
BYTES_DTYPE = Literal["bytes"]
BYTES = np.bytes_

ALL_DTYPES = BOOL_DTYPE | INTEGER_DTYPE | FLOAT_DTYPE | COMPLEX_DTYPE | STRING_DTYPE | BYTES_DTYPE
ALL_DTYPES = (
BOOL_DTYPE
| INTEGER_DTYPE
| FLOAT_DTYPE
| COMPLEX_DTYPE
| DATETIME_DTYPE
| TIMEDELTA_DTYPE
| STRING_DTYPE
| BYTES_DTYPE
)


@overload
Expand Down Expand Up @@ -490,6 +506,20 @@
) -> BYTES: ...


# Typing-only overload: when ``dtype`` is typed as DATETIME_DTYPE, the parsed
# fill value is typed as DATETIME (an alias of ``np.datetime64``).
# NOTE(review): DATETIME_DTYPE is ``Literal["datetime64"]``, while the DataType
# members added for datetimes carry a unit suffix (e.g. "datetime64ns") --
# confirm the literal matches the strings callers actually pass.
@overload
def parse_fill_value(
fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool,
dtype: DATETIME_DTYPE,
) -> DATETIME: ...


# Typing-only overload: when ``dtype`` is typed as TIMEDELTA_DTYPE, the parsed
# fill value is typed as TIMEDELTA (an alias of ``np.timedelta64``).
# NOTE(review): TIMEDELTA_DTYPE is ``Literal["timedelta64"]``, while the DataType
# members added for timedeltas carry a unit suffix (e.g. "timedelta64ns") --
# confirm the literal matches the strings callers actually pass.
@overload
def parse_fill_value(
fill_value: complex | str | bytes | np.generic | Sequence[Any] | bool,
dtype: TIMEDELTA_DTYPE,
) -> TIMEDELTA: ...


def parse_fill_value(
fill_value: Any,
dtype: ALL_DTYPES,
Expand Down Expand Up @@ -551,12 +581,24 @@
# fill_value != casted_value below.
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=DeprecationWarning)
casted_value = np.dtype(np_dtype).type(fill_value)
if np.dtype(np_dtype).kind in "Mm":
# datetime64 values have an associated precision
match = re.search(r"\[(.*?)\]", np.dtype(np_dtype).str)
if match:
precision = match.group(1)
else:
precision = "s"

Check warning on line 590 in src/zarr/core/metadata/v3.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v3.py#L590

Added line #L590 was not covered by tests
casted_value = np.dtype(np_dtype).type(fill_value, precision)
else:
casted_value = np.dtype(np_dtype).type(fill_value)
except (ValueError, OverflowError, TypeError) as e:
raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}") from e
# Check if the value is still representable by the dtype
if (fill_value == "NaN" and np.isnan(casted_value)) or (
fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value)
if (
(fill_value == "NaN" and np.isnan(casted_value))
or (fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value))
or (fill_value == "NaT" and np.isnat(casted_value))
or (np.dtype(np_dtype).kind in "Mm" and np.isnat(casted_value) and np.isnat(fill_value))
):
pass
elif np_dtype.kind == "f":
Expand All @@ -576,7 +618,6 @@
else:
if fill_value != casted_value:
raise ValueError(f"fill value {fill_value!r} is not valid for dtype {data_type}")

return casted_value


Expand All @@ -585,9 +626,17 @@
return ""
elif dtype == DataType.bytes:
return b""
np_dtype = dtype.to_numpy()
np_dtype = cast(np.dtype[Any], np_dtype)
if np_dtype.kind in "Mm":
# datetime64 values have an associated precision
match = re.search(r"\[(.*?)\]", np_dtype.str)
if match:
precision = match.group(1)
else:
precision = "s"

Check warning on line 637 in src/zarr/core/metadata/v3.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/metadata/v3.py#L637

Added line #L637 was not covered by tests
return np_dtype.type("nat", precision) # type: ignore[misc,call-arg]
else:
np_dtype = dtype.to_numpy()
np_dtype = cast(np.dtype[Any], np_dtype)
return np_dtype.type(0) # type: ignore[misc]


Expand All @@ -610,6 +659,24 @@
float64 = "float64"
complex64 = "complex64"
complex128 = "complex128"
datetime64ns = "datetime64ns"
datetime64ms = "datetime64ms"
datetime64s = "datetime64s"
datetime64m = "datetime64m"
datetime64h = "datetime64h"
datetime64D = "datetime64D"
datetime64W = "datetime64W"
datetime64M = "datetime64M"
datetime64Y = "datetime64Y"
timedelta64ns = "timedelta64ns"
timedelta64ms = "timedelta64ms"
timedelta64s = "timedelta64s"
timedelta64m = "timedelta64m"
timedelta64h = "timedelta64h"
timedelta64D = "timedelta64D"
timedelta64W = "timedelta64W"
timedelta64M = "timedelta64M"
timedelta64Y = "timedelta64Y"
string = "string"
bytes = "bytes"

Expand All @@ -630,6 +697,24 @@
DataType.float64: 8,
DataType.complex64: 8,
DataType.complex128: 16,
DataType.datetime64ns: 8,
DataType.datetime64ms: 8,
DataType.datetime64s: 8,
DataType.datetime64m: 8,
DataType.datetime64h: 8,
DataType.datetime64D: 8,
DataType.datetime64W: 8,
DataType.datetime64M: 8,
DataType.datetime64Y: 8,
DataType.timedelta64ns: 8,
DataType.timedelta64ms: 8,
DataType.timedelta64s: 8,
DataType.timedelta64m: 8,
DataType.timedelta64h: 8,
DataType.timedelta64D: 8,
DataType.timedelta64W: 8,
DataType.timedelta64M: 8,
DataType.timedelta64Y: 8,
}
try:
return data_type_byte_counts[self]
Expand Down Expand Up @@ -657,6 +742,24 @@
DataType.float64: "f8",
DataType.complex64: "c8",
DataType.complex128: "c16",
DataType.datetime64ns: "M8[ns]",
DataType.datetime64ms: "M8[ms]",
DataType.datetime64s: "M8[s]",
DataType.datetime64m: "M8[m]",
DataType.datetime64h: "M8[h]",
DataType.datetime64D: "M8[D]",
DataType.datetime64W: "M8[W]",
DataType.datetime64M: "M8[M]",
DataType.datetime64Y: "M8[Y]",
DataType.timedelta64ns: "m8[ns]",
DataType.timedelta64ms: "m8[ms]",
DataType.timedelta64s: "m8[s]",
DataType.timedelta64m: "m8[m]",
DataType.timedelta64h: "m8[h]",
DataType.timedelta64D: "m8[D]",
DataType.timedelta64W: "m8[W]",
DataType.timedelta64M: "m8[M]",
DataType.timedelta64Y: "m8[Y]",
}
return data_type_to_numpy[self]

Expand Down Expand Up @@ -700,6 +803,24 @@
"<f8": "float64",
"<c8": "complex64",
"<c16": "complex128",
"<M8[ns]": "datetime64ns",
"<M8[ms]": "datetime64ms",
"<M8[s]": "datetime64s",
"<M8[m]": "datetime64m",
"<M8[h]": "datetime64h",
"<M8[D]": "datetime64D",
"<M8[W]": "datetime64W",
"<M8[M]": "datetime64M",
"<M8[Y]": "datetime64Y",
"<m8[ns]": "timedelta64ns",
"<m8[ms]": "timedelta64ms",
"<m8[s]": "timedelta64s",
"<m8[m]": "timedelta64m",
"<m8[h]": "timedelta64h",
"<m8[D]": "timedelta64D",
"<m8[W]": "timedelta64W",
"<m8[M]": "timedelta64M",
"<m8[Y]": "timedelta64Y",
}
return DataType[dtype_to_data_type[dtype.str]]

Expand Down
18 changes: 15 additions & 3 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,15 @@ def test_array_v3_fill_value_default(
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize(
("dtype_str", "fill_value"),
[("bool", True), ("uint8", 99), ("float32", -99.9), ("complex64", 3 + 4j)],
[
("bool", True),
("uint8", 99),
("float32", -99.9),
("complex64", 3 + 4j),
("m8[ns]", 0),
("M8[s]", None),
("<m8[D]", "NaT"),
],
)
def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str) -> None:
shape = (10,)
Expand All @@ -221,9 +229,13 @@ def test_array_v3_fill_value(store: MemoryStore, fill_value: int, dtype_str: str
chunks=shape,
fill_value=fill_value,
)

assert arr.fill_value == np.dtype(dtype_str).type(fill_value)
assert arr.fill_value.dtype == arr.dtype
if np.isfinite(arr.fill_value):
assert arr.fill_value == np.dtype(dtype_str).type(fill_value)
else:
if arr.dtype.kind in "Mm":
assert np.isnat(arr.fill_value)
assert np.isnat(np.dtype(dtype_str).type(fill_value))


def test_create_positional_args_deprecated() -> None:
Expand Down
Loading