From c5e706b863324bfe17a5378c1a9b2d669d59cf2d Mon Sep 17 00:00:00 2001
From: Lachlan Deakin <ljdgit@gmail.com>
Date: Wed, 5 Feb 2025 21:38:29 +1100
Subject: [PATCH 1/4] fix: implicit fill value initialisation

- initialise empty chunks to the default fill value during writing
- add default fill value for datetime, timedelta, structured data types
---
 src/zarr/core/codec_pipeline.py | 34 ++++++++++++++++-----------------
 src/zarr/core/metadata/v2.py    |  8 ++++++++
 2 files changed, 25 insertions(+), 17 deletions(-)

diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index 583ca01c5e..a35c5ca210 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -56,6 +56,19 @@ def resolve_batched(codec: Codec, chunk_specs: Iterable[ArraySpec]) -> Iterable[
     return [codec.resolve_metadata(chunk_spec) for chunk_spec in chunk_specs]
 
 
+def fill_value_or_default(chunk_spec: ArraySpec) -> Any:
+    fill_value = chunk_spec.fill_value
+    if fill_value is None:
+        # Zarr V2 allowed `fill_value` to be null in the metadata.
+        # Zarr V3 requires it to be set. This has already been
+        # validated when decoding the metadata, but we support reading
+        # Zarr V2 data and need to support the case where fill_value
+        # is None.
+        return _default_fill_value(dtype=chunk_spec.dtype)
+    else:
+        return fill_value
+
+
 @dataclass(frozen=True)
 class BatchedCodecPipeline(CodecPipeline):
     """Default codec pipeline.
@@ -247,17 +260,7 @@ async def read_batch(
                 if chunk_array is not None:
                     out[out_selection] = chunk_array
                 else:
-                    fill_value = chunk_spec.fill_value
-
-                    if fill_value is None:
-                        # Zarr V2 allowed `fill_value` to be null in the metadata.
-                        # Zarr V3 requires it to be set. This has already been
-                        # validated when decoding the metadata, but we support reading
-                        # Zarr V2 data and need to support the case where fill_value
-                        # is None.
-                        fill_value = _default_fill_value(dtype=chunk_spec.dtype)
-
-                    out[out_selection] = fill_value
+                    out[out_selection] = fill_value_or_default(chunk_spec)
         else:
             chunk_bytes_batch = await concurrent_map(
                 [
@@ -284,10 +287,7 @@ async def read_batch(
                         tmp = tmp.squeeze(axis=drop_axes)
                     out[out_selection] = tmp
                 else:
-                    fill_value = chunk_spec.fill_value
-                    if fill_value is None:
-                        fill_value = _default_fill_value(dtype=chunk_spec.dtype)
-                    out[out_selection] = fill_value
+                    out[out_selection] = fill_value_or_default(chunk_spec)
 
     def _merge_chunk_array(
         self,
@@ -305,7 +305,7 @@ def _merge_chunk_array(
                 shape=chunk_spec.shape,
                 dtype=chunk_spec.dtype,
                 order=chunk_spec.order,
-                fill_value=chunk_spec.fill_value,
+                fill_value=fill_value_or_default(chunk_spec),
             )
         else:
             chunk_array = existing_chunk_array.copy()  # make a writable copy
@@ -394,7 +394,7 @@ async def _read_key(
                     chunk_array_batch.append(None)  # type: ignore[unreachable]
                 else:
                     if not chunk_spec.config.write_empty_chunks and chunk_array.all_equal(
-                        chunk_spec.fill_value
+                        fill_value_or_default(chunk_spec)
                     ):
                         chunk_array_batch.append(None)
                     else:
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 192db5b203..7d6741500e 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -349,6 +349,14 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any:
         return b""
     elif dtype.kind in "UO":
         return ""
+    elif dtype.kind in "Mm":
+        return dtype.type('nat')
+    elif dtype.kind == "V":
+        if dtype.fields is not None:
+            default = tuple([_default_fill_value(field[0]) for field in dtype.fields.values()])
+            return np.array([default], dtype=dtype)
+        else:
+            return np.zeros(1, dtype=dtype)
     else:
         return dtype.type(0)
 

From 19ec879fe1dbdc67ceca401968369c39b95f9dba Mon Sep 17 00:00:00 2001
From: Lachlan Deakin <ljdgit@gmail.com>
Date: Wed, 5 Feb 2025 23:13:25 +1100
Subject: [PATCH 2/4] fmt

---
 src/zarr/core/metadata/v2.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 7d6741500e..25697c4545 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -350,7 +350,7 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any:
     elif dtype.kind in "UO":
         return ""
     elif dtype.kind in "Mm":
-        return dtype.type('nat')
+        return dtype.type("nat")
     elif dtype.kind == "V":
         if dtype.fields is not None:
             default = tuple([_default_fill_value(field[0]) for field in dtype.fields.values()])

From ea99951c66b1098ca43b594eb6f5308ce4f6b8d5 Mon Sep 17 00:00:00 2001
From: Lachlan Deakin <ljdgit@gmail.com>
Date: Wed, 5 Feb 2025 23:34:12 +1100
Subject: [PATCH 3/4] add "other" dtype test

---
 tests/test_v2.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/test_v2.py b/tests/test_v2.py
index 4c689c8e64..0a4487cfcc 100644
--- a/tests/test_v2.py
+++ b/tests/test_v2.py
@@ -313,3 +313,22 @@ def test_structured_dtype_roundtrip(fill_value, tmp_path) -> None:
     za[...] = a
     za = zarr.open_array(store=array_path)
     assert (a == za[:]).all()
+
+
+@pytest.mark.parametrize("fill_value", [None, b"x"], ids=["no_fill", "fill"])
+def test_other_dtype_roundtrip(fill_value, tmp_path) -> None:
+    a = np.array([b"a\0\0", b"bb", b"ccc"], dtype="V7")
+    array_path = tmp_path / "data.zarr"
+    za = zarr.create(
+        shape=(3,),
+        store=array_path,
+        chunks=(2,),
+        fill_value=fill_value,
+        zarr_format=2,
+        dtype=a.dtype,
+    )
+    if fill_value is not None:
+        assert (np.array([fill_value] * a.shape[0], dtype=a.dtype) == za[:]).all()
+    za[...] = a
+    za = zarr.open_array(store=array_path)
+    assert (a == za[:]).all()

From 077a1ba5211c76ec562ddff736cbc940b5782cf4 Mon Sep 17 00:00:00 2001
From: Lachlan Deakin <ljdgit@gmail.com>
Date: Tue, 11 Feb 2025 07:53:07 +1100
Subject: [PATCH 4/4] changelog

---
 changes/2799.bugfix.rst | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 changes/2799.bugfix.rst

diff --git a/changes/2799.bugfix.rst b/changes/2799.bugfix.rst
new file mode 100644
index 0000000000..f22b7074bb
--- /dev/null
+++ b/changes/2799.bugfix.rst
@@ -0,0 +1 @@
+Enitialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types
\ No newline at end of file