Skip to content

Fix specifying memory order in v2 arrays #2951

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 8 additions & 9 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,15 +1035,13 @@
)
warnings.warn(UserWarning(msg), stacklevel=1)
config_dict["write_empty_chunks"] = write_empty_chunks
if order is not None:
if config is not None:
msg = (
"Both order and config keyword arguments are set. "
"This is redundant. When both are set, order will be ignored and "
"config will be used."
)
warnings.warn(UserWarning(msg), stacklevel=1)
config_dict["order"] = order
if order is not None and config is not None:
msg = (

Check warning on line 1039 in src/zarr/api/asynchronous.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/api/asynchronous.py#L1039

Added line #L1039 was not covered by tests
"Both order and config keyword arguments are set. "
"This is redundant. When both are set, order will be ignored and "
"config will be used."
)
warnings.warn(UserWarning(msg), stacklevel=1)

Check warning on line 1044 in src/zarr/api/asynchronous.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/api/asynchronous.py#L1044

Added line #L1044 was not covered by tests

config_parsed = ArrayConfig.from_dict(config_dict)

Expand All @@ -1057,6 +1055,7 @@
overwrite=overwrite,
filters=filters,
dimension_separator=dimension_separator,
order=order,
zarr_format=zarr_format,
chunk_shape=chunk_shape,
chunk_key_encoding=chunk_key_encoding,
Expand Down
15 changes: 12 additions & 3 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,7 @@ async def _create(

if order is not None:
_warn_order_kwarg()
config_parsed = replace(config_parsed, order=order)

result = await cls._create_v3(
store_path,
Expand Down Expand Up @@ -1042,7 +1043,10 @@ def order(self) -> MemoryOrder:
bool
Memory order of the array
"""
return self._config.order
if self.metadata.zarr_format == 2:
return self.metadata.order
else:
return self._config.order

@property
def attrs(self) -> dict[str, JSON]:
Expand Down Expand Up @@ -1274,14 +1278,14 @@ async def _get_selection(
out_buffer = prototype.nd_buffer.create(
shape=indexer.shape,
dtype=out_dtype,
order=self._config.order,
order=self.order,
fill_value=self.metadata.fill_value,
)
if product(indexer.shape) > 0:
# need to use the order from the metadata for v2
_config = self._config
if self.metadata.zarr_format == 2:
_config = replace(_config, order=self.metadata.order)
_config = replace(_config, order=self.order)

# reading chunks and decoding them
await self.codec_pipeline.read(
Expand Down Expand Up @@ -3986,6 +3990,11 @@ async def init_array(
chunks_out = chunk_shape_parsed
codecs_out = sub_codecs

if config is None:
config = {}
if order is not None and isinstance(config, dict):
config["order"] = config.get("order", order)

meta = AsyncArray._create_metadata_v3(
shape=shape_parsed,
dtype=dtype_parsed,
Expand Down
7 changes: 3 additions & 4 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,13 +317,12 @@ def test_array_order(zarr_format: ZarrFormat) -> None:
def test_array_order_warns(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None:
with pytest.warns(RuntimeWarning, match="The `order` keyword argument .*"):
arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format)
expected = order or zarr.config.get("array.order")
assert arr.order == expected
assert arr.order == order

vals = np.asarray(arr)
if expected == "C":
if order == "C":
assert vals.flags.c_contiguous
elif expected == "F":
elif order == "F":
assert vals.flags.f_contiguous
else:
raise AssertionError
Expand Down
42 changes: 23 additions & 19 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,39 +1241,43 @@ async def test_data_ignored_params(store: Store) -> None:
await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True)

@staticmethod
@pytest.mark.parametrize("order_config", ["C", "F", None])
@pytest.mark.parametrize("order", ["C", "F", None])
@pytest.mark.parametrize("with_config", [True, False])
def test_order(
order_config: MemoryOrder | None,
order: MemoryOrder | None,
with_config: bool,
zarr_format: ZarrFormat,
store: MemoryStore,
) -> None:
"""
Test that the arrays generated by array indexing have a memory order defined by the config order
value, and that for zarr v2 arrays, the ``order`` field in the array metadata is set correctly.
"""
config: ArrayConfigLike = {}
if order_config is None:
config: ArrayConfigLike | None = {}
if order is None:
config = {}
expected = zarr.config.get("array.order")
else:
config = {"order": order_config}
expected = order_config
config = {"order": order}
expected = order

if not with_config:
# Test without passing config parameter
config = None

arr = zarr.create_array(
store=store,
shape=(2, 2),
zarr_format=zarr_format,
dtype="i4",
order=order,
config=config,
)
assert arr.order == expected
if zarr_format == 2:
arr = zarr.create_array(
store=store,
shape=(2, 2),
zarr_format=zarr_format,
dtype="i4",
order=expected,
config=config,
)
# guard for type checking
assert arr.metadata.zarr_format == 2
assert arr.metadata.order == expected
else:
arr = zarr.create_array(
store=store, shape=(2, 2), zarr_format=zarr_format, dtype="i4", config=config
)

vals = np.asarray(arr)
if expected == "C":
assert vals.flags.c_contiguous
Expand Down
24 changes: 9 additions & 15 deletions tests/test_v2.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,12 +194,8 @@ def test_create_array_defaults(store: Store):
)


@pytest.mark.parametrize("array_order", ["C", "F"])
@pytest.mark.parametrize("data_order", ["C", "F"])
@pytest.mark.parametrize("memory_order", ["C", "F"])
def test_v2_non_contiguous(
array_order: Literal["C", "F"], data_order: Literal["C", "F"], memory_order: Literal["C", "F"]
) -> None:
Comment on lines -197 to -202
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@normanrz you originally wrote this test in #2679, but I don't understand why there are three different types of order, and there's no documentation, or even comments on this test explaining.

I've therefore chosen to be a little bit bold, and for at least v2 data this PR now assumes there is only one order. If we need to deal with different types of orders, I'm certainly open to that, but that increased complexity probably needs to come with some very good user docs, given in zarr-python v2 there was only one type of memory order.

Since you originally wrote this test, I wanted to see what you thought about this PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

array_order = order in the metadata
data_order = order of the numpy array that is used to write
memory_order = order in the runtime config which is used for reading data, ie the order of the output numpy array

At least the first two are necessary for this test. If you want to change the behavior to always read in array_order, then you can drop the memory_order.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe I'm missing something, but don't we just need to parametrize with one order? We pass that one order parameter to the array creation routine, and then check it's correct in both the metadata and the order the data is stored in memory, right (which is how the test works currently in this PR)?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider this: You create a Zarr array with F order. Next you create a numpy array with C order. You write the numpy array in the Zarr array. This test makes sure that the data on-disk is in F order. That's why you need at least 2 orders.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Aha, that makes sense - thanks!

@pytest.mark.parametrize("order", ["C", "F"])
def test_v2_non_contiguous(order: Literal["C", "F"]) -> None:
store = MemoryStore()
arr = zarr.create_array(
store,
Expand All @@ -211,22 +207,21 @@ def test_v2_non_contiguous(
filters=None,
compressors=None,
overwrite=True,
order=array_order,
config={"order": memory_order},
order=order,
)

# Non-contiguous write
a = np.arange(arr.shape[0] * arr.shape[1]).reshape(arr.shape, order=data_order)
a = np.arange(arr.shape[0] * arr.shape[1]).reshape(arr.shape, order=order)
arr[6:9, 3:6] = a[6:9, 3:6] # The slice on the RHS is important
np.testing.assert_array_equal(arr[6:9, 3:6], a[6:9, 3:6])

np.testing.assert_array_equal(
a[6:9, 3:6],
np.frombuffer(
sync(store.get("2.1", default_buffer_prototype())).to_bytes(), dtype="float64"
).reshape((3, 3), order=array_order),
).reshape((3, 3), order=order),
)
if memory_order == "F":
if order == "F":
assert (arr[6:9, 3:6]).flags.f_contiguous
else:
assert (arr[6:9, 3:6]).flags.c_contiguous
Expand All @@ -242,13 +237,12 @@ def test_v2_non_contiguous(
compressors=None,
filters=None,
overwrite=True,
order=array_order,
config={"order": memory_order},
order=order,
)

# Contiguous write
a = np.arange(9).reshape((3, 3), order=data_order)
if data_order == "F":
a = np.arange(9).reshape((3, 3), order=order)
if order == "F":
assert a.flags.f_contiguous
else:
assert a.flags.c_contiguous
Expand Down
Loading