Skip to content

Fix order handling #3112

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3112.bugfix.1.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Creating a Zarr format 2 array with the ``order`` keyword argument no longer raises a warning.
1 change: 1 addition & 0 deletions changes/3112.bugfix.2.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Creating a Zarr format 3 array with the ``order`` argument now conistently ignores this argument and raises a warning.
2 changes: 2 additions & 0 deletions changes/3112.bugfix.3.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
When using ``from_array`` to copy a Zarr format 2 array to a Zarr format 3 array, if the memory order of the input array is ``"F"`` a warning is raised and the order ignored.
This is because Zarr format 3 arrays are always stored in "C" order.
1 change: 1 addition & 0 deletions changes/3112.bugfix.4.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
The ``config`` argument to `zarr.create` (and functions that create arrays) is now used - previously it had no effect.
1 change: 1 addition & 0 deletions changes/3112.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed the error message when passing both ``config`` and ``write_empty_chunks`` arguments to reflect the current behaviour (``write_empty_chunks`` takes precedence).
24 changes: 5 additions & 19 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from_array,
get_array_metadata,
)
from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
from zarr.core.array_spec import ArrayConfigLike, parse_array_config
from zarr.core.buffer import NDArrayLike
from zarr.core.common import (
JSON,
Expand All @@ -27,7 +27,6 @@
MemoryOrder,
ZarrFormat,
_default_zarr_format,
_warn_order_kwarg,
_warn_write_empty_chunks_kwarg,
parse_dtype,
)
Expand Down Expand Up @@ -1013,8 +1012,6 @@ async def create(
warnings.warn("object_codec is not yet implemented", RuntimeWarning, stacklevel=2)
if read_only is not None:
warnings.warn("read_only is not yet implemented", RuntimeWarning, stacklevel=2)
if order is not None:
_warn_order_kwarg()
if write_empty_chunks is not None:
_warn_write_empty_chunks_kwarg()

Expand All @@ -1026,26 +1023,17 @@ async def create(
mode = "a"
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

config_dict: ArrayConfigParams = {}
config_parsed = parse_array_config(config)

if write_empty_chunks is not None:
if config is not None:
msg = (
"Both write_empty_chunks and config keyword arguments are set. "
"This is redundant. When both are set, write_empty_chunks will be ignored and "
"config will be used."
"This is redundant. When both are set, write_empty_chunks will be used instead "
"of the value in config."
)
warnings.warn(UserWarning(msg), stacklevel=1)
config_dict["write_empty_chunks"] = write_empty_chunks
if order is not None and config is not None:
msg = (
"Both order and config keyword arguments are set. "
"This is redundant. When both are set, order will be ignored and "
"config will be used."
)
warnings.warn(UserWarning(msg), stacklevel=1)

config_parsed = ArrayConfig.from_dict(config_dict)
config_parsed = dataclasses.replace(config_parsed, write_empty_chunks=write_empty_chunks)

return await AsyncArray._create(
store_path,
Expand Down Expand Up @@ -1248,8 +1236,6 @@ async def open_array(

zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

if "order" in kwargs:
_warn_order_kwarg()
if "write_empty_chunks" in kwargs:
_warn_write_empty_chunks_kwarg()

Expand Down
23 changes: 15 additions & 8 deletions src/zarr/core/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@
_warn_order_kwarg,
concurrent_map,
parse_dtype,
parse_order,
parse_shapelike,
product,
)
Expand Down Expand Up @@ -603,7 +602,6 @@

if order is not None:
_warn_order_kwarg()
config_parsed = replace(config_parsed, order=order)

result = await cls._create_v3(
store_path,
Expand Down Expand Up @@ -631,9 +629,10 @@
raise ValueError("dimension_names cannot be used for arrays with zarr_format 2.")

if order is None:
order_parsed = parse_order(zarr_config.get("array.order"))
order_parsed = config_parsed.order
else:
order_parsed = order
config_parsed = replace(config_parsed, order=order)

result = await cls._create_v2(
store_path,
Expand Down Expand Up @@ -4267,10 +4266,8 @@
chunks_out = chunk_shape_parsed
codecs_out = sub_codecs

if config is None:
config = {}
if order is not None and isinstance(config, dict):
config["order"] = config.get("order", order)
if order is not None:
_warn_order_kwarg()

Check warning on line 4270 in src/zarr/core/array.py

View check run for this annotation

Codecov / codecov/patch

src/zarr/core/array.py#L4270

Added line #L4270 was not covered by tests

meta = AsyncArray._create_metadata_v3(
shape=shape_parsed,
Expand Down Expand Up @@ -4521,8 +4518,18 @@
serializer = "auto"
if fill_value is None:
fill_value = data.fill_value
if order is None:

if data.metadata.zarr_format == 2 and zarr_format == 3 and data.order == "F":
# Can't set order="F" for v3 arrays
warnings.warn(
"Zarr format 3 arrays are always stored with order='C'. "
"The existing order='F' of the source Zarr format 2 array will be ignored.",
UserWarning,
stacklevel=2,
)
elif order is None and zarr_format == 2:
order = data.order

if chunk_key_encoding is None and zarr_format == data.metadata.zarr_format:
if isinstance(data.metadata, ArrayV2Metadata):
chunk_key_encoding = {"name": "v2", "separator": data.metadata.dimension_separator}
Expand Down
4 changes: 2 additions & 2 deletions src/zarr/core/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -2417,7 +2417,7 @@ def create_array(
compressor: CompressorLike = "auto",
serializer: SerializerLike = "auto",
fill_value: Any | None = 0,
order: MemoryOrder | None = "C",
order: MemoryOrder | None = None,
attributes: dict[str, JSON] | None = None,
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
dimension_names: DimensionNames = None,
Expand Down Expand Up @@ -2811,7 +2811,7 @@ def array(
compressor: CompressorLike = None,
serializer: SerializerLike = "auto",
fill_value: Any | None = 0,
order: MemoryOrder | None = "C",
order: MemoryOrder | None = None,
attributes: dict[str, JSON] | None = None,
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
dimension_names: DimensionNames = None,
Expand Down
11 changes: 11 additions & 0 deletions test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import zarr

# Write fortran style array
group = zarr.group(store={})
array = group.create_array(
name="example",
shape=(128, 128),
dtype="float32",
order="F",
dimension_names=("row", "col"),
)
54 changes: 36 additions & 18 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,34 +339,52 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None:
zarr.open(store=tmp_path, mode="w-")


def test_array_order(zarr_format: ZarrFormat) -> None:
arr = zarr.ones(shape=(2, 2), order=None, zarr_format=zarr_format)
expected = zarr.config.get("array.order")
assert arr.order == expected
@pytest.mark.parametrize("order", ["C", "F", None])
@pytest.mark.parametrize("config", [{"order": "C"}, {"order": "F"}, {}], ids=["C", "F", "None"])
def test_array_order(
order: MemoryOrder | None, config: dict[str, MemoryOrder | None], zarr_format: ZarrFormat
) -> None:
"""
Check that:
- For v2, memory order is taken from the `order` keyword argument.
- For v3, memory order is taken from `config`, and when order is passed a warning is raised
- The numpy array returned has the expected order
- For v2, the order metadata is set correctly
"""
default_order = zarr.config.get("array.order")
ctx: contextlib.AbstractContextManager # type: ignore[type-arg]

vals = np.asarray(arr)
if expected == "C":
assert vals.flags.c_contiguous
elif expected == "F":
assert vals.flags.f_contiguous
else:
raise AssertionError
if zarr_format == 3:
if order is None:
ctx = contextlib.nullcontext()
else:
ctx = pytest.warns(
RuntimeWarning,
match="The `order` keyword argument has no effect for Zarr format 3 arrays",
)

expected_order = config.get("order", default_order)

@pytest.mark.parametrize("order", ["C", "F"])
def test_array_order_warns(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None:
with pytest.warns(RuntimeWarning, match="The `order` keyword argument .*"):
arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format)
assert arr.order == order
if zarr_format == 2:
ctx = contextlib.nullcontext()
expected_order = order or config.get("order", default_order)

with ctx:
arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format, config=config)

assert arr.order == expected_order
vals = np.asarray(arr)
if order == "C":
if expected_order == "C":
assert vals.flags.c_contiguous
elif order == "F":
elif expected_order == "F":
assert vals.flags.f_contiguous
else:
raise AssertionError

if zarr_format == 2:
assert arr.metadata.zarr_format == 2
assert arr.metadata.order == expected_order


# def test_lazy_loader():
# foo = np.arange(100)
Expand Down
64 changes: 12 additions & 52 deletions tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import re
import sys
from itertools import accumulate
from typing import TYPE_CHECKING, Any, Literal
from typing import Any, Literal
from unittest import mock

import numcodecs
Expand Down Expand Up @@ -42,18 +42,15 @@
from zarr.core.buffer.cpu import NDBuffer
from zarr.core.chunk_grids import _auto_partition
from zarr.core.chunk_key_encodings import ChunkKeyEncodingParams
from zarr.core.common import JSON, MemoryOrder, ZarrFormat
from zarr.core.common import JSON, ZarrFormat
from zarr.core.group import AsyncGroup
from zarr.core.indexing import BasicIndexer, ceildiv
from zarr.core.metadata.v2 import ArrayV2Metadata
from zarr.core.metadata.v3 import ArrayV3Metadata, DataType
from zarr.core.sync import sync
from zarr.errors import ContainsArrayError, ContainsGroupError
from zarr.storage import LocalStore, MemoryStore, StorePath

if TYPE_CHECKING:
from zarr.core.array_spec import ArrayConfigLike
from zarr.core.metadata.v2 import ArrayV2Metadata


@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"])
@pytest.mark.parametrize("zarr_format", [2, 3])
Expand Down Expand Up @@ -1400,52 +1397,6 @@ async def test_data_ignored_params(store: Store) -> None:
):
await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True)

@staticmethod
@pytest.mark.parametrize("order", ["C", "F", None])
@pytest.mark.parametrize("with_config", [True, False])
def test_order(
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've merged this with another existing test to reduce duplication - they were doing a lot of the same things.

order: MemoryOrder | None,
with_config: bool,
zarr_format: ZarrFormat,
store: MemoryStore,
) -> None:
"""
Test that the arrays generated by array indexing have a memory order defined by the config order
value, and that for zarr v2 arrays, the ``order`` field in the array metadata is set correctly.
"""
config: ArrayConfigLike | None = {}
if order is None:
config = {}
expected = zarr.config.get("array.order")
else:
config = {"order": order}
expected = order

if not with_config:
# Test without passing config parameter
config = None

arr = zarr.create_array(
store=store,
shape=(2, 2),
zarr_format=zarr_format,
dtype="i4",
order=order,
config=config,
)
assert arr.order == expected
if zarr_format == 2:
assert arr.metadata.zarr_format == 2
assert arr.metadata.order == expected

vals = np.asarray(arr)
if expected == "C":
assert vals.flags.c_contiguous
elif expected == "F":
assert vals.flags.f_contiguous
else:
raise AssertionError

@staticmethod
@pytest.mark.parametrize("write_empty_chunks", [True, False])
async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store) -> None:
Expand Down Expand Up @@ -1612,6 +1563,15 @@ async def test_from_array_arraylike(
np.testing.assert_array_equal(result[...], np.full_like(src, fill_value))


def test_from_array_F_order() -> None:
arr = zarr.create_array(store={}, data=np.array([1]), order="F", zarr_format=2)
with pytest.warns(
UserWarning,
match="The existing order='F' of the source Zarr format 2 array will be ignored.",
):
zarr.from_array(store={}, data=arr, zarr_format=3)


async def test_orthogonal_set_total_slice() -> None:
"""Ensure that a whole chunk overwrite does not read chunks"""
store = MemoryStore()
Expand Down