Skip to content
forked from pydata/xarray

Commit c0cf4ee

Browse files
committed
add init_zarr_v2 to demonstrate another approach
1 parent e8bf524 commit c0cf4ee

File tree

1 file changed

+76
-1
lines changed

1 file changed

+76
-1
lines changed

xarray/backends/zarr.py

+76-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import json
44
import os
55
import warnings
6-
from collections.abc import Hashable, Iterable
6+
from collections.abc import Hashable, Iterable, MutableMapping
77
from typing import TYPE_CHECKING, Any, Literal
88

99
import numpy as np
@@ -32,6 +32,8 @@
3232
if TYPE_CHECKING:
3333
from io import BufferedIOBase
3434

35+
import zarr
36+
3537
from xarray.backends.common import AbstractDataStore
3638
from xarray.core.dataset import Dataset
3739

@@ -126,6 +128,79 @@ def initialize_zarr(
126128
return ds
127129

128130

131+
def add_array(vn: str, var: Variable, group: zarr.Group, is_coord: bool) -> None:
    """
    Add an array to the Zarr group after encoding it and its attributes

    Parameters
    ----------
    vn : str
        Name of the variable. It is passed through ``_encode_variable_name``
        to obtain the name of the array inside ``group``.
    var : Variable
        Variable to add. It is encoded with ``encode_zarr_variable`` before
        the array is created.
    group : zarr.Group
        Group in which the array is created.
    is_coord : bool
        If True, the variable's data is written into the new array;
        otherwise only the array and its attributes are created.
    """
    name = _encode_variable_name(vn)
    var = encode_zarr_variable(var)

    # handle the fill value
    # Pop from a copy: the encoded variable may share its attrs dict with the
    # variable passed in by the caller, and popping in place would strip
    # "_FillValue" from it as a side effect.
    attrs = dict(var.attrs)
    fill_value = attrs.pop("_FillValue", None)
    if var.encoding == {"_FillValue": None} and fill_value is None:
        # NOTE(review): this rebinds the encoding on the encoded variable;
        # assumed harmless because ``var`` is local here -- confirm that
        # encode_zarr_variable never returns the caller's object unchanged.
        var.encoding = {}

    # encode the variable
    encoding = extract_zarr_variable_encoding(var)

    # create the array
    arr = group.create(
        name, shape=var.shape, dtype=var.dtype, fill_value=fill_value, **encoding
    )

    # handle the attributes (dimension names are stored alongside them)
    attrs = {DIMENSION_KEY: var.dims, **attrs}
    encoded_attrs = {k: encode_zarr_attr_value(v) for k, v in attrs.items()}
    _put_attrs(arr, encoded_attrs)

    # write the data if this is a dimension coordinate
    if is_coord:
        arr[:] = var.data
159+
160+
161+
def init_zarr_v2(ds: Dataset, store: MutableMapping | None = None) -> MutableMapping:
    """
    Initialize a Zarr store with metadata including dimension coordinates

    The group, its arrays and the consolidated metadata are first written to
    an in-memory store. If a target store was provided, the content of the
    in-memory store is then copied to it in a single bulk operation.

    Parameters
    ----------
    ds : Dataset
    store : MutableMapping (optional)
        Target store. If not provided, a temporary in-memory store will be created.

    Returns
    -------
    MutableMapping
        ``store`` if it was provided, otherwise the newly created in-memory
        store.
    """
    import zarr

    temp_store = zarr.MemoryStore()
    if store is None:
        store = temp_store

    # encode the dataset (importantly, coordinates)
    variables, attrs = conventions.encode_dataset_coordinates(ds)

    # create the group
    group = zarr.open_group(store=temp_store)
    # set the group's attributes
    _put_attrs(group, attrs)

    # add the arrays; data is only written for variables named after a dimension
    for k, v in variables.items():
        is_coord = k in ds.dims
        add_array(k, v, group, is_coord)

    # consolidate metadata
    # TODO: this should be optional
    zarr.consolidate_metadata(temp_store)

    if store is not temp_store:
        # if a store was provided, flush the temp store there at once.
        # Not all stores have setitems, so check for it explicitly instead of
        # catching AttributeError: an AttributeError raised *inside* a real
        # setitems call would otherwise be swallowed and the write silently
        # retried through update().
        bulk_write = getattr(store, "setitems", None)
        if callable(bulk_write):
            bulk_write(temp_store)
        else:
            store.update(temp_store)

    return store
202+
203+
129204
def encode_zarr_attr_value(value):
130205
"""
131206
Encode a attribute value as something that can be serialized as json

0 commit comments

Comments
 (0)