|
3 | 3 | import json
|
4 | 4 | import os
|
5 | 5 | import warnings
|
6 |
| -from collections.abc import Hashable, Iterable |
| 6 | +from collections.abc import Hashable, Iterable, MutableMapping |
7 | 7 | from typing import TYPE_CHECKING, Any, Literal
|
8 | 8 |
|
9 | 9 | import numpy as np
|
|
32 | 32 | if TYPE_CHECKING:
|
33 | 33 | from io import BufferedIOBase
|
34 | 34 |
|
| 35 | + import zarr |
| 36 | + |
35 | 37 | from xarray.backends.common import AbstractDataStore
|
36 | 38 | from xarray.core.dataset import Dataset
|
37 | 39 |
|
@@ -126,6 +128,79 @@ def initialize_zarr(
|
126 | 128 | return ds
|
127 | 129 |
|
128 | 130 |
|
def add_array(vn: str, var: Variable, group: zarr.Group, is_coord: bool) -> None:
    """
    Create one array in a Zarr group from a variable.

    The variable is first passed through ``encode_zarr_variable``. The array is
    then created with the keyword arguments returned by
    ``extract_zarr_variable_encoding``, and its attributes (together with the
    dimension names stored under ``DIMENSION_KEY``) are written with
    ``_put_attrs``.

    Parameters
    ----------
    vn : str
        Variable name; passed through ``_encode_variable_name`` to obtain the
        name of the array inside the group.
    var : Variable
        Variable to add to the group.
    group : zarr.Group
        Group in which the array is created.
    is_coord : bool
        If True, the variable's data are also written into the new array.
        Otherwise only the array and its attributes are created.
    """
    array_name = _encode_variable_name(vn)
    encoded = encode_zarr_variable(var)

    # "_FillValue" is handed to the array constructor as ``fill_value`` instead
    # of being stored with the other attributes, so take it out of them here.
    fill_value = encoded.attrs.pop("_FillValue", None)
    # An encoding consisting solely of an explicit ``_FillValue: None`` is
    # cleared when the attributes did not supply a fill value either.
    if fill_value is None and encoded.encoding == {"_FillValue": None}:
        encoded.encoding = {}

    # keyword arguments for array creation derived from the variable's encoding
    array_kwargs = extract_zarr_variable_encoding(encoded)

    zarr_array = group.create(
        array_name,
        shape=encoded.shape,
        dtype=encoded.dtype,
        fill_value=fill_value,
        **array_kwargs,
    )

    # the dimension names are stored alongside the variable's own attributes
    raw_attrs = {DIMENSION_KEY: encoded.dims}
    raw_attrs.update(encoded.attrs)
    serializable_attrs = {
        key: encode_zarr_attr_value(value) for key, value in raw_attrs.items()
    }
    _put_attrs(zarr_array, serializable_attrs)

    # only variables flagged by the caller get their values written
    if is_coord:
        zarr_array[:] = encoded.data
| 159 | + |
| 160 | + |
def init_zarr_v2(ds, store: MutableMapping | None = None) -> MutableMapping:
    """
    Write a dataset's Zarr metadata, including its dimension coordinates.

    Everything is first written into an in-memory ``zarr.MemoryStore``. The
    metadata are consolidated there and, if a target store was given, the
    contents of the in-memory store are then copied into it in one call.

    Parameters
    ----------
    ds : Dataset
        Dataset whose variables and attributes are written.
    store : MutableMapping, optional
        Target store. If not provided, the in-memory store is used as the
        target.

    Returns
    -------
    MutableMapping
        ``store`` if one was passed in, otherwise the in-memory store.
    """
    import zarr

    staging = zarr.MemoryStore()
    target = staging if store is None else store

    # encode the dataset (importantly, its coordinates)
    variables, attrs = conventions.encode_dataset_coordinates(ds)

    # create the group in the staging store and attach the dataset attributes
    root = zarr.open_group(store=staging)
    _put_attrs(root, attrs)

    # one array per variable; data are written only for variables whose name
    # is also a dimension of the dataset
    for var_name, variable in variables.items():
        add_array(var_name, variable, root, var_name in ds.dims)

    # TODO: this should be optional
    zarr.consolidate_metadata(staging)

    if target is staging:
        return target

    # A store was provided: flush the staging store into it at once, falling
    # back to ``update`` because not all stores have ``setitems``.
    try:
        target.setitems(staging)
    except AttributeError:
        target.update(staging)

    return target
| 202 | + |
| 203 | + |
129 | 204 | def encode_zarr_attr_value(value):
|
130 | 205 | """
|
131 | 206 | Encode a attribute value as something that can be serialized as json
|
|
0 commit comments