-
-
Notifications
You must be signed in to change notification settings - Fork 327
/
Copy pathsync_group.py
162 lines (131 loc) · 6.02 KB
/
sync_group.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from __future__ import annotations
from typing import TYPE_CHECKING
from zarr.core.group import Group, GroupMetadata, _parse_async_node
from zarr.core.group import create_hierarchy as create_hierarchy_async
from zarr.core.group import create_nodes as create_nodes_async
from zarr.core.group import create_rooted_hierarchy as create_rooted_hierarchy_async
from zarr.core.group import get_node as get_node_async
from zarr.core.sync import _collect_aiterator, sync
if TYPE_CHECKING:
from collections.abc import Iterator
from zarr.abc.store import Store
from zarr.core.array import Array
from zarr.core.common import ZarrFormat
from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
def create_nodes(
    *, store: Store, nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata]
) -> Iterator[tuple[str, Group | Array]]:
    """Create a collection of arrays and / or groups concurrently.

    Blocking counterpart of the async ``create_nodes``: the async iterator is
    driven to completion via ``sync`` and every resulting async node is
    converted with ``_parse_async_node`` before being yielded.

    Note: no attempt is made to validate that these arrays and / or groups
    collectively form a valid Zarr hierarchy. It is the responsibility of the
    caller to ensure that ``nodes`` satisfies any correctness constraints.

    Parameters
    ----------
    store : Store
        The storage backend to use.
    nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
        A dictionary defining the hierarchy. The keys are the paths of the
        nodes in the hierarchy, and the values are the metadata of the nodes.
        The metadata must be either an instance of GroupMetadata,
        ArrayV3Metadata or ArrayV2Metadata.

    Yields
    ------
    tuple[str, Group | Array]
        ``(key, node)`` pairs, one per created node.
    """
    # This is a generator function, so nothing below runs until the caller
    # first advances the returned iterator.
    created = sync(_collect_aiterator(create_nodes_async(store=store, nodes=nodes)))
    yield from ((path, _parse_async_node(async_node)) for path, async_node in created)
def create_hierarchy(
    *,
    store: Store,
    nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
    overwrite: bool = False,
) -> Iterator[tuple[str, Group | Array]]:
    """
    Lazily create a complete zarr hierarchy from a collection of metadata objects.

    The input is parsed to ensure that the hierarchy is complete, and any
    implicit groups are inserted as needed. For example, an input like
    ``{'a/b': GroupMetadata}`` will be parsed to
    ``{'': GroupMetadata, 'a': GroupMetadata, 'a/b': GroupMetadata}``.

    This function returns a generator that creates the nodes in the hierarchy
    when consumed. After input parsing, all the nodes in the hierarchy are
    created concurrently.

    Arrays and Groups are yielded in the order they are created. This order is
    not stable and should not be relied on.

    Parameters
    ----------
    store : Store
        The storage backend to use.
    nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
        A dictionary defining the hierarchy. The keys are the paths of the
        nodes in the hierarchy, relative to the root of the ``Store``. The root
        of the store can be specified with the empty string ``''``. The values
        are instances of ``GroupMetadata`` or ``ArrayMetadata``. Note that all
        values must have the same ``zarr_format`` -- it is an error to mix zarr
        versions in the same hierarchy.

        Leading "/" characters from keys will be removed.
    overwrite : bool
        Whether to overwrite existing nodes. Defaults to ``False``, in which
        case an error is raised instead of overwriting an existing array or
        group.

        This function will not erase an existing group unless that group is
        explicitly named in ``nodes``. If ``nodes`` defines implicit groups,
        e.g. ``{'a/b/c': GroupMetadata}``, and a group already exists at path
        ``a``, then this function will leave the group at ``a`` as-is.

    Yields
    ------
    tuple[str, Group | Array]
        This function yields (path, node) pairs, in the order the nodes were
        created.

    Examples
    --------
    >>> from zarr import create_hierarchy
    >>> from zarr.storage import MemoryStore
    >>> from zarr.core.group import GroupMetadata
    >>> store = MemoryStore()
    >>> nodes = {'a': GroupMetadata(attributes={'name': 'leaf'})}
    >>> nodes_created = dict(create_hierarchy(store=store, nodes=nodes))
    >>> print(nodes)
    # {'a': GroupMetadata(attributes={'name': 'leaf'}, zarr_format=3, consolidated_metadata=None, node_type='group')}
    """
    async_nodes = create_hierarchy_async(store=store, nodes=nodes, overwrite=overwrite)
    # Run the async iterator through the blocking ``sync`` helper, then hand
    # out the synchronous equivalent of each async node one at a time.
    collected = sync(_collect_aiterator(async_nodes))
    for path, async_node in collected:
        node = _parse_async_node(async_node)
        yield path, node
def create_rooted_hierarchy(
    *,
    store: Store,
    nodes: dict[str, GroupMetadata | ArrayV2Metadata | ArrayV3Metadata],
    overwrite: bool = False,
) -> Group | Array:
    """
    Create a Zarr hierarchy with a root, and return the root node, which could
    be a ``Group`` or ``Array`` instance.

    Blocking counterpart of the async ``create_rooted_hierarchy``: the
    coroutine is run via ``sync`` and its result is converted with
    ``_parse_async_node``.

    Parameters
    ----------
    store : Store
        The storage backend to use.
    nodes : dict[str, GroupMetadata | ArrayV3Metadata | ArrayV2Metadata]
        A dictionary defining the hierarchy. The keys are the paths of the
        nodes in the hierarchy, and the values are the metadata of the nodes.
        The metadata must be either an instance of GroupMetadata,
        ArrayV3Metadata or ArrayV2Metadata.
    overwrite : bool
        Whether to overwrite existing nodes. Default is ``False``.

    Returns
    -------
    Group | Array
        The root node of the created hierarchy.
    """
    root_coro = create_rooted_hierarchy_async(store=store, nodes=nodes, overwrite=overwrite)
    return _parse_async_node(sync(root_coro))
def get_node(store: Store, path: str, zarr_format: ZarrFormat) -> Array | Group:
    """
    Get an Array or Group from a path in a Store.

    Blocking counterpart of the async ``get_node``: the coroutine is run via
    ``sync`` and its result is converted with ``_parse_async_node``.

    Parameters
    ----------
    store : Store
        The store-like object to read from.
    path : str
        The path to the node to read.
    zarr_format : {2, 3}
        The zarr format of the node to read.

    Returns
    -------
    Array | Group
        The node found at ``path``.
    """
    node_coro = get_node_async(store=store, path=path, zarr_format=zarr_format)
    async_node = sync(node_coro)
    return _parse_async_node(async_node)