Skip to content

Commit a756586

Browse files
d-v-b and joshmoore authored
Expand (and pass) nested FSStore tests (#709)
* FSStore: flesh out incomplete test and add another. The first test passes after changes to FSStore. The second test fails. * pep8 fixes * TestNestedFSStore: tweak second assertion of test_numbered_groups * FSStore: change getitems to return dict with input keys * TestArrayWithFSStore: add key_separator kwarg to FSStore constructor * TestArrayWithFSStore: add key_separator arg to store constructor in create_array. * revert changes. the logic I need to test actually lives in test_core, not test_storage. * kill some whitespace * add nested fsstore tests * FSStore: fsstore.listdir now handles nested keys * FSStore: re-order conditional evaluation in listdir * FSStore: use self.fs.find in listdir * Add tests from #718 * Apply suggestion from @grlee77 * Update PartialRead hexdigest values * More hexdigest updates Co-authored-by: jmoore <[email protected]>
1 parent ac068a0 commit a756586

File tree

4 files changed

+250
-21
lines changed

4 files changed

+250
-21
lines changed

zarr/storage.py

+28-5
Original file line numberDiff line numberDiff line change
@@ -1081,8 +1081,11 @@ def _normalize_key(self, key):
10811081
return key.lower() if self.normalize_keys else key
10821082

10831083
def getitems(self, keys, **kwargs):
1084-
keys = [self._normalize_key(key) for key in keys]
1085-
return self.map.getitems(keys, on_error="omit")
1084+
keys_transformed = [self._normalize_key(key) for key in keys]
1085+
results = self.map.getitems(keys_transformed, on_error="omit")
1086+
# The function calling this method may not recognize the transformed keys
1087+
# So we send the values returned by self.map.getitems back into the original key space.
1088+
return {keys[keys_transformed.index(rk)]: rv for rk, rv in results.items()}
10861089

10871090
def __getitem__(self, key):
10881091
key = self._normalize_key(key)
@@ -1144,9 +1147,28 @@ def dir_path(self, path=None):
11441147
def listdir(self, path=None):
11451148
dir_path = self.dir_path(path)
11461149
try:
1147-
out = sorted(p.rstrip('/').rsplit('/', 1)[-1]
1148-
for p in self.fs.ls(dir_path, detail=False))
1149-
return out
1150+
children = sorted(p.rstrip('/').rsplit('/', 1)[-1]
1151+
for p in self.fs.ls(dir_path, detail=False))
1152+
if self.key_separator != "/":
1153+
return children
1154+
else:
1155+
if array_meta_key in children:
1156+
# special handling of directories containing an array to map nested chunk
1157+
# keys back to standard chunk keys
1158+
new_children = []
1159+
root_path = self.dir_path(path)
1160+
for entry in children:
1161+
entry_path = os.path.join(root_path, entry)
1162+
if _prog_number.match(entry) and self.fs.isdir(entry_path):
1163+
for file_name in self.fs.find(entry_path):
1164+
file_path = os.path.join(dir_path, file_name)
1165+
rel_path = file_path.split(root_path)[1]
1166+
new_children.append(rel_path.replace(os.path.sep, '.'))
1167+
else:
1168+
new_children.append(entry)
1169+
return sorted(new_children)
1170+
else:
1171+
return children
11501172
except IOError:
11511173
return []
11521174

@@ -2739,6 +2761,7 @@ class ConsolidatedMetadataStore(MutableMapping):
27392761
zarr.convenience.consolidate_metadata, zarr.convenience.open_consolidated
27402762
27412763
"""
2764+
27422765
def __init__(self, store, metadata_key='.zmetadata'):
27432766
self.store = store
27442767

zarr/tests/test_core.py

+165-9
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@
3535
from zarr.util import buffer_size
3636
from zarr.tests.util import skip_test_env_var, have_fsspec
3737

38-
3938
# noinspection PyMethodMayBeStatic
39+
40+
4041
class TestArray(unittest.TestCase):
4142

4243
def test_array_init(self):
@@ -1079,7 +1080,7 @@ def test_structured_array_nested(self):
10791080
(1, (1, ((1, 2), (2, 3), (3, 4)), 1), b'bbb'),
10801081
(2, (2, ((2, 3), (3, 4), (4, 5)), 2), b'ccc')],
10811082
dtype=[('foo', 'i8'), ('bar', [('foo', 'i4'), ('bar', '(3, 2)f4'),
1082-
('baz', 'u1')]), ('baz', 'S3')])
1083+
('baz', 'u1')]), ('baz', 'S3')])
10831084
fill_values = None, b'', (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b'zzz')
10841085
self.check_structured_array(d, fill_values)
10851086

@@ -1802,7 +1803,7 @@ def test_structured_array_nested(self):
18021803
(1, (1, ((1, 2), (2, 3), (3, 4)), 1), b'bbb'),
18031804
(2, (2, ((2, 3), (3, 4), (4, 5)), 2), b'ccc')],
18041805
dtype=[('foo', 'i8'), ('bar', [('foo', 'i4'), ('bar', '(3, 2)f4'),
1805-
('baz', 'u1')]), ('baz', 'S3')])
1806+
('baz', 'u1')]), ('baz', 'S3')])
18061807
fill_values = None, b'', (0, (0, ((0, 0), (1, 1), (2, 2)), 0), b'zzz')
18071808
with pytest.raises(TypeError):
18081809
self.check_structured_array(d, fill_values)
@@ -2469,36 +2470,50 @@ class TestArrayWithFSStore(TestArray):
24692470
def create_array(read_only=False, **kwargs):
24702471
path = mkdtemp()
24712472
atexit.register(shutil.rmtree, path)
2472-
store = FSStore(path)
2473+
key_separator = kwargs.pop('key_separator', ".")
2474+
store = FSStore(path, key_separator=key_separator, auto_mkdir=True)
24732475
cache_metadata = kwargs.pop('cache_metadata', True)
24742476
cache_attrs = kwargs.pop('cache_attrs', True)
24752477
kwargs.setdefault('compressor', Blosc())
24762478
init_array(store, **kwargs)
24772479
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
24782480
cache_attrs=cache_attrs)
24792481

2482+
def expected(self):
2483+
return [
2484+
"ab753fc81df0878589535ca9bad2816ba88d91bc",
2485+
"c16261446f9436b1e9f962e57ce3e8f6074abe8a",
2486+
"c2ef3b2fb2bc9dcace99cd6dad1a7b66cc1ea058",
2487+
"6e52f95ac15b164a8e96843a230fcee0e610729b",
2488+
"091fa99bc60706095c9ce30b56ce2503e0223f56",
2489+
]
2490+
24802491
def test_hexdigest(self):
2492+
found = []
2493+
24812494
# Check basic 1-D array
24822495
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
2483-
assert 'f710da18d45d38d4aaf2afd7fb822fdd73d02957' == z.hexdigest()
2496+
found.append(z.hexdigest())
24842497

24852498
# Check basic 1-D array with different type
24862499
z = self.create_array(shape=(1050,), chunks=100, dtype='<f4')
2487-
assert '1437428e69754b1e1a38bd7fc9e43669577620db' == z.hexdigest()
2500+
found.append(z.hexdigest())
24882501

24892502
# Check basic 2-D array
24902503
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
2491-
assert '6c530b6b9d73e108cc5ee7b6be3d552cc994bdbe' == z.hexdigest()
2504+
found.append(z.hexdigest())
24922505

24932506
# Check basic 1-D array with some data
24942507
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
24952508
z[200:400] = np.arange(200, 400, dtype='i4')
2496-
assert '4c0a76fb1222498e09dcd92f7f9221d6cea8b40e' == z.hexdigest()
2509+
found.append(z.hexdigest())
24972510

24982511
# Check basic 1-D array with attributes
24992512
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
25002513
z.attrs['foo'] = 'bar'
2501-
assert '05b0663ffe1785f38d3a459dec17e57a18f254af' == z.hexdigest()
2514+
found.append(z.hexdigest())
2515+
2516+
assert self.expected() == found
25022517

25032518

25042519
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
@@ -2573,3 +2588,144 @@ def test_read_from_all_blocks(self):
25732588
z[2:99_000] = 1
25742589
b = Array(z.store, read_only=True, partial_decompress=True)
25752590
assert (b[2:99_000] == 1).all()
2591+
2592+
2593+
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
2594+
class TestArrayWithFSStoreNested(TestArray):
2595+
2596+
@staticmethod
2597+
def create_array(read_only=False, **kwargs):
2598+
path = mkdtemp()
2599+
atexit.register(shutil.rmtree, path)
2600+
key_separator = kwargs.pop('key_separator', "/")
2601+
store = FSStore(path, key_separator=key_separator, auto_mkdir=True)
2602+
cache_metadata = kwargs.pop('cache_metadata', True)
2603+
cache_attrs = kwargs.pop('cache_attrs', True)
2604+
kwargs.setdefault('compressor', Blosc())
2605+
init_array(store, **kwargs)
2606+
return Array(store, read_only=read_only, cache_metadata=cache_metadata,
2607+
cache_attrs=cache_attrs)
2608+
2609+
def expected(self):
2610+
return [
2611+
"94884f29b41b9beb8fc99ad7bf9c0cbf0f2ab3c9",
2612+
"077aa3bd77b8d354f8f6c15dce5ae4f545788a72",
2613+
"22be95d83c097460adb339d80b2d7fe19c513c16",
2614+
"85131cec526fa46938fd2c4a6083a58ee11037ea",
2615+
"c3167010c162c6198cb2bf3c1da2c46b047c69a1",
2616+
]
2617+
2618+
def test_hexdigest(self):
2619+
found = []
2620+
2621+
# Check basic 1-D array
2622+
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
2623+
found.append(z.hexdigest())
2624+
2625+
# Check basic 1-D array with different type
2626+
z = self.create_array(shape=(1050,), chunks=100, dtype='<f4')
2627+
found.append(z.hexdigest())
2628+
2629+
# Check basic 2-D array
2630+
z = self.create_array(shape=(20, 35,), chunks=10, dtype='<i4')
2631+
found.append(z.hexdigest())
2632+
2633+
# Check basic 1-D array with some data
2634+
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
2635+
z[200:400] = np.arange(200, 400, dtype='i4')
2636+
found.append(z.hexdigest())
2637+
2638+
# Check basic 1-D array with attributes
2639+
z = self.create_array(shape=(1050,), chunks=100, dtype='<i4')
2640+
z.attrs['foo'] = 'bar'
2641+
found.append(z.hexdigest())
2642+
2643+
assert self.expected() == found
2644+
2645+
2646+
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
2647+
class TestArrayWithFSStoreNestedPartialRead(TestArray):
2648+
@staticmethod
2649+
def create_array(read_only=False, **kwargs):
2650+
path = mkdtemp()
2651+
atexit.register(shutil.rmtree, path)
2652+
key_separator = kwargs.pop('key_separator', "/")
2653+
store = FSStore(path, key_separator=key_separator, auto_mkdir=True)
2654+
cache_metadata = kwargs.pop("cache_metadata", True)
2655+
cache_attrs = kwargs.pop("cache_attrs", True)
2656+
kwargs.setdefault("compressor", Blosc())
2657+
init_array(store, **kwargs)
2658+
return Array(
2659+
store,
2660+
read_only=read_only,
2661+
cache_metadata=cache_metadata,
2662+
cache_attrs=cache_attrs,
2663+
partial_decompress=True,
2664+
)
2665+
2666+
def expected(self):
2667+
return [
2668+
"94884f29b41b9beb8fc99ad7bf9c0cbf0f2ab3c9",
2669+
"077aa3bd77b8d354f8f6c15dce5ae4f545788a72",
2670+
"22be95d83c097460adb339d80b2d7fe19c513c16",
2671+
"85131cec526fa46938fd2c4a6083a58ee11037ea",
2672+
"c3167010c162c6198cb2bf3c1da2c46b047c69a1",
2673+
]
2674+
2675+
def test_hexdigest(self):
2676+
found = []
2677+
2678+
# Check basic 1-D array
2679+
z = self.create_array(shape=(1050,), chunks=100, dtype="<i4")
2680+
found.append(z.hexdigest())
2681+
2682+
# Check basic 1-D array with different type
2683+
z = self.create_array(shape=(1050,), chunks=100, dtype="<f4")
2684+
found.append(z.hexdigest())
2685+
2686+
# Check basic 2-D array
2687+
z = self.create_array(
2688+
shape=(
2689+
20,
2690+
35,
2691+
),
2692+
chunks=10,
2693+
dtype="<i4",
2694+
)
2695+
found.append(z.hexdigest())
2696+
2697+
# Check basic 1-D array with some data
2698+
z = self.create_array(shape=(1050,), chunks=100, dtype="<i4")
2699+
z[200:400] = np.arange(200, 400, dtype="i4")
2700+
found.append(z.hexdigest())
2701+
2702+
# Check basic 1-D array with attributes
2703+
z = self.create_array(shape=(1050,), chunks=100, dtype="<i4")
2704+
z.attrs["foo"] = "bar"
2705+
found.append(z.hexdigest())
2706+
2707+
assert self.expected() == found
2708+
2709+
def test_non_cont(self):
2710+
z = self.create_array(shape=(500, 500, 500), chunks=(50, 50, 50), dtype="<i4")
2711+
z[:, :, :] = 1
2712+
# actually go through the partial read by accessing a single item
2713+
assert z[0, :, 0].any()
2714+
2715+
def test_read_nitems_less_than_blocksize_from_multiple_chunks(self):
2716+
'''Tests to make sure decompression doesn't fail when `nitems` is
2717+
less than a compressed block size, but covers multiple blocks
2718+
'''
2719+
z = self.create_array(shape=1000000, chunks=100_000)
2720+
z[40_000:80_000] = 1
2721+
b = Array(z.store, read_only=True, partial_decompress=True)
2722+
assert (b[40_000:80_000] == 1).all()
2723+
2724+
def test_read_from_all_blocks(self):
2725+
'''Tests to make sure `PartialReadBuffer.read_part` doesn't fail when
2726+
stop isn't in the `start_points` array
2727+
'''
2728+
z = self.create_array(shape=1000000, chunks=100_000)
2729+
z[2:99_000] = 1
2730+
b = Array(z.store, read_only=True, partial_decompress=True)
2731+
assert (b[2:99_000] == 1).all()

zarr/tests/test_hierarchy.py

+51-6
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@
2121
from zarr.core import Array
2222
from zarr.creation import open_array
2323
from zarr.hierarchy import Group, group, open_group
24-
from zarr.storage import (ABSStore, DBMStore, DirectoryStore, LMDBStore,
25-
LRUStoreCache, MemoryStore, NestedDirectoryStore,
26-
SQLiteStore, ZipStore, array_meta_key, atexit_rmglob,
27-
atexit_rmtree, group_meta_key, init_array,
28-
init_group)
24+
from zarr.storage import (ABSStore, DBMStore, DirectoryStore, FSStore,
25+
LMDBStore, LRUStoreCache, MemoryStore,
26+
NestedDirectoryStore, SQLiteStore, ZipStore,
27+
array_meta_key, atexit_rmglob, atexit_rmtree,
28+
group_meta_key, init_array, init_group)
2929
from zarr.util import InfoReporter
30-
from zarr.tests.util import skip_test_env_var
30+
from zarr.tests.util import skip_test_env_var, have_fsspec
3131

3232

3333
# noinspection PyStatementEffect
@@ -971,6 +971,51 @@ def create_store():
971971
return store, None
972972

973973

974+
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
975+
class TestGroupWithFSStore(TestGroup):
976+
977+
@staticmethod
978+
def create_store():
979+
path = tempfile.mkdtemp()
980+
atexit.register(atexit_rmtree, path)
981+
store = FSStore(path)
982+
return store, None
983+
984+
def test_round_trip_nd(self):
985+
data = np.arange(1000).reshape(10, 10, 10)
986+
name = 'raw'
987+
988+
store, _ = self.create_store()
989+
f = open_group(store, mode='w')
990+
f.create_dataset(name, data=data, chunks=(5, 5, 5),
991+
compressor=None)
992+
h = open_group(store, mode='r')
993+
np.testing.assert_array_equal(h[name][:], data)
994+
995+
996+
@pytest.mark.skipif(have_fsspec is False, reason="needs fsspec")
997+
class TestGroupWithNestedFSStore(TestGroupWithFSStore):
998+
999+
@staticmethod
1000+
def create_store():
1001+
path = tempfile.mkdtemp()
1002+
atexit.register(atexit_rmtree, path)
1003+
store = FSStore(path, key_separator='/', auto_mkdir=True)
1004+
return store, None
1005+
1006+
def test_inconsistent_dimension_separator(self):
1007+
data = np.arange(1000).reshape(10, 10, 10)
1008+
name = 'raw'
1009+
1010+
store, _ = self.create_store()
1011+
f = open_group(store, mode='w')
1012+
1013+
# cannot specify dimension_separator that conflicts with the store
1014+
with pytest.raises(ValueError):
1015+
f.create_dataset(name, data=data, chunks=(5, 5, 5),
1016+
compressor=None, dimension_separator='.')
1017+
1018+
9741019
class TestGroupWithZipStore(TestGroup):
9751020

9761021
@staticmethod

zarr/util.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -554,14 +554,19 @@ def __init__(self, store_key, chunk_store):
554554
self.map = self.chunk_store.map
555555
self.fs = self.chunk_store.fs
556556
self.store_key = store_key
557-
self.key_path = self.map._key_to_str(store_key)
558557
self.buff = None
559558
self.nblocks = None
560559
self.start_points = None
561560
self.n_per_block = None
562561
self.start_points_max = None
563562
self.read_blocks = set()
564563

564+
_key_path = self.map._key_to_str(store_key)
565+
_key_path = _key_path.split('/')
566+
_chunk_path = [self.chunk_store._normalize_key(_key_path[-1])]
567+
_key_path = '/'.join(_key_path[:-1] + _chunk_path)
568+
self.key_path = _key_path
569+
565570
def prepare_chunk(self):
566571
assert self.buff is None
567572
header = self.fs.read_block(self.key_path, 0, 16)

0 commit comments

Comments (0)