Skip to content

Commit 38d7858

Browse files
authored
Cache mode (#12)
* Add `hasdata` to storagemanager for cachecheck
* Add intern
* Improve mypy type notations
* Add awareness for terminal mode
* Add relay (use intern to communicate upstream)
* Add simple cache manager
* Add documentation
1 parent 057d222 commit 38d7858

7 files changed

+555
-299
lines changed

Pipfile

+2-5
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,21 @@
11
[[source]]
2-
32
url = "https://pypi.python.org/simple"
43
verify_ssl = true
54
name = "pypi"
65

7-
86
[packages]
9-
107
flask = "*"
118
numpy = "*"
129
blosc = "*"
1310
intern = "*"
1411

15-
1612
[dev-packages]
17-
1813
pylint = "*"
1914
mypy = "*"
2015
wheel = "*"
2116
twine = "*"
2217
jupyter = "*"
2318
matplotlib = "*"
2419
pandas = "*"
20+
"autopep8" = "*"
21+
pydocstyle = "*"

Pipfile.lock

+345-282
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bossphorus/storagemanager/StorageManager.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"""
1616

1717
from abc import ABC, abstractmethod
18+
from typing import Tuple
1819

1920
import numpy as np
2021

@@ -29,11 +30,22 @@ class StorageManager(ABC):
2930
"""
3031

3132
@abstractmethod
32-
def getdata(self, col: str, exp: str, chan: str,
33-
res: int, xs: [int, int], ys: [int, int], zs: [int, int]):
34-
pass
33+
def getdata(
34+
self, col: str, exp: str, chan: str, res: int,
35+
xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
36+
):
37+
...
3538

3639
@abstractmethod
37-
def setdata(self, data: np.array, col: str, exp: str, chan: str,
38-
res: int, xs: [int, int], ys: [int, int], zs: [int, int]):
39-
pass
40+
def hasdata(
41+
self, col: str, exp: str, chan: str, res: int,
42+
xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
43+
):
44+
...
45+
46+
@abstractmethod
47+
def setdata(
48+
self, data: np.array, col: str, exp: str, chan: str, res: int,
49+
xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
50+
):
51+
...

bossphorus/storagemanager/_FilesystemStorageManager.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
See the License for the specific language governing permissions and
1414
limitations under the License.
1515
"""
16+
from typing import Tuple
1617

1718
import os
1819
import numpy as np
@@ -27,19 +28,31 @@ class FilesystemStorageManager(StorageManager):
2728
Contains logic for reading and writing to local filesystem.
2829
"""
2930

30-
def __init__(self, storage_path: str, block_size: [int, int, int]):
31+
def __init__(
32+
self, storage_path: str, block_size: Tuple[int, int, int],
33+
is_terminal=True
34+
):
3135
"""
3236
Create a new FileSystemStorageManager.
3337
3438
Arguments:
3539
storage_path: Where to store the data tree
3640
block_size: How much data should go in each file
3741
"""
42+
self.is_terminal = is_terminal
3843
self.storage_path = storage_path
3944
self.block_size = block_size
4045

46+
def hasdata(
47+
self, col: str, exp: str, chan: str, res: int,
48+
xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
49+
):
50+
# TODO: Should know when it has data and return false even if it's
51+
# in terminal mode
52+
return self.is_terminal
53+
4154
def setdata(self, data: np.array, col: str, exp: str, chan: str, res: int,
42-
xs: [int, int], ys: [int, int], zs: [int, int]):
55+
xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]):
4356
"""
4457
Upload the file.
4558
@@ -74,7 +87,7 @@ def setdata(self, data: np.array, col: str, exp: str, chan: str, res: int,
7487
data_partial = self.store(data_partial, col, exp, chan, res, f)
7588

7689
def getdata(self, col: str, exp: str, chan: str, res: int,
77-
xs: [int, int], ys: [int, int], zs: [int, int]):
90+
xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]):
7891
"""
7992
Get the data from disk.
8093
@@ -118,7 +131,7 @@ def getdata(self, col: str, exp: str, chan: str, res: int,
118131
return payload
119132

120133
def store(self, data: np.array, col: str, exp: str, chan: str, res: int,
121-
b: [int, int, int]):
134+
b: Tuple[int, int, int]):
122135
"""
123136
Store a single block file.
124137
@@ -142,7 +155,7 @@ def store(self, data: np.array, col: str, exp: str, chan: str, res: int,
142155
return np.save(fname, data)
143156

144157
def retrieve(self, col: str, exp: str, chan: str, res: int,
145-
b: [int, int, int]):
158+
b: Tuple[int, int, int]):
146159
"""
147160
Pull a single block from disk.
148161
@@ -165,4 +178,3 @@ def retrieve(self, col: str, exp: str, chan: str, res: int,
165178
(b[2], b[2] + self.block_size[2]),
166179
)
167180
return np.load(fname)
168-
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""
2+
Copyright 2018 The Johns Hopkins University Applied Physics Laboratory.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
from typing import Tuple
17+
18+
import numpy as np
19+
20+
from intern.remote.boss import BossRemote
21+
22+
from .StorageManager import StorageManager
23+
24+
25+
class RelayStorageManager(StorageManager):
    """
    A StorageManager that relays every request to an upstream remote.

    Reads and writes are forwarded to `self.boss_remote`, which is either
    supplied by the caller or built as an `intern` BossRemote from the
    `upstream_uri` keyword argument. Nothing is stored locally.
    """

    def __init__(self, **kwargs):
        """
        Create a new RelayStorageManager.

        Arguments:
            block_size: How much data should go in each file. Defaults to
                (256, 256, 16). Stored on the instance; this class does not
                otherwise use it.
            boss_remote: An already-constructed remote to relay to. It must
                provide `get_channel`, `get_cutout`, and `create_cutout`.
                Takes precedence over `upstream_uri`.
            upstream_uri: Host of the upstream node. Used to construct a
                BossRemote when `boss_remote` is not given.
            protocol: Protocol used with `upstream_uri`. Defaults to "http".
            token: Token used with `upstream_uri`. Defaults to "no-token".

        Raises:
            ValueError: If neither `boss_remote` nor `upstream_uri` is given.
        """
        self.block_size = kwargs.get("block_size", (256, 256, 16))

        if "boss_remote" in kwargs:
            self.boss_remote = kwargs["boss_remote"]
        elif "upstream_uri" in kwargs:
            self.boss_remote = BossRemote({
                "host": kwargs["upstream_uri"],
                "protocol": kwargs.get("protocol", "http"),
                "token": kwargs.get("token", "no-token")
            })
        else:
            # Fail at construction time rather than with an AttributeError
            # on the first getdata/setdata call.
            raise ValueError(
                "RelayStorageManager requires either a `boss_remote` or an "
                "`upstream_uri` keyword argument."
            )

    def hasdata(
            self, col: str, exp: str, chan: str, res: int,
            xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
    ) -> bool:
        """
        Report whether this manager can serve the requested region.

        Always returns True: the upstream remote is not consulted, so the
        arguments are ignored.
        """
        return True

    def setdata(
            self, data: np.ndarray, col: str, exp: str, chan: str, res: int,
            xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
    ):
        """
        Upload `data` to the upstream remote.

        Looks up the channel with `get_channel(chan, col, exp)` and returns
        whatever the remote's `create_cutout` returns.
        """
        channel = self.boss_remote.get_channel(chan, col, exp)
        return self.boss_remote.create_cutout(channel, res, xs, ys, zs, data)

    def getdata(
            self, col: str, exp: str, chan: str, res: int,
            xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
    ) -> np.ndarray:
        """
        Download the requested region from the upstream remote.

        Looks up the channel with `get_channel(chan, col, exp)` and returns
        whatever the remote's `get_cutout` returns.
        """
        channel = self.boss_remote.get_channel(chan, col, exp)
        return self.boss_remote.get_cutout(channel, res, xs, ys, zs)
70+
71+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
"""
2+
Copyright 2018 The Johns Hopkins University Applied Physics Laboratory.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
"""
16+
from typing import List, Tuple
17+
18+
import numpy as np
19+
20+
from intern.remote.boss import BossRemote
21+
22+
from .StorageManager import StorageManager
23+
24+
25+
class SimpleCacheStorageManager(StorageManager):
    """
    A SimpleCache is a naïve "cascade" of storage managers.

    Reads:
        Retrieves data from the first manager in `layers` that can satisfy the
        request for data

    Writes:
        Writes to ALL layers.
    """

    def __init__(self, layers: List[StorageManager]) -> None:
        """
        Create a new SimpleCacheStorageManager.

        Arguments:
            layers: The storage managers to cascade through, in the order
                they should be consulted on reads.
        """
        self.layers = layers

    def hasdata(
            self, col: str, exp: str, chan: str, res: int,
            xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
    ) -> bool:
        """
        Report whether any layer claims to hold the requested region.

        Layers are asked in order and the check stops at the first layer
        whose `hasdata` returns a truthy value.
        """
        # Generator (not a list) so that `any` short-circuits and later
        # layers are not queried once one has answered True.
        return any(
            layer.hasdata(col, exp, chan, res, xs, ys, zs)
            for layer in self.layers
        )

    def setdata(
            self, data: np.ndarray, col: str, exp: str, chan: str, res: int,
            xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
    ):
        """
        Write `data` to every layer, in order.
        """
        for layer in self.layers:
            layer.setdata(data, col, exp, chan, res, xs, ys, zs)

    def getdata(
            self, col: str, exp: str, chan: str, res: int,
            xs: Tuple[int, int], ys: Tuple[int, int], zs: Tuple[int, int]
    ) -> np.ndarray:
        """
        Read the requested region from the first layer that reports having it.

        Raises:
            ValueError: If no layer's `hasdata` returns a truthy value.
        """
        for layer in self.layers:
            if layer.hasdata(col, exp, chan, res, xs, ys, zs):
                return layer.getdata(col, exp, chan, res, xs, ys, zs)
        raise ValueError("Data could not be retrieved.")

docs/StorageManagers.md

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# Storage Managers
2+
3+
There are a variety of available storage managers, each with their own strengths and weaknesses. They are listed below for your reference.
4+
5+
*All* storage managers have `getdata`, `setdata`, and `hasdata` functions. If `hasdata` returns `True`, that is an informal promise that `getdata` with the same arguments will successfully return data without error.
6+
7+
## FilesystemStorageManager
8+
9+
Uses the filesystem as a storage mechanism for data.
10+
11+
Data are stored in numpy's binary array format (written with `np.save` and read back with `np.load`), and are block-chunked to enable parallel data access.
12+
13+
## RelayStorageManager
14+
15+
Uses `intern` (`pip install intern`) to point to an upstream bossDB or bossphorus node.
16+
17+
## SimpleCacheStorageManager
18+
19+
Provides no smarts on its own; instead, acts as a naïve 'cascade' cache for a list of other storage managers.
20+
21+
For example, you could use this as a multilevel cache for a bossDB instance:
22+
23+
```python
24+
25+
SCSM = SimpleCacheStorageManager(
26+
layers=[
27+
RelayStorageManager(upstream_uri="localhost:3000"),
28+
RelayStorageManager(upstream_uri="my-lab-bossdb/"),
29+
RelayStorageManager(upstream_uri="bossdb.my-institution.edu/"),
30+
RelayStorageManager(upstream_uri="my-boss-instance.com/"),
31+
]
32+
)
33+
```
34+
35+
...which will attempt a local cache, then a lab-wide cache, then an institutional cache, and finally a cloud-based bossDB, in that order. Reads are served by the first layer whose `hasdata` returns `True`; writes go to every layer.

0 commit comments

Comments
 (0)