Skip to content

Commit 9ddc6f4

Browse files
Have memestra look for extra deprecation information in a shared dir
Users or third-party packages can populate <prefix>/share/memestra with YAML files describing deprecated functions, in order to speed up computation and/or provide information not available from decorators.
1 parent 5d17beb commit 9ddc6f4

File tree

6 files changed

+147
-15
lines changed

6 files changed

+147
-15
lines changed

docs/memestra-cache.rst

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,79 @@
11
Memestra-cache
22
==============
33

4-
Memestra uses a cache to speedup some computations. To handle the cache, memestra installs a tool named ``memestra-cache``.
4+
*Memestra* uses two caches to speed up some computations.
55

6-
Memestra's caching infrastructure is a file-based cache, located in home/<user>/.memestra (RW) and in <prefix>/memestra/cache (RO). The key is a hash of the file content and the value is deprecation information, generator used, etc.
6+
One is a declarative cache, installed in ``<prefix>/share/memestra``. The second
7+
one is an automatic cache, installed in ``<home>/.memestra``.
78

8-
There are two kind of keys: recursive and non-recursive. The recursive one also uses the hash of imported modules, so that if an imported module changes, the hash of the importing module aslo changes.
9+
Declarative Cache
10+
-----------------
911

10-
To interact with memestra cache:
12+
The declarative cache is a file-based cache manually managed by users or
13+
third-party packages. Its structure is very simple: to describe a Python file
14+
whose path is ``<root>/numpy/random/__init__.py``, one needs to drop a file in
15+
``<prefix>/share/memestra/numpy/random/__init__.yml``. The yaml content looks
16+
like the following:
17+
18+
.. code:: yaml
19+
20+
deprecated: ['deprecated_function1', 'deprecated_function2:some reason']
21+
generator: manual
22+
name: numpy.random
23+
version: 1
24+
25+
- The ``deprecated`` field is the most important one. It contains a list of
26+
strings, each element being the name of a deprecated identifier. Text after
27+
the first (optional) ``:`` is used as deprecation documentation.
28+
29+
- The ``generator`` field must be set to ``manual``.
30+
31+
- The ``name`` field is informative, it documents the cache entry and is usually
32+
set to the entry path.
33+
34+
- The ``version`` field is used to track compatibility with further format
35+
changes. Current value is ``1``.
36+
37+
When hitting an entry in the declarative cache, *memestra* does **not**
38+
process the content of the file, and uses the entry content instead.
39+
40+
41+
Automatic Cache
42+
---------------
43+
44+
To avoid redundant computations, *memestra* also maintains a cache of the visited files
45+
and the associated deprecation results.
46+
47+
To handle the cache, *memestra* provides a tool named ``memestra-cache``.
48+
49+
*Memestra*'s caching infrastructure is a file-based cache, located in
50+
home/<user>/.memestra (RW). The key is a
51+
hash of the file content and the value contains deprecation information, generator
52+
used, etc.
53+
54+
There are two kinds of keys: recursive and non-recursive. The recursive one also
55+
uses the hash of imported modules, so that if an imported module changes, the
56+
hash of the importing module also changes.
57+
58+
To interact with *memestra* caches:
1159

1260
**Positional arguments:**
1361

1462
``-set``
1563

16-
Set a cache entry
64+
Set a cache entry in the automatic cache
1765

1866
``-list``
1967

20-
List cache entries
68+
List cache entries for both caches
2169

2270
``-clear``
2371

24-
Remove all cache entries
72+
Remove all cache entries from the automatic cache
2573

2674
``-docparse``
2775

28-
Set cache entry from docstring
76+
Set cache entry from docstring in the automatic cache
2977

3078

3179
**Optional arguments:**

memestra/caching.py

Lines changed: 65 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import os
22
import hashlib
3+
import sys
34
import yaml
45

56
# not using gast because we only rely on Import and ImportFrom, which are
@@ -131,12 +132,15 @@ def __init__(self, keycls):
131132
self.keycls = keycls
132133
self.created = dict()
133134

134-
def __call__(self, module_path):
135+
def __call__(self, module_path, name_hint=None ):
135136
if module_path in self.created:
136137
return self.created[module_path]
137138
else:
138139
self.created[module_path] = None # creation in process
139140
key = self.keycls(module_path, self)
141+
if name_hint is None:
142+
name_hint = os.path.splitext(os.path.basename(module_path))[0]
143+
key.name = name_hint
140144
self.created[module_path] = key
141145
return key
142146

@@ -153,12 +157,16 @@ class CacheKeyFactory(CacheKeyFactoryBase):
153157
class CacheKey(object):
154158

155159
def __init__(self, module_path, _):
156-
self.name, _ = os.path.splitext(os.path.basename(module_path))
160+
157161
with open(module_path, 'rb') as fd:
158162
module_content = fd.read()
159163
module_hash = hashlib.sha256(module_content).hexdigest()
160164
self.module_hash = module_hash
161165

166+
@property
167+
def path(self):
168+
return self.name.replace('.', os.path.sep)
169+
162170
def __init__(self):
163171
super(CacheKeyFactory, self).__init__(CacheKeyFactory.CacheKey)
164172

@@ -176,7 +184,6 @@ class CacheKey(object):
176184
def __init__(self, module_path, factory):
177185
assert module_path not in factory.created or factory.created[module_path] is None
178186

179-
self.name, _ = os.path.splitext(os.path.basename(module_path))
180187
with open(module_path, 'rb') as fd:
181188
module_content = fd.read()
182189

@@ -204,13 +211,51 @@ def __init__(self, module_path, factory):
204211

205212
self.module_hash = hashlib.sha256("".join(hashes).encode("ascii")).hexdigest()
206213

214+
@property
215+
def path(self):
216+
return self.name.replace('.', os.path.sep)
217+
207218
def __init__(self):
208219
super(RecursiveCacheKeyFactory, self).__init__(RecursiveCacheKeyFactory.CacheKey)
209220

210221

222+
class SharedCache(object):
    """Read-only index of the declarative cache in ``<sys.prefix>/share/memestra``.

    Every ``*.yml`` file found under that directory becomes an entry whose key
    is the file path relative to the shared directory, without the ``.yml``
    extension. A file named ``__init__.yml`` is keyed by its directory instead,
    so ``numpy/random/__init__.yml`` is stored under ``numpy/random`` (using
    the platform path separator).

    Only the paths are collected at construction time; the YAML content is
    parsed lazily by ``__getitem__``.
    """

    def __init__(self):
        shared_dir = os.path.join(sys.prefix, 'share', 'memestra')
        try:
            os.makedirs(shared_dir, exist_ok=True)
        except OSError:
            # Best effort only: the installation prefix is frequently not
            # writable by the current user (system-wide Python, read-only
            # image, ...). A missing directory just means an empty shared
            # cache, because os.walk yields nothing for a path it cannot list.
            pass

        # Maps cache key -> absolute path of the yaml file describing it.
        self.cache_entries = {}

        for root, _dirs, files in os.walk(shared_dir):
            rel_root = os.path.relpath(root, shared_dir)
            if rel_root == os.curdir:
                # Entries located directly in the shared directory have no
                # package prefix.
                rel_root = ''

            for fname in files:
                if not fname.endswith('.yml'):
                    continue
                # Not loading all entries on startup,
                # doing it lazily upon __getitem__

                if fname == '__init__.yml':
                    # A package is described by its directory name.
                    key = rel_root
                else:
                    key = os.path.join(rel_root, fname[:-4])
                self.cache_entries[key] = os.path.join(root, fname)

    def __contains__(self, key):
        """Tell whether a declarative entry exists for ``key``."""
        return key in self.cache_entries

    def __getitem__(self, key):
        """Load and return the parsed yaml content of the entry ``key``.

        Raises ``KeyError`` if ``key`` is not a known entry.
        """
        cache_path = self.cache_entries[key]
        with open(cache_path, 'r') as yaml_fd:
            return yaml.load(yaml_fd, Loader=yaml.SafeLoader)

    def keys(self):
        """Return a view of all the keys of the declarative cache."""
        return self.cache_entries.keys()
253+
211254
class Cache(object):
212255

213256
def __init__(self, cache_dir=None):
257+
self.shared_cache = SharedCache()
258+
214259
if cache_dir is not None:
215260
self.cachedir = cache_dir
216261
else:
@@ -223,17 +268,20 @@ def __init__(self, cache_dir=None):
223268
memestra_dir = 'memestra'
224269
self.cachedir = os.path.expanduser(os.path.join(user_config_dir,
225270
memestra_dir))
226-
227271
os.makedirs(self.cachedir, exist_ok=True)
228272

229273
def _get_path(self, key):
230274
return os.path.join(self.cachedir, key.module_hash)
231275

232276
def __contains__(self, key):
277+
if key.path in self.shared_cache:
278+
return True
233279
cache_path = self._get_path(key)
234280
return os.path.isfile(cache_path)
235281

236282
def __getitem__(self, key):
283+
if key.path in self.shared_cache:
284+
return self.shared_cache[key.path]
237285
cache_path = self._get_path(key)
238286
with open(cache_path, 'r') as yaml_fd:
239287
return yaml.load(yaml_fd, Loader=yaml.SafeLoader)
@@ -255,6 +303,11 @@ def items(self):
255303
with open(cache_path, 'r') as yaml_fd:
256304
yield key, yaml.load(yaml_fd, Loader=yaml.SafeLoader)
257305

306+
307+
def shared_items(self):
308+
for key in self.shared_cache.keys():
309+
yield key, self.shared_cache[key]
310+
258311
def clear(self):
259312
count = 0
260313
for key in self.keys():
@@ -278,8 +331,15 @@ def run_set(args):
278331

279332
def run_list(args):
280333
cache = Cache(cache_dir=args.cache_dir)
281-
for k, v in cache.items():
334+
print('declarative cache')
335+
print('-----------------')
336+
for k, v in cache.shared_items():
282337
print('{}: {} ({})'.format(k, v['name'], len(v['deprecated'])))
338+
print()
339+
print('automatic cache')
340+
print('---------------')
341+
for k, v in cache.items():
342+
print('{}: {} ({})'.format(k[:16], v['name'], len(v['deprecated'])))
283343

284344

285345
def run_clear(args):

memestra/memestra.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ def load_deprecated_from_module(self, module_name, level=None):
106106
if module_path is None:
107107
return None
108108

109-
module_key = self.key_factory(module_path)
109+
module_key = self.key_factory(module_path, name_hint=module_name)
110110

111111
# either find it in the cache
112112
if module_key in self.cache:
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
deprecated: ['parse']
2+
generator: manual
3+
name: gast.__init__
4+
version: 1

tests/share/memestra/gast/astn.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
deprecated: ['AstToGAst:because']
2+
generator: manual
3+
name: gast.astn
4+
version: 1

tests/test_imports.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from unittest import TestCase
1+
from unittest import TestCase, mock
22
from textwrap import dedent
33
from io import StringIO
44
import memestra
@@ -235,3 +235,19 @@ def test_import_pkg_level_star(self):
235235
self.checkDeprecatedUses(
236236
code,
237237
[('foo', '<>', 2, 0, 'why'), ('foo', '<>', 4, 4, 'why')])
238+
239+
def test_shared_cache(self):
240+
# We have a fake description for gast in tests/share/memestra
241+
# Setup the shared cache to use it.
242+
with mock.patch('sys.prefix', os.path.dirname(__file__)):
243+
self.checkDeprecatedUses(
244+
'from gast import parse',
245+
[('parse', '<>', 1, 0, None)])
246+
247+
def test_shared_cache_sub(self):
248+
# We have a fake description for gast in tests/share/memestra
249+
# Setup the shared cache to use it.
250+
with mock.patch('sys.prefix', os.path.dirname(__file__)):
251+
self.checkDeprecatedUses(
252+
'from gast.astn import AstToGAst',
253+
[('AstToGAst', '<>', 1, 0, 'because')])

0 commit comments

Comments
 (0)