
Commit 5c48ea0

Fix sitemaps

1 parent a1f2998
File tree: 1 file changed (+152, -2 lines)
rnacentral/rnacentral/utils/cache.py

@@ -10,7 +10,20 @@
 See the License for the specific language governing permissions and
 limitations under the License.
 """
+import errno
+import glob
+import io
+import os
+import pickle
+import random
+import re
+import tempfile
+import time
+import zlib
+
+from django.core.cache.backends.base import DEFAULT_TIMEOUT, BaseCache
 from django.core.cache.backends.memcached import PyMemcacheCache
+from django.core.files.move import file_move_safe
 
 
 class CustomPyMemcacheCache(PyMemcacheCache):
@@ -24,10 +37,147 @@ def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
 
     def set(self, key, value, timeout=None, version=None):
-        import pickle
-
         value_size = len(pickle.dumps(value))
 
         if value_size > self.max_cache_size:
             return False  # do not cache
         return super().set(key, value, timeout, version)
+
+
+class SitemapsCache(BaseCache):
+    """
+    This class is required to generate sitemaps
+    """
+
+    cache_suffix = ".djcache"
+
+    def __init__(self, dir, params):
+        super(SitemapsCache, self).__init__(params)
+        self._dir = os.path.abspath(dir)
+        self._createdir()
+
+    def add(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
+        if self.has_key(key, version):
+            return False
+        self.set(key, value, timeout, version)
+        return True
+
+    def get(self, key, default=None, version=None):
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            try:
+                with io.open(fname, "rb") as f:
+                    if not self._is_expired(f):
+                        return pickle.loads(zlib.decompress(f.read()))
+            except IOError as e:
+                if e.errno == errno.ENOENT:
+                    pass  # Cache file was removed after the exists check
+        return default
+
+    def set(self, key, value, timeout=DEFAULT_TIMEOUT, version=None):
+        self._createdir()  # Cache dir can be deleted at any time.
+        fname = self._key_to_file(key, version)
+        self._cull()  # make some room if necessary
+        fd, tmp_path = tempfile.mkstemp(dir=self._dir)
+        renamed = False
+        try:
+            with io.open(fd, "wb") as f:
+                expiry = self.get_backend_timeout(timeout)
+                f.write(pickle.dumps(expiry, -1))
+                f.write(zlib.compress(pickle.dumps(value), -1))
+            file_move_safe(tmp_path, fname, allow_overwrite=True)
+            renamed = True
+        finally:
+            if not renamed:
+                os.remove(tmp_path)
+
+    def delete(self, key, version=None):
+        self._delete(self._key_to_file(key, version))
+
+    def _delete(self, fname):
+        if not fname.startswith(self._dir) or not os.path.exists(fname):
+            return
+        try:
+            os.remove(fname)
+        except OSError as e:
+            # ENOENT can happen if the cache file is removed (by another
+            # process) after the os.path.exists check.
+            if e.errno != errno.ENOENT:
+                raise
+
+    def has_key(self, key, version=None):
+        fname = self._key_to_file(key, version)
+        if os.path.exists(fname):
+            with io.open(fname, "rb") as f:
+                return not self._is_expired(f)
+        return False
+
+    def _cull(self):
+        """
+        Removes random cache entries if max_entries is reached at a ratio
+        of num_entries / cull_frequency. A value of 0 for CULL_FREQUENCY means
+        that the entire cache will be purged.
+        """
+        filelist = self._list_cache_files()
+        num_entries = len(filelist)
+        if num_entries < self._max_entries:
+            return  # return early if no culling is required
+        if self._cull_frequency == 0:
+            return self.clear()  # Clear the cache when CULL_FREQUENCY = 0
+        # Delete a random selection of entries
+        filelist = random.sample(filelist, int(num_entries / self._cull_frequency))
+        for fname in filelist:
+            self._delete(fname)
+
+    def _createdir(self):
+        if not os.path.exists(self._dir):
+            try:
+                os.makedirs(self._dir, 0o700)
+            except OSError as e:
+                if e.errno != errno.EEXIST:
+                    raise EnvironmentError(
+                        "Cache directory '%s' does not exist "
+                        "and could not be created" % self._dir
+                    )
+
+    def _key_to_file(self, key, version=None):
+        """
+        Convert a key into a cache file path: the root cache path joined
+        with the sanitized key plus the cache suffix.
+        """
+        key = re.sub("[:/#?&=+%]", "_", key)
+        return os.path.join(self._dir, "".join([key, self.cache_suffix]))
+
+    def clear(self):
+        """
+        Remove all the cache files.
+        """
+        if not os.path.exists(self._dir):
+            return
+        for fname in self._list_cache_files():
+            self._delete(fname)
+
+    def _is_expired(self, f):
+        """
+        Takes an open cache file and determines if it has expired;
+        deletes the file if it has passed its expiry time.
+        """
+        exp = pickle.load(f)
+        if exp is not None and exp < time.time():
+            f.close()  # On Windows a file has to be closed before deleting
+            self._delete(f.name)
+            return True
+        return False
+
+    def _list_cache_files(self):
+        """
+        Get a list of paths to all the cache files. These are all the files
+        in the root cache dir that end on the cache_suffix.
+        """
+        if not os.path.exists(self._dir):
+            return []
+        filelist = [
+            os.path.join(self._dir, fname)
+            for fname in glob.glob1(self._dir, "*%s" % self.cache_suffix)
+        ]
+        return filelist
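
For reference, a file-based backend like the new SitemapsCache is wired up through Django's CACHES setting: the cache framework instantiates the backend with LOCATION as the first constructor argument, which is the dir parameter here. The sketch below is illustrative only; the "sitemaps" alias, the directory path, and the rnacentral.utils.cache import path are assumptions, not taken from this commit.

# settings.py sketch (alias and LOCATION are hypothetical)
CACHES = {
    "sitemaps": {
        # Django creates SitemapsCache(LOCATION, params)
        "BACKEND": "rnacentral.utils.cache.SitemapsCache",
        "LOCATION": "/path/to/sitemaps-cache",  # directory for the *.djcache files
        "TIMEOUT": None,  # None -> entries never expire on their own
    },
}

# Application code can then store and read sitemap data through the
# usual cache API:
from django.core.cache import caches

sitemaps_cache = caches["sitemaps"]
sitemaps_cache.set("sitemap-1", "<urlset>...</urlset>")
xml = sitemaps_cache.get("sitemap-1")

Each set() call writes a compressed pickle to <LOCATION>/<sanitized key>.djcache, going through a temporary file and file_move_safe, so readers should not observe a partially written sitemap entry.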
