Skip to content

Commit 3b98ab4

Browse files
committed
feat: add caching for timezone offsets, significantly speeds up import
This is different from PR scrapinghub#1181: it builds a cache at install time which can be distributed. Closes scrapinghub#533.
1 parent 47acb88 commit 3b98ab4

File tree

4 files changed

+61
-4
lines changed

4 files changed

+61
-4
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ include CONTRIBUTING.rst
33
include HISTORY.rst
44
include LICENSE
55
include README.rst
6+
include dateparser/data/dateparser_tz_cache.pkl
67
include dateparser_data/settings.py
78
include requirements.txt
89

56 KB
Binary file not shown.

dateparser/timezone_parser.py

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import os
2+
import pickle
3+
import zlib
14
from datetime import datetime, timedelta, timezone, tzinfo
5+
from pathlib import Path
26

37
import regex as re
48

@@ -84,8 +88,44 @@ def get_local_tz_offset():
8488
return offset
8589

8690

87-
_search_regex_parts = []
88-
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
89-
_search_regex = re.compile("|".join(_search_regex_parts))
90-
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
9191
local_tz_offset = get_local_tz_offset()
92+
93+
# Module-level timezone tables. They start out empty and are assigned by
# _load_offsets(), which declares all three names ``global``.
_tz_offsets = None
94+
_search_regex = None
95+
_search_regex_ignorecase = None
96+
97+
98+
def _load_offsets(cache_path, current_hash):
    """Populate the module-level timezone tables, preferring the on-disk cache.

    The cache file is a pickle of the 4-tuple
    ``(hash, tz_offsets, search_regex, search_regex_ignorecase)``. It is used
    only when its stored hash equals ``current_hash``; otherwise the tables
    are rebuilt with ``build_tz_offsets`` and the cache file is rewritten.

    The cache is purely an optimisation: a missing, unreadable, truncated or
    otherwise malformed cache file, or a cache location that cannot be
    written to, never prevents the tables from being built.

    :param cache_path: Path of the pickle cache file.
    :param current_hash: Hash identifying the current timezone data; compared
        for equality against the hash stored in the cache.
    """
    global _tz_offsets, _search_regex, _search_regex_ignorecase

    try:
        with open(cache_path, mode="rb") as file:
            # NOTE(security): unpickling can execute arbitrary code. This is
            # only acceptable while cache_path points at a trusted file.
            cached = pickle.load(file)
        # Unpack into locals first so that a stale or malformed cache never
        # leaks half-initialised values into the module globals.
        serialized_hash, tz_offsets, search_regex, search_regex_ignorecase = cached
    except (
        OSError,  # missing file, permission problems, I/O errors
        EOFError,  # empty or truncated pickle
        pickle.UnpicklingError,  # corrupt pickle data
        AttributeError,  # pickle references something that no longer exists
        ImportError,
        IndexError,
        ValueError,  # wrong number of items in the cached tuple
        TypeError,  # cached object is not iterable
    ):
        pass
    else:
        if current_hash == serialized_hash:
            _tz_offsets = tz_offsets
            _search_regex = search_regex
            _search_regex_ignorecase = search_regex_ignorecase
            return

    # Cache miss: rebuild everything from the timezone definitions.
    _search_regex_parts = []
    _tz_offsets = list(build_tz_offsets(_search_regex_parts))
    _search_regex = re.compile("|".join(_search_regex_parts))
    _search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)

    try:
        with open(cache_path, mode="wb") as file:
            pickle.dump(
                (current_hash, _tz_offsets, _search_regex, _search_regex_ignorecase),
                file,
            )
    except (OSError, pickle.PicklingError):
        # Best effort only: e.g. a read-only install directory. The tables
        # are already built in memory, so the import must still succeed.
        pass
124+
125+
126+
# Location of the pickled timezone cache: the "data" directory next to this module.
CACHE_PATH = Path(__file__).parent.joinpath("data", "dateparser_tz_cache.pkl")
127+
128+
# Runs at import time. The hash handed to _load_offsets is the CRC32 of the
# UTF-8 encoded str() of timezone_info_list.
_load_offsets(
129+
cache_path=CACHE_PATH,
130+
current_hash=zlib.crc32(str(timezone_info_list).encode("utf-8")),
131+
)

setup.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,26 @@
11
import re
2+
import subprocess
23

34
from setuptools import find_packages, setup
5+
from setuptools.command import develop, install
46

57
__version__ = re.search(
68
r"__version__.*\s*=\s*[\"]([^\"]+)[\"]", open("dateparser/__init__.py").read()
79
).group(1)
810

11+
12+
class PostDevelop(develop.develop):
    """``develop`` command that runs the timezone script before the stock step."""

    def run(self):
        """Run ``dateparser_scripts/timezones.py``, then the standard ``develop``."""
        # Local import keeps this command self-contained.
        import sys

        # Use the interpreter that is executing setup.py and pass the command
        # as an argument list (no shell). The previous shell string was
        # "python 3 ...", which ran ``python`` with the argument ``3`` and
        # therefore never executed the script.
        # The return code is intentionally ignored: the script is a
        # best-effort pre-step and must not abort the command.
        subprocess.call([sys.executable, "dateparser_scripts/timezones.py"])
        develop.develop.run(self)
16+
17+
18+
class PostInstall(install.install):
    """``install`` command that runs the timezone script before the stock step."""

    def run(self):
        """Run ``dateparser_scripts/timezones.py``, then the standard ``install``."""
        # Local import keeps this command self-contained.
        import sys

        # Use the interpreter that is executing setup.py rather than whatever
        # ``python3`` resolves to on PATH (it may be missing, e.g. on Windows,
        # or belong to a different environment). An argument list avoids the
        # shell entirely.
        # The return code is intentionally ignored: the script is a
        # best-effort pre-step and must not abort the installation.
        subprocess.call([sys.executable, "dateparser_scripts/timezones.py"])
        install.install.run(self)
22+
23+
924
introduction = re.sub(
1025
r":members:.+|..\sautomodule::.+|:class:|:func:|:ref:",
1126
"",
@@ -45,6 +60,7 @@
4560
"fasttext": ["fasttext"],
4661
"langdetect": ["langdetect"],
4762
},
63+
cmdclass={"develop": PostDevelop, "install": PostInstall},
4864
license="BSD",
4965
zip_safe=False,
5066
keywords="dateparser",

0 commit comments

Comments
 (0)