Skip to content

Commit 3c1307b

Browse files
committed
feat: add caching for timezone offsets, significantly speeds up import
This is different from PR scrapinghub#1181: it builds a cache at install time, which can then be distributed. Closes scrapinghub#533.
1 parent 47acb88 commit 3c1307b

File tree

4 files changed

+66
-9
lines changed

4 files changed

+66
-9
lines changed

MANIFEST.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ include CONTRIBUTING.rst
33
include HISTORY.rst
44
include LICENSE
55
include README.rst
6+
include dateparser/data/dateparser_tz_cache.pkl
67
include dateparser_data/settings.py
78
include requirements.txt
89

56 KB
Binary file not shown.

dateparser/timezone_parser.py

Lines changed: 49 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1+
import os
2+
import pickle
3+
import zlib
14
from datetime import datetime, timedelta, timezone, tzinfo
5+
from pathlib import Path
26

37
import regex as re
48

@@ -54,6 +58,12 @@ def convert_to_local_tz(datetime_obj, datetime_tz_offset):
5458
return datetime_obj - datetime_tz_offset + local_tz_offset
5559

5660

61+
def get_local_tz_offset():
    """Return the local timezone's offset from UTC as a ``timedelta``.

    The offset is the difference between the naive local clock and the naive
    UTC clock. Because the two clock reads happen at slightly different
    instants, the seconds component is rounded to the nearest ten and the
    microseconds are discarded.
    """
    # Read the local clock first, then UTC, in the same order as before.
    local_now = datetime.now()
    utc_now = datetime.now(tz=timezone.utc).replace(tzinfo=None)
    raw_delta = local_now - utc_now

    rounded_seconds = round(raw_delta.seconds, -1)
    return timedelta(days=raw_delta.days, seconds=rounded_seconds)
65+
66+
5767
def build_tz_offsets(search_regex_parts):
5868
def get_offset(tz_obj, regex, repl="", replw=""):
5969
return (
@@ -78,14 +88,44 @@ def get_offset(tz_obj, regex, repl="", replw=""):
7888
yield get_offset(tz_obj, regex, repl=replace, replw=replacewith)
7989

8090

81-
def get_local_tz_offset():
82-
offset = datetime.now() - datetime.now(tz=timezone.utc).replace(tzinfo=None)
83-
offset = timedelta(days=offset.days, seconds=round(offset.seconds, -1))
84-
return offset
91+
# Offset of the local timezone from UTC, computed once at import time.
local_tz_offset = get_local_tz_offset()

# Timezone lookup tables; they start empty and are filled in by _load_offsets().
_tz_offsets = _search_regex = _search_regex_ignorecase = None
96+
97+
98+
def _load_offsets(cache_path, current_hash):
    """Populate the module-level timezone tables, using the disk cache if valid.

    The cache file holds a pickled 4-tuple ``(hash, tz_offsets, search_regex,
    search_regex_ignorecase)``. The cached values are used only when the
    stored hash equals ``current_hash``. Otherwise the tables are rebuilt via
    ``build_tz_offsets`` and the cache file is rewritten on a best-effort
    basis.

    Sets the globals ``_tz_offsets``, ``_search_regex`` and
    ``_search_regex_ignorecase``; returns ``None``.

    :param cache_path: Path of the pickle cache file.
    :param current_hash: Fingerprint the cached data must carry to be trusted.
    """
    global _tz_offsets, _search_regex, _search_regex_ignorecase

    # NOTE(security): pickle.load can execute arbitrary code from the file.
    # This is only acceptable while the cache file is one shipped with, or
    # written by, this package; never point cache_path at untrusted data.
    try:
        with open(cache_path, mode="rb") as file:
            serialized_hash, offsets, regex, regex_ignorecase = pickle.load(file)
    except (
        OSError,  # missing, unreadable, or a directory
        EOFError,  # truncated file
        pickle.UnpicklingError,  # corrupt data
        AttributeError,  # pickled classes no longer importable
        ImportError,
        IndexError,
        ValueError,  # payload is not a 4-tuple
        TypeError,
    ):
        # Any cache problem just means we rebuild; it must never break import.
        pass
    else:
        if current_hash == serialized_hash:
            # Only publish cached values once they are known to be current.
            _tz_offsets = offsets
            _search_regex = regex
            _search_regex_ignorecase = regex_ignorecase
            return

    search_regex_parts = []
    _tz_offsets = list(build_tz_offsets(search_regex_parts))
    _search_regex = re.compile("|".join(search_regex_parts))
    _search_regex_ignorecase = re.compile("|".join(search_regex_parts), re.IGNORECASE)

    try:
        with open(cache_path, mode="wb") as file:
            pickle.dump(
                (current_hash, _tz_offsets, _search_regex, _search_regex_ignorecase),
                file,
            )
    except OSError:
        # The install location may be read-only (e.g. system site-packages);
        # the freshly built in-memory tables are still valid, so carry on.
        pass
86124

87-
_search_regex_parts = []
88-
_tz_offsets = list(build_tz_offsets(_search_regex_parts))
89-
_search_regex = re.compile("|".join(_search_regex_parts))
90-
_search_regex_ignorecase = re.compile("|".join(_search_regex_parts), re.IGNORECASE)
91-
local_tz_offset = get_local_tz_offset()
125+
126+
# The timezone cache file lives in the "data" directory next to this module.
CACHE_PATH = Path(__file__).parent / "data" / "dateparser_tz_cache.pkl"

# The CRC32 of the timezone table's string form is the cache fingerprint:
# a cache whose stored hash differs is rebuilt.
_load_offsets(
    cache_path=CACHE_PATH,
    current_hash=zlib.crc32(str(timezone_info_list).encode("utf-8")),
)

setup.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,26 @@
11
import re
2+
import subprocess
23

34
from setuptools import find_packages, setup
5+
from setuptools.command import develop, install
46

57
__version__ = re.search(
68
r"__version__.*\s*=\s*[\"]([^\"]+)[\"]", open("dateparser/__init__.py").read()
79
).group(1)
810

11+
12+
class PostDevelop(develop.develop):
    """``develop`` command that first runs ``dateparser_scripts/timezones.py``."""

    def run(self):
        """Run the timezones script, then the standard ``develop`` step."""
        import sys  # local import so this class does not rely on a file-level one

        # Use the interpreter that is executing setup.py: the previous command
        # string "python 3 ..." had a stray space and never ran the script.
        # An argument list also avoids going through a shell.
        # The script's exit status is deliberately ignored (best effort).
        subprocess.call([sys.executable, "dateparser_scripts/timezones.py"])
        develop.develop.run(self)
16+
17+
18+
class PostInstall(install.install):
    """``install`` command that first runs ``dateparser_scripts/timezones.py``."""

    def run(self):
        """Run the timezones script, then the standard ``install`` step."""
        import sys  # local import so this class does not rely on a file-level one

        # Use the interpreter that is executing setup.py rather than whatever
        # "python3" resolves to on PATH (it may be missing, e.g. on Windows,
        # or be a different environment). An argument list avoids the shell.
        # The script's exit status is deliberately ignored (best effort).
        subprocess.call([sys.executable, "dateparser_scripts/timezones.py"])
        install.install.run(self)
22+
23+
924
introduction = re.sub(
1025
r":members:.+|..\sautomodule::.+|:class:|:func:|:ref:",
1126
"",
@@ -45,6 +60,7 @@
4560
"fasttext": ["fasttext"],
4661
"langdetect": ["langdetect"],
4762
},
63+
cmdclass={"develop": PostDevelop, "install": PostInstall},
4864
license="BSD",
4965
zip_safe=False,
5066
keywords="dateparser",

0 commit comments

Comments
 (0)