Skip to content

Commit 623b17d

Browse files
msgpack: support datetime extended type
Tarantool supports datetime type since version 2.10.0 [1]. This patch introduces support for the Tarantool datetime type in msgpack decoders and encoders. Tarantool datetime objects are decoded to `tarantool.Datetime` type. `tarantool.Datetime` may be encoded to Tarantool datetime objects. `tarantool.Datetime` stores data in a `pandas.Timestamp` object [2]. You can create `tarantool.Datetime` objects either from msgpack data or by using the same API as in Tarantool: ``` dt1 = tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, sec=54, nsec=308543321) dt2 = tarantool.Datetime(timestamp=1661969274) dt3 = tarantool.Datetime(timestamp=1661969274, nsec=308543321) ``` `tarantool.Datetime` exposes `year`, `month`, `day`, `hour`, `minute`, `sec`, `nsec` and `timestamp` properties if you need to convert `tarantool.Datetime` to any other kind of datetime object: ``` pdt = pandas.Timestamp(year=dt.year, month=dt.month, day=dt.day, hour=dt.hour, minute=dt.minute, second=dt.sec, microsecond=(dt.nsec // 1000), nanosecond=(dt.nsec % 1000)) ``` `pandas.Timestamp` was chosen to store data because it can store both nanoseconds and timezone information. The built-in Python `datetime.datetime` supports microseconds at most, and `numpy.datetime64` does not support timezones. Tarantool datetime interval type is planned to be stored in a custom type `tarantool.Interval` and we'll need a way to support arithmetic between datetime and interval. This is the main reason we use a custom class instead of plain `pandas.Timestamp`. It is also hard to implement Tarantool-compatible timezones with full conversion support without custom classes. This patch does not yet introduce support for timezones in datetime. 1. tarantool/tarantool#5941 2. https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html Part of #204
1 parent c70dfa6 commit 623b17d

File tree

10 files changed

+478
-6
lines changed

10 files changed

+478
-6
lines changed

Diff for: CHANGELOG.md

+30
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,36 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
### Added
1010
- Decimal type support (#203).
1111
- UUID type support (#202).
12+
- Datetime type support and tarantool.Datetime type (#204).
13+
14+
Tarantool datetime objects are decoded to `tarantool.Datetime`
15+
type. `tarantool.Datetime` may be encoded to Tarantool datetime
16+
objects.
17+
18+
`tarantool.Datetime` stores data in a `pandas.Timestamp` object.
19+
You can create `tarantool.Datetime` objects either from msgpack
20+
data or by using the same API as in Tarantool:
21+
22+
```python
23+
dt1 = tarantool.Datetime(year=2022, month=8, day=31,
24+
hour=18, minute=7, sec=54,
25+
nsec=308543321)
26+
27+
dt2 = tarantool.Datetime(timestamp=1661969274)
28+
29+
dt3 = tarantool.Datetime(timestamp=1661969274, nsec=308543321)
30+
```
31+
32+
`tarantool.Datetime` exposes `year`, `month`, `day`, `hour`,
33+
`minute`, `sec`, `nsec` and `timestamp` properties if you need
34+
to convert `tarantool.Datetime` to any other kind of datetime object:
35+
36+
```python
37+
pdt = pandas.Timestamp(year=dt.year, month=dt.month, day=dt.day,
38+
hour=dt.hour, minute=dt.minute, second=dt.sec,
39+
microsecond=(dt.nsec // 1000),
40+
nanosecond=(dt.nsec % 1000))
41+
```
1242

1343
### Changed
1444
- Bump msgpack requirement to 1.0.4 (PR #223).

Diff for: requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
msgpack>=1.0.4
2+
pandas

Diff for: tarantool/__init__.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
ENCODING_DEFAULT,
3333
)
3434

35+
from tarantool.msgpack_ext.types.datetime import (
36+
Datetime,
37+
)
38+
3539
__version__ = "0.9.0"
3640

3741

@@ -91,7 +95,7 @@ def connectmesh(addrs=({'host': 'localhost', 'port': 3301},), user=None,
9195

9296
__all__ = ['connect', 'Connection', 'connectmesh', 'MeshConnection', 'Schema',
9397
'Error', 'DatabaseError', 'NetworkError', 'NetworkWarning',
94-
'SchemaError', 'dbapi']
98+
'SchemaError', 'dbapi', 'Datetime']
9599

96100
# ConnectionPool is supported only for Python 3.7 or newer.
97101
if sys.version_info.major >= 3 and sys.version_info.minor >= 7:

Diff for: tarantool/msgpack_ext/datetime.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from tarantool.msgpack_ext.types.datetime import Datetime
2+
3+
EXT_ID = 4
4+
5+
def encode(obj):
    """Serialize a datetime object into its MessagePack extension payload.

    The work is delegated to the object's own ``msgpack_encode`` method;
    whatever that method returns is passed through unchanged.
    """
    payload = obj.msgpack_encode()
    return payload
7+
8+
def decode(data):
    """Build a ``Datetime`` from a raw MessagePack extension payload.

    ``data`` is handed to the ``Datetime`` constructor as its first
    positional argument.
    """
    decoded = Datetime(data)
    return decoded

Diff for: tarantool/msgpack_ext/packer.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,16 @@
22
from uuid import UUID
33
from msgpack import ExtType
44

5+
from tarantool.msgpack_ext.types.datetime import Datetime
6+
57
import tarantool.msgpack_ext.decimal as ext_decimal
68
import tarantool.msgpack_ext.uuid as ext_uuid
9+
import tarantool.msgpack_ext.datetime as ext_datetime
710

811
encoders = [
9-
{'type': Decimal, 'ext': ext_decimal},
10-
{'type': UUID, 'ext': ext_uuid },
12+
{'type': Decimal, 'ext': ext_decimal },
13+
{'type': UUID, 'ext': ext_uuid },
14+
{'type': Datetime, 'ext': ext_datetime},
1115
]
1216

1317
def default(obj):

Diff for: tarantool/msgpack_ext/types/datetime.py

+192
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
from copy import deepcopy
2+
3+
import pandas
4+
5+
# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
6+
#
7+
# The datetime MessagePack representation looks like this:
8+
# +---------+----------------+==========+-----------------+
9+
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; |
10+
# | = d7/d8 | = 4 | | tzindex; |
11+
# +---------+----------------+==========+-----------------+
12+
# MessagePack data contains:
13+
#
14+
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the
15+
# little-endian order.
16+
# * The optional fields (8 bytes), if any of them have a non-zero value.
17+
# The fields include nsec (4 bytes), tzoffset (2 bytes), and
18+
# tzindex (2 bytes) packed in the little-endian order.
19+
#
20+
# seconds is seconds since Epoch, where the epoch is the point where the time
21+
# starts, and is platform dependent. For Unix, the epoch is January 1,
22+
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure
23+
# definition in src/lib/core/datetime.h and reasons in
24+
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c
25+
#
26+
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see
27+
# a definition in src/lib/core/datetime.h.
28+
#
29+
# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type,
30+
# see a structure definition in src/lib/core/datetime.h.
31+
#
32+
# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure
33+
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are
34+
# specified, tzindex has the preference and the tzoffset value is ignored.
35+
36+
SECONDS_SIZE_BYTES = 8
37+
NSEC_SIZE_BYTES = 4
38+
TZOFFSET_SIZE_BYTES = 2
39+
TZINDEX_SIZE_BYTES = 2
40+
41+
BYTEORDER = 'little'
42+
43+
NSEC_IN_SEC = 1000000000
44+
NSEC_IN_MKSEC = 1000
45+
46+
def get_bytes_as_int(data, cursor, size):
    """Read one signed integer field out of a byte buffer.

    Takes ``size`` bytes of ``data`` starting at offset ``cursor`` and
    interprets them as a signed integer in ``BYTEORDER`` order.

    Returns a ``(value, next_cursor)`` pair, where ``next_cursor`` is the
    offset immediately after the bytes that were consumed.
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
49+
50+
def get_int_as_bytes(data, size):
    """Pack a signed integer into exactly ``size`` bytes.

    ``data`` is the integer to serialize; the bytes are produced in
    ``BYTEORDER`` order using a signed representation.
    """
    encoded = data.to_bytes(size, byteorder=BYTEORDER, signed=True)
    return encoded
52+
53+
def msgpack_decode(data):
    """Decode a Tarantool datetime MessagePack extension payload.

    The payload is either ``SECONDS_SIZE_BYTES`` long (seconds only) or
    additionally carries the nsec, tzoffset and tzindex fields. Every
    field is read as a signed integer in ``BYTEORDER`` order.

    :param data: Raw extension payload.
    :type data: bytes

    :return: Moment of time built from seconds and nanoseconds since
        the epoch.
    :rtype: pandas.Timestamp

    :raise MsgpackError: If the payload has neither of the two
        supported lengths.
    :raise NotImplementedError: If the payload carries a non-zero
        tzoffset or tzindex (timezones are not supported yet).
    """
    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES +
                 TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)

    # Validate the length before reading any field so that a malformed
    # payload is never partially interpreted.
    data_len = len(data)
    if data_len not in (SECONDS_SIZE_BYTES, full_size):
        # Imported locally: the exception is not imported at module
        # level here, so this branch used to fail with NameError
        # instead of raising the intended error.
        from tarantool.error import MsgpackError

        raise MsgpackError(f'Unexpected datetime payload length {data_len}')

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    if data_len == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    else:
        # Optional fields are omitted when all of them are zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0

    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError(
            'Datetime with non-zero tzoffset or tzindex is not supported')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    return pandas.to_datetime(total_nsec, unit='ns')
76+
77+
class Datetime():
    """Tarantool datetime value stored as a ``pandas.Timestamp``.

    An instance can be created from a MessagePack extension payload
    (first positional argument), from a timestamp, or from separate
    calendar fields. Timezones are not supported.
    """

    def __init__(self, data=None, *, timestamp=None, year=None, month=None,
                 day=None, hour=None, minute=None, sec=None, nsec=None):
        """Create a datetime object.

        :param data: MessagePack extension payload. When given, all
            keyword arguments are ignored.
        :type data: bytes, optional

        :param timestamp: Seconds since the epoch. Cannot be combined
            with ``year``, ``month``, ``day``, ``hour``, ``minute`` or
            ``sec``. Must be an ``int`` when ``nsec`` is also given.
        :param year: Calendar year.
        :param month: Calendar month.
        :param day: Day of the month.
        :param hour: Hour.
        :param minute: Minute.
        :param sec: Second.
        :param nsec: Nanoseconds within the second.

        :raise ValueError: If ``data`` is not ``bytes``, if ``timestamp``
            is mixed with calendar fields, or if ``timestamp`` is not an
            ``int`` while ``nsec`` is provided.
        """
        if data is not None:
            if not isinstance(data, bytes):
                raise ValueError('data argument (first positional argument) ' +
                                 'expected to be a "bytes" instance')

            self._datetime = msgpack_decode(data)
            return

        # The logic is same as in Tarantool, refer to datetime API.
        # https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/
        if timestamp is not None:
            if ((year is not None) or (month is not None) or
                    (day is not None) or (hour is not None) or
                    (minute is not None) or (sec is not None)):
                raise ValueError('Cannot provide both timestamp and year, month, ' +
                                 'day, hour, minute, sec')

            if nsec is not None:
                # Integer arithmetic keeps nanosecond precision; a float
                # timestamp could not be combined with nsec exactly.
                if not isinstance(timestamp, int):
                    raise ValueError('timestamp must be int if nsec provided')

                total_nsec = timestamp * NSEC_IN_SEC + nsec
                self._datetime = pandas.to_datetime(total_nsec, unit='ns')
            else:
                self._datetime = pandas.to_datetime(timestamp, unit='s')
        else:
            # pandas.Timestamp takes microseconds and nanoseconds as
            # separate fields, so nsec is split in two.
            if nsec is not None:
                microsecond = nsec // NSEC_IN_MKSEC
                nanosecond = nsec % NSEC_IN_MKSEC
            else:
                microsecond = 0
                nanosecond = 0

            self._datetime = pandas.Timestamp(year=year, month=month, day=day,
                                              hour=hour, minute=minute, second=sec,
                                              microsecond=microsecond,
                                              nanosecond=nanosecond)

    def __eq__(self, other):
        """Compare with another ``Datetime`` or a ``pandas.Timestamp``.

        Any other type compares unequal.
        """
        if isinstance(other, Datetime):
            return self._datetime == other._datetime
        elif isinstance(other, pandas.Timestamp):
            return self._datetime == other
        else:
            return False

    def __hash__(self):
        """Hash of the wrapped timestamp.

        Defining ``__eq__`` alone makes a class unhashable, so the hash
        is provided explicitly. It is delegated to the wrapped
        ``pandas.Timestamp`` so that objects which compare equal also
        hash equal.
        """
        return hash(self._datetime)

    def __str__(self):
        """String form of the wrapped ``pandas.Timestamp``."""
        return self._datetime.__str__()

    def __repr__(self):
        """Representation of the wrapped timestamp with a ``datetime: `` prefix."""
        return f'datetime: {self._datetime.__repr__()}'

    def __copy__(self):
        """Shallow copy: the new object shares the attribute values."""
        cls = self.__class__
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        return result

    def __deepcopy__(self, memo):
        """Deep copy: every attribute is copied with ``deepcopy``."""
        cls = self.__class__
        result = cls.__new__(cls)
        # Register the copy first so recursive references resolve to it.
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            setattr(result, k, deepcopy(v, memo))
        return result

    @property
    def year(self):
        """Year of the wrapped timestamp."""
        return self._datetime.year

    @property
    def month(self):
        """Month of the wrapped timestamp."""
        return self._datetime.month

    @property
    def day(self):
        """Day of the wrapped timestamp."""
        return self._datetime.day

    @property
    def hour(self):
        """Hour of the wrapped timestamp."""
        return self._datetime.hour

    @property
    def minute(self):
        """Minute of the wrapped timestamp."""
        return self._datetime.minute

    @property
    def sec(self):
        """Second of the wrapped timestamp."""
        return self._datetime.second

    @property
    def nsec(self):
        """Sub-second part in nanoseconds (microseconds + nanoseconds)."""
        return self._datetime.value % NSEC_IN_SEC

    @property
    def timestamp(self):
        """Result of ``pandas.Timestamp.timestamp()`` for the wrapped value."""
        return self._datetime.timestamp()

    def msgpack_encode(self):
        """Encode the value as a MessagePack extension payload.

        The seconds field is always written. The nsec, tzoffset and
        tzindex fields are appended only when at least one of them is
        non-zero; tzoffset and tzindex are always zero here.

        :return: Encoded payload.
        :rtype: bytes
        """
        seconds = self._datetime.value // NSEC_IN_SEC
        nsec = self.nsec
        tzoffset = 0
        tzindex = 0

        buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)

        if (nsec != 0) or (tzoffset != 0) or (tzindex != 0):
            buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES)

        return buf

Diff for: tarantool/msgpack_ext/unpacker.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import tarantool.msgpack_ext.decimal as ext_decimal
22
import tarantool.msgpack_ext.uuid as ext_uuid
3+
import tarantool.msgpack_ext.datetime as ext_datetime
34

45
decoders = {
5-
ext_decimal.EXT_ID: ext_decimal.decode,
6-
ext_uuid.EXT_ID : ext_uuid.decode ,
6+
ext_decimal.EXT_ID : ext_decimal.decode ,
7+
ext_uuid.EXT_ID : ext_uuid.decode ,
8+
ext_datetime.EXT_ID: ext_datetime.decode,
79
}
810

911
def ext_hook(code, data):

Diff for: test/suites/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
from .test_ssl import TestSuite_Ssl
1818
from .test_decimal import TestSuite_Decimal
1919
from .test_uuid import TestSuite_UUID
20+
from .test_datetime import TestSuite_Datetime
2021

2122
test_cases = (TestSuite_Schema_UnicodeConnection,
2223
TestSuite_Schema_BinaryConnection,
2324
TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect,
2425
TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI,
2526
TestSuite_Encoding, TestSuite_Pool, TestSuite_Ssl,
26-
TestSuite_Decimal, TestSuite_UUID)
27+
TestSuite_Decimal, TestSuite_UUID, TestSuite_Datetime)
2728

2829
def load_tests(loader, tests, pattern):
2930
suite = unittest.TestSuite()

Diff for: test/suites/lib/skip.py

+11
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func):
154154

155155
return skip_or_run_test_tarantool(func, '2.4.1',
156156
'does not support UUID type')
157+
158+
def skip_or_run_datetime_test(func):
    """Decorator that gates datetime-related tests on server support.

    Tarantool supports datetime type only since 2.10.0 version.
    See https://github.com/tarantool/tarantool/issues/5941

    The decision is delegated to ``skip_or_run_test_pcall_require``,
    which receives the test function, the ``datetime`` module name and
    the message to report.
    """
    required_module = 'datetime'
    skip_message = 'does not support datetime type'
    return skip_or_run_test_pcall_require(func, required_module, skip_message)

0 commit comments

Comments
 (0)