Skip to content

Commit 7830356

Browse files
msgpack: support datetime extended type
Tarantool supports the datetime type since version 2.10.0 [1]. This patch introduces support for the Tarantool datetime type in the msgpack decoders and encoders. The Tarantool datetime type is mapped to the new tarantool.Datetime type, which inherits from pandas.Timestamp [2]. pandas.Timestamp was chosen as the base class because it can store both nanoseconds and timezone information. The built-in Python datetime.datetime supports microseconds at most, and numpy.datetime64 does not support timezones. There are two reasons to use a custom type instead of plain pandas.Timestamp: - tzindex may be lost on conversion to pandas.Timestamp - the Tarantool datetime interval type is planned to be stored in the custom type tarantool.Interval, and we'll need a way to support arithmetic between datetime and interval. The msgpack encoder supports encoding pandas.Timestamp with the tarantool.Datetime tools, but the decoder always produces tarantool.Datetime. If you plan to work with Tarantool datetimes, please stick to the tarantool.Datetime object rather than pure pandas.Timestamp. You can create a tarantool.Datetime from a pandas.Timestamp or by using the same API as in plain pandas.Timestamp. If you used numpy.datetime64 or datetime.datetime in your logic, you can use the pandas.to_datetime64() and pandas.to_datetime() converters. This patch does not yet introduce support for timezones in datetime. 1. tarantool/tarantool#5941 2. https://pandas.pydata.org/docs/reference/api/pandas.Timestamp.html Part of #204
1 parent c70dfa6 commit 7830356

File tree

11 files changed

+304
-6
lines changed

11 files changed

+304
-6
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99
### Added
1010
- Decimal type support (#203).
1111
- UUID type support (#202).
12+
- Datetime type support and tarantool.Datetime type (#204).
1213

1314
### Changed
1415
- Bump msgpack requirement to 1.0.4 (PR #223).

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
msgpack>=1.0.4
2+
pandas

tarantool/__init__.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
ENCODING_DEFAULT,
3333
)
3434

35+
from tarantool.msgpack_ext.types.datetime import (
36+
Datetime,
37+
)
38+
3539
__version__ = "0.9.0"
3640

3741

@@ -91,7 +95,7 @@ def connectmesh(addrs=({'host': 'localhost', 'port': 3301},), user=None,
9195

9296
__all__ = ['connect', 'Connection', 'connectmesh', 'MeshConnection', 'Schema',
9397
'Error', 'DatabaseError', 'NetworkError', 'NetworkWarning',
94-
'SchemaError', 'dbapi']
98+
'SchemaError', 'dbapi', 'Datetime']
9599

96100
# ConnectionPool is supported only for Python 3.7 or newer.
97101
if sys.version_info.major >= 3 and sys.version_info.minor >= 7:

tarantool/error.py

+10
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,16 @@ class MsgpackWarning(UserWarning):
119119
Warning with encoding or decoding of MP_EXT types
120120
'''
121121

122+
class ExtTypeError(ValueError):
123+
'''
124+
Error related to tarantool.Datetime type
125+
'''
126+
127+
class ExtTypeWarning(UserWarning):
128+
'''
129+
Warning related to tarantool.Datetime type
130+
'''
131+
122132
__all__ = ("Warning", "Error", "InterfaceError", "DatabaseError", "DataError",
123133
"OperationalError", "IntegrityError", "InternalError",
124134
"ProgrammingError", "NotSupportedError", "MsgpackError",

tarantool/msgpack_ext/datetime.py

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
from tarantool.msgpack_ext.types.datetime import Datetime
2+
3+
EXT_ID = 4
4+
5+
def encode(obj):
    """Serialize a datetime value into its MP_EXT payload bytes.

    The method is looked up on the ``Datetime`` class and ``obj`` is passed
    explicitly as the receiver instead of calling ``obj.msgpack_encode()``.

    :param obj: value to encode
    :return: whatever ``Datetime.msgpack_encode`` returns for ``obj``
    """
    payload = Datetime.msgpack_encode(obj)
    return payload
7+
8+
def decode(data):
    """Build a ``Datetime`` from the MP_EXT payload ``data``.

    :param data: payload handed over by the msgpack ext hook
    :return: the ``Datetime`` constructed from ``data``
    """
    decoded = Datetime(data)
    return decoded

tarantool/msgpack_ext/packer.py

+8-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
from decimal import Decimal
22
from uuid import UUID
33
from msgpack import ExtType
4+
import pandas
5+
6+
from tarantool.msgpack_ext.types.datetime import Datetime
47

58
import tarantool.msgpack_ext.decimal as ext_decimal
69
import tarantool.msgpack_ext.uuid as ext_uuid
10+
import tarantool.msgpack_ext.datetime as ext_datetime
711

812
encoders = [
9-
{'type': Decimal, 'ext': ext_decimal},
10-
{'type': UUID, 'ext': ext_uuid },
13+
{'type': Decimal, 'ext': ext_decimal },
14+
{'type': UUID, 'ext': ext_uuid },
15+
{'type': Datetime, 'ext': ext_datetime},
16+
{'type': pandas.Timestamp, 'ext': ext_datetime},
1117
]
1218

1319
def default(obj):
+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import pandas
2+
3+
# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type
4+
#
5+
# The datetime MessagePack representation looks like this:
6+
# +---------+----------------+==========+-----------------+
7+
# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; |
8+
# | = d7/d8 | = 4 | | tzindex; |
9+
# +---------+----------------+==========+-----------------+
10+
# MessagePack data contains:
11+
#
12+
# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the
13+
# little-endian order.
14+
# * The optional fields (8 bytes), if any of them have a non-zero value.
15+
# The fields include nsec (4 bytes), tzoffset (2 bytes), and
16+
# tzindex (2 bytes) packed in the little-endian order.
17+
#
18+
# seconds is seconds since Epoch, where the epoch is the point where the time
19+
# starts, and is platform dependent. For Unix, the epoch is January 1,
20+
# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure
21+
# definition in src/lib/core/datetime.h and reasons in
22+
# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c
23+
#
24+
# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see
25+
# a definition in src/lib/core/datetime.h.
26+
#
27+
# tzoffset is timezone offset in minutes from UTC. Tarantool uses an int16_t type,
28+
# see a structure definition in src/lib/core/datetime.h.
29+
#
30+
# tzindex is Olson timezone id. Tarantool uses an int16_t type, see a structure
31+
# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are
32+
# specified, tzindex has the preference and the tzoffset value is ignored.
33+
34+
SECONDS_SIZE_BYTES = 8
35+
NSEC_SIZE_BYTES = 4
36+
TZOFFSET_SIZE_BYTES = 2
37+
TZINDEX_SIZE_BYTES = 2
38+
39+
BYTEORDER = 'little'
40+
41+
NSEC_IN_SEC = 1000000000
42+
43+
44+
def get_bytes_as_int(data, cursor, size):
    """Read one signed integer field out of ``data``.

    The ``size`` bytes starting at offset ``cursor`` are interpreted as a
    signed integer in the module-level ``BYTEORDER``.

    :param data: buffer to read from
    :param cursor: offset of the first byte of the field
    :param size: width of the field in bytes
    :return: ``(value, next_cursor)``, where ``next_cursor`` is
        ``cursor + size``
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
47+
48+
def get_int_as_bytes(data, size):
    """Pack the integer ``data`` into exactly ``size`` bytes.

    The value is written as a signed integer in the module-level
    ``BYTEORDER``.

    :param data: integer to pack
    :param size: width of the resulting field in bytes
    :return: the packed bytes
    """
    packed = data.to_bytes(size, BYTEORDER, signed=True)
    return packed
50+
51+
def msgpack_decode(data):
    """Decode a Tarantool datetime MP_EXT payload.

    The payload is either the 8-byte ``seconds`` field alone, or ``seconds``
    followed by the 8 bytes of optional fields (``nsec``, ``tzoffset``,
    ``tzindex``). Any other length is rejected instead of being decoded
    from truncated or partially ignored bytes.

    :param data: raw MP_EXT payload
    :return: ``(dt, tzoffset, tzindex)``, where ``dt`` is the value returned
        by ``pandas.to_datetime`` for the total number of nanoseconds
    :raise MsgpackError: the payload length matches neither layout
    :raise NotImplementedError: ``tzoffset`` or ``tzindex`` is non-zero
    """
    # Function-scope import: brings the error type into scope without
    # touching the module's import block.
    from tarantool.error import MsgpackError

    full_size = (SECONDS_SIZE_BYTES + NSEC_SIZE_BYTES +
                 TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES)
    data_len = len(data)
    if data_len not in (SECONDS_SIZE_BYTES, full_size):
        raise MsgpackError(
            f'Unexpected datetime payload length {data_len}, '
            f'expected {SECONDS_SIZE_BYTES} or {full_size}')

    cursor = 0
    seconds, cursor = get_bytes_as_int(data, cursor, SECONDS_SIZE_BYTES)

    if data_len == full_size:
        nsec, cursor = get_bytes_as_int(data, cursor, NSEC_SIZE_BYTES)
        tzoffset, cursor = get_bytes_as_int(data, cursor, TZOFFSET_SIZE_BYTES)
        tzindex, cursor = get_bytes_as_int(data, cursor, TZINDEX_SIZE_BYTES)
    else:
        # Optional fields are absent from the payload: all of them are zero.
        nsec = 0
        tzoffset = 0
        tzindex = 0

    if (tzoffset != 0) or (tzindex != 0):
        raise NotImplementedError(
            'Decoding datetime with non-zero tzoffset or tzindex '
            'is not supported yet')

    total_nsec = seconds * NSEC_IN_SEC + nsec

    dt = pandas.to_datetime(total_nsec, unit='ns')
    return dt, tzoffset, tzindex
71+
72+
class Datetime(pandas.Timestamp):
    """``pandas.Timestamp`` subclass used for Tarantool datetime values.

    An instance is created either from a raw MP_EXT payload (a single
    positional ``bytes`` argument) or from the arguments that are forwarded
    unchanged to the parent constructor.
    """

    def __new__(cls, *args, **kwargs):
        """Create an instance from an MP_EXT payload or from parent arguments.

        If the first positional argument is ``bytes`` it is decoded with
        ``msgpack_decode``; otherwise all arguments go to the parent
        ``__new__``.
        """
        from_payload = bool(args) and isinstance(args[0], bytes)
        if from_payload:
            # The decoded timezone fields are not stored on the instance.
            instance, _tzoffset, _tzindex = msgpack_decode(args[0])
        else:
            instance = super().__new__(cls, *args, **kwargs)

        # msgpack_decode builds its value through pandas.to_datetime rather
        # than through this class, so the result is re-tagged as ``cls``.
        instance.__class__ = cls
        return instance

    def msgpack_encode(self):
        """Encode the value as a Tarantool datetime MP_EXT payload.

        ``self.value`` is split into whole seconds and the remaining
        nanoseconds. The optional fields are appended only when one of them
        is non-zero; ``tzoffset`` and ``tzindex`` are always zero here.

        :return: 8 bytes (seconds only) or 16 bytes (with optional fields)
        """
        seconds, nsec = divmod(self.value, NSEC_IN_SEC)
        tzoffset = 0
        tzindex = 0

        parts = [get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)]

        if any((nsec, tzoffset, tzindex)):
            parts.append(get_int_as_bytes(nsec, NSEC_SIZE_BYTES))
            parts.append(get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES))
            parts.append(get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES))

        return b''.join(parts)

tarantool/msgpack_ext/unpacker.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
11
import tarantool.msgpack_ext.decimal as ext_decimal
22
import tarantool.msgpack_ext.uuid as ext_uuid
3+
import tarantool.msgpack_ext.datetime as ext_datetime
34

45
decoders = {
5-
ext_decimal.EXT_ID: ext_decimal.decode,
6-
ext_uuid.EXT_ID : ext_uuid.decode ,
6+
ext_decimal.EXT_ID : ext_decimal.decode ,
7+
ext_uuid.EXT_ID : ext_uuid.decode ,
8+
ext_datetime.EXT_ID: ext_datetime.decode,
79
}
810

911
def ext_hook(code, data):

test/suites/__init__.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,14 @@
1717
from .test_ssl import TestSuite_Ssl
1818
from .test_decimal import TestSuite_Decimal
1919
from .test_uuid import TestSuite_UUID
20+
from .test_datetime import TestSuite_Datetime
2021

2122
test_cases = (TestSuite_Schema_UnicodeConnection,
2223
TestSuite_Schema_BinaryConnection,
2324
TestSuite_Request, TestSuite_Protocol, TestSuite_Reconnect,
2425
TestSuite_Mesh, TestSuite_Execute, TestSuite_DBAPI,
2526
TestSuite_Encoding, TestSuite_Pool, TestSuite_Ssl,
26-
TestSuite_Decimal, TestSuite_UUID)
27+
TestSuite_Decimal, TestSuite_UUID, TestSuite_Datetime)
2728

2829
def load_tests(loader, tests, pattern):
2930
suite = unittest.TestSuite()

test/suites/lib/skip.py

+11
Original file line numberDiff line numberDiff line change
@@ -154,3 +154,14 @@ def skip_or_run_UUID_test(func):
154154

155155
return skip_or_run_test_tarantool(func, '2.4.1',
156156
'does not support UUID type')
157+
158+
def skip_or_run_datetime_test(func):
    """Decorator that skips or runs a datetime-related test depending
    on the tarantool version.

    Delegates to ``skip_or_run_test_pcall_require`` with the ``datetime``
    module name.

    Tarantool supports datetime type only since 2.10.0 version.
    See https://github.com/tarantool/tarantool/issues/5941
    """
    skip_reason = 'does not support datetime type'
    return skip_or_run_test_pcall_require(func, 'datetime', skip_reason)

test/suites/test_datetime.py

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
# -*- coding: utf-8 -*-
2+
3+
from __future__ import print_function
4+
5+
import sys
6+
import unittest
7+
import msgpack
8+
import warnings
9+
import tarantool
10+
import pandas
11+
12+
from tarantool.msgpack_ext.packer import default as packer_default
13+
from tarantool.msgpack_ext.unpacker import ext_hook as unpacker_ext_hook
14+
15+
from .lib.tarantool_server import TarantoolServer
16+
from .lib.skip import skip_or_run_datetime_test
17+
from tarantool.error import MsgpackError, MsgpackWarning
18+
19+
class TestSuite_Datetime(unittest.TestCase):
# Tests for the datetime MP_EXT type (extension code 4): decoding and
# encoding through the msgpack hooks directly, and through a running
# Tarantool instance.
20+
@classmethod
21+
def setUpClass(self):
# Starts a TarantoolServer with the box.lua script, creates the 'test'
# space (string primary key) and the 'test' user via the admin console,
# then opens the connection shared by all tests.
22+
print(' DATETIME EXT TYPE '.center(70, '='), file=sys.stderr)
23+
print('-' * 70, file=sys.stderr)
24+
self.srv = TarantoolServer()
25+
self.srv.script = 'test/suites/box.lua'
26+
self.srv.start()
27+
28+
self.adm = self.srv.admin
29+
self.adm(r"""
30+
_, datetime = pcall(require, 'datetime')
31+
32+
box.schema.space.create('test')
33+
box.space['test']:create_index('primary', {
34+
type = 'tree',
35+
parts = {1, 'string'},
36+
unique = true})
37+
38+
box.schema.user.create('test', {password = 'test', if_not_exists = true})
39+
box.schema.user.grant('test', 'read,write,execute', 'universe')
40+
""")
41+
42+
self.con = tarantool.Connection(self.srv.host, self.srv.args['primary'],
43+
user='test', password='test')
44+
45+
def setUp(self):
46+
# prevent a remote tarantool from cleaning up our session
47+
if self.srv.is_started():
48+
self.srv.touch_lock()
49+
50+
# Every test starts with an empty 'test' space.
self.adm("box.space['test']:truncate()")
51+
52+
53+
# Each case pairs a Python value ('python'), the expected MP_EXT payload
# bytes ('msgpack') and a Lua expression for the same value ('tarantool').
cases = {
54+
'date': {
55+
'python': tarantool.Datetime(year=2022, month=8, day=31),
56+
'msgpack': (b'\x80\xa4\x0e\x63\x00\x00\x00\x00'),
57+
'tarantool': r"datetime.new({year=2022, month=8, day=31})",
58+
},
59+
'date_unix_start': {
60+
'python': tarantool.Datetime(year=1970, month=1, day=1),
61+
'msgpack': (b'\x00\x00\x00\x00\x00\x00\x00\x00'),
62+
'tarantool': r"datetime.new({year=1970, month=1, day=1})",
63+
},
64+
'date_before_1970': {
65+
'python': tarantool.Datetime(year=1900, month=1, day=1),
66+
'msgpack': (b'\x80\x81\x55\x7c\xff\xff\xff\xff'),
67+
'tarantool': r"datetime.new({year=1900, month=1, day=1})",
68+
},
69+
'datetime_with_minutes': {
70+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7),
71+
'msgpack': (b'\x44\xa3\x0f\x63\x00\x00\x00\x00'),
72+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7})",
73+
},
74+
'datetime_with_seconds': {
75+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54),
76+
'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00'),
77+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54})",
78+
},
79+
'datetime_with_microseconds': {
80+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54,
81+
microsecond=308543),
82+
'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x18\xfe\x63\x12\x00\x00\x00\x00'),
83+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
84+
r"nsec=308543000})",
85+
},
86+
'datetime_with_nanoseconds': {
87+
'python': tarantool.Datetime(year=2022, month=8, day=31, hour=18, minute=7, second=54,
88+
microsecond=308543, nanosecond=321),
89+
'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00'),
90+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
91+
r"nsec=308543321})",
92+
},
93+
'pandas_timestamp': {
94+
'python': pandas.Timestamp(year=2022, month=8, day=31, hour=18, minute=7, second=54,
95+
microsecond=308543, nanosecond=321),
96+
'msgpack': (b'\x7a\xa3\x0f\x63\x00\x00\x00\x00\x59\xff\x63\x12\x00\x00\x00\x00'),
97+
'tarantool': r"datetime.new({year=2022, month=8, day=31, hour=18, min=7, sec=54, " +
98+
r"nsec=308543321})",
99+
},
100+
}
101+
102+
def test_msgpack_decode(self):
# The ext hook called with code 4 and the case payload must compare equal
# to the case's Python value.
103+
for name in self.cases.keys():
104+
with self.subTest(msg=name):
105+
case = self.cases[name]
106+
107+
self.assertEqual(unpacker_ext_hook(4, case['msgpack']),
108+
case['python'])
109+
110+
@skip_or_run_datetime_test
111+
def test_tarantool_decode(self):
# The value is written on the server side with the Lua expression and read
# back through the connection.
112+
for name in self.cases.keys():
113+
with self.subTest(msg=name):
114+
case = self.cases[name]
115+
116+
self.adm(f"box.space['test']:replace{{'{name}', {case['tarantool']}}}")
117+
118+
self.assertSequenceEqual(self.con.select('test', name),
119+
[[name, case['python']]])
120+
121+
def test_msgpack_encode(self):
# The packer's default hook must produce an ExtType with code 4 carrying
# the case payload.
122+
for name in self.cases.keys():
123+
with self.subTest(msg=name):
124+
case = self.cases[name]
125+
126+
self.assertEqual(packer_default(case['python']),
127+
msgpack.ExtType(code=4, data=case['msgpack']))
128+
129+
@skip_or_run_datetime_test
130+
def test_tarantool_encode(self):
# The value is inserted through the connection; the Lua snippet then
# compares the stored field with the datetime built on the server side.
131+
for name in self.cases.keys():
132+
with self.subTest(msg=name):
133+
case = self.cases[name]
134+
135+
self.con.insert('test', [name, case['python']])
136+
137+
lua_eval = f"""
138+
local dt = {case['tarantool']}
139+
140+
local tuple = box.space['test']:get('{name}')
141+
assert(tuple ~= nil)
142+
143+
if tuple[2] == dt then
144+
return true
145+
else
146+
return nil, ('%s is not equal to expected %s'):format(
147+
tostring(tuple[2]), tostring(dt))
148+
end
149+
"""
150+
151+
self.assertSequenceEqual(self.adm(lua_eval), [True])
152+
153+
154+
@classmethod
155+
def tearDownClass(self):
# Closes the shared connection, then stops and cleans the server.
156+
self.con.close()
157+
self.srv.stop()
158+
self.srv.clean()

0 commit comments

Comments
 (0)