|
| 1 | +from copy import deepcopy |
| 2 | + |
| 3 | +import pandas |
| 4 | + |
| 5 | +# https://www.tarantool.io/en/doc/latest/dev_guide/internals/msgpack_extensions/#the-datetime-type |
| 6 | +# |
| 7 | +# The datetime MessagePack representation looks like this: |
| 8 | +# +---------+----------------+==========+-----------------+ |
| 9 | +# | MP_EXT | MP_DATETIME | seconds | nsec; tzoffset; | |
| 10 | +# | = d7/d8 | = 4 | | tzindex; | |
| 11 | +# +---------+----------------+==========+-----------------+ |
| 12 | +# MessagePack data contains: |
| 13 | +# |
| 14 | +# * Seconds (8 bytes) as an unencoded 64-bit signed integer stored in the |
| 15 | +# little-endian order. |
| 16 | +# * The optional fields (8 bytes), if any of them have a non-zero value. |
| 17 | +# The fields include nsec (4 bytes), tzoffset (2 bytes), and |
| 18 | +# tzindex (2 bytes) packed in the little-endian order. |
| 19 | +# |
| 20 | +# seconds is seconds since Epoch, where the epoch is the point where the time |
| 21 | +# starts, and is platform dependent. For Unix, the epoch is January 1, |
| 22 | +# 1970, 00:00:00 (UTC). Tarantool uses a double type, see a structure |
| 23 | +# definition in src/lib/core/datetime.h and reasons in |
| 24 | +# https://github.com/tarantool/tarantool/wiki/Datetime-internals#intervals-in-c |
| 25 | +# |
| 26 | +# nsec is nanoseconds, fractional part of seconds. Tarantool uses int32_t, see |
| 27 | +# a definition in src/lib/core/datetime.h. |
| 28 | +# |
| 29 | +# tzoffset is the timezone offset in minutes from UTC. Tarantool uses an int16_t type, |
| 30 | +# see a structure definition in src/lib/core/datetime.h. |
| 31 | +# |
| 32 | +# tzindex is the Olson timezone id. Tarantool uses an int16_t type, see a structure |
| 33 | +# definition in src/lib/core/datetime.h. If both tzoffset and tzindex are |
| 34 | +# specified, tzindex has the preference and the tzoffset value is ignored. |
| 35 | + |
# Sizes, in bytes, of the individual fields of the datetime payload.
SECONDS_SIZE_BYTES = 8
NSEC_SIZE_BYTES = 4
TZOFFSET_SIZE_BYTES = 2
TZINDEX_SIZE_BYTES = 2

# Byte order handed to int.from_bytes() / int.to_bytes().
BYTEORDER = 'little'

# Unit conversion factors: nanoseconds per second and per microsecond.
NSEC_IN_SEC = 1_000_000_000
NSEC_IN_MKSEC = 1_000
| 45 | + |
def get_bytes_as_int(data, cursor, size):
    """Read a signed integer of ``size`` bytes from ``data`` at ``cursor``.

    The slice ``data[cursor:cursor + size]`` is interpreted as a signed
    integer using ``BYTEORDER``.

    Returns a ``(value, next_cursor)`` tuple where ``next_cursor`` equals
    ``cursor + size``, so calls can be chained to walk through a buffer.
    """
    end = cursor + size
    value = int.from_bytes(data[cursor:end], BYTEORDER, signed=True)
    return value, end
| 49 | + |
def get_int_as_bytes(data, size):
    """Serialize ``data`` into exactly ``size`` bytes.

    The value is written as a signed integer using ``BYTEORDER``.
    """
    encoded = data.to_bytes(size, BYTEORDER, signed=True)
    return encoded
| 52 | + |
def msgpack_decode(data):
    """Decode a datetime payload into a ``pandas`` timestamp.

    ``data`` must be either ``SECONDS_SIZE_BYTES`` long (seconds only) or
    long enough to also hold the nsec, tzoffset and tzindex fields. Any
    other length is rejected.

    Raises ``NotImplementedError`` when the payload carries a non-zero
    tzoffset or tzindex.

    Returns the result of ``pandas.to_datetime`` applied to the total
    number of nanoseconds.
    """
    offset = 0
    seconds, offset = get_bytes_as_int(data, offset, SECONDS_SIZE_BYTES)

    data_len = len(data)
    optional_len = NSEC_SIZE_BYTES + TZOFFSET_SIZE_BYTES + TZINDEX_SIZE_BYTES

    if data_len == SECONDS_SIZE_BYTES:
        # Short form: the optional fields were omitted, so all are zero.
        nsec = tzoffset = tzindex = 0
    elif data_len == SECONDS_SIZE_BYTES + optional_len:
        # Long form: the optional fields follow the seconds back to back.
        nsec, offset = get_bytes_as_int(data, offset, NSEC_SIZE_BYTES)
        tzoffset, offset = get_bytes_as_int(data, offset, TZOFFSET_SIZE_BYTES)
        tzindex, offset = get_bytes_as_int(data, offset, TZINDEX_SIZE_BYTES)
    else:
        # NOTE(review): MsgpackError is not defined or imported in the
        # visible part of this file -- confirm it is in scope.
        raise MsgpackError(f'Unexpected datetime payload length {data_len}')

    # Timezone information is not supported by this decoder.
    if not (tzoffset == 0 and tzindex == 0):
        raise NotImplementedError

    return pandas.to_datetime(seconds * NSEC_IN_SEC + nsec, unit='ns')
| 76 | + |
class Datetime():
    """Date and time value backed by a ``pandas.Timestamp``.

    An instance can be created in one of three ways:

    * from ``data``, a ``bytes`` payload decoded with ``msgpack_decode``;
    * from ``timestamp`` (seconds), optionally combined with ``nsec``;
    * from calendar fields ``year``, ``month``, ``day``, ``hour``,
      ``minute``, ``sec`` and ``nsec``.

    Instances compare equal to other ``Datetime`` objects and to
    ``pandas.Timestamp`` objects holding the same value, and hash
    consistently with that equality.
    """

    def __init__(self, data=None, *, timestamp=None, year=None, month=None,
                 day=None, hour=None, minute=None, sec=None, nsec=None):
        """Build the value from a payload, a timestamp or calendar fields.

        ``data`` takes precedence: when it is given, every keyword
        argument is ignored.

        Raises ``ValueError`` when ``data`` is not ``bytes``, when
        ``timestamp`` is combined with any of ``year``, ``month``, ``day``,
        ``hour``, ``minute``, ``sec``, or when ``nsec`` is combined with a
        non-``int`` ``timestamp``.
        """
        if data is not None:
            if not isinstance(data, bytes):
                raise ValueError('data argument (first positional argument) '
                                 'expected to be a "bytes" instance')

            self._datetime = msgpack_decode(data)
            return

        # The logic is same as in Tarantool, refer to datetime API.
        # https://www.tarantool.io/en/doc/latest/reference/reference_lua/datetime/new/
        if timestamp is not None:
            calendar_fields = (year, month, day, hour, minute, sec)
            if any(field is not None for field in calendar_fields):
                raise ValueError('Cannot provide both timestamp and year, month, '
                                 'day, hour, minute, sec')

            if nsec is not None:
                # Exact integer arithmetic is only possible with an
                # integer number of seconds.
                if not isinstance(timestamp, int):
                    raise ValueError('timestamp must be int if nsec provided')

                total_nsec = timestamp * NSEC_IN_SEC + nsec
                self._datetime = pandas.to_datetime(total_nsec, unit='ns')
            else:
                self._datetime = pandas.to_datetime(timestamp, unit='s')
        else:
            # pandas.Timestamp takes the sub-second part as separate
            # microsecond and nanosecond components.
            if nsec is not None:
                microsecond = nsec // NSEC_IN_MKSEC
                nanosecond = nsec % NSEC_IN_MKSEC
            else:
                microsecond = 0
                nanosecond = 0

            self._datetime = pandas.Timestamp(year=year, month=month, day=day,
                                              hour=hour, minute=minute, second=sec,
                                              microsecond=microsecond,
                                              nanosecond=nanosecond)

    def __eq__(self, other):
        """Compare with another ``Datetime`` or a ``pandas.Timestamp``.

        Returns ``NotImplemented`` for any other operand type so that
        Python can try the reflected comparison; for unrelated types the
        expression ``self == other`` still evaluates to ``False``.
        """
        if isinstance(other, Datetime):
            return self._datetime == other._datetime
        if isinstance(other, pandas.Timestamp):
            return self._datetime == other
        return NotImplemented

    def __hash__(self):
        """Hash the wrapped timestamp so that equal values hash alike.

        Defining ``__eq__`` alone would make instances unhashable.
        """
        return hash(self._datetime)

    def __str__(self):
        """Return the string form of the wrapped timestamp."""
        return self._datetime.__str__()

    def __repr__(self):
        """Return the wrapped timestamp's repr prefixed with ``datetime: ``."""
        return f'datetime: {self._datetime.__repr__()}'

    def __copy__(self):
        """Return a shallow copy sharing the attribute values."""
        cls = self.__class__
        result = cls.__new__(cls)
        result.__dict__.update(self.__dict__)
        return result

    def __deepcopy__(self, memo):
        """Return a deep copy, deep-copying every instance attribute."""
        cls = self.__class__
        result = cls.__new__(cls)
        # Register the copy before recursing so reference cycles resolve.
        memo[id(self)] = result
        for k, v in self.__dict__.items():
            setattr(result, k, deepcopy(v, memo))
        return result

    @property
    def year(self):
        """Year of the wrapped timestamp."""
        return self._datetime.year

    @property
    def month(self):
        """Month of the wrapped timestamp."""
        return self._datetime.month

    @property
    def day(self):
        """Day of the wrapped timestamp."""
        return self._datetime.day

    @property
    def hour(self):
        """Hour of the wrapped timestamp."""
        return self._datetime.hour

    @property
    def minute(self):
        """Minute of the wrapped timestamp."""
        return self._datetime.minute

    @property
    def sec(self):
        """Second of the wrapped timestamp."""
        return self._datetime.second

    @property
    def nsec(self):
        """Sub-second part in nanoseconds (microseconds + nanoseconds)."""
        return self._datetime.value % NSEC_IN_SEC

    @property
    def timestamp(self):
        """Value returned by the wrapped timestamp's ``timestamp()`` method."""
        return self._datetime.timestamp()

    def msgpack_encode(self):
        """Encode the value into the datetime payload bytes.

        The seconds are always written. The nsec, tzoffset and tzindex
        fields are appended only when at least one of them is non-zero;
        tzoffset and tzindex are always zero here, so in practice the
        long form is produced exactly when ``nsec`` is non-zero.
        """
        seconds = self._datetime.value // NSEC_IN_SEC
        nsec = self.nsec
        tzoffset = 0
        tzindex = 0

        buf = get_int_as_bytes(seconds, SECONDS_SIZE_BYTES)

        if (nsec != 0) or (tzoffset != 0) or (tzindex != 0):
            buf = buf + get_int_as_bytes(nsec, NSEC_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzoffset, TZOFFSET_SIZE_BYTES)
            buf = buf + get_int_as_bytes(tzindex, TZINDEX_SIZE_BYTES)

        return buf
0 commit comments