Skip to content

Introduce Datetime type for ranges outside datetime.[MIN|MAX]YEAR #1200

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 12 additions & 5 deletions cassandra/cqlengine/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from cassandra.cqltypes import SimpleDateType, _cqltypes, UserType
from cassandra.cqlengine import ValidationError
from cassandra.cqlengine.functions import get_total_seconds
from cassandra.util import Duration as _Duration
from cassandra.util import Datetime, Duration as _Duration

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -541,17 +541,19 @@ class DateTime(Column):
def to_python(self, value):
if value is None:
return
elif isinstance(value, Datetime):
return value
if isinstance(value, datetime):
if DateTime.truncate_microseconds:
us = value.microsecond
truncated_us = us // 1000 * 1000
return value - timedelta(microseconds=us - truncated_us)
return Datetime(value - timedelta(microseconds=us - truncated_us))
else:
return value
return Datetime(value)
elif isinstance(value, date):
return datetime(*(value.timetuple()[:6]))
return Datetime(datetime(*(value.timetuple()[:6])))

return datetime.utcfromtimestamp(value)
return Datetime(datetime.utcfromtimestamp(value))

def to_database(self, value):
value = super(DateTime, self).to_database(value)
Expand All @@ -560,6 +562,11 @@ def to_database(self, value):
if not isinstance(value, datetime):
if isinstance(value, date):
value = datetime(value.year, value.month, value.day)
elif isinstance(value, Datetime):
try:
value = value.datetime()
except ValueError:
return int(value.milliseconds_from_epoch)
else:
raise ValidationError("{0} '{1}' is not a datetime object".format(self.column_name, value))
epoch = datetime(1970, 1, 1, tzinfo=value.tzinfo)
Expand Down
10 changes: 5 additions & 5 deletions cassandra/cqltypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from binascii import unhexlify
import calendar
from collections import namedtuple
import datetime
from decimal import Decimal
import io
from itertools import chain
Expand Down Expand Up @@ -627,17 +628,16 @@ def interpret_datestring(val):
@staticmethod
def deserialize(byts, protocol_version):
timestamp = int64_unpack(byts) / 1000.0
return util.datetime_from_timestamp(timestamp)
return util.Datetime(util.Datetime(util.DATETIME_EPOC) + datetime.timedelta(seconds=timestamp))

@staticmethod
def serialize(v, protocol_version):
try:
# v is datetime
timestamp_seconds = calendar.timegm(v.utctimetuple())
timestamp = timestamp_seconds * 1e3 + getattr(v, 'microsecond', 0) / 1e3
# v is Datetime
timestamp = v.milliseconds_from_epoch
except AttributeError:
try:
timestamp = calendar.timegm(v.timetuple()) * 1e3
timestamp = util.Datetime(v).milliseconds_from_epoch
except AttributeError:
# Ints and floats are valid timestamps too
if type(v) not in _number_types:
Expand Down
4 changes: 2 additions & 2 deletions cassandra/deserializers.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ from libc.stdint cimport int32_t, uint16_t

include 'cython_marshal.pyx'
from cassandra.buffer cimport Buffer, to_bytes, slice_buffer
from cassandra.cython_utils cimport datetime_from_timestamp
from cassandra.cython_utils cimport datetime_from_timestamp, DATETIME_EPOC

from cython.view cimport array as cython_array
from cassandra.tuple cimport tuple_new, tuple_set
Expand Down Expand Up @@ -136,7 +136,7 @@ cdef class DesCounterColumnType(DesLongType):
cdef class DesDateType(Deserializer):
cdef deserialize(self, Buffer *buf, int protocol_version):
cdef double timestamp = unpack_num[int64_t](buf) / 1000.0
return datetime_from_timestamp(timestamp)
return util.Datetime(util.Datetime(DATETIME_EPOC) + datetime.timedelta(seconds=timestamp))


cdef class TimestampType(DesDateType):
Expand Down
10 changes: 9 additions & 1 deletion cassandra/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from uuid import UUID
import ipaddress

from cassandra.util import (OrderedDict, OrderedMap, OrderedMapSerializedKey,
from cassandra.util import (Datetime, OrderedDict, OrderedMap, OrderedMapSerializedKey,
sortedset, Time, Date, Point, LineString, Polygon)


Expand Down Expand Up @@ -67,6 +67,7 @@ def __init__(self):
datetime.date: self.cql_encode_date,
datetime.time: self.cql_encode_time,
Date: self.cql_encode_date_ext,
Datetime: self.cql_encode_datetime_ext,
Time: self.cql_encode_time,
dict: self.cql_encode_map_collection,
OrderedDict: self.cql_encode_map_collection,
Expand Down Expand Up @@ -142,6 +143,13 @@ def cql_encode_datetime(self, val):
timestamp = calendar.timegm(val.utctimetuple())
return str(int(timestamp * 1e3 + getattr(val, 'microsecond', 0) / 1e3))

def cql_encode_datetime_ext(self, val):
    """
    Encodes a :class:`cassandra.util.Datetime` object as a CQL integer
    literal holding its epoch-relative millisecond offset.

    :param val: object exposing ``milliseconds_from_epoch`` (int or float)
    :return: the offset rendered as a base-10 integer string
    """
    # The integer form is used (rather than a formatted date string) in case
    # the Datetime exceeds datetime.[MIN|MAX]YEAR.
    # int() drops any fractional milliseconds so a float offset cannot leak a
    # literal such as '1500.0' into the query; cql_encode_datetime does the same.
    return str(int(val.milliseconds_from_epoch))

def cql_encode_date(self, val):
"""
Converts a :class:`datetime.date` object to a string with format
Expand Down
125 changes: 125 additions & 0 deletions cassandra/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -1006,6 +1006,131 @@ def __str__(self):
return str(self.days_from_epoch)


@total_ordering
class Datetime(object):
    """
    Idealized datetime: year, month, day, hour, minute, second, microsecond, and tzinfo

    Offers a wider year range than ``datetime.datetime``. The value is stored as
    ``milliseconds_from_epoch`` (offset from 1970-01-01 UTC, may be negative) plus
    an optional ``tzinfo`` used only when converting back to ``datetime.datetime``.

    For Datetimes that cannot be represented as a ``datetime.datetime`` (outside
    ``datetime.MINYEAR``..``datetime.MAXYEAR``), ``str()`` falls back to printing
    the ``milliseconds_from_epoch`` offset.
    """

    # MICRO/MILLI/SECOND form a x1000 ladder; MINUTE/HOUR/DAY are counts of
    # seconds (DAY == 86400) and DAY is what ``days`` divides by.
    MICRO = 1000
    MILLI = 1000 * MICRO
    SECOND = 1000 * MILLI
    MINUTE = 60
    HOUR = 60 * MINUTE
    DAY = 24 * HOUR

    # Class-level defaults; instances overwrite them in __init__.
    milliseconds_from_epoch = 0
    tzinfo = None

    # Numeric types accepted as a raw millisecond offset. ``long`` only exists
    # on Python 2 (or if the module aliases it); fall back cleanly otherwise.
    try:
        _NUMBER_TYPES = (int, long, float)  # noqa: F821
    except NameError:
        _NUMBER_TYPES = (int, float)

    def __init__(self, value):
        """
        Initializer value can be:

        - number_type: milliseconds from epoch (1970, 1, 1). Can be negative.
        - datetime.datetime: built-in datetime (aware values are normalized to UTC)
        - datetime.date: built-in date, taken as midnight of that day
        - Datetime: copied (offset and tzinfo)

        Raises TypeError for anything else.
        """
        if isinstance(value, self._NUMBER_TYPES):
            self.milliseconds_from_epoch = value
        elif isinstance(value, datetime.datetime):
            # datetime.datetime must be tested before datetime.date: it is a subclass.
            self._from_datetime(value)
        elif isinstance(value, datetime.date):
            self._from_timetuple(value.timetuple())
        elif isinstance(value, Datetime):
            self.milliseconds_from_epoch = value.milliseconds_from_epoch
            self.tzinfo = value.tzinfo
        else:
            raise TypeError('Datetime arguments must be a number, datetime.datetime, '
                            'datetime.date or Datetime')

    @property
    def seconds(self):
        """
        Absolute seconds from epoch (can be negative)
        """
        return self.milliseconds_from_epoch // 1000

    @property
    def days(self):
        """
        Absolute days from epoch (can be negative)
        """
        return self.seconds // self.DAY

    def datetime(self):
        """
        Return a built-in datetime.datetime for Datetimes falling in the years
        [datetime.MINYEAR, datetime.MAXYEAR]. The result is naive when ``tzinfo``
        is None, otherwise it is the same instant expressed in ``tzinfo``.

        ValueError is raised for Datetimes outside this range.
        """
        try:
            naive_utc = datetime.datetime(1970, 1, 1) + datetime.timedelta(
                milliseconds=self.milliseconds_from_epoch)
            if self.tzinfo is None:
                return naive_utc
            # The stored offset is UTC based, so convert from UTC into the
            # remembered zone instead of adding it to a zone-local epoch.
            return self.tzinfo.fromutc(naive_utc.replace(tzinfo=self.tzinfo))
        except (OverflowError, ValueError):
            raise ValueError("%r exceeds ranges for built-in datetime.datetime" % self)

    def utctimetuple(self):
        """Return ``utctimetuple()`` of the built-in equivalent (ValueError if out of range)."""
        return self.datetime().utctimetuple()

    def timetuple(self):
        """Return ``timetuple()`` of the built-in equivalent (ValueError if out of range)."""
        return self.datetime().timetuple()

    def isoformat(self, sep='T', timespec='auto'):
        """Return ``isoformat()`` of the built-in equivalent (ValueError if out of range)."""
        return self.datetime().isoformat(sep, timespec)

    def _from_timetuple(self, t):
        # calendar.timegm interprets the tuple as UTC and returns whole seconds.
        self.milliseconds_from_epoch = calendar.timegm(t) * 1000

    def _from_datetime(self, v):
        # utctimetuple() shifts aware datetimes to UTC and leaves naive ones
        # untouched, so the stored offset is always relative to the UTC epoch.
        self.milliseconds_from_epoch = calendar.timegm(v.utctimetuple()) * 1000 + v.microsecond // 1000
        self.tzinfo = v.tzinfo

    def __hash__(self):
        # hash() rather than the raw value: __hash__ must return an int even
        # when the offset is a float, and hash(1500.0) == hash(1500) keeps it
        # consistent with __eq__.
        return hash(self.milliseconds_from_epoch)

    def __eq__(self, other):
        if isinstance(other, Datetime):
            return self.milliseconds_from_epoch == other.milliseconds_from_epoch

        if isinstance(other, self._NUMBER_TYPES):
            return self.milliseconds_from_epoch == other

        try:
            as_builtin = self.datetime()
        except ValueError:
            # Out of range for datetime.datetime: cannot equal any built-in value.
            return False
        return as_builtin == other

    def __ne__(self, other):
        return not self.__eq__(other)

    def __lt__(self, other):
        if not isinstance(other, Datetime):
            return NotImplemented
        return self.milliseconds_from_epoch < other.milliseconds_from_epoch

    def __add__(self, other):
        """Return a new Datetime shifted by a ``datetime.timedelta`` (tzinfo preserved)."""
        if not isinstance(other, datetime.timedelta):
            # Let Python try the reflected operation / raise TypeError.
            return NotImplemented
        shifted = Datetime(int(self.milliseconds_from_epoch + other.total_seconds() * 1000))
        shifted.tzinfo = self.tzinfo
        return shifted

    __radd__ = __add__

    def __sub__(self, other):
        """
        ``Datetime - Datetime`` gives a ``datetime.timedelta``;
        ``Datetime - timedelta`` gives a new Datetime.
        """
        if isinstance(other, Datetime):
            return datetime.timedelta(milliseconds=self.milliseconds_from_epoch - other.milliseconds_from_epoch)
        if isinstance(other, datetime.timedelta):
            return self + (-other)
        return NotImplemented

    def __repr__(self):
        return "Datetime(%s)" % self.milliseconds_from_epoch

    def __str__(self):
        try:
            dt = self.datetime()
        except ValueError:
            # Not representable as datetime.datetime: print the raw offset.
            return str(self.milliseconds_from_epoch)
        # Microseconds have six digits.
        return "%04d-%02d-%02d %02d:%02d:%02d.%06d" % (
            dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second, dt.microsecond)


inet_pton = socket.inet_pton
inet_ntop = socket.inet_ntop

Expand Down
32 changes: 12 additions & 20 deletions docs/dates_and_times.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,46 +11,38 @@ Timestamps in Cassandra are timezone-naive timestamps encoded as millseconds sin
timestamps in this database usually find it easiest to reason about them if they are always assumed to be UTC. To quote the
pytz documentation, "The preferred way of dealing with times is to always work in UTC, converting to localtime only when
generating output to be read by humans." The driver adheres to this tenet, and assumes UTC is always in the database. The
driver attempts to make this correct on the way in, and assumes no timezone on the way out.
driver attempts to make this correct on the way in, and assumes no timezone on the way out. Timestamps in Cassandra are
idealized markers, much like ``datetime.datetime`` in the Python standard library. Unlike this Python implementation, the
Cassandra encoding supports much wider ranges. To accommodate these ranges without overflow, this driver returns these data
in a custom type: :class:`.util.Datetime`.

Write Path
~~~~~~~~~~
When inserting timestamps, the driver handles serialization for the write path as follows:

If the input is a ``datetime.datetime``, the serialization is normalized by starting with the ``utctimetuple()`` of the
value.

- If the ``datetime`` object is timezone-aware, the timestamp is shifted, and represents the UTC timestamp equivalent.
- If the ``datetime`` object is timezone-naive, this results in no shift -- any ``datetime`` with no timezone information is assumed to be UTC

Note the second point above applies even to "local" times created using ``now()``::

>>> d = datetime.now()

>>> print(d.tzinfo)
None


These do not contain timezone information intrinsically, so they will be assumed to be UTC and not shifted. When generating
timestamps in the application, it is clearer to use ``datetime.utcnow()`` to be explicit about it.
The driver accepts anything that can be used to construct the :class:`.util.Datetime` class.
See the linked API docs for details. It uses :attr:`.util.Datetime.milliseconds_from_epoch` as the epoch-relative millisecond timestamp.

If the input for a timestamp is numeric, it is assumed to be an epoch-relative millisecond timestamp, as specified in the
CQL spec -- no scaling or conversion is done.

Read Path
~~~~~~~~~
The driver always returns the custom :class:`.util.Datetime` type for ``timestamp`` values.

The driver always assumes persisted timestamps are UTC and makes no attempt to localize them. Returned values are
timezone-naive ``datetime.datetime``. We follow this approach because the datetime API has deficiencies around daylight
timezone-naive :class:`.util.Datetime`. We follow this approach because the datetime API has deficiencies around daylight
saving time, and the de facto package for handling this is a third-party package (we try to minimize external dependencies
and not make decisions for the integrator).

The decision for how to handle timezones is left to the application. For the most part it is straightforward to apply
localization to the ``datetime``\s returned by queries. One prevalent method is to use pytz for localization::
localization to the :class:`.util.Datetime` values returned by queries, once converted to ``datetime.datetime`` with
:meth:`.util.Datetime.datetime`. One prevalent method is to use pytz for localization::

import pytz
user_tz = pytz.timezone('US/Central')
timestamp_naive = row.ts
timestamp_utc = pytz.utc.localize(timestamp_naive)
timestamp_utc = pytz.utc.localize(timestamp_naive.datetime())
timestamp_presented = timestamp_utc.astimezone(user_tz)

This is the most robust approach (likely refactored into a function). If it is deemed too cumbersome to apply for all call
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from cassandra.cqlengine.functions import get_total_seconds
from cassandra.cqlengine.models import Model, ValidationError
from cassandra.cqlengine.management import sync_table, drop_table
from cassandra.util import Datetime

from tests.integration import CASSANDRA_IP
from tests.integration.cqlengine import is_prepend_reversed
Expand Down Expand Up @@ -389,7 +390,7 @@ def test_io_success(self):
""" Tests that a basic usage works as expected """
k1 = uuid4()
k2 = uuid4()
now = datetime.now()
now = Datetime(datetime.now())
then = now + timedelta(days=1)
m1 = TestMapModel.create(int_map={1: k1, 2: k2},
text_map={'now': now, 'then': then})
Expand Down
10 changes: 5 additions & 5 deletions tests/integration/cqlengine/columns/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def test_datetime_timestamp(self):
dt_value = 1454520554
self.DatetimeTest.objects.create(test_id=5, created_at=dt_value)
dt2 = self.DatetimeTest.objects(test_id=5).first()
self.assertEqual(dt2.created_at, datetime.utcfromtimestamp(dt_value))
self.assertEqual(dt2.created_at, util.Datetime(datetime.utcfromtimestamp(dt_value)))

def test_datetime_large(self):
dt_value = datetime(2038, 12, 31, 10, 10, 10, 123000)
Expand Down Expand Up @@ -318,12 +318,12 @@ class TestDateTime(DataType, BaseCassEngTestCase):
def setUpClass(cls):
cls.db_klass, cls.python_klass = (
DateTime,
datetime
util.Datetime
)
cls.first_value, cls.second_value, cls.third_value = (
datetime(2017, 4, 13, 18, 34, 24, 317000),
datetime(1, 1, 1),
datetime(1, 1, 2)
util.Datetime(datetime(2017, 4, 13, 18, 34, 24, 317000)),
util.Datetime(datetime(1, 1, 1)),
util.Datetime(datetime(1, 1, 2))
)
super(TestDateTime, cls).setUpClass()

Expand Down
3 changes: 2 additions & 1 deletion tests/integration/cqlengine/query/test_datetime_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from datetime import datetime, timedelta
from uuid import uuid4
from cassandra.cqlengine.functions import get_total_seconds
from cassandra.util import Datetime

from tests.integration.cqlengine.base import BaseCassEngTestCase

Expand Down Expand Up @@ -65,7 +66,7 @@ def test_range_query(self):
@execute_count(3)
def test_datetime_precision(self):
""" Tests that millisecond resolution is preserved when saving datetime objects """
now = datetime.now()
now = Datetime(datetime.now())
pk = 1000
obj = DateTimeQueryTestModel.create(user=pk, day=now, data='energy cheese')
load = DateTimeQueryTestModel.get(user=pk)
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/datatype_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import ipaddress
from uuid import uuid1, uuid4

from cassandra.util import OrderedMap, Date, Time, sortedset, Duration
from cassandra.util import Datetime, OrderedMap, Date, Time, sortedset, Duration

from tests.integration import get_server_versions

Expand Down Expand Up @@ -103,7 +103,7 @@ def get_sample_data():
sample_data[datatype] = 'text'

elif datatype == 'timestamp':
sample_data[datatype] = datetime(2013, 12, 31, 23, 59, 59, 999000)
sample_data[datatype] = Datetime(datetime(2013, 12, 31, 23, 59, 59, 999000))

elif datatype == 'timeuuid':
sample_data[datatype] = uuid1()
Expand Down
Loading