From 9aa04139b4f58566d69fb4ef4746817718f136ce Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Jan 2019 06:54:08 -0600 Subject: [PATCH 1/6] DEPR: __array__ for tz-aware Series/Index This deprecates the current behavior when converting tz-aware Series or Index to an ndarray. Previously, we converted to M8[ns], throwing away the timezone information. In the future, we will return an object-dtype array filled with Timestamps, each of which has the correct tz. ```python In [1]: import pandas as pd; import numpy as np In [2]: ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) In [3]: np.asarray(ser) /bin/ipython:1: FutureWarning: Converting timezone-aware DatetimeArray to timezone-naive ndarray with 'datetime64[ns]' dtype. In the future, this will return an ndarray with 'object' dtype where each element is a 'pandas.Timestamp' with the correct 'tz'. To accept the future behavior, pass 'dtype=object'. To keep the old behavior, pass 'dtype="datetime64[ns]"'. #!/Users/taugspurger/Envs/pandas-dev/bin/python3 Out[3]: array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], dtype='datetime64[ns]') ``` xref https://github.com/pandas-dev/pandas/issues/23569 --- doc/source/whatsnew/v0.24.0.rst | 56 ++++++++++++++++++- pandas/core/arrays/datetimes.py | 2 +- pandas/core/dtypes/cast.py | 2 +- pandas/core/dtypes/dtypes.py | 6 ++ pandas/core/groupby/groupby.py | 23 +++++++- pandas/core/indexes/datetimes.py | 16 +++++- pandas/core/indexing.py | 7 ++- pandas/core/internals/blocks.py | 40 +++++++++++++ pandas/core/internals/construction.py | 6 +- pandas/core/reshape/tile.py | 7 ++- pandas/core/series.py | 18 +++++- pandas/tests/arrays/test_datetimelike.py | 4 +- pandas/tests/arrays/test_datetimes.py | 33 +++++++++++ pandas/tests/dtypes/test_missing.py | 25 +++++---- .../tests/indexes/datetimes/test_datetime.py | 42 ++++++++++++++ pandas/tests/series/test_timeseries.py | 41 ++++++++++++++ 16 files changed, 301 insertions(+), 27 deletions(-) diff --git 
a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index fbbbe51473e1c..6e9fa2ca0094a 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1228,7 +1228,7 @@ Deprecations .. _whatsnew_0240.deprecations.datetimelike_int_ops: Integer Addition/Subtraction with Datetimes and Timedeltas is Deprecated -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In the past, users could—in some cases—add or subtract integers or integer-dtype arrays from :class:`Timestamp`, :class:`DatetimeIndex` and :class:`TimedeltaIndex`. @@ -1266,6 +1266,60 @@ the object's ``freq`` attribute (:issue:`21939`, :issue:`23878`). dti = pd.date_range('2001-01-01', periods=2, freq='7D') dti + pd.Index([1 * dti.freq, 2 * dti.freq]) + +.. _whatsnew_0240.deprecations.tz_aware_array: + +Converting Timezone-Aware Series and Index to NumPy Arrays +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The conversion from a :class:`Series` or :class:`Index` with timezone-aware +datetime data will changing to preserve timezones by default (:issue:`23569`). + +NumPy doesn't have a dedicated dtype for timezone-aware datetimes. +In the past, converting a :class:`Series` or :class:`DatetimeIndex` with +timezone-aware datatimes would convert to a NumPy array by + +1. converting the tz-aware data to UTC +2. dropping the timezone-info +3. returning a :class:`numpy.ndarray` with ``datetime64[ns]`` dtype + +Future versions of pandas will preserve the timezone information by returning an +object-dtype NumPy array where each value is a :class:`Timestamp` with the correct +timezone attached + +.. ipython:: python + + ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + ser + +The default behavior renames the same, but issues a warning + +.. 
code-block:: python + + In [8]: np.asarray(ser) + /bin/ipython:1: FutureWarning: Converting timezone-aware DatetimeArray to timezone-naive + ndarray with 'datetime64[ns]' dtype. In the future, this will return an ndarray + with 'object' dtype where each element is a 'pandas.Timestamp' with the correct 'tz'. + + To accept the future behavior, pass 'dtype=object'. + To keep the old behavior, pass 'dtype="datetime64[ns]"'. + #!/bin/python3 + Out[8]: + array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], + dtype='datetime64[ns]') + +The old or new behavior can be obtained by specifying the ``dtype`` + +.. ipython:: python + :okwarning: + + # Old behavior + np.asarray(ser, dtype='datetime64[ns]') + + # New behavior + np.asarray(ser, dtype=object) + + .. _whatsnew_0240.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index c428fd2e75e08..c9adfb5c3992b 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -522,7 +522,7 @@ def _resolution(self): # Array-Like / EA-Interface Methods def __array__(self, dtype=None): - if is_object_dtype(dtype): + if is_object_dtype(dtype) or (dtype is None and self.tz): return np.array(list(self), dtype=object) elif is_int64_dtype(dtype): return self.asi8 diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b55bad46580fe..3cef71e2d78d7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1020,7 +1020,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): # datetime64tz is assumed to be naive which should # be localized to the timezone. 
is_dt_string = is_string_dtype(value) - value = to_datetime(value, errors=errors) + value = to_datetime(value, errors=errors).array if is_dt_string: # Strings here are naive, so directly localize value = value.tz_localize(dtype.tz) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 9e2564c4f825b..9a262df574690 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -318,6 +318,7 @@ def _hash_categories(categories, ordered=True): from pandas.core.util.hashing import ( hash_array, _combine_hash_arrays, hash_tuples ) + from pandas.core.dtypes.common import is_datetime64tz_dtype, _NS_DTYPE if len(categories) and isinstance(categories[0], tuple): # assumes if any individual category is a tuple, then all our. ATM @@ -335,6 +336,11 @@ def _hash_categories(categories, ordered=True): # find a better solution hashed = hash((tuple(categories), ordered)) return hashed + + if is_datetime64tz_dtype(categories.dtype): + # Avoid future warning. + categories = categories.astype(_NS_DTYPE) + cat_array = hash_array(np.asarray(categories), categorize=False) if ordered: cat_array = np.vstack([ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b8b73b6aab1a5..b16e79e620739 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -26,7 +26,8 @@ class providing the base-class of operations. from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar) + _NS_DTYPE, ensure_float, is_datetime64tz_dtype, is_extension_array_dtype, + is_numeric_dtype, is_scalar) from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algorithms @@ -1269,10 +1270,18 @@ def f(self, **kwargs): return f def first_compat(x, axis=0): + # This is a bit strange. + # We only hit this block when grouping a DatetimeTZBlock *and* + # a categorical. 
Something strange going on with first for + # categorical dta. + if is_datetime64tz_dtype(x.dtype): + dtype = _NS_DTYPE + else: + dtype = None def first(x): - x = np.asarray(x) + x = np.asarray(x, dtype=dtype) x = x[notna(x)] if len(x) == 0: return np.nan @@ -1284,10 +1293,18 @@ def first(x): return first(x) def last_compat(x, axis=0): + # This is a bit strange. + # We only hit this block when grouping a DatetimeTZBlock *and* + # a categorical. Something strange going on with first for + # categorical dta. + if is_datetime64tz_dtype(x.dtype): + dtype = _NS_DTYPE + else: + dtype = None def last(x): - x = np.asarray(x) + x = np.asarray(x, dtype=dtype) x = x[notna(x)] if len(x) == 0: return np.nan diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 7d901f4656731..01e8b339716e2 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -339,6 +339,21 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): # -------------------------------------------------------------------- + def __array__(self, dtype=None): + if dtype is None and isinstance(self._data, DatetimeArray)\ + and getattr(self.dtype, 'tz', None): + msg = ( + "Converting timezone-aware DatetimeArray to timezone-naive " + "ndarray with 'datetime64[ns]' dtype. In the future, this " + "will return an ndarray with 'object' dtype where each " + "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t" + "To accept the future behavior, pass 'dtype=object'.\n\t" + "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'." 
+ ) + warnings.warn(msg, FutureWarning, stacklevel=3) + dtype = 'M8[ns]' + return np.asarray(self._data, dtype=dtype) + @property def dtype(self): return self._eadata.dtype @@ -1114,7 +1129,6 @@ def slice_indexer(self, start=None, end=None, step=None, kind=None): strftime = ea_passthrough(DatetimeArray.strftime) _has_same_tz = ea_passthrough(DatetimeArray._has_same_tz) - __array__ = ea_passthrough(DatetimeArray.__array__) @property def offset(self): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 3504c6e12b896..95bf776b1f19d 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -581,7 +581,12 @@ def can_do_equal_len(): setter(item, v) # we have an equal len ndarray/convertible to our labels - elif np.array(value).ndim == 2: + # hasattr first, to avoid coercing to ndarray without reason. + # But we may be relying on the ndarray coercion to check ndim. + # Why not just convert to an ndarray earlier on if needed? + elif ((hasattr(value, 'ndim') and value.ndim == 2) + or (not hasattr(value, 'ndim') and + np.array(value).ndim) == 2): # note that this coerces the dtype if we are mixed # GH 7551 diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 3b2c13af785d4..f336c25938470 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2400,6 +2400,12 @@ def get_values(self, dtype=None): values = values.reshape(1, -1) return values + def to_dense(self): + # we request M8[ns] dtype here, even though it discards tzinfo, + # as lots of code (e.g. anything using values_from_object) + # expects that behavior. + return np.asarray(self.values, dtype=_NS_DTYPE) + def _slice(self, slicer): """ return a slice of my values """ if isinstance(slicer, tuple): @@ -2544,6 +2550,40 @@ def setitem(self, indexer, value): klass=ObjectBlock,) return newb.setitem(indexer, value) + def quantile(self, qs, interpolation='linear', axis=0, axes=None): + # TODO: Add quantile as a reduction method. 
+ # We can't just use Block.quantile, as that converts the DTA + # to an ndarray[object] via get_values. + # This method + # 1. Convert DatetimeTZBlock -> DatetimeBlock + # 2. Perform the op via Block.quantile + # 3. Converts back to tz-aware + # Alternatively, we could special case the call to `get_values` + # in Block.quantile for DatetimeTZ. + + new_values = np.asarray(self.values, dtype=_NS_DTYPE) + if self.ndim == 2: + new_values = new_values[None, :] + + new_block = DatetimeBlock(new_values, placement=self.mgr_locs) + + ax, naive = new_block.quantile(qs, interpolation=interpolation, + axis=axis, axes=axes) + + ndim = getattr(naive, 'ndim', None) or 0 + if ndim == 0: + return ax, self.make_block_scalar( + tslibs.Timestamp(naive.values.value, tz=self.values.tz) + ) + else: + naive = naive.values.ravel() + + result = DatetimeArray(naive, dtype=self.values.dtype) + + return ax, make_block(result, + placement=np.arange(len(result)), + ndim=ndim) + class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): __slots__ = () diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 62e7f64518bcc..a0eb3986e37f7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -34,6 +34,7 @@ from pandas.core.indexes import base as ibase from pandas.core.internals import ( create_block_manager_from_arrays, create_block_manager_from_blocks) +from pandas.core.internals.arrays import extract_array # --------------------------------------------------------------------- # BlockManager Interface @@ -539,7 +540,6 @@ def sanitize_array(data, index, dtype=None, copy=False, Sanitize input data to an ndarray, copy if specified, coerce to the dtype if specified. 
""" - if dtype is not None: dtype = pandas_dtype(dtype) @@ -551,8 +551,10 @@ def sanitize_array(data, index, dtype=None, copy=False, else: data = data.copy() + data = extract_array(data, extract_numpy=True) + # GH#846 - if isinstance(data, (np.ndarray, Index, ABCSeries)): + if isinstance(data, np.ndarray): if dtype is not None: subarr = np.array(data, copy=False) diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 6f95b14993228..15df0ca2442fa 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -8,7 +8,7 @@ from pandas._libs.lib import infer_dtype from pandas.core.dtypes.common import ( - ensure_int64, is_categorical_dtype, is_datetime64_dtype, + _NS_DTYPE, ensure_int64, is_categorical_dtype, is_datetime64_dtype, is_datetime64tz_dtype, is_datetime_or_timedelta_dtype, is_integer, is_scalar, is_timedelta64_dtype) from pandas.core.dtypes.missing import isna @@ -226,7 +226,10 @@ def cut(x, bins, right=True, labels=None, retbins=False, precision=3, raise ValueError('Overlapping IntervalIndex is not accepted.') else: - bins = np.asarray(bins) + if is_datetime64tz_dtype(bins): + bins = np.asarray(bins, dtype=_NS_DTYPE) + else: + bins = np.asarray(bins) bins = _convert_bin_to_numeric_type(bins, dtype) if (np.diff(bins) < 0).any(): raise ValueError('bins must increase monotonically.') diff --git a/pandas/core/series.py b/pandas/core/series.py index 52b60339a7d68..2f5542aabae99 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -21,7 +21,8 @@ is_extension_array_dtype, is_extension_type, is_hashable, is_integer, is_iterator, is_list_like, is_scalar, is_string_like, is_timedelta64_dtype) from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCDatetimeIndex, ABCSeries, ABCSparseArray, ABCSparseSeries) + ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, ABCSeries, + ABCSparseArray, ABCSparseSeries) from pandas.core.dtypes.missing import ( isna, na_value_for_dtype, notna, remove_na_arraylike) @@ -665,7 +666,20 
@@ def __array__(self, result=None): """ The array interface, return my values. """ - return self.get_values() + # TODO: change the keyword name from result to dtype? + if (result is None and isinstance(self.array, ABCDatetimeArray) + and getattr(self.dtype, 'tz', None)): + msg = ( + "Converting timezone-aware DatetimeArray to timezone-naive " + "ndarray with 'datetime64[ns]' dtype. In the future, this " + "will return an ndarray with 'object' dtype where each " + "element is a 'pandas.Timestamp' with the correct 'tz'.\n\t" + "To accept the future behavior, pass 'dtype=object'.\n\t" + "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'." + ) + warnings.warn(msg, FutureWarning, stacklevel=3) + result = 'M8[ns]' + return np.asarray(self.array, result) def __array_wrap__(self, result, context=None): """ diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 348ac4579ffb5..302f7164c02f2 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -264,11 +264,11 @@ def test_array(self, tz_naive_fixture): arr = DatetimeArray(dti) expected = dti.asi8.view('M8[ns]') - result = np.array(arr) + result = np.array(arr, dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) # check that we are not making copies when setting copy=False - result = np.array(arr, copy=False) + result = np.array(arr, dtype='M8[ns]', copy=False) assert result.base is expected.base assert result.base is not None diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1375969c961fd..e84c54f06b076 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -178,6 +178,39 @@ def test_fillna_preserves_tz(self, method): assert arr[2] is pd.NaT assert dti[2] == pd.Timestamp('2000-01-03', tz='US/Central') + def test_array_interface_tz(self): + tz = "US/Central" + data = DatetimeArray(pd.date_range('2017', periods=2, tz=tz)) + 
result = np.asarray(data) + + expected = np.array([pd.Timestamp('2017-01-01T00:00:00', tz=tz), + pd.Timestamp('2017-01-02T00:00:00', tz=tz)], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype=object) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype='M8[ns]') + + expected = np.array(['2017-01-01T06:00:00', + '2017-01-02T06:00:00'], dtype="M8[ns]") + tm.assert_numpy_array_equal(result, expected) + + def test_array_interface(self): + data = DatetimeArray(pd.date_range('2017', periods=2)) + expected = np.array(['2017-01-01T00:00:00', '2017-01-02T00:00:00'], + dtype='datetime64[ns]') + + result = np.asarray(data) + tm.assert_numpy_array_equal(result, expected) + + result = np.asarray(data, dtype=object) + expected = np.array([pd.Timestamp('2017-01-01T00:00:00'), + pd.Timestamp('2017-01-02T00:00:00')], + dtype=object) + tm.assert_numpy_array_equal(result, expected) + class TestSequenceToDT64NS(object): diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index 56c9395d0f802..dfb9e0bad06b6 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -278,17 +278,20 @@ def test_array_equivalent(): TimedeltaIndex([0, np.nan])) assert not array_equivalent( TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan])) - assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), - DatetimeIndex([0, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex( - [1, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan]), DatetimeIndex( - [0, np.nan], tz='US/Eastern')) - assert not array_equivalent( - DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex( - [0, np.nan], tz='US/Eastern')) + with catch_warnings(): + simplefilter("ignore") + assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), + DatetimeIndex([0, np.nan], 
tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex( + [1, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan]), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( + DatetimeIndex([0, np.nan], tz='CET'), DatetimeIndex( + [0, np.nan], tz='US/Eastern')) + assert not array_equivalent( DatetimeIndex([0, np.nan]), TimedeltaIndex([0, np.nan])) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e76de2ebedf67..e1ba0e1708442 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -392,3 +392,45 @@ def test_unique(self, arr, expected): # GH 21737 # Ensure the underlying data is consistent assert result[0] == expected[0] + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. + idx = pd.date_range('2000', periods=2) + # M8[ns] by default + with tm.assert_produces_warning(None): + result = np.asarray(idx) + + expected = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]') + tm.assert_numpy_array_equal(result, expected) + + # optionally, object + with tm.assert_produces_warning(None): + result = np.asarray(idx, dtype=object) + + expected = np.array([pd.Timestamp('2000-01-01'), + pd.Timestamp('2000-01-02')]) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = 'US/Central' + idx = pd.date_range('2000', periods=2, tz=tz) + expected = np.array(['2000-01-01T06', '2000-01-02T06'], dtype='M8[ns]') + # We warn by default and return an ndarray[M8[ns]] + with tm.assert_produces_warning(FutureWarning): + result = np.asarray(idx) + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + with tm.assert_produces_warning(None): + result = np.asarray(idx, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Future behavior with no warning + 
expected = np.array([pd.Timestamp("2000-01-01", tz=tz), + pd.Timestamp("2000-01-02", tz=tz)]) + with tm.assert_produces_warning(None): + result = np.asarray(idx, dtype=object) + + tm.assert_numpy_array_equal(result, expected) diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index fcb486d832c76..07808008c081c 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1036,3 +1036,44 @@ def test_view_tz(self): 946879200000000000, 946965600000000000]) tm.assert_series_equal(result, expected) + + def test_asarray_tz_naive(self): + # This shouldn't produce a warning. + ser = pd.Series(pd.date_range('2000', periods=2)) + expected = np.array(['2000-01-01', '2000-01-02'], dtype='M8[ns]') + with tm.assert_produces_warning(None): + result = np.asarray(ser) + + tm.assert_numpy_array_equal(result, expected) + + # optionally, object + with tm.assert_produces_warning(None): + result = np.asarray(ser, dtype=object) + + expected = np.array([pd.Timestamp('2000-01-01'), + pd.Timestamp('2000-01-02')]) + tm.assert_numpy_array_equal(result, expected) + + def test_asarray_tz_aware(self): + tz = 'US/Central' + ser = pd.Series(pd.date_range('2000', periods=2, tz=tz)) + expected = np.array(['2000-01-01T06', '2000-01-02T06'], dtype='M8[ns]') + # We warn by default and return an ndarray[M8[ns]] + with tm.assert_produces_warning(FutureWarning): + result = np.asarray(ser) + + tm.assert_numpy_array_equal(result, expected) + + # Old behavior with no warning + with tm.assert_produces_warning(None): + result = np.asarray(ser, dtype="M8[ns]") + + tm.assert_numpy_array_equal(result, expected) + + # Future behavior with no warning + expected = np.array([pd.Timestamp("2000-01-01", tz=tz), + pd.Timestamp("2000-01-02", tz=tz)]) + with tm.assert_produces_warning(None): + result = np.asarray(ser, dtype=object) + + tm.assert_numpy_array_equal(result, expected) From ea4479239ad4732b95b026d14cc0fefce0431524 Mon Sep 17 
00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Jan 2019 13:51:23 -0600 Subject: [PATCH 2/6] fixup --- doc/source/whatsnew/v0.24.0.rst | 4 ++-- pandas/core/groupby/groupby.py | 24 +++--------------------- pandas/core/indexes/datetimes.py | 4 ++-- 3 files changed, 7 insertions(+), 25 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 6e9fa2ca0094a..1e33338ae2faf 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1273,7 +1273,7 @@ Converting Timezone-Aware Series and Index to NumPy Arrays ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The conversion from a :class:`Series` or :class:`Index` with timezone-aware -datetime data will changing to preserve timezones by default (:issue:`23569`). +datetime data will change to preserve timezones by default (:issue:`23569`). NumPy doesn't have a dedicated dtype for timezone-aware datetimes. In the past, converting a :class:`Series` or :class:`DatetimeIndex` with @@ -1292,7 +1292,7 @@ timezone attached ser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) ser -The default behavior renames the same, but issues a warning +The default behavior remains the same, but issues a warning .. code-block:: python diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b16e79e620739..e52ab66ef9cb4 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -26,8 +26,7 @@ class providing the base-class of operations. from pandas.core.dtypes.cast import maybe_downcast_to_dtype from pandas.core.dtypes.common import ( - _NS_DTYPE, ensure_float, is_datetime64tz_dtype, is_extension_array_dtype, - is_numeric_dtype, is_scalar) + ensure_float, is_extension_array_dtype, is_numeric_dtype, is_scalar) from pandas.core.dtypes.missing import isna, notna import pandas.core.algorithms as algorithms @@ -1270,18 +1269,10 @@ def f(self, **kwargs): return f def first_compat(x, axis=0): - # This is a bit strange. 
- # We only hit this block when grouping a DatetimeTZBlock *and* - # a categorical. Something strange going on with first for - # categorical dta. - if is_datetime64tz_dtype(x.dtype): - dtype = _NS_DTYPE - else: - dtype = None def first(x): + x = x.to_numpy() - x = np.asarray(x, dtype=dtype) x = x[notna(x)] if len(x) == 0: return np.nan @@ -1293,18 +1284,9 @@ def first(x): return first(x) def last_compat(x, axis=0): - # This is a bit strange. - # We only hit this block when grouping a DatetimeTZBlock *and* - # a categorical. Something strange going on with first for - # categorical dta. - if is_datetime64tz_dtype(x.dtype): - dtype = _NS_DTYPE - else: - dtype = None def last(x): - - x = np.asarray(x, dtype=dtype) + x = x.to_numpy() x = x[notna(x)] if len(x) == 0: return np.nan diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 01e8b339716e2..86bdd88c4e367 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -340,8 +340,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None): # -------------------------------------------------------------------- def __array__(self, dtype=None): - if dtype is None and isinstance(self._data, DatetimeArray)\ - and getattr(self.dtype, 'tz', None): + if (dtype is None and isinstance(self._data, DatetimeArray) + and getattr(self.dtype, 'tz', None)): msg = ( "Converting timezone-aware DatetimeArray to timezone-naive " "ndarray with 'datetime64[ns]' dtype. 
In the future, this " From 66d1843c3b21eb10c0a854cbe499363eeca288c6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 4 Jan 2019 08:25:41 -0600 Subject: [PATCH 3/6] updates --- doc/source/whatsnew/v0.24.0.rst | 10 ++++++- pandas/core/internals/blocks.py | 48 +++++++++------------------------ 2 files changed, 21 insertions(+), 37 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 5854280b8d5d4..c3e3a49e6845b 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1310,7 +1310,6 @@ The default behavior remains the same, but issues a warning The old or new behavior can be obtained by specifying the ``dtype`` .. ipython:: python - :okwarning: # Old behavior np.asarray(ser, dtype='datetime64[ns]') @@ -1319,6 +1318,15 @@ The old or new behavior can be obtained by specifying the ``dtype`` np.asarray(ser, dtype=object) +Or by using :meth:`Series.to_numpy` + +.. ipython:: python + + ser.to_numpy() + ser.to_numpy(dtype="datetime64[ns]") + +All the above applies to a :class:`DatetimeIndex` with tz-aware values as well. + .. _whatsnew_0240.prior_deprecations: Removal of prior version deprecations/changes diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0d1abc0312fb0..35b014ce764fc 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1447,8 +1447,18 @@ def quantile(self, qs, interpolation='linear', axis=0): ------- Block """ - values = self.get_values() - values, _ = self._try_coerce_args(values, values) + if self.is_datetimetz: + # We need to operate on i8 values for datetimetz + # but `Block.get_values()` returns an ndarray of objects + # right now. 
+ values = self.values.asi8 + + # Usual shape inconsistencies for ExtensionBlocks + if self.ndim > 1: + values = values[None, :] + else: + values = self.get_values() + values, _ = self._try_coerce_args(values, values) is_empty = values.shape[axis] == 0 orig_scalar = not is_list_like(qs) @@ -2480,40 +2490,6 @@ def setitem(self, indexer, value): klass=ObjectBlock,) return newb.setitem(indexer, value) - def quantile(self, qs, interpolation='linear', axis=0, axes=None): - # TODO: Add quantile as a reduction method. - # We can't just use Block.quantile, as that converts the DTA - # to an ndarray[object] via get_values. - # This method - # 1. Convert DatetimeTZBlock -> DatetimeBlock - # 2. Perform the op via Block.quantile - # 3. Converts back to tz-aware - # Alternatively, we could special case the call to `get_values` - # in Block.quantile for DatetimeTZ. - - new_values = np.asarray(self.values, dtype=_NS_DTYPE) - if self.ndim == 2: - new_values = new_values[None, :] - - new_block = DatetimeBlock(new_values, placement=self.mgr_locs) - - ax, naive = new_block.quantile(qs, interpolation=interpolation, - axis=axis, axes=axes) - - ndim = getattr(naive, 'ndim', None) or 0 - if ndim == 0: - return ax, self.make_block_scalar( - tslibs.Timestamp(naive.values.value, tz=self.values.tz) - ) - else: - naive = naive.values.ravel() - - result = DatetimeArray(naive, dtype=self.values.dtype) - - return ax, make_block(result, - placement=np.arange(len(result)), - ndim=ndim) - class TimeDeltaBlock(DatetimeLikeBlockMixin, IntBlock): __slots__ = () From 328338cf60aa648695fb9e7ece79706d2a41a894 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 4 Jan 2019 10:21:42 -0600 Subject: [PATCH 4/6] exclude datetimetz for bn --- pandas/core/nanops.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 89e191f171f97..cafd3a9915fa0 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -144,7 +144,9 @@ def f(values, 
axis=None, skipna=True, **kwds): def _bn_ok_dtype(dt, name): # Bottleneck chokes on datetime64 - if (not is_object_dtype(dt) and not is_datetime_or_timedelta_dtype(dt)): + if (not is_object_dtype(dt) and + not (is_datetime_or_timedelta_dtype(dt) or + is_datetime64tz_dtype(dt))): # GH 15507 # bottleneck does not properly upcast during the sum From 349f818a712cf4d641727aa2b86f6107846e2a15 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 4 Jan 2019 11:40:40 -0600 Subject: [PATCH 5/6] update parameter name and docstring --- doc/source/api/series.rst | 3 +++ pandas/core/series.py | 53 ++++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/doc/source/api/series.rst b/doc/source/api/series.rst index 7d5e6037b012a..8e4c378b9fefe 100644 --- a/doc/source/api/series.rst +++ b/doc/source/api/series.rst @@ -26,6 +26,7 @@ Attributes .. autosummary:: :toctree: generated/ + Series.array Series.values Series.dtype Series.ftype @@ -58,10 +59,12 @@ Conversion Series.convert_objects Series.copy Series.bool + Series.to_numpy Series.to_period Series.to_timestamp Series.to_list Series.get_values + Series.__array__ Indexing, iteration ------------------- diff --git a/pandas/core/series.py b/pandas/core/series.py index 3128e67f86873..f416e638b9176 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -662,12 +662,53 @@ def view(self, dtype=None): # ---------------------------------------------------------------------- # NDArray Compat - def __array__(self, result=None): + def __array__(self, dtype=None): """ - The array interface, return my values. + Return the values as a NumPy array. + + Users should not call this directly. Rather, it is invoked by + :func:`numpy.array` and :func:`numpy.asarray`. + + Parameters + ---------- + dtype : str or numpy.dtype, optional + The dtype to use for the resulting NumPy array. By default, + the dtype is inferred from the data. 
+ + Returns + ------- + numpy.ndarray + The values in the series converted to a :class:`numpy.ndarray` + with the specified `dtype`. + + See Also + -------- + Series.array : Zero-copy view to the array backing the Series. + Series.to_numpy : Series method for similar behavior. + + Examples + -------- + >>> ser = pd.Series([1, 2, 3]) + >>> np.asarray(ser) + array([1, 2, 3]) + + For timezone-aware data, the timezones may be retained with + ``dtype='object'`` + + >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET")) + >>> np.asarray(tzser, dtype="object") + array([Timestamp('2000-01-01 00:00:00+0100', tz='CET', freq='D'), + Timestamp('2000-01-02 00:00:00+0100', tz='CET', freq='D')], + dtype=object) + + Or the values may be localized to UTC and the tzinfo discarded with + ``dtype='datetime64[ns]'`` + + >>> np.asarray(tzser, dtype="datetime64[ns]") # doctest: +ELLIPSIS + array(['1999-12-31T23:00:00.000000000', ...], + dtype='datetime64[ns]') """ - # TODO: change the keyword name from result to dtype? - if (result is None and isinstance(self.array, ABCDatetimeArray) + if (dtype is None and isinstance(self.array, ABCDatetimeArray) and getattr(self.dtype, 'tz', None)): msg = ( "Converting timezone-aware DatetimeArray to timezone-naive " @@ -678,8 +719,8 @@ def __array__(self, result=None): "To keep the old behavior, pass 'dtype=\"datetime64[ns]\"'." 
) warnings.warn(msg, FutureWarning, stacklevel=3) - result = 'M8[ns]' - return np.asarray(self.array, result) + dtype = 'M8[ns]' + return np.asarray(self.array, dtype) def __array_wrap__(self, result, context=None): """ From 50f4fbde4472700525959dbb7255185e36155f21 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 4 Jan 2019 12:55:03 -0600 Subject: [PATCH 6/6] updates --- doc/source/whatsnew/v0.24.0.rst | 10 ++++++++-- pandas/core/internals/blocks.py | 8 +++----- pandas/core/series.py | 1 + pandas/tests/dtypes/test_missing.py | 4 ++-- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index c3e3a49e6845b..f9a4a2b005045 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -1307,13 +1307,19 @@ The default behavior remains the same, but issues a warning array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00.000000000'], dtype='datetime64[ns]') -The old or new behavior can be obtained by specifying the ``dtype`` +The previous or future behavior can be obtained, without any warnings, by specifying +the ``dtype`` + +*Previous Behavior* .. ipython:: python - # Old behavior np.asarray(ser, dtype='datetime64[ns]') +*Future Behavior* + +.. ipython:: python + # New behavior np.asarray(ser, dtype=object) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 35b014ce764fc..4b2f93451dad0 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1448,11 +1448,13 @@ def quantile(self, qs, interpolation='linear', axis=0): Block """ if self.is_datetimetz: + # TODO: cleanup this special case. # We need to operate on i8 values for datetimetz # but `Block.get_values()` returns an ndarray of objects - # right now. + # right now. 
We need an API for "values to do numeric-like ops on" values = self.values.asi8 + # TODO: NonConsolidatableMixin shape # Usual shape inconsistencies for ExtensionBlocks if self.ndim > 1: values = values[None, :] @@ -2065,10 +2067,6 @@ def _na_value(self): def fill_value(self): return tslibs.iNaT - def to_dense(self): - # TODO(DatetimeBlock): remove - return np.asarray(self.values) - def get_values(self, dtype=None): """ return object dtype as boxed values, such as Timestamps/Timedelta diff --git a/pandas/core/series.py b/pandas/core/series.py index f416e638b9176..04b8b1ed74d9c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -683,6 +683,7 @@ def __array__(self, dtype=None): See Also -------- + pandas.array : Create a new array from data. Series.array : Zero-copy view to the array backing the Series. Series.to_numpy : Series method for similar behavior. diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index dfb9e0bad06b6..965e5e000d026 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- from datetime import datetime -from warnings import catch_warnings, simplefilter +from warnings import catch_warnings, filterwarnings, simplefilter import numpy as np import pytest @@ -279,7 +279,7 @@ def test_array_equivalent(): assert not array_equivalent( TimedeltaIndex([0, np.nan]), TimedeltaIndex([1, np.nan])) with catch_warnings(): - simplefilter("ignore") + filterwarnings("ignore", "Converting timezone", FutureWarning) assert array_equivalent(DatetimeIndex([0, np.nan], tz='US/Eastern'), DatetimeIndex([0, np.nan], tz='US/Eastern')) assert not array_equivalent(