From 70a8d0e1b9b8ef2bfb558d06eb0fae30d38fff61 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 19 Nov 2018 22:20:05 +0100 Subject: [PATCH 01/16] CLN/DEPS: Clean up post numpy bump to 1.12 --- README.md | 2 +- pandas/_libs/algos_rank_helper.pxi.in | 10 +------ pandas/_libs/lib.pyx | 3 +- pandas/_libs/sparse.pyx | 8 ------ pandas/_libs/sparse_op_helper.pxi.in | 7 ----- pandas/core/arrays/categorical.py | 20 ++----------- pandas/core/arrays/datetimelike.py | 3 +- pandas/core/arrays/sparse.py | 3 -- pandas/core/dtypes/cast.py | 27 ++++-------------- pandas/core/dtypes/dtypes.py | 7 +---- pandas/core/dtypes/missing.py | 1 + pandas/core/indexes/datetimes.py | 5 ---- pandas/core/internals/blocks.py | 22 ++------------- pandas/core/internals/managers.py | 28 +++---------------- pandas/core/reshape/tile.py | 3 +- pandas/io/packers.py | 1 - pandas/io/pickle.py | 8 ------ pandas/tests/arrays/categorical/test_repr.py | 1 - pandas/tests/dtypes/test_inference.py | 5 +--- pandas/tests/frame/test_constructors.py | 4 +-- pandas/tests/frame/test_operators.py | 1 - pandas/tests/indexes/common.py | 5 +--- pandas/tests/indexes/multi/test_analytics.py | 5 +--- .../indexing/test_chaining_and_caching.py | 1 - pandas/tests/indexing/test_iloc.py | 2 +- pandas/tests/io/test_pytables.py | 2 -- pandas/tests/series/test_analytics.py | 10 ++----- pandas/tests/test_base.py | 9 +++--- pandas/tests/test_nanops.py | 1 - pandas/tests/test_panel.py | 1 - pandas/tests/test_sorting.py | 7 ----- 31 files changed, 32 insertions(+), 180 deletions(-) diff --git a/README.md b/README.md index b4dedecb4c697..1993b1ecb9dc1 100644 --- a/README.md +++ b/README.md @@ -171,7 +171,7 @@ pip install pandas ``` ## Dependencies -- [NumPy](https://www.numpy.org): 1.9.0 or higher +- [NumPy](https://www.numpy.org): 1.12.0 or higher - [python-dateutil](https://labix.org/python-dateutil): 2.5.0 or higher - [pytz](https://pythonhosted.org/pytz): 2011k or higher diff --git a/pandas/_libs/algos_rank_helper.pxi.in b/pandas/_libs/algos_rank_helper.pxi.in index 5ffc6dd578023..5dac94394c7ed 100644 --- a/pandas/_libs/algos_rank_helper.pxi.in +++ b/pandas/_libs/algos_rank_helper.pxi.in @@ -102,15 +102,7 @@ def rank_1d_{{dtype}}(object in_arr, ties_method='average', ranks = np.empty(n, dtype='f8') {{if dtype == 'object'}} - - try: - _as = np.lexsort(keys=order) - except TypeError: - # lexsort on object array will raise TypeError for numpy version - # earlier than 1.11.0. Use argsort with order argument instead. - _dt = [('values', 'O'), ('mask', '?')] - _values = np.asarray(list(zip(order[0], order[1])), dtype=_dt) - _as = np.argsort(_values, kind='mergesort', order=('mask', 'values')) + _as = np.lexsort(keys=order) {{else}} if tiebreak == TIEBREAK_FIRST: # need to use a stable sort here diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e89c8fa579687..7b83387036de0 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -485,8 +485,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): bint is_datelike ndarray result - # on 32-bit, 1.6.2 numpy M8[ns] is a subdtype of integer, which is weird - is_datelike = new_dtype in ['M8[ns]', 'm8[ns]'] + is_datelike = new_dtype == 'm8[ns]' result = np.empty(n, dtype=new_dtype) for i in range(n): diff --git a/pandas/_libs/sparse.pyx b/pandas/_libs/sparse.pyx index 668bd0ae6bbb7..f5980998f6db4 100644 --- a/pandas/_libs/sparse.pyx +++ b/pandas/_libs/sparse.pyx @@ -8,14 +8,6 @@ from numpy cimport (ndarray, uint8_t, int64_t, int32_t, int16_t, int8_t, cnp.import_array() -from distutils.version import LooseVersion - -# numpy versioning -_np_version = np.version.short_version -_np_version_under1p10 = LooseVersion(_np_version) < LooseVersion('1.10') -_np_version_under1p11 = LooseVersion(_np_version) < LooseVersion('1.11') - - # ----------------------------------------------------------------------------- # Preamble stuff diff --git a/pandas/_libs/sparse_op_helper.pxi.in b/pandas/_libs/sparse_op_helper.pxi.in index 1f41096a3f194..c6621ab5977ca 100644 --- a/pandas/_libs/sparse_op_helper.pxi.in +++ b/pandas/_libs/sparse_op_helper.pxi.in @@ -42,13 +42,6 @@ cdef inline sparse_t __mod__(sparse_t a, sparse_t b): cdef inline sparse_t __floordiv__(sparse_t a, sparse_t b): if b == 0: if sparse_t is float64_t: - # numpy >= 1.11 returns NaN - # for a // 0, rather than +-inf - if _np_version_under1p11: - if a > 0: - return INF - elif a < 0: - return -INF return NaN else: return 0 diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 276ef6426a51b..645714f40f211 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -6,7 +6,7 @@ from pandas import compat from pandas.compat import u, lzip -from pandas._libs import lib, algos as libalgos +from pandas._libs import algos as libalgos from pandas.core.dtypes.generic import ( ABCSeries, ABCIndexClass, ABCCategoricalIndex) @@ -113,14 +113,6 @@ def f(self, other): ret[na_mask] = False return ret - # Numpy-1.9 and earlier may convert a scalar to a zerodim array during - # comparison operation when second arg has higher priority, e.g. - # - # cat[0] < cat - # - # With cat[0], for example, being ``np.int64(1)`` by the time it gets - # into this function would become ``np.array(1)``. - other = lib.item_from_zerodim(other) if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) @@ -2053,15 +2045,7 @@ def __setitem__(self, key, value): elif isinstance(key, slice): pass - # Array of True/False in Series or Categorical - else: - # There is a bug in numpy, which does not accept a Series as a - # indexer - # https://github.com/pandas-dev/pandas/issues/6168 - # https://github.com/numpy/numpy/issues/4240 -> fixed in numpy 1.9 - # FIXME: remove when numpy 1.9 is the lowest numpy version pandas - # accepts... - key = np.asarray(key) + # else: array of True/False in Series or Categorical lindexer = self.categories.get_indexer(rvalue) lindexer = self._maybe_coerce_indexer(lindexer) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 631257b7a5264..47cb0b870ef06 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -846,8 +846,7 @@ def __isub__(self, other): def _evaluate_compare(self, other, op): """ We have been called because a comparison between - 8 aware arrays. numpy >= 1.11 will - now warn about NaT comparisons + 8 aware arrays. numpy will warn about NaT comparisons """ # Called by comparison methods when comparing datetimelike # with datetimelike diff --git a/pandas/core/arrays/sparse.py b/pandas/core/arrays/sparse.py index 672261c2a407e..af0337e6b08bb 100644 --- a/pandas/core/arrays/sparse.py +++ b/pandas/core/arrays/sparse.py @@ -1015,9 +1015,6 @@ def __getitem__(self, key): key = np.asarray(key) if com.is_bool_indexer(key) and len(self) == len(key): - # TODO(numpy 1.11): Remove this asarray. - # Old NumPy didn't treat array-like as boolean masks. - key = np.asarray(key) return self.take(np.arange(len(key), dtype=np.int32)[key]) elif hasattr(key, '__len__'): return self.take(key) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index c7c6f89eb13a4..241e7a8639e0b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -264,27 +264,12 @@ def maybe_promote(dtype, fill_value=np.nan): # returns tuple of (dtype, fill_value) if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - # for now: refuse to upcast datetime64 - # (this is because datetime64 will not implicitly upconvert - # to object correctly as of numpy 1.6.1) - if isna(fill_value): - fill_value = iNaT - else: - if issubclass(dtype.type, np.datetime64): - try: - fill_value = tslibs.Timestamp(fill_value).value - except Exception: - # the proper thing to do here would probably be to upcast - # to object (but numpy 1.6.1 doesn't do this properly) - fill_value = iNaT - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = tslibs.Timedelta(fill_value).value - except Exception: - # as for datetimes, cannot upcast to object - fill_value = iNaT - else: - fill_value = iNaT + try: + fill_value = tslibs.Timedelta(fill_value).value + except Exception: + # upcast to object + dtype = np.object_ + fill_value = np.nan elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 4dfefdec031b2..1a0e052c0e5c6 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -338,12 +338,7 @@ def _hash_categories(categories, ordered=True): cat_array = [cat_array] hashed = _combine_hash_arrays(iter(cat_array), num_items=len(cat_array)) - if len(hashed) == 0: - # bug in Numpy<1.12 for length 0 arrays. Just return the correct - # value of 0 - return 0 - else: - return np.bitwise_xor.reduce(hashed) + return np.bitwise_xor.reduce(hashed) @classmethod def construct_array_type(cls): diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index fa60c326a19ea..78b3ae4427604 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -408,6 +408,7 @@ def array_equivalent(left, right, strict_nan=False): # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat + # TODO: remove old numpy compat code (or comment) if is_string_dtype(left) or is_string_dtype(right): if not strict_nan: diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 04a4669cc1a24..5870072751d72 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -418,11 +418,6 @@ def __setstate__(self, state): self._freq = own_state[1] self._tz = timezones.tz_standardize(own_state[2]) - # provide numpy < 1.7 compat - if nd_state[2] == 'M8[us]': - new_state = np.ndarray.__reduce__(data.astype('M8[ns]')) - np.ndarray.__setstate__(data, new_state[2]) - else: # pragma: no cover data = np.empty(state) np.ndarray.__setstate__(data, state) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 1f2a1ee52159e..ee366732595a2 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1489,11 +1489,6 @@ def quantile(self, qs, interpolation='linear', axis=0, axes=None): def _nanpercentile1D(values, mask, q, **kw): # mask is Union[ExtensionArray, ndarray] - # we convert to an ndarray for NumPy 1.9 compat, which didn't - # treat boolean-like arrays as boolean. This conversion would have - # been done inside ndarray.__getitem__ anyway, since values is - # an ndarray at this point. - mask = np.asarray(mask) values = values[~mask] if len(values) == 0: @@ -2812,9 +2807,7 @@ def set(self, locs, values, check=False): ------- None """ - if values.dtype != _NS_DTYPE: - # Workaround for numpy 1.6 bug - values = conversion.ensure_datetime64ns(values) + values = conversion.ensure_datetime64ns(values) self.values[locs] = values @@ -3133,7 +3126,7 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): # FIXME: optimization potential in case all mgrs contain slices and # combination of those slices is a slice, too. new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) - new_values = _vstack([b.values for b in blocks], dtype) + new_values = np.vstack([b.values for b in blocks]) argsort = np.argsort(new_mgr_locs) new_values = new_values[argsort] @@ -3145,17 +3138,6 @@ def _merge_blocks(blocks, dtype=None, _can_consolidate=True): return blocks -def _vstack(to_stack, dtype): - - # work around NumPy 1.6 bug - if dtype == _NS_DTYPE or dtype == _TD_DTYPE: - new_values = np.vstack([x.view('i8') for x in to_stack]) - return new_values.view(dtype) - - else: - return np.vstack(to_stack) - - def _block2d_to_blocknd(values, placement, shape, labels, ref_items): """ pivot to the labels shape """ panel_shape = (len(placement),) + shape diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0519c5e5abe33..c3bfd1ad24e51 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -256,9 +256,6 @@ def __getstate__(self): def __setstate__(self, state): def unpickle_block(values, mgr_locs): - # numpy < 1.7 pickle compat - if values.dtype == 'M8[us]': - values = values.astype('M8[ns]') return make_block(values, placement=mgr_locs) if (isinstance(state, tuple) and len(state) >= 4 and @@ -784,18 +781,6 @@ def _interleave(self): result = np.empty(self.shape, dtype=dtype) - if result.shape[0] == 0: - # Workaround for numpy 1.7 bug: - # - # >>> a = np.empty((0,10)) - # >>> a[slice(0,0)] - # array([], shape=(0, 10), dtype=float64) - # >>> a[[]] - # Traceback (most recent call last): - # File "", line 1, in - # IndexError: index 0 is out of bounds for axis 0 with size 0 - return result - itemmask = np.zeros(self.shape[0]) for blk in self.blocks: @@ -1178,8 +1163,7 @@ def insert(self, loc, item, value, allow_duplicates=False): blk.mgr_locs = new_mgr_locs if loc == self._blklocs.shape[0]: - # np.append is a lot faster (at least in numpy 1.7.1), let's use it - # if we can. + # np.append is a lot faster, let's use it if we can. self._blklocs = np.append(self._blklocs, 0) self._blknos = np.append(self._blknos, len(self.blocks)) else: @@ -2003,13 +1987,9 @@ def _transform_index(index, func, level=None): def _fast_count_smallints(arr): """Faster version of set(arr) for sequences of small numbers.""" - if len(arr) == 0: - # Handle empty arr case separately: numpy 1.6 chokes on that. - return np.empty((0, 2), dtype=arr.dtype) - else: - counts = np.bincount(arr.astype(np.int_)) - nz = counts.nonzero()[0] - return np.c_[nz, counts[nz]] + counts = np.bincount(arr.astype(np.int_)) + nz = counts.nonzero()[0] + return np.c_[nz, counts[nz]] def _preprocess_slice_or_indexer(slice_or_indexer, length, allow_fill): diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index 4a863372eea13..8ad2a48e8767c 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -334,8 +334,7 @@ def _bins_to_cuts(x, bins, right=True, labels=None, ids = ensure_int64(bins.searchsorted(x, side=side)) if include_lowest: - # Numpy 1.9 support: ensure this mask is a Numpy array - ids[np.asarray(x == bins[0])] = 1 + ids[x == bins[0]] = 1 na_mask = isna(x) | (ids == len(bins)) | (ids == 0) has_nas = na_mask.any() diff --git a/pandas/io/packers.py b/pandas/io/packers.py index 19a0b08d8fdda..c123ed864b6b7 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -251,7 +251,6 @@ def dtype_for(t): 'complex128': np.float64, 'complex64': np.float32} -# numpy 1.6.1 compat if hasattr(np, 'float128'): c2f_dict['complex256'] = np.float128 diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index c89d1df8ee64b..789f55a62dc58 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -6,8 +6,6 @@ from pandas.compat import PY3, BytesIO, cPickle as pkl, pickle_compat as pc -from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64_dtype - from pandas.io.common import _get_handle, _stringify_path @@ -200,10 +198,4 @@ def _pickle_array(arr): def _unpickle_array(bytes): arr = read_array(BytesIO(bytes)) - # All datetimes should be stored as M8[ns]. When unpickling with - # numpy1.6, it will read these as M8[us]. So this ensures all - # datetime64 types are read as MS[ns] - if is_datetime64_dtype(arr): - arr = arr.view(_NS_DTYPE) - return arr diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 5f71d0148ee88..227edf60951e6 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -37,7 +37,6 @@ def test_big_print(self): def test_empty_print(self): factor = Categorical([], ["a", "b", "c"]) expected = ("[], Categories (3, object): [a, b, c]") - # hack because array_repr changed in numpy > 1.6.x actual = repr(factor) assert actual == expected diff --git a/pandas/tests/dtypes/test_inference.py b/pandas/tests/dtypes/test_inference.py index f2552cffc6651..040a85007f5f9 100644 --- a/pandas/tests/dtypes/test_inference.py +++ b/pandas/tests/dtypes/test_inference.py @@ -1269,10 +1269,7 @@ def test_nan_to_nat_conversions(): s._data = s._data.setitem(indexer=tuple([slice(8, 9)]), value=np.nan) assert (isna(s[8])) - # numpy < 1.7.0 is wrong - from distutils.version import LooseVersion - if LooseVersion(np.__version__) >= LooseVersion('1.7.0'): - assert (s[8].value == np.datetime64('NaT').astype(np.int64)) + assert (s[8].value == np.datetime64('NaT').astype(np.int64)) @td.skip_if_no_scipy diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index c71d5d9f977f6..94eb49f276f78 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -164,9 +164,7 @@ def test_constructor_dtype_str_na_values(self, string_dtype): def test_constructor_rec(self): rec = self.frame.to_records(index=False) - - # Assigning causes segfault in NumPy < 1.5.1 - # rec.dtype.names = list(rec.dtype.names)[::-1] + rec.dtype.names = list(rec.dtype.names)[::-1] index = self.frame.index diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index bbe4914b5f447..88c64bf9e9b97 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -190,7 +190,6 @@ def _check_unary_op(op): _check_bin_op(operator.or_) _check_bin_op(operator.xor) - # operator.neg is deprecated in numpy >= 1.9 _check_unary_op(operator.inv) # TODO: belongs elsewhere def test_logical_with_nas(self): diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 4b0daac34c2e3..0071403816cbb 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -850,12 +850,9 @@ def test_equals_op(self): tm.assert_series_equal(series_a == item, Series(expected3)) def test_numpy_ufuncs(self): - # test ufuncs of numpy 1.9.2. see: + # test ufuncs of numpy, see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - for name, idx in compat.iteritems(self.indices): for func in [np.exp, np.exp2, np.expm1, np.log, np.log2, np.log10, np.log1p, np.sqrt, np.sin, np.cos, np.tan, np.arcsin, diff --git a/pandas/tests/indexes/multi/test_analytics.py b/pandas/tests/indexes/multi/test_analytics.py index 05adaada01ee5..3b40b2afe9c6d 100644 --- a/pandas/tests/indexes/multi/test_analytics.py +++ b/pandas/tests/indexes/multi/test_analytics.py @@ -275,12 +275,9 @@ def test_map_dictlike(idx, mapper): np.rad2deg ]) def test_numpy_ufuncs(func): - # test ufuncs of numpy 1.9.2. see: + # test ufuncs of numpy. see: # http://docs.scipy.org/doc/numpy/reference/ufuncs.html - # some functions are skipped because it may return different result - # for unicode input depending on numpy version - # copy and paste from idx fixture as pytest doesn't support # parameters and fixtures at the same time. major_axis = Index(['foo', 'bar', 'baz', 'qux']) diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index 8bc8cb3fb1535..f012c9c255cd9 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -94,7 +94,6 @@ class TestChaining(object): def test_setitem_chained_setfault(self): # GH6026 - # setfaults under numpy 1.7.1 (ok on 1.8) data = ['right', 'left', 'left', 'left', 'right', 'left', 'timeout'] mdata = ['right', 'left', 'left', 'left', 'right', 'left', 'none'] diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py index 53d07aeef304a..85b06001cf8a0 100644 --- a/pandas/tests/indexing/test_iloc.py +++ b/pandas/tests/indexing/test_iloc.py @@ -337,7 +337,7 @@ def test_iloc_setitem_list(self): tm.assert_frame_equal(df, expected) def test_iloc_setitem_pandas_object(self): - # GH 17193, affecting old numpy (1.7 and 1.8) + # GH 17193 s_orig = Series([0, 1, 2, 3]) expected = Series([0, -1, -2, 3]) diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index 4a68719eedc9a..84a0e3d867783 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -199,8 +199,6 @@ def roundtrip(key, obj, **kwargs): def test_long_strings(self): # GH6166 - # unconversion of long strings was being chopped in earlier - # versions of numpy < 1.7.2 df = DataFrame({'a': tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py index a5a7cc2217864..dae6fa2464b10 100644 --- a/pandas/tests/series/test_analytics.py +++ b/pandas/tests/series/test_analytics.py @@ -1051,12 +1051,6 @@ def test_clip_with_datetimes(self): def test_cummethods_bool(self): # GH 6270 - # looks like a buggy np.maximum.accumulate for numpy 1.6.1, py 3.2 - def cummin(x): - return np.minimum.accumulate(x) - - def cummax(x): - return np.maximum.accumulate(x) a = pd.Series([False, False, False, True, True, False, False]) b = ~a @@ -1064,8 +1058,8 @@ def cummax(x): d = ~c methods = {'cumsum': np.cumsum, 'cumprod': np.cumprod, - 'cummin': cummin, - 'cummax': cummax} + 'cummin': np.minimum.accumulate, + 'cummax': np.maximum.accumulate} args = product((a, b, c, d), methods) for s, method in args: expected = Series(methods[method](s.values)) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 084477d8202b1..c7efc1efaee8f 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -292,11 +292,10 @@ def test_none_comparison(self): assert not result.iat[0] assert not result.iat[1] - # this fails for numpy < 1.9 - # and oddly for *some* platforms - # result = None != o # noqa - # assert result.iat[0] - # assert result.iat[1] + result = None != o # noqa + assert result.iat[0] + assert result.iat[1] + if (is_datetime64_dtype(o) or is_datetimetz(o)): # Following DatetimeIndex (and Timestamp) convention, # inequality comparisons with Series[datetime64] raise diff --git a/pandas/tests/test_nanops.py b/pandas/tests/test_nanops.py index 49dbccb82fac8..e214d4c1985a9 100644 --- a/pandas/tests/test_nanops.py +++ b/pandas/tests/test_nanops.py @@ -464,7 +464,6 @@ def test_nankurt(self): allow_str=False, allow_date=False, allow_tdelta=False) - @td.skip_if_no("numpy", min_version="1.10.0") def test_nanprod(self): self.check_funs(nanops.nanprod, np.prod, allow_str=False, allow_date=False, allow_tdelta=False, diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 6d5d07b00398c..c0c4e627b1b2e 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -85,7 +85,6 @@ def test_sum(self): def test_mean(self): self._check_stat_op('mean', np.mean) - @td.skip_if_no("numpy", min_version="1.10.0") def test_prod(self): self._check_stat_op('prod', np.prod, skipna_alternative=np.nanprod) diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py index 22e758a0e59a7..333b93dbdf580 100644 --- a/pandas/tests/test_sorting.py +++ b/pandas/tests/test_sorting.py @@ -127,13 +127,6 @@ def test_nargsort(self): # np.argsort(items2) may not place NaNs first items2 = np.array(items, dtype='O') - try: - # GH 2785; due to a regression in NumPy1.6.2 - np.argsort(np.array([[1, 2], [1, 3], [1, 2]], dtype='i')) - np.argsort(items2, kind='mergesort') - except TypeError: - pytest.skip('requested sort not available for type') - # mergesort is the most difficult to get right because we want it to be # stable. From 8e00645332883bd37a9f6ef94fe658015e5b956d Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 19 Nov 2018 22:24:32 +0100 Subject: [PATCH 02/16] Clean up post MPL bump to 2.0 --- pandas/plotting/_misc.py | 3 --- pandas/tests/plotting/test_datetimelike.py | 1 - pandas/tests/plotting/test_frame.py | 13 ++++--------- pandas/tests/plotting/test_series.py | 3 +-- 4 files changed, 5 insertions(+), 15 deletions(-) diff --git a/pandas/plotting/_misc.py b/pandas/plotting/_misc.py index aeb97a84e594a..dbad5a04161c9 100644 --- a/pandas/plotting/_misc.py +++ b/pandas/plotting/_misc.py @@ -138,9 +138,6 @@ def scatter_matrix(frame, alpha=0.5, figsize=None, ax=None, grid=False, def _get_marker_compat(marker): import matplotlib.lines as mlines - import matplotlib as mpl - if mpl.__version__ < '1.1.0' and marker == '.': - return 'o' if marker not in mlines.lineMarkers: return 'o' return marker diff --git a/pandas/tests/plotting/test_datetimelike.py b/pandas/tests/plotting/test_datetimelike.py index 4865638671ea9..2e204f6d18d70 100644 --- a/pandas/tests/plotting/test_datetimelike.py +++ b/pandas/tests/plotting/test_datetimelike.py @@ -1075,7 +1075,6 @@ def test_irreg_dtypes(self): _, ax = self.plt.subplots() _check_plot_works(df.plot, ax=ax) - @pytest.mark.xfail(not PY3, reason="failing on mpl 1.4.3 on PY2") @pytest.mark.slow def test_time(self): t = datetime(1, 1, 1, 3, 30, 0) diff --git a/pandas/tests/plotting/test_frame.py b/pandas/tests/plotting/test_frame.py index 25dfbaba762c9..f5708b24d22b1 100644 --- a/pandas/tests/plotting/test_frame.py +++ b/pandas/tests/plotting/test_frame.py @@ -69,8 +69,7 @@ def test_plot(self): self._check_axes_shape(axes, axes_num=4, layout=(4, 1)) df = DataFrame({'x': [1, 2], 'y': [3, 4]}) - # mpl >= 1.5.2 (or slightly below) throw AttributError - with pytest.raises((TypeError, AttributeError)): + with pytest.raises(AttributeError, match='Unknown property blarg'): df.plot.line(blarg=True) df = DataFrame(np.random.rand(10, 3), @@ -2967,13 +2966,9 @@ def test_passed_bar_colors(self): def test_rcParams_bar_colors(self): import matplotlib as mpl color_tuples = [(0.9, 0, 0, 1), (0, 0.9, 0, 1), (0, 0, 0.9, 1)] - try: # mpl 1.5 - with mpl.rc_context( - rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") - except (AttributeError, KeyError): # mpl 1.4 - with mpl.rc_context(rc={'axes.color_cycle': color_tuples}): - barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") + with mpl.rc_context( + rc={'axes.prop_cycle': mpl.cycler("color", color_tuples)}): + barplot = pd.DataFrame([[1, 2, 3]]).plot(kind="bar") assert color_tuples == [c.get_facecolor() for c in barplot.patches] @pytest.mark.parametrize('method', ['line', 'barh', 'bar']) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index dc708278836d2..2413c370a1a87 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -767,8 +767,7 @@ def test_errorbar_plot(self): s.plot(yerr=np.arange(11)) s_err = ['zzz'] * 10 - # in mpl 1.5+ this is a TypeError - with pytest.raises((ValueError, TypeError)): + with pytest.raises(TypeError): s.plot(yerr=s_err) @td.xfail_if_mpl_2_2 From 91e97103e2fda92d2169ad5c411bf69eca5e0507 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 19 Nov 2018 23:02:45 +0100 Subject: [PATCH 03/16] Revert change that causes segfault in take --- pandas/core/dtypes/cast.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 241e7a8639e0b..b686ea7f86fde 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -264,12 +264,29 @@ def maybe_promote(dtype, fill_value=np.nan): # returns tuple of (dtype, fill_value) if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - try: - fill_value = tslibs.Timedelta(fill_value).value - except Exception: - # upcast to object - dtype = np.object_ - fill_value = np.nan + # for now: refuse to upcast datetime64 + # (this is because datetime64 will not implicitly upconvert + # to object correctly as of numpy 1.6.1) + # TODO: remove old numpy compat code (without introducing segfault for + # tests/test_take.py::TestTake::test_2d_datetime64) + if isna(fill_value): + fill_value = iNaT + else: + if issubclass(dtype.type, np.datetime64): + try: + fill_value = tslibs.Timestamp(fill_value).value + except Exception: + # the proper thing to do here would probably be to upcast + # to object (but numpy 1.6.1 doesn't do this properly) + fill_value = iNaT + elif issubclass(dtype.type, np.timedelta64): + try: + fill_value = tslibs.Timedelta(fill_value).value + except Exception: + # as for datetimes, cannot upcast to object + fill_value = iNaT + else: + fill_value = iNaT elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT From 17d971d9b312913d6151beaa7922b62e4480c50b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Mon, 19 Nov 2018 23:10:59 +0100 Subject: [PATCH 04/16] Review (jbrockmendel) --- pandas/_libs/lib.pyx | 22 ---------------------- pandas/core/dtypes/cast.py | 6 +----- pandas/core/internals/blocks.py | 2 +- pandas/io/packers.py | 1 + 4 files changed, 3 insertions(+), 28 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 7b83387036de0..4b909c3b59667 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -476,28 +476,6 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return True -@cython.wraparound(False) -@cython.boundscheck(False) -def astype_intsafe(ndarray[object] arr, new_dtype): - cdef: - Py_ssize_t i, n = len(arr) - object val - bint is_datelike - ndarray result - - is_datelike = new_dtype == 'm8[ns]' - - result = np.empty(n, dtype=new_dtype) - for i in range(n): - val = arr[i] - if is_datelike and checknull(val): - result[i] = NPY_NAT - else: - result[i] = val - - return result - - @cython.wraparound(False) @cython.boundscheck(False) def astype_unicode(arr: ndarray, skipna: bool=False) -> ndarray[object]: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b686ea7f86fde..27f72f25adf33 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -725,14 +725,10 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): elif is_object_dtype(arr): - # work around NumPy brokenness, #1987 - if np.issubdtype(dtype.type, np.integer): - return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) - # if we have a datetime/timedelta array of objects # then coerce to a proper dtype and recall astype_nansafe - elif is_datetime64_dtype(dtype): + if is_datetime64_dtype(dtype): from pandas import to_datetime return astype_nansafe(to_datetime(arr).values, dtype, copy=copy) elif is_timedelta64_dtype(dtype): diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index ee366732595a2..d75b3191573e6 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2807,7 +2807,7 @@ def set(self, locs, values, check=False): ------- None """ - values = conversion.ensure_datetime64ns(values) + values = conversion.ensure_datetime64ns(values, copy=False) self.values[locs] = values diff --git a/pandas/io/packers.py b/pandas/io/packers.py index c123ed864b6b7..9d0f550ff73c5 100644 --- a/pandas/io/packers.py +++ b/pandas/io/packers.py @@ -251,6 +251,7 @@ def dtype_for(t): 'complex128': np.float64, 'complex64': np.float32} +# windows (32 bit) compat if hasattr(np, 'float128'): c2f_dict['complex256'] = np.float128 From 7ef44b6e7275c5d437b7154aaa1c64df0e34431e Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 18:23:56 +0100 Subject: [PATCH 05/16] Revert removal that breaks a test --- pandas/_libs/lib.pyx | 19 +++++++++++++++++++ pandas/core/dtypes/cast.py | 17 ++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 6adb244112bb7..8f8b89ea35ddf 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -509,6 +509,25 @@ def array_equivalent_object(left: object[:], right: object[:]) -> bool: return True +@cython.wraparound(False) +@cython.boundscheck(False) +def astype_intsafe(ndarray[object] arr, new_dtype): + cdef: + Py_ssize_t i, n = len(arr) + object val + bint is_datelike + ndarray result + is_datelike = new_dtype == 'm8[ns]' + result = np.empty(n, dtype=new_dtype) + for i in range(n): + val = arr[i] + if is_datelike and checknull(val): + result[i] = NPY_NAT + else: + result[i] = val + return result + + @cython.wraparound(False) @cython.boundscheck(False) def astype_unicode(arr: ndarray, skipna: bool=False) -> ndarray[object]: diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 8b2d85c0723e3..f91802efba3a4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -725,10 +725,25 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): elif is_object_dtype(arr): + if np.issubdtype(dtype.type, np.integer): + # TODO: this is an old numpy compat branch that is not necessary + # anymore for its original purpose (unsafe casting from object to + # int, see GH 1987). + # Currently, timedelta dtypes get routed through here; whereas + # uncommenting them would re-call (see below) + # >>> astype_nansafe(to_timedelta(arr).values, dtype, copy=copy), + # and end up in the `is_timedelta64_dtype(arr)` above, which + # explicitly and deliberately returns a float dtype. + # However, the test + # reshape/merge/test_merge.py::TestMerge:;test_other_timedelta_unit + # expects an explicit timedelta dtype as output. + # Once this is fixed, `astype_intsafe` can be deleted from lib. + return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + # if we have a datetime/timedelta array of objects # then coerce to a proper dtype and recall astype_nansafe - if is_datetime64_dtype(dtype): + elif is_datetime64_dtype(dtype): from pandas import to_datetime return astype_nansafe(to_datetime(arr).values, dtype, copy=copy) elif is_timedelta64_dtype(dtype): From 83813d7a0414cc0f1823667b72539e1e131edddc Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 21:47:20 +0100 Subject: [PATCH 06/16] Fix isort --- pandas/core/arrays/categorical.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 184af1087eb38..de2b91c8eae3d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -5,13 +5,13 @@ import numpy as np -from pandas import compat +from pandas._libs import algos as libalgos +import pandas.compat as compat from pandas.compat import lzip, u from pandas.compat.numpy import function as nv from pandas.util._decorators import ( Appender, Substitution, cache_readonly, deprecate_kwarg) from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs -from pandas._libs import algos as libalgos from pandas.core.dtypes.cast import ( coerce_indexer_dtype, maybe_infer_to_datetimelike) From f8a68a8c50b08abd2c608f56966301064b83ecb7 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 21:51:57 +0100 Subject: [PATCH 07/16] Add if branch for old code failing under PY2 --- pandas/tests/frame/test_constructors.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 94eb49f276f78..76e92042cbe6a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -15,7 +15,7 @@ from pandas.core.dtypes.common import is_integer_dtype from pandas.compat import (lmap, long, zip, range, lrange, lzip, - OrderedDict, is_platform_little_endian, PY36) + OrderedDict, is_platform_little_endian, PY3, PY36) from pandas import compat from pandas import (DataFrame, Index, Series, isna, MultiIndex, Timedelta, Timestamp, @@ -164,7 +164,9 @@ def test_constructor_dtype_str_na_values(self, string_dtype): def test_constructor_rec(self): rec = self.frame.to_records(index=False) - rec.dtype.names = list(rec.dtype.names)[::-1] + if PY3: + # unicode error under PY2 + rec.dtype.names = list(rec.dtype.names)[::-1] index = self.frame.index From aef0fe2bda25b82ddd6305cdc73b62e26e0ad038 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 21:52:20 +0100 Subject: [PATCH 08/16] Partly revert MPL compat fix --- pandas/tests/plotting/test_series.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/plotting/test_series.py b/pandas/tests/plotting/test_series.py index 2413c370a1a87..e6519c7db7a7b 100644 --- a/pandas/tests/plotting/test_series.py +++ b/pandas/tests/plotting/test_series.py @@ -767,7 +767,8 @@ def test_errorbar_plot(self): s.plot(yerr=np.arange(11)) s_err = ['zzz'] * 10 - with pytest.raises(TypeError): + # MPL > 2.0.0 will most likely use TypeError here + with pytest.raises((TypeError, ValueError)): s.plot(yerr=s_err) @td.xfail_if_mpl_2_2 From 1fdaecd3760910ee1b296ec46a566d475930d4a0 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 22:04:26 +0100 Subject: [PATCH 09/16] revert fix in arrays/categorical.py --- pandas/core/arrays/categorical.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index de2b91c8eae3d..24df4c7a9f379 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -5,7 +5,7 @@ import numpy as np -from pandas._libs import algos as libalgos +from pandas._libs import algos as libalgos, lib import pandas.compat as compat from pandas.compat import lzip, u from pandas.compat.numpy import function as nv @@ -98,6 +98,14 @@ def f(self, other): ret[na_mask] = False return ret + # Numpy < 1.13 may convert a scalar to a zerodim array during + # comparison operation when second arg has higher priority, e.g. + # + # cat[0] < cat + # + # With cat[0], for example, being ``np.int64(1)`` by the time it gets + # into this function would become ``np.array(1)``. + other = lib.item_from_zerodim(other) if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) From d2cb333edbf99e158b9f776a4a2bb0b5b3822a71 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 22:07:32 +0100 Subject: [PATCH 10/16] Correctly rename azure container --- ci/azure/linux.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/linux.yml b/ci/azure/linux.yml index b5a8e36d5097d..1d39d0ce22638 100644 --- a/ci/azure/linux.yml +++ b/ci/azure/linux.yml @@ -9,7 +9,7 @@ jobs: strategy: maxParallel: 11 matrix: - py27_np_19: + py27_np_120: ENV_FILE: ci/deps/azure-27-compat.yaml CONDA_PY: "27" CONDA_ENV: pandas From d854b29d3ad68fe6f7fcb0a4d675ccdae87a59bc Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 22:14:28 +0100 Subject: [PATCH 11/16] Change timedelta-branch in cast.py (review jreback) --- pandas/core/dtypes/cast.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f91802efba3a4..072955663405a 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -725,7 +725,10 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): elif is_object_dtype(arr): - if np.issubdtype(dtype.type, np.integer): + # if we have a datetime/timedelta array of objects + # then coerce to a proper dtype and recall astype_nansafe + + if is_timedelta64_dtype(dtype): # TODO: this is an old numpy compat branch that is not necessary # anymore for its original purpose (unsafe casting from object to # int, see GH 1987). @@ -740,15 +743,12 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): # Once this is fixed, `astype_intsafe` can be deleted from lib. return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) - # if we have a datetime/timedelta array of objects - # then coerce to a proper dtype and recall astype_nansafe - elif is_datetime64_dtype(dtype): from pandas import to_datetime return astype_nansafe(to_datetime(arr).values, dtype, copy=copy) - elif is_timedelta64_dtype(dtype): - from pandas import to_timedelta - return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy) + # elif is_timedelta64_dtype(dtype): + # from pandas import to_timedelta + # return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy) if dtype.name in ("datetime64", "timedelta64"): msg = ("The '{dtype}' dtype has no unit. " From d2b1f78f9315a2f477438cedf778169c0fc01042 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Wed, 21 Nov 2018 22:18:39 +0100 Subject: [PATCH 12/16] Re-add missing empty lines --- pandas/_libs/lib.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 8f8b89ea35ddf..874206378f79c 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -517,6 +517,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): object val bint is_datelike ndarray result + is_datelike = new_dtype == 'm8[ns]' result = np.empty(n, dtype=new_dtype) for i in range(n): @@ -525,6 +526,7 @@ def astype_intsafe(ndarray[object] arr, new_dtype): result[i] = NPY_NAT else: result[i] = val + return result From 9c2cfbd01a2f858c40ce88c280e8f34845486d27 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 22 Nov 2018 00:16:27 +0100 Subject: [PATCH 13/16] Re-add integer subtype branch due to failed IntegerArray tests --- pandas/core/dtypes/cast.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 072955663405a..e08ae5d21c992 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -728,7 +728,8 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): # if we have a datetime/timedelta array of objects # then coerce to a proper dtype and recall astype_nansafe - if is_timedelta64_dtype(dtype): + if (is_timedelta64_dtype(dtype) + or np.issubdtype(dtype.type, np.integer)): # TODO: this is an old numpy compat branch that is not necessary # anymore for its original purpose (unsafe casting from object to # int, see GH 1987). @@ -736,11 +737,17 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): # uncommenting them would re-call (see below) # >>> astype_nansafe(to_timedelta(arr).values, dtype, copy=copy), # and end up in the `is_timedelta64_dtype(arr)` above, which - # explicitly and deliberately returns a float dtype. - # However, the test + # deliberately returns a float dtype. However, the test # reshape/merge/test_merge.py::TestMerge:;test_other_timedelta_unit - # expects an explicit timedelta dtype as output. - # Once this is fixed, `astype_intsafe` can be deleted from lib. + # expects an explicit timedelta dtype as output - a contradiction. + + # TODO: the case of np.issubdtype(dtype.type, np.integer) is only + # relevant anymore for IntegerArray, and should be solved by having + # consistent astyping for extension arrays, see GH 22384, as well + # as a branch here for `is_extension_array_dtype(arr)` + + # TODO: Once those things are fixed, `astype_intsafe` can be + # removed completely from _libs.lib. return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) elif is_datetime64_dtype(dtype): From ecfe8248a856594aaa3d10c758e1b724ba2e2024 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Thu, 22 Nov 2018 19:52:49 +0100 Subject: [PATCH 14/16] Second attempt at removing compat code from maybe_promote --- pandas/core/dtypes/cast.py | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index e08ae5d21c992..f6eb16bc9569d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -264,29 +264,22 @@ def maybe_promote(dtype, fill_value=np.nan): # returns tuple of (dtype, fill_value) if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - # for now: refuse to upcast datetime64 - # (this is because datetime64 will not implicitly upconvert - # to object correctly as of numpy 1.6.1) - # TODO: remove old numpy compat code (without introducing segfault for - # tests/test_take.py::TestTake::test_2d_datetime64) if isna(fill_value): fill_value = iNaT + elif issubclass(dtype.type, np.datetime64): + try: + fill_value = tslibs.Timestamp(fill_value).value + except Exception: + dtype = np.object_ + fill_value = np.nan + elif issubclass(dtype.type, np.timedelta64): + try: + fill_value = tslibs.Timedelta(fill_value).value + except Exception: + dtype = np.object_ + fill_value = np.nan else: - if issubclass(dtype.type, np.datetime64): - try: - fill_value = tslibs.Timestamp(fill_value).value - except Exception: - # the proper thing to do here would probably be to upcast - # to object (but numpy 1.6.1 doesn't do this properly) - fill_value = iNaT - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = tslibs.Timedelta(fill_value).value - except Exception: - # as for datetimes, cannot upcast to object - fill_value = iNaT - else: - fill_value = iNaT + fill_value = iNaT elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT From 844c2aa42c871c39e9c6db04886200a5db295d16 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Fri, 23 Nov 2018 08:53:59 +0100 Subject: [PATCH 15/16] Review (jreback) --- pandas/core/dtypes/cast.py | 57 ++++++++++------------------------- pandas/core/dtypes/missing.py | 1 - 2 files changed, 16 insertions(+), 42 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f6eb16bc9569d..9ae2e3a1b88c1 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -263,23 +263,10 @@ def maybe_promote(dtype, fill_value=np.nan): fill_value = np.nan # returns tuple of (dtype, fill_value) - if issubclass(dtype.type, (np.datetime64, np.timedelta64)): - if isna(fill_value): - fill_value = iNaT - elif issubclass(dtype.type, np.datetime64): - try: - fill_value = tslibs.Timestamp(fill_value).value - except Exception: - dtype = np.object_ - fill_value = np.nan - elif issubclass(dtype.type, np.timedelta64): - try: - fill_value = tslibs.Timedelta(fill_value).value - except Exception: - dtype = np.object_ - fill_value = np.nan - else: - fill_value = iNaT + if issubclass(dtype.type, np.datetime64): + fill_value = tslibs.Timestamp(fill_value).value + elif issubclass(dtype.type, np.timedelta64): + fill_value = tslibs.Timedelta(fill_value).value elif is_datetimetz(dtype): if isna(fill_value): fill_value = iNaT @@ -718,37 +705,25 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): elif is_object_dtype(arr): - # if we have a datetime/timedelta array of objects - # then coerce to a proper dtype and recall astype_nansafe - - if (is_timedelta64_dtype(dtype) - or np.issubdtype(dtype.type, np.integer)): + if np.issubdtype(dtype.type, np.integer): # TODO: this is an old numpy compat branch that is not necessary # anymore for its original purpose (unsafe casting from object to - # int, see GH 1987). - # Currently, timedelta dtypes get routed through here; whereas - # uncommenting them would re-call (see below) - # >>> astype_nansafe(to_timedelta(arr).values, dtype, copy=copy), - # and end up in the `is_timedelta64_dtype(arr)` above, which - # deliberately returns a float dtype. However, the test - # reshape/merge/test_merge.py::TestMerge:;test_other_timedelta_unit - # expects an explicit timedelta dtype as output - a contradiction. - - # TODO: the case of np.issubdtype(dtype.type, np.integer) is only - # relevant anymore for IntegerArray, and should be solved by having - # consistent astyping for extension arrays, see GH 22384, as well - # as a branch here for `is_extension_array_dtype(arr)` - - # TODO: Once those things are fixed, `astype_intsafe` can be - # removed completely from _libs.lib. + # int, see GH 1987). However, it is currently necessary for + # timedelta and IntegerArray tests (the dedicated timedelta branch + # below contradicts TestMerge::test_other_timedelta_unit and the + # IntegerArray tests would need a `is_extension_array_dtype(arr)` + # branch in this method). return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) + # if we have a datetime/timedelta array of objects + # then coerce to a proper dtype and recall astype_nansafe + elif is_datetime64_dtype(dtype): from pandas import to_datetime return astype_nansafe(to_datetime(arr).values, dtype, copy=copy) - # elif is_timedelta64_dtype(dtype): - # from pandas import to_timedelta - # return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy) + elif is_timedelta64_dtype(dtype): + from pandas import to_timedelta + return astype_nansafe(to_timedelta(arr).values, dtype, copy=copy) if dtype.name in ("datetime64", "timedelta64"): msg = ("The '{dtype}' dtype has no unit. " diff --git a/pandas/core/dtypes/missing.py b/pandas/core/dtypes/missing.py index 78b3ae4427604..fa60c326a19ea 100644 --- a/pandas/core/dtypes/missing.py +++ b/pandas/core/dtypes/missing.py @@ -408,7 +408,6 @@ def array_equivalent(left, right, strict_nan=False): # Object arrays can contain None, NaN and NaT. # string dtypes must be come to this path for NumPy 1.7.1 compat - # TODO: remove old numpy compat code (or comment) if is_string_dtype(left) or is_string_dtype(right): if not strict_nan: From ce5bd94125310571c6ca9cfa3ab4aed31b49dcde Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 25 Nov 2018 14:01:03 +0100 Subject: [PATCH 16/16] Review (jreback) --- pandas/core/dtypes/cast.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 9ae2e3a1b88c1..afe6ba45bb400 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -705,14 +705,8 @@ def astype_nansafe(arr, dtype, copy=True, skipna=False): elif is_object_dtype(arr): + # work around NumPy brokenness, #1987 if np.issubdtype(dtype.type, np.integer): - # TODO: this is an old numpy compat branch that is not necessary - # anymore for its original purpose (unsafe casting from object to - # int, see GH 1987). However, it is currently necessary for - # timedelta and IntegerArray tests (the dedicated timedelta branch - # below contradicts TestMerge::test_other_timedelta_unit and the - # IntegerArray tests would need a `is_extension_array_dtype(arr)` - # branch in this method). return lib.astype_intsafe(arr.ravel(), dtype).reshape(arr.shape) # if we have a datetime/timedelta array of objects