From c79901ffb28c5b68016ea88325d7b0efaf342f9b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 24 Feb 2019 02:48:07 +0100 Subject: [PATCH 1/7] TST: add test coverage for maybe_promote --- pandas/conftest.py | 65 +- pandas/tests/dtypes/cast/test_promote.py | 955 +++++++++++++++++++++++ 2 files changed, 1013 insertions(+), 7 deletions(-) create mode 100644 pandas/tests/dtypes/cast/test_promote.py diff --git a/pandas/conftest.py b/pandas/conftest.py index 35a6b5df35ddc..debc9734730f3 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -367,10 +367,15 @@ def unique_nulls_fixture(request): TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific', 'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300), FixedOffset(0), FixedOffset(-300)] +TIMEZONE_IDS = ['None', 'UTC', 'US/Eastern', 'Asia/Tokyo', + 'dateutil/US/Pacific', 'dateutil/Asia/Singapore', + 'dateutil.tz.tzutc()', 'dateutil.tz.tzlocal()', + 'pytz.FixedOffset(300)', 'pytz.FixedOffset(0)', + 'pytz.FixedOffset(-300)'] -@td.parametrize_fixture_doc(str(TIMEZONES)) -@pytest.fixture(params=TIMEZONES) +@td.parametrize_fixture_doc(str(TIMEZONE_IDS)) +@pytest.fixture(params=TIMEZONES, ids=TIMEZONE_IDS) def tz_naive_fixture(request): """ Fixture for trying timezones including default (None): {0} @@ -378,8 +383,8 @@ def tz_naive_fixture(request): return request.param -@td.parametrize_fixture_doc(str(TIMEZONES[1:])) -@pytest.fixture(params=TIMEZONES[1:]) +@td.parametrize_fixture_doc(str(TIMEZONE_IDS[1:])) +@pytest.fixture(params=TIMEZONES[1:], ids=TIMEZONE_IDS[1:]) def tz_aware_fixture(request): """ Fixture for trying explicit timezones: {0} @@ -387,8 +392,14 @@ def tz_aware_fixture(request): return request.param +# Generate cartesian product of tz_aware_fixture: +tz_aware_fixture2 = tz_aware_fixture + + # ---------------------------------------------------------------- # Dtypes + + UNSIGNED_INT_DTYPES = ["uint8", "uint16", "uint32", "uint64"] UNSIGNED_EA_INT_DTYPES = ["UInt8", 
"UInt16", "UInt32", "UInt64"] SIGNED_INT_DTYPES = [int, "int8", "int16", "int32", "int64"] @@ -400,8 +411,8 @@ def tz_aware_fixture(request): COMPLEX_DTYPES = [complex, "complex64", "complex128"] STRING_DTYPES = [str, 'str', 'U'] -DATETIME_DTYPES = ['datetime64[ns]', 'M8[ns]'] -TIMEDELTA_DTYPES = ['timedelta64[ns]', 'm8[ns]'] +DATETIME64_DTYPES = ['datetime64[ns]', 'M8[ns]'] +TIMEDELTA64_DTYPES = ['timedelta64[ns]', 'm8[ns]'] BOOL_DTYPES = [bool, 'bool'] BYTES_DTYPES = [bytes, 'bytes'] @@ -409,7 +420,7 @@ def tz_aware_fixture(request): ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES - + DATETIME_DTYPES + TIMEDELTA_DTYPES + BOOL_DTYPES + + DATETIME64_DTYPES + TIMEDELTA64_DTYPES + BOOL_DTYPES + OBJECT_DTYPES + BYTES_DTYPES * PY3) # bytes only for PY3 @@ -424,6 +435,46 @@ def string_dtype(request): return request.param +@pytest.fixture(params=BYTES_DTYPES) +def bytes_dtype(request): + """Parametrized fixture for bytes dtypes. + + * bytes + * 'bytes' + """ + return request.param + + +@pytest.fixture(params=OBJECT_DTYPES) +def object_dtype(request): + """Parametrized fixture for object dtypes. + + * object + * 'object' + """ + return request.param + + +@pytest.fixture(params=DATETIME64_DTYPES) +def datetime64_dtype(request): + """Parametrized fixture for datetime/timedelta dtypes. + + * 'datetime64[ns]' + * 'M8[ns]' + """ + return request.param + + +@pytest.fixture(params=TIMEDELTA64_DTYPES) +def timedelta64_dtype(request): + """Parametrized fixture for datetime/timedelta dtypes. 
+ + * 'timedelta64[ns]' + * 'm8[ns]' + """ + return request.param + + @pytest.fixture(params=FLOAT_DTYPES) def float_dtype(request): """ diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py new file mode 100644 index 0000000000000..f294c82897b69 --- /dev/null +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -0,0 +1,955 @@ +# -*- coding: utf-8 -*- + +""" +These test the method maybe_promote from core/dtypes/cast.py +""" + +import datetime + +import numpy as np +import pytest + +from pandas._libs.tslibs import NaT, iNaT +from pandas.compat import is_platform_windows + +from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.common import ( + is_complex_dtype, is_datetime64_dtype, is_datetime_or_timedelta_dtype, + is_float_dtype, is_integer_dtype, is_object_dtype, is_scalar, + is_string_dtype, is_timedelta64_dtype) +from pandas.core.dtypes.dtypes import DatetimeTZDtype + +import pandas as pd + + +def _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar=None, exp_val_for_array=None): + assert is_scalar(fill_value) + + if boxed: + fill_array = np.array([fill_value], dtype=box_dtype) + result_dtype, result_fill_value = maybe_promote(dtype, fill_array) + expected_fill_value = exp_val_for_array + else: + result_dtype, result_fill_value = maybe_promote(dtype, fill_value) + expected_fill_value = exp_val_for_scalar + + # try/except as numpy dtypes (i.e. if result_dtype is np.object_) do not + # know some expected dtypes like DatetimeTZDtype, and hence raise TypeError + try: + assert result_dtype == expected_dtype + except TypeError: + assert expected_dtype == result_dtype + + # for equal values, also check type (relevant e.g. for int vs float, resp. 
+ # for different datetimes and timedeltas) + # for missing values, None == None and iNaT == iNaT, but np.nan != np.nan + assert ((result_fill_value == expected_fill_value + and type(result_fill_value) == type(expected_fill_value)) + or (result_fill_value is np.nan and expected_fill_value is np.nan) + or (result_fill_value is NaT and expected_fill_value is NaT)) + + +@pytest.mark.parametrize('dtype, fill_value, expected_dtype', [ + # size 8 + ('int8', 1, 'int8'), + ('int8', np.iinfo('int8').max + 1, 'int16'), + ('int8', np.iinfo('int16').max + 1, 'int32'), + ('int8', np.iinfo('int32').max + 1, 'int64'), + ('int8', np.iinfo('int64').max + 1, 'object'), + ('int8', -1, 'int8'), + ('int8', np.iinfo('int8').min - 1, 'int16'), + ('int8', np.iinfo('int16').min - 1, 'int32'), + ('int8', np.iinfo('int32').min - 1, 'int64'), + ('int8', np.iinfo('int64').min - 1, 'object'), + # keep signed-ness as long as possible + ('uint8', 1, 'uint8'), + ('uint8', np.iinfo('int8').max + 1, 'uint8'), + ('uint8', np.iinfo('uint8').max + 1, 'uint16'), + ('uint8', np.iinfo('int16').max + 1, 'uint16'), + ('uint8', np.iinfo('uint16').max + 1, 'uint32'), + ('uint8', np.iinfo('int32').max + 1, 'uint32'), + ('uint8', np.iinfo('uint32').max + 1, 'uint64'), + ('uint8', np.iinfo('int64').max + 1, 'uint64'), + ('uint8', np.iinfo('uint64').max + 1, 'object'), + # max of uint8 cannot be contained in int8 + ('uint8', -1, 'int16'), + ('uint8', np.iinfo('int8').min - 1, 'int16'), + ('uint8', np.iinfo('int16').min - 1, 'int32'), + ('uint8', np.iinfo('int32').min - 1, 'int64'), + ('uint8', np.iinfo('int64').min - 1, 'object'), + # size 16 + ('int16', 1, 'int16'), + ('int16', np.iinfo('int8').max + 1, 'int16'), + ('int16', np.iinfo('int16').max + 1, 'int32'), + ('int16', np.iinfo('int32').max + 1, 'int64'), + ('int16', np.iinfo('int64').max + 1, 'object'), + ('int16', -1, 'int16'), + ('int16', np.iinfo('int8').min - 1, 'int16'), + ('int16', np.iinfo('int16').min - 1, 'int32'), + ('int16', np.iinfo('int32').min 
- 1, 'int64'), + ('int16', np.iinfo('int64').min - 1, 'object'), + ('uint16', 1, 'uint16'), + ('uint16', np.iinfo('int8').max + 1, 'uint16'), + ('uint16', np.iinfo('uint8').max + 1, 'uint16'), + ('uint16', np.iinfo('int16').max + 1, 'uint16'), + ('uint16', np.iinfo('uint16').max + 1, 'uint32'), + ('uint16', np.iinfo('int32').max + 1, 'uint32'), + ('uint16', np.iinfo('uint32').max + 1, 'uint64'), + ('uint16', np.iinfo('int64').max + 1, 'uint64'), + ('uint16', np.iinfo('uint64').max + 1, 'object'), + ('uint16', -1, 'int32'), + ('uint16', np.iinfo('int8').min - 1, 'int32'), + ('uint16', np.iinfo('int16').min - 1, 'int32'), + ('uint16', np.iinfo('int32').min - 1, 'int64'), + ('uint16', np.iinfo('int64').min - 1, 'object'), + # size 32 + ('int32', 1, 'int32'), + ('int32', np.iinfo('int8').max + 1, 'int32'), + ('int32', np.iinfo('int16').max + 1, 'int32'), + ('int32', np.iinfo('int32').max + 1, 'int64'), + ('int32', np.iinfo('int64').max + 1, 'object'), + ('int32', -1, 'int32'), + ('int32', np.iinfo('int8').min - 1, 'int32'), + ('int32', np.iinfo('int16').min - 1, 'int32'), + ('int32', np.iinfo('int32').min - 1, 'int64'), + ('int32', np.iinfo('int64').min - 1, 'object'), + ('uint32', 1, 'uint32'), + ('uint32', np.iinfo('int8').max + 1, 'uint32'), + ('uint32', np.iinfo('uint8').max + 1, 'uint32'), + ('uint32', np.iinfo('int16').max + 1, 'uint32'), + ('uint32', np.iinfo('uint16').max + 1, 'uint32'), + ('uint32', np.iinfo('int32').max + 1, 'uint32'), + ('uint32', np.iinfo('uint32').max + 1, 'uint64'), + ('uint32', np.iinfo('int64').max + 1, 'uint64'), + ('uint32', np.iinfo('uint64').max + 1, 'object'), + ('uint32', -1, 'int64'), + ('uint32', np.iinfo('int8').min - 1, 'int64'), + ('uint32', np.iinfo('int16').min - 1, 'int64'), + ('uint32', np.iinfo('int32').min - 1, 'int64'), + ('uint32', np.iinfo('int64').min - 1, 'object'), + # size 64 + ('int64', 1, 'int64'), + ('int64', np.iinfo('int8').max + 1, 'int64'), + ('int64', np.iinfo('int16').max + 1, 'int64'), + ('int64', 
np.iinfo('int32').max + 1, 'int64'), + ('int64', np.iinfo('int64').max + 1, 'object'), + ('int64', -1, 'int64'), + ('int64', np.iinfo('int8').min - 1, 'int64'), + ('int64', np.iinfo('int16').min - 1, 'int64'), + ('int64', np.iinfo('int32').min - 1, 'int64'), + ('int64', np.iinfo('int64').min - 1, 'object'), + ('uint64', 1, 'uint64'), + ('uint64', np.iinfo('int8').max + 1, 'uint64'), + ('uint64', np.iinfo('uint8').max + 1, 'uint64'), + ('uint64', np.iinfo('int16').max + 1, 'uint64'), + ('uint64', np.iinfo('uint16').max + 1, 'uint64'), + ('uint64', np.iinfo('int32').max + 1, 'uint64'), + ('uint64', np.iinfo('uint32').max + 1, 'uint64'), + ('uint64', np.iinfo('int64').max + 1, 'uint64'), + ('uint64', np.iinfo('uint64').max + 1, 'object'), + ('uint64', -1, 'object'), + ('uint64', np.iinfo('int8').min - 1, 'object'), + ('uint64', np.iinfo('int16').min - 1, 'object'), + ('uint64', np.iinfo('int32').min - 1, 'object'), + ('uint64', np.iinfo('int64').min - 1, 'object') +]) +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, + boxed, box_dtype): + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + if not boxed: + if expected_dtype == object: + pytest.xfail('overflow error') + if expected_dtype == 'int32': + pytest.xfail('always upcasts to platform int') + if dtype == 'int8' and expected_dtype == 'int16': + pytest.xfail('casts to int32 instead of int16') + if (issubclass(dtype.type, np.unsignedinteger) + and np.iinfo(dtype).max < fill_value <= np.iinfo('int64').max): + pytest.xfail('falsely casts to signed') + if ((dtype, expected_dtype) in [('uint8', 'int16'), + ('uint32', 'int64')] + and fill_value != np.iinfo('int32').min - 1): + pytest.xfail('casts to int32 instead of int8/int16') + # this following xfail is "only" a 
consequence of the - now strictly + # enforced - principle that maybe_promote_with_scalar always casts + pytest.xfail('wrong return type of fill_value') + if boxed: + if expected_dtype != object: + pytest.xfail('falsely casts to object') + if box_dtype is None and (fill_value > np.iinfo('int64').max + or np.iinfo('int64').min < fill_value < 0): + pytest.xfail('falsely casts to float instead of object') + + # output is not a generic int, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + # no missing value marker for integers + exp_val_for_array = None if expected_dtype != 'object' else np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, + boxed, box_dtype): + dtype = np.dtype(any_int_dtype) + fill_dtype = np.dtype(float_dtype) + + if float_dtype == 'float32' and not boxed: + pytest.xfail('falsely upcasts to float64') + if box_dtype == object: + pytest.xfail('falsely upcasts to object') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling int with float always upcasts to float64 + expected_dtype = np.float64 + # fill_value can be different float type + exp_val_for_scalar = np.float64(fill_value) + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def 
test_maybe_promote_float_with_int(float_dtype, any_int_dtype, + boxed, box_dtype): + + dtype = np.dtype(float_dtype) + fill_dtype = np.dtype(any_int_dtype) + + if box_dtype == object: + pytest.xfail('falsely upcasts to object') + # this following xfail is "only" a consequence of the - now strictly + # enforced - principle that maybe_promote_with_scalar always casts + if not boxed: + pytest.xfail('wrong return type of fill_value') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling float with int always keeps float dtype + # because: np.finfo('float32').max > np.iinfo('uint64').max + expected_dtype = dtype + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('dtype, fill_value, expected_dtype', [ + # float filled with float + ('float32', 1, 'float32'), + ('float32', np.finfo('float32').max * 1.1, 'float64'), + ('float64', 1, 'float64'), + ('float64', np.finfo('float32').max * 1.1, 'float64'), + # complex filled with float + ('complex64', 1, 'complex64'), + ('complex64', np.finfo('float32').max * 1.1, 'complex128'), + ('complex128', 1, 'complex128'), + ('complex128', np.finfo('float32').max * 1.1, 'complex128'), + # float filled with complex + ('float32', 1 + 1j, 'complex64'), + ('float32', np.finfo('float32').max * (1.1 + 1j), 'complex128'), + ('float64', 1 + 1j, 'complex128'), + ('float64', np.finfo('float32').max * (1.1 + 1j), 'complex128'), + # complex filled with complex + ('complex64', 1 + 1j, 'complex64'), + ('complex64', np.finfo('float32').max * (1.1 + 1j), 'complex128'), + ('complex128', 1 + 1j, 'complex128'), + ('complex128', np.finfo('float32').max * (1.1 + 1j), 'complex128') +]) +@pytest.mark.parametrize('boxed, box_dtype', [ + 
(True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype, + boxed, box_dtype): + + dtype = np.dtype(dtype) + expected_dtype = np.dtype(expected_dtype) + + if box_dtype == object: + pytest.xfail('falsely upcasts to object') + if boxed and is_float_dtype(dtype) and is_complex_dtype(expected_dtype): + pytest.xfail('does not upcast to complex') + if (dtype, expected_dtype) in [('float32', 'float64'), + ('float32', 'complex64'), + ('complex64', 'complex128')]: + pytest.xfail('does not upcast correctly depending on value') + # this following xfails are "only" a consequence of the - now strictly + # enforced - principle that maybe_promote_with_scalar always casts + if not boxed and abs(fill_value) < 2: + pytest.xfail('wrong return type of fill_value') + if (not boxed and dtype == 'complex128' and expected_dtype == 'complex128' + and is_float_dtype(type(fill_value))): + pytest.xfail('wrong return type of fill_value') + + # output is not a generic float, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_bool_with_any(any_numpy_dtype, boxed, box_dtype): + dtype = np.dtype(bool) + fill_dtype = np.dtype(any_numpy_dtype) + + if boxed and fill_dtype == bool: + pytest.xfail('falsely upcasts to object') + if (boxed and box_dtype is None + and is_datetime_or_timedelta_dtype(fill_dtype)): + pytest.xfail('wrongly casts fill_value') + + # create array of given dtype; 
casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling bool with anything but bool casts to object + expected_dtype = np.dtype(object) if fill_dtype != bool else fill_dtype + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan if fill_dtype != bool else None + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_with_bool(any_numpy_dtype, boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + fill_value = True + + if boxed: + if dtype == bool: + pytest.xfail('falsely upcasts to object') + if dtype not in (str, object) and box_dtype is None: + pytest.xfail('falsely upcasts to object') + if not boxed: + if is_datetime_or_timedelta_dtype(dtype): + pytest.xfail('raises error') + # this following xfail is "only" a consequence of the - now strictly + # enforced - principle that maybe_promote_with_scalar always casts + if dtype == bool: + pytest.xfail('wrong return type of fill_value') + + # filling anything but bool with bool casts to object + expected_dtype = np.dtype(object) if dtype != bool else dtype + # output is not a generic bool, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + exp_val_for_array = np.nan if dtype != bool else None + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype, + boxed, box_dtype): + dtype = 
np.dtype(bytes_dtype) + fill_dtype = np.dtype(any_numpy_dtype) + + if issubclass(fill_dtype.type, np.bytes_): + if not boxed or box_dtype == object: + pytest.xfail('falsely upcasts to object') + # takes the opinion that bytes dtype has no missing value marker + else: + pytest.xfail('wrong missing value marker') + else: + if boxed and box_dtype is None: + pytest.xfail('does not upcast to object') + if ((is_integer_dtype(fill_dtype) or is_float_dtype(fill_dtype) + or is_complex_dtype(fill_dtype) or is_object_dtype(fill_dtype) + or is_timedelta64_dtype(fill_dtype)) and not boxed): + pytest.xfail('does not upcast to object') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling bytes with anything but bytes casts to object + expected_dtype = (dtype if issubclass(fill_dtype.type, np.bytes_) + else np.dtype(object)) + exp_val_for_scalar = fill_value + exp_val_for_array = (None if issubclass(fill_dtype.type, np.bytes_) + else np.nan) + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype (fixed len) + (True, 'bytes'), # fill_value wrapped in array with generic bytes-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_with_bytes(any_numpy_dtype, bytes_dtype, + boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + fill_dtype = np.dtype(bytes_dtype) + + if issubclass(dtype.type, np.bytes_): + if not boxed or box_dtype == object: + pytest.xfail('falsely upcasts to object') + # takes the opinion that bytes dtype has no missing value marker + else: + pytest.xfail('wrong missing value marker') + else: + pass + if (boxed and (box_dtype == 'bytes' or box_dtype is None) + and not (is_string_dtype(dtype) or dtype == bool)): + pytest.xfail('does not 
upcast to object') + if not boxed and is_datetime_or_timedelta_dtype(dtype): + pytest.xfail('raises error') + + # fixed bytes scalar used as fill value + fill_value = b'abc' + + # special case for box_dtype (cannot use fixture in parametrization) + box_dtype = fill_dtype if box_dtype == 'bytes' else box_dtype + + # filling anything but bytes with bytes casts to object + expected_dtype = (dtype if issubclass(dtype.type, np.bytes_) + else np.dtype(object)) + # output is not a generic bytes, but corresponds to expected_dtype + exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] + exp_val_for_array = None if issubclass(dtype.type, np.bytes_) else np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype, + boxed, box_dtype): + dtype = np.dtype(datetime64_dtype) + fill_dtype = np.dtype(any_numpy_dtype) + + if is_datetime64_dtype(fill_dtype): + if box_dtype == object: + pytest.xfail('falsely upcasts to object') + else: + if boxed and box_dtype is None: + pytest.xfail('does not upcast to object') + if not boxed: + pytest.xfail('does not upcast to object or raises') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetime with anything but datetime casts to object + if is_datetime64_dtype(fill_dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to pd.Timestamp.value + exp_val_for_scalar = pd.Timestamp(fill_value).value + exp_val_for_array = iNaT + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, 
expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value array with auto-dtype + (True, 'dt_dtype'), # fill_value array with explicit datetime dtype + (True, object), # fill_value array with object dtype + (False, None) # fill_value directly +]) +@pytest.mark.parametrize('fill_value', [ + pd.Timestamp('now'), np.datetime64('now'), + datetime.datetime.now(), datetime.date.today() +], ids=['pd.Timestamp', 'np.datetime64', 'datetime.datetime', 'datetime.date']) +def test_maybe_promote_any_with_datetime64(any_numpy_dtype, datetime64_dtype, + fill_value, boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + + if is_datetime64_dtype(dtype): + if (boxed and (box_dtype == object + or (box_dtype is None + and not is_datetime64_dtype(type(fill_value))))): + pytest.xfail('falsely upcasts to object') + else: + if (boxed and (box_dtype == 'dt_dtype' + or (box_dtype is None + and is_datetime64_dtype(type(fill_value))))): + pytest.xfail('mix of lack of upcasting, resp. 
wrong missing value') + if not boxed and is_timedelta64_dtype(dtype): + pytest.xfail('raises error') + + # special case for box_dtype + box_dtype = (np.dtype(datetime64_dtype) if box_dtype == 'dt_dtype' + else box_dtype) + + # filling anything but datetime with datetime casts to object + if is_datetime64_dtype(dtype): + expected_dtype = dtype + # for datetime dtypes, scalar values get cast to pd.Timestamp.value + exp_val_for_scalar = pd.Timestamp(fill_value).value + exp_val_for_array = iNaT + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_datetimetz_with_any_numpy_dtype( + tz_aware_fixture, any_numpy_dtype, boxed, box_dtype): + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + fill_dtype = np.dtype(any_numpy_dtype) + + if box_dtype != object: + pytest.xfail('does not upcast correctly') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling datetimetz with any numpy dtype casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, + tz_aware_fixture2, + boxed, box_dtype): + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + fill_dtype = 
DatetimeTZDtype(tz=tz_aware_fixture2) + + from dateutil.tz import tzlocal + if is_platform_windows() and tz_aware_fixture2 == tzlocal(): + pytest.xfail('Cannot process fill_value with this dtype, see GH 24310') + if dtype.tz == fill_dtype.tz and boxed: + pytest.xfail('falsely upcasts') + if dtype.tz != fill_dtype.tz and not boxed: + pytest.xfail('falsely upcasts') + + # create array of given dtype; casts "1" to correct dtype + fill_value = pd.Series([10 ** 9], dtype=fill_dtype)[0] + + # filling datetimetz with datetimetz casts to object, unless tz matches + exp_val_for_scalar = fill_value + if dtype.tz == fill_dtype.tz: + expected_dtype = dtype + exp_val_for_array = NaT + else: + expected_dtype = np.dtype(object) + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('fill_value', [None, np.nan, NaT, iNaT], + ids=['None', 'np.nan', 'pd.NaT', 'iNaT']) +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, + boxed, box_dtype): + + dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + if (boxed and (box_dtype == object + or (box_dtype is None + and (fill_value is None or fill_value is NaT)))): + pytest.xfail('false upcasts to object') + # takes the opinion that DatetimeTZ should have single na-marker + # using iNaT would lead to errors elsewhere -> NaT + if not boxed and fill_value == iNaT: + pytest.xfail('wrong missing value marker') + + expected_dtype = dtype + # DatetimeTZDtype does not use iNaT as missing value marker + exp_val_for_scalar = NaT + exp_val_for_array = NaT + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + 
+@pytest.mark.parametrize('fill_value', [ + pd.Timestamp('now'), np.datetime64('now'), + datetime.datetime.now(), datetime.date.today() +], ids=['pd.Timestamp', 'np.datetime64', 'datetime.datetime', 'datetime.date']) +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_numpy_dtype_with_datetimetz( + any_numpy_dtype, tz_aware_fixture, fill_value, boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) + + if is_datetime_or_timedelta_dtype(dtype) and not boxed: + pytest.xfail('raises error') + + fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] + + # filling any numpy dtype with datetimetz casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype, + boxed, box_dtype): + dtype = np.dtype(timedelta64_dtype) + fill_dtype = np.dtype(any_numpy_dtype) + + if is_timedelta64_dtype(fill_dtype): + if box_dtype == object: + pytest.xfail('falsely upcasts to object') + else: + if boxed and box_dtype is None: + pytest.xfail('does not upcast to object') + if not boxed: + pytest.xfail('does not upcast to object or raises') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling timedelta with anything but timedelta casts to object + if is_timedelta64_dtype(fill_dtype): + expected_dtype = dtype + # for 
timedelta dtypes, scalar values get cast to pd.Timedelta.value + exp_val_for_scalar = pd.Timedelta(fill_value).value + exp_val_for_array = iNaT + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('fill_value', [ + pd.Timedelta(days=1), np.timedelta64(24, 'h'), datetime.timedelta(1) +], ids=['pd.Timedelta', 'np.timedelta64', 'datetime.timedelta']) +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value array with auto-dtype + (True, 'td_dtype'), # fill_value array with explicit timedelta dtype + (True, object), # fill_value array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, timedelta64_dtype, + fill_value, boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + + if is_timedelta64_dtype(dtype): + if (boxed and (box_dtype == object + or (box_dtype is None + and not is_timedelta64_dtype(type(fill_value))))): + pytest.xfail('falsely upcasts to object') + else: + if (boxed and box_dtype is None + and is_timedelta64_dtype(type(fill_value))): + pytest.xfail('does not upcast correctly') + if (not boxed and is_timedelta64_dtype(type(fill_value)) and ( + is_integer_dtype(dtype) or is_float_dtype(dtype) + or is_complex_dtype(dtype) + or issubclass(dtype.type, np.bytes_))): + pytest.xfail('does not upcast correctly') + if box_dtype == 'td_dtype': + pytest.xfail('falsely upcasts') + if not boxed and is_datetime64_dtype(dtype): + pytest.xfail('raises error') + + # special case for box_dtype + box_dtype = (np.dtype(timedelta64_dtype) if box_dtype == 'td_dtype' + else box_dtype) + + # filling anything but timedelta with timedelta casts to object + if is_timedelta64_dtype(dtype): + expected_dtype = dtype + # for timedelta dtypes, scalar values get cast to pd.Timedelta.value + 
exp_val_for_scalar = pd.Timedelta(fill_value).value + exp_val_for_array = iNaT + else: + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype, + boxed, box_dtype): + dtype = np.dtype(string_dtype) + fill_dtype = np.dtype(any_numpy_dtype) + + if (boxed and box_dtype is None + and is_datetime_or_timedelta_dtype(fill_dtype)): + pytest.xfail('wrong missing value marker') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling string with anything casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype (fixed len) + (True, 'str'), # fill_value wrapped in array with generic string-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_with_string(any_numpy_dtype, string_dtype, + boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + fill_dtype = np.dtype(string_dtype) + + if is_datetime_or_timedelta_dtype(dtype) and box_dtype != object: + pytest.xfail('does not upcast or raises') + if (boxed and box_dtype in (None, 'str') and ( + is_integer_dtype(dtype) or is_float_dtype(dtype) + or is_complex_dtype(dtype) + or issubclass(dtype.type, np.bytes_))): + pytest.xfail('does not upcast correctly') + + 
# create array of given dtype + fill_value = 'abc' + + # special case for box_dtype (cannot use fixture in parametrization) + box_dtype = fill_dtype if box_dtype == 'str' else box_dtype + + # filling string with anything casts to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype, + boxed, box_dtype): + dtype = np.dtype(object_dtype) + fill_dtype = np.dtype(any_numpy_dtype) + + if (boxed and box_dtype is None + and is_datetime_or_timedelta_dtype(fill_dtype)): + pytest.xfail('wrong missing value marker') + + # create array of given dtype; casts "1" to correct dtype + fill_value = np.array([1], dtype=fill_dtype)[0] + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_with_object(any_numpy_dtype, object_dtype, + boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + + if not boxed and is_datetime_or_timedelta_dtype(dtype): + pytest.xfail('raises error') + + # create array of object dtype from a scalar value (i.e. passing + # dtypes.common.is_scalar), which can however not be cast to int/float etc. 
+ fill_value = pd.DateOffset(1) + + # filling object with anything stays object + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('fill_value', [None, np.nan, NaT, iNaT], + ids=['None', 'np.nan', 'pd.NaT', 'iNaT']) +@pytest.mark.parametrize('boxed, box_dtype', [ + (True, object), # fill_value wrapped in array with object dtype + (False, None) # fill_value directly +]) +def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, fill_value, + boxed, box_dtype): + dtype = np.dtype(any_numpy_dtype) + + if (dtype == bytes and not boxed + and fill_value is not None and fill_value is not NaT): + pytest.xfail('does not upcast to object') + elif dtype == 'uint64' and not boxed and fill_value == iNaT: + pytest.xfail('does not upcast correctly') + elif is_datetime_or_timedelta_dtype(dtype) and boxed: + pytest.xfail('falsely upcasts to object') + elif (boxed and (is_integer_dtype(dtype) or is_float_dtype(dtype) + or is_complex_dtype(dtype)) + and fill_value is not NaT and dtype != 'uint64'): + pytest.xfail('falsely upcasts to object') + elif (boxed and dtype == 'uint64' + and (fill_value is np.nan or fill_value is None)): + pytest.xfail('falsely upcasts to object') + # below: opinionated that iNaT should be interpreted as missing value + elif (not boxed and (is_float_dtype(dtype) or is_complex_dtype(dtype)) + and fill_value == iNaT): + pytest.xfail('does not cast to missing value marker correctly') + elif dtype in (bool, str) and not boxed and fill_value == iNaT: + pytest.xfail('does not cast to missing value marker correctly') + + if is_integer_dtype(dtype) and dtype == 'uint64' and fill_value == iNaT: + # uint64 + negative int casts to object; iNaT is considered as missing + expected_dtype = np.dtype(object) + exp_val_for_scalar = np.nan + elif is_integer_dtype(dtype) and 
fill_value == iNaT: + # other integer + iNaT casts to int64 + expected_dtype = np.int64 + exp_val_for_scalar = iNaT + elif is_integer_dtype(dtype) and fill_value is not NaT: + # integer + other missing value (np.nan / None) casts to float + expected_dtype = np.float64 + exp_val_for_scalar = np.nan + elif is_object_dtype(dtype) and (fill_value == iNaT or fill_value is NaT): + # inserting into object does not cast the value + # but *does* cast None to np.nan + expected_dtype = np.dtype(object) + exp_val_for_scalar = fill_value + elif is_datetime_or_timedelta_dtype(dtype): + # datetime / timedelta cast all missing values to iNaT + expected_dtype = dtype + exp_val_for_scalar = iNaT + elif fill_value is NaT: + # NaT upcasts everything that's not datetime/timedelta to object + expected_dtype = np.dtype(object) + exp_val_for_scalar = NaT + elif is_float_dtype(dtype) or is_complex_dtype(dtype): + # float / complex + missing value (!= NaT) stays the same + expected_dtype = dtype + exp_val_for_scalar = np.nan + else: + # all other cases cast to object, and use np.nan as missing value + expected_dtype = np.dtype(object) + exp_val_for_scalar = np.nan + + # array case has same expected_dtype; but returns corresponding na-marker + if is_integer_dtype(expected_dtype): + # integers cannot hold NaNs; maybe_promote_with_array returns None + exp_val_for_array = None + elif is_datetime_or_timedelta_dtype(expected_dtype): + exp_val_for_array = iNaT + else: # expected_dtype = float / complex / object + exp_val_for_array = np.nan + + _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, + exp_val_for_scalar, exp_val_for_array) + + +@pytest.mark.parametrize('dim', [0, 2, 3]) +def test_maybe_promote_dimensions(any_numpy_dtype, dim): + dtype = np.dtype(any_numpy_dtype) + + # create 0-dim array of given dtype; casts "1" to correct dtype + fill_array = np.array(1, dtype=dtype) + + # expand to desired dimension: + for _ in range(dim): + fill_array = np.expand_dims(fill_array, 0) 
+ + # test against 1-dimensional case + expected_dtype, expected_missing_value = maybe_promote( + dtype, np.array([1], dtype=dtype)) + + result_dtype, result_missing_value = maybe_promote(dtype, fill_array) + + assert result_dtype == expected_dtype + # None == None, iNaT == iNaT, but np.nan != np.nan + assert ((result_missing_value == expected_missing_value) + or (result_missing_value is np.nan + and expected_missing_value is np.nan)) From abbce541435e7cc792ed35adc79027687804da6f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 24 Feb 2019 03:38:35 +0100 Subject: [PATCH 2/7] Fix PY2 --- pandas/tests/dtypes/cast/test_promote.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index f294c82897b69..c1cf0542ba42c 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -10,7 +10,7 @@ import pytest from pandas._libs.tslibs import NaT, iNaT -from pandas.compat import is_platform_windows +from pandas.compat import PY2, is_platform_windows from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( @@ -373,6 +373,7 @@ def test_maybe_promote_any_with_bool(any_numpy_dtype, boxed, box_dtype): exp_val_for_scalar, exp_val_for_array) +@pytest.mark.skip(PY2, 'no bytes in PY2') @pytest.mark.parametrize('boxed, box_dtype', [ (True, None), # fill_value wrapped in array with auto-dtype (True, object), # fill_value wrapped in array with object dtype @@ -411,6 +412,7 @@ def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype, exp_val_for_scalar, exp_val_for_array) +@pytest.mark.skip(PY2, 'no bytes in PY2') @pytest.mark.parametrize('boxed, box_dtype', [ (True, None), # fill_value wrapped in array with auto-dtype (fixed len) (True, 'bytes'), # fill_value wrapped in array with generic bytes-dtype @@ -881,7 +883,8 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, 
fill_value, elif (not boxed and (is_float_dtype(dtype) or is_complex_dtype(dtype)) and fill_value == iNaT): pytest.xfail('does not cast to missing value marker correctly') - elif dtype in (bool, str) and not boxed and fill_value == iNaT: + elif ((is_string_dtype(dtype) or dtype == bool) + and not boxed and fill_value == iNaT): pytest.xfail('does not cast to missing value marker correctly') if is_integer_dtype(dtype) and dtype == 'uint64' and fill_value == iNaT: From 07e3b0ca2d016ac12c098a96a66973c19cce6c77 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Sun, 24 Feb 2019 03:46:28 +0100 Subject: [PATCH 3/7] Fix skip syntax --- pandas/tests/dtypes/cast/test_promote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index c1cf0542ba42c..b8c1a2a2fac8e 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -373,7 +373,7 @@ def test_maybe_promote_any_with_bool(any_numpy_dtype, boxed, box_dtype): exp_val_for_scalar, exp_val_for_array) -@pytest.mark.skip(PY2, 'no bytes in PY2') +@pytest.mark.skipif(PY2, reason='no bytes in PY2') @pytest.mark.parametrize('boxed, box_dtype', [ (True, None), # fill_value wrapped in array with auto-dtype (True, object), # fill_value wrapped in array with object dtype @@ -412,7 +412,7 @@ def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype, exp_val_for_scalar, exp_val_for_array) -@pytest.mark.skip(PY2, 'no bytes in PY2') +@pytest.mark.skipif(PY2, reason='no bytes in PY2') @pytest.mark.parametrize('boxed, box_dtype', [ (True, None), # fill_value wrapped in array with auto-dtype (fixed len) (True, 'bytes'), # fill_value wrapped in array with generic bytes-dtype From a9516ec06e4f475afe7c0edabd32417c66b414e7 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Sun, 22 Sep 2019 00:29:24 +0200 Subject: [PATCH 4/7] blackify conflict files before merge --- pandas/conftest.py | 297 +++-- pandas/tests/dtypes/cast/test_promote.py | 1346 +++++++++++++--------- 2 files changed, 998 insertions(+), 645 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index debc9734730f3..c00c871b295bc 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -23,68 +23,70 @@ # or `deadline=None` to entirely disable timeouts for that test. deadline=500, timeout=hypothesis.unlimited, - suppress_health_check=(hypothesis.HealthCheck.too_slow,) + suppress_health_check=(hypothesis.HealthCheck.too_slow,), ) hypothesis.settings.load_profile("ci") def pytest_addoption(parser): - parser.addoption("--skip-slow", action="store_true", - help="skip slow tests") - parser.addoption("--skip-network", action="store_true", - help="skip network tests") - parser.addoption("--skip-db", action="store_true", - help="skip db tests") - parser.addoption("--run-high-memory", action="store_true", - help="run high memory tests") - parser.addoption("--only-slow", action="store_true", - help="run only slow tests") - parser.addoption("--strict-data-files", action="store_true", - help="Fail if a test is skipped for missing data file.") + parser.addoption("--skip-slow", action="store_true", help="skip slow tests") + parser.addoption("--skip-network", action="store_true", help="skip network tests") + parser.addoption("--skip-db", action="store_true", help="skip db tests") + parser.addoption( + "--run-high-memory", action="store_true", help="run high memory tests" + ) + parser.addoption("--only-slow", action="store_true", help="run only slow tests") + parser.addoption( + "--strict-data-files", + action="store_true", + help="Fail if a test is skipped for missing data file.", + ) def pytest_runtest_setup(item): - if 'slow' in item.keywords and item.config.getoption("--skip-slow"): + if "slow" in item.keywords and item.config.getoption("--skip-slow"): 
pytest.skip("skipping due to --skip-slow") - if 'slow' not in item.keywords and item.config.getoption("--only-slow"): + if "slow" not in item.keywords and item.config.getoption("--only-slow"): pytest.skip("skipping due to --only-slow") - if 'network' in item.keywords and item.config.getoption("--skip-network"): + if "network" in item.keywords and item.config.getoption("--skip-network"): pytest.skip("skipping due to --skip-network") - if 'db' in item.keywords and item.config.getoption("--skip-db"): + if "db" in item.keywords and item.config.getoption("--skip-db"): pytest.skip("skipping due to --skip-db") - if 'high_memory' in item.keywords and not item.config.getoption( - "--run-high-memory"): - pytest.skip( - "skipping high memory test since --run-high-memory was not set") + if "high_memory" in item.keywords and not item.config.getoption( + "--run-high-memory" + ): + pytest.skip("skipping high memory test since --run-high-memory was not set") # Configurations for all tests and all test modules + @pytest.fixture(autouse=True) def configure_tests(): - pd.set_option('chained_assignment', 'raise') + pd.set_option("chained_assignment", "raise") # For running doctests: make np and pd names available + @pytest.fixture(autouse=True) def add_imports(doctest_namespace): - doctest_namespace['np'] = np - doctest_namespace['pd'] = pd + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd -@pytest.fixture(params=['bsr', 'coo', 'csc', 'csr', 'dia', 'dok', 'lil']) +@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) def spmatrix(request): from scipy import sparse - return getattr(sparse, request.param + '_matrix') + return getattr(sparse, request.param + "_matrix") -@pytest.fixture(params=[0, 1, 'index', 'columns'], - ids=lambda x: "axis {!r}".format(x)) + +@pytest.fixture(params=[0, 1, "index", "columns"], ids=lambda x: "axis {!r}".format(x)) def axis(request): """ Fixture for returning the axis numbers of a DataFrame. 
@@ -95,7 +97,7 @@ def axis(request): axis_frame = axis -@pytest.fixture(params=[0, 'index'], ids=lambda x: "axis {!r}".format(x)) +@pytest.fixture(params=[0, "index"], ids=lambda x: "axis {!r}".format(x)) def axis_series(request): """ Fixture for returning the axis numbers of a Series. @@ -111,8 +113,9 @@ def ip(): Will raise a skip if IPython is not installed. """ - pytest.importorskip('IPython', minversion="6.0.0") + pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.interactiveshell import InteractiveShell + return InteractiveShell() @@ -127,15 +130,24 @@ def observed(request): return request.param -_all_arithmetic_operators = ['__add__', '__radd__', - '__sub__', '__rsub__', - '__mul__', '__rmul__', - '__floordiv__', '__rfloordiv__', - '__truediv__', '__rtruediv__', - '__pow__', '__rpow__', - '__mod__', '__rmod__'] +_all_arithmetic_operators = [ + "__add__", + "__radd__", + "__sub__", + "__rsub__", + "__mul__", + "__rmul__", + "__floordiv__", + "__rfloordiv__", + "__truediv__", + "__rtruediv__", + "__pow__", + "__rpow__", + "__mod__", + "__rmod__", +] if not PY3: - _all_arithmetic_operators.extend(['__div__', '__rdiv__']) + _all_arithmetic_operators.extend(["__div__", "__rdiv__"]) @pytest.fixture(params=_all_arithmetic_operators) @@ -146,9 +158,18 @@ def all_arithmetic_operators(request): return request.param -_all_numeric_reductions = ['sum', 'max', 'min', - 'mean', 'prod', 'std', 'var', 'median', - 'kurt', 'skew'] +_all_numeric_reductions = [ + "sum", + "max", + "min", + "mean", + "prod", + "std", + "var", + "median", + "kurt", + "skew", +] @pytest.fixture(params=_all_numeric_reductions) @@ -159,7 +180,7 @@ def all_numeric_reductions(request): return request.param -_all_boolean_reductions = ['all', 'any'] +_all_boolean_reductions = ["all", "any"] @pytest.fixture(params=_all_boolean_reductions) @@ -197,13 +218,15 @@ def _get_cython_table_params(ndframe, func_names_and_expected): results = [] for func_name, expected in func_names_and_expected: 
results.append((ndframe, func_name, expected)) - results += [(ndframe, func, expected) for func, name in _cython_table - if name == func_name] + results += [ + (ndframe, func, expected) + for func, name in _cython_table + if name == func_name + ] return results -@pytest.fixture(params=['__eq__', '__ne__', '__le__', - '__lt__', '__ge__', '__gt__']) +@pytest.fixture(params=["__eq__", "__ne__", "__le__", "__lt__", "__ge__", "__gt__"]) def all_compare_operators(request): """ Fixture for dunder names for common compare operations @@ -218,8 +241,9 @@ def all_compare_operators(request): return request.param -@pytest.fixture(params=[None, 'gzip', 'bz2', 'zip', - pytest.param('xz', marks=td.skip_if_no_lzma)]) +@pytest.fixture( + params=[None, "gzip", "bz2", "zip", pytest.param("xz", marks=td.skip_if_no_lzma)] +) def compression(request): """ Fixture for trying common compression types in compression tests @@ -227,8 +251,9 @@ def compression(request): return request.param -@pytest.fixture(params=['gzip', 'bz2', 'zip', - pytest.param('xz', marks=td.skip_if_no_lzma)]) +@pytest.fixture( + params=["gzip", "bz2", "zip", pytest.param("xz", marks=td.skip_if_no_lzma)] +) def compression_only(request): """ Fixture for trying common compression types in compression tests excluding @@ -245,15 +270,17 @@ def writable(request): return request.param -@pytest.fixture(scope='module') +@pytest.fixture(scope="module") def datetime_tz_utc(): from datetime import timezone + return timezone.utc -utc_objs = ['utc', 'dateutil/UTC', utc, tzutc()] +utc_objs = ["utc", "dateutil/UTC", utc, tzutc()] if PY3: from datetime import timezone + utc_objs.append(timezone.utc) @@ -265,7 +292,7 @@ def utc_fixture(request): return request.param -@pytest.fixture(params=['inner', 'outer', 'left', 'right']) +@pytest.fixture(params=["inner", "outer", "left", "right"]) def join_type(request): """ Fixture for trying all types of join operations @@ -296,7 +323,7 @@ def datapath(strict_data_files): ValueError If the path 
doesn't exist and the --strict-data-files option is set. """ - BASE_PATH = os.path.join(os.path.dirname(__file__), 'tests') + BASE_PATH = os.path.join(os.path.dirname(__file__), "tests") def deco(*args): path = os.path.join(BASE_PATH, *args) @@ -308,16 +335,17 @@ def deco(*args): msg = "Could not find {}." pytest.skip(msg.format(path)) return path + return deco @pytest.fixture def iris(datapath): """The iris dataset as a DataFrame.""" - return pd.read_csv(datapath('data', 'iris.csv')) + return pd.read_csv(datapath("data", "iris.csv")) -@pytest.fixture(params=['nlargest', 'nsmallest']) +@pytest.fixture(params=["nlargest", "nsmallest"]) def nselect_method(request): """ Fixture for trying all nselect methods @@ -325,7 +353,7 @@ def nselect_method(request): return request.param -@pytest.fixture(params=['left', 'right', 'both', 'neither']) +@pytest.fixture(params=["left", "right", "both", "neither"]) def closed(request): """ Fixture for trying all interval closed parameters @@ -333,7 +361,7 @@ def closed(request): return request.param -@pytest.fixture(params=['left', 'right', 'both', 'neither']) +@pytest.fixture(params=["left", "right", "both", "neither"]) def other_closed(request): """ Secondary closed fixture to allow parametrizing over all pairs of closed @@ -341,7 +369,7 @@ def other_closed(request): return request.param -@pytest.fixture(params=[None, np.nan, pd.NaT, float('nan'), np.float('NaN')]) +@pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), np.float("NaN")]) def nulls_fixture(request): """ Fixture for each null type in pandas @@ -364,14 +392,32 @@ def unique_nulls_fixture(request): unique_nulls_fixture2 = unique_nulls_fixture -TIMEZONES = [None, 'UTC', 'US/Eastern', 'Asia/Tokyo', 'dateutil/US/Pacific', - 'dateutil/Asia/Singapore', tzutc(), tzlocal(), FixedOffset(300), - FixedOffset(0), FixedOffset(-300)] -TIMEZONE_IDS = ['None', 'UTC', 'US/Eastern', 'Asia/Tokyp', - 'dateutil/US/Pacific', 'dateutil/Asia/Singapore', - 'dateutil.tz.tzutz()', 
'dateutil.tz.tzlocal()', - 'pytz.FixedOffset(300)', 'pytz.FixedOffset(0)', - 'pytz.FixedOffset(-300)'] +TIMEZONES = [ + None, + "UTC", + "US/Eastern", + "Asia/Tokyo", + "dateutil/US/Pacific", + "dateutil/Asia/Singapore", + tzutc(), + tzlocal(), + FixedOffset(300), + FixedOffset(0), + FixedOffset(-300), +] +TIMEZONE_IDS = [ + "None", + "UTC", + "US/Eastern", + "Asia/Tokyp", + "dateutil/US/Pacific", + "dateutil/Asia/Singapore", + "dateutil.tz.tzutz()", + "dateutil.tz.tzlocal()", + "pytz.FixedOffset(300)", + "pytz.FixedOffset(0)", + "pytz.FixedOffset(-300)", +] @td.parametrize_fixture_doc(str(TIMEZONE_IDS)) @@ -409,19 +455,26 @@ def tz_aware_fixture(request): FLOAT_DTYPES = [float, "float32", "float64"] COMPLEX_DTYPES = [complex, "complex64", "complex128"] -STRING_DTYPES = [str, 'str', 'U'] +STRING_DTYPES = [str, "str", "U"] -DATETIME64_DTYPES = ['datetime64[ns]', 'M8[ns]'] -TIMEDELTA64_DTYPES = ['timedelta64[ns]', 'm8[ns]'] +DATETIME64_DTYPES = ["datetime64[ns]", "M8[ns]"] +TIMEDELTA64_DTYPES = ["timedelta64[ns]", "m8[ns]"] -BOOL_DTYPES = [bool, 'bool'] -BYTES_DTYPES = [bytes, 'bytes'] -OBJECT_DTYPES = [object, 'object'] +BOOL_DTYPES = [bool, "bool"] +BYTES_DTYPES = [bytes, "bytes"] +OBJECT_DTYPES = [object, "object"] ALL_REAL_DTYPES = FLOAT_DTYPES + ALL_INT_DTYPES -ALL_NUMPY_DTYPES = (ALL_REAL_DTYPES + COMPLEX_DTYPES + STRING_DTYPES - + DATETIME64_DTYPES + TIMEDELTA64_DTYPES + BOOL_DTYPES - + OBJECT_DTYPES + BYTES_DTYPES * PY3) # bytes only for PY3 +ALL_NUMPY_DTYPES = ( + ALL_REAL_DTYPES + + COMPLEX_DTYPES + + STRING_DTYPES + + DATETIME64_DTYPES + + TIMEDELTA64_DTYPES + + BOOL_DTYPES + + OBJECT_DTYPES + + BYTES_DTYPES * PY3 +) # bytes only for PY3 @pytest.fixture(params=STRING_DTYPES) @@ -611,30 +664,30 @@ def any_numpy_dtype(request): # categoricals are handled separately _any_skipna_inferred_dtype = [ - ('string', ['a', np.nan, 'c']), - ('unicode' if not PY3 else 'string', [u('a'), np.nan, u('c')]), - ('bytes' if PY3 else 'string', [b'a', np.nan, b'c']), - 
('empty', [np.nan, np.nan, np.nan]), - ('empty', []), - ('mixed-integer', ['a', np.nan, 2]), - ('mixed', ['a', np.nan, 2.0]), - ('floating', [1.0, np.nan, 2.0]), - ('integer', [1, np.nan, 2]), - ('mixed-integer-float', [1, np.nan, 2.0]), - ('decimal', [Decimal(1), np.nan, Decimal(2)]), - ('boolean', [True, np.nan, False]), - ('datetime64', [np.datetime64('2013-01-01'), np.nan, - np.datetime64('2018-01-01')]), - ('datetime', [pd.Timestamp('20130101'), np.nan, pd.Timestamp('20180101')]), - ('date', [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), + ("string", ["a", np.nan, "c"]), + ("unicode" if not PY3 else "string", [u("a"), np.nan, u("c")]), + ("bytes" if PY3 else "string", [b"a", np.nan, b"c"]), + ("empty", [np.nan, np.nan, np.nan]), + ("empty", []), + ("mixed-integer", ["a", np.nan, 2]), + ("mixed", ["a", np.nan, 2.0]), + ("floating", [1.0, np.nan, 2.0]), + ("integer", [1, np.nan, 2]), + ("mixed-integer-float", [1, np.nan, 2.0]), + ("decimal", [Decimal(1), np.nan, Decimal(2)]), + ("boolean", [True, np.nan, False]), + ("datetime64", [np.datetime64("2013-01-01"), np.nan, np.datetime64("2018-01-01")]), + ("datetime", [pd.Timestamp("20130101"), np.nan, pd.Timestamp("20180101")]), + ("date", [date(2013, 1, 1), np.nan, date(2018, 1, 1)]), # The following two dtypes are commented out due to GH 23554 # ('complex', [1 + 1j, np.nan, 2 + 2j]), # ('timedelta64', [np.timedelta64(1, 'D'), # np.nan, np.timedelta64(2, 'D')]), - ('timedelta', [timedelta(1), np.nan, timedelta(2)]), - ('time', [time(1), np.nan, time(2)]), - ('period', [pd.Period(2013), pd.NaT, pd.Period(2018)]), - ('interval', [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)])] + ("timedelta", [timedelta(1), np.nan, timedelta(2)]), + ("time", [time(1), np.nan, time(2)]), + ("period", [pd.Period(2013), pd.NaT, pd.Period(2018)]), + ("interval", [pd.Interval(0, 1), np.nan, pd.Interval(0, 2)]), +] ids, _ = zip(*_any_skipna_inferred_dtype) # use inferred type as fixture-id @@ -687,42 +740,52 @@ def 
any_skipna_inferred_dtype(request): return inferred_dtype, values -@pytest.fixture(params=[getattr(pd.offsets, o) for o in pd.offsets.__all__ if - issubclass(getattr(pd.offsets, o), pd.offsets.Tick)]) +@pytest.fixture( + params=[ + getattr(pd.offsets, o) + for o in pd.offsets.__all__ + if issubclass(getattr(pd.offsets, o), pd.offsets.Tick) + ] +) def tick_classes(request): """ Fixture for Tick based datetime offsets available for a time series. """ return request.param + # ---------------------------------------------------------------- # Global setup for tests using Hypothesis # Registering these strategies makes them globally available via st.from_type, # which is use for offsets in tests/tseries/offsets/test_offsets_properties.py -for name in 'MonthBegin MonthEnd BMonthBegin BMonthEnd'.split(): +for name in "MonthBegin MonthEnd BMonthBegin BMonthEnd".split(): cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy(cls, st.builds( - cls, - n=st.integers(-99, 99), - normalize=st.booleans(), - )) + st.register_type_strategy( + cls, st.builds(cls, n=st.integers(-99, 99), normalize=st.booleans()) + ) -for name in 'YearBegin YearEnd BYearBegin BYearEnd'.split(): +for name in "YearBegin YearEnd BYearBegin BYearEnd".split(): cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy(cls, st.builds( + st.register_type_strategy( cls, - n=st.integers(-5, 5), - normalize=st.booleans(), - month=st.integers(min_value=1, max_value=12), - )) - -for name in 'QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd'.split(): + st.builds( + cls, + n=st.integers(-5, 5), + normalize=st.booleans(), + month=st.integers(min_value=1, max_value=12), + ), + ) + +for name in "QuarterBegin QuarterEnd BQuarterBegin BQuarterEnd".split(): cls = getattr(pd.tseries.offsets, name) - st.register_type_strategy(cls, st.builds( + st.register_type_strategy( cls, - n=st.integers(-24, 24), - normalize=st.booleans(), - startingMonth=st.integers(min_value=1, max_value=12) - )) + st.builds( + 
cls, + n=st.integers(-24, 24), + normalize=st.booleans(), + startingMonth=st.integers(min_value=1, max_value=12), + ), + ) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index b8c1a2a2fac8e..628efc3eba84d 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -14,16 +14,30 @@ from pandas.core.dtypes.cast import maybe_promote from pandas.core.dtypes.common import ( - is_complex_dtype, is_datetime64_dtype, is_datetime_or_timedelta_dtype, - is_float_dtype, is_integer_dtype, is_object_dtype, is_scalar, - is_string_dtype, is_timedelta64_dtype) + is_complex_dtype, + is_datetime64_dtype, + is_datetime_or_timedelta_dtype, + is_float_dtype, + is_integer_dtype, + is_object_dtype, + is_scalar, + is_string_dtype, + is_timedelta64_dtype, +) from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd -def _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar=None, exp_val_for_array=None): +def _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar=None, + exp_val_for_array=None, +): assert is_scalar(fill_value) if boxed: @@ -44,173 +58,197 @@ def _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, # for equal values, also check type (relevant e.g. for int vs float, resp. 
# for different datetimes and timedeltas) # for missing values, None == None and iNaT == iNaT, but np.nan != np.nan - assert ((result_fill_value == expected_fill_value - and type(result_fill_value) == type(expected_fill_value)) - or (result_fill_value is np.nan and expected_fill_value is np.nan) - or (result_fill_value is NaT and expected_fill_value is NaT)) - - -@pytest.mark.parametrize('dtype, fill_value, expected_dtype', [ - # size 8 - ('int8', 1, 'int8'), - ('int8', np.iinfo('int8').max + 1, 'int16'), - ('int8', np.iinfo('int16').max + 1, 'int32'), - ('int8', np.iinfo('int32').max + 1, 'int64'), - ('int8', np.iinfo('int64').max + 1, 'object'), - ('int8', -1, 'int8'), - ('int8', np.iinfo('int8').min - 1, 'int16'), - ('int8', np.iinfo('int16').min - 1, 'int32'), - ('int8', np.iinfo('int32').min - 1, 'int64'), - ('int8', np.iinfo('int64').min - 1, 'object'), - # keep signed-ness as long as possible - ('uint8', 1, 'uint8'), - ('uint8', np.iinfo('int8').max + 1, 'uint8'), - ('uint8', np.iinfo('uint8').max + 1, 'uint16'), - ('uint8', np.iinfo('int16').max + 1, 'uint16'), - ('uint8', np.iinfo('uint16').max + 1, 'uint32'), - ('uint8', np.iinfo('int32').max + 1, 'uint32'), - ('uint8', np.iinfo('uint32').max + 1, 'uint64'), - ('uint8', np.iinfo('int64').max + 1, 'uint64'), - ('uint8', np.iinfo('uint64').max + 1, 'object'), - # max of uint8 cannot be contained in int8 - ('uint8', -1, 'int16'), - ('uint8', np.iinfo('int8').min - 1, 'int16'), - ('uint8', np.iinfo('int16').min - 1, 'int32'), - ('uint8', np.iinfo('int32').min - 1, 'int64'), - ('uint8', np.iinfo('int64').min - 1, 'object'), - # size 16 - ('int16', 1, 'int16'), - ('int16', np.iinfo('int8').max + 1, 'int16'), - ('int16', np.iinfo('int16').max + 1, 'int32'), - ('int16', np.iinfo('int32').max + 1, 'int64'), - ('int16', np.iinfo('int64').max + 1, 'object'), - ('int16', -1, 'int16'), - ('int16', np.iinfo('int8').min - 1, 'int16'), - ('int16', np.iinfo('int16').min - 1, 'int32'), - ('int16', np.iinfo('int32').min - 
1, 'int64'), - ('int16', np.iinfo('int64').min - 1, 'object'), - ('uint16', 1, 'uint16'), - ('uint16', np.iinfo('int8').max + 1, 'uint16'), - ('uint16', np.iinfo('uint8').max + 1, 'uint16'), - ('uint16', np.iinfo('int16').max + 1, 'uint16'), - ('uint16', np.iinfo('uint16').max + 1, 'uint32'), - ('uint16', np.iinfo('int32').max + 1, 'uint32'), - ('uint16', np.iinfo('uint32').max + 1, 'uint64'), - ('uint16', np.iinfo('int64').max + 1, 'uint64'), - ('uint16', np.iinfo('uint64').max + 1, 'object'), - ('uint16', -1, 'int32'), - ('uint16', np.iinfo('int8').min - 1, 'int32'), - ('uint16', np.iinfo('int16').min - 1, 'int32'), - ('uint16', np.iinfo('int32').min - 1, 'int64'), - ('uint16', np.iinfo('int64').min - 1, 'object'), - # size 32 - ('int32', 1, 'int32'), - ('int32', np.iinfo('int8').max + 1, 'int32'), - ('int32', np.iinfo('int16').max + 1, 'int32'), - ('int32', np.iinfo('int32').max + 1, 'int64'), - ('int32', np.iinfo('int64').max + 1, 'object'), - ('int32', -1, 'int32'), - ('int32', np.iinfo('int8').min - 1, 'int32'), - ('int32', np.iinfo('int16').min - 1, 'int32'), - ('int32', np.iinfo('int32').min - 1, 'int64'), - ('int32', np.iinfo('int64').min - 1, 'object'), - ('uint32', 1, 'uint32'), - ('uint32', np.iinfo('int8').max + 1, 'uint32'), - ('uint32', np.iinfo('uint8').max + 1, 'uint32'), - ('uint32', np.iinfo('int16').max + 1, 'uint32'), - ('uint32', np.iinfo('uint16').max + 1, 'uint32'), - ('uint32', np.iinfo('int32').max + 1, 'uint32'), - ('uint32', np.iinfo('uint32').max + 1, 'uint64'), - ('uint32', np.iinfo('int64').max + 1, 'uint64'), - ('uint32', np.iinfo('uint64').max + 1, 'object'), - ('uint32', -1, 'int64'), - ('uint32', np.iinfo('int8').min - 1, 'int64'), - ('uint32', np.iinfo('int16').min - 1, 'int64'), - ('uint32', np.iinfo('int32').min - 1, 'int64'), - ('uint32', np.iinfo('int64').min - 1, 'object'), - # size 64 - ('int64', 1, 'int64'), - ('int64', np.iinfo('int8').max + 1, 'int64'), - ('int64', np.iinfo('int16').max + 1, 'int64'), - ('int64', 
np.iinfo('int32').max + 1, 'int64'), - ('int64', np.iinfo('int64').max + 1, 'object'), - ('int64', -1, 'int64'), - ('int64', np.iinfo('int8').min - 1, 'int64'), - ('int64', np.iinfo('int16').min - 1, 'int64'), - ('int64', np.iinfo('int32').min - 1, 'int64'), - ('int64', np.iinfo('int64').min - 1, 'object'), - ('uint64', 1, 'uint64'), - ('uint64', np.iinfo('int8').max + 1, 'uint64'), - ('uint64', np.iinfo('uint8').max + 1, 'uint64'), - ('uint64', np.iinfo('int16').max + 1, 'uint64'), - ('uint64', np.iinfo('uint16').max + 1, 'uint64'), - ('uint64', np.iinfo('int32').max + 1, 'uint64'), - ('uint64', np.iinfo('uint32').max + 1, 'uint64'), - ('uint64', np.iinfo('int64').max + 1, 'uint64'), - ('uint64', np.iinfo('uint64').max + 1, 'object'), - ('uint64', -1, 'object'), - ('uint64', np.iinfo('int8').min - 1, 'object'), - ('uint64', np.iinfo('int16').min - 1, 'object'), - ('uint64', np.iinfo('int32').min - 1, 'object'), - ('uint64', np.iinfo('int64').min - 1, 'object') -]) -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, - boxed, box_dtype): + assert ( + ( + result_fill_value == expected_fill_value + and type(result_fill_value) == type(expected_fill_value) + ) + or (result_fill_value is np.nan and expected_fill_value is np.nan) + or (result_fill_value is NaT and expected_fill_value is NaT) + ) + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # size 8 + ("int8", 1, "int8"), + ("int8", np.iinfo("int8").max + 1, "int16"), + ("int8", np.iinfo("int16").max + 1, "int32"), + ("int8", np.iinfo("int32").max + 1, "int64"), + ("int8", np.iinfo("int64").max + 1, "object"), + ("int8", -1, "int8"), + ("int8", np.iinfo("int8").min - 1, "int16"), + ("int8", np.iinfo("int16").min - 1, "int32"), + ("int8", np.iinfo("int32").min 
- 1, "int64"), + ("int8", np.iinfo("int64").min - 1, "object"), + # keep signed-ness as long as possible + ("uint8", 1, "uint8"), + ("uint8", np.iinfo("int8").max + 1, "uint8"), + ("uint8", np.iinfo("uint8").max + 1, "uint16"), + ("uint8", np.iinfo("int16").max + 1, "uint16"), + ("uint8", np.iinfo("uint16").max + 1, "uint32"), + ("uint8", np.iinfo("int32").max + 1, "uint32"), + ("uint8", np.iinfo("uint32").max + 1, "uint64"), + ("uint8", np.iinfo("int64").max + 1, "uint64"), + ("uint8", np.iinfo("uint64").max + 1, "object"), + # max of uint8 cannot be contained in int8 + ("uint8", -1, "int16"), + ("uint8", np.iinfo("int8").min - 1, "int16"), + ("uint8", np.iinfo("int16").min - 1, "int32"), + ("uint8", np.iinfo("int32").min - 1, "int64"), + ("uint8", np.iinfo("int64").min - 1, "object"), + # size 16 + ("int16", 1, "int16"), + ("int16", np.iinfo("int8").max + 1, "int16"), + ("int16", np.iinfo("int16").max + 1, "int32"), + ("int16", np.iinfo("int32").max + 1, "int64"), + ("int16", np.iinfo("int64").max + 1, "object"), + ("int16", -1, "int16"), + ("int16", np.iinfo("int8").min - 1, "int16"), + ("int16", np.iinfo("int16").min - 1, "int32"), + ("int16", np.iinfo("int32").min - 1, "int64"), + ("int16", np.iinfo("int64").min - 1, "object"), + ("uint16", 1, "uint16"), + ("uint16", np.iinfo("int8").max + 1, "uint16"), + ("uint16", np.iinfo("uint8").max + 1, "uint16"), + ("uint16", np.iinfo("int16").max + 1, "uint16"), + ("uint16", np.iinfo("uint16").max + 1, "uint32"), + ("uint16", np.iinfo("int32").max + 1, "uint32"), + ("uint16", np.iinfo("uint32").max + 1, "uint64"), + ("uint16", np.iinfo("int64").max + 1, "uint64"), + ("uint16", np.iinfo("uint64").max + 1, "object"), + ("uint16", -1, "int32"), + ("uint16", np.iinfo("int8").min - 1, "int32"), + ("uint16", np.iinfo("int16").min - 1, "int32"), + ("uint16", np.iinfo("int32").min - 1, "int64"), + ("uint16", np.iinfo("int64").min - 1, "object"), + # size 32 + ("int32", 1, "int32"), + ("int32", np.iinfo("int8").max + 1, 
"int32"), + ("int32", np.iinfo("int16").max + 1, "int32"), + ("int32", np.iinfo("int32").max + 1, "int64"), + ("int32", np.iinfo("int64").max + 1, "object"), + ("int32", -1, "int32"), + ("int32", np.iinfo("int8").min - 1, "int32"), + ("int32", np.iinfo("int16").min - 1, "int32"), + ("int32", np.iinfo("int32").min - 1, "int64"), + ("int32", np.iinfo("int64").min - 1, "object"), + ("uint32", 1, "uint32"), + ("uint32", np.iinfo("int8").max + 1, "uint32"), + ("uint32", np.iinfo("uint8").max + 1, "uint32"), + ("uint32", np.iinfo("int16").max + 1, "uint32"), + ("uint32", np.iinfo("uint16").max + 1, "uint32"), + ("uint32", np.iinfo("int32").max + 1, "uint32"), + ("uint32", np.iinfo("uint32").max + 1, "uint64"), + ("uint32", np.iinfo("int64").max + 1, "uint64"), + ("uint32", np.iinfo("uint64").max + 1, "object"), + ("uint32", -1, "int64"), + ("uint32", np.iinfo("int8").min - 1, "int64"), + ("uint32", np.iinfo("int16").min - 1, "int64"), + ("uint32", np.iinfo("int32").min - 1, "int64"), + ("uint32", np.iinfo("int64").min - 1, "object"), + # size 64 + ("int64", 1, "int64"), + ("int64", np.iinfo("int8").max + 1, "int64"), + ("int64", np.iinfo("int16").max + 1, "int64"), + ("int64", np.iinfo("int32").max + 1, "int64"), + ("int64", np.iinfo("int64").max + 1, "object"), + ("int64", -1, "int64"), + ("int64", np.iinfo("int8").min - 1, "int64"), + ("int64", np.iinfo("int16").min - 1, "int64"), + ("int64", np.iinfo("int32").min - 1, "int64"), + ("int64", np.iinfo("int64").min - 1, "object"), + ("uint64", 1, "uint64"), + ("uint64", np.iinfo("int8").max + 1, "uint64"), + ("uint64", np.iinfo("uint8").max + 1, "uint64"), + ("uint64", np.iinfo("int16").max + 1, "uint64"), + ("uint64", np.iinfo("uint16").max + 1, "uint64"), + ("uint64", np.iinfo("int32").max + 1, "uint64"), + ("uint64", np.iinfo("uint32").max + 1, "uint64"), + ("uint64", np.iinfo("int64").max + 1, "uint64"), + ("uint64", np.iinfo("uint64").max + 1, "object"), + ("uint64", -1, "object"), + ("uint64", np.iinfo("int8").min - 
1, "object"), + ("uint64", np.iinfo("int16").min - 1, "object"), + ("uint64", np.iinfo("int32").min - 1, "object"), + ("uint64", np.iinfo("int64").min - 1, "object"), + ], +) +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_int_with_int( + dtype, fill_value, expected_dtype, boxed, box_dtype +): dtype = np.dtype(dtype) expected_dtype = np.dtype(expected_dtype) if not boxed: if expected_dtype == object: - pytest.xfail('overflow error') - if expected_dtype == 'int32': - pytest.xfail('always upcasts to platform int') - if dtype == 'int8' and expected_dtype == 'int16': - pytest.xfail('casts to int32 instead of int16') - if (issubclass(dtype.type, np.unsignedinteger) - and np.iinfo(dtype).max < fill_value <= np.iinfo('int64').max): - pytest.xfail('falsely casts to signed') - if ((dtype, expected_dtype) in [('uint8', 'int16'), - ('uint32', 'int64')] - and fill_value != np.iinfo('int32').min - 1): - pytest.xfail('casts to int32 instead of int8/int16') + pytest.xfail("overflow error") + if expected_dtype == "int32": + pytest.xfail("always upcasts to platform int") + if dtype == "int8" and expected_dtype == "int16": + pytest.xfail("casts to int32 instead of int16") + if ( + issubclass(dtype.type, np.unsignedinteger) + and np.iinfo(dtype).max < fill_value <= np.iinfo("int64").max + ): + pytest.xfail("falsely casts to signed") + if (dtype, expected_dtype) in [ + ("uint8", "int16"), + ("uint32", "int64"), + ] and fill_value != np.iinfo("int32").min - 1: + pytest.xfail("casts to int32 instead of int8/int16") # this following xfail is "only" a consequence of the - now strictly # enforced - principle that maybe_promote_with_scalar always casts - pytest.xfail('wrong return type of fill_value') + pytest.xfail("wrong return type of fill_value") if boxed: if expected_dtype != object: - 
pytest.xfail('falsely casts to object') - if box_dtype is None and (fill_value > np.iinfo('int64').max - or np.iinfo('int64').min < fill_value < 0): - pytest.xfail('falsely casts to float instead of object') + pytest.xfail("falsely casts to object") + if box_dtype is None and ( + fill_value > np.iinfo("int64").max or np.iinfo("int64").min < fill_value < 0 + ): + pytest.xfail("falsely casts to float instead of object") # output is not a generic int, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] # no missing value marker for integers - exp_val_for_array = None if expected_dtype != 'object' else np.nan - - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, - boxed, box_dtype): + exp_val_for_array = None if expected_dtype != "object" else np.nan + + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, boxed, box_dtype): dtype = np.dtype(any_int_dtype) fill_dtype = np.dtype(float_dtype) - if float_dtype == 'float32' and not boxed: - pytest.xfail('falsely upcasts to float64') + if float_dtype == "float32" and not boxed: + pytest.xfail("falsely upcasts to float64") if box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") # create array of given dtype; casts 
"1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -221,27 +259,36 @@ def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, exp_val_for_scalar = np.float64(fill_value) exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_float_with_int(float_dtype, any_int_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_float_with_int(float_dtype, any_int_dtype, boxed, box_dtype): dtype = np.dtype(float_dtype) fill_dtype = np.dtype(any_int_dtype) if box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") # this following xfail is "only" a consequence of the - now strictly # enforced - principle that maybe_promote_with_scalar always casts if not boxed: - pytest.xfail('wrong return type of fill_value') + pytest.xfail("wrong return type of fill_value") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -253,81 +300,110 @@ def test_maybe_promote_float_with_int(float_dtype, any_int_dtype, exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('dtype, fill_value, expected_dtype', [ - # 
float filled with float - ('float32', 1, 'float32'), - ('float32', np.finfo('float32').max * 1.1, 'float64'), - ('float64', 1, 'float64'), - ('float64', np.finfo('float32').max * 1.1, 'float64'), - # complex filled with float - ('complex64', 1, 'complex64'), - ('complex64', np.finfo('float32').max * 1.1, 'complex128'), - ('complex128', 1, 'complex128'), - ('complex128', np.finfo('float32').max * 1.1, 'complex128'), - # float filled with complex - ('float32', 1 + 1j, 'complex64'), - ('float32', np.finfo('float32').max * (1.1 + 1j), 'complex128'), - ('float64', 1 + 1j, 'complex128'), - ('float64', np.finfo('float32').max * (1.1 + 1j), 'complex128'), - # complex filled with complex - ('complex64', 1 + 1j, 'complex64'), - ('complex64', np.finfo('float32').max * (1.1 + 1j), 'complex128'), - ('complex128', 1 + 1j, 'complex128'), - ('complex128', np.finfo('float32').max * (1.1 + 1j), 'complex128') -]) -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "dtype, fill_value, expected_dtype", + [ + # float filled with float + ("float32", 1, "float32"), + ("float32", np.finfo("float32").max * 1.1, "float64"), + ("float64", 1, "float64"), + ("float64", np.finfo("float32").max * 1.1, "float64"), + # complex filled with float + ("complex64", 1, "complex64"), + ("complex64", np.finfo("float32").max * 1.1, "complex128"), + ("complex128", 1, "complex128"), + ("complex128", np.finfo("float32").max * 1.1, "complex128"), + # float filled with complex + ("float32", 1 + 1j, "complex64"), + ("float32", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("float64", 1 + 
1j, "complex128"), + ("float64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + # complex filled with complex + ("complex64", 1 + 1j, "complex64"), + ("complex64", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ("complex128", 1 + 1j, "complex128"), + ("complex128", np.finfo("float32").max * (1.1 + 1j), "complex128"), + ], +) +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_float_with_float( + dtype, fill_value, expected_dtype, boxed, box_dtype +): dtype = np.dtype(dtype) expected_dtype = np.dtype(expected_dtype) if box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") if boxed and is_float_dtype(dtype) and is_complex_dtype(expected_dtype): - pytest.xfail('does not upcast to complex') - if (dtype, expected_dtype) in [('float32', 'float64'), - ('float32', 'complex64'), - ('complex64', 'complex128')]: - pytest.xfail('does not upcast correctly depending on value') + pytest.xfail("does not upcast to complex") + if (dtype, expected_dtype) in [ + ("float32", "float64"), + ("float32", "complex64"), + ("complex64", "complex128"), + ]: + pytest.xfail("does not upcast correctly depending on value") # this following xfails are "only" a consequence of the - now strictly # enforced - principle that maybe_promote_with_scalar always casts if not boxed and abs(fill_value) < 2: - pytest.xfail('wrong return type of fill_value') - if (not boxed and dtype == 'complex128' and expected_dtype == 'complex128' - and is_float_dtype(type(fill_value))): - pytest.xfail('wrong return type of fill_value') + pytest.xfail("wrong return type of fill_value") + if ( + not boxed + and dtype == "complex128" + and expected_dtype == "complex128" + and is_float_dtype(type(fill_value)) + ): + pytest.xfail("wrong return type of 
fill_value") # output is not a generic float, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) def test_maybe_promote_bool_with_any(any_numpy_dtype, boxed, box_dtype): dtype = np.dtype(bool) fill_dtype = np.dtype(any_numpy_dtype) if boxed and fill_dtype == bool: - pytest.xfail('falsely upcasts to object') - if (boxed and box_dtype is None - and is_datetime_or_timedelta_dtype(fill_dtype)): - pytest.xfail('wrongly casts fill_value') + pytest.xfail("falsely upcasts to object") + if boxed and box_dtype is None and is_datetime_or_timedelta_dtype(fill_dtype): + pytest.xfail("wrongly casts fill_value") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -337,31 +413,41 @@ def test_maybe_promote_bool_with_any(any_numpy_dtype, boxed, box_dtype): exp_val_for_scalar = fill_value exp_val_for_array = np.nan if fill_dtype != bool else None - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly 
-]) + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) def test_maybe_promote_any_with_bool(any_numpy_dtype, boxed, box_dtype): dtype = np.dtype(any_numpy_dtype) fill_value = True if boxed: if dtype == bool: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") if dtype not in (str, object) and box_dtype is None: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") if not boxed: if is_datetime_or_timedelta_dtype(dtype): - pytest.xfail('raises error') + pytest.xfail("raises error") # this following xfail is "only" a consequence of the - now strictly # enforced - principle that maybe_promote_with_scalar always casts if dtype == bool: - pytest.xfail('wrong return type of fill_value') + pytest.xfail("wrong return type of fill_value") # filling anything but bool with bool casts to object expected_dtype = np.dtype(object) if dtype != bool else dtype @@ -369,110 +455,145 @@ def test_maybe_promote_any_with_bool(any_numpy_dtype, boxed, box_dtype): exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] exp_val_for_array = np.nan if dtype != bool else None - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.skipif(PY2, reason='no bytes in PY2') -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + 
expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.skipif(PY2, reason="no bytes in PY2") +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype, boxed, box_dtype): dtype = np.dtype(bytes_dtype) fill_dtype = np.dtype(any_numpy_dtype) if issubclass(fill_dtype.type, np.bytes_): if not boxed or box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") # takes the opinion that bool dtype has no missing value marker else: - pytest.xfail('wrong missing value marker') + pytest.xfail("wrong missing value marker") else: if boxed and box_dtype is None: - pytest.xfail('does not upcast to object') - if ((is_integer_dtype(fill_dtype) or is_float_dtype(fill_dtype) - or is_complex_dtype(fill_dtype) or is_object_dtype(fill_dtype) - or is_timedelta64_dtype(fill_dtype)) and not boxed): - pytest.xfail('does not upcast to object') + pytest.xfail("does not upcast to object") + if ( + is_integer_dtype(fill_dtype) + or is_float_dtype(fill_dtype) + or is_complex_dtype(fill_dtype) + or is_object_dtype(fill_dtype) + or is_timedelta64_dtype(fill_dtype) + ) and not boxed: + pytest.xfail("does not upcast to object") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] # filling bytes with anything but bytes casts to object - expected_dtype = (dtype if issubclass(fill_dtype.type, np.bytes_) - else np.dtype(object)) + expected_dtype = ( + dtype if issubclass(fill_dtype.type, np.bytes_) else np.dtype(object) + ) exp_val_for_scalar = fill_value - exp_val_for_array = (None if issubclass(fill_dtype.type, np.bytes_) - else np.nan) - - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, 
exp_val_for_array) - - -@pytest.mark.skipif(PY2, reason='no bytes in PY2') -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype (fixed len) - (True, 'bytes'), # fill_value wrapped in array with generic bytes-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_any_with_bytes(any_numpy_dtype, bytes_dtype, - boxed, box_dtype): + exp_val_for_array = None if issubclass(fill_dtype.type, np.bytes_) else np.nan + + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.skipif(PY2, reason="no bytes in PY2") +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype (fixed len) + (True, "bytes"), # fill_value wrapped in array with generic bytes-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_any_with_bytes(any_numpy_dtype, bytes_dtype, boxed, box_dtype): dtype = np.dtype(any_numpy_dtype) fill_dtype = np.dtype(bytes_dtype) if issubclass(dtype.type, np.bytes_): if not boxed or box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") # takes the opinion that bool dtype has no missing value marker else: - pytest.xfail('wrong missing value marker') + pytest.xfail("wrong missing value marker") else: pass - if (boxed and (box_dtype == 'bytes' or box_dtype is None) - and not (is_string_dtype(dtype) or dtype == bool)): - pytest.xfail('does not upcast to object') + if ( + boxed + and (box_dtype == "bytes" or box_dtype is None) + and not (is_string_dtype(dtype) or dtype == bool) + ): + pytest.xfail("does not upcast to object") if not boxed and is_datetime_or_timedelta_dtype(dtype): - pytest.xfail('raises error') + pytest.xfail("raises error") # create array 
of given dtype - fill_value = b'abc' + fill_value = b"abc" # special case for box_dtype (cannot use fixture in parametrization) - box_dtype = fill_dtype if box_dtype == 'bytes' else box_dtype + box_dtype = fill_dtype if box_dtype == "bytes" else box_dtype # filling bytes with anything but bytes casts to object - expected_dtype = (dtype if issubclass(dtype.type, np.bytes_) - else np.dtype(object)) + expected_dtype = dtype if issubclass(dtype.type, np.bytes_) else np.dtype(object) # output is not a generic bytes, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] exp_val_for_array = None if issubclass(dtype.type, np.bytes_) else np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_datetime64_with_any( + datetime64_dtype, any_numpy_dtype, boxed, box_dtype +): dtype = np.dtype(datetime64_dtype) fill_dtype = np.dtype(any_numpy_dtype) if is_datetime64_dtype(fill_dtype): if box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") else: if boxed and box_dtype is None: - pytest.xfail('does not upcast to object') + pytest.xfail("does not upcast to object") if not boxed: - pytest.xfail('does not upcast to object or 
raises') + pytest.xfail("does not upcast to object or raises") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -488,40 +609,58 @@ def test_maybe_promote_datetime64_with_any(datetime64_dtype, any_numpy_dtype, exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value array with auto-dtype - (True, 'dt_dtype'), # fill_value array with explicit datetime dtype - (True, object), # fill_value array with object dtype - (False, None) # fill_value directly -]) -@pytest.mark.parametrize('fill_value', [ - pd.Timestamp('now'), np.datetime64('now'), - datetime.datetime.now(), datetime.date.today() -], ids=['pd.Timestamp', 'np.datetime64', 'datetime.datetime', 'datetime.date']) -def test_maybe_promote_any_with_datetime64(any_numpy_dtype, datetime64_dtype, - fill_value, boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value array with auto-dtype + (True, "dt_dtype"), # fill_value array with explicit datetime dtype + (True, object), # fill_value array with object dtype + (False, None), # fill_value directly + ], +) +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +def test_maybe_promote_any_with_datetime64( + any_numpy_dtype, datetime64_dtype, fill_value, boxed, box_dtype +): dtype = np.dtype(any_numpy_dtype) if is_datetime64_dtype(dtype): - if (boxed and (box_dtype == object - or (box_dtype is None - and not is_datetime64_dtype(type(fill_value))))): - pytest.xfail('falsely 
upcasts to object') + if boxed and ( + box_dtype == object + or (box_dtype is None and not is_datetime64_dtype(type(fill_value))) + ): + pytest.xfail("falsely upcasts to object") else: - if (boxed and (box_dtype == 'dt_dtype' - or (box_dtype is None - and is_datetime64_dtype(type(fill_value))))): - pytest.xfail('mix of lack of upcasting, resp. wrong missing value') + if boxed and ( + box_dtype == "dt_dtype" + or (box_dtype is None and is_datetime64_dtype(type(fill_value))) + ): + pytest.xfail("mix of lack of upcasting, resp. wrong missing value") if not boxed and is_timedelta64_dtype(dtype): - pytest.xfail('raises error') + pytest.xfail("raises error") # special case for box_dtype - box_dtype = (np.dtype(datetime64_dtype) if box_dtype == 'dt_dtype' - else box_dtype) + box_dtype = np.dtype(datetime64_dtype) if box_dtype == "dt_dtype" else box_dtype # filling datetime with anything but datetime casts to object if is_datetime64_dtype(dtype): @@ -534,22 +673,33 @@ def test_maybe_promote_any_with_datetime64(any_numpy_dtype, datetime64_dtype, exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) def test_maybe_promote_datetimetz_with_any_numpy_dtype( - tz_aware_fixture, any_numpy_dtype, boxed, box_dtype): + tz_aware_fixture, any_numpy_dtype, boxed, box_dtype +): dtype = 
DatetimeTZDtype(tz=tz_aware_fixture) fill_dtype = np.dtype(any_numpy_dtype) if box_dtype != object: - pytest.xfail('does not upcast correctly') + pytest.xfail("does not upcast correctly") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -559,28 +709,39 @@ def test_maybe_promote_datetimetz_with_any_numpy_dtype( exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, - tz_aware_fixture2, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_datetimetz_with_datetimetz( + tz_aware_fixture, tz_aware_fixture2, boxed, box_dtype +): dtype = DatetimeTZDtype(tz=tz_aware_fixture) fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture2) from dateutil.tz import tzlocal + if is_platform_windows() and tz_aware_fixture2 == tzlocal(): - pytest.xfail('Cannot process fill_value with this dtype, see GH 24310') + pytest.xfail("Cannot process fill_value with this dtype, see GH 24310") if dtype.tz == fill_dtype.tz and boxed: - pytest.xfail('falsely upcasts') + pytest.xfail("falsely upcasts") if dtype.tz != fill_dtype.tz and not boxed: - pytest.xfail('falsely upcasts') + pytest.xfail("falsely upcasts") # create array of given dtype; casts "1" to correct dtype fill_value = pd.Series([10 ** 9], 
dtype=fill_dtype)[0] @@ -594,56 +755,86 @@ def test_maybe_promote_datetimetz_with_datetimetz(tz_aware_fixture, expected_dtype = np.dtype(object) exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('fill_value', [None, np.nan, NaT, iNaT], - ids=['None', 'np.nan', 'pd.NaT', 'iNaT']) -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "fill_value", [None, np.nan, NaT, iNaT], ids=["None", "np.nan", "pd.NaT", "iNaT"] +) +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_datetimetz_with_na( + tz_aware_fixture, fill_value, boxed, box_dtype +): dtype = DatetimeTZDtype(tz=tz_aware_fixture) - if (boxed and (box_dtype == object - or (box_dtype is None - and (fill_value is None or fill_value is NaT)))): - pytest.xfail('false upcasts to object') + if boxed and ( + box_dtype == object + or (box_dtype is None and (fill_value is None or fill_value is NaT)) + ): + pytest.xfail("false upcasts to object") # takes the opinion that DatetimeTZ should have single na-marker # using iNaT would lead to errors elsewhere -> NaT if not boxed and fill_value == iNaT: - pytest.xfail('wrong missing value marker') + pytest.xfail("wrong missing value marker") expected_dtype = dtype # DatetimeTZDtype does not use iNaT as missing value marker exp_val_for_scalar = NaT 
exp_val_for_array = NaT - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('fill_value', [ - pd.Timestamp('now'), np.datetime64('now'), - datetime.datetime.now(), datetime.date.today() -], ids=['pd.Timestamp', 'np.datetime64', 'datetime.datetime', 'datetime.date']) -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "fill_value", + [ + pd.Timestamp("now"), + np.datetime64("now"), + datetime.datetime.now(), + datetime.date.today(), + ], + ids=["pd.Timestamp", "np.datetime64", "datetime.datetime", "datetime.date"], +) +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) def test_maybe_promote_any_numpy_dtype_with_datetimetz( - any_numpy_dtype, tz_aware_fixture, fill_value, boxed, box_dtype): + any_numpy_dtype, tz_aware_fixture, fill_value, boxed, box_dtype +): dtype = np.dtype(any_numpy_dtype) fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) if is_datetime_or_timedelta_dtype(dtype) and not boxed: - pytest.xfail('raises error') + pytest.xfail("raises error") fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] @@ -652,28 +843,39 @@ def test_maybe_promote_any_numpy_dtype_with_datetimetz( exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, 
object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_timedelta64_with_any( + timedelta64_dtype, any_numpy_dtype, boxed, box_dtype +): dtype = np.dtype(timedelta64_dtype) fill_dtype = np.dtype(any_numpy_dtype) if is_timedelta64_dtype(fill_dtype): if box_dtype == object: - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") else: if boxed and box_dtype is None: - pytest.xfail('does not upcast to object') + pytest.xfail("does not upcast to object") if not boxed: - pytest.xfail('does not upcast to object or raises') + pytest.xfail("does not upcast to object or raises") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -689,45 +891,63 @@ def test_maybe_promote_timedelta64_with_any(timedelta64_dtype, any_numpy_dtype, exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('fill_value', [ - pd.Timedelta(days=1), np.timedelta64(24, 'h'), datetime.timedelta(1) -], ids=['pd.Timedelta', 'np.timedelta64', 'datetime.timedelta']) -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value array with auto-dtype - (True, 'td_dtype'), # fill_value array with explicit timedelta dtype - (True, object), # fill_value array with object dtype - (False, None) # fill_value directly -]) -def 
test_maybe_promote_any_with_timedelta64(any_numpy_dtype, timedelta64_dtype, - fill_value, boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "fill_value", + [pd.Timedelta(days=1), np.timedelta64(24, "h"), datetime.timedelta(1)], + ids=["pd.Timedelta", "np.timedelta64", "datetime.timedelta"], +) +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value array with auto-dtype + (True, "td_dtype"), # fill_value array with explicit timedelta dtype + (True, object), # fill_value array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_any_with_timedelta64( + any_numpy_dtype, timedelta64_dtype, fill_value, boxed, box_dtype +): dtype = np.dtype(any_numpy_dtype) if is_timedelta64_dtype(dtype): - if (boxed and (box_dtype == object - or (box_dtype is None - and not is_timedelta64_dtype(type(fill_value))))): - pytest.xfail('falsely upcasts to object') + if boxed and ( + box_dtype == object + or (box_dtype is None and not is_timedelta64_dtype(type(fill_value))) + ): + pytest.xfail("falsely upcasts to object") else: - if (boxed and box_dtype is None - and is_timedelta64_dtype(type(fill_value))): - pytest.xfail('does not upcast correctly') - if (not boxed and is_timedelta64_dtype(type(fill_value)) and ( - is_integer_dtype(dtype) or is_float_dtype(dtype) + if boxed and box_dtype is None and is_timedelta64_dtype(type(fill_value)): + pytest.xfail("does not upcast correctly") + if ( + not boxed + and is_timedelta64_dtype(type(fill_value)) + and ( + is_integer_dtype(dtype) + or is_float_dtype(dtype) or is_complex_dtype(dtype) - or issubclass(dtype.type, np.bytes_))): - pytest.xfail('does not upcast correctly') - if box_dtype == 'td_dtype': - pytest.xfail('falsely upcasts') + or issubclass(dtype.type, np.bytes_) + ) + ): + pytest.xfail("does not upcast correctly") + if box_dtype == "td_dtype": + 
pytest.xfail("falsely upcasts") if not boxed and is_datetime64_dtype(dtype): - pytest.xfail('raises error') + pytest.xfail("raises error") # special case for box_dtype - box_dtype = (np.dtype(timedelta64_dtype) if box_dtype == 'td_dtype' - else box_dtype) + box_dtype = np.dtype(timedelta64_dtype) if box_dtype == "td_dtype" else box_dtype # filling anything but timedelta with timedelta casts to object if is_timedelta64_dtype(dtype): @@ -740,23 +960,31 @@ def test_maybe_promote_any_with_timedelta64(any_numpy_dtype, timedelta64_dtype, exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype, boxed, box_dtype): dtype = np.dtype(string_dtype) fill_dtype = np.dtype(any_numpy_dtype) - if (boxed and box_dtype is None - and is_datetime_or_timedelta_dtype(fill_dtype)): - pytest.xfail('wrong missing value marker') + if boxed and box_dtype is None and is_datetime_or_timedelta_dtype(fill_dtype): + pytest.xfail("wrong missing value marker") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -766,57 +994,80 @@ def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype, 
exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype (fixed len) - (True, 'str'), # fill_value wrapped in array with generic string-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_any_with_string(any_numpy_dtype, string_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype (fixed len) + (True, "str"), # fill_value wrapped in array with generic string-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_any_with_string(any_numpy_dtype, string_dtype, boxed, box_dtype): dtype = np.dtype(any_numpy_dtype) fill_dtype = np.dtype(string_dtype) if is_datetime_or_timedelta_dtype(dtype) and box_dtype != object: - pytest.xfail('does not upcast or raises') - if (boxed and box_dtype in (None, 'str') and ( - is_integer_dtype(dtype) or is_float_dtype(dtype) + pytest.xfail("does not upcast or raises") + if ( + boxed + and box_dtype in (None, "str") + and ( + is_integer_dtype(dtype) + or is_float_dtype(dtype) or is_complex_dtype(dtype) - or issubclass(dtype.type, np.bytes_))): - pytest.xfail('does not upcast correctly') + or issubclass(dtype.type, np.bytes_) + ) + ): + pytest.xfail("does not upcast correctly") # create array of given dtype - fill_value = 'abc' + fill_value = "abc" # special case for box_dtype (cannot use fixture in parametrization) - box_dtype = fill_dtype if box_dtype == 'str' else box_dtype + box_dtype = fill_dtype if box_dtype == "str" else 
box_dtype # filling string with anything casts to object expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype, boxed, box_dtype): dtype = np.dtype(object_dtype) fill_dtype = np.dtype(any_numpy_dtype) - if (boxed and box_dtype is None - and is_datetime_or_timedelta_dtype(fill_dtype)): - pytest.xfail('wrong missing value marker') + if boxed and box_dtype is None and is_datetime_or_timedelta_dtype(fill_dtype): + pytest.xfail("wrong missing value marker") # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -826,21 +1077,30 @@ def test_maybe_promote_object_with_any(object_dtype, any_numpy_dtype, exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, None), # fill_value wrapped in array with auto-dtype - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_any_with_object(any_numpy_dtype, 
object_dtype, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, None), # fill_value wrapped in array with auto-dtype + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_any_with_object(any_numpy_dtype, object_dtype, boxed, box_dtype): dtype = np.dtype(any_numpy_dtype) if not boxed and is_datetime_or_timedelta_dtype(dtype): - pytest.xfail('raises error') + pytest.xfail("raises error") # create array of object dtype from a scalar value (i.e. passing # dtypes.common.is_scalar), which can however not be cast to int/float etc. @@ -851,43 +1111,65 @@ def test_maybe_promote_any_with_object(any_numpy_dtype, object_dtype, exp_val_for_scalar = fill_value exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) - - -@pytest.mark.parametrize('fill_value', [None, np.nan, NaT, iNaT], - ids=['None', 'np.nan', 'pd.NaT', 'iNaT']) -@pytest.mark.parametrize('boxed, box_dtype', [ - (True, object), # fill_value wrapped in array with object dtype - (False, None) # fill_value directly -]) -def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, fill_value, - boxed, box_dtype): + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) + + +@pytest.mark.parametrize( + "fill_value", [None, np.nan, NaT, iNaT], ids=["None", "np.nan", "pd.NaT", "iNaT"] +) +@pytest.mark.parametrize( + "boxed, box_dtype", + [ + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value directly + ], +) +def test_maybe_promote_any_numpy_dtype_with_na( + any_numpy_dtype, fill_value, boxed, box_dtype +): dtype = np.dtype(any_numpy_dtype) - if (dtype == bytes and not boxed - and 
fill_value is not None and fill_value is not NaT): - pytest.xfail('does not upcast to object') - elif dtype == 'uint64' and not boxed and fill_value == iNaT: - pytest.xfail('does not upcast correctly') + if ( + dtype == bytes + and not boxed + and fill_value is not None + and fill_value is not NaT + ): + pytest.xfail("does not upcast to object") + elif dtype == "uint64" and not boxed and fill_value == iNaT: + pytest.xfail("does not upcast correctly") elif is_datetime_or_timedelta_dtype(dtype) and boxed: - pytest.xfail('falsely upcasts to object') - elif (boxed and (is_integer_dtype(dtype) or is_float_dtype(dtype) - or is_complex_dtype(dtype)) - and fill_value is not NaT and dtype != 'uint64'): - pytest.xfail('falsely upcasts to object') - elif (boxed and dtype == 'uint64' - and (fill_value is np.nan or fill_value is None)): - pytest.xfail('falsely upcasts to object') + pytest.xfail("falsely upcasts to object") + elif ( + boxed + and ( + is_integer_dtype(dtype) or is_float_dtype(dtype) or is_complex_dtype(dtype) + ) + and fill_value is not NaT + and dtype != "uint64" + ): + pytest.xfail("falsely upcasts to object") + elif boxed and dtype == "uint64" and (fill_value is np.nan or fill_value is None): + pytest.xfail("falsely upcasts to object") # below: opinionated that iNaT should be interpreted as missing value - elif (not boxed and (is_float_dtype(dtype) or is_complex_dtype(dtype)) - and fill_value == iNaT): - pytest.xfail('does not cast to missing value marker correctly') - elif ((is_string_dtype(dtype) or dtype == bool) - and not boxed and fill_value == iNaT): - pytest.xfail('does not cast to missing value marker correctly') - - if is_integer_dtype(dtype) and dtype == 'uint64' and fill_value == iNaT: + elif ( + not boxed + and (is_float_dtype(dtype) or is_complex_dtype(dtype)) + and fill_value == iNaT + ): + pytest.xfail("does not cast to missing value marker correctly") + elif (is_string_dtype(dtype) or dtype == bool) and not boxed and fill_value == iNaT: + 
pytest.xfail("does not cast to missing value marker correctly") + + if is_integer_dtype(dtype) and dtype == "uint64" and fill_value == iNaT: # uint64 + negative int casts to object; iNaT is considered as missing expected_dtype = np.dtype(object) exp_val_for_scalar = np.nan @@ -930,11 +1212,18 @@ def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype, fill_value, else: # expected_dtype = float / complex / object exp_val_for_array = np.nan - _check_promote(dtype, fill_value, boxed, box_dtype, expected_dtype, - exp_val_for_scalar, exp_val_for_array) + _check_promote( + dtype, + fill_value, + boxed, + box_dtype, + expected_dtype, + exp_val_for_scalar, + exp_val_for_array, + ) -@pytest.mark.parametrize('dim', [0, 2, 3]) +@pytest.mark.parametrize("dim", [0, 2, 3]) def test_maybe_promote_dimensions(any_numpy_dtype, dim): dtype = np.dtype(any_numpy_dtype) @@ -947,12 +1236,13 @@ def test_maybe_promote_dimensions(any_numpy_dtype, dim): # test against 1-dimensional case expected_dtype, expected_missing_value = maybe_promote( - dtype, np.array([1], dtype=dtype)) + dtype, np.array([1], dtype=dtype) + ) result_dtype, result_missing_value = maybe_promote(dtype, fill_array) assert result_dtype == expected_dtype # None == None, iNaT == iNaT, but np.nan != np.nan - assert ((result_missing_value == expected_missing_value) - or (result_missing_value is np.nan - and expected_missing_value is np.nan)) + assert (result_missing_value == expected_missing_value) or ( + result_missing_value is np.nan and expected_missing_value is np.nan + ) From 0c5b5246115e64b15d048d1cc503a1cfd50317a5 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Tue, 8 Oct 2019 08:32:54 +0200 Subject: [PATCH 5/7] reduce diff with master and skip some more tests --- pandas/tests/dtypes/cast/test_promote.py | 55 +++++++++++------------- 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 639161582a80c..c5b4b4397684a 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -318,6 +318,8 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box): ) +# override parametrization due to too many xfails; see GH 23982 / 25425 +@pytest.mark.parametrize("box", [(True, None), (False, None)]) def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, box): dtype = np.dtype(any_int_dtype) fill_dtype = np.dtype(float_dtype) @@ -343,19 +345,14 @@ def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, box): ) +# override parametrization due to too many xfails; see GH 23982 / 25425 +@pytest.mark.parametrize("box", [(True, None), (False, None)]) def test_maybe_promote_float_with_int(float_dtype, any_int_dtype, box): dtype = np.dtype(float_dtype) fill_dtype = np.dtype(any_int_dtype) boxed, box_dtype = box # read from parametrized fixture - if box_dtype == object: - pytest.xfail("falsely upcasts to object") - # this following xfail is "only" a consequence of the - now strictly - # enforced - principle that maybe_promote_with_scalar always casts - if not boxed: - pytest.xfail("wrong return type of fill_value") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -551,12 +548,12 @@ def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype_reduced, box) @pytest.mark.parametrize( "box", [ - (True, None), # fill_value wrapped in array with auto-dtype (fixed len) - (True, "bytes"), # fill_value wrapped in array with generic bytes-dtype + # disabled due to too many xfails; see GH 23982 / 
25425 + # (True, None), # fill_value wrapped in array with auto-dtype (fixed len) + # (True, "bytes"), # fill_value wrapped in array with generic bytes-dtype (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value directly ], - ids=["True-None", "True-bytes", "True-object", "False-None"], ) def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced, bytes_dtype, box): dtype = np.dtype(any_numpy_dtype_reduced) @@ -625,9 +622,9 @@ def test_maybe_promote_datetime64_with_any( # filling datetime with anything but datetime casts to object if is_datetime64_dtype(fill_dtype): expected_dtype = dtype - # for datetime dtypes, scalar values get cast to pd.Timestamp.value - exp_val_for_scalar = pd.Timestamp(fill_value).value - exp_val_for_array = iNaT + # for datetime dtypes, scalar values get cast to to_datetime64 + exp_val_for_scalar = pd.Timestamp(fill_value).to_datetime64() + exp_val_for_array = np.datetime64("NaT", "ns") else: expected_dtype = np.dtype(object) exp_val_for_scalar = fill_value @@ -649,11 +646,11 @@ def test_maybe_promote_datetime64_with_any( "box", [ (True, None), # fill_value wrapped in array with default dtype - (True, "dt_dtype"), # fill_value in array with explicit datetime dtype - (True, object), # fill_value wrapped in array with object dtype + # disabled due to too many xfails; see GH 23982 / 25425 + # (True, 'dt_dtype'), # fill_value in array with explicit datetime dtype + # (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value passed on as scalar ], - ids=["True-None", "True-dt_dtype", "True-object", "False-None"], ) @pytest.mark.parametrize( "fill_value", @@ -709,6 +706,8 @@ def test_maybe_promote_any_with_datetime64( ) +# override parametrization due to too many xfails; see GH 23982 / 25425 +@pytest.mark.parametrize("box", [(True, object)]) def test_maybe_promote_datetimetz_with_any_numpy_dtype( tz_aware_fixture, any_numpy_dtype_reduced, box ): @@ -735,6 +734,8 @@ def 
test_maybe_promote_datetimetz_with_any_numpy_dtype( ) +# override parametrization due to too many xfails; see GH 23982 / 25425 +@pytest.mark.parametrize("box", [(True, None), (True, object)]) def test_maybe_promote_datetimetz_with_datetimetz( tz_aware_fixture, tz_aware_fixture2, box ): @@ -773,6 +774,8 @@ def test_maybe_promote_datetimetz_with_datetimetz( @pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) +# override parametrization due to too many xfails; see GH 23982 / 25425 +@pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box): dtype = DatetimeTZDtype(tz=tz_aware_fixture) @@ -879,11 +882,11 @@ def test_maybe_promote_timedelta64_with_any( "box", [ (True, None), # fill_value wrapped in array with default dtype - (True, "td_dtype"), # fill_value in array with explicit timedelta dtype + # disabled due to too many xfails; see GH 23982 / 25425 + # (True, 'td_dtype'), # fill_value in array with explicit timedelta dtype (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value passed on as scalar ], - ids=["True-None", "True-td_dtype", "True-object", "False-None"], ) def test_maybe_promote_any_with_timedelta64( any_numpy_dtype_reduced, timedelta64_dtype, fill_value, box @@ -954,8 +957,9 @@ def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced, bo @pytest.mark.parametrize( "box", [ - (True, None), # fill_value wrapped in array with default dtype - (True, "str"), # fill_value wrapped in array with generic string-dtype + # disabled due to too many xfails; see GH 23982 / 25425 + # (True, None), # fill_value wrapped in array with default dtype + # (True, 'str'), # fill_value wrapped in array with generic string-dtype (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value passed on as scalar ], @@ -1036,15 +1040,8 @@ def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype, bo 
@pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) -# override parametrization of box, because default dtype for na is always float -@pytest.mark.parametrize( - "box", - [ - (True, object), # fill_value wrapped in array with object dtype - (False, None), # fill_value passed on as scalar - ], - ids=["True-object", "False-None"], -) +# override parametrization due to too many xfails; see GH 23982 / 25425 +@pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_any_numpy_dtype_with_na( any_numpy_dtype_reduced, fill_value, box ): From 6ca1f8fd79eeed779a20b7820cc0d50b8c6b8cb4 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" Date: Tue, 8 Oct 2019 10:37:14 +0200 Subject: [PATCH 6/7] lint --- pandas/tests/dtypes/cast/test_promote.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index c5b4b4397684a..9e6f7e0a81952 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -19,6 +19,7 @@ is_integer_dtype, is_object_dtype, is_scalar, + is_string_dtype, is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype From 0eb4ffa733005942f1df8a2cc34b139c3d0b856b Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" Date: Tue, 8 Oct 2019 11:58:13 +0200 Subject: [PATCH 7/7] remove spurious 'pass', and re-enable corresponding tests --- pandas/tests/dtypes/cast/test_promote.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 9e6f7e0a81952..7d70c706ea910 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -549,9 +549,8 @@ def test_maybe_promote_bytes_with_any(bytes_dtype, any_numpy_dtype_reduced, box) @pytest.mark.parametrize( "box", [ - # disabled due to too many xfails; see GH 23982 / 25425 - # (True, None), # fill_value wrapped in array with auto-dtype (fixed len) - # (True, "bytes"), # fill_value wrapped in array with generic bytes-dtype + (True, None), # fill_value wrapped in array with auto-dtype (fixed len) + (True, "bytes"), # fill_value wrapped in array with generic bytes-dtype (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value directly ], @@ -568,7 +567,6 @@ def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced, bytes_dtype, box) else: pytest.xfail("wrong missing value marker") else: - pass if ( boxed and (box_dtype == "bytes" or box_dtype is None)