diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 3e92906be706c..7720dfa713c03 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1,6 +1,7 @@ """ routings for casting """ from datetime import datetime, timedelta +import warnings import numpy as np @@ -48,6 +49,7 @@ ABCDataFrame, ABCDatetimeArray, ABCDatetimeIndex, + ABCIndexClass, ABCPeriodArray, ABCPeriodIndex, ABCSeries, @@ -59,6 +61,9 @@ _int16_max = np.iinfo(np.int16).max _int32_max = np.iinfo(np.int32).max _int64_max = np.iinfo(np.int64).max +_int64_min = np.iinfo(np.int64).min +_uint64_max = np.iinfo(np.uint64).max +_float32_max = np.finfo(np.float32).max def maybe_convert_platform(values): @@ -335,6 +340,40 @@ def changeit(): def maybe_promote(dtype, fill_value=np.nan): + """ + Determine minimal dtype to hold fill_value, when starting from dtype + + Parameters + ---------- + dtype : DType + The dtype to start from. + fill_value : scalar or np.ndarray / Series / Index + The value that the output dtype needs to be able to hold. + + NOTE: using arrays is discouraged and will likely be removed from this + method in the foreseeable future. Use maybe_promote_with_array instead. + + Returns + ------- + dtype : DType + The updated dtype. + fill_value : scalar + The type of this value depends on the type of the passed fill_value + + * If fill_value is a scalar, the method returns that scalar, but + modified to fit the updated dtype. For example, a datetime fill_value + will be returned as an integer (representing ns) for M8[ns], and + values considered missing (see pd.isna) will be returned as the + corresponding missing value marker for the updated dtype. + * If fill_value is an ndarray/Series/Index, this method will always + return the missing value marker for the updated dtype. This value + will be None for dtypes that cannot hold missing values (integers, + booleans, bytes). 
+ + See Also + -------- + maybe_promote_with_array : underlying method for array case + """ # if we passed an array here, determine the fill value by dtype if isinstance(fill_value, np.ndarray): if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)): @@ -462,6 +501,281 @@ def maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value +def maybe_promote_with_array(dtype, fill_value=np.nan): + """ + Determine minimal dtype to hold fill_value, when starting from dtype + + This will also return the default missing value for the resulting dtype, if + necessary (e.g. for datetime / timedelta, the missing value will be `NaT`) + + Parameters + ---------- + dtype : DType + The dtype to start from. + fill_value : np.ndarray / Series / Index + Array-like of values that the output dtype needs to be able to hold. + + Returns + ------- + dtype : DType + The updated dtype. + na_value : scalar + The missing value for the new dtype. Returns None for dtypes that + cannot hold missing values (integers, booleans, bytes). + + See Also + -------- + maybe_promote : similar method for scalar case + + Examples + -------- + >>> maybe_promote_with_array(np.dtype('int'), fill_value=np.array([None])) + (dtype('float64'), nan) + >>> maybe_promote_with_array(np.dtype('float'), + ... fill_value=np.array(['abcd'])) + (dtype('O'), nan) + + For datetimes without timezones, the missing value marker is + numpy.datetime64('NaT'), and similarly for timedelta values. + + >>> maybe_promote_with_array(np.dtype('datetime64[ns]'), + ... fill_value=np.array([None])) + (dtype('<M8[ns]'), numpy.datetime64('NaT')) + >>> maybe_promote_with_array(np.dtype('datetime64[ns]'), + ... fill_value=np.array(['2018-01-01'])) + (dtype('O'), nan) + + The method will infer as conservatively as possible for integer types: + + >>> maybe_promote_with_array( + ... np.dtype('uint8'), fill_value=np.array([np.iinfo('uint8').max + 1]) + ... 
) + (dtype('uint16'), None) + >>> maybe_promote_with_array(np.dtype('uint8'), fill_value=np.array([-1])) + (dtype('int16'), None) + """ + + if isinstance(fill_value, np.ndarray): + if fill_value.ndim == 0: + # zero-dimensional arrays cannot be iterated over + fill_value = np.expand_dims(fill_value, 0) + elif fill_value.ndim > 1: + # ndarray, but too high-dimensional + fill_value = fill_value.ravel() + elif not isinstance(fill_value, (ABCSeries, ABCIndexClass)): + fill_type = type(fill_value).__name__ + raise ValueError( + "fill_value must either be a Series / Index / " + "np.ndarray, received {}".format(fill_type) + ) + + if all(isna(x) for x in fill_value): + # only missing values (or no values at all) + + if is_datetime64_dtype(dtype): + return dtype, np.datetime64("NaT", "ns") + elif is_timedelta64_dtype(dtype): + return dtype, np.timedelta64("NaT", "ns") + elif is_datetime64tz_dtype(dtype): + return dtype, NaT + + na_value = np.nan + if len(fill_value) == 0: + # empty array; no values to force change + if is_integer_dtype(dtype) or dtype in (bool, bytes): + # these types do not have a missing value marker + na_value = None + # otherwise nothing changes + elif any(x is NaT for x in fill_value): + # presence of pd.NaT upcasts everything that's not + # datetime/timedelta (see above) to object + dtype = np.dtype(object) + elif is_integer_dtype(dtype): + # integer + other missing value (np.nan / None) casts to float + dtype = np.dtype("float64") + elif is_extension_array_dtype(dtype): + na_value = dtype.na_value + elif is_string_dtype(dtype) or dtype in (bool, bytes): + # original dtype cannot hold nans + dtype = np.dtype(object) + + return dtype, na_value + + fill_dtype = fill_value.dtype + if fill_dtype == object: + # for object dtype, we determine if we actually need to upcast + # by inferring the dtype of fill_value + inferred_dtype = lib.infer_dtype(fill_value, skipna=True) + + # cases that would yield 'empty' have been treated in branch above + if 
inferred_dtype in ["period", "interval", "datetime64tz"]: + # TODO: handle & test pandas-dtypes + # TODO: lib.infer_dtype does not support datetime64tz yet + pass + else: + # rest can be mapped to numpy dtypes + map_inferred_to_numpy = { + "floating": float, + "mixed-integer-float": float, + "decimal": float, + "integer": int, + "boolean": bool, + "complex": complex, + "bytes": bytes, + "datetime64": "datetime64[ns]", + "datetime": "datetime64[ns]", + "date": "datetime64[ns]", + "timedelta64": "timedelta64[ns]", + "timedelta": "timedelta64[ns]", + "time": object, # time cannot be cast to datetime/timedelta + "string": object, + "mixed-integer": object, + "mixed": object, + } + fill_dtype = np.dtype(map_inferred_to_numpy[inferred_dtype]) + + # now that we have the correct dtype; check how we must upcast + # * extension arrays + # * int vs int + # * int vs float / complex + # * float vs float + # * float vs complex (and vice versa) + # * bool + # * datetimetz + # * datetime + # * timedelta + # * string/object + + # if (is_extension_array_dtype(dtype) + # or is_extension_array_dtype(fill_dtype)): + # # TODO: dispatch to ExtensionDType.maybe_promote? GH 24246 + if is_integer_dtype(dtype) and is_integer_dtype(fill_dtype): + if is_unsigned_integer_dtype(dtype) and all(fill_value >= 0): + # can stay unsigned + fill_max = fill_value.max() + if fill_max > _uint64_max: + return np.dtype(object), np.nan + + while fill_max > np.iinfo(dtype).max: + # itemsize is the number of bytes; times eight is number of + # bits, which is used in the string identifier of the dtype; + # if fill_max is above the max for that dtype, + # we double the number of bytes/bits. 
+ dtype = np.dtype("uint{}".format(dtype.itemsize * 8 * 2)) + return dtype, None + else: + # cannot stay unsigned + if dtype == "uint64": + # need to hold negative values, but int64 cannot hold + # maximum of uint64 -> needs object + return np.dtype(object), np.nan + elif is_unsigned_integer_dtype(dtype): + # need to turn into signed integers to hold negative values + # int8 cannot hold maximum of uint8; similar for 16/32 + # therefore, upcast at least to next higher int-type + dtype = np.dtype("int{}".format(dtype.itemsize * 8 * 2)) + + fill_max = fill_value.max() + fill_min = fill_value.min() + if isinstance(fill_max, np.uint64): + # numpy comparator is broken for uint64; + # see https://github.com/numpy/numpy/issues/12525 + # use .item to get int object + fill_max = fill_max.item() + + # comparison mechanics are broken above _int64_max; + # use greater equal instead of equal + if fill_max >= _int64_max + 1 or fill_min <= _int64_min - 1: + return np.dtype(object), np.nan + + while fill_max > np.iinfo(dtype).max or fill_min < np.iinfo(dtype).min: + # same mechanism as above, but for int instead of uint + dtype = np.dtype("int{}".format(dtype.itemsize * 8 * 2)) + return dtype, None + elif is_integer_dtype(dtype) and is_float_dtype(fill_dtype): + # int with float: always upcasts to float64 + return np.dtype("float64"), np.nan + elif is_integer_dtype(dtype) and is_complex_dtype(fill_dtype): + # int with complex: always upcasts to complex128 + return np.dtype("complex128"), np.nan + elif (is_float_dtype(dtype) or is_complex_dtype(dtype)) and is_integer_dtype( + fill_dtype + ): + # float/complex with int: always stays original float/complex dtype + return dtype, np.nan + elif is_float_dtype(dtype) and is_float_dtype(fill_dtype): + # float with float; upcasts depending on absolute max of fill_value + if dtype == "float32" and np.abs(fill_value).max() <= _float32_max: + return dtype, np.nan + # all other cases return float64 + return np.dtype("float64"), np.nan + elif 
(is_float_dtype(dtype) or is_complex_dtype(dtype)) and ( + is_float_dtype(fill_dtype) or is_complex_dtype(fill_dtype) + ): + # at least one is complex; otherwise we'd have hit float/float above + with warnings.catch_warnings(): + # work around GH 27610 + warnings.filterwarnings("ignore", category=FutureWarning) + if ( + dtype in ["float32", "complex64"] + and max( + np.abs(np.real(fill_value)).max(), # also works for float + np.abs(np.imag(fill_value)).max(), + ) + <= _float32_max + ): + return np.complex64, np.nan + # all other cases return complex128 + return np.dtype("complex128"), np.nan + elif is_bool_dtype(dtype) and is_bool_dtype(fill_dtype): + # bool with bool is the only combination that stays bool; any other + # combination involving bool upcasts to object, see else-clause below + return dtype, None + elif ( + is_datetime64tz_dtype(dtype) + and is_datetime64tz_dtype(fill_dtype) + and (dtype.tz == fill_dtype.tz) + ): + # datetimetz with datetimetz with the same timezone is the only + # combination that stays datetimetz (in particular, mixing timezones or + # tz-aware and tz-naive datetimes will cast to object); any other + # combination involving datetimetz upcasts to object, see below + return dtype, NaT + elif (is_timedelta64_dtype(dtype) and is_timedelta64_dtype(fill_dtype)) or ( + is_datetime64_dtype(dtype) and is_datetime64_dtype(fill_dtype) + ): + # datetime and timedelta try to cast; if successful, keep dtype, + # otherwise upcast to object + try: + with warnings.catch_warnings(): + msg = ( + "parsing timezone aware datetimes is deprecated; " + "this will raise an error in the future" + ) + warnings.filterwarnings( + "ignore", message=msg, category=DeprecationWarning + ) + fill_value.astype(dtype) + + # can simplify if-cond. compared to cond. 
for entering this branch + if is_datetime64_dtype(dtype): + na_value = np.datetime64("NaT", "ns") + else: + na_value = np.timedelta64("NaT", "ns") + except (ValueError, TypeError): + dtype = np.dtype(object) + na_value = np.nan + return dtype, na_value + else: + # anything else (e.g. strings, objects, bytes, or unmatched + # bool / datetime / datetimetz / timedelta) + return np.dtype(object), np.nan + + def _ensure_dtype_type(value, dtype): """ Ensure that the given value is an instance of the given dtype. diff --git a/pandas/tests/dtypes/cast/test_promote.py b/pandas/tests/dtypes/cast/test_promote.py index 5c61574eddb50..34aaa0605b15f 100644 --- a/pandas/tests/dtypes/cast/test_promote.py +++ b/pandas/tests/dtypes/cast/test_promote.py @@ -3,6 +3,7 @@ """ import datetime +import warnings import numpy as np import pytest @@ -10,7 +11,7 @@ from pandas._libs.tslibs import NaT from pandas.compat import is_platform_windows -from pandas.core.dtypes.cast import maybe_promote +from pandas.core.dtypes.cast import maybe_promote, maybe_promote_with_array from pandas.core.dtypes.common import ( is_complex_dtype, is_datetime64_dtype, @@ -19,7 +20,6 @@ is_integer_dtype, is_object_dtype, is_scalar, - is_string_dtype, is_timedelta64_dtype, ) from pandas.core.dtypes.dtypes import DatetimeTZDtype @@ -134,7 +134,7 @@ def _check_promote( # box_dtype; the expected value returned from maybe_promote is the # missing value marker for the returned dtype. fill_array = np.array([fill_value], dtype=box_dtype) - result_dtype, result_fill_value = maybe_promote(dtype, fill_array) + result_dtype, result_fill_value = maybe_promote_with_array(dtype, fill_array) expected_fill_value = exp_val_for_array else: # here, we pass on fill_value as a scalar directly; the expected value @@ -163,7 +163,11 @@ def _assert_match(result_fill_value, expected_fill_value): # On some builds, type comparison fails, e.g. 
np.int32 != np.int32 assert res_type == ex_type or res_type.__name__ == ex_type.__name__ - match_value = result_fill_value == expected_fill_value + with warnings.catch_warnings(): + # we do not care about this warning, NaT is handled below anyway + msg = "In the future, 'NAT == x' and 'x == NAT' will always be False" + warnings.filterwarnings("ignore", message=msg, category=FutureWarning) + match_value = result_fill_value == expected_fill_value # Note: type check above ensures that we have the _same_ NA value # for missing values, None == None (which is checked @@ -285,15 +289,7 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box): expected_dtype = np.dtype(expected_dtype) boxed, box_dtype = box # read from parametrized fixture - if boxed: - if expected_dtype != object: - pytest.xfail("falsely casts to object") - if box_dtype is None and ( - fill_value > np.iinfo("int64").max or np.iinfo("int64").min < fill_value < 0 - ): - pytest.xfail("falsely casts to float instead of object") - - # output is not a generic int, but corresponds to expected_dtype + # output is not a python int, but a numpy int of expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] # no missing value marker for integers exp_val_for_array = None if expected_dtype != "object" else np.nan @@ -309,8 +305,6 @@ def test_maybe_promote_int_with_int(dtype, fill_value, expected_dtype, box): ) -# override parametrization due to to many xfails; see GH 23982 / 25425 -@pytest.mark.parametrize("box", [(True, None), (False, None)]) def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, box): dtype = np.dtype(any_int_dtype) fill_dtype = np.dtype(float_dtype) @@ -336,8 +330,6 @@ def test_maybe_promote_int_with_float(any_int_dtype, float_dtype, box): ) -# override parametrization due to to many xfails; see GH 23982 / 25425 -@pytest.mark.parametrize("box", [(True, None), (False, None)]) def test_maybe_promote_float_with_int(float_dtype, 
any_int_dtype, box): dtype = np.dtype(float_dtype) @@ -396,17 +388,6 @@ def test_maybe_promote_float_with_float(dtype, fill_value, expected_dtype, box): expected_dtype = np.dtype(expected_dtype) boxed, box_dtype = box # read from parametrized fixture - if box_dtype == object: - pytest.xfail("falsely upcasts to object") - elif boxed and is_float_dtype(dtype) and is_complex_dtype(expected_dtype): - pytest.xfail("does not upcast to complex") - elif boxed and (dtype, expected_dtype) in [ - ("float32", "float64"), - ("float32", "complex64"), - ("complex64", "complex128"), - ]: - pytest.xfail("does not upcast correctly depending on value") - # output is not a generic float, but corresponds to expected_dtype exp_val_for_scalar = np.array([fill_value], dtype=expected_dtype)[0] exp_val_for_array = np.nan @@ -427,13 +408,6 @@ def test_maybe_promote_bool_with_any(any_numpy_dtype_reduced, box): fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if boxed and fill_dtype == bool: - pytest.xfail("falsely upcasts to object") - if boxed and box_dtype is None and fill_dtype.kind == "M": - pytest.xfail("wrongly casts fill_value") - if boxed and box_dtype is None and fill_dtype.kind == "m": - pytest.xfail("wrongly casts fill_value") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -458,11 +432,6 @@ def test_maybe_promote_any_with_bool(any_numpy_dtype_reduced, box): fill_value = True boxed, box_dtype = box # read from parametrized fixture - if boxed and dtype == bool: - pytest.xfail("falsely upcasts to object") - if boxed and dtype not in (str, object) and box_dtype is None: - pytest.xfail("falsely upcasts to object") - # filling anything but bool with bool casts to object expected_dtype = np.dtype(object) if dtype != bool else dtype # output is not a generic bool, but corresponds to expected_dtype @@ -508,25 +477,18 @@ def test_maybe_promote_bytes_with_any(bytes_dtype, 
any_numpy_dtype_reduced, box) @pytest.mark.parametrize( "box", [ - (True, None), # fill_value wrapped in array with auto-dtype (fixed len) - (True, "bytes"), # fill_value wrapped in array with generic bytes-dtype + (True, None), # fill_value wrapped in array with default dtype + (True, "bytes"), # fill_value in array with generic bytes dtype (True, object), # fill_value wrapped in array with object dtype - (False, None), # fill_value directly + (False, None), # fill_value passed on as scalar ], + ids=["True-None", "True-bytes", "True-object", "False-None"], ) def test_maybe_promote_any_with_bytes(any_numpy_dtype_reduced, bytes_dtype, box): dtype = np.dtype(any_numpy_dtype_reduced) fill_dtype = np.dtype(bytes_dtype) boxed, box_dtype = box # read from parametrized fixture - if not issubclass(dtype.type, np.bytes_): - if ( - boxed - and (box_dtype == "bytes" or box_dtype is None) - and not (is_string_dtype(dtype) or dtype == bool) - ): - pytest.xfail("does not upcast to object") - # create array of given dtype fill_value = b"abc" @@ -557,13 +519,6 @@ def test_maybe_promote_datetime64_with_any( fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_datetime64_dtype(fill_dtype): - if box_dtype == object: - pytest.xfail("falsely upcasts to object") - else: - if boxed and box_dtype is None: - pytest.xfail("does not upcast to object") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -594,11 +549,11 @@ def test_maybe_promote_datetime64_with_any( "box", [ (True, None), # fill_value wrapped in array with default dtype - # disabled due to too many xfails; see GH 23982 / 25425 - # (True, 'dt_dtype'), # fill_value in array with explicit datetime dtype - # (True, object), # fill_value wrapped in array with object dtype + (True, "dt_dtype"), # fill_value in array with explicit datetime dtype + (True, object), # fill_value wrapped in array with object dtype (False, 
None), # fill_value passed on as scalar ], + ids=["True-None", "True-dt_dtype", "True-object", "False-None"], ) @pytest.mark.parametrize( "fill_value", @@ -616,23 +571,10 @@ def test_maybe_promote_any_with_datetime64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_datetime64_dtype(dtype): - if boxed and ( - box_dtype == object - or (box_dtype is None and not is_datetime64_dtype(type(fill_value))) - ): - pytest.xfail("falsely upcasts to object") - else: - if boxed and ( - box_dtype == "dt_dtype" - or (box_dtype is None and is_datetime64_dtype(type(fill_value))) - ): - pytest.xfail("mix of lack of upcasting, resp. wrong missing value") - # special case for box_dtype box_dtype = np.dtype(datetime64_dtype) if box_dtype == "dt_dtype" else box_dtype - # filling datetime with anything but datetime casts to object + # filling anything but datetime with datetime casts to object if is_datetime64_dtype(dtype): expected_dtype = dtype # for datetime dtypes, scalar values get cast to pd.Timestamp.value @@ -654,8 +596,6 @@ def test_maybe_promote_any_with_datetime64( ) -# override parametrization due to to many xfails; see GH 23982 / 25425 -@pytest.mark.parametrize("box", [(True, object)]) def test_maybe_promote_datetimetz_with_any_numpy_dtype( tz_aware_fixture, any_numpy_dtype_reduced, box ): @@ -663,6 +603,9 @@ def test_maybe_promote_datetimetz_with_any_numpy_dtype( fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture + if not boxed: + pytest.xfail("unfixed error: does not upcast correctly") + # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -682,8 +625,6 @@ def test_maybe_promote_datetimetz_with_any_numpy_dtype( ) -# override parametrization due to to many xfails; see GH 23982 / 25425 -@pytest.mark.parametrize("box", [(True, None), (True, object)]) def test_maybe_promote_datetimetz_with_datetimetz( 
tz_aware_fixture, tz_aware_fixture2, box ): @@ -693,12 +634,17 @@ def test_maybe_promote_datetimetz_with_datetimetz( from dateutil.tz import tzlocal + if not boxed: + pytest.xfail("unfixed error: does not upcast for unmatched timezones") if is_platform_windows() and tz_aware_fixture2 == tzlocal(): pytest.xfail("Cannot process fill_value with this dtype, see GH 24310") - if dtype.tz == fill_dtype.tz and boxed: - pytest.xfail("falsely upcasts") + if dtype.tz == fill_dtype.tz: + # here we should keep the datetime64tz dtype, but since that cannot be + # inferred correctly for fill_value, the calling dtype ends up being + # compared to a tz-naive datetime64-dtype, and must therefore upcast + pytest.xfail("cannot infer datetime64tz dtype, see GH 23554") - # create array of given dtype; casts "1" to correct dtype + # create array of given dtype; casts "10 ** 9" to correct dtype fill_value = pd.Series([10 ** 9], dtype=fill_dtype)[0] # filling datetimetz with datetimetz casts to object, unless tz matches @@ -722,8 +668,6 @@ def test_maybe_promote_datetimetz_with_datetimetz( @pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) -# override parametrization due to to many xfails; see GH 23982 / 25425 -@pytest.mark.parametrize("box", [(False, None)]) def test_maybe_promote_datetimetz_with_na(tz_aware_fixture, fill_value, box): dtype = DatetimeTZDtype(tz=tz_aware_fixture) @@ -761,6 +705,11 @@ def test_maybe_promote_any_numpy_dtype_with_datetimetz( fill_dtype = DatetimeTZDtype(tz=tz_aware_fixture) boxed, box_dtype = box # read from parametrized fixture + if is_datetime64_dtype(dtype): + # fill_dtype does not get inferred correctly to datetime64tz but to + # datetime64, which then falsely matches with datetime64 dtypes. 
+ pytest.xfail("cannot infer datetime64tz dtype, see GH 23554") + fill_value = pd.Series([fill_value], dtype=fill_dtype)[0] # filling any numpy dtype with datetimetz casts to object @@ -786,13 +735,6 @@ def test_maybe_promote_timedelta64_with_any( fill_dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_timedelta64_dtype(fill_dtype): - if box_dtype == object: - pytest.xfail("falsely upcasts to object") - else: - if boxed and box_dtype is None: - pytest.xfail("does not upcast to object") - # create array of given dtype; casts "1" to correct dtype fill_value = np.array([1], dtype=fill_dtype)[0] @@ -828,11 +770,11 @@ def test_maybe_promote_timedelta64_with_any( "box", [ (True, None), # fill_value wrapped in array with default dtype - # disabled due to too many xfails; see GH 23982 / 25425 - # (True, 'td_dtype'), # fill_value in array with explicit timedelta dtype + (True, "td_dtype"), # fill_value in array with explicit timedelta dtype (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value passed on as scalar ], + ids=["True-None", "True-td_dtype", "True-object", "False-None"], ) def test_maybe_promote_any_with_timedelta64( any_numpy_dtype_reduced, timedelta64_dtype, fill_value, box @@ -840,16 +782,6 @@ def test_maybe_promote_any_with_timedelta64( dtype = np.dtype(any_numpy_dtype_reduced) boxed, box_dtype = box # read from parametrized fixture - if is_timedelta64_dtype(dtype): - if boxed and ( - box_dtype == object - or (box_dtype is None and not is_timedelta64_dtype(type(fill_value))) - ): - pytest.xfail("falsely upcasts to object") - else: - if boxed and box_dtype is None and is_timedelta64_dtype(type(fill_value)): - pytest.xfail("does not upcast correctly") - # special case for box_dtype box_dtype = np.dtype(timedelta64_dtype) if box_dtype == "td_dtype" else box_dtype @@ -904,11 +836,12 @@ def test_maybe_promote_string_with_any(string_dtype, any_numpy_dtype_reduced, bo "box", 
[ # disabled due to too many xfails; see GH 23982 / 25425 - # (True, None), # fill_value wrapped in array with default dtype - # (True, 'str'), # fill_value wrapped in array with generic string-dtype + (True, None), # fill_value wrapped in array with default dtype + (True, "str"), # fill_value wrapped in array with generic string-dtype (True, object), # fill_value wrapped in array with object dtype (False, None), # fill_value passed on as scalar ], + ids=["True-None", "True-str", "True-object", "False-None"], ) def test_maybe_promote_any_with_string(any_numpy_dtype_reduced, string_dtype, box): dtype = np.dtype(any_numpy_dtype_reduced) @@ -986,8 +919,15 @@ def test_maybe_promote_any_with_object(any_numpy_dtype_reduced, object_dtype, bo @pytest.mark.parametrize("fill_value", [None, np.nan, NaT]) -# override parametrization due to to many xfails; see GH 23982 / 25425 -@pytest.mark.parametrize("box", [(False, None)]) +# override parametrization of box, because default dtype for na is always float +@pytest.mark.parametrize( + "box", + [ + (True, object), # fill_value wrapped in array with object dtype + (False, None), # fill_value passed on as scalar + ], + ids=["True-object", "False-None"], +) def test_maybe_promote_any_numpy_dtype_with_na( any_numpy_dtype_reduced, fill_value, box ): @@ -1052,11 +992,18 @@ def test_maybe_promote_dimensions(any_numpy_dtype_reduced, dim): fill_array = np.expand_dims(fill_array, 0) # test against 1-dimensional case - expected_dtype, expected_missing_value = maybe_promote( + expected_dtype, expected_missing_value = maybe_promote_with_array( dtype, np.array([1], dtype=dtype) ) - result_dtype, result_missing_value = maybe_promote(dtype, fill_array) + result_dtype, result_missing_value = maybe_promote_with_array(dtype, fill_array) assert result_dtype == expected_dtype _assert_match(result_missing_value, expected_missing_value) + + +def test_maybe_promote_raises(any_numpy_dtype): + msg = "fill_value must either be a Series / Index / 
np.ndarray, received.*" + with pytest.raises(ValueError, match=msg): + # something that's not a Series / Index / np.ndarray + maybe_promote_with_array(any_numpy_dtype, 1)