Skip to content

Commit 19f715c

Browse files
authored
CLN: use idiomatic pandas_dtypes in pandas/dtypes/common.py (#24541)
1 parent a42c9be commit 19f715c

File tree

13 files changed

+349
-261
lines changed

13 files changed

+349
-261
lines changed

asv_bench/benchmarks/dtypes.py

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
from pandas.api.types import pandas_dtype
2+
3+
import numpy as np
4+
from .pandas_vb_common import (
5+
numeric_dtypes, datetime_dtypes, string_dtypes, extension_dtypes)
6+
7+
8+
# Convert every type object from the shared numeric, datetime and string
# benchmark lists into an ``np.dtype`` instance.
_numpy_dtypes = list(map(
    np.dtype, numeric_dtypes + datetime_dtypes + string_dtypes))
# Full pool of benchmark inputs: the NumPy dtypes followed by the extension
# dtypes.
_dtypes = _numpy_dtypes + extension_dtypes
13+
14+
15+
class Dtypes(object):
    """Benchmark ``pandas_dtype`` over ``_dtypes`` and the ``name`` of each."""

    param_names = ['dtype']
    # Each entry of ``_dtypes`` is benchmarked twice: once as the object
    # itself and once through its ``name`` attribute.
    params = _dtypes + [entry.name for entry in _dtypes]

    def time_pandas_dtype(self, dtype):
        """Time a single ``pandas_dtype`` call on the given parameter."""
        pandas_dtype(dtype)
22+
23+
24+
class DtypesInvalid(object):
    """Benchmark ``pandas_dtype`` on the sample inputs held in ``data_dict``.

    A ``TypeError`` raised by ``pandas_dtype`` is tolerated, so the cost of
    the call is timed whether or not the input is rejected.
    """

    params = ['scalar-string', 'scalar-int', 'list-string', 'array-string']
    param_names = ['dtype']
    # Maps each benchmark parameter label to the object passed to
    # ``pandas_dtype``.
    data_dict = {
        'scalar-string': 'foo',
        'scalar-int': 1,
        'list-string': ['foo'] * 1000,
        'array-string': np.array(['foo'] * 1000),
    }

    def time_pandas_dtype_invalid(self, dtype):
        """Time ``pandas_dtype`` on the input labelled by ``dtype``."""
        try:
            pandas_dtype(self.data_dict[dtype])
        except TypeError:
            # Rejection of the input is an expected outcome here, not a
            # benchmark failure.
            return None
37+
38+
39+
from .pandas_vb_common import setup # noqa: F401

asv_bench/benchmarks/pandas_vb_common.py

+10
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from importlib import import_module
33

44
import numpy as np
5+
import pandas as pd
56

67
# Compatibility import for lib
78
for imp in ['pandas._libs.lib', 'pandas.lib']:
@@ -14,6 +15,15 @@
1415
numeric_dtypes = [np.int64, np.int32, np.uint32, np.uint64, np.float32,
1516
np.float64, np.int16, np.int8, np.uint16, np.uint8]
1617
datetime_dtypes = [np.datetime64, np.timedelta64]
18+
# Use the builtin ``object`` directly: ``np.object`` was only an alias for it,
# deprecated in NumPy 1.20 and removed in NumPy 1.24, so referencing the alias
# makes this module fail at import time on current NumPy.
string_dtypes = [object]
# pandas extension dtypes used as benchmark inputs. The integer, categorical
# and interval entries are listed uninstantiated; the tz-aware datetime and
# period entries are constructed with explicit arguments.
extension_dtypes = [
    pd.Int8Dtype, pd.Int16Dtype,
    pd.Int32Dtype, pd.Int64Dtype,
    pd.UInt8Dtype, pd.UInt16Dtype,
    pd.UInt32Dtype, pd.UInt64Dtype,
    pd.CategoricalDtype,
    pd.IntervalDtype,
    pd.DatetimeTZDtype('ns', 'UTC'),
    pd.PeriodDtype('D'),
]
1727

1828

1929
def setup(*args, **kwargs):

doc/source/whatsnew/v0.24.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -430,7 +430,7 @@ Backwards incompatible API changes
430430
- The column order of the resultant :class:`DataFrame` from :meth:`MultiIndex.to_frame` is now guaranteed to match the :attr:`MultiIndex.names` order. (:issue:`22420`)
431431
- Incorrectly passing a :class:`DatetimeIndex` to :meth:`MultiIndex.from_tuples`, rather than a sequence of tuples, now raises a ``TypeError`` rather than a ``ValueError`` (:issue:`24024`)
432432
- :func:`pd.offsets.generate_range` argument ``time_rule`` has been removed; use ``offset`` instead (:issue:`24157`)
433-
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes (:issue:`21681`)
433+
- In 0.23.x, pandas would raise a ``ValueError`` on a merge of a numeric column (e.g. ``int`` dtyped column) and an ``object`` dtyped column (:issue:`9780`). We have re-enabled the ability to merge ``object`` and other dtypes; pandas will still raise on a merge between a numeric and an ``object`` dtyped column that is composed only of strings (:issue:`21681`)
434434

435435
Percentage change on groupby
436436
^^^^^^^^^^^^^^^^^^^^^^^^^^^^

pandas/conftest.py

+5
Original file line numberDiff line numberDiff line change
@@ -388,9 +388,14 @@ def tz_aware_fixture(request):
388388
return request.param
389389

390390

391+
# ----------------------------------------------------------------
392+
# Dtypes
391393
# Plain integer dtype specifiers: lowercase string names, plus the builtin
# ``int`` in the signed list.
UNSIGNED_INT_DTYPES = [
    "uint8", "uint16", "uint32", "uint64",
]
SIGNED_INT_DTYPES = [
    int, "int8", "int16", "int32", "int64",
]
ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES

# Capitalised "EA" counterparts (presumably the extension-array integer
# dtype names -- confirm against pandas/core/arrays/integer.py).
UNSIGNED_EA_INT_DTYPES = [
    "UInt8", "UInt16", "UInt32", "UInt64",
]
SIGNED_EA_INT_DTYPES = [
    "Int8", "Int16", "Int32", "Int64",
]
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
394399

395400
FLOAT_DTYPES = [float, "float32", "float64"]
396401
COMPLEX_DTYPES = [complex, "complex64", "complex128"]

pandas/core/arrays/integer.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class _IntegerDtype(ExtensionDtype):
3232
The attributes name & type are set when these subclasses are created.
3333
"""
3434
name = None
35+
base = None
3536
type = None
3637
na_value = np.nan
3738

@@ -153,6 +154,7 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
153154
# Avoid DeprecationWarning from NumPy about np.dtype("Int64")
154155
# https://github.com/numpy/numpy/pull/7476
155156
dtype = dtype.lower()
157+
156158
if not issubclass(type(dtype), _IntegerDtype):
157159
try:
158160
dtype = _dtypes[str(np.dtype(dtype))]
@@ -655,7 +657,8 @@ def integer_arithmetic_method(self, other):
655657
else:
656658
name = dtype.capitalize()
657659
classname = "{}Dtype".format(name)
658-
attributes_dict = {'type': getattr(np, dtype),
660+
numpy_dtype = getattr(np, dtype)
661+
attributes_dict = {'type': numpy_dtype,
659662
'name': name}
660663
dtype_type = register_extension_dtype(
661664
type(classname, (_IntegerDtype, ), attributes_dict)

pandas/core/dtypes/cast.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,9 @@
99
from pandas.compat import PY3, string_types, text_type, to_str
1010

1111
from .common import (
12-
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, _string_dtypes,
13-
ensure_int8, ensure_int16, ensure_int32, ensure_int64, ensure_object,
14-
is_bool, is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
12+
_INT64_DTYPE, _NS_DTYPE, _POSSIBLY_CAST_DTYPES, _TD_DTYPE, ensure_int8,
13+
ensure_int16, ensure_int32, ensure_int64, ensure_object, is_bool,
14+
is_bool_dtype, is_categorical_dtype, is_complex, is_complex_dtype,
1515
is_datetime64_dtype, is_datetime64_ns_dtype, is_datetime64tz_dtype,
1616
is_datetime_or_timedelta_dtype, is_datetimelike, is_dtype_equal,
1717
is_extension_array_dtype, is_extension_type, is_float, is_float_dtype,
@@ -544,7 +544,7 @@ def invalidate_string_dtypes(dtype_set):
544544
"""Change string like dtypes to object for
545545
``DataFrame.select_dtypes()``.
546546
"""
547-
non_string_dtypes = dtype_set - _string_dtypes
547+
non_string_dtypes = dtype_set - {np.dtype('S').type, np.dtype('<U').type}
548548
if non_string_dtypes != dtype_set:
549549
raise TypeError("string dtypes are not allowed, use 'object' instead")
550550

0 commit comments

Comments
 (0)