Skip to content

Commit 6ae91c4

Browse files
arw2019 authored and luckyvs1 committed
ENH: support downcasting of nullable EAs in pd.to_numeric (pandas-dev#38746)
1 parent 1cc3f88 commit 6ae91c4

File tree

3 files changed

+75
-0
lines changed

3 files changed

+75
-0
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ Other enhancements
5050
- Improved consistency of error message when passing an invalid ``win_type`` argument in :class:`Window` (:issue:`15969`)
5151
- :func:`pandas.read_sql_query` now accepts a ``dtype`` argument to cast the columnar data from the SQL database based on user input (:issue:`10285`)
5252
- Improved integer type mapping from pandas to SQLAlchemy when using :meth:`DataFrame.to_sql` (:issue:`35076`)
53+
- :func:`to_numeric` now supports downcasting of nullable ``ExtensionDtype`` objects (:issue:`33013`)
5354

5455
.. ---------------------------------------------------------------------------
5556

pandas/core/tools/numeric.py

+35
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
ensure_object,
88
is_datetime_or_timedelta_dtype,
99
is_decimal,
10+
is_integer_dtype,
1011
is_number,
1112
is_numeric_dtype,
1213
is_scalar,
@@ -15,6 +16,7 @@
1516
from pandas.core.dtypes.generic import ABCIndex, ABCSeries
1617

1718
import pandas as pd
19+
from pandas.core.arrays.numeric import NumericArray
1820

1921

2022
def to_numeric(arg, errors="raise", downcast=None):
@@ -108,6 +110,21 @@ def to_numeric(arg, errors="raise", downcast=None):
108110
2 2.0
109111
3 -3.0
110112
dtype: float64
113+
114+
Downcasting of nullable integer and floating dtypes is supported:
115+
116+
>>> s = pd.Series([1, 2, 3], dtype="Int64")
117+
>>> pd.to_numeric(s, downcast="integer")
118+
0 1
119+
1 2
120+
2 3
121+
dtype: Int8
122+
>>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
123+
>>> pd.to_numeric(s, downcast="float")
124+
0 1.0
125+
1 2.1
126+
2 3.0
127+
dtype: Float32
111128
"""
112129
if downcast not in (None, "integer", "signed", "unsigned", "float"):
113130
raise ValueError("invalid downcasting method provided")
@@ -142,6 +159,14 @@ def to_numeric(arg, errors="raise", downcast=None):
142159
else:
143160
values = arg
144161

162+
# GH33013: for IntegerArray & FloatingArray extract non-null values for casting
163+
# save mask to reconstruct the full array after casting
164+
if isinstance(values, NumericArray):
165+
mask = values._mask
166+
values = values._data[~mask]
167+
else:
168+
mask = None
169+
145170
values_dtype = getattr(values, "dtype", None)
146171
if is_numeric_dtype(values_dtype):
147172
pass
@@ -188,6 +213,16 @@ def to_numeric(arg, errors="raise", downcast=None):
188213
if values.dtype == dtype:
189214
break
190215

216+
# GH33013: for IntegerArray & FloatingArray need to reconstruct masked array
217+
if mask is not None:
218+
data = np.zeros(mask.shape, dtype=values.dtype)
219+
data[~mask] = values
220+
221+
from pandas.core.arrays import FloatingArray, IntegerArray
222+
223+
klass = IntegerArray if is_integer_dtype(data.dtype) else FloatingArray
224+
values = klass(data, mask)
225+
191226
if is_series:
192227
return arg._constructor(values, index=arg.index, name=arg.name)
193228
elif is_index:

pandas/tests/tools/test_to_numeric.py

+39
Original file line numberDiff line numberDiff line change
@@ -725,3 +725,42 @@ def test_to_numeric_from_nullable_string(values, expected):
725725
s = Series(values, dtype="string")
726726
result = to_numeric(s)
727727
tm.assert_series_equal(result, expected)
728+
729+
730+
@pytest.mark.parametrize(
731+
"data, input_dtype, downcast, expected_dtype",
732+
(
733+
([1, 1], "Int64", "integer", "Int8"),
734+
([1.0, pd.NA], "Float64", "integer", "Int8"),
735+
([1.0, 1.1], "Float64", "integer", "Float64"),
736+
([1, pd.NA], "Int64", "integer", "Int8"),
737+
([450, 300], "Int64", "integer", "Int16"),
738+
([1, 1], "Float64", "integer", "Int8"),
739+
([np.iinfo(np.int64).max - 1, 1], "Int64", "integer", "Int64"),
740+
([1, 1], "Int64", "signed", "Int8"),
741+
([1.0, 1.0], "Float32", "signed", "Int8"),
742+
([1.0, 1.1], "Float64", "signed", "Float64"),
743+
([1, pd.NA], "Int64", "signed", "Int8"),
744+
([450, -300], "Int64", "signed", "Int16"),
745+
pytest.param(
746+
[np.iinfo(np.uint64).max - 1, 1],
747+
"UInt64",
748+
"signed",
749+
"UInt64",
750+
marks=pytest.mark.xfail(reason="GH38798"),
751+
),
752+
([1, 1], "Int64", "unsigned", "UInt8"),
753+
([1.0, 1.0], "Float32", "unsigned", "UInt8"),
754+
([1.0, 1.1], "Float64", "unsigned", "Float64"),
755+
([1, pd.NA], "Int64", "unsigned", "UInt8"),
756+
([450, -300], "Int64", "unsigned", "Int64"),
757+
([-1, -1], "Int32", "unsigned", "Int32"),
758+
([1, 1], "Float64", "float", "Float32"),
759+
([1, 1.1], "Float64", "float", "Float32"),
760+
),
761+
)
762+
def test_downcast_nullable_numeric(data, input_dtype, downcast, expected_dtype):
763+
arr = pd.array(data, dtype=input_dtype)
764+
result = pd.to_numeric(arr, downcast=downcast)
765+
expected = pd.array(data, dtype=expected_dtype)
766+
tm.assert_extension_array_equal(result, expected)

0 commit comments

Comments
 (0)