Skip to content

Commit 54c0d5a

Browse files
mroeschke authored and jreback committed
DEPR: DataFrame.get_dtype_counts (#27145)
1 parent 647c635 commit 54c0d5a

29 files changed

+229
-180
lines changed

doc/source/getting_started/basics.rst

+2-2
Original file line numberDiff line numberDiff line change
@@ -1968,11 +1968,11 @@ dtype of the column will be chosen to accommodate all of the data types
19681968
pd.Series([1, 2, 3, 6., 'foo'])
19691969
19701970
The number of columns of each type in a ``DataFrame`` can be found by calling
1971-
:meth:`~DataFrame.get_dtype_counts`.
1971+
``DataFrame.dtypes.value_counts()``.
19721972

19731973
.. ipython:: python
19741974
1975-
dft.get_dtype_counts()
1975+
dft.dtypes.value_counts()
19761976
19771977
Numeric dtypes will propagate and can coexist in DataFrames.
19781978
If a dtype is passed (either directly via the ``dtype`` keyword, a passed ``ndarray``,

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -3767,7 +3767,7 @@ defaults to `nan`.
37673767
store.append('df_mixed', df_mixed, min_itemsize={'values': 50})
37683768
df_mixed1 = store.select('df_mixed')
37693769
df_mixed1
3770-
df_mixed1.get_dtype_counts()
3770+
df_mixed1.dtypes.value_counts()
37713771
37723772
# we have provided a minimum string column size
37733773
store.root.df_mixed.table

doc/source/user_guide/missing_data.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ pandas objects provide compatibility between ``NaT`` and ``NaN``.
105105
df2
106106
df2.loc[['a', 'c', 'h'], ['one', 'timestamp']] = np.nan
107107
df2
108-
df2.get_dtype_counts()
108+
df2.dtypes.value_counts()
109109
110110
.. _missing.inserting:
111111

doc/source/whatsnew/v0.10.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ You can now store ``datetime64`` in data columns
8989
store.append('df_mixed', df_mixed)
9090
df_mixed1 = store.select('df_mixed')
9191
df_mixed1
92-
df_mixed1.get_dtype_counts()
92+
df_mixed1.dtypes.value_counts()
9393
9494
You can pass ``columns`` keyword to select to filter a list of the return
9595
columns, this is equivalent to passing a

doc/source/whatsnew/v0.11.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ Furthermore ``datetime64[ns]`` columns are created by default, when passed datet
296296
df
297297
298298
# datetime64[ns] out of the box
299-
df.get_dtype_counts()
299+
df.dtypes.value_counts()
300300
301301
# use the traditional nan, which is mapped to NaT internally
302302
df.loc[df.index[2:4], ['A', 'timestamp']] = np.nan

doc/source/whatsnew/v0.25.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,7 @@ Other deprecations
786786
- :meth:`Index.item` and :meth:`Series.item` are deprecated. (:issue:`18262`)
787787
- The default value ``ordered=None`` in :class:`~pandas.api.types.CategoricalDtype` has been deprecated in favor of ``ordered=False``. When converting between categorical types ``ordered=True`` must be explicitly passed in order to be preserved. (:issue:`26336`)
788788
- :meth:`Index.contains` is deprecated. Use ``key in index`` (``__contains__``) instead (:issue:`17753`).
789+
- :meth:`DataFrame.get_dtype_counts` is deprecated. Use ``DataFrame.dtypes.value_counts()`` instead (:issue:`18262`).
789790
790791
.. _whatsnew_0250.prior_deprecations:
791792

pandas/core/computation/expressions.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -79,11 +79,11 @@ def _can_use_numexpr(op, op_str, a, b, dtype_check):
7979
# check for dtype compatibility
8080
dtypes = set()
8181
for o in [a, b]:
82-
if hasattr(o, 'get_dtype_counts'):
83-
s = o.get_dtype_counts()
82+
if hasattr(o, 'dtypes'):
83+
s = o.dtypes.value_counts()
8484
if len(s) > 1:
8585
return False
86-
dtypes |= set(s.index)
86+
dtypes |= set(s.index.astype(str))
8787
elif isinstance(o, np.ndarray):
8888
dtypes |= {o.dtype.name}
8989

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2326,7 +2326,7 @@ def _sizeof_fmt(num, size_qualifier):
23262326
else:
23272327
_verbose_repr()
23282328

2329-
counts = self.get_dtype_counts()
2329+
counts = self._data.get_dtype_counts()
23302330
dtypes = ['{k}({kk:d})'.format(k=k[0], kk=k[1]) for k
23312331
in sorted(counts.items())]
23322332
lines.append('dtypes: {types}'.format(types=', '.join(dtypes)))

pandas/core/generic.py

+8
Original file line numberDiff line numberDiff line change
@@ -5263,6 +5263,10 @@ def get_dtype_counts(self):
52635263
"""
52645264
Return counts of unique dtypes in this object.
52655265
5266+
.. deprecated:: 0.25.0
5267+
5268+
Use `.dtypes.value_counts()` instead.
5269+
52665270
Returns
52675271
-------
52685272
dtype : Series
@@ -5288,6 +5292,10 @@ def get_dtype_counts(self):
52885292
object 1
52895293
dtype: int64
52905294
"""
5295+
warnings.warn("`get_dtype_counts` has been deprecated and will be "
5296+
"removed in a future version. For DataFrames use "
5297+
"`.dtypes.value_counts()", FutureWarning,
5298+
stacklevel=2)
52915299
from pandas import Series
52925300
return Series(self._data.get_dtype_counts())
52935301

pandas/tests/frame/test_api.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77

88
import pandas as pd
99
from pandas import (
10-
Categorical, DataFrame, Series, SparseDataFrame, compat, date_range,
11-
timedelta_range)
10+
Categorical, DataFrame, Series, SparseDataFrame, SparseDtype, compat,
11+
date_range, timedelta_range)
1212
import pandas.util.testing as tm
1313
from pandas.util.testing import (
1414
assert_almost_equal, assert_frame_equal, assert_series_equal)
@@ -433,11 +433,11 @@ def test_with_datetimelikes(self):
433433
'B': timedelta_range('1 day', periods=10)})
434434
t = df.T
435435

436-
result = t.get_dtype_counts()
436+
result = t.dtypes.value_counts()
437437
if self.klass is DataFrame:
438-
expected = Series({'object': 10})
438+
expected = Series({np.dtype('object'): 10})
439439
else:
440-
expected = Series({'Sparse[object, nan]': 10})
440+
expected = Series({SparseDtype(dtype=object): 10})
441441
tm.assert_series_equal(result, expected)
442442

443443

pandas/tests/frame/test_arithmetic.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -273,8 +273,8 @@ def test_df_flex_cmp_constant_return_types(self, opname):
273273
df = pd.DataFrame({'x': [1, 2, 3], 'y': [1., 2., 3.]})
274274
const = 2
275275

276-
result = getattr(df, opname)(const).get_dtype_counts()
277-
tm.assert_series_equal(result, pd.Series([2], ['bool']))
276+
result = getattr(df, opname)(const).dtypes.value_counts()
277+
tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)]))
278278

279279
@pytest.mark.parametrize('opname', ['eq', 'ne', 'gt', 'lt', 'ge', 'le'])
280280
def test_df_flex_cmp_constant_return_types_empty(self, opname):
@@ -283,8 +283,8 @@ def test_df_flex_cmp_constant_return_types_empty(self, opname):
283283
const = 2
284284

285285
empty = df.iloc[:0]
286-
result = getattr(empty, opname)(const).get_dtype_counts()
287-
tm.assert_series_equal(result, pd.Series([2], ['bool']))
286+
result = getattr(empty, opname)(const).dtypes.value_counts()
287+
tm.assert_series_equal(result, pd.Series([2], index=[np.dtype(bool)]))
288288

289289

290290
# -------------------------------------------------------------------

pandas/tests/frame/test_block_internals.py

+14-11
Original file line numberDiff line numberDiff line change
@@ -217,19 +217,21 @@ def test_construction_with_mixed(self, float_string_frame):
217217
df = DataFrame(data)
218218

219219
# check dtypes
220-
result = df.get_dtype_counts().sort_values()
220+
result = df.dtypes
221221
expected = Series({'datetime64[ns]': 3})
222222

223223
# mixed-type frames
224224
float_string_frame['datetime'] = datetime.now()
225225
float_string_frame['timedelta'] = timedelta(days=1, seconds=1)
226226
assert float_string_frame['datetime'].dtype == 'M8[ns]'
227227
assert float_string_frame['timedelta'].dtype == 'm8[ns]'
228-
result = float_string_frame.get_dtype_counts().sort_values()
229-
expected = Series({'float64': 4,
230-
'object': 1,
231-
'datetime64[ns]': 1,
232-
'timedelta64[ns]': 1}).sort_values()
228+
result = float_string_frame.dtypes
229+
expected = Series([np.dtype('float64')] * 4 +
230+
[np.dtype('object'),
231+
np.dtype('datetime64[ns]'),
232+
np.dtype('timedelta64[ns]')],
233+
index=list('ABCD') + ['foo', 'datetime',
234+
'timedelta'])
233235
assert_series_equal(result, expected)
234236

235237
def test_construction_with_conversions(self):
@@ -409,11 +411,12 @@ def test_get_numeric_data(self):
409411
df = DataFrame({'a': 1., 'b': 2, 'c': 'foo',
410412
'f': Timestamp('20010102')},
411413
index=np.arange(10))
412-
result = df.get_dtype_counts()
413-
expected = Series({'int64': 1, 'float64': 1,
414-
datetime64name: 1, objectname: 1})
415-
result = result.sort_index()
416-
expected = expected.sort_index()
414+
result = df.dtypes
415+
expected = Series([np.dtype('float64'),
416+
np.dtype('int64'),
417+
np.dtype(objectname),
418+
np.dtype(datetime64name)],
419+
index=['a', 'b', 'c', 'f'])
417420
assert_series_equal(result, expected)
418421

419422
df = DataFrame({'a': 1., 'b': 2, 'c': 'foo',

pandas/tests/frame/test_combine_concat.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ def test_concat_multiple_frames_dtypes(self):
1717
A = DataFrame(data=np.ones((10, 2)), columns=[
1818
'foo', 'bar'], dtype=np.float64)
1919
B = DataFrame(data=np.ones((10, 2)), dtype=np.float32)
20-
results = pd.concat((A, B), axis=1).get_dtype_counts()
21-
expected = Series(dict(float64=2, float32=2))
20+
results = pd.concat((A, B), axis=1).dtypes
21+
expected = Series([np.dtype('float64')] * 2 +
22+
[np.dtype('float32')] * 2,
23+
index=['foo', 'bar', 0, 1])
2224
assert_series_equal(results, expected)
2325

2426
@pytest.mark.parametrize('data', [

0 commit comments

Comments
 (0)