Skip to content

Commit bb94ebe

Browse files
committed
DEPR: some removals
DEPR: Removal of cols keyword in favor of subset in DataFrame.duplicated() and DataFrame.drop_duplicates(), xref #6680

Author: Jeff Reback <[email protected]>

Closes #12165 from jreback/deprecate and squashes the following commits:

5be6dc6 [Jeff Reback] DOC: small fix on Timestamp doc-string
e3579a5 [Jeff Reback] DEPR: Removal of cols keyword in favor of subset in DataFrame.duplicated() and DataFrame.drop_duplicates(), xref #6680
1 parent 9bc8243 commit bb94ebe

File tree

4 files changed

+2
-65
lines changed

4 files changed

+2
-65
lines changed

doc/source/whatsnew/v0.18.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ Removal of prior version deprecations/changes
447447
- Removal of ``rolling_corr_pairwise`` in favor of ``.rolling().corr(pairwise=True)`` (:issue:`4950`)
448448
- Removal of ``expanding_corr_pairwise`` in favor of ``.expanding().corr(pairwise=True)`` (:issue:`4950`)
449449
- Removal of ``DataMatrix`` module. This was not imported into the pandas namespace in any event (:issue:`12111`)
450-
450+
- Removal of ``cols`` keyword in favor of ``subset`` in ``DataFrame.duplicated()`` and ``DataFrame.drop_duplicates()`` (:issue:`6680`)
451451

452452

453453
.. _whatsnew_0180.performance:
@@ -544,4 +544,4 @@ Bug Fixes
544544

545545
- Bug in ``.skew`` and ``.kurt`` due to roundoff error for highly similar values (:issue:`11974`)
546546

547-
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)
547+
- Bug in ``buffer_rd_bytes`` src->buffer could be freed more than once if reading failed, causing a segfault (:issue:`12098`)

pandas/core/frame.py

-4
Original file line numberDiff line numberDiff line change
@@ -3012,7 +3012,6 @@ def dropna(self, axis=0, how='any', thresh=None, subset=None,
30123012

30133013
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
30143014
False: 'first'})
3015-
@deprecate_kwarg(old_arg_name='cols', new_arg_name='subset', stacklevel=3)
30163015
def drop_duplicates(self, subset=None, keep='first', inplace=False):
30173016
"""
30183017
Return DataFrame with duplicate rows removed, optionally only
@@ -3030,7 +3029,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False):
30303029
take_last : deprecated
30313030
inplace : boolean, default False
30323031
Whether to drop duplicates in place or to return a copy
3033-
cols : kwargs only argument of subset [deprecated]
30343032
30353033
Returns
30363034
-------
@@ -3047,7 +3045,6 @@ def drop_duplicates(self, subset=None, keep='first', inplace=False):
30473045

30483046
@deprecate_kwarg('take_last', 'keep', mapping={True: 'last',
30493047
False: 'first'})
3050-
@deprecate_kwarg(old_arg_name='cols', new_arg_name='subset', stacklevel=3)
30513048
def duplicated(self, subset=None, keep='first'):
30523049
"""
30533050
Return boolean Series denoting duplicate rows, optionally only
@@ -3065,7 +3062,6 @@ def duplicated(self, subset=None, keep='first'):
30653062
last occurrence.
30663063
- False : Mark all duplicates as ``True``.
30673064
take_last : deprecated
3068-
cols : kwargs only argument of subset [deprecated]
30693065
30703066
Returns
30713067
-------

pandas/tests/frame/test_analytics.py

-57
Original file line numberDiff line numberDiff line change
@@ -1670,40 +1670,6 @@ def test_drop_duplicates_for_take_all(self):
16701670
expected = df.iloc[[0, 1, 2, 6]]
16711671
assert_frame_equal(result, expected)
16721672

1673-
def test_drop_duplicates_deprecated_warning(self):
1674-
df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
1675-
'foo', 'bar', 'bar', 'foo'],
1676-
'B': ['one', 'one', 'two', 'two',
1677-
'two', 'two', 'one', 'two'],
1678-
'C': [1, 1, 2, 2, 2, 2, 1, 2],
1679-
'D': lrange(8)})
1680-
expected = df[:2]
1681-
1682-
# Does not raise a warning when ``subset`` is used
1683-
with tm.assert_produces_warning(False):
1684-
result = df.drop_duplicates(subset='AAA')
1685-
assert_frame_equal(result, expected)
1686-
1687-
with tm.assert_produces_warning(FutureWarning):
1688-
result = df.drop_duplicates(cols='AAA')
1689-
assert_frame_equal(result, expected)
1690-
1691-
# Does not allow both subset and cols
1692-
self.assertRaises(TypeError, df.drop_duplicates,
1693-
kwargs={'cols': 'AAA', 'subset': 'B'})
1694-
1695-
# Does not allow unknown kwargs
1696-
self.assertRaises(TypeError, df.drop_duplicates,
1697-
kwargs={'subset': 'AAA', 'bad_arg': True})
1698-
1699-
# deprecate take_last
1700-
# Raises warning
1701-
with tm.assert_produces_warning(FutureWarning):
1702-
result = df.drop_duplicates(take_last=False, subset='AAA')
1703-
assert_frame_equal(result, expected)
1704-
1705-
self.assertRaises(ValueError, df.drop_duplicates, keep='invalid_name')
1706-
17071673
def test_drop_duplicates_tuple(self):
17081674
df = DataFrame({('AA', 'AB'): ['foo', 'bar', 'foo', 'bar',
17091675
'foo', 'bar', 'bar', 'foo'],
@@ -1960,29 +1926,6 @@ def test_drop_duplicates_inplace(self):
19601926
result = df2
19611927
assert_frame_equal(result, expected)
19621928

1963-
def test_duplicated_deprecated_warning(self):
1964-
df = DataFrame({'AAA': ['foo', 'bar', 'foo', 'bar',
1965-
'foo', 'bar', 'bar', 'foo'],
1966-
'B': ['one', 'one', 'two', 'two',
1967-
'two', 'two', 'one', 'two'],
1968-
'C': [1, 1, 2, 2, 2, 2, 1, 2],
1969-
'D': lrange(8)})
1970-
1971-
# Does not raise a warning when ``subset`` is used
1972-
with tm.assert_produces_warning(False):
1973-
result = df.duplicated(subset='AAA')
1974-
1975-
with tm.assert_produces_warning(FutureWarning):
1976-
result = df.duplicated(cols='AAA') # noqa
1977-
1978-
# Does not allow both subset and cols
1979-
self.assertRaises(TypeError, df.duplicated,
1980-
kwargs={'cols': 'AAA', 'subset': 'B'})
1981-
1982-
# Does not allow unknown kwargs
1983-
self.assertRaises(TypeError, df.duplicated,
1984-
kwargs={'subset': 'AAA', 'bad_arg': True})
1985-
19861929
# Rounding
19871930

19881931
def test_round(self):

pandas/tslib.pyx

-2
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,6 @@ class Timestamp(_Timestamp):
233233
Offset which Timestamp will have
234234
tz : string, pytz.timezone, dateutil.tz.tzfile or None
235235
Time zone for time which Timestamp will have.
236-
unit : string
237-
numpy unit used for conversion, if ts_input is int or float
238236
"""
239237

240238
# Do not add ``dayfirst`` and ``yearfirst`` to Timestamp based on the discussion

0 commit comments

Comments
 (0)