From cb97ce6e496e7d1df76f06550c0fe8c4590ff3ce Mon Sep 17 00:00:00 2001 From: Natalia Mokeeva <91160475+natmokval@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:38:20 +0100 Subject: [PATCH] ENH: raise ValueError if invalid period freq pass to asfreq when the index of df is a PeriodIndex (#56945) --- doc/source/whatsnew/v2.2.1.rst | 1 + pandas/_libs/tslibs/offsets.pyx | 32 +++++++----- pandas/core/arrays/period.py | 11 ++-- pandas/plotting/_matplotlib/timeseries.py | 5 +- pandas/tests/dtypes/test_dtypes.py | 10 ++-- .../datetimes/methods/test_to_period.py | 2 +- .../indexes/period/methods/test_asfreq.py | 51 +++++++++++++++++++ pandas/tests/resample/test_period_index.py | 4 +- pandas/tests/scalar/period/test_asfreq.py | 5 +- pandas/tests/scalar/period/test_period.py | 16 +++--- 10 files changed, 96 insertions(+), 41 deletions(-) diff --git a/doc/source/whatsnew/v2.2.1.rst b/doc/source/whatsnew/v2.2.1.rst index 3cc11974b14e5..3f70c72a55a4c 100644 --- a/doc/source/whatsnew/v2.2.1.rst +++ b/doc/source/whatsnew/v2.2.1.rst @@ -35,6 +35,7 @@ Bug fixes - Fixed bug in :func:`pandas.api.interchange.from_dataframe` which was raising for empty inputs (:issue:`56700`) - Fixed bug in :func:`pandas.api.interchange.from_dataframe` which wasn't converting columns names to strings (:issue:`55069`) - Fixed bug in :meth:`DataFrame.__getitem__` for empty :class:`DataFrame` with Copy-on-Write enabled (:issue:`57130`) +- Fixed bug in :meth:`PeriodIndex.asfreq` which was silently converting frequencies which are not supported as period frequencies instead of raising an error (:issue:`56945`) .. --------------------------------------------------------------------------- .. _whatsnew_221.other: diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index e1615a1ceb818..e96a905367f69 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -4271,9 +4271,7 @@ cdef class CustomBusinessDay(BusinessDay): @property def _period_dtype_code(self): # GH#52534 - raise TypeError( - "CustomBusinessDay is not supported as period frequency" - ) + raise ValueError(f"{self.base} is not supported as period frequency") _apply_array = BaseOffset._apply_array @@ -4837,19 +4835,19 @@ cpdef to_offset(freq, bint is_period=False): if freq is None: return None - if isinstance(freq, BaseOffset): - return freq - if isinstance(freq, tuple): raise TypeError( f"to_offset does not support tuples {freq}, pass as a string instead" ) + if isinstance(freq, BaseOffset): + result = freq + elif PyDelta_Check(freq): - return delta_to_tick(freq) + result = delta_to_tick(freq) elif isinstance(freq, str): - delta = None + result = None stride_sign = None try: @@ -4950,21 +4948,27 @@ cpdef to_offset(freq, bint is_period=False): offset = _get_offset(prefix) offset = offset * int(np.fabs(stride) * stride_sign) - if delta is None: - delta = offset + if result is None: + result = offset else: - delta = delta + offset + result = result + offset except (ValueError, TypeError) as err: raise ValueError(INVALID_FREQ_ERR_MSG.format( f"{freq}, failed to parse with error message: {repr(err)}") ) else: - delta = None + result = None - if delta is None: + if result is None: raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) - return delta + if is_period and not hasattr(result, "_period_dtype_code"): + if isinstance(freq, str): + raise ValueError(f"{result.name} is not supported as period frequency") + else: + raise ValueError(f"{freq} is not supported as period frequency") + + return result # ---------------------------------------------------------------------- diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 3a245d353dca2..8bbc4976675c8 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -733,8 +733,8 @@ def asfreq(self, freq=None, how: str = "E") -> Self: '2015-01'], dtype='period[M]') """ how = libperiod.validate_end_alias(how) - if isinstance(freq, BaseOffset): - freq = freq_to_period_freqstr(freq.n, freq.name) + if isinstance(freq, BaseOffset) and hasattr(freq, "_period_dtype_code"): + freq = PeriodDtype(freq)._freqstr freq = Period._maybe_convert_freq(freq) base1 = self._dtype._dtype_code @@ -1186,12 +1186,7 @@ def dt64arr_to_periodarr( reso = get_unit_from_dtype(data.dtype) freq = Period._maybe_convert_freq(freq) - try: - base = freq._period_dtype_code - except (AttributeError, TypeError) as err: - # AttributeError: _period_dtype_code might not exist - # TypeError: _period_dtype_code might intentionally raise - raise TypeError(f"{freq.name} is not supported as period frequency") from err + base = freq._period_dtype_code return c_dt64arr_to_periodarr(data.view("i8"), base, tz, reso=reso), freq diff --git a/pandas/plotting/_matplotlib/timeseries.py b/pandas/plotting/_matplotlib/timeseries.py index 067bcf0b01ccb..d4587a6ccf90a 100644 --- a/pandas/plotting/_matplotlib/timeseries.py +++ b/pandas/plotting/_matplotlib/timeseries.py @@ -205,7 +205,10 @@ def _get_ax_freq(ax: Axes): def _get_period_alias(freq: timedelta | BaseOffset | str) -> str | None: - freqstr = to_offset(freq, is_period=True).rule_code + if isinstance(freq, BaseOffset): + freqstr = freq.name + else: + freqstr = to_offset(freq, is_period=True).rule_code return get_period_alias(freqstr) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index c52d49034f74e..c6da01636247d 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -445,12 +445,12 @@ def test_construction(self): def test_cannot_use_custom_businessday(self): # GH#52534 - msg = "CustomBusinessDay is not supported as period frequency" + msg = "C is not supported as period frequency" + msg1 = " is not supported as period frequency" msg2 = r"PeriodDtype\[B\] is deprecated" - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg2): - PeriodDtype("C") - with pytest.raises(TypeError, match=msg): + with pytest.raises(ValueError, match=msg): + PeriodDtype("C") + with pytest.raises(ValueError, match=msg1): with tm.assert_produces_warning(FutureWarning, match=msg2): PeriodDtype(pd.offsets.CustomBusinessDay()) diff --git a/pandas/tests/indexes/datetimes/methods/test_to_period.py b/pandas/tests/indexes/datetimes/methods/test_to_period.py index 00c0216a9b3b5..de8d32f64cde2 100644 --- a/pandas/tests/indexes/datetimes/methods/test_to_period.py +++ b/pandas/tests/indexes/datetimes/methods/test_to_period.py @@ -221,5 +221,5 @@ def test_to_period_offsets_not_supported(self, freq): # GH#56243 msg = f"{freq[1:]} is not supported as period frequency" ts = date_range("1/1/2012", periods=4, freq=freq) - with pytest.raises(TypeError, match=msg): + with pytest.raises(ValueError, match=msg): ts.to_period() diff --git a/pandas/tests/indexes/period/methods/test_asfreq.py b/pandas/tests/indexes/period/methods/test_asfreq.py index ed078a3e8fb8b..865bae69d91c7 100644 --- a/pandas/tests/indexes/period/methods/test_asfreq.py +++ b/pandas/tests/indexes/period/methods/test_asfreq.py @@ -1,3 +1,5 @@ +import re + import pytest from pandas import ( @@ -7,6 +9,8 @@ ) import pandas._testing as tm +from pandas.tseries import offsets + class TestPeriodIndex: def test_asfreq(self): @@ -136,3 +140,50 @@ def test_asfreq_with_different_n(self): excepted = Series([1, 2], index=PeriodIndex(["2020-02", "2020-04"], freq="M")) tm.assert_series_equal(result, excepted) + + @pytest.mark.parametrize( + "freq", + [ + "2BMS", + "2YS-MAR", + "2bh", + ], + ) + def test_pi_asfreq_not_supported_frequency(self, freq): + # GH#55785 + msg = f"{freq[1:]} is not supported as period frequency" + + pi = PeriodIndex(["2020-01-01", "2021-01-01"], freq="M") + with pytest.raises(ValueError, match=msg): + pi.asfreq(freq=freq) + + @pytest.mark.parametrize( + "freq", + [ + "2BME", + "2YE-MAR", + "2QE", + ], + ) + def test_pi_asfreq_invalid_frequency(self, freq): + # GH#55785 + msg = f"Invalid frequency: {freq}" + + pi = PeriodIndex(["2020-01-01", "2021-01-01"], freq="M") + with pytest.raises(ValueError, match=msg): + pi.asfreq(freq=freq) + + @pytest.mark.parametrize( + "freq", + [ + offsets.MonthBegin(2), + offsets.BusinessMonthEnd(2), + ], + ) + def test_pi_asfreq_invalid_baseoffset(self, freq): + # GH#56945 + msg = re.escape(f"{freq} is not supported as period frequency") + + pi = PeriodIndex(["2020-01-01", "2021-01-01"], freq="M") + with pytest.raises(ValueError, match=msg): + pi.asfreq(freq=freq) diff --git a/pandas/tests/resample/test_period_index.py b/pandas/tests/resample/test_period_index.py index 7b3edbad6666b..610eeb6dcf32f 100644 --- a/pandas/tests/resample/test_period_index.py +++ b/pandas/tests/resample/test_period_index.py @@ -1032,8 +1032,8 @@ def test_resample_lowercase_frequency_deprecated( offsets.BusinessHour(2), ], ) - def test_asfreq_invalid_period_freq(self, offset, frame_or_series): - # GH#9586 + def test_asfreq_invalid_period_offset(self, offset, frame_or_series): + # GH#55785 msg = f"Invalid offset: '{offset.base}' for converting time series " obj = frame_or_series(range(5), index=period_range("2020-01-01", periods=5)) diff --git a/pandas/tests/scalar/period/test_asfreq.py b/pandas/tests/scalar/period/test_asfreq.py index 4489c307172d7..73c4d8061c257 100644 --- a/pandas/tests/scalar/period/test_asfreq.py +++ b/pandas/tests/scalar/period/test_asfreq.py @@ -820,10 +820,9 @@ def test_asfreq_MS(self): assert initial.asfreq(freq="M", how="S") == Period("2013-01", "M") - msg = INVALID_FREQ_ERR_MSG + msg = "MS is not supported as period frequency" with pytest.raises(ValueError, match=msg): initial.asfreq(freq="MS", how="S") - msg = "MonthBegin is not supported as period frequency" - with pytest.raises(TypeError, match=msg): + with pytest.raises(ValueError, match=msg): Period("2013-01", "MS") diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index d819e903a0bae..2c3a0816737fc 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -3,6 +3,7 @@ datetime, timedelta, ) +import re import numpy as np import pytest @@ -40,21 +41,22 @@ class TestPeriodDisallowedFreqs: ) def test_offsets_not_supported(self, freq, freq_msg): # GH#55785 - msg = f"{freq_msg} is not supported as period frequency" - with pytest.raises(TypeError, match=msg): + msg = re.escape(f"{freq} is not supported as period frequency") + with pytest.raises(ValueError, match=msg): Period(year=2014, freq=freq) def test_custom_business_day_freq_raises(self): # GH#52534 - msg = "CustomBusinessDay is not supported as period frequency" - with pytest.raises(TypeError, match=msg): + msg = "C is not supported as period frequency" + with pytest.raises(ValueError, match=msg): Period("2023-04-10", freq="C") - with pytest.raises(TypeError, match=msg): + msg = f"{offsets.CustomBusinessDay().base} is not supported as period frequency" + with pytest.raises(ValueError, match=msg): Period("2023-04-10", freq=offsets.CustomBusinessDay()) def test_invalid_frequency_error_message(self): - msg = "WeekOfMonth is not supported as period frequency" - with pytest.raises(TypeError, match=msg): + msg = "WOM-1MON is not supported as period frequency" + with pytest.raises(ValueError, match=msg): Period("2012-01-02", freq="WOM-1MON") def test_invalid_frequency_period_error_message(self):