diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index e74bd2f745b94..964d0e98ac446 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -629,6 +629,7 @@ Datetimelike
 - Bug in :meth:`to_datetime` reports incorrect index in case of any failure scenario. (:issue:`58298`)
 - Bug in :meth:`to_datetime` wrongly converts when ``arg`` is a ``np.datetime64`` object with unit of ``ps``. (:issue:`60341`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
+- Bug in :meth:`DatetimeIndex.union` where timezone-aware indices with the same timezone but different units could be converted to UTC instead of preserving their timezone and using the higher-resolution unit (:issue:`60080`)
 
 Timedelta
 ^^^^^^^^^
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 165fe109c4c94..cf9f420b9741b 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -2958,6 +2958,20 @@ def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index
             and self.tz is not None
             and other.tz is not None
         ):
+            if self.tz == other.tz:
+                # GH#60080: same timezone but possibly different units. Cast
+                # both sides to the finer unit rather than converting to UTC,
+                # so the result keeps the original timezone.
+                # Only the units a tz-aware DatetimeIndex can hold are listed.
+                resolution_order = {"s": 1, "ms": 2, "us": 3, "ns": 4}
+                # An unrecognised unit ranks below every known one
+                self_res = resolution_order.get(self.dtype.unit, 0)
+                other_res = resolution_order.get(other.dtype.unit, 0)
+                # Choose the dtype with the finer resolution; ties keep self's
+                dtype = self.dtype if self_res >= other_res else other.dtype
+                left = self.astype(dtype, copy=False)
+                right = other.astype(dtype, copy=False)
+                return left, right
             # GH#39328, GH#45357
             left = self.tz_convert("UTC")
             right = other.tz_convert("UTC")
diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py
index 7ef6efad0ff6f..d9160c614d881 100644
--- a/pandas/tests/indexes/datetimes/test_setops.py
+++ b/pandas/tests/indexes/datetimes/test_setops.py
@@ -694,3 +694,80 @@ def test_intersection_non_nano_rangelike():
         freq="D",
     )
     tm.assert_index_equal(result, expected)
+
+
+def test_union_preserves_timezone_and_resolution():
+    """
+    GH 60080: Ensure union of DatetimeIndex with the same timezone
+    and differing resolutions results in the higher resolution unit
+    and preserves the timezone.
+    """
+    idx1 = DatetimeIndex(["2020-01-01 10:00:00+05:00"]).astype(
+        "datetime64[us, UTC+05:00]"
+    )
+    idx2 = DatetimeIndex(["2020-01-01 10:00:00+05:00"]).astype(
+        "datetime64[ns, UTC+05:00]"
+    )
+    result = idx1.union(idx2)
+    expected = DatetimeIndex(["2020-01-01 10:00:00+05:00"]).astype(
+        "datetime64[ns, UTC+05:00]"
+    )
+    tm.assert_index_equal(result, expected)
+
+
+def test_union_multiple_entries_same_timezone():
+    """
+    GH 60080: Test union with multiple DatetimeIndex entries having the same timezone
+    and different units, ensuring correct alignment and resolution preservation.
+    """
+    idx1 = DatetimeIndex(
+        ["2023-01-01 10:00:00+05:00", "2023-01-02 10:00:00+05:00"]
+    ).astype("datetime64[us, UTC+05:00]")
+    idx2 = DatetimeIndex(
+        ["2023-01-01 10:00:00+05:00", "2023-01-03 10:00:00+05:00"]
+    ).astype("datetime64[ns, UTC+05:00]")
+    result = idx1.union(idx2)
+    expected = DatetimeIndex(
+        [
+            "2023-01-01 10:00:00+05:00",
+            "2023-01-02 10:00:00+05:00",
+            "2023-01-03 10:00:00+05:00",
+        ]
+    ).astype("datetime64[ns, UTC+05:00]")
+    tm.assert_index_equal(result, expected)
+
+
+def test_union_same_timezone_same_resolution():
+    """
+    GH 60080: Ensure union of DatetimeIndex with the same timezone and
+    resolution is straightforward and retains the resolution.
+    """
+    idx1 = DatetimeIndex(["2022-01-01 15:00:00+05:00"]).astype(
+        "datetime64[ms, UTC+05:00]"
+    )
+    idx2 = DatetimeIndex(["2022-01-01 16:00:00+05:00"]).astype(
+        "datetime64[ms, UTC+05:00]"
+    )
+    result = idx1.union(idx2)
+    expected = DatetimeIndex(
+        ["2022-01-01 15:00:00+05:00", "2022-01-01 16:00:00+05:00"]
+    ).astype("datetime64[ms, UTC+05:00]")
+    tm.assert_index_equal(result, expected)
+
+
+def test_union_single_entry():
+    """
+    GH 60080: Ensure union of single-entry DatetimeIndex works as expected
+    with different units and same timezone.
+    """
+    idx1 = DatetimeIndex(["2023-01-01 10:00:00+05:00"]).astype(
+        "datetime64[ms, UTC+05:00]"
+    )
+    idx2 = DatetimeIndex(["2023-01-01 10:00:00+05:00"]).astype(
+        "datetime64[us, UTC+05:00]"
+    )
+    result = idx1.union(idx2)
+    expected = DatetimeIndex(["2023-01-01 10:00:00+05:00"]).astype(
+        "datetime64[us, UTC+05:00]"
+    )
+    tm.assert_index_equal(result, expected)