Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 41 additions & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,14 @@ def _as_range_index(self) -> RangeIndex:
return RangeIndex(rng)

def _can_range_setop(self, other) -> bool:
return isinstance(self.freq, Tick) and isinstance(other.freq, Tick)
# Only allow range-based setops when both objects are tick-based AND
# not timezone-aware. For tz-aware DatetimeIndex, constant i8 stepping
# does not hold across DST transitions in local time, so avoid range path.
if not (isinstance(self.freq, Tick) and isinstance(other.freq, Tick)):
return False
self_tz = getattr(self.dtype, "tz", None)
other_tz = getattr(other.dtype, "tz", None)
return self_tz is None and other_tz is None

def _wrap_range_setop(self, other, res_i8) -> Self:
new_freq = None
Expand Down Expand Up @@ -726,6 +733,39 @@ def _union(self, other, sort):
# that result.freq == self.freq
return result
else:
# For tz-aware DatetimeIndex, perform union in UTC to avoid
# local-time irregularities across DST transitions, then convert back.
tz = getattr(self.dtype, "tz", None)
other_tz = getattr(other.dtype, "tz", None)
if tz is not None and tz == other_tz:
# Narrow to DatetimeArray to access tz_convert without mypy errors
if isinstance(self._data, DatetimeArray) and isinstance(
other._data, DatetimeArray
):
# Convert both to UTC, then drop tz to avoid re-entering
# tz-aware path
left_utc_naive = self._data.tz_convert("UTC").tz_localize(None)
right_utc_naive = other._data.tz_convert("UTC").tz_localize(None)
left_naive = type(self)._simple_new(left_utc_naive, name=self.name)
right_naive = type(other)._simple_new(
right_utc_naive, name=other.name
)
# Perform base union on tz-naive indices to avoid DST complications
res_naive = super(type(left_naive), left_naive)._union(
right_naive, sort
)
# Localize back to UTC and then convert to original tz
if isinstance(res_naive, DatetimeArray):
base_arr = res_naive
name = self.name
else:
base_arr = cast(DatetimeArray, res_naive._data)
name = res_naive.name
res_arr = base_arr.tz_localize("UTC").tz_convert(tz)
res = type(self)._simple_new(res_arr, name=name)
return res._with_freq("infer")
# Defensive fallback if types are unexpected
return super()._union(other, sort)
return super()._union(other, sort)._with_freq("infer")

# --------------------------------------------------------------------
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,30 @@ def test_union3(self, sort, box):
result = first.union(case, sort=sort)
tm.assert_index_equal(result, expected)


def test_union_across_dst_boundary():
    """Union two hourly US/Eastern ranges that sit around the spring-forward gap."""
    # US/Eastern springs forward on 2021-03-14 at 02:00, so the wall-clock
    # times 02:00-02:59 do not exist on that date.
    tz = "US/Eastern"
    hourly = {"periods": 3, "freq": "h", "tz": tz}

    # Three hourly steps starting at local midnight.
    before_gap = date_range("2021-03-14 00:00", **hourly)
    # Three hourly steps starting at the first valid hour after the gap.
    after_gap = date_range("2021-03-14 03:00", **hourly)

    combined = before_gap.union(after_gap)

    # The union must keep the tz and list each valid hour exactly once.
    wall_times = [
        "2021-03-14 00:00",
        "2021-03-14 01:00",
        "2021-03-14 03:00",
        "2021-03-14 04:00",
        "2021-03-14 05:00",
    ]
    stamps = [Timestamp(wall_time, tz=tz) for wall_time in wall_times]
    expected = DatetimeIndex(stamps).as_unit(before_gap.unit)

    tm.assert_index_equal(combined, expected)

@pytest.mark.parametrize("tz", tz)
def test_union(self, tz, sort):
rng1 = date_range("1/1/2000", freq="D", periods=5, tz=tz)
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/series/test_ufunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import numpy as np
import pytest

from pandas.compat.numpy import is_numpy_dev

import pandas as pd
import pandas._testing as tm
from pandas.arrays import SparseArray
Expand Down Expand Up @@ -457,7 +459,11 @@ def add3(x, y, z):
ufunc(ser, ser, df)


@pytest.mark.xfail(reason="see https://github.com/pandas-dev/pandas/pull/51082")
@pytest.mark.xfail(
condition=not is_numpy_dev,
reason="see https://github.com/pandas-dev/pandas/pull/51082",
strict=True,
)
def test_np_fix():
# np.fix is not a ufunc but is composed of several ufunc calls under the hood
# with `out` and `where` keywords
Expand Down
Loading