Skip to content

Commit 8763535

Browse files
authored
fix: fixed-offset datetimes weren't being parsed (#1303)
* fix: fix parsing of fixed-offset timezones * versions
1 parent 0ec5e00 commit 8763535

File tree

2 files changed

+70
-12
lines changed

2 files changed

+70
-12
lines changed

narwhals/_pandas_like/utils.py

+49-12
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,51 @@
3030
Implementation.CUDF,
3131
Implementation.MODIN,
3232
}
33+
PD_DATETIME_RGX = r"""^
34+
datetime64\[
35+
(?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
36+
(?:, # Begin non-capturing group for optional timezone
37+
\s* # Optional whitespace after comma
38+
(?P<time_zone> # Start named group for timezone
39+
[a-zA-Z\/]+ # Match timezone name, e.g., UTC, America/New_York
40+
(?:[+-]\d{2}:\d{2})? # Optional offset in format +HH:MM or -HH:MM
41+
| # OR
42+
pytz\.FixedOffset\(\d+\) # Match pytz.FixedOffset with integer offset in parentheses
43+
) # End time_zone group
44+
)? # End optional timezone group
45+
\] # Closing bracket for datetime64
46+
$"""
47+
PATTERN_PD_DATETIME = re.compile(PD_DATETIME_RGX, re.VERBOSE)
48+
PA_DATETIME_RGX = r"""^
49+
timestamp\[
50+
(?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
51+
(?:, # Begin non-capturing group for optional timezone
52+
\s?tz= # Match "tz=" prefix
53+
(?P<time_zone> # Start named group for timezone
54+
[a-zA-Z\/]* # Match timezone name (e.g., UTC, America/New_York)
55+
(?: # Begin optional non-capturing group for offset
56+
[+-]\d{2}:\d{2} # Match offset in format +HH:MM or -HH:MM
57+
)? # End optional offset group
58+
) # End time_zone group
59+
)? # End optional timezone group
60+
\] # Closing bracket for timestamp
61+
\[pyarrow\] # Literal string "[pyarrow]"
62+
$"""
63+
PATTERN_PA_DATETIME = re.compile(PA_DATETIME_RGX, re.VERBOSE)
64+
PD_DURATION_RGX = r"""^
65+
timedelta64\[
66+
(?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
67+
\] # Closing bracket for timedelta64
68+
$"""
69+
70+
PATTERN_PD_DURATION = re.compile(PD_DURATION_RGX, re.VERBOSE)
71+
PA_DURATION_RGX = r"""^
72+
duration\[
73+
(?P<time_unit>s|ms|us|ns) # Match time unit: s, ms, us, or ns
74+
\] # Closing bracket for duration
75+
\[pyarrow\] # Literal string "[pyarrow]"
76+
$"""
77+
PATTERN_PA_DURATION = re.compile(PA_DURATION_RGX, re.VERBOSE)
3378

3479

3580
def validate_column_comparand(index: Any, other: Any) -> Any:
@@ -223,14 +268,6 @@ def native_to_narwhals_dtype(
223268
) -> DType:
224269
dtype = str(native_column.dtype)
225270

226-
pd_datetime_rgx = (
227-
r"^datetime64\[(?P<time_unit>s|ms|us|ns)(?:, (?P<time_zone>[a-zA-Z\/]+))?\]$"
228-
)
229-
pa_datetime_rgx = r"^timestamp\[(?P<time_unit>s|ms|us|ns)(?:, tz=(?P<time_zone>[a-zA-Z\/]+))?\]\[pyarrow\]$"
230-
231-
pd_duration_rgx = r"^timedelta64\[(?P<time_unit>s|ms|us|ns)\]$"
232-
pa_duration_rgx = r"^duration\[(?P<time_unit>s|ms|us|ns)\]\[pyarrow\]$"
233-
234271
if dtype in {"int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"}:
235272
return dtypes.Int64()
236273
if dtype in {"int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]"}:
@@ -269,14 +306,14 @@ def native_to_narwhals_dtype(
269306
return dtypes.Boolean()
270307
if dtype == "category" or dtype.startswith("dictionary<"):
271308
return dtypes.Categorical()
272-
if (match_ := re.match(pd_datetime_rgx, dtype)) or (
273-
match_ := re.match(pa_datetime_rgx, dtype)
309+
if (match_ := PATTERN_PD_DATETIME.match(dtype)) or (
310+
match_ := PATTERN_PA_DATETIME.match(dtype)
274311
):
275312
dt_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
276313
dt_time_zone: str | None = match_.group("time_zone")
277314
return dtypes.Datetime(dt_time_unit, dt_time_zone)
278-
if (match_ := re.match(pd_duration_rgx, dtype)) or (
279-
match_ := re.match(pa_duration_rgx, dtype)
315+
if (match_ := PATTERN_PD_DURATION.match(dtype)) or (
316+
match_ := PATTERN_PA_DURATION.match(dtype)
280317
):
281318
du_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
282319
return dtypes.Duration(du_time_unit)

tests/dtypes_test.py

+21
Original file line numberDiff line numberDiff line change
@@ -176,3 +176,24 @@ def test_pandas_inplace_modification_1267(request: pytest.FixtureRequest) -> Non
176176
assert snw.dtype == nw.Int64
177177
s[0] = 999.5
178178
assert snw.dtype == nw.Float64
179+
180+
181+
def test_pandas_fixed_offset_1302() -> None:
182+
result = nw.from_native(
183+
pd.Series(pd.to_datetime(["2020-01-01T00:00:00.000000000+01:00"])),
184+
series_only=True,
185+
).dtype
186+
if PANDAS_VERSION >= (2,):
187+
assert result == nw.Datetime("ns", "UTC+01:00")
188+
else: # pragma: no cover
189+
assert result == nw.Datetime("ns", "pytz.FixedOffset(60)")
190+
if PANDAS_VERSION >= (2,):
191+
result = nw.from_native(
192+
pd.Series(
193+
pd.to_datetime(["2020-01-01T00:00:00.000000000+01:00"])
194+
).convert_dtypes(dtype_backend="pyarrow"),
195+
series_only=True,
196+
).dtype
197+
assert result == nw.Datetime("ns", "+01:00")
198+
else: # pragma: no cover
199+
pass

0 commit comments

Comments
 (0)