|
30 | 30 | Implementation.CUDF,
|
31 | 31 | Implementation.MODIN,
|
32 | 32 | }
|
# Pre-compiled patterns used to parse the string form of pandas-like dtypes.
# They are compiled once at import time so that dtype translation does not
# have to look the patterns up again on every call.
#
# Timezone names follow the IANA naming scheme, which besides letters and "/"
# also uses "_" (America/New_York), digits and signs (Etc/GMT+5, EST5EDT) and
# "-" (America/Port-au-Prince), so the character class accepts all of those.

# NumPy-backed pandas datetime dtype, e.g. "datetime64[ns]",
# "datetime64[ns, America/New_York]", "datetime64[us, UTC+01:00]" or
# "datetime64[ns, pytz.FixedOffset(-300)]".
PD_DATETIME_RGX = r"""^
    datetime64\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
        (?:,                                      # Begin non-capturing group for optional timezone
            \s*                                   # Optional whitespace after comma
            (?P<time_zone>                        # Start named group for timezone
                [a-zA-Z0-9_+\-\/]+                # Match timezone name, e.g., UTC, America/New_York, Etc/GMT+5
                (?:[+-]\d{2}:\d{2})?              # Optional offset in format +HH:MM or -HH:MM
                |                                 # OR
                pytz\.FixedOffset\(-?\d+\)        # Match pytz.FixedOffset with (possibly negative) integer offset
            )                                     # End time_zone group
        )?                                        # End optional timezone group
    \]                                            # Closing bracket for datetime64
$"""
PATTERN_PD_DATETIME = re.compile(PD_DATETIME_RGX, re.VERBOSE)

# PyArrow-backed timestamp dtype, e.g. "timestamp[ns][pyarrow]",
# "timestamp[us, tz=America/New_York][pyarrow]" or
# "timestamp[ms, tz=+05:30][pyarrow]" (offset without a zone name).
PA_DATETIME_RGX = r"""^
    timestamp\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
        (?:,                                      # Begin non-capturing group for optional timezone
            \s?tz=                                # Match "tz=" prefix
            (?P<time_zone>                        # Start named group for timezone
                [a-zA-Z0-9_+\-\/]*                # Match timezone name (e.g., UTC, America/New_York)
                (?:                               # Begin optional non-capturing group for offset
                    [+-]\d{2}:\d{2}               # Match offset in format +HH:MM or -HH:MM
                )?                                # End optional offset group
            )                                     # End time_zone group
        )?                                        # End optional timezone group
    \]                                            # Closing bracket for timestamp
    \[pyarrow\]                                   # Literal string "[pyarrow]"
$"""
PATTERN_PA_DATETIME = re.compile(PA_DATETIME_RGX, re.VERBOSE)

# NumPy-backed pandas duration dtype, e.g. "timedelta64[ns]".
PD_DURATION_RGX = r"""^
    timedelta64\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
    \]                                            # Closing bracket for timedelta64
$"""
PATTERN_PD_DURATION = re.compile(PD_DURATION_RGX, re.VERBOSE)

# PyArrow-backed duration dtype, e.g. "duration[ns][pyarrow]".
PA_DURATION_RGX = r"""^
    duration\[
        (?P<time_unit>s|ms|us|ns)                 # Match time unit: s, ms, us, or ns
    \]                                            # Closing bracket for duration
    \[pyarrow\]                                   # Literal string "[pyarrow]"
$"""
PATTERN_PA_DURATION = re.compile(PA_DURATION_RGX, re.VERBOSE)
33 | 78 |
|
34 | 79 |
|
35 | 80 | def validate_column_comparand(index: Any, other: Any) -> Any:
|
@@ -223,14 +268,6 @@ def native_to_narwhals_dtype(
|
223 | 268 | ) -> DType:
|
224 | 269 | dtype = str(native_column.dtype)
|
225 | 270 |
|
226 |
| - pd_datetime_rgx = ( |
227 |
| - r"^datetime64\[(?P<time_unit>s|ms|us|ns)(?:, (?P<time_zone>[a-zA-Z\/]+))?\]$" |
228 |
| - ) |
229 |
| - pa_datetime_rgx = r"^timestamp\[(?P<time_unit>s|ms|us|ns)(?:, tz=(?P<time_zone>[a-zA-Z\/]+))?\]\[pyarrow\]$" |
230 |
| - |
231 |
| - pd_duration_rgx = r"^timedelta64\[(?P<time_unit>s|ms|us|ns)\]$" |
232 |
| - pa_duration_rgx = r"^duration\[(?P<time_unit>s|ms|us|ns)\]\[pyarrow\]$" |
233 |
| - |
234 | 271 | if dtype in {"int64", "Int64", "Int64[pyarrow]", "int64[pyarrow]"}:
|
235 | 272 | return dtypes.Int64()
|
236 | 273 | if dtype in {"int32", "Int32", "Int32[pyarrow]", "int32[pyarrow]"}:
|
@@ -269,14 +306,14 @@ def native_to_narwhals_dtype(
|
269 | 306 | return dtypes.Boolean()
|
270 | 307 | if dtype == "category" or dtype.startswith("dictionary<"):
|
271 | 308 | return dtypes.Categorical()
|
272 |
| - if (match_ := re.match(pd_datetime_rgx, dtype)) or ( |
273 |
| - match_ := re.match(pa_datetime_rgx, dtype) |
| 309 | + if (match_ := PATTERN_PD_DATETIME.match(dtype)) or ( |
| 310 | + match_ := PATTERN_PA_DATETIME.match(dtype) |
274 | 311 | ):
|
275 | 312 | dt_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
|
276 | 313 | dt_time_zone: str | None = match_.group("time_zone")
|
277 | 314 | return dtypes.Datetime(dt_time_unit, dt_time_zone)
|
278 |
| - if (match_ := re.match(pd_duration_rgx, dtype)) or ( |
279 |
| - match_ := re.match(pa_duration_rgx, dtype) |
| 315 | + if (match_ := PATTERN_PD_DURATION.match(dtype)) or ( |
| 316 | + match_ := PATTERN_PA_DURATION.match(dtype) |
280 | 317 | ):
|
281 | 318 | du_time_unit: Literal["us", "ns", "ms", "s"] = match_.group("time_unit") # type: ignore[assignment]
|
282 | 319 | return dtypes.Duration(du_time_unit)
|
|
0 commit comments