Skip to content

Commit 7c5363e

Browse files
committed
merge main
2 parents 6f220fb + 5c3db5b commit 7c5363e

File tree

5 files changed

+133
-11
lines changed

5 files changed

+133
-11
lines changed

docs/api-reference/dependencies.md

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
- is_cudf_series
1616
- is_dask_dataframe
1717
- is_ibis_table
18+
- is_into_dataframe
1819
- is_into_series
1920
- is_modin_dataframe
2021
- is_modin_index

narwhals/_arrow/utils.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,9 @@ def convert_str_slice_to_int_slice(
343343
# Regex for date, time, separator and timezone components
344344
DATE_RE = r"(?P<date>\d{1,4}[-/.]\d{1,2}[-/.]\d{1,4})"
345345
SEP_RE = r"(?P<sep>\s|T)"
346-
TIME_RE = r"(?P<time>\d{2}:\d{2}:\d{2})" # \s*(?P<period>[AP]M)?)?
346+
TIME_RE = r"(?P<time>\d{2}:\d{2}(?::\d{2})?)" # \s*(?P<period>[AP]M)?)?
347+
HMS_RE = r"^(?P<hms>\d{2}:\d{2}:\d{2})$"
348+
HM_RE = r"^(?P<hm>\d{2}:\d{2})$"
347349
TZ_RE = r"(?P<tz>Z|[+-]\d{2}:?\d{2})" # Matches 'Z', '+02:00' and '+0200'; an hour-only offset such as '+02' is NOT matched (four digits are required).
348350
FULL_RE = rf"{DATE_RE}{SEP_RE}?{TIME_RE}?{TZ_RE}?$"
349351

@@ -357,6 +359,10 @@ def convert_str_slice_to_int_slice(
357359
(DMY_RE, "%d-%m-%Y"),
358360
(MDY_RE, "%m-%d-%Y"),
359361
)
362+
TIME_FORMATS = (
363+
(HMS_RE, "%H:%M:%S"),
364+
(HM_RE, "%H:%M"),
365+
)
360366

361367

362368
def parse_datetime_format(arr: pa.StringArray) -> str:
@@ -421,8 +427,11 @@ def _parse_date_format(arr: pa.Array) -> str:
421427
def _parse_time_format(arr: pa.Array) -> str:
    """
    Infer the time format shared by every string in `arr`.

    Each `(regex, format)` pair in `TIME_FORMATS` is tried in order, and the
    format paired with the first regex that matches every element is returned.

    Arguments:
        arr: Strings to inspect.
            NOTE(review): presumably the time component already separated
            from the date part by the caller -- confirm.

    Returns:
        The matching format string (e.g. `"%H:%M:%S"` or `"%H:%M"`), or `""`
        when no candidate regex matches all elements.
    """
    import pyarrow.compute as pc  # ignore-banned-import

    for time_rgx, time_fmt in TIME_FORMATS:
        # `extract_regex` yields a null entry for every element the pattern
        # does not match, so "all valid" means "every element matched".
        matches = pc.extract_regex(arr, pattern=time_rgx)
        if pc.all(matches.is_valid()).as_py():
            return time_fmt
    return ""
426435

427436

428437
def _rolling(

narwhals/dependencies.py

+39
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,44 @@ def is_into_series(native_series: IntoSeries) -> bool:
251251
)
252252

253253

254+
def is_into_dataframe(native_dataframe: Any) -> bool:
    """
    Tell whether Narwhals can turn `native_dataframe` into a DataFrame.

    Arguments:
        native_dataframe: Candidate object to inspect.

    Returns:
        `True` when `native_dataframe` is already a Narwhals DataFrame, exposes a
        `__narwhals_dataframe__` attribute, or is accepted by one of
        `is_polars_dataframe`, `is_pyarrow_table` or `is_pandas_like_dataframe`;
        `False` otherwise.

    Examples:
        >>> import pandas as pd
        >>> import polars as pl
        >>> import numpy as np
        >>> from narwhals.dependencies import is_into_dataframe

        >>> df_pd = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> df_pl = pl.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        >>> np_arr = np.array([[1, 4], [2, 5], [3, 6]])

        >>> is_into_dataframe(df_pd)
        True
        >>> is_into_dataframe(df_pl)
        True
        >>> is_into_dataframe(np_arr)
        False
    """
    from narwhals.dataframe import DataFrame

    # Objects that are already Narwhals frames, or that implement the
    # `__narwhals_dataframe__` protocol, need no backend-specific check.
    if isinstance(native_dataframe, DataFrame):
        return True
    if hasattr(native_dataframe, "__narwhals_dataframe__"):
        return True

    # Backend checks, evaluated lazily and in the same order as before.
    backend_checks = (
        is_polars_dataframe,
        is_pyarrow_table,
        is_pandas_like_dataframe,
    )
    for check in backend_checks:
        outcome = check(native_dataframe)
        if outcome:
            return outcome
    # Every check was falsy: hand back the result of the last one.
    return outcome
290+
291+
254292
__all__ = [
255293
"get_polars",
256294
"get_pandas",
@@ -275,5 +313,6 @@ def is_into_series(native_series: IntoSeries) -> bool:
275313
"is_dask_dataframe",
276314
"is_pandas_like_dataframe",
277315
"is_pandas_like_series",
316+
"is_into_dataframe",
278317
"is_into_series",
279318
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from __future__ import annotations
2+
3+
from typing import TYPE_CHECKING
4+
from typing import Any
5+
6+
import numpy as np
7+
import pandas as pd
8+
import polars as pl
9+
import pyarrow as pa
10+
11+
import narwhals as nw
12+
from narwhals.dependencies import is_into_dataframe
13+
14+
if TYPE_CHECKING:
15+
from typing_extensions import Self
16+
17+
18+
class DictDataFrame:
    """Minimal dict-backed test double that exposes `__narwhals_dataframe__`.

    Arguments:
        data: Mapping of column name to the list of values in that column.
    """

    def __init__(self, data: dict[str, list[Any]]) -> None:
        self._data = data

    def __len__(self) -> int:  # pragma: no cover
        # The row count is the length of the first column. With no columns
        # at all, report 0 instead of letting `next` raise StopIteration.
        first_column = next(iter(self._data.values()), ())
        return len(first_column)

    def __narwhals_dataframe__(self) -> Self:  # pragma: no cover
        # Returns the object itself.
        return self
27+
28+
29+
def test_is_into_dataframe() -> None:
    """Check `is_into_dataframe` against convertible and non-convertible objects."""
    data = {"a": [1, 2, 3], "b": [4, 5, 6]}

    # Objects Narwhals is expected to accept as a dataframe.
    accepted = [
        pa.table(data),
        pl.DataFrame(data),
        pd.DataFrame(data),
        nw.from_native(pd.DataFrame(data)),
        DictDataFrame(data),
    ]
    # Objects that must be rejected: a NumPy array and a plain dict.
    rejected = [
        np.array([[1, 4], [2, 5], [3, 6]]),
        data,
    ]

    for candidate in accepted:
        assert is_into_dataframe(candidate)
    for candidate in rejected:
        assert not is_into_dataframe(candidate)

tests/expr_and_series/str/to_datetime_test.py

+44-8
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,29 @@ def test_to_datetime_series(constructor_eager: ConstructorEager) -> None:
4747
assert str(result) == expected
4848

4949

50-
def test_to_datetime_infer_fmt(constructor: Constructor) -> None:
50+
@pytest.mark.parametrize(
51+
("data", "expected", "expected_cudf"),
52+
[
53+
(
54+
{"a": ["2020-01-01T12:34:56"]},
55+
"2020-01-01 12:34:56",
56+
"2020-01-01T12:34:56.000000000",
57+
),
58+
(
59+
{"a": ["2020-01-01T12:34"]},
60+
"2020-01-01 12:34:00",
61+
"2020-01-01T12:34:00.000000000",
62+
),
63+
],
64+
)
65+
def test_to_datetime_infer_fmt(
66+
constructor: Constructor,
67+
data: dict[str, list[str]],
68+
expected: str,
69+
expected_cudf: str,
70+
) -> None:
5171
if "cudf" in str(constructor): # pragma: no cover
52-
expected = "2020-01-01T12:34:56.000000000"
53-
else:
54-
expected = "2020-01-01 12:34:56"
72+
expected = expected_cudf
5573

5674
result = (
5775
nw.from_native(constructor(data))
@@ -63,11 +81,29 @@ def test_to_datetime_infer_fmt(constructor: Constructor) -> None:
6381
assert str(result) == expected
6482

6583

66-
def test_to_datetime_series_infer_fmt(constructor_eager: ConstructorEager) -> None:
84+
@pytest.mark.parametrize(
85+
("data", "expected", "expected_cudf"),
86+
[
87+
(
88+
{"a": ["2020-01-01T12:34:56"]},
89+
"2020-01-01 12:34:56",
90+
"2020-01-01T12:34:56.000000000",
91+
),
92+
(
93+
{"a": ["2020-01-01T12:34"]},
94+
"2020-01-01 12:34:00",
95+
"2020-01-01T12:34:00.000000000",
96+
),
97+
],
98+
)
99+
def test_to_datetime_series_infer_fmt(
100+
constructor_eager: ConstructorEager,
101+
data: dict[str, list[str]],
102+
expected: str,
103+
expected_cudf: str,
104+
) -> None:
67105
if "cudf" in str(constructor_eager): # pragma: no cover
68-
expected = "2020-01-01T12:34:56.000000000"
69-
else:
70-
expected = "2020-01-01 12:34:56"
106+
expected = expected_cudf
71107

72108
result = (
73109
nw.from_native(constructor_eager(data), eager_only=True)["a"].str.to_datetime()

0 commit comments

Comments
 (0)