1
1
from __future__ import annotations
2
2
3
- from narwhals .pandas_like .utils import evaluate_into_exprs
3
+ from narwhals .pandas_like .dataframe import PandasDataFrame
4
+ from narwhals .polars import PolarsDataFrame
4
5
from narwhals .translate import get_pandas
5
6
from narwhals .translate import get_polars
6
7
7
8
8
9
def extract_native (obj : Any , implementation ) -> Any :
9
10
from narwhals .expression import NarwhalsExpr
11
+ from narwhals .series import Series
10
12
11
13
# if isinstance(obj, NarwhalsExpr):
12
14
# return obj._call(pl.col)
@@ -17,14 +19,16 @@ def extract_native(obj: Any, implementation) -> Any:
17
19
return obj ._call (pl .col )
18
20
# if isinstance(obj, DType):
19
21
# return obj._dtype
20
- if isinstance (obj , NarwhalsFrame ):
22
+ if isinstance (obj , DataFrame ):
21
23
return obj ._dataframe
24
+ if isinstance (obj , Series ):
25
+ return obj ._series
22
26
# if isinstance(obj, PolarsSeries):
23
27
# return obj._series
24
28
return obj
25
29
26
30
27
- class NarwhalsFrame :
31
+ class DataFrame :
28
32
def __init__ (
29
33
self , df , * , is_eager = False , is_lazy = False , implementation : str | None = None
30
34
):
@@ -35,24 +39,14 @@ def __init__(
35
39
self ._implementation = implementation
36
40
return
37
41
if (pl := get_polars ()) is not None :
38
- if isinstance (df , pl .DataFrame ):
39
- if is_lazy :
40
- raise ValueError (
41
- "can't instantiate with `is_lazy` if you pass a polars DataFrame"
42
- )
43
- self ._dataframe = df
44
- self ._implementation = "polars"
45
- return
46
- elif isinstance (df , pl .LazyFrame ):
47
- if is_eager :
48
- raise ValueError (
49
- "can't instantiate with `is_eager` if you pass a polars LazyFrame"
50
- )
51
- self ._dataframe = df
42
+ if isinstance (df , (pl .DataFrame , pl .LazyFrame )):
43
+ self ._dataframe = PolarsDataFrame (df , is_eager = is_eager , is_lazy = is_lazy )
52
44
self ._implementation = "polars"
53
45
return
54
46
if (pd := get_pandas ()) is not None and isinstance (df , pd .DataFrame ):
55
- self ._dataframe = df
47
+ self ._dataframe = PandasDataFrame (
48
+ df , is_eager = is_eager , is_lazy = is_lazy , implementation = "pandas"
49
+ )
56
50
self ._implementation = "pandas"
57
51
return
58
52
raise TypeError (
@@ -68,73 +62,57 @@ def _from_dataframe(self, df: Any) -> Self:
68
62
implementation = self ._implementation ,
69
63
)
70
64
71
- def _extract_native (self , obj ):
72
- return extract_native (obj , implementation = self ._implementation )
73
-
74
65
def with_columns (
75
66
self , * exprs : IntoExpr | Iterable [IntoExpr ], ** named_exprs : IntoExpr
76
67
) -> Self :
77
- if self ._implementation == "polars" :
78
- return self ._from_dataframe (
79
- self ._dataframe .with_columns (
80
- * [self ._extract_native (v ) for v in exprs ],
81
- ** {
82
- key : self ._extract_native (value )
83
- for key , value in named_exprs .items ()
84
- },
85
- )
86
- )
87
- elif self ._implementation == "pandas" :
88
- new_series = evaluate_into_exprs (self , * exprs , ** named_exprs )
89
- df = self ._dataframe .assign (
90
- ** {series .name : series ._series for series in new_series }
91
- )
92
- return self ._from_dataframe (df )
68
+ return self ._from_dataframe (
69
+ self ._dataframe .with_columns (* exprs , ** named_exprs ),
70
+ )
93
71
94
72
def filter (self , * predicates : IntoExpr | Iterable [IntoExpr ]) -> Self :
95
73
return self ._from_dataframe (
96
- self ._dataframe .filter (* [ self . _extract_native ( v ) for v in predicates ])
74
+ self ._dataframe .filter (* predicates ),
97
75
)
98
76
99
77
def group_by (self , * keys : str | Iterable [str ]) -> GroupBy :
100
78
from narwhals .group_by import NarwhalsGroupBy
101
79
102
- return NarwhalsGroupBy (
103
- self ,
104
- * keys ,
105
- is_eager = self ._is_eager ,
106
- is_lazy = self ._is_lazy ,
107
- )
80
+ return NarwhalsGroupBy (self , * keys )
108
81
109
82
def sort (
110
83
self ,
111
84
by : str | Iterable [str ],
112
85
* more_by : str ,
113
86
descending : bool | Sequence [bool ] = False ,
114
87
) -> Self :
115
- if self ._implementation == "polars" :
116
- return self ._from_dataframe (
117
- self ._dataframe .sort (by , * more_by , descending = descending )
118
- )
88
+ return self ._from_dataframe (
89
+ self ._dataframe .sort (by , * more_by , descending = descending )
90
+ )
119
91
120
92
def collect (self ) -> Self :
121
- if not self ._is_lazy :
122
- raise RuntimeError (
123
- "DataFrame.collect can only be called if frame was instantiated with `is_lazy=True`"
124
- )
125
- if self ._implementation == "polars" :
126
- import polars as pl
127
-
128
- assert isinstance (self ._dataframe , pl .LazyFrame )
129
- return self .__class__ (self ._dataframe .collect (), is_eager = True , is_lazy = False )
93
+ return self .__class__ (
94
+ self ._dataframe .collect (),
95
+ is_eager = True ,
96
+ is_lazy = False ,
97
+ implementation = self ._implementation ,
98
+ )
130
99
131
100
def to_dict (self , * , as_series : bool = True ) -> dict [str , Any ]:
132
- if not self ._is_eager :
133
- raise RuntimeError (
134
- "DataFrame.to_dict can only be called if frame was instantiated with `is_eager=True`"
135
- )
136
- if self ._implementation == "polars" :
137
- import polars as pl
101
+ return self ._dataframe .to_dict (as_series = as_series )
138
102
139
- assert isinstance (self ._dataframe , pl .DataFrame )
140
- return self ._dataframe .to_dict (as_series = as_series )
103
+ def join (
104
+ self ,
105
+ other : Self ,
106
+ * ,
107
+ how : Literal [inner ] = "inner" ,
108
+ left_on : str | list [str ],
109
+ right_on : str | list [str ],
110
+ ) -> Self :
111
+ return self ._from_dataframe (
112
+ self ._dataframe .join (
113
+ other ._dataframe ,
114
+ how = how ,
115
+ left_on = left_on ,
116
+ right_on = right_on ,
117
+ )
118
+ )
0 commit comments