Skip to content

Commit 356bd52

Browse files
committed
docs: update user-defined-functions for 0.19.x
1 parent dfde6fe commit 356bd52

File tree

4 files changed

+57
-59
lines changed

4 files changed

+57
-59
lines changed

docs/_build/API_REFERENCE_LINKS.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,8 +51,8 @@ python:
5151
interpolate: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.interpolate.html
5252
fill_nan: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.fill_nan.html
5353
operators: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/operators.html
54-
map: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.map.html
55-
apply: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.apply.html
54+
map_batches: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_batches.html
55+
map_elements: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_elements.html
5656
over: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.over.html
5757
implode: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.implode.html
5858
DataFrame.explode: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.explode.html

docs/src/python/user-guide/expressions/user-defined-functions.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -11,22 +11,25 @@
1111
"values": [10, 7, 1],
1212
}
1313
)
14+
print(df)
15+
# --8<-- [end:dataframe]
1416

17+
# --8<-- [start:shift_map_batches]
1518
out = df.group_by("keys", maintain_order=True).agg(
16-
pl.col("values").map_batches(lambda s: s.shift()).alias("shift_map"),
19+
pl.col("values").map_batches(lambda s: s.shift()).alias("shift_map_batches"),
1720
pl.col("values").shift().alias("shift_expression"),
1821
)
19-
print(df)
20-
# --8<-- [end:dataframe]
22+
print(out)
23+
# --8<-- [end:shift_map_batches]
2124

2225

23-
# --8<-- [start:apply]
26+
# --8<-- [start:map_elements]
2427
out = df.group_by("keys", maintain_order=True).agg(
25-
pl.col("values").map_elements(lambda s: s.shift()).alias("shift_map"),
28+
pl.col("values").map_elements(lambda s: s.shift()).alias("shift_map_elements"),
2629
pl.col("values").shift().alias("shift_expression"),
2730
)
2831
print(out)
29-
# --8<-- [end:apply]
32+
# --8<-- [end:map_elements]
3033

3134
# --8<-- [start:counter]
3235
counter = 0
@@ -39,7 +42,7 @@ def add_counter(val: int) -> int:
3942

4043

4144
out = df.select(
42-
pl.col("values").map_elements(add_counter).alias("solution_apply"),
45+
pl.col("values").map_elements(add_counter).alias("solution_map_elements"),
4346
(pl.col("values") + pl.int_range(1, pl.count() + 1)).alias("solution_expr"),
4447
)
4548
print(out)
@@ -49,7 +52,7 @@ def add_counter(val: int) -> int:
4952
out = df.select(
5053
pl.struct(["keys", "values"])
5154
.map_elements(lambda x: len(x["keys"]) + x["values"])
52-
.alias("solution_apply"),
55+
.alias("solution_map_elements"),
5356
(pl.col("keys").str.len_bytes() + pl.col("values")).alias("solution_expr"),
5457
)
5558
print(out)

docs/src/rust/user-guide/expressions/user-defined-functions.rs

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,36 +6,43 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
66
"keys" => &["a", "a", "b"],
77
"values" => &[10, 7, 1],
88
)?;
9+
println!("{}", df);
10+
// --8<-- [end:dataframe]
911

12+
// --8<-- [start:shift_map_batches]
1013
let out = df
1114
.clone()
1215
.lazy()
1316
.group_by(["keys"])
1417
.agg([
1518
col("values")
1619
.map(|s| Ok(Some(s.shift(1))), GetOutput::default())
17-
.alias("shift_map"),
20+
// note: the `'shift_map_batches'` alias is just there to show how you
21+
// get the same output as in the Python API example.
22+
.alias("shift_map_batches"),
1823
col("values").shift(lit(1)).alias("shift_expression"),
1924
])
2025
.collect()?;
2126

2227
println!("{}", out);
23-
// --8<-- [end:dataframe]
28+
// --8<-- [end:shift_map_batches]
2429

25-
// --8<-- [start:apply]
30+
// --8<-- [start:map_elements]
2631
let out = df
2732
.clone()
2833
.lazy()
2934
.group_by([col("keys")])
3035
.agg([
3136
col("values")
3237
.apply(|s| Ok(Some(s.shift(1))), GetOutput::default())
33-
.alias("shift_map"),
38+
// note: the `'shift_map_elements'` alias is just there to show how you
39+
// get the same output as in the Python API example.
40+
.alias("shift_map_elements"),
3441
col("values").shift(lit(1)).alias("shift_expression"),
3542
])
3643
.collect()?;
3744
println!("{}", out);
38-
// --8<-- [end:apply]
45+
// --8<-- [end:map_elements]
3946

4047
// --8<-- [start:counter]
4148

@@ -75,7 +82,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
7582
},
7683
GetOutput::from_type(DataType::Int32),
7784
)
78-
.alias("solution_apply"),
85+
// note: the `'solution_map_elements'` alias is just there to show how you
86+
// get the same output as in the Python API example.
87+
.alias("solution_map_elements"),
7988
(col("keys").str().count_matches(lit("."), true) + col("values"))
8089
.alias("solution_expr"),
8190
])

docs/user-guide/expressions/user-defined-functions.md

Lines changed: 29 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,5 @@
11
# User-defined functions (Python)
22

3-
!!! warning "Not updated for Python Polars `0.19.0`"
4-
5-
This section of the user guide still needs to be updated for the latest Polars release.
6-
73
You should be convinced by now that Polars expressions are so powerful and flexible that there is much less need for custom Python functions
84
than in other libraries.
95

@@ -12,28 +8,28 @@ over data in Polars.
128

139
For this we provide the following expressions:
1410

15-
- `map`
16-
- `apply`
11+
- `map_batches`
12+
- `map_elements`
1713

18-
## To `map` or to `apply`.
14+
## To `map_batches` or to `map_elements`.
1915

2016
These functions have an important distinction in how they operate and consequently what data they will pass to the user.
2117

22-
A `map` passes the `Series` backed by the `expression` as is.
18+
A `map_batches` passes the `Series` backed by the `expression` as is.
2319

24-
`map` follows the same rules in both the `select` and the `group_by` context, this will
20+
`map_batches` follows the same rules in both the `select` and the `group_by` context; this will
2521
mean that the `Series` represents a column in a `DataFrame`. Note that in the `group_by` context, that column is not yet
2622
aggregated!
2723

28-
Use cases for `map` are for instance passing the `Series` in an expression to a third party library. Below we show how
29-
we could use `map` to pass an expression column to a neural network model.
24+
Use cases for `map_batches` are for instance passing the `Series` in an expression to a third party library. Below we show how
25+
we could use `map_batches` to pass an expression column to a neural network model.
3026

3127
=== ":fontawesome-brands-python: Python"
32-
[:material-api: `map`](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.map.html)
28+
[:material-api: `map_batches`](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_batches.html)
3329

3430
```python
3531
df.with_columns([
36-
pl.col("features").map(lambda s: MyNeuralNetwork.forward(s.to_numpy())).alias("activations")
32+
pl.col("features").map_batches(lambda s: MyNeuralNetwork.forward(s.to_numpy())).alias("activations")
3733
])
3834
```
3935

@@ -45,9 +41,9 @@ df.with_columns([
4541
])
4642
```
4743

48-
Use cases for `map` in the `group_by` context are slim. They are only used for performance reasons, but can quite easily lead to incorrect results. Let me explain why.
44+
Use cases for `map_batches` in the `group_by` context are slim. They are only used for performance reasons, but can quite easily lead to incorrect results. Let me explain why.
4945

50-
{{code_block('user-guide/expressions/user-defined-functions','dataframe',['map'])}}
46+
{{code_block('user-guide/expressions/user-defined-functions','dataframe',[])}}
5147

5248
```python exec="on" result="text" session="user-guide/udf"
5349
--8<-- "python/user-guide/expressions/user-defined-functions.py:setup"
@@ -68,75 +64,65 @@ If we would then apply a `shift` operation to the right, we'd expect:
6864
"b" -> [null]
6965
```
7066

71-
Now, let's print and see what we've got.
67+
Let's try that out and see what we get:
7268

73-
```python
74-
print(out)
75-
```
69+
{{code_block('user-guide/expressions/user-defined-functions','shift_map_batches',['map_batches'])}}
7670

77-
```
78-
shape: (2, 3)
79-
┌──────┬────────────┬──────────────────┐
80-
│ keys ┆ shift_map ┆ shift_expression │
81-
│ --- ┆ --- ┆ --- │
82-
│ str ┆ list[i64] ┆ list[i64] │
83-
╞══════╪════════════╪══════════════════╡
84-
│ a ┆ [null, 10] ┆ [null, 10] │
85-
│ b ┆ [7] ┆ [null] │
86-
└──────┴────────────┴──────────────────┘
71+
```python exec="on" result="text" session="user-guide/udf"
72+
--8<-- "python/user-guide/expressions/user-defined-functions.py:shift_map_batches"
8773
```
8874

8975
Ouch... we clearly get the wrong results here. Group `"b"` even got a value from group `"a"` 😵.
9076

91-
This went horribly wrong, because the `map` applies the function before we aggregate! So that means the whole column `[10, 7, 1`\] got shifted to `[null, 10, 7]` and was then aggregated.
77+
This went horribly wrong, because `map_batches` applies the function before we aggregate! So that means the whole column `[10, 7, 1]` got shifted to `[null, 10, 7]` and was then aggregated.
9278

93-
So my advice is to never use `map` in the `group_by` context unless you know you need it and know what you are doing.
79+
So my advice is to never use `map_batches` in the `group_by` context unless you know you need it and know what you are doing.
9480

95-
## To `apply`
81+
## To `map_elements`
9682

97-
Luckily we can fix previous example with `apply`. `apply` works on the smallest logical elements for that operation.
83+
Luckily we can fix the previous example with `map_elements`. `map_elements` works on the smallest logical elements for that operation.
9884

9985
That is:
10086

10187
- `select context` -> single elements
10288
- `group by context` -> single groups
10389

104-
So with `apply` we should be able to fix our example:
90+
So with `map_elements` we should be able to fix our example:
10591

106-
{{code_block('user-guide/expressions/user-defined-functions','apply',['apply'])}}
92+
{{code_block('user-guide/expressions/user-defined-functions','map_elements',['map_elements'])}}
10793

10894
```python exec="on" result="text" session="user-guide/udf"
109-
--8<-- "python/user-guide/expressions/user-defined-functions.py:apply"
95+
--8<-- "python/user-guide/expressions/user-defined-functions.py:map_elements"
11096
```
11197

11298
And observe, a valid result! 🎉
11399

114-
## `apply` in the `select` context
100+
## `map_elements` in the `select` context
115101

116-
In the `select` context, the `apply` expression passes elements of the column to the Python function.
102+
In the `select` context, the `map_elements` expression passes elements of the column to the Python function.
117103

118104
_Note that you are now running Python, this will be slow._
119105

120106
Let's go through some examples to see what to expect. We will continue with the `DataFrame` we defined at the start of
121-
this section and show an example with the `apply` function and a counter example where we use the expression API to
107+
this section and show an example with the `map_elements` function and a counter example where we use the expression API to
122108
achieve the same goals.
123109

124110
### Adding a counter
125111

126112
In this example we create a global `counter` and then add the integer `1` to the global state at every element processed.
127113
Every iteration the result of the increment will be added to the element value.
128114

129-
> Note, this example isn't provided in Rust. The reason is that the global `counter` value would lead to data races when this apply is evaluated in parallel. It would be possible to wrap it in a `Mutex` to protect the variable, but that would be obscuring the point of the example. This is a case where the Python Global Interpreter Lock's performance tradeoff provides some safety guarantees.
115+
> Note, this example isn't provided in Rust. The reason is that the global `counter` value would lead to data races when this `apply` is evaluated in parallel. It would be possible to wrap it in a `Mutex` to protect the variable, but that would be obscuring the point of the example. This is a case where the Python Global Interpreter Lock's performance tradeoff provides some safety guarantees.
130116
131-
{{code_block('user-guide/expressions/user-defined-functions','counter',['apply'])}}
117+
{{code_block('user-guide/expressions/user-defined-functions','counter',['map_elements'])}}
132118

133119
```python exec="on" result="text" session="user-guide/udf"
134120
--8<-- "python/user-guide/expressions/user-defined-functions.py:counter"
135121
```
136122

137123
### Combining multiple column values
138124

139-
If we want to have access to values of different columns in a single `apply` function call, we can create `struct` data
125+
If we want to have access to values of different columns in a single `map_elements` function call, we can create a `struct` data
140126
type. This data type collects those columns as fields in the `struct`. So if we'd create a struct from the columns
141127
`"keys"` and `"values"`, we would get the following struct elements:
142128

@@ -150,7 +136,7 @@ type. This data type collects those columns as fields in the `struct`. So if we'
150136

151137
In Python, those would be passed as `dict` to the calling Python function and can thus be indexed by `field: str`. In Rust, you'll get a `Series` with the `Struct` type. The fields of the struct can then be indexed and downcast.
152138

153-
{{code_block('user-guide/expressions/user-defined-functions','combine',['apply','struct'])}}
139+
{{code_block('user-guide/expressions/user-defined-functions','combine',['map_elements','struct'])}}
154140

155141
```python exec="on" result="text" session="user-guide/udf"
156142
--8<-- "python/user-guide/expressions/user-defined-functions.py:combine"

0 commit comments

Comments
 (0)