From 356bd52e206ceb29f5cde585037d1ee3211d31a6 Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Sat, 16 Dec 2023 11:28:40 +0000
Subject: [PATCH 1/2] docs: update user-defined-functions for 0.19.x

---
 docs/_build/API_REFERENCE_LINKS.yml           |  4 +-
 .../expressions/user-defined-functions.py     | 19 ++---
 .../expressions/user-defined-functions.rs     | 21 ++++--
 .../expressions/user-defined-functions.md     | 72 ++++++++-----------
 4 files changed, 57 insertions(+), 59 deletions(-)

diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml
index 51959eae6199..d2cb2b70e2b3 100644
--- a/docs/_build/API_REFERENCE_LINKS.yml
+++ b/docs/_build/API_REFERENCE_LINKS.yml
@@ -51,8 +51,8 @@ python:
   interpolate: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.interpolate.html
   fill_nan: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.fill_nan.html
   operators: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/operators.html
-  map: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.map.html
-  apply: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.apply.html
+  map_batches: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_batches.html
+  map_elements: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_elements.html
   over: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.over.html
   implode: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.implode.html
   DataFrame.explode: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.explode.html
diff --git a/docs/src/python/user-guide/expressions/user-defined-functions.py b/docs/src/python/user-guide/expressions/user-defined-functions.py
index 920812babd93..16f0da8dca76 100644
--- a/docs/src/python/user-guide/expressions/user-defined-functions.py
+++ b/docs/src/python/user-guide/expressions/user-defined-functions.py
@@ -11,22 +11,25 @@
         "values": [10, 7, 1],
     }
 )
+print(df)
+# --8<-- [end:dataframe]
 
+# --8<-- [start:shift_map_batches]
 out = df.group_by("keys", maintain_order=True).agg(
-    pl.col("values").map_batches(lambda s: s.shift()).alias("shift_map"),
+    pl.col("values").map_batches(lambda s: s.shift()).alias("shift_map_batches"),
     pl.col("values").shift().alias("shift_expression"),
 )
-print(df)
-# --8<-- [end:dataframe]
+print(out)
+# --8<-- [end:shift_map_batches]
 
 
-# --8<-- [start:apply]
+# --8<-- [start:map_elements]
 out = df.group_by("keys", maintain_order=True).agg(
-    pl.col("values").map_elements(lambda s: s.shift()).alias("shift_map"),
+    pl.col("values").map_elements(lambda s: s.shift()).alias("shift_map_elements"),
     pl.col("values").shift().alias("shift_expression"),
 )
 print(out)
-# --8<-- [end:apply]
+# --8<-- [end:map_elements]
 
 # --8<-- [start:counter]
 counter = 0
@@ -39,7 +42,7 @@ def add_counter(val: int) -> int:
 
 
 out = df.select(
-    pl.col("values").map_elements(add_counter).alias("solution_apply"),
+    pl.col("values").map_elements(add_counter).alias("solution_map_elements"),
     (pl.col("values") + pl.int_range(1, pl.count() + 1)).alias("solution_expr"),
 )
 print(out)
@@ -49,7 +52,7 @@ def add_counter(val: int) -> int:
 out = df.select(
     pl.struct(["keys", "values"])
     .map_elements(lambda x: len(x["keys"]) + x["values"])
-    .alias("solution_apply"),
+    .alias("solution_map_elements"),
     (pl.col("keys").str.len_bytes() + pl.col("values")).alias("solution_expr"),
 )
 print(out)
diff --git a/docs/src/rust/user-guide/expressions/user-defined-functions.rs b/docs/src/rust/user-guide/expressions/user-defined-functions.rs
index 44e8e69fc5f9..82663fe0b3ff 100644
--- a/docs/src/rust/user-guide/expressions/user-defined-functions.rs
+++ b/docs/src/rust/user-guide/expressions/user-defined-functions.rs
@@ -6,7 +6,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         "keys" => &["a", "a", "b"],
         "values" => &[10, 7, 1],
     )?;
+    println!("{}", df);
+    // --8<-- [end:dataframe]
 
+    // --8<-- [start:shift_map_batches]
     let out = df
         .clone()
         .lazy()
@@ -14,15 +17,17 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .agg([
             col("values")
                 .map(|s| Ok(Some(s.shift(1))), GetOutput::default())
-                .alias("shift_map"),
+                // note: the `'shift_map_batches'` alias is just there to show how you
+                // get the same output as in the Python API example.
+                .alias("shift_map_batches"),
             col("values").shift(lit(1)).alias("shift_expression"),
         ])
         .collect()?;
 
     println!("{}", out);
-    // --8<-- [end:dataframe]
+    // --8<-- [end:shift_map_batches]
 
-    // --8<-- [start:apply]
+    // --8<-- [start:map_elements]
     let out = df
         .clone()
         .lazy()
@@ -30,12 +35,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         .agg([
             col("values")
                 .apply(|s| Ok(Some(s.shift(1))), GetOutput::default())
-                .alias("shift_map"),
+                // note: the `'shift_map_elements'` alias is just there to show how you
+                // get the same output as in the Python API example.
+                .alias("shift_map_elements"),
             col("values").shift(lit(1)).alias("shift_expression"),
         ])
         .collect()?;
     println!("{}", out);
-    // --8<-- [end:apply]
+    // --8<-- [end:map_elements]
 
     // --8<-- [start:counter]
 
@@ -75,7 +82,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
                     },
                     GetOutput::from_type(DataType::Int32),
                 )
-                .alias("solution_apply"),
+                // note: the `'solution_map_elements'` alias is just there to show how you
+                // get the same output as in the Python API example.
+                .alias("solution_map_elements"),
             (col("keys").str().count_matches(lit("."), true) + col("values"))
                 .alias("solution_expr"),
         ])
diff --git a/docs/user-guide/expressions/user-defined-functions.md b/docs/user-guide/expressions/user-defined-functions.md
index 25764a414ef2..785cf080fb95 100644
--- a/docs/user-guide/expressions/user-defined-functions.md
+++ b/docs/user-guide/expressions/user-defined-functions.md
@@ -1,9 +1,5 @@
 # User-defined functions (Python)
 
-!!! warning "Not updated for Python Polars `0.19.0`"
-
-    This section of the user guide still needs to be updated for the latest Polars release.
-
 You should be convinced by now that Polars expressions are so powerful and flexible that there is much less need for custom Python functions
 than in other libraries.
 
@@ -12,28 +8,28 @@ over data in Polars.
 
 For this we provide the following expressions:
 
-- `map`
-- `apply`
+- `map_batches`
+- `map_elements`
 
-## To `map` or to `apply`.
+## To `map_batches` or to `map_elements`.
 
 These functions have an important distinction in how they operate and consequently what data they will pass to the user.
 
-A `map` passes the `Series` backed by the `expression` as is.
+A `map_batches` passes the `Series` backed by the `expression` as is.
 
-`map` follows the same rules in both the `select` and the `group_by` context, this will
+`map_batches` follows the same rules in both the `select` and the `group_by` context. This will
 mean that the `Series` represents a column in a `DataFrame`. Note that in the `group_by` context, that column is not yet
 aggregated!
 
-Use cases for `map` are for instance passing the `Series` in an expression to a third party library. Below we show how
-we could use `map` to pass an expression column to a neural network model.
+Use cases for `map_batches` are for instance passing the `Series` in an expression to a third party library. Below we show how
+we could use `map_batches` to pass an expression column to a neural network model.
 
 === ":fontawesome-brands-python: Python"
-[:material-api: `map`](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.map.html)
+[:material-api: `map_batches`](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_batches.html)
 
 ```python
 df.with_columns([
-    pl.col("features").map(lambda s: MyNeuralNetwork.forward(s.to_numpy())).alias("activations")
+    pl.col("features").map_batches(lambda s: MyNeuralNetwork.forward(s.to_numpy())).alias("activations")
 ])
 ```
 
@@ -45,9 +41,9 @@ df.with_columns([
 ])
 ```
 
-Use cases for `map` in the `group_by` context are slim. They are only used for performance reasons, but can quite easily lead to incorrect results. Let me explain why.
+Use cases for `map_batches` in the `group_by` context are slim. They are only used for performance reasons, but can quite easily lead to incorrect results. Let me explain why.
 
-{{code_block('user-guide/expressions/user-defined-functions','dataframe',['map'])}}
+{{code_block('user-guide/expressions/user-defined-functions','dataframe',[])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:setup"
@@ -68,57 +64,47 @@ If we would then apply a `shift` operation to the right, we'd expect:
 "b" -> [null]
 ```
 
-Now, let's print and see what we've got.
+Let's try that out and see what we get:
 
-```python
-print(out)
-```
+{{code_block('user-guide/expressions/user-defined-functions','shift_map_batches',['map_batches'])}}
 
-```
-shape: (2, 3)
-┌──────┬────────────┬──────────────────┐
-│ keys ┆ shift_map  ┆ shift_expression │
-│ ---  ┆ ---        ┆ ---              │
-│ str  ┆ list[i64]  ┆ list[i64]        │
-╞══════╪════════════╪══════════════════╡
-│ a    ┆ [null, 10] ┆ [null, 10]       │
-│ b    ┆ [7]        ┆ [null]           │
-└──────┴────────────┴──────────────────┘
+```python exec="on" result="text" session="user-guide/udf"
+--8<-- "python/user-guide/expressions/user-defined-functions.py:shift_map_batches"
 ```
 
 Ouch.. we clearly get the wrong results here. Group `"b"` even got a value from group `"a"` 😵.
 
-This went horribly wrong, because the `map` applies the function before we aggregate! So that means the whole column `[10, 7, 1`\] got shifted to `[null, 10, 7]` and was then aggregated.
+This went horribly wrong, because `map_batches` applies the function before we aggregate! So that means the whole column `[10, 7, 1]` got shifted to `[null, 10, 7]` and was then aggregated.
 
-So my advice is to never use `map` in the `group_by` context unless you know you need it and know what you are doing.
+So my advice is to never use `map_batches` in the `group_by` context unless you know you need it and know what you are doing.
 
-## To `apply`
+## To `map_elements`
 
-Luckily we can fix previous example with `apply`. `apply` works on the smallest logical elements for that operation.
+Luckily we can fix the previous example with `map_elements`. `map_elements` works on the smallest logical elements for that operation.
 
 That is:
 
 - `select context` -> single elements
 - `group by context` -> single groups
 
-So with `apply` we should be able to fix our example:
+So with `map_elements` we should be able to fix our example:
 
-{{code_block('user-guide/expressions/user-defined-functions','apply',['apply'])}}
+{{code_block('user-guide/expressions/user-defined-functions','map_elements',['map_elements'])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
---8<-- "python/user-guide/expressions/user-defined-functions.py:apply"
+--8<-- "python/user-guide/expressions/user-defined-functions.py:map_elements"
 ```
 
 And observe, a valid result! 🎉
 
-## `apply` in the `select` context
+## `map_elements` in the `select` context
 
-In the `select` context, the `apply` expression passes elements of the column to the Python function.
+In the `select` context, the `map_elements` expression passes elements of the column to the Python function.
 
 _Note that you are now running Python, this will be slow._
 
 Let's go through some examples to see what to expect. We will continue with the `DataFrame` we defined at the start of
-this section and show an example with the `apply` function and a counter example where we use the expression API to
+this section and show an example with the `map_elements` function and a counter example where we use the expression API to
 achieve the same goals.
 
 ### Adding a counter
@@ -126,9 +112,9 @@ achieve the same goals.
 In this example we create a global `counter` and then add the integer `1` to the global state at every element processed.
 Every iteration the result of the increment will be added to the element value.
 
-> Note, this example isn't provided in Rust. The reason is that the global `counter` value would lead to data races when this apply is evaluated in parallel. It would be possible to wrap it in a `Mutex` to protect the variable, but that would be obscuring the point of the example. This is a case where the Python Global Interpreter Lock's performance tradeoff provides some safety guarantees.
+> Note, this example isn't provided in Rust. The reason is that the global `counter` value would lead to data races when this `apply` is evaluated in parallel. It would be possible to wrap it in a `Mutex` to protect the variable, but that would be obscuring the point of the example. This is a case where the Python Global Interpreter Lock's performance tradeoff provides some safety guarantees.
 
-{{code_block('user-guide/expressions/user-defined-functions','counter',['apply'])}}
+{{code_block('user-guide/expressions/user-defined-functions','counter',['map_elements'])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:counter"
@@ -136,7 +122,7 @@ Every iteration the result of the increment will be added to the element value.
 
 ### Combining multiple column values
 
-If we want to have access to values of different columns in a single `apply` function call, we can create `struct` data
+If we want to have access to values of different columns in a single `map_elements` function call, we can create a `struct` data
 type. This data type collects those columns as fields in the `struct`. So if we'd create a struct from the columns
 `"keys"` and `"values"`, we would get the following struct elements:
 
@@ -150,7 +136,7 @@ type. This data type collects those columns as fields in the `struct`. So if we'
 
 In Python, those would be passed as `dict` to the calling Python function and can thus be indexed by `field: str`. In Rust, you'll get a `Series` with the `Struct` type. The fields of the struct can then be indexed and downcast.
 
-{{code_block('user-guide/expressions/user-defined-functions','combine',['apply','struct'])}}
+{{code_block('user-guide/expressions/user-defined-functions','combine',['map_elements','struct'])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:combine"

From baef62d6cc8f06fa1ee2d4f7cb882dcd58e7f94f Mon Sep 17 00:00:00 2001
From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com>
Date: Sun, 17 Dec 2023 09:06:22 +0000
Subject: [PATCH 2/2] avoid invalid references in Rust docs

---
 docs/_build/API_REFERENCE_LINKS.yml                   |  4 ----
 docs/user-guide/expressions/user-defined-functions.md | 11 +++++++----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml
index d2cb2b70e2b3..35565e96d492 100644
--- a/docs/_build/API_REFERENCE_LINKS.yml
+++ b/docs/_build/API_REFERENCE_LINKS.yml
@@ -51,8 +51,6 @@ python:
   interpolate: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.interpolate.html
   fill_nan: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.fill_nan.html
   operators: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/operators.html
-  map_batches: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_batches.html
-  map_elements: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_elements.html
   over: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.over.html
   implode: https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.implode.html
   DataFrame.explode: https://pola-rs.github.io/polars/py-polars/html/reference/dataframe/api/polars.DataFrame.explode.html
@@ -273,8 +271,6 @@ rust:
   concat_list:
     name: concat_lst
     link: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/fn.concat_lst.html
-  map: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.map
-  apply: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.apply
   over: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.over
 
   alias: https://pola-rs.github.io/polars/docs/rust/dev/polars_lazy/dsl/enum.Expr.html#method.alias
diff --git a/docs/user-guide/expressions/user-defined-functions.md b/docs/user-guide/expressions/user-defined-functions.md
index 785cf080fb95..3d508a4225da 100644
--- a/docs/user-guide/expressions/user-defined-functions.md
+++ b/docs/user-guide/expressions/user-defined-functions.md
@@ -66,7 +66,7 @@ If we would then apply a `shift` operation to the right, we'd expect:
 
 Let's try that out and see what we get:
 
-{{code_block('user-guide/expressions/user-defined-functions','shift_map_batches',['map_batches'])}}
+{{code_block('user-guide/expressions/user-defined-functions','shift_map_batches',[])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:shift_map_batches"
@@ -89,7 +89,10 @@ That is:
 
 So with `map_elements` we should be able to fix our example:
 
-{{code_block('user-guide/expressions/user-defined-functions','map_elements',['map_elements'])}}
+=== ":fontawesome-brands-python: Python"
+[:material-api: `map_elements`](https://pola-rs.github.io/polars/py-polars/html/reference/expressions/api/polars.Expr.map_elements.html)
+
+{{code_block('user-guide/expressions/user-defined-functions','map_elements',[])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:map_elements"
@@ -114,7 +117,7 @@ Every iteration the result of the increment will be added to the element value.
 
 > Note, this example isn't provided in Rust. The reason is that the global `counter` value would lead to data races when this `apply` is evaluated in parallel. It would be possible to wrap it in a `Mutex` to protect the variable, but that would be obscuring the point of the example. This is a case where the Python Global Interpreter Lock's performance tradeoff provides some safety guarantees.
 
-{{code_block('user-guide/expressions/user-defined-functions','counter',['map_elements'])}}
+{{code_block('user-guide/expressions/user-defined-functions','counter',[])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:counter"
@@ -136,7 +139,7 @@ type. This data type collects those columns as fields in the `struct`. So if we'
 
 In Python, those would be passed as `dict` to the calling Python function and can thus be indexed by `field: str`. In Rust, you'll get a `Series` with the `Struct` type. The fields of the struct can then be indexed and downcast.
 
-{{code_block('user-guide/expressions/user-defined-functions','combine',['map_elements','struct'])}}
+{{code_block('user-guide/expressions/user-defined-functions','combine',[])}}
 
 ```python exec="on" result="text" session="user-guide/udf"
 --8<-- "python/user-guide/expressions/user-defined-functions.py:combine"