MAINT Improve build of exercises to avoid duplicated 'Write your code here' cells (#788)

ArturoAmorQ · web-flow · commit eec516523aae · 2025-04-03T10:20:22.000+02:00
diff --git a/build_tools/generate-exercise-from-solution.py b/build_tools/generate-exercise-from-solution.py
@@ -6,6 +6,9 @@
 import jupytext
 
 
+WRITE_YOUR_CODE_COMMENT = "# Write your code here."
+
+
 def replace_simple_text(input_py_str):
     result = input_py_str.replace("📃 Solution for", "📝")
     return result
@@ -44,7 +47,24 @@ def remove_solution(input_py_str):
     ]
 
     for c in cells_to_modify:
-        c["source"] = pattern.sub("# Write your code here.", c["source"])
+        c["source"] = pattern.sub(WRITE_YOUR_CODE_COMMENT, c["source"])
+
+    previous_cell_is_write_your_code = False
+    all_cells_before_deduplication = nb.cells
+    nb.cells = []
+    for c in all_cells_before_deduplication:
+        if c["cell_type"] == "code" and c["source"] == WRITE_YOUR_CODE_COMMENT:
+            current_cell_is_write_your_code = True
+        else:
+            current_cell_is_write_your_code = False
+        if (
+            current_cell_is_write_your_code
+            and previous_cell_is_write_your_code
+        ):
+            # Drop duplicated "write your code here" cells.
+            continue
+        nb.cells.append(c)
+        previous_cell_is_write_your_code = current_cell_is_write_your_code
 
     # TODO: we could potentially try to avoid changing the input file jupytext
     # header since this info is rarely useful. Let's keep it simple for now.
@@ -53,6 +73,7 @@ def remove_solution(input_py_str):
 
 
 def write_exercise(solution_path, exercise_path):
+    print(f"Writing exercise to {exercise_path} from solution {solution_path}")
     input_str = solution_path.read_text()
 
     output_str = input_str
@@ -67,7 +88,9 @@ def write_all_exercises(python_scripts_folder):
     for solution_path in solution_paths:
         exercise_path = Path(str(solution_path).replace("_sol_", "_ex_"))
         if not exercise_path.exists():
-            print(f"{exercise_path} does not exist")
+            print(
+                f"{exercise_path} does not exist, generating it from solution."
+            )
 
         write_exercise(solution_path, exercise_path)
 
diff --git a/notebooks/cross_validation_ex_01.ipynb b/notebooks/cross_validation_ex_01.ipynb
@@ -52,7 +52,7 @@
     "exercise.\n",
     "\n",
     "Also, this classifier can become more flexible/expressive by using a so-called\n",
-    "kernel that makes the model become non-linear. Again, no undestanding regarding\n",
+    "kernel that makes the model become non-linear. Again, no understanding regarding\n",
     "the mathematics is required to accomplish this exercise.\n",
     "\n",
     "We will use an RBF kernel where a parameter `gamma` allows to tune the\n",
diff --git a/notebooks/linear_models_ex_03.ipynb b/notebooks/linear_models_ex_03.ipynb
@@ -211,15 +211,6 @@
     "without interactions."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Write your code here."
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/notebooks/metrics_ex_02.ipynb b/notebooks/metrics_ex_02.ipynb
@@ -80,8 +80,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Then, instead of using the $R^2$ score, use the mean absolute error. You need\n",
-    "to refer to the documentation for the `scoring` parameter."
+    "Then, instead of using the $R^2$ score, use the mean absolute error (MAE). You\n",
+    "may need to refer to the documentation for the `scoring` parameter."
    ]
   },
   {
@@ -102,6 +102,15 @@
     "compute the $R^2$ score and the mean absolute error for instance."
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Write your code here."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/notebooks/parameter_tuning_ex_02.ipynb b/notebooks/parameter_tuning_ex_02.ipynb
@@ -76,7 +76,7 @@
    "source": [
     "Use the previously defined model (called `model`) and using two nested `for`\n",
     "loops, make a search of the best combinations of the `learning_rate` and\n",
-    "`max_leaf_nodes` parameters. In this regard, you have to train and test the\n",
+    "`max_leaf_nodes` parameters. In this regard, you need to train and test the\n",
     "model by setting the parameters. The evaluation of the model should be\n",
     "performed using `cross_val_score` on the training set. Use the following\n",
     "parameters search:\n",
diff --git a/notebooks/parameter_tuning_ex_03.ipynb b/notebooks/parameter_tuning_ex_03.ipynb
@@ -31,8 +31,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "In this exercise, we progressively define the regression pipeline and\n",
-    "later tune its hyperparameters.\n",
+    "In this exercise, we progressively define the regression pipeline and later\n",
+    "tune its hyperparameters.\n",
     "\n",
     "Start by defining a pipeline that:\n",
     "* uses a `StandardScaler` to normalize the numerical data;\n",
diff --git a/python_scripts/linear_models_ex_03.py b/python_scripts/linear_models_ex_03.py
@@ -126,6 +126,3 @@
 
 # %%
 # Write your code here.
-
-# %%
-# Write your code here.