INRIA
diff --git a/‎_images/8093d6c62d5e837118e0dc5cc52e7adbe982e875adb370ab83d7232e06d26691.png
62 KB b/‎_images/8093d6c62d5e837118e0dc5cc52e7adbe982e875adb370ab83d7232e06d26691.png
62 KB
diff --git a/‎_images/a94f76db6c61d6fec689c0c47babccf1982ef9c2bb0795c1961923dedbf9721e.png
-52.8 KB b/‎_images/a94f76db6c61d6fec689c0c47babccf1982ef9c2bb0795c1961923dedbf9721e.png
-52.8 KB
diff --git a/‎_sources/python_scripts/01_tabular_data_exploration_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/01_tabular_data_exploration_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/02_numerical_pipeline_ex_00.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/02_numerical_pipeline_ex_00.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/02_numerical_pipeline_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/02_numerical_pipeline_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/03_categorical_pipeline_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/03_categorical_pipeline_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/03_categorical_pipeline_ex_02.py
Lines changed: 2 additions & 21 deletions b/‎_sources/python_scripts/03_categorical_pipeline_ex_02.py
Lines changed: 2 additions & 21 deletions
diff --git a/‎_sources/python_scripts/cross_validation_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/cross_validation_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/cross_validation_ex_02.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/cross_validation_ex_02.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/datasets_bike_rides.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/datasets_bike_rides.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/ensemble_adaboost.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/ensemble_adaboost.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/ensemble_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/ensemble_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/ensemble_ex_02.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/ensemble_ex_02.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/ensemble_ex_03.py
Lines changed: 11 additions & 4 deletions b/‎_sources/python_scripts/ensemble_ex_03.py
Lines changed: 11 additions & 4 deletions
diff --git a/‎_sources/python_scripts/ensemble_ex_04.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/ensemble_ex_04.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/feature_selection_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/feature_selection_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/linear_models_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/linear_models_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/linear_models_ex_02.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/linear_models_ex_02.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/linear_models_ex_03.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/linear_models_ex_03.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/linear_models_ex_04.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/linear_models_ex_04.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/linear_models_regularization.py
Lines changed: 3 additions & 3 deletions b/‎_sources/python_scripts/linear_models_regularization.py
Lines changed: 3 additions & 3 deletions
diff --git a/‎_sources/python_scripts/metrics_ex_01.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/metrics_ex_01.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/metrics_ex_02.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/metrics_ex_02.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/parameter_tuning_ex_02.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/parameter_tuning_ex_02.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/parameter_tuning_ex_03.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/parameter_tuning_ex_03.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎_sources/python_scripts/parameter_tuning_grid_search.py
Lines changed: 3 additions & 0 deletions b/‎_sources/python_scripts/parameter_tuning_grid_search.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎_sources/python_scripts/parameter_tuning_nested.py
Lines changed: 1 addition & 0 deletions b/‎_sources/python_scripts/parameter_tuning_nested.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎_sources/python_scripts/parameter_tuning_randomized_search.py
Lines changed: 1 addition & 0 deletions b/‎_sources/python_scripts/parameter_tuning_randomized_search.py
Lines changed: 1 addition & 0 deletions
diff --git a/‎_sources/python_scripts/trees_ex_01.py
Lines changed: 4 additions & 4 deletions b/‎_sources/python_scripts/trees_ex_01.py
Lines changed: 4 additions & 4 deletions
diff --git a/‎_sources/python_scripts/trees_ex_02.py
Lines changed: 1 addition & 1 deletion b/‎_sources/python_scripts/trees_ex_02.py
Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
@@ -112,26 +112,7 @@
 # Write your code here.
 
 # %% [markdown]
-# ### Analysis
-#
-# From an accuracy point of view, the result is almost exactly the same. The
-# reason is that `HistGradientBoostingClassifier` is expressive and robust
-# enough to deal with misleading ordering of integer coded categories (which was
-# not the case for linear models).
-#
-# However from a computation point of view, the training time is much longer:
-# this is caused by the fact that `OneHotEncoder` generates more features than
-# `OrdinalEncoder`; for each unique categorical value a column is created.
-#
-# Note that the current implementation `HistGradientBoostingClassifier` is still
-# incomplete, and once sparse representation are handled correctly, training
-# time might improve with such kinds of encodings.
-#
-# The main take away message is that arbitrary integer coding of categories is
-# perfectly fine for `HistGradientBoostingClassifier` and yields fast training
-# times.
-
-# Which encoder should I use?
+# ## Which encoder should I use?
 #
 # |                  | Meaningful order              | Non-meaningful order |
 # | ---------------- | ----------------------------- | -------------------- |
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -155,7 +155,7 @@
 # smoother visualization.
 
 # %%
-data_ride.resample("60S").mean().plot()
+data_ride.resample("60s").mean().plot()
 plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
 _ = plt.title("Sensor values for different cyclist measurements")
 
 
@@ -190,7 +190,7 @@
 
 estimator = DecisionTreeClassifier(max_depth=3, random_state=0)
 adaboost = AdaBoostClassifier(
-    estimator=estimator, n_estimators=3, algorithm="SAMME", random_state=0
+    estimator=estimator, n_estimators=3, random_state=0
 )
 adaboost.fit(data, target)
 
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
@@ -70,6 +70,13 @@
 # ensemble. However, the scores reach a plateau where adding new trees just
 # makes fitting and scoring slower.
 #
+# Now repeat the analysis for the gradient boosting model.
+
+# %%
+# Write your code here.
+
+
+# %% [markdown]
 # Gradient boosting models overfit when the number of trees is too large. To
 # avoid adding a new unnecessary tree, unlike random-forest gradient-boosting
 # offers an early-stopping option. Internally, the algorithm uses an
@@ -78,9 +85,9 @@
 # improving for several iterations, it stops adding trees.
 #
 # Now, create a gradient-boosting model with `n_estimators=1_000`. This number
-# of trees is certainly too large. Change the parameter `n_iter_no_change`
-# such that the gradient boosting fitting stops after adding 5 trees to avoid
-# deterioration of the overall generalization performance.
+# of trees is certainly too large, as we have seen above. Change the parameter
+# `n_iter_no_change` such that the gradient boosting fitting stops after adding
+# 5 trees to avoid deterioration of the overall generalization performance.
 
 # %%
 # Write your code here.
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -421,7 +421,7 @@
 ridge = make_pipeline(
     MinMaxScaler(),
     PolynomialFeatures(degree=2, include_bias=False),
-    RidgeCV(alphas=alphas, store_cv_values=True),
+    RidgeCV(alphas=alphas, store_cv_results=True),
 )
 
 # %%
@@ -458,14 +458,14 @@
 # It indicates that our model is not overfitting.
 #
 # When fitting the ridge regressor, we also requested to store the error found
-# during cross-validation (by setting the parameter `store_cv_values=True`). We
+# during cross-validation (by setting the parameter `store_cv_results=True`). We
 # can plot the mean squared error for the different `alphas` regularization
 # strengths that we tried. The error bars represent one standard deviation of the
 # average mean square error across folds for a given value of `alpha`.
 
 # %%
 mse_alphas = [
-    est[-1].cv_values_.mean(axis=0) for est in cv_results["estimator"]
+    est[-1].cv_results_.mean(axis=0) for est in cv_results["estimator"]
 ]
 cv_alphas = pd.DataFrame(mse_alphas, columns=alphas)
 cv_alphas = cv_alphas.aggregate(["mean", "std"]).T
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
 
@@ -89,6 +89,9 @@
 preprocessor = ColumnTransformer(
     [("cat_preprocessor", categorical_preprocessor, categorical_columns)],
     remainder="passthrough",
+    # Silence a deprecation warning in scikit-learn v1.6 related to how the
+    # ColumnTransformer stores an attribute that we do not use in this notebook
+    force_int_remainder_cols=False,
 )
 
 # %% [markdown]
 
@@ -56,6 +56,7 @@
         ("cat_preprocessor", categorical_preprocessor, categorical_columns),
     ],
     remainder="passthrough",
+    force_int_remainder_cols=False,  # Silence a warning in scikit-learn v1.6.
 )
 
 # %%
 
@@ -73,6 +73,7 @@
 preprocessor = ColumnTransformer(
     [("cat_preprocessor", categorical_preprocessor, categorical_columns)],
     remainder="passthrough",
+    force_int_remainder_cols=False,  # Silence a warning in scikit-learn v1.6.
 )
 
 # %%
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
@@ -59,9 +59,9 @@
 #
 # ```{warning}
 # At this time, it is not possible to use `response_method="predict_proba"` for
-# multiclass problems. This is a planned feature for a future version of
-# scikit-learn. In the mean time, you can use `response_method="predict"`
-# instead.
+# multiclass problems on a single plot. This is a planned feature for a future
+# version of scikit-learn. In the meantime, you can use
+# `response_method="predict"` instead.
 # ```
 
 # %%
 
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.7
+#       jupytext_version: 1.17.1
 #   kernelspec:
 #     display_name: Python 3
 #     name: python3
Original file line number	Diff line number	Diff line change
`@@ -190,7 +190,7 @@`
`190`	`190`
`191`	`191`	`estimator = DecisionTreeClassifier(max_depth=3, random_state=0)`
`192`	`192`	`adaboost = AdaBoostClassifier(`
`193`		`- estimator=estimator, n_estimators=3, algorithm="SAMME", random_state=0`
	`193`	`+ estimator=estimator, n_estimators=3, random_state=0`
`194`	`194`	`)`
`195`	`195`	`adaboost.fit(data, target)`
`196`	`196`
Original file line number	Diff line number	Diff line change
`@@ -89,6 +89,9 @@`
`89`	`89`	`preprocessor = ColumnTransformer(`
`90`	`90`	`[("cat_preprocessor", categorical_preprocessor, categorical_columns)],`
`91`	`91`	`remainder="passthrough",`
	`92`	`+ # Silence a deprecation warning in scikit-learn v1.6 related to how the`
	`93`	`+ # ColumnTransformer stores an attribute that we do not use in this notebook`
	`94`	`+ force_int_remainder_cols=False,`
`92`	`95`	`)`
`93`	`96`
`94`	`97`	`# %% [markdown]`
Original file line number	Diff line number	Diff line change
`@@ -56,6 +56,7 @@`
`56`	`56`	`("cat_preprocessor", categorical_preprocessor, categorical_columns),`
`57`	`57`	`],`
`58`	`58`	`remainder="passthrough",`
	`59`	`+ force_int_remainder_cols=False, # Silence a warning in scikit-learn v1.6.`
`59`	`60`	`)`
`60`	`61`
`61`	`62`	`# %%`
Original file line number	Diff line number	Diff line change
`@@ -73,6 +73,7 @@`
`73`	`73`	`preprocessor = ColumnTransformer(`
`74`	`74`	`[("cat_preprocessor", categorical_preprocessor, categorical_columns)],`
`75`	`75`	`remainder="passthrough",`
	`76`	`+ force_int_remainder_cols=False, # Silence a warning in scikit-learn v1.6.`
`76`	`77`	`)`
`77`	`78`
`78`	`79`	`# %%`