
Commit 9a6e90a

MarcBresson, jeremiedbb, and OmarManzoor authored
ENH: improve validation for SGD models to accept l1_ratio=None when penalty is not elasticnet (scikit-learn#30730)
Co-authored-by: Jérémie du Boisberranger <[email protected]>
Co-authored-by: Omar Salman <[email protected]>
Parent: 4af26a7 · Commit: 9a6e90a

File tree: 3 files changed (+47, −6 lines)
Changelog fragment (new file, +3):

@@ -0,0 +1,3 @@
+- :class:`linear_model.SGDClassifier` and :class:`linear_model.SGDRegressor` now accept
+  `l1_ratio=None` when `penalty` is not `"elasticnet"`.
+  By :user:`Marc Bresson <MarcBresson>`.
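In practice the change looks like this; a minimal sketch of the documented behavior (the toy data is illustrative, not part of the commit):

import numpy as np
from sklearn.linear_model import SGDClassifier

X = np.array([[0.0, 0.0], [1.0, 1.0], [0.0, 1.0], [1.0, 0.0]])
y = np.array([0, 1, 0, 1])

# Now accepted: l1_ratio is irrelevant when the penalty is pure L2.
SGDClassifier(penalty="l2", l1_ratio=None).fit(X, y)

# Still rejected at fit time: elasticnet needs a concrete mixing ratio.
try:
    SGDClassifier(penalty="elasticnet", l1_ratio=None).fit(X, y)
except ValueError as exc:
    print(exc)  # l1_ratio must be set when penalty is 'elasticnet'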

sklearn/linear_model/_stochastic_gradient.py (+23, −6):
@@ -154,11 +154,20 @@ def _more_validate_params(self, for_partial_fit=False):
                 "learning_rate is 'optimal'. alpha is used "
                 "to compute the optimal learning rate."
             )
+        if self.penalty == "elasticnet" and self.l1_ratio is None:
+            raise ValueError("l1_ratio must be set when penalty is 'elasticnet'")
 
         # raises ValueError if not registered
         self._get_penalty_type(self.penalty)
         self._get_learning_rate_type(self.learning_rate)
 
+    def _get_l1_ratio(self):
+        if self.l1_ratio is None:
+            # plain_sgd expects a float. Any value is fine since at this point
+            # penalty can't be "elasticnet" so l1_ratio is not used.
+            return 0.0
+        return self.l1_ratio
+
     def _get_loss_function(self, loss):
         """Get concrete ``LossFunction`` object for str ``loss``."""
         loss_ = self.loss_functions[loss]
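The new helper is a sentinel pattern: the low-level solver takes a plain float for l1_ratio, so None has to be mapped to some numeric value before the call, and 0.0 is safe because _more_validate_params has already rejected the one combination (penalty="elasticnet" with l1_ratio=None) in which the value would be read. A standalone sketch of the same logic (hypothetical names, not the sklearn implementation):

def resolve_l1_ratio(penalty, l1_ratio):
    if penalty == "elasticnet" and l1_ratio is None:
        raise ValueError("l1_ratio must be set when penalty is 'elasticnet'")
    if l1_ratio is None:
        return 0.0  # dead value: only reached when l1_ratio is never used
    return l1_ratio

assert resolve_l1_ratio("l2", None) == 0.0
assert resolve_l1_ratio("elasticnet", 0.15) == 0.15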
@@ -462,7 +471,7 @@ def fit_binary(
         penalty_type,
         alpha,
         C,
-        est.l1_ratio,
+        est._get_l1_ratio(),
         dataset,
         validation_mask,
         est.early_stopping,
@@ -993,7 +1002,11 @@ class SGDClassifier(BaseSGDClassifier):
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Only used if `penalty` is 'elasticnet'.
-        Values must be in the range `[0.0, 1.0]`.
+        Values must be in the range `[0.0, 1.0]` or can be `None` if
+        `penalty` is not `elasticnet`.
+
+        .. versionchanged:: 1.7
+            `l1_ratio` can be `None` when `penalty` is not "elasticnet".
 
     fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
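As the docstring says, l1_ratio interpolates between the two regularizers: 0 gives pure L2, 1 gives pure L1, and values in between mix them. A quick, illustrative comparison (synthetic data, results indicative only):

import numpy as np
from sklearn.linear_model import SGDClassifier

rng = np.random.RandomState(0)
X = rng.rand(50, 5)
y = (X[:, 0] > 0.5).astype(int)

for ratio in (0.0, 0.15, 1.0):  # pure L2, the default mix, pure L1
    clf = SGDClassifier(penalty="elasticnet", l1_ratio=ratio, random_state=0)
    clf.fit(X, y)
    # A larger L1 share tends to drive more coefficients to exactly zero.
    print(ratio, np.count_nonzero(clf.coef_))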
@@ -1194,7 +1207,7 @@ class SGDClassifier(BaseSGDClassifier):
         **BaseSGDClassifier._parameter_constraints,
         "penalty": [StrOptions({"l2", "l1", "elasticnet"}), None],
         "alpha": [Interval(Real, 0, None, closed="left")],
-        "l1_ratio": [Interval(Real, 0, 1, closed="both")],
+        "l1_ratio": [Interval(Real, 0, 1, closed="both"), None],
         "power_t": [Interval(Real, None, None, closed="neither")],
         "epsilon": [Interval(Real, 0, None, closed="left")],
         "learning_rate": [
@@ -1695,7 +1708,7 @@ def _fit_regressor(
             penalty_type,
             alpha,
             C,
-            self.l1_ratio,
+            self._get_l1_ratio(),
             dataset,
             validation_mask,
             self.early_stopping,
@@ -1796,7 +1809,11 @@ class SGDRegressor(BaseSGDRegressor):
         The Elastic Net mixing parameter, with 0 <= l1_ratio <= 1.
         l1_ratio=0 corresponds to L2 penalty, l1_ratio=1 to L1.
         Only used if `penalty` is 'elasticnet'.
-        Values must be in the range `[0.0, 1.0]`.
+        Values must be in the range `[0.0, 1.0]` or can be `None` if
+        `penalty` is not `elasticnet`.
+
+        .. versionchanged:: 1.7
+            `l1_ratio` can be `None` when `penalty` is not "elasticnet".
 
     fit_intercept : bool, default=True
         Whether the intercept should be estimated or not. If False, the
@@ -1976,7 +1993,7 @@ class SGDRegressor(BaseSGDRegressor):
         **BaseSGDRegressor._parameter_constraints,
         "penalty": [StrOptions({"l2", "l1", "elasticnet"}), None],
         "alpha": [Interval(Real, 0, None, closed="left")],
-        "l1_ratio": [Interval(Real, 0, 1, closed="both")],
+        "l1_ratio": [Interval(Real, 0, 1, closed="both"), None],
         "power_t": [Interval(Real, None, None, closed="neither")],
         "learning_rate": [
             StrOptions({"constant", "optimal", "invscaling", "adaptive"}),

sklearn/linear_model/tests/test_sgd.py (+21):
@@ -486,6 +486,27 @@ def test_not_enough_sample_for_early_stopping(klass):
     clf.fit(X3, Y3)
 
 
[email protected]("Estimator", [SGDClassifier, SGDRegressor])
[email protected]("l1_ratio", [0, 0.7, 1])
+def test_sgd_l1_ratio_not_used(Estimator, l1_ratio):
+    """Check that l1_ratio is not used when penalty is not 'elasticnet'"""
+    clf1 = Estimator(penalty="l1", l1_ratio=None, random_state=0).fit(X, Y)
+    clf2 = Estimator(penalty="l1", l1_ratio=l1_ratio, random_state=0).fit(X, Y)
+
+    assert_allclose(clf1.coef_, clf2.coef_)
+
+
[email protected](
+    "Estimator", [SGDClassifier, SparseSGDClassifier, SGDRegressor, SparseSGDRegressor]
+)
+def test_sgd_failing_penalty_validation(Estimator):
+    clf = Estimator(penalty="elasticnet", l1_ratio=None)
+    with pytest.raises(
+        ValueError, match="l1_ratio must be set when penalty is 'elasticnet'"
+    ):
+        clf.fit(X, Y)
+
+
 ###############################################################################
 # Classification Test Case
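The first test leans on SGD being deterministic for a fixed random_state: with penalty="l1" the solver never reads l1_ratio, so the fitted coef_ must match (up to assert_allclose tolerance) whether a number or None is passed. The same check sketched outside the test suite (synthetic data; assumes a build that includes this change):

import numpy as np
from numpy.testing import assert_allclose
from sklearn.linear_model import SGDRegressor

rng = np.random.RandomState(0)
X = rng.rand(20, 3)
y = X @ np.array([1.0, -2.0, 0.5])

a = SGDRegressor(penalty="l1", l1_ratio=None, random_state=0).fit(X, y)
b = SGDRegressor(penalty="l1", l1_ratio=0.7, random_state=0).fit(X, y)
assert_allclose(a.coef_, b.coef_)  # identical fits: l1_ratio never used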
