MNT Improve exception handling for invalid labels in cohen_kappa_score (scikit-learn#31175)

StefanieSenger · ogrisel · lorentzenchr · web-flow · commit 2f078bf4f8b2 · 2025-04-18T13:28:00.000Z
Co-authored-by: Olivier Grisel &lt;olivier.grisel@ensta.org&gt;
Co-authored-by: Christian Lorentzen &lt;lorentzen.ch@gmail.com&gt;
Co-authored-by: Lucy Liu &lt;jliu176@gmail.com&gt;
diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py
@@ -832,7 +832,9 @@ class labels [2]_.
     labels : array-like of shape (n_classes,), default=None
         List of labels to index the matrix. This may be used to select a
         subset of labels. If `None`, all labels that appear at least once in
-        ``y1`` or ``y2`` are used.
+        ``y1`` or ``y2`` are used. Note that at least one label in `labels` must be
+         present in `y1`, even though this function is otherwise agnostic to the order
+         of `y1` and `y2`.
 
     weights : {'linear', 'quadratic'}, default=None
         Weighting type to calculate the score. `None` means not weighted;
@@ -866,7 +868,18 @@ class labels [2]_.
     >>> cohen_kappa_score(y1, y2)
     0.6875
     """
-    confusion = confusion_matrix(y1, y2, labels=labels, sample_weight=sample_weight)
+    try:
+        confusion = confusion_matrix(y1, y2, labels=labels, sample_weight=sample_weight)
+    except ValueError as e:
+        if "At least one label specified must be in y_true" in str(e):
+            msg = (
+                "At least one label in `labels` must be present in `y1` (even though "
+                "`cohen_kappa_score` is otherwise agnostic to the order of `y1` and "
+                "`y2`)."
+            )
+            raise ValueError(msg) from e
+        raise
+
     n_classes = confusion.shape[0]
     sum0 = np.sum(confusion, axis=0)
     sum1 = np.sum(confusion, axis=1)
diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py
@@ -926,6 +926,17 @@ def test_cohen_kappa():
     )
 
 
+def test_cohen_kappa_score_error_wrong_label():
+    """Test that correct error is raised when users pass labels that are not in y1."""
+    labels = [1, 2]
+    y1 = np.array(["a"] * 5 + ["b"] * 5)
+    y2 = np.array(["b"] * 10)
+    with pytest.raises(
+        ValueError, match="At least one label in `labels` must be present in `y1`"
+    ):
+        cohen_kappa_score(y1, y2, labels=labels)
+
+
 @pytest.mark.parametrize("zero_division", [0, 1, np.nan])
 @pytest.mark.parametrize("y_true, y_pred", [([0], [0])])
 @pytest.mark.parametrize(