From 8209ffad6f49a6133c07e6c1435ca67027d69ef6 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Sun, 11 Feb 2024 22:03:15 -0800
Subject: [PATCH 01/14] expose feature_name_ via sklearn consistent attribute feature_names_in_

---
 python-package/lightgbm/sklearn.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 9f1a62f542ca..1a569fc62afe 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1119,6 +1119,11 @@ def feature_name_(self) -> List[str]:
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError("No feature_name found. Need to call fit beforehand.")
         return self._Booster.feature_name()  # type: ignore[union-attr]
+
+    @property
+    def feature_names_in_(self) -> List[str]:
+        """:obj:`list` of shape = [n_features]: Sklearn-style property for feature names."""
+        return self.feature_name_
 
 
 class LGBMRegressor(_LGBMRegressorBase, LGBMModel):

From 52835d802d523c30b7914f927079d4c482d58cd1 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Mon, 12 Feb 2024 20:52:24 -0800
Subject: [PATCH 02/14] fix docstring

---
 python-package/lightgbm/sklearn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 1a569fc62afe..d3ac917522c0 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1122,7 +1122,7 @@ def feature_name_(self) -> List[str]:
 
     @property
     def feature_names_in_(self) -> List[str]:
-        """:obj:`list` of shape = [n_features]: Sklearn-style property for feature names."""
+        """:obj:`list` of shape = [n_features]: The names of features."""
         return self.feature_name_
 
 

From adc76834ee0b55b03f1ab6fa60259e7062f2b6f8 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Mon, 12 Feb 2024 20:53:21 -0800
Subject: [PATCH 03/14] raise error if estimator not fitted

---
 python-package/lightgbm/sklearn.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index d3ac917522c0..d0200eec5714 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1123,6 +1123,8 @@ def feature_name_(self) -> List[str]:
     @property
     def feature_names_in_(self) -> List[str]:
         """:obj:`list` of shape = [n_features]: The names of features."""
+        if not self.__sklearn_is_fitted__():
+            raise LGBMNotFittedError('No feature_names_in_ found. Need to call fit beforehand.')
         return self.feature_name_
 
 

From 08e67aaa656c84a03344dd99ee41e8a257bd49c6 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Sun, 17 Mar 2024 14:58:24 -0700
Subject: [PATCH 04/14] ensure exact feature match for feature_names_in_ attribute

---
 tests/python_package_test/test_sklearn.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 2fc127b5232d..40c648446cfe 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1397,6 +1397,7 @@ def test_validate_features(task):
     else:
         model.fit(df, y)
     assert model.feature_name_ == features
+    assert model.feature_names_in_ == features
 
     # try to predict with a different feature
     df2 = df.rename(columns={"x2": "z"})

From 0ecc3371795764e066c202760a5a1f56e5bb80ed Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 28 Mar 2024 11:38:42 -0700
Subject: [PATCH 05/14] add test for numpy input

---
 tests/python_package_test/test_sklearn.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 40c648446cfe..86975ee42e71 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1276,6 +1276,17 @@ def test_check_is_fitted():
     check_is_fitted(model)
 
 
+def test_getting_feature_names_in_np_input():
+    X, y = load_digits(n_class=2, return_X_y=True)
+    est = lgb.LGBMModel(n_estimators=5, objective="binary")
+    with pytest.raises(lgb.compat.LGBMNotFittedError):
+        est.feature_names_in_
+    est.fit(X, y)
+    assert est.feature_names_in_ == [
+        f"Column_{i}" for i in range(X.shape[1])
+    ]
+
+
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
 def test_sklearn_integration(estimator, check):
     estimator.set_params(min_child_samples=1, min_data_in_bin=1)

From c110c9d7fcb3a5fc935671625c633c311a976708 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 28 Mar 2024 11:50:27 -0700
Subject: [PATCH 06/14] add test for pandas input with feature names

---
 tests/python_package_test/test_sklearn.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 86975ee42e71..f009acf74b1f 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1277,6 +1277,8 @@ def test_check_is_fitted():
 
 
 def test_getting_feature_names_in_np_input():
+    # input is a numpy array, which doesn't have feature names. LightGBM adds
+    # feature names to the fitted model, which is inconsistent with sklearn's behavior
     X, y = load_digits(n_class=2, return_X_y=True)
     est = lgb.LGBMModel(n_estimators=5, objective="binary")
     with pytest.raises(lgb.compat.LGBMNotFittedError):
@@ -1287,6 +1289,16 @@ def test_getting_feature_names_in_np_input():
     ]
 
 
+def test_getting_feature_names_in_pd_input():
+    # as_frame=True means input has column names and these should propagate to fitted model
+    X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
+    est = lgb.LGBMModel(n_estimators=5, objective="binary")
+    with pytest.raises(lgb.compat.LGBMNotFittedError):
+        est.feature_names_in_
+    est.fit(X, y)
+    assert est.feature_names_in_ == list(X.columns)
+
+
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
 def test_sklearn_integration(estimator, check):
     estimator.set_params(min_child_samples=1, min_data_in_bin=1)

From a8a56314a0a0fef13c04c76b31da9231feac2c5a Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 28 Mar 2024 11:54:33 -0700
Subject: [PATCH 07/14] add documentation for when input data has no feature names

---
 python-package/lightgbm/sklearn.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index d0200eec5714..72da3d2a4fb6 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1122,7 +1122,12 @@ def feature_name_(self) -> List[str]:
 
     @property
     def feature_names_in_(self) -> List[str]:
-        """:obj:`list` of shape = [n_features]: The names of features."""
+        """:obj:`list` of shape = [n_features]: The names of features.
+
+        .. note::
+
+            If input does not contain feature names, they will be added during fitting in the format ``Column_0``, ``Column_1``, ..., ``Column_N``.
+        """
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError('No feature_names_in_ found. Need to call fit beforehand.')
         return self.feature_name_

From 4e1f1dc5bd7afae274e3ace3130b7e61c317bc0d Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 28 Mar 2024 11:56:43 -0700
Subject: [PATCH 08/14] pre-commit fixes

---
 python-package/lightgbm/sklearn.py        | 8 ++++----
 tests/python_package_test/test_sklearn.py | 4 +---
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 72da3d2a4fb6..7212029444a4 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1119,17 +1119,17 @@ def feature_name_(self) -> List[str]:
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError("No feature_name found. Need to call fit beforehand.")
         return self._Booster.feature_name()  # type: ignore[union-attr]
-
+
     @property
     def feature_names_in_(self) -> List[str]:
         """:obj:`list` of shape = [n_features]: The names of features.
-
+
         .. note::
 
             If input does not contain feature names, they will be added during fitting in the format ``Column_0``, ``Column_1``, ..., ``Column_N``.
         """
-        if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError('No feature_names_in_ found. Need to call fit beforehand.')
+        if not self.__sklearn_is_fitted__():
+            raise LGBMNotFittedError("No feature_names_in_ found. Need to call fit beforehand.")
         return self.feature_name_
 
 
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index f009acf74b1f..c25bfb2dd239 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1284,9 +1284,7 @@ def test_getting_feature_names_in_np_input():
     with pytest.raises(lgb.compat.LGBMNotFittedError):
         est.feature_names_in_
     est.fit(X, y)
-    assert est.feature_names_in_ == [
-        f"Column_{i}" for i in range(X.shape[1])
-    ]
+    assert est.feature_names_in_ == [f"Column_{i}" for i in range(X.shape[1])]
 
 
 def test_getting_feature_names_in_pd_input():

From b826426a4bdfeeebaad9779005365a8c554842c9 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 30 May 2024 19:57:40 -0700
Subject: [PATCH 09/14] feature_names_in_ returns a 1D numpy array

---
 python-package/lightgbm/sklearn.py        | 4 ++--
 tests/python_package_test/test_sklearn.py | 7 +++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index 7212029444a4..d7c00c517c7d 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1121,7 +1121,7 @@ def feature_name_(self) -> List[str]:
         return self._Booster.feature_name()  # type: ignore[union-attr]
 
     @property
-    def feature_names_in_(self) -> List[str]:
+    def feature_names_in_(self) -> np.ndarray:
         """:obj:`list` of shape = [n_features]: The names of features.
 
         .. note::
@@ -1130,7 +1130,7 @@ def feature_names_in_(self) -> List[str]:
         """
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError("No feature_names_in_ found. Need to call fit beforehand.")
-        return self.feature_name_
+        return np.array(self.feature_name_)
 
 
 class LGBMRegressor(_LGBMRegressorBase, LGBMModel):
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index c25bfb2dd239..d7b18b25611a 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1282,9 +1282,9 @@ def test_getting_feature_names_in_np_input():
     X, y = load_digits(n_class=2, return_X_y=True)
     est = lgb.LGBMModel(n_estimators=5, objective="binary")
     with pytest.raises(lgb.compat.LGBMNotFittedError):
-        est.feature_names_in_
+        check_is_fitted(est)
     est.fit(X, y)
-    assert est.feature_names_in_ == [f"Column_{i}" for i in range(X.shape[1])]
+    np.testing.assert_array_equal(est.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
 
 
 def test_getting_feature_names_in_pd_input():
@@ -1294,7 +1294,7 @@ def test_getting_feature_names_in_pd_input():
     with pytest.raises(lgb.compat.LGBMNotFittedError):
         est.feature_names_in_
     est.fit(X, y)
-    assert est.feature_names_in_ == list(X.columns)
+    np.testing.assert_array_equal(est.feature_names_in_, X.columns)
 
 
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
@@ -1418,7 +1418,6 @@ def test_validate_features(task):
     else:
         model.fit(df, y)
     assert model.feature_name_ == features
-    assert model.feature_names_in_ == features
 
     # try to predict with a different feature
     df2 = df.rename(columns={"x2": "z"})

From fd1ce7c25051edffb8ed385cec5fa2831ed448ea Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 30 May 2024 20:24:03 -0700
Subject: [PATCH 10/14] test LGBMModel, LGBMClassifier, LGBMRegressor, LGBMRanker

---
 tests/python_package_test/test_sklearn.py | 36 ++++++++++++++++++-----
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index d7b18b25611a..2c807b8e1dcf 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1281,20 +1281,40 @@ def test_getting_feature_names_in_np_input():
     # feature names to the fitted model, which is inconsistent with sklearn's behavior
     X, y = load_digits(n_class=2, return_X_y=True)
     est = lgb.LGBMModel(n_estimators=5, objective="binary")
-    with pytest.raises(lgb.compat.LGBMNotFittedError):
-        check_is_fitted(est)
-    est.fit(X, y)
-    np.testing.assert_array_equal(est.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
+    clf = lgb.LGBMClassifier(n_estimators=5)
+    reg = lgb.LGBMRegressor(n_estimators=5)
+    rnk = lgb.LGBMRanker(n_estimators=5)
+    models = (est, clf, reg, rnk)
+    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
+
+    for model in models:
+        with pytest.raises(lgb.compat.LGBMNotFittedError):
+            check_is_fitted(model)
+        if isinstance(model, lgb.LGBMRanker):
+            model.fit(X, y, group=group)
+        else:
+            model.fit(X, y)
+        np.testing.assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
 
 
 def test_getting_feature_names_in_pd_input():
     # as_frame=True means input has column names and these should propagate to fitted model
     X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
     est = lgb.LGBMModel(n_estimators=5, objective="binary")
-    with pytest.raises(lgb.compat.LGBMNotFittedError):
-        est.feature_names_in_
-    est.fit(X, y)
-    np.testing.assert_array_equal(est.feature_names_in_, X.columns)
+    clf = lgb.LGBMClassifier(n_estimators=5)
+    reg = lgb.LGBMRegressor(n_estimators=5)
+    rnk = lgb.LGBMRanker(n_estimators=5)
+    models = (est, clf, reg, rnk)
+    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
+
+    for model in models:
+        with pytest.raises(lgb.compat.LGBMNotFittedError):
+            check_is_fitted(model)
+        if isinstance(model, lgb.LGBMRanker):
+            model.fit(X, y, group=group)
+        else:
+            model.fit(X, y)
+        np.testing.assert_array_equal(est.feature_names_in_, X.columns)
 
 
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])

From edd951ac3f770a31e4064eb785e45b49f35d8d74 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Thu, 30 May 2024 20:59:14 -0700
Subject: [PATCH 11/14] rearrange feature name property docstrings

---
 python-package/lightgbm/sklearn.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index d7c00c517c7d..c0375018fe57 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1115,19 +1115,19 @@ def feature_importances_(self) -> np.ndarray:
 
     @property
     def feature_name_(self) -> List[str]:
-        """:obj:`list` of shape = [n_features]: The names of features."""
-        if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError("No feature_name found. Need to call fit beforehand.")
-        return self._Booster.feature_name()  # type: ignore[union-attr]
-
-    @property
-    def feature_names_in_(self) -> np.ndarray:
         """:obj:`list` of shape = [n_features]: The names of features.
 
         .. note::
 
             If input does not contain feature names, they will be added during fitting in the format ``Column_0``, ``Column_1``, ..., ``Column_N``.
         """
+        if not self.__sklearn_is_fitted__():
+            raise LGBMNotFittedError("No feature_name found. Need to call fit beforehand.")
+        return self._Booster.feature_name()  # type: ignore[union-attr]
+
+    @property
+    def feature_names_in_(self) -> np.ndarray:
+        """:obj:`array` of shape = [n_features]: scikit-learn compatible version of .feature_name_."""
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError("No feature_names_in_ found. Need to call fit beforehand.")
         return np.array(self.feature_name_)

From 25888c6a79569e91af5e3874b4df36c6458d7d6b Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Fri, 31 May 2024 19:26:36 -0700
Subject: [PATCH 12/14] add get_feature_names_out method

---
 python-package/lightgbm/sklearn.py        |  6 ++++
 tests/python_package_test/test_sklearn.py | 43 +++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index c0375018fe57..eecf80cbb4a4 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1014,6 +1014,12 @@ def predict(
             **predict_params,
         )
 
+    def get_feature_names_out(self) -> np.ndarray:
+        """:obj:`array` of shape = [n_features]: Get output features of fitted model."""
+        if not self.__sklearn_is_fitted__():
+            raise LGBMNotFittedError("Output features cannot be determined. Need to call fit beforehand.")
+        return self.feature_names_in_
+
     predict.__doc__ = _lgbmmodel_doc_predict.format(
         description="Return the predicted value for each sample.",
         X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 2c807b8e1dcf..c05d2e7248a0 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1317,6 +1317,49 @@ def test_getting_feature_names_in_pd_input():
     np.testing.assert_array_equal(est.feature_names_in_, X.columns)
 
 
+def test_get_feature_names_out_np_input():
+    # input is a numpy array, which doesn't have feature names. LightGBM adds
+    # feature names to the fitted model, which is inconsistent with sklearn's behavior
+    X, y = load_digits(n_class=2, return_X_y=True)
+    est = lgb.LGBMModel(n_estimators=5, objective="binary")
+    clf = lgb.LGBMClassifier(n_estimators=5)
+    reg = lgb.LGBMRegressor(n_estimators=5)
+    rnk = lgb.LGBMRanker(n_estimators=5)
+    models = (est, clf, reg, rnk)
+    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
+
+    for model in models:
+        with pytest.raises(lgb.compat.LGBMNotFittedError):
+            check_is_fitted(model)
+        if isinstance(model, lgb.LGBMRanker):
+            model.fit(X, y, group=group)
+        else:
+            model.fit(X, y)
+        np.testing.assert_array_equal(
+            model.get_feature_names_out(), np.array([f"Column_{i}" for i in range(X.shape[1])])
+        )
+
+
+def test_get_feature_names_out_pd_input():
+    # as_frame=True means input has column names and these should propagate to fitted model
+    X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
+    est = lgb.LGBMModel(n_estimators=5, objective="binary")
+    clf = lgb.LGBMClassifier(n_estimators=5)
+    reg = lgb.LGBMRegressor(n_estimators=5)
+    rnk = lgb.LGBMRanker(n_estimators=5)
+    models = (est, clf, reg, rnk)
+    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
+
+    for model in models:
+        with pytest.raises(lgb.compat.LGBMNotFittedError):
+            check_is_fitted(model)
+        if isinstance(model, lgb.LGBMRanker):
+            model.fit(X, y, group=group)
+        else:
+            model.fit(X, y)
+        np.testing.assert_array_equal(model.get_feature_names_out(), X.columns)
+
+
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
 def test_sklearn_integration(estimator, check):
     estimator.set_params(min_child_samples=1, min_data_in_bin=1)

From 574d9ce58afdf0ceaa75c1a33594ad7575fcfd56 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Fri, 31 May 2024 20:20:38 -0700
Subject: [PATCH 13/14] format reference to .feature_name_ with ticks

---
 python-package/lightgbm/sklearn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index eecf80cbb4a4..4eb606cea392 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1133,7 +1133,7 @@ def feature_name_(self) -> List[str]:
 
     @property
     def feature_names_in_(self) -> np.ndarray:
-        """:obj:`array` of shape = [n_features]: scikit-learn compatible version of .feature_name_."""
+        """:obj:`array` of shape = [n_features]: scikit-learn compatible version of ``.feature_name_``."""
         if not self.__sklearn_is_fitted__():
             raise LGBMNotFittedError("No feature_names_in_ found. Need to call fit beforehand.")
         return np.array(self.feature_name_)

From 8ac21d32589299fc1fe8089a969dfd5f983aee19 Mon Sep 17 00:00:00 2001
From: nicklamiller
Date: Mon, 10 Jun 2024 17:04:22 -0700
Subject: [PATCH 14/14] remove get_feature_names_out method, tidy up tests

---
 python-package/lightgbm/sklearn.py        |   6 --
 tests/python_package_test/test_sklearn.py | 108 +++++++---------------
 2 files changed, 32 insertions(+), 82 deletions(-)

diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py
index a335fc32a0a7..7f3e91a064c4 100644
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -1043,12 +1043,6 @@ def predict(
             **predict_params,
         )
 
-    def get_feature_names_out(self) -> np.ndarray:
-        """:obj:`array` of shape = [n_features]: Get output features of fitted model."""
-        if not self.__sklearn_is_fitted__():
-            raise LGBMNotFittedError("Output features cannot be determined. Need to call fit beforehand.")
-        return self.feature_names_in_
-
     predict.__doc__ = _lgbmmodel_doc_predict.format(
         description="Return the predicted value for each sample.",
         X_shape="numpy array, pandas DataFrame, H2O DataTable's Frame , scipy.sparse, list of lists of int or float of shape = [n_samples, n_features]",
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index b2e23a847715..10af8ba960f3 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -1290,88 +1290,44 @@ def test_max_depth_warning_is_never_raised(capsys, estimator_class, max_depth):
     assert "Provided parameters constrain tree depth" not in capsys.readouterr().out
 
 
-def test_getting_feature_names_in_np_input():
-    # input is a numpy array, which doesn't have feature names. LightGBM adds
-    # feature names to the fitted model, which is inconsistent with sklearn's behavior
-    X, y = load_digits(n_class=2, return_X_y=True)
-    est = lgb.LGBMModel(n_estimators=5, objective="binary")
-    clf = lgb.LGBMClassifier(n_estimators=5)
-    reg = lgb.LGBMRegressor(n_estimators=5)
-    rnk = lgb.LGBMRanker(n_estimators=5)
-    models = (est, clf, reg, rnk)
-    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
-
-    for model in models:
-        with pytest.raises(lgb.compat.LGBMNotFittedError):
-            check_is_fitted(model)
-        if isinstance(model, lgb.LGBMRanker):
-            model.fit(X, y, group=group)
-        else:
-            model.fit(X, y)
-        np.testing.assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
-
-
-def test_getting_feature_names_in_pd_input():
-    # as_frame=True means input has column names and these should propagate to fitted model
-    X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
-    est = lgb.LGBMModel(n_estimators=5, objective="binary")
-    clf = lgb.LGBMClassifier(n_estimators=5)
-    reg = lgb.LGBMRegressor(n_estimators=5)
-    rnk = lgb.LGBMRanker(n_estimators=5)
-    models = (est, clf, reg, rnk)
-    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
-
-    for model in models:
-        with pytest.raises(lgb.compat.LGBMNotFittedError):
-            check_is_fitted(model)
-        if isinstance(model, lgb.LGBMRanker):
-            model.fit(X, y, group=group)
-        else:
-            model.fit(X, y)
-        np.testing.assert_array_equal(est.feature_names_in_, X.columns)
-
-
-def test_get_feature_names_out_np_input():
+@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
+def test_getting_feature_names_in_np_input(estimator_class):
     # input is a numpy array, which doesn't have feature names. LightGBM adds
     # feature names to the fitted model, which is inconsistent with sklearn's behavior
     X, y = load_digits(n_class=2, return_X_y=True)
-    est = lgb.LGBMModel(n_estimators=5, objective="binary")
-    clf = lgb.LGBMClassifier(n_estimators=5)
-    reg = lgb.LGBMRegressor(n_estimators=5)
-    rnk = lgb.LGBMRanker(n_estimators=5)
-    models = (est, clf, reg, rnk)
-    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
-
-    for model in models:
-        with pytest.raises(lgb.compat.LGBMNotFittedError):
-            check_is_fitted(model)
-        if isinstance(model, lgb.LGBMRanker):
-            model.fit(X, y, group=group)
-        else:
-            model.fit(X, y)
-        np.testing.assert_array_equal(
-            model.get_feature_names_out(), np.array([f"Column_{i}" for i in range(X.shape[1])])
-        )
+    params = {"n_estimators": 2, "num_leaves": 7}
+    if estimator_class is lgb.LGBMModel:
+        model = estimator_class(**{**params, "objective": "binary"})
+    else:
+        model = estimator_class(**params)
+    with pytest.raises(lgb.compat.LGBMNotFittedError):
+        check_is_fitted(model)
+    if isinstance(model, lgb.LGBMRanker):
+        model.fit(X, y, group=[X.shape[0]])
+    else:
+        model.fit(X, y)
+    np.testing.assert_array_equal(model.feature_names_in_, np.array([f"Column_{i}" for i in range(X.shape[1])]))
 
 
-def test_get_feature_names_out_pd_input():
-    # as_frame=True means input has column names and these should propagate to fitted model
+@pytest.mark.parametrize("estimator_class", [lgb.LGBMModel, lgb.LGBMClassifier, lgb.LGBMRegressor, lgb.LGBMRanker])
+def test_getting_feature_names_in_pd_input(estimator_class):
     X, y = load_digits(n_class=2, return_X_y=True, as_frame=True)
-    est = lgb.LGBMModel(n_estimators=5, objective="binary")
-    clf = lgb.LGBMClassifier(n_estimators=5)
-    reg = lgb.LGBMRegressor(n_estimators=5)
-    rnk = lgb.LGBMRanker(n_estimators=5)
-    models = (est, clf, reg, rnk)
-    group = np.full(shape=(X.shape[0] // 2,), fill_value=2)  # Just an example group
-
-    for model in models:
-        with pytest.raises(lgb.compat.LGBMNotFittedError):
-            check_is_fitted(model)
-        if isinstance(model, lgb.LGBMRanker):
-            model.fit(X, y, group=group)
-        else:
-            model.fit(X, y)
-        np.testing.assert_array_equal(model.get_feature_names_out(), X.columns)
+    col_names = X.columns.to_list()
+    assert isinstance(col_names, list) and all(
+        isinstance(c, str) for c in col_names
+    ), "input data must have feature names for this test to cover the expected functionality"
+    params = {"n_estimators": 2, "num_leaves": 7}
+    if estimator_class is lgb.LGBMModel:
+        model = estimator_class(**{**params, "objective": "binary"})
+    else:
+        model = estimator_class(**params)
+    with pytest.raises(lgb.compat.LGBMNotFittedError):
+        check_is_fitted(model)
+    if isinstance(model, lgb.LGBMRanker):
+        model.fit(X, y, group=[X.shape[0]])
+    else:
+        model.fit(X, y)
+    np.testing.assert_array_equal(model.feature_names_in_, X.columns)
 
 
 @parametrize_with_checks([lgb.LGBMClassifier(), lgb.LGBMRegressor()])
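
The snippet below is a minimal usage sketch of the behavior this patch series adds, not part of the patch itself. It assumes a LightGBM build that already includes these commits; the toy DataFrame, its column names, and the hyperparameters are illustrative only.

# Usage sketch (illustrative): feature_names_in_ after fitting on a pandas
# DataFrame vs. a plain numpy array, and the LGBMNotFittedError raised before fit.
import numpy as np
import pandas as pd
import lightgbm as lgb

rng = np.random.default_rng(seed=42)
X = pd.DataFrame({"age": rng.integers(18, 90, size=200), "income": rng.normal(50_000.0, 10_000.0, size=200)})
y = rng.random(200)

# Accessing the attribute before fitting raises LGBMNotFittedError (PATCH 03/14).
unfitted = lgb.LGBMRegressor()
try:
    unfitted.feature_names_in_
except lgb.compat.LGBMNotFittedError as err:
    print(err)

# Fitting on a DataFrame: names come from the columns, e.g. ['age' 'income'].
reg = lgb.LGBMRegressor(n_estimators=2, num_leaves=7, min_child_samples=1)
reg.fit(X, y)
print(reg.feature_names_in_)

# Fitting on a numpy array: LightGBM generates names, e.g. ['Column_0' 'Column_1'].
reg_np = lgb.LGBMRegressor(n_estimators=2, num_leaves=7, min_child_samples=1)
reg_np.fit(X.to_numpy(), y)
print(reg_np.feature_names_in_)

As the test comments in the series note, the numpy case differs from scikit-learn's own estimators, which only set feature_names_in_ when the training data carries feature names; LightGBM instead falls back to its generated Column_N names.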