microsoft · jameslamb · Sep 15, 2023 · Sep 15, 2023 · Sep 16, 2023 · Aug 18, 2024
@@ -3696,6 +3696,14 @@ def __init__(
             params = self._get_loaded_param()
         elif model_str is not None:
             self.model_from_string(model_str)
+            # ensure params are updated on the C++ side
+            # NOTE: models loaded from file are initially set to "boosting: GBDT", so "boosting"
+            #       shouldn't be passed through here
+            self.params = params
+            boosting_type = params.pop("boosting", None)
+            self.reset_parameter(params)
+            if boosting_type is not None:
+                params["boosting"] = boosting_type
         else:
             raise TypeError(
                 "Need at least one training dataset or model file or model string " "to create Booster instance"
@@ -3719,7 +3727,7 @@ def __copy__(self) -> "Booster":
 
     def __deepcopy__(self, *args: Any, **kwargs: Any) -> "Booster":
         model_str = self.model_to_string(num_iteration=-1)
-        return Booster(model_str=model_str)
+        return Booster(model_str=model_str, params=self.params)
 
     def __getstate__(self) -> Dict[str, Any]:
         this = self.__dict__.copy()

@@ -800,7 +800,10 @@ void GBDT::ResetConfig(const Config* config) {
 
   boosting_on_gpu_ = objective_function_ != nullptr && objective_function_->IsCUDAObjective() &&
                     !data_sample_strategy_->IsHessianChange();  // for sample strategy with Hessian change, fall back to boosting on CPU
-  tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
+
+  if (tree_learner_ != nullptr) {
+    tree_learner_->ResetBoostingOnGPU(boosting_on_gpu_);
+  }
 
   if (train_data_ != nullptr) {
     data_sample_strategy_->ResetSampleConfig(new_config.get(), false);

@@ -201,6 +201,7 @@ def test_booster(tmp_path):
     booster2 = ctypes.c_void_p()
     num_total_model = ctypes.c_int(0)
     LIB.LGBM_BoosterCreateFromModelfile(c_str(str(model_path)), ctypes.byref(num_total_model), ctypes.byref(booster2))
+    LIB.LGBM_BoosterResetParameter(booster2, c_str("app=binary metric=auc num_leaves=29 verbose=0"))
     data = np.loadtxt(str(binary_example_dir / "binary.test"), dtype=np.float64)
     mat = data[:, 1:]
     preb = np.empty(mat.shape[0], dtype=np.float64)

@@ -15,7 +15,7 @@
 import lightgbm as lgb
 from lightgbm.compat import PANDAS_INSTALLED, pd_DataFrame, pd_Series
 
-from .utils import dummy_obj, load_breast_cancer, mse_obj, np_assert_array_equal
+from .utils import BOOSTING_TYPES, dummy_obj, load_breast_cancer, mse_obj, np_assert_array_equal
 
 
 def test_basic(tmp_path):
@@ -947,3 +947,34 @@ def test_max_depth_warning_is_raised_if_max_depth_gte_5_and_num_leaves_omitted(c
         "in params. Alternatively, pass (max_depth=-1) and just use 'num_leaves' to constrain model complexity."
     )
     assert expected_warning in capsys.readouterr().out
+
+
+@pytest.mark.parametrize("boosting_type", BOOSTING_TYPES)
+def test_booster_deepcopy_preserves_parameters(boosting_type, default_rng):
+    """A deep-copied Booster should report the same params as the Booster it was copied from."""
+    orig_params = {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
+    # NOTE(review): assumes the 'default_rng' fixture is a numpy.random.Generator, whose random()
+    #               takes the shape as a single 'size' argument (a 2nd positional arg would be 'dtype')
+    train_set = lgb.Dataset(default_rng.random(size=(100, 2)))
+    bst = lgb.train(params=orig_params, num_boost_round=2, train_set=train_set)
+    bst2 = deepcopy(bst)
+    assert bst2.params == bst.params
+    # check concrete values on the copy itself, not only equality with the original
+    assert bst2.params["num_leaves"] == 5
+    assert bst2.params["verbosity"] == -1
+
+    # passed-in params shouldn't have been modified outside of lightgbm
+    assert orig_params == {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
+
+
+@pytest.mark.parametrize("boosting_type", BOOSTING_TYPES)
+def test_booster_params_kwarg_overrides_params_from_model_string(boosting_type, default_rng):
+    """Params passed to Booster() alongside ``model_str`` should override those stored in the model string."""
+    orig_params = {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
+    # NOTE(review): assumes the 'default_rng' fixture is a numpy.random.Generator, whose random()
+    #               takes the shape as a single 'size' argument (a 2nd positional arg would be 'dtype')
+    train_set = lgb.Dataset(default_rng.random(size=(100, 2)))
+    bst = lgb.train(params=orig_params, num_boost_round=2, train_set=train_set)
+    bst2 = lgb.Booster(params={"num_leaves": 7}, model_str=bst.model_to_string())
+
+    # params should have been updated on the Python object and the C++ side
+    assert bst2.params["num_leaves"] == 7
+    assert "[num_leaves: 7]" in bst2.model_to_string()
+
+    # passed-in params shouldn't have been modified outside of lightgbm
+    # (checked before the skip below so it runs for every boosting type)
+    assert orig_params == {"boosting": boosting_type, "feature_fraction": 0.708, "num_leaves": 5, "verbosity": -1}
+
+    # boosting type should have been preserved in the new model, but that is not verified
+    # for non-gbdt boosters yet: skip with an explicit reason instead of raising a bare error
+    if boosting_type != "gbdt":
+        pytest.skip(f"preservation of boosting type '{boosting_type}' through a model string is not verified yet")
@@ -11,6 +11,7 @@
 
 import lightgbm as lgb
 
+BOOSTING_TYPES = ["gbdt", "dart", "goss", "rf"]
 SERIALIZERS = ["pickle", "joblib", "cloudpickle"]
Original file line number	Diff line number	Diff line change
Expand Up		@@ -11,6 +11,7 @@

		import lightgbm as lgb

		BOOSTING_TYPES = ["gbdt", "dart", "goss", "rf"]
		SERIALIZERS = ["pickle", "joblib", "cloudpickle"]


Expand Down