merge with upstream.

Kevin · Kevin · commit 48c728e1fb0b · 2020-04-09T08:14:21.000+02:00
diff --git a/hpsklearn/components.py b/hpsklearn/components.py
@@ -23,6 +23,10 @@
     import xgboost
 except ImportError:
     xgboost = None
+try:
+    import lightgbm
+except ImportError:
+    lightgbm = None
 
 ##########################################
 ##==== Wrappers for sklearn modules ====##
@@ -123,6 +127,19 @@ def sklearn_XGBRegressor(*args, **kwargs):
         raise ImportError('No module named xgboost')
     return xgboost.XGBRegressor(*args, **kwargs)
 
+@scope.define
+def sklearn_LGBMClassifier(*args, **kwargs):
+    if lightgbm is None:
+        raise ImportError('No module named lightgbm')
+    return lightgbm.LGBMClassifier(*args, **kwargs)
+
+@scope.define
+def sklearn_LGBMRegressor(*args, **kwargs):
+    if lightgbm is None:
+        raise ImportError('No module named lightgbm')
+    return lightgbm.LGBMRegressor(*args, **kwargs)
+
+
 @scope.define
 def sklearn_GPRegressor(*args, **kwargs):
     return sklearn.gaussian_process.GaussianProcessRegressor(*args, **kwargs)
@@ -1157,7 +1174,7 @@ def _name(msg):
         max_depth=max_depth,
         min_samples_split=scope.int(hp.quniform(
             _name('min_samples_split'),
-            1, 10, 1)) if min_samples_split is None else min_samples_split,
+            2, 10, 1)) if min_samples_split is None else min_samples_split,
         min_samples_leaf=scope.int(hp.quniform(
             _name('min_samples_leaf'),
             1, 5, 1)) if min_samples_leaf is None else min_samples_leaf,
@@ -1448,7 +1465,6 @@ def _xgboost_hp_space(
     )
     return hp_space
 
-
 ########################################################
 ##==== XGBoost classifier/regressor constructors ====##
 ########################################################
@@ -1495,6 +1511,128 @@ def _name(msg):
     return scope.sklearn_XGBRegressor(**hp_space)
 
 
+###################################################
+##==== LightGBM hyperparameters search space ====##
+###################################################
+
+def _lightgbm_max_depth(name):
+    return scope.int(hp.uniform(name, 1, 11))
+
+def _lightgbm_num_leaves(name):
+    return scope.int(hp.uniform(name, 2, 121))
+
+def _lightgbm_learning_rate(name):
+    return hp.loguniform(name, np.log(0.0001), np.log(0.5)) - 0.0001
+
+def _lightgbm_n_estimators(name):
+    return scope.int(hp.quniform(name, 100, 6000, 200))
+
+def _lightgbm_gamma(name):
+    return hp.loguniform(name, np.log(0.0001), np.log(5)) - 0.0001
+
+def _lightgbm_min_child_weight(name):
+    return scope.int(hp.loguniform(name, np.log(1), np.log(100)))
+
+def _lightgbm_subsample(name):
+    return hp.uniform(name, 0.5, 1)
+
+def _lightgbm_colsample_bytree(name):
+    return hp.uniform(name, 0.5, 1)
+
+def _lightgbm_colsample_bylevel(name):
+    return hp.uniform(name, 0.5, 1)
+
+def _lightgbm_reg_alpha(name):
+    return hp.loguniform(name, np.log(0.0001), np.log(1)) - 0.0001
+
+def _lightgbm_reg_lambda(name):
+    return hp.loguniform(name, np.log(1), np.log(4))
+
+def _lightgbm_boosting_type(name):
+    return hp.choice(name, ['gbdt', 'dart', 'goss'])
+
+def _lightgbm_hp_space(
+    name_func,
+    max_depth=None,
+    num_leaves=None,
+    learning_rate=None,
+    n_estimators=None,
+    min_child_weight=None,
+    max_delta_step=0,
+    subsample=None,
+    colsample_bytree=None,
+    reg_alpha=None,
+    reg_lambda=None,
+    boosting_type=None,
+    scale_pos_weight=1,
+    random_state=None):
+    '''Generate LightGBM hyperparameters search space
+    '''
+    hp_space = dict(
+        max_depth=(_lightgbm_max_depth(name_func('max_depth'))
+                   if max_depth is None else max_depth),
+        num_leaves=(_lightgbm_num_leaves(name_func('num_leaves'))
+                    if num_leaves is None else num_leaves),
+        learning_rate=(_lightgbm_learning_rate(name_func('learning_rate'))
+                       if learning_rate is None else learning_rate),
+        n_estimators=(_lightgbm_n_estimators(name_func('n_estimators'))
+                      if n_estimators is None else n_estimators),
+        min_child_weight=(_lightgbm_min_child_weight(name_func('min_child_weight'))
+                          if min_child_weight is None else min_child_weight),
+        max_delta_step=max_delta_step,
+        subsample=(_lightgbm_subsample(name_func('subsample'))
+                   if subsample is None else subsample),
+        colsample_bytree=(_lightgbm_colsample_bytree(name_func('colsample_bytree'))
+                          if colsample_bytree is None else colsample_bytree),
+        reg_alpha=(_lightgbm_reg_alpha(name_func('reg_alpha'))
+                   if reg_alpha is None else reg_alpha),
+        reg_lambda=(_lightgbm_reg_lambda(name_func('reg_lambda'))
+                    if reg_lambda is None else reg_lambda),
+        boosting_type=(_lightgbm_boosting_type(name_func('boosting_type'))
+                    if boosting_type is None else boosting_type),
+        scale_pos_weight=scale_pos_weight,
+        seed=_random_state(name_func('rstate'), random_state)
+    )
+    return hp_space
+
+########################################################
+##==== LightGBM classifier/regressor constructors ====##
+########################################################
+def lightgbm_classification(name, objective='binary', **kwargs):
+    '''
+    Return a pyll graph with hyperparameters that will construct
+    a lightgbm.LGBMClassifier model.
+
+    Args:
+        objective([str]): choose from ['binary', 'multiclass']
+            or provide an hp.choice pyll node
+
+    See help(hpsklearn.components._lightgbm_hp_space) for info on
+    additional available LightGBM arguments.
+    '''
+    def _name(msg):
+        return '%s.%s_%s' % (name, 'lightgbm', msg)
+
+    hp_space = _lightgbm_hp_space(_name, **kwargs)
+    hp_space['objective'] = objective
+    return scope.sklearn_LGBMClassifier(**hp_space)
+
+
+def lightgbm_regression(name, **kwargs):
+    '''
+    Return a pyll graph with hyperparameters that will construct
+    a lightgbm.LGBMRegressor model.
+    
+    See help(hpsklearn.components._lightgbm_hp_space) for info on
+    additional available LightGBM arguments.
+    '''
+    def _name(msg):
+        return '%s.%s_%s' % (name, 'lightgbm_reg', msg)
+
+    hp_space = _lightgbm_hp_space(_name, **kwargs)
+    return scope.sklearn_LGBMRegressor(objective='regression', **hp_space)
+
+
 #################################################
 ##==== Naive Bayes classifiers constructor ====##
 #################################################
diff --git a/hpsklearn/estimator.py b/hpsklearn/estimator.py
@@ -614,15 +614,12 @@ def fit_iter(self, X, y, EX_list=None, valid_size=.2, n_folds=None,
         assert weights is None
         increment = self.fit_increment if increment is None else increment
 
-        # len does not work on sparse matrices, so using shape[0] instead
-        # shape[0] does not work on lists, so using len() for those
-        if scipy.sparse.issparse(X):
-            data_length = X.shape[0]
-        else:
-            data_length = len(X)
-        if type(X) is list:
+        # Convert list, pandas series, or other array-like to ndarray
+        # do not convert sparse matrices
+        if not scipy.sparse.issparse(X) and not isinstance(X, np.ndarray):
             X = np.array(X)
-        if type(y) is list:
+
+        if not scipy.sparse.issparse(y) and not isinstance(y, np.ndarray):
             y = np.array(y)
 
         if not warm_start:
diff --git a/hpsklearn/tests/test_classification.py b/hpsklearn/tests/test_classification.py
@@ -115,10 +115,23 @@ def test_classifier(self):
 if xgboost is not None:
     setattr(
         TestClassification,
-        'test_{0}'.format(clf.__name__),
+        'test_xgboost_classification',
         create_function(components.xgboost_classification)
     )
 
+# Only test the lightgbm classifier if the optional dependency is installed
+try:
+    import lightgbm
+except ImportError:
+    lightgbm = None
+
+if lightgbm is not None:
+    setattr(
+        TestClassification,
+        'test_lightgbm_classification',
+        create_function(components.lightgbm_classification)
+    )
+
 if __name__ == '__main__':
     unittest.main()
 
diff --git a/hpsklearn/tests/test_regression.py b/hpsklearn/tests/test_regression.py
@@ -71,10 +71,23 @@ def test_regressor(self):
 if xgboost is not None:
     setattr(
         TestRegression,
-        'test_{0}'.format(clf.__name__),
+        'test_xgboost_regression',
         create_function(components.xgboost_regression)
     )
 
+# Only test the lightgbm regressor if the optional dependency is installed
+try:
+    import lightgbm
+except ImportError:
+    lightgbm = None
+
+if lightgbm is not None:
+    setattr(
+        TestRegression,
+        'test_lightgbm_regression',
+        create_function(components.lightgbm_regression)
+    )
+
 if __name__ == '__main__':
     unittest.main()
 
diff --git a/setup.py b/setup.py
@@ -48,6 +48,7 @@
         'scipy',
     ],
     extras_require = {
-        'xgboost':  ['xgboost==0.6a2']
+        'xgboost':  ['xgboost==0.6a2'],
+        'lightgbm': ['lightgbm==2.3.1']
     }
 )

Original file line number	Diff line number	Diff line change
`@@ -48,6 +48,7 @@`
`48`	`48`	`'scipy',`
`49`	`49`	`],`
`50`	`50`	`extras_require = {`
`51`		`- 'xgboost': ['xgboost==0.6a2']`
	`51`	`+ 'xgboost': ['xgboost==0.6a2'],`
	`52`	`+ 'lightgbm': ['lightgbm==2.3.1']`
`52`	`53`	`}`
`53`	`54`	`)`