Environment info
Operating System: Linux
CPU/GPU model: CPU
C++/Python/R version: Python
LightGBM version or commit hash: 2.2.4

There might be bugs here:

LightGBM/python-package/lightgbm/callback.py, lines 212 to 234 in 1556642:

if first_metric_only:  # the only first metric is used for early stopping
    break

env.evaluation_result_list contains the evaluations for all metrics on all validation sets (including the training data, if it was added), so the behavior of early stopping is not what the documentation says:

"But the training data is ignored anyway. To check only the first metric, set the first_metric_only parameter to True in params."
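
To match the documented behavior, the entries of env.evaluation_result_list would have to be filtered by dataset and metric rather than just cut off after the first index. Below is a minimal sketch of such a filter, not LightGBM's implementation; the helper name and the 'training' dataset label are assumptions, based on the tuple layout (dataset_name, metric_name, value, is_higher_better) shown in the printed output further down:

def _should_consider(eval_entry, first_metric_name, train_name='training'):
    # eval_entry is assumed to be (dataset_name, metric_name, value, is_higher_better)
    dataset_name, metric_name = eval_entry[0], eval_entry[1]
    if dataset_name == train_name:
        return False  # the documentation says the training data is ignored
    # with first_metric_only=True, keep only entries for the first metric
    return metric_name == first_metric_name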
The test code only has one validation dataset, so the bug was not caught:

LightGBM/tests/python_package_test/test_engine.py, lines 1547 to 1586 in 1556642
Here is the code to demonstrate the issue. Only one line was added to the early_stopping callback, to print env.evaluation_result_list at each iteration.
import warnings
from operator import gt, lt

import numpy as np
import lightgbm as lgb
from lightgbm.callback import EarlyStopException
from lightgbm.callback import _format_eval_result
from lightgbm.compat import range_


def early_stopping(stopping_rounds, first_metric_only=False, verbose=True):
    best_score = []
    best_score_avg = []
    best_iter = []
    best_iter_avg = []
    best_score_list = []
    best_score_avg_list = []
    cmp_op = []
    enabled = [True]

    def _init(env):
        enabled[0] = not any((boost_alias in env.params
                              and env.params[boost_alias] == 'dart') for boost_alias in ('boosting',
                                                                                         'boosting_type',
                                                                                         'boost'))
        if not enabled[0]:
            warnings.warn('Early stopping is not available in dart mode')
            return
        if not env.evaluation_result_list:
            raise ValueError('For early stopping, '
                             'at least one dataset and eval metric is required for evaluation')
        if verbose:
            msg = "Training until validation scores don't improve for {} rounds."
            print(msg.format(stopping_rounds))
        for eval_ret in env.evaluation_result_list:
            best_iter.append(0)
            best_score_list.append(None)
            if eval_ret[3]:
                best_score.append(float('-inf'))
                # best_score_avg = float('-inf')
                cmp_op.append(gt)
            else:
                best_score.append(float('inf'))
                # best_score_avg = float('inf')
                cmp_op.append(lt)
            best_score_avg.append(None)
            best_iter_avg.append(None)
            best_score_avg_list.append(None)

    def _callback(env):
        if not cmp_op:
            _init(env)
        if not enabled[0]:
            return
        # added line to print env.evaluation_result_list
        print(env.evaluation_result_list)
        for i in range_(len(env.evaluation_result_list)):
            score = env.evaluation_result_list[i][2]
            if best_score_list[i] is None or cmp_op[i](score, best_score[i]):
                best_score[i] = score
                best_iter[i] = env.iteration
                best_score_list[i] = env.evaluation_result_list
            elif env.iteration - best_iter[i] >= stopping_rounds:
                if verbose:
                    print('Early stopping, best iteration is:\n[%d]\t%s' % (
                        best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
                raise EarlyStopException(best_iter[i], best_score_list[i])
            if env.iteration == env.end_iteration - 1:
                if verbose:
                    print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
                        best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
                raise EarlyStopException(best_iter[i], best_score_list[i])
            if first_metric_only:  # the only first metric is used for early stopping
                break

    _callback.order = 30
    return _callback


data = np.random.random((500, 10))
y = [1] * 250 + [0] * 250
lgb_train = lgb.Dataset(data, y, free_raw_data=True)

data = np.random.random((500, 10))
y = [1] * 250 + [0] * 250
lgb_test = lgb.Dataset(data, y, free_raw_data=True)

params = {
    'objective': 'binary',
    'verbose': 1,
    'metric': ['binary_logloss', 'auc']
}

gbm = lgb.train(params=params,
                train_set=lgb_train,
                valid_sets=[lgb_train, lgb_test],
                num_boost_round=6,
                callbacks=[early_stopping(1, first_metric_only=False, verbose=True)])

And here is the printed output:
[1] training's auc: 0.741264 training's binary_logloss: 0.675381 valid_1's auc: 0.475968 valid_1's binary_logloss: 0.695726
Training until validation scores don't improve for 1 rounds.
[('training', u'auc', '0.741264', True), ('training', u'binary_logloss', '0.6753807446588581', False), ('valid_1', u'auc', '0.475968', True), ('valid_1', u'binary_logloss', '0.6957255240695474', False)]
[LightGBM] [Warning] No further splits with positive gain, best gain: -inf
[2] training's auc: 0.804496 training's binary_logloss: 0.659374 valid_1's auc: 0.471824 valid_1's binary_logloss: 0.698373
[('training', u'auc', '0.804496', True), ('training', u'binary_logloss', '0.6593740400597452', False), ('valid_1', u'auc', '0.471824', True), ('valid_1', u'binary_logloss', '0.6983732401328614', False)]
Early stopping, best iteration is:
[1] training's auc: 0.741264 training's binary_logloss: 0.675381 valid_1's auc: 0.475968 valid_1's binary_logloss: 0.695726
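
Reading this output against the callback above: with first_metric_only=True the loop breaks after index 0 of env.evaluation_result_list, which in this run is the AUC computed on the training data, not on a validation set. A small illustration using the tuples printed at iteration 1 (values abbreviated from the output above):

evaluation_result_list = [
    ('training', 'auc', 0.741264, True),
    ('training', 'binary_logloss', 0.675381, False),
    ('valid_1', 'auc', 0.475968, True),
    ('valid_1', 'binary_logloss', 0.695726, False),
]
# first_metric_only=True means the loop only ever inspects index 0,
# i.e. the first metric evaluated on the training data, even though the
# documentation says the training data is ignored.
checked = evaluation_result_list[0]
print(checked)  # ('training', 'auc', 0.741264, True)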