From e498551f833f3a97ce4a0327bab0299edc9aa209 Mon Sep 17 00:00:00 2001 From: "Jonathan C. McKinney" Date: Mon, 1 Mar 2021 16:24:16 -0800 Subject: [PATCH] Potential fix for https://github.com/microsoft/LightGBM/issues/4037 --- src/io/dataset.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/io/dataset.cpp b/src/io/dataset.cpp index af3a41d87ff0..efc8035ab051 100644 --- a/src/io/dataset.cpp +++ b/src/io/dataset.cpp @@ -122,9 +122,10 @@ std::vector<std::vector<int>> FindGroups( const data_size_t cur_non_zero_cnt = is_filtered_feature ? 0 : num_per_col[fidx]; std::vector<int> available_groups; - for (int gid = 0; gid < static_cast<int>(features_in_group.size()); ++gid) { - auto cur_num_bin = group_num_bin[gid] + bin_mappers[fidx]->num_bin() + + auto bin_part = bin_mappers[fidx]->num_bin() + (bin_mappers[fidx]->GetDefaultBin() == 0 ? -1 : 0); + for (int gid = 0; gid < static_cast<int>(features_in_group.size()); ++gid) { + auto cur_num_bin = group_num_bin[gid] + bin_part; if (group_total_data_cnt[gid] + cur_non_zero_cnt <= total_sample_cnt + single_val_max_conflict_cnt) { if (!is_use_gpu || cur_num_bin <= max_bin_per_group) {