Skip to content

Commit b3ed14d

Browse files
authored
[mt] Multi target memory reallocation optimization (#11744)
1 parent c2d06bd commit b3ed14d

File tree

2 files changed

+108
-4
lines changed

2 files changed

+108
-4
lines changed

src/tree/updater_quantile_hist.cc

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@
2626
#include "hist/evaluate_splits.h" // for HistEvaluator, HistMultiEvaluator, UpdatePre...
2727
#include "hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
2828
#include "hist/hist_cache.h" // for BoundedHistCollection
29-
#include "hist/histogram.h" // for MultiHistogramBuilder
3029
#include "hist/hist_param.h" // for HistMakerTrainParam
30+
#include "hist/histogram.h" // for MultiHistogramBuilder
3131
#include "hist/sampler.h" // for SampleGradient
3232
#include "param.h" // for TrainParam, GradStats
3333
#include "xgboost/base.h" // for Args, GradientPairPrecise, GradientPair, Gra...
@@ -152,15 +152,23 @@ class MultiTargetHistBuilder {
152152

153153
p_last_fmat_ = p_fmat;
154154
bst_bin_t n_total_bins = 0;
155-
partitioner_.clear();
155+
size_t page_idx = 0;
156156
for (auto const &page : p_fmat->GetBatches<GHistIndexMatrix>(ctx_, HistBatch(param_))) {
157157
if (n_total_bins == 0) {
158158
n_total_bins = page.cut.TotalBins();
159159
} else {
160160
CHECK_EQ(n_total_bins, page.cut.TotalBins());
161161
}
162-
partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid, p_fmat->Info().IsColumnSplit());
162+
if (page_idx < partitioner_.size()) {
163+
partitioner_[page_idx].Reset(ctx_, page.Size(), page.base_rowid,
164+
p_fmat->Info().IsColumnSplit());
165+
} else {
166+
partitioner_.emplace_back(ctx_, page.Size(), page.base_rowid,
167+
p_fmat->Info().IsColumnSplit());
168+
}
169+
page_idx++;
163170
}
171+
partitioner_.resize(page_idx);
164172

165173
bst_target_t n_targets = p_tree->NumTargets();
166174
histogram_builder_ = std::make_unique<MultiHistogramBuilder>();
@@ -364,6 +372,7 @@ class HistUpdater {
364372
}
365373
page_idx++;
366374
}
375+
partitioner_.resize(page_idx);
367376
histogram_builder_->Reset(ctx_, n_total_bins, 1, HistBatch(param_), collective::IsDistributed(),
368377
fmat->Info().IsColumnSplit(), hist_param_);
369378
evaluator_ = std::make_unique<HistEvaluator>(ctx_, this->param_, fmat->Info(), col_sampler_);

tests/cpp/tree/test_quantile_hist.cc

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,24 @@
33
*/
44
#include <gtest/gtest.h>
55
#include <xgboost/host_device_vector.h>
6+
#include <xgboost/linalg.h>
67
#include <xgboost/tree_updater.h>
78

9+
#include <cmath>
810
#include <cstddef> // for size_t
11+
#include <cstring>
12+
#include <memory>
913
#include <string>
1014
#include <vector>
1115

1216
#include "../../../src/tree/common_row_partitioner.h"
1317
#include "../../../src/tree/hist/expand_entry.h" // for MultiExpandEntry, CPUExpandEntry
14-
#include "../collective/test_worker.h" // for TestDistributedGlobal
18+
#include "../collective/test_worker.h" // for TestDistributedGlobal
1519
#include "../helpers.h"
1620
#include "test_column_split.h" // for TestColumnSplit
1721
#include "test_partitioner.h"
1822
#include "xgboost/data.h"
23+
#include "xgboost/task.h"
1924

2025
namespace xgboost::tree {
2126
namespace {
@@ -246,4 +251,94 @@ INSTANTIATE_TEST_SUITE_P(ColumnSplit, TestHistColumnSplit, ::testing::ValuesIn([
246251
}
247252
return params;
248253
}()));
254+
255+
namespace {
256+
void FillGradients(linalg::Matrix<GradientPair>* gpair) {
257+
auto h = gpair->HostView();
258+
for (std::size_t row = 0; row < h.Shape(0); ++row) {
259+
for (std::size_t target = 0; target < h.Shape(1); ++target) {
260+
h(row, target) = GradientPair{1.0f, 0.0f};
261+
}
262+
}
263+
}
264+
265+
// Verify partitioner doesn't write past buffer end when doing
266+
// update on small dataset after large one.
267+
void TestPartitionerOverrun(bst_target_t n_targets) {
268+
constexpr bst_idx_t kNBig = 1 << 16, kNSmall = 1024;
269+
constexpr int kCols = 3;
270+
271+
Context ctx;
272+
ctx.InitAllowUnknown(Args{{"nthread", "1"}});
273+
274+
ObjInfo task{ObjInfo::kRegression, true, true};
275+
auto updater =
276+
std::unique_ptr<TreeUpdater>{TreeUpdater::Create("grow_quantile_histmaker", &ctx, &task)};
277+
278+
TrainParam param;
279+
param.InitAllowUnknown(Args{{"max_depth", "1"},
280+
{"max_bin", "32"},
281+
{"lambda", "0"},
282+
{"gamma", "0"},
283+
{"min_child_weight", "0"}});
284+
updater->Configure(Args{});
285+
286+
auto const n_targets_size = static_cast<std::size_t>(n_targets);
287+
288+
auto dmat_large =
289+
RandomDataGenerator{kNBig, kCols, 0.0f}.Seed(0).Batches(8).GenerateSparsePageDMatrix(
290+
"part_resize_big_first", true);
291+
292+
std::size_t shape_large[2]{dmat_large->Info().num_row_, n_targets_size};
293+
linalg::Matrix<GradientPair> gpair_large(shape_large, ctx.Device());
294+
FillGradients(&gpair_large);
295+
296+
RegTree tree_large{n_targets, static_cast<bst_feature_t>(kCols)};
297+
std::vector<RegTree*> trees_large{&tree_large};
298+
std::vector<HostDeviceVector<bst_node_t>> position_large(1);
299+
common::Span<HostDeviceVector<bst_node_t>> pos_large{position_large.data(), 1};
300+
updater->Update(&param, &gpair_large, dmat_large.get(), pos_large, trees_large);
301+
302+
auto dmat_small =
303+
RandomDataGenerator{kNSmall, kCols, 0.0f}.Seed(1).Batches(1).GenerateSparsePageDMatrix(
304+
"part_resize_small_second", false);
305+
306+
std::vector<HostDeviceVector<bst_node_t>> position_small(1);
307+
auto& pos = position_small.front();
308+
pos.Resize(kNBig); // Allocate large
309+
pos.Resize(kNSmall); // Shrink logical size, capacity remains large
310+
311+
auto& hv = pos.HostVector();
312+
std::size_t cap = hv.capacity();
313+
ASSERT_GE(cap, static_cast<std::size_t>(kNBig));
314+
315+
std::size_t tail_elems = cap - hv.size();
316+
ASSERT_GT(tail_elems, 0u) << "Expected reserved tail storage";
317+
std::vector<bst_node_t> tail_before(tail_elems);
318+
std::memcpy(tail_before.data(), hv.data() + hv.size(), tail_elems * sizeof(bst_node_t));
319+
320+
std::size_t shape_small[2]{dmat_small->Info().num_row_, n_targets_size};
321+
linalg::Matrix<GradientPair> gpair_small(shape_small, ctx.Device());
322+
FillGradients(&gpair_small);
323+
324+
RegTree tree_small{n_targets, static_cast<bst_feature_t>(kCols)};
325+
std::vector<RegTree*> trees_small{&tree_small};
326+
common::Span<HostDeviceVector<bst_node_t>> pos_small{position_small.data(), 1};
327+
updater->Update(&param, &gpair_small, dmat_small.get(), pos_small, trees_small);
328+
329+
// Verify no buffer overrun: tail bytes should be unchanged
330+
ASSERT_EQ(hv.capacity(), cap) << "Test precondition violated: capacity changed";
331+
std::vector<bst_node_t> tail_after(tail_elems);
332+
std::memcpy(tail_after.data(), hv.data() + hv.size(), tail_elems * sizeof(bst_node_t));
333+
334+
EXPECT_EQ(tail_before, tail_after)
335+
<< "Buffer overrun detected: writes past kNSmall when updating small "
336+
"single-batch DMatrix after large multi-batch one. "
337+
"Likely stale partitioner writing to buffer.";
338+
}
339+
} // anonymous namespace
340+
341+
TEST(QuantileHist, HistUpdaterPartitionerOverrun) { TestPartitionerOverrun(1); }
342+
343+
TEST(QuantileHist, MultiTargetHistBuilderPartitionerOverrun) { TestPartitionerOverrun(3); }
249344
} // namespace xgboost::tree

0 commit comments

Comments
 (0)