Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
11d4b63
optimise data initialisation
Apr 4, 2025
4589533
Merge branch 'dmlc:master' into dev/cpu/init_data_optimisation
razdoburdin Apr 4, 2025
3464f8c
linting
Apr 7, 2025
e211ab9
changing the capture for inner lambdas
Apr 8, 2025
9221573
fix
Apr 8, 2025
0a793e3
set default
Apr 8, 2025
e249a3b
linting
Apr 8, 2025
1be6f5d
fix test
Apr 8, 2025
396f4b3
Merge branch 'dmlc:master' into dev/cpu/init_data_optimisation
razdoburdin Apr 8, 2025
55a89d7
submodule fix
Apr 8, 2025
8a15c70
fix for i386
Apr 8, 2025
085627f
proteckt thread-unsafe code
Apr 9, 2025
b1e714f
fix for multi-batch
Apr 10, 2025
70fd6bc
fix compilation error
Apr 10, 2025
edef9e7
remove critical section; avoid using of bit filds
Apr 10, 2025
606c537
tildy
Apr 10, 2025
6f885b0
return deleted code
Apr 11, 2025
c0dbd7e
remove unactual code
Apr 11, 2025
1cb3693
address comments
May 5, 2025
560a67a
fix calling ColumnMatrix constructor
May 5, 2025
0ac338e
switch back to bitfield
May 19, 2025
61b3878
linting
May 19, 2025
98ef541
Update src/common/column_matrix.h
razdoburdin Jun 2, 2025
2b090e6
Update src/common/column_matrix.h
razdoburdin Jun 2, 2025
9f5ba75
Merge branch 'master' into dev/cpu/init_data_optimisation
trivialfis Jun 15, 2025
8cdd7db
Cleanup, typos.
trivialfis Jul 1, 2025
15aa65b
rename.
trivialfis Jul 1, 2025
58920a0
typos.
trivialfis Jul 1, 2025
0b7037a
update comment
Jul 21, 2025
820b79a
Update src/common/column_matrix.h
razdoburdin Nov 4, 2025
6d553fb
Update src/common/column_matrix.h
razdoburdin Nov 4, 2025
ea5c4fe
Update src/common/column_matrix.h
razdoburdin Nov 4, 2025
7dc15f2
Update src/common/column_matrix.h
razdoburdin Nov 4, 2025
390efd1
Update src/common/column_matrix.h
razdoburdin Nov 4, 2025
3250cc0
Update src/common/column_matrix.h
razdoburdin Nov 4, 2025
a8df33b
linting
razdoburdin Nov 5, 2025
0fcd8a7
remove whitespace
razdoburdin Nov 5, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions src/common/column_matrix.cc
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
/**
* Copyright 2017-2023, XGBoost Contributors
* Copyright 2017-2025, XGBoost Contributors
* \brief Utility for fast column-wise access
*/
#include "column_matrix.h"

#include <algorithm> // for transform
#include <cstddef> // for size_t
#include <cstdint> // for uint64_t, uint8_t
#include <limits> // for numeric_limits
#include <type_traits> // for remove_reference_t
#include <vector> // for vector

#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "xgboost/base.h" // for bst_feature_t
#include "xgboost/span.h" // for Span
#include "../common/ref_resource_view.h" // for MakeFixedVecWithMalloc
#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "xgboost/base.h" // for bst_feature_t

namespace xgboost::common {
void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_threshold) {
void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_threshold,
int n_threads) {
auto const nfeature = gmat.Features();
const size_t nrow = gmat.Size();
// identify type of each column
Expand Down Expand Up @@ -61,18 +61,19 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres
auto storage_size =
feature_offsets_.back() * static_cast<std::underlying_type_t<BinTypeSize>>(bins_type_size_);

index_ = common::MakeFixedVecWithMalloc(storage_size, std::uint8_t{0});
index_ = common::MakeFixedVecWithMalloc(storage_size, std::uint8_t{0}, n_threads);

if (!all_dense_column) {
row_ind_ = common::MakeFixedVecWithMalloc(feature_offsets_[nfeature], std::size_t{0});
row_ind_ = common::MakeFixedVecWithMalloc(feature_offsets_[nfeature],
std::size_t{0}, n_threads);
}

// store least bin id for each feature
index_base_ = const_cast<uint32_t*>(gmat.cut.Ptrs().data());

any_missing_ = !gmat.IsDense();

missing_ = MissingIndicator{0, false};
missing_ = MissingIndicator{feature_offsets_, type_, any_missing_};
}

// IO procedures for external memory.
Expand All @@ -93,6 +94,9 @@ bool ColumnMatrix::Read(AlignedResourceReadStream* fi, uint32_t const* index_bas
if (!common::ReadVec(fi, &missing_.storage)) {
return false;
}
if (!common::ReadVec(fi, &missing_.feature_offsets_padded)) {
return false;
}
missing_.InitView();

index_base_ = index_base;
Expand All @@ -113,6 +117,7 @@ std::size_t ColumnMatrix::Write(AlignedFileWriteStream* fo) const {
bytes += common::WriteVec(fo, row_ind_);
bytes += common::WriteVec(fo, feature_offsets_);
bytes += common::WriteVec(fo, missing_.storage);
bytes += common::WriteVec(fo, missing_.feature_offsets_padded);

bytes += fo->Write(bins_type_size_);
bytes += fo->Write(any_missing_);
Expand Down
Loading
Loading