Skip to content

Commit 8d4ffb0

Browse files
authored
Avoid deprecated functions in thrust. (#11785)
1 parent 948ddc4 commit 8d4ffb0

File tree

9 files changed

+99
-102
lines changed

9 files changed

+99
-102
lines changed

src/collective/coll.cu

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,17 @@
22
* Copyright 2023-2025, XGBoost Contributors
33
*/
44
#if defined(XGBOOST_USE_NCCL)
5-
#include <chrono> // for chrono, chrono_literals
6-
#include <cstddef> // for size_t
7-
#include <cstdint> // for int8_t, int64_t
8-
#include <future> // for future, future_status
9-
#include <memory> // for shared_ptr
10-
#include <mutex> // for mutex, unique_lock
11-
#include <string> // for string
12-
#include <thread> // for this_thread
13-
#include <type_traits> // for invoke_result_t, is_same_v, enable_if_t
14-
#include <utility> // for move
5+
#include <chrono> // for chrono, chrono_literals
6+
#include <cstddef> // for size_t
7+
#include <cstdint> // for int8_t, int64_t
8+
#include <functional> // for bit_and, bit_or, bit_xor
9+
#include <future> // for future, future_status
10+
#include <memory> // for shared_ptr
11+
#include <mutex> // for mutex, unique_lock
12+
#include <string> // for string
13+
#include <thread> // for this_thread
14+
#include <type_traits> // for invoke_result_t, is_same_v, enable_if_t
15+
#include <utility> // for move
1516

1617
#include "../common/cuda_stream.h" // for StreamRef, Event
1718
#include "../common/device_helpers.cuh" // for device_vector
@@ -211,16 +212,16 @@ void RunBitwiseAllreduce(curt::StreamRef stream, common::Span<std::int8_t> out_b
211212
// Then reduce locally.
212213
switch (op) {
213214
case Op::kBitwiseAND:
214-
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, thrust::bit_and<std::int8_t>(),
215-
pcomm->World(), data.size());
215+
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_and{}, pcomm->World(),
216+
data.size());
216217
break;
217218
case Op::kBitwiseOR:
218-
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, thrust::bit_or<std::int8_t>(),
219-
pcomm->World(), data.size());
219+
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_or{}, pcomm->World(),
220+
data.size());
220221
break;
221222
case Op::kBitwiseXOR:
222-
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, thrust::bit_xor<std::int8_t>(),
223-
pcomm->World(), data.size());
223+
RunBitwiseAllreduce(pcomm->Stream(), data, device_buffer, std::bit_xor{}, pcomm->World(),
224+
data.size());
224225
break;
225226
default:
226227
LOG(FATAL) << "Not a bitwise reduce operation.";

src/common/algorithm.cuh

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@
77
#include <thrust/copy.h> // for copy
88
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
99
#include <thrust/sort.h> // for stable_sort_by_key
10-
#include <thrust/tuple.h> // for tuple, get
1110

12-
#include <cstddef> // size_t
13-
#include <cstdint> // int32_t
14-
#include <cub/cub.cuh> // DispatchSegmentedRadixSort,NullType,DoubleBuffer
15-
#include <iterator> // distance
16-
#include <limits> // numeric_limits
17-
#include <type_traits> // conditional_t,remove_const_t
11+
#include <cstddef> // size_t
12+
#include <cstdint> // int32_t
13+
#include <cub/device/device_run_length_encode.cuh> // for DeviceRunLengthEncode
14+
#include <cub/device/dispatch/dispatch_radix_sort.cuh> // for DispatchSegmentedRadixSort
15+
#include <cub/util_type.cuh> // for NullType, DoubleBuffer
16+
#include <cuda/std/tuple> // for tuple
17+
#include <functional> // for plus, logical_and
18+
#include <iterator> // for distance
19+
#include <limits> // for numeric_limits
20+
#include <type_traits> // for conditional_t,remove_const_t
1821

1922
#include "common.h" // safe_cuda
2023
#include "cuda_context.cuh" // CUDAContext
@@ -175,30 +178,30 @@ template <typename SegIt, typename ValIt>
175178
void SegmentedArgMergeSort(Context const *ctx, SegIt seg_begin, SegIt seg_end, ValIt val_begin,
176179
ValIt val_end, dh::device_vector<std::size_t> *p_sorted_idx) {
177180
auto cuctx = ctx->CUDACtx();
178-
using Tup = thrust::tuple<std::int32_t, float>;
181+
using Tup = cuda::std::tuple<std::int32_t, float>;
179182
auto &sorted_idx = *p_sorted_idx;
180183
std::size_t n = std::distance(val_begin, val_end);
181184
sorted_idx.resize(n);
182185
dh::Iota(dh::ToSpan(sorted_idx), cuctx->Stream());
183186
dh::device_vector<Tup> keys(sorted_idx.size());
184-
auto key_it = dh::MakeTransformIterator<Tup>(thrust::make_counting_iterator(0ul),
185-
[=] XGBOOST_DEVICE(std::size_t i) -> Tup {
186-
std::int32_t seg_idx;
187-
if (i < *seg_begin) {
188-
seg_idx = -1;
189-
} else {
190-
seg_idx = dh::SegmentId(seg_begin, seg_end, i);
191-
}
192-
auto residue = val_begin[i];
193-
return thrust::make_tuple(seg_idx, residue);
194-
});
187+
auto key_it = dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) -> Tup {
188+
std::int32_t seg_idx;
189+
if (i < *seg_begin) {
190+
seg_idx = -1;
191+
} else {
192+
seg_idx = dh::SegmentId(seg_begin, seg_end, i);
193+
}
194+
auto residue = val_begin[i];
195+
return cuda::std::make_tuple(seg_idx, residue);
196+
});
195197
thrust::copy(ctx->CUDACtx()->CTP(), key_it, key_it + keys.size(), keys.begin());
196198
thrust::stable_sort_by_key(cuctx->TP(), keys.begin(), keys.end(), sorted_idx.begin(),
197199
[=] XGBOOST_DEVICE(Tup const &l, Tup const &r) {
198-
if (thrust::get<0>(l) != thrust::get<0>(r)) {
199-
return thrust::get<0>(l) < thrust::get<0>(r); // segment index
200+
if (cuda::std::get<0>(l) != cuda::std::get<0>(r)) {
201+
// segment index
202+
return cuda::std::get<0>(l) < cuda::std::get<0>(r);
200203
}
201-
return thrust::get<1>(l) < thrust::get<1>(r); // residue
204+
return cuda::std::get<1>(l) < cuda::std::get<1>(r); // residue
202205
});
203206
}
204207

@@ -331,7 +334,7 @@ template <typename InputIteratorT, typename OutputIteratorT, typename OffsetT>
331334
void InclusiveSum(Context const *ctx, InputIteratorT d_in, OutputIteratorT d_out,
332335
OffsetT num_items) {
333336
#if CUB_VERSION >= 300000
334-
InclusiveScan(ctx, d_in, d_out, cuda::std::plus{}, num_items);
337+
InclusiveScan(ctx, d_in, d_out, std::plus{}, num_items);
335338
#else
336339
InclusiveScan(ctx, d_in, d_out, cub::Sum{}, num_items);
337340
#endif
@@ -370,7 +373,7 @@ AllOf(Policy policy, InputIt first, InputIt second, Chk &&check) {
370373
auto n = std::distance(first, second);
371374
auto it =
372375
dh::MakeIndexTransformIter([=] XGBOOST_DEVICE(std::size_t i) { return check(first[i]); });
373-
return dh::Reduce(policy, it, it + n, true, thrust::logical_and<>{});
376+
return dh::Reduce(policy, it, it + n, true, std::logical_and<>{});
374377
}
375378
} // namespace xgboost::common
376379
#endif // XGBOOST_COMMON_ALGORITHM_CUH_

src/common/device_helpers.cuh

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <thrust/device_vector.h> // for device_vector
88
#include <thrust/execution_policy.h> // thrust::seq
99
#include <thrust/iterator/discard_iterator.h> // for discard_iterator
10+
#include <thrust/iterator/reverse_iterator.h> // for make_reverse_iterator
1011
#include <thrust/iterator/transform_output_iterator.h> // make_transform_output_iterator
1112
#include <thrust/system/cuda/error.h>
1213
#include <thrust/system_error.h>
@@ -17,6 +18,7 @@
1718
#include <cub/cub.cuh>
1819
#include <cub/util_type.cuh> // for UnitWord, DoubleBuffer
1920
#include <cuda/std/iterator> // for iterator_traits
21+
#include <functional> // for equal_to
2022
#include <variant> // for variant, visit
2123
#include <vector> // for vector
2224

@@ -612,11 +614,11 @@ struct SegmentedUniqueReduceOp {
612614
* \return Number of unique values in total.
613615
*/
614616
template <typename DerivedPolicy, typename KeyInIt, typename KeyOutIt, typename ValInIt,
615-
typename ValOutIt, typename CompValue, typename CompKey = thrust::equal_to<size_t>>
617+
typename ValOutIt, typename CompValue, typename CompKey = std::equal_to<size_t>>
616618
size_t SegmentedUnique(const thrust::detail::execution_policy_base<DerivedPolicy> &exec,
617619
KeyInIt key_segments_first, KeyInIt key_segments_last, ValInIt val_first,
618620
ValInIt val_last, KeyOutIt key_segments_out, ValOutIt val_out,
619-
CompValue comp, CompKey comp_key = thrust::equal_to<size_t>{}) {
621+
CompValue comp, CompKey comp_key = std::equal_to<size_t>{}) {
620622
using Key = thrust::pair<size_t, typename cuda::std::iterator_traits<ValInIt>::value_type>;
621623
auto unique_key_it = dh::MakeTransformIterator<Key>(
622624
thrust::make_counting_iterator(static_cast<size_t>(0)),
@@ -672,10 +674,9 @@ size_t SegmentedUniqueByKey(const thrust::detail::execution_policy_base<DerivedP
672674
using Key = thrust::pair<size_t, typename cuda::std::iterator_traits<KeyInIt>::value_type>;
673675

674676
auto unique_key_it = dh::MakeTransformIterator<Key>(
675-
thrust::make_counting_iterator(static_cast<size_t>(0)),
676-
[=] __device__(size_t i) {
677+
thrust::make_counting_iterator(static_cast<size_t>(0)), [=] __device__(size_t i) {
677678
size_t seg = dh::SegmentId(key_segments_first, key_segments_last, i);
678-
return thrust::make_pair(seg, *(key_first + i));
679+
return cuda::std::make_pair(seg, *(key_first + i));
679680
});
680681
size_t segments_len = key_segments_last - key_segments_first;
681682
thrust::fill(exec, key_segments_out, key_segments_out + segments_len, 0);
@@ -686,19 +687,19 @@ size_t SegmentedUniqueByKey(const thrust::detail::execution_policy_base<DerivedP
686687
auto reduce_it = thrust::make_transform_output_iterator(
687688
thrust::make_discard_iterator(),
688689
detail::SegmentedUniqueReduceOp<Key, SegOutIt>{key_segments_out});
689-
auto uniques_ret = thrust::unique_by_key_copy(
690-
exec, unique_key_it, unique_key_it + n_inputs, val_first, reduce_it,
691-
val_out, [=] __device__(Key const &l, Key const &r) {
692-
if (l.first == r.first) {
693-
// In the same segment.
694-
return comp(thrust::get<1>(l), thrust::get<1>(r));
695-
}
696-
return false;
697-
});
690+
auto uniques_ret =
691+
thrust::unique_by_key_copy(exec, unique_key_it, unique_key_it + n_inputs, val_first,
692+
reduce_it, val_out, [=] __device__(Key const &l, Key const &r) {
693+
if (l.first == r.first) {
694+
// In the same segment.
695+
return comp(l.second, r.second);
696+
}
697+
return false;
698+
});
698699
auto n_uniques = uniques_ret.second - val_out;
699700
CHECK_LE(n_uniques, n_inputs);
700-
thrust::exclusive_scan(exec, key_segments_out,
701-
key_segments_out + segments_len, key_segments_out, 0);
701+
thrust::exclusive_scan(exec, key_segments_out, key_segments_out + segments_len, key_segments_out,
702+
0);
702703
return n_uniques;
703704
}
704705

src/common/quantile.cuh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
#include <thrust/logical.h> // for any_of
88

9+
#include <cstddef> // for size_t
10+
#include <functional> // for equal_to
11+
912
#include "categorical.h"
1013
#include "common.h" // for HumanMemUnit
1114
#include "cuda_context.cuh" // for CUDAContext
@@ -204,8 +207,8 @@ class SketchContainer {
204207
SketchContainer& operator=(const SketchContainer&) = delete;
205208

206209
/* \brief Removes all the duplicated elements in quantile structure. */
207-
template <typename KeyComp = thrust::equal_to<size_t>>
208-
size_t Unique(Context const* ctx, KeyComp key_comp = thrust::equal_to<size_t>{}) {
210+
template <typename KeyComp = std::equal_to<size_t>>
211+
std::size_t Unique(Context const* ctx, KeyComp key_comp = std::equal_to<size_t>{}) {
209212
timer_.Start(__func__);
210213
curt::SetDevice(ctx->Ordinal());
211214
this->columns_ptr_.SetDevice(ctx->Device());

src/data/ellpack_page.cu

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@
66
#include <thrust/iterator/counting_iterator.h> // for make_counting_iterator
77
#include <thrust/iterator/transform_output_iterator.h> // for transform_output_iterator
88

9-
#include <algorithm> // for copy
10-
#include <limits> // for numeric_limits
11-
#include <utility> // for move
12-
#include <vector> // for vector
9+
#include <algorithm> // for copy
10+
#include <cuda/std/iterator> // for distance
11+
#include <limits> // for numeric_limits
12+
#include <utility> // for move
13+
#include <vector> // for vector
1314

1415
#include "../common/algorithm.cuh" // for InclusiveScan
1516
#include "../common/categorical.h" // for IsCat
@@ -75,13 +76,13 @@ __global__ void CompressBinEllpackKernel(
7576
auto row_end = entries + row_ptrs[irow + 1] - row_ptrs[0];
7677
auto it = thrust::make_transform_iterator(thrust::make_counting_iterator(0ul),
7778
[=](std::size_t i) { return row_beg[i].index; });
78-
auto it_end = it + thrust::distance(row_beg, row_end);
79+
auto it_end = it + cuda::std::distance(row_beg, row_end);
7980
auto res_it = thrust::lower_bound(thrust::seq, it, it_end, cpr_fidx);
8081
if (res_it == it_end || cpr_fidx != *res_it) {
8182
wr.AtomicWriteSymbol(buffer, bin, (irow + base_row) * row_stride + cpr_fidx);
8283
return;
8384
}
84-
cpr_fidx = thrust::distance(it, res_it);
85+
cpr_fidx = cuda::std::distance(it, res_it);
8586
SPAN_CHECK(cpr_fidx < row_length);
8687
}
8788

src/encoder/ordinal.cuh

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,7 @@ void Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc,
223223
* Check consistency.
224224
*/
225225
auto check_it = thrust::make_transform_iterator(
226-
thrust::make_counting_iterator(0ul),
227-
cuda::proclaim_return_type<bool>([=] __device__(std::size_t i) {
226+
thrust::make_counting_iterator(0ul), [=] XGBOOST_DEVICE(std::size_t i) -> bool {
228227
auto const& l_f = orig_enc.columns[i];
229228
auto const& r_f = new_enc.columns[i];
230229
if (l_f.index() != r_f.index()) {
@@ -233,10 +232,9 @@ void Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc,
233232
auto l_is_empty = cuda::std::visit([](auto&& arg) { return arg.empty(); }, l_f);
234233
auto r_is_empty = cuda::std::visit([](auto&& arg) { return arg.empty(); }, r_f);
235234
return l_is_empty == r_is_empty;
236-
}));
237-
bool valid = thrust::reduce(
238-
exec, check_it, check_it + new_enc.Size(), true,
239-
cuda::proclaim_return_type<bool>([=] __device__(bool l, bool r) { return l && r; }));
235+
});
236+
bool valid = thrust::reduce(exec, check_it, check_it + new_enc.Size(), true,
237+
[=] XGBOOST_DEVICE(bool l, bool r) -> bool { return l && r; });
240238
if (!valid) {
241239
policy.Error(
242240
"Invalid new DataFrame. "
@@ -282,10 +280,9 @@ void Recode(ExecPolicy const& policy, DeviceColumnsView orig_enc,
282280
f_mapping[i - f_beg] = idx;
283281
});
284282

285-
auto err_it = thrust::find_if(exec, dh::tcbegin(mapping), dh::tcend(mapping),
286-
cuda::proclaim_return_type<bool>([=] __device__(std::int32_t v) {
287-
return v == detail::NotFound();
288-
}));
283+
auto err_it = thrust::find_if(
284+
exec, dh::tcbegin(mapping), dh::tcend(mapping),
285+
[=] XGBOOST_DEVICE(std::int32_t v) -> bool { return v == detail::NotFound(); });
289286

290287
if (err_it != dh::tcend(mapping)) {
291288
// Report missing cat.

src/metric/auc.cu

Lines changed: 10 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@
66
#include <thrust/scan.h>
77

88
#include <cassert>
9-
#include <cub/cub.cuh> // NOLINT
9+
#include <cuda/std/utility> // for pair
10+
#include <functional> // for equal_to
1011
#include <limits>
1112
#include <memory>
1213
#include <tuple>
@@ -372,15 +373,9 @@ double GPUMultiClassAUCOVR(Context const *ctx, MetaInfo const &info,
372373
dh::TemporaryArray<uint32_t> unique_class_ptr(d_class_ptr.size());
373374
auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr);
374375
auto n_uniques = dh::SegmentedUniqueByKey(
375-
ctx->CUDACtx()->TP(),
376-
dh::tbegin(d_class_ptr),
377-
dh::tend(d_class_ptr),
378-
uni_key,
379-
uni_key + d_sorted_idx.size(),
380-
dh::tbegin(d_unique_idx),
381-
d_unique_class_ptr.data(),
382-
dh::tbegin(d_unique_idx),
383-
thrust::equal_to<thrust::pair<uint32_t, float>>{});
376+
ctx->CUDACtx()->TP(), dh::tbegin(d_class_ptr), dh::tend(d_class_ptr), uni_key,
377+
uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx), d_unique_class_ptr.data(),
378+
dh::tbegin(d_unique_idx), std::equal_to<thrust::pair<uint32_t, float>>{});
384379
d_unique_idx = d_unique_idx.subspan(0, n_uniques);
385380

386381
auto get_class_id = [=] XGBOOST_DEVICE(size_t idx) { return idx / n_samples; };
@@ -746,15 +741,9 @@ std::pair<double, uint32_t> GPURankingPRAUCImpl(Context const *ctx,
746741
dh::TemporaryArray<uint32_t> unique_class_ptr(d_group_ptr.size());
747742
auto d_unique_class_ptr = dh::ToSpan(unique_class_ptr);
748743
auto n_uniques = dh::SegmentedUniqueByKey(
749-
ctx->CUDACtx()->TP(),
750-
dh::tbegin(d_group_ptr),
751-
dh::tend(d_group_ptr),
752-
uni_key,
753-
uni_key + d_sorted_idx.size(),
754-
dh::tbegin(d_unique_idx),
755-
d_unique_class_ptr.data(),
756-
dh::tbegin(d_unique_idx),
757-
thrust::equal_to<thrust::pair<uint32_t, float>>{});
744+
ctx->CUDACtx()->TP(), dh::tbegin(d_group_ptr), dh::tend(d_group_ptr), uni_key,
745+
uni_key + d_sorted_idx.size(), dh::tbegin(d_unique_idx), d_unique_class_ptr.data(),
746+
dh::tbegin(d_unique_idx), std::equal_to<cuda::std::pair<uint32_t, float>>{});
758747
d_unique_idx = d_unique_idx.subspan(0, n_uniques);
759748

760749
auto get_group_id = [=] XGBOOST_DEVICE(size_t idx) {
@@ -861,8 +850,8 @@ std::pair<double, std::uint32_t> GPURankingPRAUC(Context const *ctx,
861850
return thrust::make_pair(y * w, (1.0 - y) * w);
862851
});
863852
thrust::reduce_by_key(ctx->CUDACtx()->CTP(), key_it, key_it + predts.size(), val_it,
864-
thrust::make_discard_iterator(), totals.begin(), thrust::equal_to<size_t>{},
865-
PairPlus<double, double>{});
853+
thrust::make_discard_iterator(), totals.begin(), std::equal_to<size_t>{},
854+
PairPlus<double, double>{}); // NOLINT
866855

867856
/**
868857
* Calculate AUC

src/tree/gpu_hist/gradient_based_sampler.cu

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,9 @@
55
#include <thrust/random.h>
66
#include <thrust/sort.h> // for sort
77
#include <thrust/transform.h>
8-
#include <xgboost/host_device_vector.h>
9-
#include <xgboost/logging.h>
108

11-
#include <cstddef> // for size_t
9+
#include <cstddef> // for size_t
10+
#include <cuda/std/iterator> // for distance
1211
#include <limits>
1312
#include <utility>
1413

@@ -18,6 +17,8 @@
1817
#include "../../data/iterative_dmatrix.h" // for IterativeDMatrix
1918
#include "../param.h"
2019
#include "gradient_based_sampler.cuh"
20+
#include "xgboost/host_device_vector.h"
21+
#include "xgboost/logging.h"
2122

2223
namespace xgboost::tree {
2324
/*! \brief A functor that returns random weights. */
@@ -374,6 +375,6 @@ size_t GradientBasedSampler::CalculateThresholdIndex(Context const* ctx,
374375
SampleRateDelta(threshold, gpair.size(), sample_rows));
375376
thrust::device_ptr<float> min =
376377
thrust::min_element(cuctx->CTP(), dh::tbegin(grad_sum), dh::tend(grad_sum));
377-
return thrust::distance(dh::tbegin(grad_sum), min) + 1;
378+
return cuda::std::distance(dh::tbegin(grad_sum), min) + 1;
378379
}
379380
}; // namespace xgboost::tree

0 commit comments

Comments
 (0)