Commit 7248ba6

Revert "fp32 fix for objectives calculations (#70)"
This reverts commit 74efac6.
1 parent 3d067f4 commit 7248ba6

File tree

9 files changed: +30 -55 lines
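
In substance, the reverted change (#70) had threaded a compile-time capability tag through every common::Transform<> functor: the launcher passed std::true_type() on devices with fp64 support and std::false_type() otherwise, and functors received it as auto has_fp64_support to select an accumulation path with if constexpr. A minimal self-contained sketch of that tag-dispatch pattern (hypothetical StableSum, not XGBoost code):

#include <type_traits>
#include <vector>

// Sketch of the reverted pattern: a std::true_type / std::false_type tag
// selects the accumulator width at compile time, with no runtime branch.
template <class HasFp64>
float StableSum(const std::vector<float>& v, HasFp64 has_fp64_support) {
  if constexpr (has_fp64_support) {
    double acc = 0.0;  // wide accumulator on fp64-capable devices
    for (float x : v) acc += x;
    return static_cast<float>(acc);
  } else {
    float acc = 0.0f;  // fp32-only fallback
    for (float x : v) acc += x;
    return acc;
  }
}

// Usage: StableSum(values, std::true_type{}) on fp64-capable devices,
//        StableSum(values, std::false_type{}) otherwise.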

plugin/sycl/common/transform.h

Lines changed: 7 additions & 18 deletions
@@ -20,24 +20,13 @@ void LaunchSyclKernel(DeviceOrd device, Functor&& _func, xgboost::common::Range
   auto* qu = device_manager.GetQueue(device);

   size_t size = *(_range.end());
-  const bool has_fp64_support = qu->get_device().has(::sycl::aspect::fp64);
-  if (has_fp64_support) {
-    qu->submit([&](::sycl::handler& cgh) {
-      cgh.parallel_for<>(::sycl::range<1>(size),
-                         [=](::sycl::id<1> pid) {
-        const size_t idx = pid[0];
-        const_cast<Functor&&>(_func)(idx, std::true_type(), _spans...);
-      });
-    }).wait();
-  } else {
-    qu->submit([&](::sycl::handler& cgh) {
-      cgh.parallel_for<>(::sycl::range<1>(size),
-                         [=](::sycl::id<1> pid) {
-        const size_t idx = pid[0];
-        const_cast<Functor&&>(_func)(idx, std::false_type(), _spans...);
-      });
-    }).wait();
-  }
+  qu->submit([&](::sycl::handler& cgh) {
+    cgh.parallel_for<>(::sycl::range<1>(size),
+                       [=](::sycl::id<1> pid) {
+      const size_t idx = pid[0];
+      const_cast<Functor&&>(_func)(idx, _spans...);
+    });
+  }).wait();
 }

 } // namespace common
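
The deleted SYCL branch chose between the two kernel submissions at runtime by querying the device's fp64 aspect, since not every SYCL device implements double precision. A minimal sketch of that query, assuming a SYCL 2020 implementation (hypothetical HasFp64 helper):

#include <sycl/sycl.hpp>

// Runtime check for double-precision support, as the removed
// has_fp64_support branch performed before submitting a kernel.
bool HasFp64(const ::sycl::queue& q) {
  return q.get_device().has(::sycl::aspect::fp64);
}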

src/common/transform.h

Lines changed: 2 additions & 3 deletions
@@ -37,7 +37,7 @@ template <typename Functor, typename... SpanType>
 __global__ void LaunchCUDAKernel(Functor _func, Range _range,
                                  SpanType... _spans) {
   for (auto i : dh::GridStrideRange(*_range.begin(), *_range.end())) {
-    _func(i, std::true_type(), _spans...);
+    _func(i, _spans...);
   }
 }
 #endif  // defined(__CUDACC__)
@@ -184,8 +184,7 @@ class Transform {
   void LaunchCPU(Functor func, HDV *...vectors) const {
     omp_ulong end = static_cast<omp_ulong>(*(range_.end()));
     SyncHost(vectors...);
-    ParallelFor(end, n_threads_, [&](omp_ulong idx) { func(idx, std::true_type(),
-                                                           UnpackHDV(vectors)...); });
+    ParallelFor(end, n_threads_, [&](omp_ulong idx) { func(idx, UnpackHDV(vectors)...); });
   }

  private:
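
For context, dh::GridStrideRange is XGBoost's wrapper around the conventional CUDA grid-stride loop, in which each thread starts at its global index and advances by the total grid width so that any launch configuration covers the whole range. A self-contained sketch of the underlying idiom (hypothetical kernel, not the wrapper itself):

#include <cstddef>

// Grid-stride loop: thread t handles elements t, t + stride, t + 2*stride, ...
__global__ void ScaleKernel(float* data, std::size_t begin, std::size_t end) {
  std::size_t stride = static_cast<std::size_t>(gridDim.x) * blockDim.x;
  for (std::size_t i = begin + blockIdx.x * blockDim.x + threadIdx.x;
       i < end; i += stride) {
    data[i] *= 2.0f;  // stand-in for _func(i, _spans...)
  }
}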

src/objective/aft_obj.cu

Lines changed: 2 additions & 2 deletions
@@ -45,7 +45,7 @@ class AFTObj : public ObjFunction {
                    linalg::Matrix<GradientPair>* out_gpair, size_t ndata, DeviceOrd device,
                    bool is_null_weight, float aft_loss_distribution_scale) {
     common::Transform<>::Init(
-        [=] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support,
+        [=] XGBOOST_DEVICE(size_t _idx,
                            common::Span<GradientPair> _out_gpair,
                            common::Span<const bst_float> _preds,
                            common::Span<const bst_float> _labels_lower_bound,
@@ -104,7 +104,7 @@ class AFTObj : public ObjFunction {
   void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {
     // Trees give us a prediction in log scale, so exponentiate
     common::Transform<>::Init(
-        [] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support, common::Span<bst_float> _preds) {
+        [] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {
          _preds[_idx] = exp(_preds[_idx]);
        },
        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),

src/objective/hinge.cu

Lines changed: 1 addition & 1 deletion
@@ -85,7 +85,7 @@ class HingeObj : public FitIntercept {

   void PredTransform(HostDeviceVector<float> *io_preds) const override {
     common::Transform<>::Init(
-        [] XGBOOST_DEVICE(std::size_t _idx, auto has_fp64_support, common::Span<float> _preds) {
+        [] XGBOOST_DEVICE(std::size_t _idx, common::Span<float> _preds) {
          _preds[_idx] = _preds[_idx] > 0.0 ? 1.0 : 0.0;
        },
        common::Range{0, static_cast<int64_t>(io_preds->Size()), 1}, this->ctx_->Threads(),
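
After the revert, a Transform functor receives only the element index plus its data spans, as the hinge PredTransform above shows. A self-contained analog of the calling convention (hypothetical ForEach helper standing in for common::Transform<>::Init(...).Eval(...)):

#include <cstddef>
#include <vector>

// Hypothetical stand-in for Transform: apply func(idx, views...) over [0, n).
template <typename Functor, typename... Views>
void ForEach(std::size_t n, Functor func, Views... views) {
  for (std::size_t i = 0; i < n; ++i) func(i, views...);
}

int main() {
  std::vector<float> preds{-1.0f, 0.5f, 2.0f};
  // Mirrors HingeObj::PredTransform: threshold raw margins at zero.
  ForEach(preds.size(),
          [](std::size_t i, float* p) { p[i] = p[i] > 0.0f ? 1.0f : 0.0f; },
          preds.data());
  return 0;  // preds is now {0, 1, 1}
}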

src/objective/multiclass_obj.cu

Lines changed: 8 additions & 17 deletions
@@ -75,7 +75,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
     }

     common::Transform<>::Init(
-        [=] XGBOOST_DEVICE(size_t idx, auto has_fp64_support,
+        [=] XGBOOST_DEVICE(size_t idx,
                            common::Span<GradientPair> gpair,
                            common::Span<bst_float const> labels,
                            common::Span<bst_float const> preds,
@@ -86,16 +86,8 @@ class SoftmaxMultiClassObj : public ObjFunction {
           // Part of Softmax function
           bst_float wmax = std::numeric_limits<bst_float>::min();
           for (auto const i : point) { wmax = fmaxf(i, wmax); }
-
-          float wsum = 0.0f;
-          if constexpr (has_fp64_support) {
-            double wsum_fp64 = 0;
-            for (auto const i : point) { wsum_fp64 += expf(i - wmax); }
-            wsum = static_cast<float>(wsum_fp64);
-          } else {
-            for (auto const i : point) { wsum += expf(i - wmax); }
-          }
-
+          double wsum = 0.0f;
+          for (auto const i : point) { wsum += expf(i - wmax); }
           auto label = labels[idx];
           if (label < 0 || label >= nclass) {
             _label_correct[0] = 0;
@@ -104,11 +96,11 @@ class SoftmaxMultiClassObj : public ObjFunction {
           bst_float wt = is_null_weight ? 1.0f : weights[idx];
           for (int k = 0; k < nclass; ++k) {
             // Computation duplicated to avoid creating a cache.
-            bst_float p = expf(point[k] - wmax) / wsum;
+            bst_float p = expf(point[k] - wmax) / static_cast<float>(wsum);
             const float eps = 1e-16f;
-            const bst_float h = 2.0f * p * (1.0f - p) * wt;
+            const bst_float h = fmax(2.0f * p * (1.0f - p) * wt, eps);
             p = label == k ? p - 1.0f : p;
-            gpair[idx * nclass + k] = GradientPair(p * wt, h < eps ? eps : h);
+            gpair[idx * nclass + k] = GradientPair(p * wt, h);
           }
         }, common::Range{0, ndata}, ctx_->Threads(), device)
         .Eval(out_gpair->Data(), info.labels.Data(), &preds, &info.weights_, &label_correct_);
@@ -137,7 +129,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
     auto device = io_preds->Device();
     if (prob) {
       common::Transform<>::Init(
-          [=] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support, common::Span<bst_float> _preds) {
+          [=] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {
            common::Span<bst_float> point =
                _preds.subspan(_idx * nclass, nclass);
            common::Softmax(point.begin(), point.end());
@@ -150,8 +142,7 @@ class SoftmaxMultiClassObj : public ObjFunction {
       max_preds.SetDevice(device);
       max_preds.Resize(ndata);
       common::Transform<>::Init(
-          [=] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support,
-                             common::Span<const bst_float> _preds,
+          [=] XGBOOST_DEVICE(size_t _idx, common::Span<const bst_float> _preds,
                              common::Span<bst_float> _max_preds) {
            common::Span<const bst_float> point =
                _preds.subspan(_idx * nclass, nclass);
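
The loop restored here is the standard max-subtracted softmax: shifting by the row maximum before expf keeps the exponentials in range, and the denominator wsum is accumulated in double unconditionally, where #70 had made the double accumulation conditional on device fp64 support. (The remaining lines restore the Hessian floor in its original fmax form; the clamp is equivalent in effect.) A self-contained sketch of the computation:

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

// Numerically stable softmax, matching the restored code: shift by the
// row maximum, accumulate the denominator in double, divide in float.
std::vector<float> Softmax(const std::vector<float>& point) {
  float wmax = *std::max_element(point.begin(), point.end());
  double wsum = 0.0;
  for (float v : point) wsum += std::exp(v - wmax);
  std::vector<float> out(point.size());
  for (std::size_t k = 0; k < point.size(); ++k) {
    out[k] = std::exp(point[k] - wmax) / static_cast<float>(wsum);
  }
  return out;
}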

src/objective/regression_obj.cu

Lines changed: 6 additions & 7 deletions
@@ -142,8 +142,7 @@ class RegLossObj : public FitInterceptGlmLike {

     common::Transform<>::Init(
         [block_size, ndata, n_targets] XGBOOST_DEVICE(
-            size_t data_block_idx, auto has_fp64_support,
-            common::Span<float> _additional_input,
+            size_t data_block_idx, common::Span<float> _additional_input,
             common::Span<GradientPair> _out_gpair,
             common::Span<const bst_float> _preds,
             common::Span<const bst_float> _labels,
@@ -180,7 +179,7 @@ class RegLossObj : public FitInterceptGlmLike {

   void PredTransform(HostDeviceVector<float> *io_preds) const override {
     common::Transform<>::Init(
-        [] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support, common::Span<float> _preds) {
+        [] XGBOOST_DEVICE(size_t _idx, common::Span<float> _preds) {
          _preds[_idx] = Loss::PredTransform(_preds[_idx]);
        },
        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),
@@ -361,7 +360,7 @@ class PoissonRegression : public FitInterceptGlmLike {
     }
     bst_float max_delta_step = param_.max_delta_step;
     common::Transform<>::Init(
-        [=] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support,
+        [=] XGBOOST_DEVICE(size_t _idx,
                            common::Span<int> _label_correct,
                            common::Span<GradientPair> _out_gpair,
                            common::Span<const bst_float> _preds,
@@ -388,7 +387,7 @@ class PoissonRegression : public FitInterceptGlmLike {
   }
   void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {
     common::Transform<>::Init(
-        [] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support, common::Span<bst_float> _preds) {
+        [] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {
          _preds[_idx] = expf(_preds[_idx]);
        },
        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),
@@ -567,7 +566,7 @@ class TweedieRegression : public FitInterceptGlmLike {

     const float rho = param_.tweedie_variance_power;
     common::Transform<>::Init(
-        [=] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support,
+        [=] XGBOOST_DEVICE(size_t _idx,
                            common::Span<int> _label_correct,
                            common::Span<GradientPair> _out_gpair,
                            common::Span<const bst_float> _preds,
@@ -598,7 +597,7 @@ class TweedieRegression : public FitInterceptGlmLike {
   }
   void PredTransform(HostDeviceVector<bst_float> *io_preds) const override {
     common::Transform<>::Init(
-        [] XGBOOST_DEVICE(size_t _idx, auto has_fp64_support, common::Span<bst_float> _preds) {
+        [] XGBOOST_DEVICE(size_t _idx, common::Span<bst_float> _preds) {
          _preds[_idx] = expf(_preds[_idx]);
        },
        common::Range{0, static_cast<int64_t>(io_preds->Size())}, this->ctx_->Threads(),

src/tree/split_evaluator.h

Lines changed: 1 addition & 2 deletions
@@ -180,8 +180,7 @@ class TreeEvaluator {
     }

     common::Transform<>::Init(
-        [=] XGBOOST_DEVICE(size_t, auto has_fp64_support,
-                           common::Span<float> lower,
+        [=] XGBOOST_DEVICE(size_t, common::Span<float> lower,
                            common::Span<float> upper,
                            common::Span<int> monotone) {
          lower[leftid] = lower[nodeid];

tests/cpp/common/test_transform_range.cc

Lines changed: 2 additions & 3 deletions
@@ -25,8 +25,7 @@ constexpr DeviceOrd TransformDevice() {

 template <typename T>
 struct TestTransformRange {
-  template <class kBoolConst>
-  void XGBOOST_DEVICE operator()(std::size_t _idx, kBoolConst has_fp64_support, Span<float> _out, Span<const float> _in) {
+  void XGBOOST_DEVICE operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
     _out[_idx] = _in[_idx];
   }
 };
@@ -60,7 +59,7 @@ TEST(TransformDeathTest, Exception) {
   const HostDeviceVector<float> in_vec{h_in, DeviceOrd::CPU()};
   EXPECT_DEATH(
       {
-        Transform<>::Init([](size_t idx, auto has_fp64_support, common::Span<float const> _in) { _in[idx + 1]; },
+        Transform<>::Init([](size_t idx, common::Span<float const> _in) { _in[idx + 1]; },
                           Range(0, static_cast<Range::DifferenceType>(kSize)), AllThreadsForTest(),
                           DeviceOrd::CPU())
             .Eval(&in_vec);

tests/cpp/plugin/test_sycl_transform_range.cc

Lines changed: 1 addition & 2 deletions
@@ -19,8 +19,7 @@ namespace xgboost::common {

 template <typename T>
 struct TestTransformRange {
-  template <class kBoolConst>
-  void operator()(std::size_t _idx, kBoolConst has_fp64_support, Span<float> _out, Span<const float> _in) {
+  void operator()(std::size_t _idx, Span<float> _out, Span<const float> _in) {
     _out[_idx] = _in[_idx];
   }
 };
