Skip to content

Commit ecd5567

Browse files
malfet and pytorchmergebot
authored and committed
Tentative fix for CUDA-10.2 windows build failures (pytorch#76204)
Summary:
`C10_UNUSED` somehow triggers a segfault in some versions of NVCC, which looks something like the following:
```
caffe2\caffe2\operators\piecewise_linear_transform_op.h(65): internal error: assertion failed: gen_variable_decl: declared_type is NULL (cp_gen_be.c, line 22209 in gen_variable_decl)

1 catastrophic error detected in the compilation of "caffe2/caffe2/operators/piecewise_linear_transform_op.cu".
Compilation aborted.
nvcc error : 'cudafe++' died with status 0xC0000409
```

Fixes regression introduced by pytorch#75538 / D35747333 (pytorch@f6c275f)

Pull Request resolved: pytorch#76204

Test Plan: CI

Reviewed By: EscapeZero, atalman

Differential Revision: D35831451

fbshipit-source-id: f744d4688c9fd324f8f54b27781a3def97778d1e

(cherry picked from commit fd64655)
1 parent 4b311a9 commit ecd5567

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

caffe2/operators/deform_conv_op_impl.h

+4-2
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,8 @@ bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
155155
col_buffer->Resize(buffer_shape);
156156
T* col_buffer_data = col_buffer->template mutable_data<T>();
157157
// Im2col, followed by gemm.
158-
for (C10_UNUSED const auto image_id : c10::irange(N)) {
158+
for (const auto image_id : c10::irange(N)) {
159+
(void)image_id; // CUDA-10.2 on Windows crashes when C10_UNUSED macro is used
159160
for (const auto group_id : c10::irange(group_)) {
160161
DeformableIm2col(
161162
Xdata + group_id * input_offset,
@@ -342,7 +343,8 @@ bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
342343
math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
343344
}
344345

345-
for (C10_UNUSED const auto image_id : c10::irange(N)) {
346+
for (const auto image_id : c10::irange(N)) {
347+
(void)image_id; // CUDA-10.2 on Windows crashes when C10_UNUSED macro is used
346348
for (const auto group_id : c10::irange(group_)) {
347349
math::Gemm<T, Context>(
348350
CblasTrans,

caffe2/operators/piecewise_linear_transform_op.h

+2-1
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,8 @@ class PiecewiseLinearTransformOp final : public Operator<Context> {
6262
const int64_t num_bounds_per_group,
6363
const int64_t num_group) {
6464
const T* start = bounds;
65-
for (C10_UNUSED const auto i : c10::irange(num_group)) {
65+
for (const auto i : c10::irange(num_group)) {
66+
(void)i; // CUDA-10.2 on Windows crashes when C10_UNUSED macro is used
6667
if (!std::is_sorted(start, start + num_bounds_per_group)) {
6768
return false;
6869
}

0 commit comments

Comments
 (0)