Commit e4cdc31

cyyever authored and pytorchmergebot committed
[14/N] Fix clang-tidy warnings in aten/src/ATen (pytorch#133988)
Follows pytorch#133807
Pull Request resolved: pytorch#133988
Approved by: https://github.com/ezyang
1 parent 9731ccb commit e4cdc31

18 files changed: +50 -45 lines

aten/src/ATen/cuda/tunable/TunableOp.h

+1 -1

@@ -278,7 +278,7 @@ class TunableOp {
 };
 
 struct OpParams {
-  OpParams() {}
+  OpParams() = default;
   virtual ~OpParams() = default;
   virtual std::string Signature() const = 0;
 };
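This is the modernize-use-equals-default pattern: an empty user-written constructor body becomes `= default`, which lets the compiler generate the constructor and keeps a type trivially default-constructible when nothing else prevents it. A minimal standalone sketch of the effect (illustrative only, not code from this commit; assumes C++17):

#include <type_traits>

struct Before { Before() {} int x; };         // user-provided ctor: not trivial
struct After  { After() = default; int x; };  // defaulted ctor: stays trivial

static_assert(!std::is_trivially_default_constructible_v<Before>);
static_assert(std::is_trivially_default_constructible_v<After>);

For a polymorphic type like OpParams the gain is mostly consistency with the defaulted destructor next to it, but the check is the same.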

aten/src/ATen/native/cuda/BinaryInternal.h

+2 -6

@@ -15,9 +15,7 @@
 
 #include <type_traits>
 
-namespace at {
-namespace native {
-namespace binary_internal {
+namespace at::native::binary_internal {
 
 template <typename scalar_t>
 struct DivFunctor {
@@ -43,6 +41,4 @@ struct MulFunctor<bool> {
 };
 void div_true_kernel_cuda(TensorIteratorBase& iter);
 void div_trunc_kernel_cuda(TensorIteratorBase& iter);
-} // namespace binary_internal
-} // namespace native
-} // namespace at
+} // namespace at::native::binary_internal
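Several headers in this commit get the same treatment, matching the modernize-concat-nested-namespaces check: nested namespace blocks are collapsed into the C++17 nested-namespace-definition syntax, so there is one opening line and one closing brace instead of three. A minimal sketch (illustrative; the helper function is hypothetical):

// Before: three blocks, three closing braces.
// namespace at { namespace native { namespace binary_internal { ... } } }

// After: one block declaring the same fully qualified scope.
namespace at::native::binary_internal {
inline int add_one(int x) { return x + 1; }  // hypothetical helper, for illustration
} // namespace at::native::binary_internal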

aten/src/ATen/native/cuda/Blas.cpp

+12 -3

@@ -95,7 +95,7 @@ c10::MaybeOwned<Tensor> inline prepare_matrix_for_cublas(const Tensor& tensor, b
 
 struct cublasCommonArgs {
   cublasCommonArgs(const Tensor& mat1, const Tensor& mat2, Tensor& c) {
-    bool transpose_result, transpose_mat1, transpose_mat2;
+    bool transpose_result = false, transpose_mat1 = false, transpose_mat2 = false;
     result = prepare_matrix_for_cublas(c, transpose_result);
     mata = prepare_matrix_for_cublas(transpose_result ? mat2 : mat1, transpose_mat1, transpose_result);
     matb = prepare_matrix_for_cublas(transpose_result ? mat1 : mat2, transpose_mat2, transpose_result);
@@ -263,6 +263,7 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
     "expected mat1 and mat2 to have the same dtype, but got: ", mat1.dtype(), " != ", mat2.dtype()
   )
 
+  // NOLINTNEXTLINE(*c-array*)
   TensorArg targs[]{{result, "out", 0}, {self, "self", 1}, {mat1, "mat1", 2}, {mat2, "mat2", 3}};
   checkAllSameGPU(__func__, targs);
 
@@ -483,9 +484,11 @@ Tensor& addmm_out_cuda_impl(Tensor& result, const Tensor& self, const Tensor& ma
   });
   switch (activation) {
     case Activation::RELU:
+      // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
       at::relu_(const_cast<Tensor&>(*args.result));
       break;
     case Activation::GELU:
+      // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
       at::gelu_(const_cast<Tensor&>(*args.result), "tanh");
       break;
     default: break;
@@ -542,8 +545,8 @@ const Tensor& baddbmm_out_cuda_impl(const Tensor& result, const Tensor& self, co
   int64_t n = result_sizes[leading_dim];
   int64_t k = (transpose_result ? batch2 : batch1).sizes()[leading_dim];
 
-  int64_t lda, ldb, ldc;
-  bool transpose_batch1, transpose_batch2;
+  int64_t lda = 0, ldb = 0, ldc = 0;
+  bool transpose_batch1 = false, transpose_batch2 = false;
   auto batch1_ = prepare_batch_matrix_for_cublas(transpose_result ? batch2 : batch1, transpose_batch1, lda, transpose_result, m, k);
   auto batch2_ = prepare_batch_matrix_for_cublas(transpose_result ? batch1 : batch2, transpose_batch2, ldb, transpose_result, k, n);
 
@@ -593,14 +596,17 @@ const Tensor& baddbmm_out_cuda_impl(const Tensor& result, const Tensor& self, co
 } // anonymous namespace
 
 TORCH_IMPL_FUNC(addmm_out_cuda)(const Tensor& self, const Tensor& mat1, const Tensor& mat2, const Scalar& beta, const Scalar& alpha, const Tensor& result) {
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
   addmm_out_cuda_impl(const_cast<Tensor&>(result), self, mat1, mat2, beta, alpha);
 }
 
 TORCH_IMPL_FUNC(addmm_activation_out_cuda)(const Tensor& self, const Tensor& mat1, const Tensor& mat2, const Scalar& beta, const Scalar& alpha, bool use_gelu, const Tensor& result) {
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
   addmm_out_cuda_impl(const_cast<Tensor&>(result), self, mat1, mat2, beta, alpha, use_gelu ? Activation::GELU : Activation::RELU);
 }
 
 TORCH_IMPL_FUNC(mm_out_cuda)(const Tensor& self, const Tensor& mat2, const Tensor& result) {
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
   addmm_out_cuda_impl(const_cast<Tensor&>(result), result, self, mat2, 0, 1);
 }
 
@@ -765,13 +771,15 @@ TORCH_IMPL_FUNC(addmv_out_cuda)(const Tensor &self, const Tensor &mat, const Ten
       result.zero_();
     } else {
       at::mul_out(
+          // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
          const_cast<Tensor&>(result),
          self,
          at::native::scalar_tensor(
              beta_, self.scalar_type(), std::nullopt /* layout */, at::kCPU, std::nullopt /* pin_memory */));
     }
   } else {
     if (!result.is_same(*self_) && betaval != 0.0) { //if beta is 0, result contents will be zeroed later
+      // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
       at::native::copy_(const_cast<Tensor&>(result), *self_);
     }
     if (result.numel() != 0) {
@@ -1040,6 +1048,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
   auto bias_ = bias.value_or(Tensor());
   auto scale_result_ = scale_result.value_or(Tensor());
 
+  // NOLINTNEXTLINE(*c-array*)
   TensorArg targs[]{{out, "out", 0}, {mat1, "mat1", 1}, {mat2, "mat2", 2},
                     {bias_, "bias", 3}, {scale_a, "scale_a", 4}, {scale_b, "scale_b", 5},
                     {scale_result_, "scale_result", 6}};
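Two recurring fixes in this file: locals that are only assigned through reference out-parameters now get explicit initializers (the cppcoreguidelines-init-variables check), and the const_cast and C-array uses that the structured-kernel plumbing forces are silenced with NOLINTNEXTLINE on exactly the offending line rather than file-wide. A small self-contained sketch of both patterns, with hypothetical names:

#include <cstdint>

// Hypothetical out-parameter API, standing in for prepare_matrix_for_cublas.
static void query_layout(bool& transpose, int64_t& ld) {
  transpose = false;
  ld = 16;
}

int main() {
  // Initialize even though query_layout() always assigns: the checker
  // cannot prove that across the call, so it flags uninitialized locals.
  bool transpose = false;
  int64_t ld = 0;
  query_layout(transpose, ld);

  // NOLINTNEXTLINE(*c-array*)  -- suppress the C-array check for this line only.
  int scratch[4]{};
  return transpose ? static_cast<int>(ld) : scratch[0];
}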

aten/src/ATen/native/cuda/Copy.h

+3 -2

@@ -5,6 +5,7 @@ struct TensorIteratorBase;
 
 namespace native {
 
-void direct_copy_kernel_cuda(TensorIteratorBase &iter);
+void direct_copy_kernel_cuda(TensorIteratorBase& iter);
 
-}} // namespace at::native
+}
+} // namespace at

aten/src/ATen/native/cuda/Distributions.cpp

+4

@@ -18,13 +18,15 @@
 
 namespace at::native {
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 Tensor _s_poisson_cuda(const Tensor& lambda, std::optional<Generator> gen_) {
   auto gen = get_generator_or_default<CUDAGeneratorImpl>(gen_, cuda::detail::getDefaultCUDAGenerator());
   Tensor ret = at::empty(lambda.sizes(), lambda.options());
   launch_poisson_cuda_kernel(ret, lambda, gen);
   return ret;
 }
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 Tensor _s_binomial_cuda(const Tensor& count, const Tensor& prob, std::optional<Generator> gen_) {
   auto gen = get_generator_or_default<CUDAGeneratorImpl>(gen_, cuda::detail::getDefaultCUDAGenerator());
   Tensor ret = at::empty(count.sizes(), count.options());
@@ -37,13 +39,15 @@ Tensor _s_binomial_cuda(const Tensor& count, const Tensor& prob, std::optional<G
   return ret;
 }
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 Tensor _s_gamma_cuda(const Tensor& alpha, std::optional<Generator> gen_) {
   auto gen = get_generator_or_default<CUDAGeneratorImpl>(gen_, cuda::detail::getDefaultCUDAGenerator());
   Tensor ret = at::empty(alpha.sizes(), alpha.options());
   launch_gamma_kernel(ret, alpha, gen);
   return ret;
 }
 
+// NOLINTNEXTLINE(performance-unnecessary-value-param)
 Tensor _s_dirichlet_cuda(const Tensor& alpha, std::optional<Generator> gen_) {
   auto gen = get_generator_or_default<CUDAGeneratorImpl>(gen_, cuda::detail::getDefaultCUDAGenerator());
   Tensor ret = at::empty(alpha.sizes(), alpha.options());
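These std::optional<Generator> parameters are taken by value because the operator signatures come from the dispatcher codegen, so the performance-unnecessary-value-param warning is suppressed instead of fixed. Where a signature is free to change, the usual fixes are a const reference or a std::move of the consumed value, as in this hypothetical sketch:

#include <cstddef>
#include <optional>
#include <string>
#include <utility>

// Flagged form: the by-value parameter is copied but only read.
//   void log_name(std::optional<std::string> name);

// Fix 1: take a const reference when the value is only inspected.
static std::size_t name_length(const std::optional<std::string>& name) {
  return name ? name->size() : 0;
}

// Fix 2: keep by-value but move when the argument is actually consumed.
static std::string take_name(std::optional<std::string> name) {
  return std::move(name).value_or("unnamed");
}

int main() {
  return (name_length(std::string("gamma")) == 5 && take_name(std::nullopt) == "unnamed") ? 0 : 1;
}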

aten/src/ATen/native/cuda/Indexing.cu

+1 -1

@@ -1353,7 +1353,7 @@ void index_select_out_cuda_impl(
     uint64_t dim,
     const Tensor& index) {
   uint64_t numIndices = index.numel();
-  uint64_t selfDims = self.dim() == 0 ? 1 : self.dim();
+  auto selfDims = self.dim() == 0 ? 1 : self.dim();
 
   const cudaStream_t stream = at::cuda::getCurrentCUDAStream();
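Tensor::dim() returns int64_t, so storing it in a uint64_t is the kind of implicit sign conversion clang-tidy flags; auto simply keeps the deduced signed type. A tiny sketch of the difference, with a hypothetical stand-in for dim():

#include <cstdint>

static int64_t dim() { return 3; }  // stand-in: Tensor::dim() returns int64_t

int main() {
  // uint64_t d = dim();            // implicit int64_t -> uint64_t sign conversion
  auto d = dim() == 0 ? 1 : dim();  // deduces int64_t, no conversion
  return d == 3 ? 0 : 1;
}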

+3 -5

@@ -1,8 +1,7 @@
 #pragma once
-#include<algorithm>
+#include <algorithm>
 
-namespace at {
-namespace native {
+namespace at::native {
 
 // returns 2**floor(log2(n))
 static int lastPow2(unsigned int n) {
@@ -14,5 +13,4 @@ static int lastPow2(unsigned int n) {
   return std::max<int>(1, n - (n >> 1));
 }
 
-} // namespace native
-} // namespace at
+} // namespace at::native

aten/src/ATen/native/cuda/LinearAlgebraStubs.cpp

+3 -3

@@ -160,12 +160,12 @@ REGISTER_CUDA_DISPATCH(lstsq_stub, &lazy_lstsq_kernel);
 // Protect from infinite recursion by initializing dispatch to self and checking
 // that values are different after linalg library were loaded
 
-namespace cuda {
-namespace detail {
+
+namespace cuda::detail {
 void registerLinalgDispatch(const LinalgDispatch& disp_) {
   disp = disp_;
 }
-}} //namespace cuda::detail
+} //namespace cuda::detail
 
 Tensor _cholesky_solve_helper_cuda(const Tensor& self, const Tensor& A, bool upper) {
   getTorchLinalgLibrary();

aten/src/ATen/native/cuda/ReduceOps.cpp

+2 -2

@@ -28,9 +28,9 @@ namespace at::native {
 namespace {
 
 void norm_kernel_cuda(TensorIterator& iter, const Scalar& val) {
-  double p;
+  double p = 0;
   if (val.isIntegral(false)) {
-    p = val.to<int64_t>();
+    p = static_cast<double>(val.to<int64_t>());
   } else if (val.isFloatingPoint()) {
     p = val.to<double>();
   } else {
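Same two checks as above: the local gets an initializer even though every branch assigns it, and the int64_t-to-double assignment becomes an explicit static_cast so the conversion is visible. A minimal sketch of the pattern:

#include <cstdint>

static double as_double(int64_t v, bool is_integral) {
  double p = 0;  // initialized up front; the branches below overwrite it
  if (is_integral) {
    p = static_cast<double>(v);  // explicit about the integer-to-double conversion
  } else {
    p = 2.0;
  }
  return p;
}

int main() { return as_double(3, true) == 3.0 ? 0 : 1; }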

aten/src/ATen/native/cuda/Resize.cpp

+3 -3

@@ -54,8 +54,8 @@ const Tensor& resize_cuda_(
     return resize_named_tensor_(self, size, optional_memory_format);
   }
   auto* self_ = self.unsafeGetTensorImpl();
-  int64_t old_storage_nbytes = self_->unsafe_storage() ? self_->unsafe_storage().nbytes() : 0;
-  resize_impl_cuda_(self_, size, /*strides=*/std::nullopt);
+  auto old_storage_nbytes = self_->unsafe_storage() ? self_->unsafe_storage().nbytes() : 0;
+  resize_impl_cuda_(self_, size, /*stride=*/std::nullopt);
   if (optional_memory_format.has_value()) {
     auto memory_format =
         optional_memory_format.value();
@@ -67,7 +67,7 @@ const Tensor& resize_cuda_(
   }
   // See Note [Enabling Deterministic Operations]
   if (C10_UNLIKELY(at::globalContext().deterministicAlgorithms() && at::globalContext().deterministicFillUninitializedMemory())) {
-    at::native::fill_resize_deterministic_(self, old_storage_nbytes);
+    at::native::fill_resize_deterministic_(self, static_cast<int64_t>(old_storage_nbytes));
   }
   return self;
 }
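Besides the auto/static_cast changes, note the /*strides=*/ to /*stride=*/ rename: this looks like the bugprone-argument-comment check, which requires an argument comment to match the parameter name exactly, so a stale comment is treated as a bug. A hypothetical sketch:

#include <optional>

// The parameter is named 'stride', so an argument comment must say 'stride' too.
static int resize_stub(int size, std::optional<int> stride) {
  return size + stride.value_or(0);
}

int main() {
  // resize_stub(8, /*strides=*/std::nullopt);  // flagged: comment does not match the parameter
  return resize_stub(8, /*stride=*/std::nullopt) == 8 ? 0 : 1;
}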

aten/src/ATen/native/cuda/RowwiseScaledMM.h

+1 -2

@@ -2,7 +2,6 @@
 #include <ATen/core/TensorBase.h>
 #include <optional>
 
-
 namespace at::cuda::detail {
 TORCH_API void f8f8bf16_rowwise(
     at::Tensor XQ, // FP8
@@ -12,4 +11,4 @@ TORCH_API void f8f8bf16_rowwise(
     std::optional<at::Tensor> bias, // BF16
     bool use_fast_accum,
     at::Tensor& out);
-} // at::cuda::detail
+} // namespace at::cuda::detail

aten/src/ATen/native/cuda/Sort.cpp

+1 -1

@@ -25,7 +25,7 @@ namespace at::native {
 
 std::vector<int64_t> infer_dense_strides_dim_last(const Tensor & self, int64_t dim);
 
-void fillSliceWithIndex(const Tensor& t, int dim) {
+void fillSliceWithIndex(const Tensor& t, int64_t dim) {
   if (t.numel()) {
     auto sizes = DimVector(t.dim(), 1);
     sizes[dim] = t.sizes()[dim];

aten/src/ATen/native/cuda/Sort.cu

+2 -2

@@ -19,7 +19,7 @@ namespace at::native {
 template <typename T>
 static int minimum_grid_for_occupancy(T kernel, int max_block_size) {
   int minGridSize = 0;
-  int blockSize;
+  int blockSize = 0;
   C10_CUDA_CHECK(cudaOccupancyMaxPotentialBlockSize(
       &minGridSize,
       &blockSize,
@@ -361,7 +361,7 @@ void sortCommon(Sorter sorter, const TensorBase &key, const TensorBase &value,
 void sortKeyValueInplace(
     const TensorBase& key,
     const TensorBase& value,
-    int dim,
+    int64_t dim,
     bool descending,
     bool stable) {
   const auto sort_size = key.size(dim);

aten/src/ATen/native/cuda/Sort.h

+4 -4

@@ -3,15 +3,15 @@
 #include <ATen/core/TensorBase.h>
 #include <ATen/native/cuda/SortStable.h>
 
-namespace at {
-namespace native {
+
+namespace at::native {
 
 inline bool should_use_small_sort(const TensorBase &self, int64_t dim) {
   return self.size(dim) <= 4096;
 }
 
 void sortKeyValueInplace(
-    const TensorBase &key, const TensorBase &value, int dim,
+    const TensorBase &key, const TensorBase &value, int64_t dim,
     bool descending, bool stable=false);
 
-}} // namespace at::native
+} // namespace at::native
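Sort.cpp, Sort.cu, and Sort.h all widen the dim parameter from int to int64_t, matching how dimension indices are typed elsewhere in ATen and removing an implicit narrowing at the call sites. A small sketch of the narrowing that the wider signature avoids (hypothetical names):

#include <cstdint>

// Before: int64_t fill_slice(const char* name, int dim);
// Callers holding an int64_t dimension index would narrow implicitly.
static int64_t fill_slice(const char* name, int64_t dim) {
  (void)name;
  return dim;  // no conversion anywhere in the call chain
}

int main() {
  int64_t dim = 1;  // dimension indices in ATen are int64_t
  return fill_slice("values", dim) == 1 ? 0 : 1;
}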

aten/src/ATen/native/cuda/SortStable.h

+2 -4

@@ -2,8 +2,7 @@
 #include <ATen/core/TensorBase.h>
 #include <cstdint>
 
-namespace at {
-namespace native {
+namespace at::native {
 
 // Stable-sort self into values, and set indices to the
 // inverse-permutation from values back to self.
@@ -15,5 +14,4 @@ void launch_stable_sort_kernel(
     const TensorBase& values,
     const TensorBase& indices);
 
-} // namespace native
-} // namespace at
+} // namespace at::native

aten/src/ATen/native/cuda/TensorModeKernel.cpp

+1 -1

@@ -6,7 +6,7 @@
 #include <ATen/native/Resize.h>
 #include <ATen/native/TensorCompare.h>
 
-constexpr int MAX_BLOCK_SIZE = AT_ROCM_ENABLED() ? 256 : 1024;
+constexpr int64_t MAX_BLOCK_SIZE = AT_ROCM_ENABLED() ? 256 : 1024;
 
 // Maximum size per grid dimension that we assume (compute capability >= 2.0)
 constexpr int64_t MAX_GRID_SIZE = 65535LL;

aten/src/ATen/native/cuda/TensorShapeCUDA.cpp

+1 -1

@@ -29,7 +29,7 @@ Tensor& set_cuda_(Tensor& result) {
 
 // unify with cuda implementation? This is not done to avoid a dispatch in resize_impl_cpu_
 Tensor& set_storage_cuda_(Tensor& result, Storage storage, int64_t storage_offset, IntArrayRef size, IntArrayRef stride) {
-  checkSetStorage(result, storage, storage_offset, size, stride);
+  checkSetStorage(result, std::move(storage), storage_offset, size, stride);
 
   result.unsafeGetTensorImpl()->set_storage_offset(storage_offset);
   at::OptionalIntArrayRef stride_opt = stride.data() != nullptr ?
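set_storage_cuda_ receives Storage by value, and the diff passes it on with std::move at what appears to be its last use, so the refcounted handle is moved instead of copied (the usual fix-it when a by-value parameter is consumed). A sketch with a hypothetical refcounted handle:

#include <memory>
#include <utility>

using Handle = std::shared_ptr<int>;  // stand-in for a refcounted Storage handle

static void check_handle(Handle h) {  // consumes its argument by value
  (void)h;
}

static void set_handle(Handle h) {
  // 'h' is not read again below, so hand it over with a move
  // instead of bumping and then dropping the reference count.
  check_handle(std::move(h));
}

int main() {
  set_handle(std::make_shared<int>(7));
  return 0;
}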

aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu

+4 -4

@@ -579,7 +579,7 @@ inline std::tuple<dim3, dim3, StackArray<int64_t>> check_shape_and_partition_(
   const dim3 blocks(
       div_round_up(outer_dense_size * jagged_folded_size, threads_y));
 
-  StackArray<int64_t> jagged_dims_tensor;
+  StackArray<int64_t> jagged_dims_tensor{};
   const int num_jagged_dim = dense_tensor.dim() - 2;
   TORCH_CHECK(num_jagged_dim <= static_cast<int>(kStackArrayMaxDims));
   jagged_dims_tensor.ndim = num_jagged_dim;
@@ -845,7 +845,7 @@ __launch_bounds__(kMaxThreads) void jagged_dense_dense_elementwise_jagged_output
   }
   if (!truncated) {
     const int oidx = offset_temp;
-    int iidx;
+    int iidx = 0;
     for (iidx = threadIdx.x; iidx * 2 + 1 < inner_dense_size;
          iidx += blockDim.x) {
       output_values[offset][2 * iidx] =
@@ -1201,7 +1201,7 @@ inline bool jagged_dense_dense_elementwise_jagged_output_matches_opt(
   matches &= (y_0_reshaped.size(0) < INT_MAX);
   matches &= (y_0_reshaped.size(1) < INT_MAX);
 
-  int max_shared_bytes;
+  int max_shared_bytes = 0;
 #ifndef USE_ROCM
   C10_CUDA_CHECK(cudaDeviceGetAttribute(
       &max_shared_bytes,
@@ -1226,7 +1226,7 @@ inline bool jagged_dense_dense_elementwise_jagged_output_matches_opt(
   auto B = y_0_reshaped.size(0);
   // the default shared memory on V100/A100/H100 is 48 KB from
   // https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#shared-memory-8-x
-  if ((B + 1) * sizeof(index_t) >= used_shared_bytes) {
+  if ((B + 1) * sizeof(index_t) >= static_cast<size_t>(used_shared_bytes)) {
     matches = false;
   }
 });
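The last file repeats the same themes: an aggregate local gets value-initialization with {}, out-params and loop counters get = 0, and a signed/unsigned comparison is made explicit with static_cast<size_t>. A compact sketch, with a hypothetical aggregate standing in for StackArray<int64_t>:

#include <cstddef>
#include <cstdint>

struct StackDims {   // hypothetical aggregate, standing in for StackArray<int64_t>
  int64_t vals[5];
  size_t ndim;
};

int main() {
  StackDims dims{};  // value-initialization zeroes every member
  dims.ndim = 2;

  int used_shared_bytes = 48 * 1024;  // signed, as the CUDA attribute query reports it
  size_t needed = (dims.ndim + 1) * sizeof(int64_t);
  // Compare like with like: cast the signed operand explicitly.
  return needed < static_cast<size_t>(used_shared_bytes) ? 0 : 1;
}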
