Skip to content

Commit 33f6efc

Browse files
ethansfng authored and facebook-github-bot committed
Use Memcpy in copy_utils (pytorch#11430)
Summary: The standard element-wise copy in copy_utils is inefficient; use memcpy instead when the last-dimension stride is 1 (contiguous data).
Rollback Plan:
Differential Revision: D76061894
1 parent 00ca8ff commit 33f6efc

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

kernels/portable/cpu/util/copy_ops_util.h

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,18 +28,26 @@ void _as_strided_copy(
2828
int64_t dim) {
2929
// the last dimension, copy data
3030
if (dim == static_cast<int64_t>(size.size()) - 1) {
31-
for (const auto i : c10::irange(size.at(dim))) {
32-
output_data[i] = *input_data;
33-
input_data += stride.at(dim);
31+
const size_t num_elements = size.at(dim);
32+
const int64_t stride_dim = stride.at(dim);
33+
// use memcpy for contiguous memory
34+
if (stride_dim == 1) {
35+
memcpy(output_data, input_data, num_elements * sizeof(CTYPE));
36+
} else {
37+
for (const auto i : c10::irange(num_elements)) {
38+
output_data[i] = *input_data;
39+
input_data += stride_dim;
40+
}
3441
}
3542
return;
3643
}
3744
size_t trailing_dims = getTrailingDims(out, dim);
3845
// recursively set data for the next dimension
46+
const int64_t stride_dim = stride.at(dim);
3947
for ([[maybe_unused]] const auto i : c10::irange(size.at(dim))) {
4048
_as_strided_copy<CTYPE>(
4149
input_data, output_data, out, size, stride, dim + 1);
42-
input_data += stride.at(dim);
50+
input_data += stride_dim;
4351
output_data += trailing_dims;
4452
}
4553
}

0 commit comments

Comments
 (0)