We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent aed9c7e commit 2d907bc Copy full SHA for 2d907bc
kernels/portable/cpu/util/copy_ops_util.h
@@ -28,9 +28,15 @@ void _as_strided_copy(
28
int64_t dim) {
29
// the last dimension, copy data
30
if (dim == static_cast<int64_t>(size.size()) - 1) {
31
- for (const auto i : c10::irange(size.at(dim))) {
32
- output_data[i] = *input_data;
33
- input_data += stride.at(dim);
+ const size_t num_elements = size.at(dim);
+ // use memcpy for contiguous memory
+ if (stride.at(dim) == 1) {
34
+ memcpy(output_data, input_data, num_elements * sizeof(CTYPE));
35
+ } else {
36
+ for (const auto i : c10::irange(num_elements)) {
37
+ output_data[i] = *input_data;
38
+ input_data += stride.at(dim);
39
+ }
40
}
41
return;
42
0 commit comments