Commit 0019279

CANN: Opt ROPE optimization (#12865)
* [CANN] Opt ROPE optimization
* [CANN] Codestyle adjustment
* [CANN] Fix the ROPE precision issue
* [CANN] Codestyle fix
* [CANN] Add ROPE unsupported case

Signed-off-by: noemotiovon <[email protected]>
1 parent b0c75ac commit 0019279
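
For orientation (standard RoPE background, not something introduced by this commit): the sin/cos cache rebuilt in the diff below stores the rotation angles

```latex
\theta_{p,i} = p \cdot \mathrm{freq\_scale} \cdot \mathrm{freq\_base}^{-2i/d},
\qquad i = 0, \dots, d/2 - 1,
```

for position p and head dimension d (ne00 in the code), ignoring freq_factors and extrapolation. The `theta_scale` variable passed into `aclnn_cache_init` corresponds to freq_base^(-2/d).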

File tree: 2 files changed, +85 −122 lines


Diff for: ggml/src/ggml-cann/aclnn_ops.cpp (+82 −122)
@@ -64,6 +64,7 @@
 #include <aclnnop/aclnn_reflection_pad1d.h>
 #include <aclnnop/aclnn_eq_tensor.h>
 #include <aclnnop/aclnn_gt_scalar.h>
+#include <aclnnop/aclnn_pow.h>
 #include <float.h>

 #include <cmath>
@@ -144,23 +145,6 @@ static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     GGML_CANN_CALL_ACLNN_OP(Cast, acl_src, cast_data_type, acl_dst);
 }

-/**
- * @brief Casts the elements of a tensor to a specified data type using the CANN backend.
- *
- * @details This function performs a type conversion on the elements of the input tensor `acl_src`
- * and stores the results in the destination tensor `acl_dst`. The conversion type is
- * determined based on the `dst` tensor's data type.
- *
- * @param ctx The context for the CANN backend operations.
- * @param acl_src The source tensor whose elements will be cast.
- * @param acl_dst The destination tensor that will store the casted elements.
- * @param dst The ggml tensor specifying the target data type.
- */
-static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-                       aclTensor* acl_dst, ggml_tensor* dst) {
-    aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
-}
-
 void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     GGML_ASSERT(ggml_can_repeat(src, dst));
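
The removed wrapper only forwarded to the dtype-based overload; the call sites changed in the hunks below now pass the ACL data type themselves via `ggml_cann_type_mapping(dst->type)`. As a rough illustration of the kind of mapping involved (a hypothetical standalone helper, not the real `ggml_cann_type_mapping`; the ACL header path may differ across CANN installs):

```cpp
#include <acl/acl_base.h>  // aclDataType (assumed header location)
#include "ggml.h"

// Hypothetical sketch of a ggml -> ACL dtype mapping, to show what call sites
// now pass explicitly to the single aclnn_cast() overload.
static aclDataType to_acl_dtype_sketch(ggml_type type) {
    switch (type) {
        case GGML_TYPE_F32: return ACL_FLOAT;
        case GGML_TYPE_F16: return ACL_FLOAT16;
        case GGML_TYPE_I32: return ACL_INT32;
        default:            return ACL_DT_UNDEFINED;  // everything else unhandled in this sketch
    }
}
```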
@@ -767,7 +751,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         if (dst->type == src0->type) {
             cann_copy(ctx, acl_src, acl_dst);
         } else {
-            aclnn_cast(ctx, acl_src, acl_dst, dst);
+            aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
         }
     } else {
         if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
@@ -792,7 +776,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ggml_type_size(dst->type), src0->ne, src_trans_nb,
                 GGML_MAX_DIMS);

-            aclnn_cast(ctx, acl_src, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
             size_t cpy_size = ggml_nbytes(dst);
             ACL_CHECK(aclrtMemcpyAsync(
                 dst->data, cpy_size, src_trans_buffer, cpy_size,
@@ -814,7 +798,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ggml_type_size(dst->type), src0->ne, src_trans_nb,
                 GGML_MAX_DIMS);

-            aclnn_cast(ctx, acl_src, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));

             size_t cpy_size = ggml_nbytes(dst);
             ACL_CHECK(aclrtMemcpyAsync(dst->data, cpy_size, src_trans_buffer,
@@ -1158,7 +1142,7 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
            tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
            ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
            GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
-        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, dst);
+        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, ggml_cann_type_mapping(dst->type));
     }

     // post-processing
@@ -1733,7 +1717,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             aclTensor* src_trans_tensor = ggml_cann_create_tensor(
                 src_trans_buffer, ACL_FLOAT, ggml_type_size(dst->type),
                 src0->ne, src_trans_nb, GGML_MAX_DIMS);
-            aclnn_cast(ctx, acl_src0, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
             aclnn_embedding_4d(ctx, src_trans_buffer, src0->ne,
                                src_trans_nb, src1, dst);
             ACL_CHECK(aclDestroyTensor(acl_src0));
@@ -2074,7 +2058,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
            output_buffer, ACL_FLOAT16, output_elem_size, output_cast_ne,
            output_cast_nb, GGML_MAX_DIMS);
         aclTensor* acl_dst_tensor = ggml_cann_create_tensor(dst);
-        aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, dst);
+        aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));

         ACL_CHECK(aclDestroyTensor(acl_output_tensor));
         ACL_CHECK(aclDestroyTensor(acl_dst_tensor));
@@ -2159,37 +2143,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     ggml_tensor* src1 = dst->src[1];  // position
     ggml_tensor* src2 = dst->src[2];  // freq_factors

-    // arange, [0,1,...,ne0/2]
-    int64_t arange_length = src0->ne[0] / 2;
-    ggml_cann_pool_alloc arange_allocator(ctx.pool(),
-                                          arange_length * sizeof(float_t));
-    void* arange_buffer = arange_allocator.get();
-    int64_t arange_ne[] = {arange_length, 1, 1, 1};
-    size_t arange_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
-                          arange_length * sizeof(float_t)};
-
-    aclTensor* acl_arange_tensor =
-        ggml_cann_create_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t),
-                                arange_ne, arange_nb, GGML_MAX_DIMS);
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    // theta_scale arange, [0,1,...,ne00/2 - 1]
+    int64_t theta_scale_length = ne00 / 2;
+    ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
+                                               theta_scale_length * sizeof(float_t));
+    void* theta_scale_buffer = theta_scale_allocator.get();
+    int64_t theta_scale_ne[] = {theta_scale_length, 1, 1, 1};
+    size_t theta_scale_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
+                               theta_scale_length * sizeof(float_t)};
+
+    aclTensor* acl_theta_scale_tensor =
+        ggml_cann_create_tensor(theta_scale_buffer, ACL_FLOAT, sizeof(float_t),
+                                theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
     float start = 0;
     float step = 1;
-    float stop = src0->ne[0] / 2;
-    float n_elements = src0->ne[0] / 2;
-    aclnn_arange(ctx, acl_arange_tensor, start, stop, step, n_elements);
+    float stop = ne00 / 2;
+    float n_elements = ne00 / 2;
+    aclnn_arange(ctx, acl_theta_scale_tensor, start, stop, step, n_elements);

     // power
-    // aclnnPowScalarTensor(): @param self is tensor which should be scalar, so
-    // use aclnn_pow_tensor_tensor() until fixed. aclScalar* acl_theta_scale =
-    // aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
-    // aclnn_power_scalar_tensor(ctx, acl_theta_scale, acl_arange_tensor,
-    // acl_power_tensor);
-    ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
-                                               arange_length * sizeof(float_t));
-    void* theta_scale_buffer = theta_scale_allocator.get();
-    aclTensor* acl_theta_scale_tensor = aclnn_values(
-        ctx, theta_scale_buffer, arange_length * sizeof(float_t), arange_ne,
-        GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), theta_scale);
-    aclnn_pow_tensor_tensor(ctx, acl_theta_scale_tensor, acl_arange_tensor);
+    aclScalar* acl_theta_scale = aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(PowScalarTensor, acl_theta_scale, acl_theta_scale_tensor, acl_theta_scale_tensor);

     // freq_scale
     if (freq_scale != 1) {
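
Functionally, the arange followed by PowScalarTensor now builds the per-dimension frequency scale in place, dropping the previous `aclnn_values` + `aclnn_pow_tensor_tensor` round trip and one scratch buffer. A CPU analogue of what ends up in `theta_scale_buffer` (a sketch, assuming `theta_scale` = freq_base^(-2/ne00) as in the ggml RoPE convention):

```cpp
#include <cmath>
#include <cstdint>
#include <vector>

// Sketch: element i holds theta_scale^i, i.e. freq_base^(-2*i/ne00).
std::vector<float> build_theta_scale(int64_t ne00, float theta_scale) {
    std::vector<float> out(ne00 / 2);
    for (int64_t i = 0; i < (int64_t) out.size(); ++i) {
        out[i] = std::pow(theta_scale, (float) i);  // scalar base, tensor of exponents
    }
    return out;
}
```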
@@ -2200,28 +2176,27 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     if (src2) {
         aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor(
             src2->data, ggml_cann_type_mapping(src2->type),
-            ggml_type_size(src2->type), arange_ne, arange_nb, GGML_MAX_DIMS);
+            ggml_type_size(src2->type), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
         aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor);
         ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor));
     }

     // position
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     int64_t position_length = src1->ne[0];
-    int64_t position_ne[] = {1, position_length, 1, 1};
-    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
-                            sizeof(int32_t) * position_length,
+    int64_t position_ne[] = {1, 1, position_length, 1};
+    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t), sizeof(int32_t),
                             sizeof(int32_t) * position_length};
     aclTensor* acl_position_tensor = ggml_cann_create_tensor(
         src1->data, ggml_cann_type_mapping(src1->type),
         ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);

     // power * position
-    int64_t theta_length = arange_length * position_length;
+    int64_t theta_length = theta_scale_length * position_length;
     ggml_cann_pool_alloc theta_allocator(ctx.pool(),
                                          theta_length * sizeof(float_t));
     void* theta_buffer = theta_allocator.get();
-    int64_t theta_ne[] = {arange_length, position_length, 1, 1};
+    int64_t theta_ne[] = {theta_scale_length, 1, position_length, 1};
     size_t theta_nb[GGML_MAX_DIMS];
     theta_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
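
The key layout change: positions now live in dimension 2 ({1, 1, n_pos, 1}) while the frequency scale keeps dimension 0 ({ne00/2, 1, 1, 1}), so the element-wise multiply broadcasts straight into a {ne00/2, 1, n_pos, 1} theta tensor, which is why the permute step removed in the next hunk is no longer needed. A CPU sketch of that broadcast:

```cpp
#include <cstdint>
#include <vector>

// theta[p * (ne00/2) + i] = position[p] * theta_scale[i]
std::vector<float> broadcast_theta(const std::vector<float>&   theta_scale,
                                   const std::vector<int32_t>& position) {
    std::vector<float> theta(theta_scale.size() * position.size());
    for (size_t p = 0; p < position.size(); ++p) {
        for (size_t i = 0; i < theta_scale.size(); ++i) {
            theta[p * theta_scale.size() + i] = (float) position[p] * theta_scale[i];
        }
    }
    return theta;
}
```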
@@ -2233,40 +2208,22 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
               acl_theta_tensor);

-    // permute: [0,1,2,3]->[0,2,1,3]
-    int64_t permute_ne[] = {arange_length, 1, position_length, 1};
-    size_t permute_nb[GGML_MAX_DIMS];
-    permute_nb[0] = sizeof(float_t);
-    for (int i = 1; i < GGML_MAX_DIMS; i++) {
-        permute_nb[i] = permute_nb[i - 1] * permute_ne[i - 1];
-    }
-    ggml_cann_pool_alloc permute_allocator(ctx.pool(),
-                                           theta_length * sizeof(float_t));
-    void* permute_buffer = permute_allocator.get();
-    aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
-        permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
-        GGML_MAX_DIMS, ACL_FORMAT_ND);
-    int64_t permute_dim[] = {0, 2, 1, 3};
-    int64_t num_dims = 4;
-    aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
-                  num_dims);
-
     // sin/cos
     ggml_cann_pool_alloc sin_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
-        sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        sin_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_sin(ctx, acl_permute_tensor, acl_sin_tensor);
+    aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);

     ggml_cann_pool_alloc cos_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* cos_buffer = cos_allocator.get();
     aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
-        cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        cos_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
        GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);
+    aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);

     // attn_factor
     if (attn_factor != 1) {
@@ -2282,21 +2239,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     } else {
         int64_t num_repeats = 2;
         int64_t dim = 3;
-        int64_t output_size = arange_length * num_repeats;
+        int64_t output_size = theta_scale_length * num_repeats;
         aclnn_repeat_interleave(ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim,
                                 num_repeats, output_size);
         aclnn_repeat_interleave(ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim,
                                 num_repeats, output_size);
     }

     // release
-    ACL_CHECK(aclDestroyTensor(acl_arange_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_scale_tensor));
     ACL_CHECK(aclDestroyTensor(acl_position_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_tensor));
-    ACL_CHECK(aclDestroyTensor(acl_permute_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_tensor));
     ACL_CHECK(aclDestroyTensor(acl_cos_tensor));
+    ACL_CHECK(aclDestroyScalar(acl_theta_scale));
 }

 #ifdef __cplusplus
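
Judging from the surrounding code, this `else` branch is the non-neox case: repeat_interleave along the last dimension duplicates every angle so that both elements of an adjacent rotated pair see the same sin/cos value. A minimal sketch of that expansion:

```cpp
#include <vector>

// [s0, s1, s2] -> [s0, s0, s1, s1, s2, s2]
std::vector<float> repeat_interleave_2(const std::vector<float>& x) {
    std::vector<float> out;
    out.reserve(x.size() * 2);
    for (float v : x) {
        out.push_back(v);
        out.push_back(v);
    }
    return out;
}
```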
@@ -2318,7 +2274,6 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     // TODO: use ascendc
     // Only test with LLAMA model.
     ggml_tensor* src0 = dst->src[0];  // input
-    // ggml_tensor* src2 = dst->src[2]; // freq_factors, not used now.

     // param
     float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
@@ -2353,13 +2308,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {

     // init cos/sin cache
     ggml_cann_pool_alloc sin_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     ggml_cann_pool_alloc cos_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     void* cos_buffer = cos_allocator.get();

-    int64_t sin_reshape_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
+    int64_t sin_reshape_ne[4] = {ne00, 1, ne02, 1};
     size_t sin_reshape_nb[GGML_MAX_DIMS];
     sin_reshape_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -2372,7 +2327,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
     aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
-                      theta_scale, freq_scale, attn_factor, is_neox);
+                     theta_scale, freq_scale, attn_factor, is_neox);

     aclTensor* acl_src = ggml_cann_create_tensor(src0);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
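
The cached values feed the RotaryPositionEmbedding calls in the final hunk; the rotation that operator is expected to apply is the usual RoPE pairwise rotation (pair indexing differs between normal and neox mode):

```latex
\begin{aligned}
x'_{2i}   &= x_{2i}\cos\theta_{p,i} - x_{2i+1}\sin\theta_{p,i},\\
x'_{2i+1} &= x_{2i}\sin\theta_{p,i} + x_{2i+1}\cos\theta_{p,i}.
\end{aligned}
```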
@@ -2549,46 +2504,51 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     return;
 #endif

-    // src0 == GGML_TYPE_F16
-    // TODO: optimization this `if` code
-    if (src0->type == GGML_TYPE_F16) {
-        ggml_cann_pool_alloc sin_final_allocator(
-            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
-        ggml_cann_pool_alloc cos_final_allocator(
-            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
-        void* sin_final_buffer = sin_final_allocator.get();
-        void* cos_final_buffer = cos_final_allocator.get();
-
-        int64_t sin_final_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
-        size_t sin_final_nb[GGML_MAX_DIMS];
-        sin_final_nb[0] = ggml_type_size(src0->type);
-        for (int i = 1; i < GGML_MAX_DIMS; i++) {
-            sin_final_nb[i] = sin_final_nb[i - 1] * sin_final_ne[i - 1];
+    // ggml_mode = 0 --> aclnn_model = 1
+    int64_t acl_mode = mode == 0 ? 1 : mode;
+
+    switch (src0->type) {
+        case GGML_TYPE_F32: {
+            GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
+                                    acl_sin_reshape_tensor, acl_mode, acl_dst);
+            break;
         }
-        aclTensor* acl_sin_final_tensor = ggml_cann_create_tensor(
-            sin_final_buffer, ggml_cann_type_mapping(src0->type),
-            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
-            GGML_MAX_DIMS);
-        aclTensor* acl_cos_final_tensor = ggml_cann_create_tensor(
-            cos_final_buffer, ggml_cann_type_mapping(src0->type),
-            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
-            GGML_MAX_DIMS);
+        case GGML_TYPE_F16: {
+            ggml_cann_pool_alloc src_trans_allocator(
+                ctx.pool(), ggml_nelements(src0) * sizeof(float));
+            void* src_trans_buffer = src_trans_allocator.get();
+            ggml_cann_pool_alloc dst_trans_allocator(
+                ctx.pool(), ggml_nelements(dst) * sizeof(float));
+            void* dst_trans_buffer = dst_trans_allocator.get();

-        aclnn_cast(ctx, acl_sin_reshape_tensor, acl_sin_final_tensor, dst);
-        aclnn_cast(ctx, acl_cos_reshape_tensor, acl_cos_final_tensor, dst);
-        ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
-        ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
-        acl_sin_reshape_tensor = acl_sin_final_tensor;
-        acl_cos_reshape_tensor = acl_cos_final_tensor;
-    }
+            size_t src_trans_nb[GGML_MAX_DIMS];
+            src_trans_nb[0] = sizeof(float);
+            for (int i = 1; i < GGML_MAX_DIMS; i++) {
+                src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
+            }

-    int acl_mode = mode;
-    if (mode == 0) {
-        acl_mode = 1;
-    }
+            aclTensor* acl_src_trans_tensor = ggml_cann_create_tensor(
+                src_trans_buffer, ACL_FLOAT, sizeof(float), src0->ne, src_trans_nb,
+                GGML_MAX_DIMS);
+            aclTensor* acl_dst_trans_tensor = ggml_cann_create_tensor(
+                dst_trans_buffer, ACL_FLOAT, sizeof(float), dst->ne, src_trans_nb,
+                GGML_MAX_DIMS);
+
+            aclnn_cast(ctx, acl_src, acl_src_trans_tensor, ACL_FLOAT);
+
+            GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src_trans_tensor, acl_cos_reshape_tensor,
+                                    acl_sin_reshape_tensor, acl_mode, acl_dst_trans_tensor);
+
+            aclnn_cast(ctx, acl_dst_trans_tensor, acl_dst, ACL_FLOAT16);

-    GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
-                            acl_sin_reshape_tensor, acl_mode, acl_dst);
+            ACL_CHECK(aclDestroyTensor(acl_src_trans_tensor));
+            ACL_CHECK(aclDestroyTensor(acl_dst_trans_tensor));
+            break;
+        }
+        default:
+            GGML_ABORT("Unsupported tensor type for GGML_OP_ROPE");
+            break;
+    }
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
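
The new GGML_TYPE_F16 branch keeps the sin/cos cache and the operator in float32 and instead round-trips the input tensor through float32, where the removed code cast the cache itself down to src0's half-precision type. A host-side sketch of that structure, assuming ggml.h's fp16 helpers:

```cpp
#include <cstddef>
#include <functional>
#include <vector>
#include "ggml.h"  // ggml_fp16_t, ggml_fp16_to_fp32(), ggml_fp32_to_fp16()

// `rotate` stands in for the RotaryPositionEmbedding call on the float32 buffer.
std::vector<ggml_fp16_t> rope_f16_roundtrip(const std::vector<ggml_fp16_t>& src,
                                            const std::function<void(std::vector<float>&)>& rotate) {
    std::vector<float> tmp(src.size());
    for (size_t i = 0; i < src.size(); ++i) {
        tmp[i] = ggml_fp16_to_fp32(src[i]);   // cast F16 -> F32
    }
    rotate(tmp);                              // RoPE applied in F32
    std::vector<ggml_fp16_t> dst(tmp.size());
    for (size_t i = 0; i < tmp.size(); ++i) {
        dst[i] = ggml_fp32_to_fp16(tmp[i]);   // cast F32 -> F16
    }
    return dst;
}
```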

Diff for: ggml/src/ggml-cann/ggml-cann.cpp (+3 −0)
@@ -2087,6 +2087,9 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
                 return false;
             }

+            if(!ggml_is_contiguous(op->src[0])){
+                return false;
+            }
             return true;
         }
         case GGML_OP_UPSCALE: {
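
The new check makes the backend report GGML_OP_ROPE as unsupported for non-contiguous inputs (the "unsupported case" from the commit message), so ggml falls back to another backend instead of running the CANN path on strided data. Roughly, ggml treats a non-quantized tensor as contiguous when each stride equals the previous stride times the previous dimension; a simplified sketch of that condition:

```cpp
#include <cstddef>
#include <cstdint>

// Simplified contiguity test (ignores quantized block layouts).
static bool is_contiguous_4d(const int64_t ne[4], const size_t nb[4], size_t elem_size) {
    if (nb[0] != elem_size) {
        return false;
    }
    for (int i = 1; i < 4; ++i) {
        if (nb[i] != nb[i - 1] * (size_t) ne[i - 1]) {
            return false;
        }
    }
    return true;
}
```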
