@@ -64,6 +64,7 @@
 #include <aclnnop/aclnn_reflection_pad1d.h>
 #include <aclnnop/aclnn_eq_tensor.h>
 #include <aclnnop/aclnn_gt_scalar.h>
+#include <aclnnop/aclnn_pow.h>
 #include <float.h>
 
 #include <cmath>
@@ -144,23 +145,6 @@ static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
     GGML_CANN_CALL_ACLNN_OP(Cast, acl_src, cast_data_type, acl_dst);
 }
 
-/**
- * @brief Casts the elements of a tensor to a specified data type using the CANN backend.
- *
- * @details This function performs a type conversion on the elements of the input tensor `acl_src`
- *          and stores the results in the destination tensor `acl_dst`. The conversion type is
- *          determined based on the `dst` tensor's data type.
- *
- * @param ctx The context for the CANN backend operations.
- * @param acl_src The source tensor whose elements will be cast.
- * @param acl_dst The destination tensor that will store the casted elements.
- * @param dst The ggml tensor specifying the target data type.
- */
-static void aclnn_cast(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-                       aclTensor* acl_dst, ggml_tensor* dst) {
-    aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
-}
-
 void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
     GGML_ASSERT(ggml_can_repeat(src, dst));
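Note: with the `ggml_tensor*` convenience overload of `aclnn_cast` deleted above, every caller now names the target type explicitly. A minimal sketch of the surviving call pattern, assuming the remaining overload keeps the `cast_data_type` parameter shown in this hunk:

```cpp
// Each call site maps the destination tensor's ggml type to its ACL
// equivalent and passes it directly to the remaining overload.
aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
```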
@@ -767,7 +751,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         if (dst->type == src0->type) {
             cann_copy(ctx, acl_src, acl_dst);
         } else {
-            aclnn_cast(ctx, acl_src, acl_dst, dst);
+            aclnn_cast(ctx, acl_src, acl_dst, ggml_cann_type_mapping(dst->type));
         }
     } else {
         if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) {
@@ -792,7 +776,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ggml_type_size(dst->type), src0->ne, src_trans_nb,
                 GGML_MAX_DIMS);
 
-            aclnn_cast(ctx, acl_src, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
             size_t cpy_size = ggml_nbytes(dst);
             ACL_CHECK(aclrtMemcpyAsync(
                 dst->data, cpy_size, src_trans_buffer, cpy_size,
@@ -814,7 +798,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
                 ggml_type_size(dst->type), src0->ne, src_trans_nb,
                 GGML_MAX_DIMS);
 
-            aclnn_cast(ctx, acl_src, src_trans_tensor, dst);
+            aclnn_cast(ctx, acl_src, src_trans_tensor, ggml_cann_type_mapping(dst->type));
 
             size_t cpy_size = ggml_nbytes(dst);
             ACL_CHECK(aclrtMemcpyAsync(dst->data, cpy_size, src_trans_buffer,
@@ -1158,7 +1142,7 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
             tmp_cast_buffer, ggml_cann_type_mapping(dst->type),
             ggml_type_size(dst->type), tmp_im2col_ne, temp_cast_nb,
             GGML_MAX_DIMS - 1, ACL_FORMAT_ND);
-        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, dst);
+        aclnn_cast(ctx, tmp_im2col_tensor, tmp_cast_tensor, ggml_cann_type_mapping(dst->type));
     }
 
     // post-processing
@@ -1733,7 +1717,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         aclTensor* src_trans_tensor = ggml_cann_create_tensor(
             src_trans_buffer, ACL_FLOAT, ggml_type_size(dst->type),
             src0->ne, src_trans_nb, GGML_MAX_DIMS);
-        aclnn_cast(ctx, acl_src0, src_trans_tensor, dst);
+        aclnn_cast(ctx, acl_src0, src_trans_tensor, ggml_cann_type_mapping(dst->type));
         aclnn_embedding_4d(ctx, src_trans_buffer, src0->ne,
                            src_trans_nb, src1, dst);
         ACL_CHECK(aclDestroyTensor(acl_src0));
@@ -2074,7 +2058,7 @@ static void ggml_cann_mul_mat_quant(ggml_backend_cann_context& ctx,
         output_buffer, ACL_FLOAT16, output_elem_size, output_cast_ne,
         output_cast_nb, GGML_MAX_DIMS);
     aclTensor* acl_dst_tensor = ggml_cann_create_tensor(dst);
-    aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, dst);
+    aclnn_cast(ctx, acl_output_tensor, acl_dst_tensor, ggml_cann_type_mapping(dst->type));
 
     ACL_CHECK(aclDestroyTensor(acl_output_tensor));
     ACL_CHECK(aclDestroyTensor(acl_dst_tensor));
@@ -2159,37 +2143,29 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     ggml_tensor* src1 = dst->src[1];  // position
     ggml_tensor* src2 = dst->src[2];  // freq_factors
 
-    // arange, [0,1,...,ne0/2]
-    int64_t arange_length = src0->ne[0] / 2;
-    ggml_cann_pool_alloc arange_allocator(ctx.pool(),
-                                          arange_length * sizeof(float_t));
-    void* arange_buffer = arange_allocator.get();
-    int64_t arange_ne[] = {arange_length, 1, 1, 1};
-    size_t arange_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
-                          arange_length * sizeof(float_t)};
-
-    aclTensor* acl_arange_tensor =
-        ggml_cann_create_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t),
-                                arange_ne, arange_nb, GGML_MAX_DIMS);
+    GGML_TENSOR_BINARY_OP_LOCALS
+
+    // theta_scale arange, [0,1,...,ne00/2 - 1]
+    int64_t theta_scale_length = ne00 / 2;
+    ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
+                                               theta_scale_length * sizeof(float_t));
+    void* theta_scale_buffer = theta_scale_allocator.get();
+    int64_t theta_scale_ne[] = {theta_scale_length, 1, 1, 1};
+    size_t theta_scale_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
+                               theta_scale_length * sizeof(float_t)};
+
+    aclTensor* acl_theta_scale_tensor =
+        ggml_cann_create_tensor(theta_scale_buffer, ACL_FLOAT, sizeof(float_t),
+                                theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
     float start = 0;
     float step = 1;
-    float stop = src0->ne[0] / 2;
-    float n_elements = src0->ne[0] / 2;
-    aclnn_arange(ctx, acl_arange_tensor, start, stop, step, n_elements);
+    float stop = ne00 / 2;
+    float n_elements = ne00 / 2;
+    aclnn_arange(ctx, acl_theta_scale_tensor, start, stop, step, n_elements);
 
     // power
-    // aclnnPowScalarTensor(): @param self is tensor which should be scalar, so
-    // use aclnn_pow_tensor_tensor() until fixed. aclScalar* acl_theta_scale =
-    // aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
-    // aclnn_power_scalar_tensor(ctx, acl_theta_scale, acl_arange_tensor,
-    // acl_power_tensor);
-    ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
-                                               arange_length * sizeof(float_t));
-    void* theta_scale_buffer = theta_scale_allocator.get();
-    aclTensor* acl_theta_scale_tensor = aclnn_values(
-        ctx, theta_scale_buffer, arange_length * sizeof(float_t), arange_ne,
-        GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), theta_scale);
-    aclnn_pow_tensor_tensor(ctx, acl_theta_scale_tensor, acl_arange_tensor);
+    aclScalar* acl_theta_scale = aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(PowScalarTensor, acl_theta_scale, acl_theta_scale_tensor, acl_theta_scale_tensor);
 
     // freq_scale
     if (freq_scale != 1) {
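For context: `GGML_TENSOR_BINARY_OP_LOCALS` expands to local aliases for the operand dimensions (`ne00`, `ne02`, ...), and the arange-plus-power sequence above builds the standard RoPE frequency table. Written out, assuming the caller computes `theta_scale` as `freq_base^(-2/ne00)` (the usual ggml convention):

```latex
% d = ne00; i indexes the arange [0, d/2)
\theta_i = \texttt{theta\_scale}^{\,i} = \texttt{freq\_base}^{-2i/d}
% per position p, optionally divided by freq_factors (src2):
\mathrm{angle}_{p,i} = p \cdot \texttt{freq\_scale} \cdot \theta_i / \texttt{freq\_factors}_i
```

The `PowScalarTensor` call computes `theta_scale^arange` in place, replacing the old workaround that materialized a constant tensor and called `aclnn_pow_tensor_tensor`.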
@@ -2200,28 +2176,27 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     if (src2) {
         aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor(
             src2->data, ggml_cann_type_mapping(src2->type),
-            ggml_type_size(src2->type), arange_ne, arange_nb, GGML_MAX_DIMS);
+            ggml_type_size(src2->type), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
         aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor);
         ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor));
     }
 
     // position
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     int64_t position_length = src1->ne[0];
-    int64_t position_ne[] = {1, position_length, 1, 1};
-    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
-                            sizeof(int32_t) * position_length,
+    int64_t position_ne[] = {1, 1, position_length, 1};
+    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t), sizeof(int32_t),
                             sizeof(int32_t) * position_length};
     aclTensor* acl_position_tensor = ggml_cann_create_tensor(
         src1->data, ggml_cann_type_mapping(src1->type),
         ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);
 
     // power * position
-    int64_t theta_length = arange_length * position_length;
+    int64_t theta_length = theta_scale_length * position_length;
     ggml_cann_pool_alloc theta_allocator(ctx.pool(),
                                          theta_length * sizeof(float_t));
     void* theta_buffer = theta_allocator.get();
-    int64_t theta_ne[] = {arange_length, position_length, 1, 1};
+    int64_t theta_ne[] = {theta_scale_length, 1, position_length, 1};
     size_t theta_nb[GGML_MAX_DIMS];
     theta_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -2233,40 +2208,22 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
               acl_theta_tensor);
 
-    // permute: [0,1,2,3]->[0,2,1,3]
-    int64_t permute_ne[] = {arange_length, 1, position_length, 1};
-    size_t permute_nb[GGML_MAX_DIMS];
-    permute_nb[0] = sizeof(float_t);
-    for (int i = 1; i < GGML_MAX_DIMS; i++) {
-        permute_nb[i] = permute_nb[i - 1] * permute_ne[i - 1];
-    }
-    ggml_cann_pool_alloc permute_allocator(ctx.pool(),
-                                           theta_length * sizeof(float_t));
-    void* permute_buffer = permute_allocator.get();
-    aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
-        permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
-        GGML_MAX_DIMS, ACL_FORMAT_ND);
-    int64_t permute_dim[] = {0, 2, 1, 3};
-    int64_t num_dims = 4;
-    aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
-                  num_dims);
-
     // sin/cos
     ggml_cann_pool_alloc sin_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
-        sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        sin_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_sin(ctx, acl_permute_tensor, acl_sin_tensor);
+    aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);
 
     ggml_cann_pool_alloc cos_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* cos_buffer = cos_allocator.get();
     aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
-        cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        cos_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);
+    aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);
 
     // attn_factor
     if (attn_factor != 1) {
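Why the permute block could be dropped: `position_ne` now puts the position count on dim 2, so the broadcast multiply against `theta_scale` (which lives on dim 0) yields theta already in the layout the sin/cos cache consumes. A shape sketch, for illustration only:

```cpp
// Broadcast shapes in ggml dimension order (ne[0] is fastest-varying):
//   theta_scale : {ne00/2, 1, 1,     1}
//   position    : {1,      1, n_pos, 1}
//   theta       : {ne00/2, 1, n_pos, 1}   // position * theta_scale
// This is already the sin/cos cache layout, so the former
// [0,1,2,3] -> [0,2,1,3] permute and its scratch buffer are redundant.
```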
@@ -2282,21 +2239,20 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     } else {
         int64_t num_repeats = 2;
         int64_t dim = 3;
-        int64_t output_size = arange_length * num_repeats;
+        int64_t output_size = theta_scale_length * num_repeats;
         aclnn_repeat_interleave(ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim,
                                 num_repeats, output_size);
         aclnn_repeat_interleave(ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim,
                                 num_repeats, output_size);
     }
 
     // release
-    ACL_CHECK(aclDestroyTensor(acl_arange_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_scale_tensor));
     ACL_CHECK(aclDestroyTensor(acl_position_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_tensor));
-    ACL_CHECK(aclDestroyTensor(acl_permute_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_tensor));
     ACL_CHECK(aclDestroyTensor(acl_cos_tensor));
+    ACL_CHECK(aclDestroyScalar(acl_theta_scale));
 }
 
 #ifdef __cplusplus
@@ -2318,7 +2274,6 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     // TODO: use ascendc
     // Only test with LLAMA model.
     ggml_tensor* src0 = dst->src[0];  // input
-    // ggml_tensor* src2 = dst->src[2]; // freq_factors, not used now.
 
     // param
     float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow;
@@ -2353,13 +2308,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
     // init cos/sin cache
     ggml_cann_pool_alloc sin_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     ggml_cann_pool_alloc cos_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     void* cos_buffer = cos_allocator.get();
 
-    int64_t sin_reshape_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
+    int64_t sin_reshape_ne[4] = {ne00, 1, ne02, 1};
     size_t sin_reshape_nb[GGML_MAX_DIMS];
     sin_reshape_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
@@ -2372,7 +2327,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
         ggml_cann_create_tensor(cos_buffer, ACL_FLOAT, sizeof(float_t),
                                 sin_reshape_ne, sin_reshape_nb, GGML_MAX_DIMS);
     aclnn_cache_init(ctx, dst, acl_cos_reshape_tensor, acl_sin_reshape_tensor,
-                     theta_scale, freq_scale, attn_factor, is_neox);
+                     theta_scale, freq_scale, attn_factor, is_neox);
 
     aclTensor* acl_src = ggml_cann_create_tensor(src0);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
@@ -2549,46 +2504,51 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     return;
 #endif
 
-    // src0 == GGML_TYPE_F16
-    // TODO: optimization this `if` code
-    if (src0->type == GGML_TYPE_F16) {
-        ggml_cann_pool_alloc sin_final_allocator(
-            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
-        ggml_cann_pool_alloc cos_final_allocator(
-            ctx.pool(), src0->ne[0] * src0->ne[2] * ggml_type_size(src0->type));
-        void* sin_final_buffer = sin_final_allocator.get();
-        void* cos_final_buffer = cos_final_allocator.get();
-
-        int64_t sin_final_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
-        size_t sin_final_nb[GGML_MAX_DIMS];
-        sin_final_nb[0] = ggml_type_size(src0->type);
-        for (int i = 1; i < GGML_MAX_DIMS; i++) {
-            sin_final_nb[i] = sin_final_nb[i - 1] * sin_final_ne[i - 1];
+    // ggml_mode = 0 --> aclnn_mode = 1
+    int64_t acl_mode = mode == 0 ? 1 : mode;
+
+    switch (src0->type) {
+        case GGML_TYPE_F32: {
+            GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
+                                    acl_sin_reshape_tensor, acl_mode, acl_dst);
+            break;
         }
-        aclTensor* acl_sin_final_tensor = ggml_cann_create_tensor(
-            sin_final_buffer, ggml_cann_type_mapping(src0->type),
-            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
-            GGML_MAX_DIMS);
-        aclTensor* acl_cos_final_tensor = ggml_cann_create_tensor(
-            cos_final_buffer, ggml_cann_type_mapping(src0->type),
-            ggml_type_size(src0->type), sin_final_ne, sin_final_nb,
-            GGML_MAX_DIMS);
+        case GGML_TYPE_F16: {
+            ggml_cann_pool_alloc src_trans_allocator(
+                ctx.pool(), ggml_nelements(src0) * sizeof(float));
+            void* src_trans_buffer = src_trans_allocator.get();
+            ggml_cann_pool_alloc dst_trans_allocator(
+                ctx.pool(), ggml_nelements(dst) * sizeof(float));
+            void* dst_trans_buffer = dst_trans_allocator.get();
 
-        aclnn_cast(ctx, acl_sin_reshape_tensor, acl_sin_final_tensor, dst);
-        aclnn_cast(ctx, acl_cos_reshape_tensor, acl_cos_final_tensor, dst);
-        ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
-        ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
-        acl_sin_reshape_tensor = acl_sin_final_tensor;
-        acl_cos_reshape_tensor = acl_cos_final_tensor;
-    }
+            size_t src_trans_nb[GGML_MAX_DIMS];
+            src_trans_nb[0] = sizeof(float);
+            for (int i = 1; i < GGML_MAX_DIMS; i++) {
+                src_trans_nb[i] = src_trans_nb[i - 1] * src0->ne[i - 1];
+            }
 
-    int acl_mode = mode;
-    if (mode == 0) {
-        acl_mode = 1;
-    }
+            aclTensor* acl_src_trans_tensor = ggml_cann_create_tensor(
+                src_trans_buffer, ACL_FLOAT, sizeof(float), src0->ne, src_trans_nb,
+                GGML_MAX_DIMS);
+            aclTensor* acl_dst_trans_tensor = ggml_cann_create_tensor(
+                dst_trans_buffer, ACL_FLOAT, sizeof(float), dst->ne, src_trans_nb,
+                GGML_MAX_DIMS);
+
+            aclnn_cast(ctx, acl_src, acl_src_trans_tensor, ACL_FLOAT);
+
+            GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src_trans_tensor, acl_cos_reshape_tensor,
+                                    acl_sin_reshape_tensor, acl_mode, acl_dst_trans_tensor);
+
+            aclnn_cast(ctx, acl_dst_trans_tensor, acl_dst, ACL_FLOAT16);
 
-    GGML_CANN_CALL_ACLNN_OP(RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
-                            acl_sin_reshape_tensor, acl_mode, acl_dst);
+            ACL_CHECK(aclDestroyTensor(acl_src_trans_tensor));
+            ACL_CHECK(aclDestroyTensor(acl_dst_trans_tensor));
+            break;
+        }
+        default:
+            GGML_ABORT("Unsupported tensor type for GGML_OP_ROPE");
+            break;
+    }
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_cos_reshape_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_reshape_tensor));
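For readers unfamiliar with the op: the F16 branch round-trips through F32 because the rotation here runs on float working buffers. As a reference for what `RotaryPositionEmbedding` computes, here is a hypothetical host-side sketch of mode-0 (adjacent-pair) RoPE for one row, using the same frequency convention as `aclnn_cache_init`; names and layout are illustrative, not the backend API:

```cpp
#include <cmath>

// Rotate each adjacent pair (x[i], x[i+1]) of a row of length n_dims
// by angle = pos * freq_scale * theta_i, theta_i = freq_base^(-2i/n_dims).
static void rope_ref_row(float* x, int n_dims, int pos,
                         float freq_base, float freq_scale) {
    const float theta_scale = std::pow(freq_base, -2.0f / n_dims);
    float theta = 1.0f;  // theta_scale^0
    for (int i = 0; i < n_dims; i += 2) {
        const float angle = pos * freq_scale * theta;
        const float c = std::cos(angle);
        const float s = std::sin(angle);
        const float x0 = x[i], x1 = x[i + 1];
        x[i]     = x0 * c - x1 * s;
        x[i + 1] = x0 * s + x1 * c;
        theta *= theta_scale;  // advance to theta_{i+1}
    }
}
```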