@@ -2506,7 +2506,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2506
2506
2507
2507
// ggml_mode = 0 --> acl_mode = 1
2508
2508
int64_t acl_mode = mode == 0 ? 1 : mode;
2509
-
2509
+
2510
2510
switch (src0->type ) {
2511
2511
case GGML_TYPE_F32: {
2512
2512
GGML_CANN_CALL_ACLNN_OP (RotaryPositionEmbedding, acl_src, acl_cos_reshape_tensor,
@@ -2520,28 +2520,27 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2520
2520
ggml_cann_pool_alloc dst_trans_allocator (
2521
2521
ctx.pool (), ggml_nelements (dst) * sizeof (float ));
2522
2522
void * dst_trans_buffer = dst_trans_allocator.get ();
2523
-
2523
+
2524
2524
size_t src_trans_nb[GGML_MAX_DIMS];
2525
2525
src_trans_nb[0 ] = sizeof (float );
2526
2526
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2527
2527
src_trans_nb[i] = src_trans_nb[i - 1 ] * src0->ne [i - 1 ];
2528
2528
}
2529
-
2529
+
2530
2530
aclTensor* acl_src_trans_tensor = ggml_cann_create_tensor (
2531
2531
src_trans_buffer, ACL_FLOAT, sizeof (float ), src0->ne , src_trans_nb,
2532
2532
GGML_MAX_DIMS);
2533
-
2534
2533
aclTensor* acl_dst_trans_tensor = ggml_cann_create_tensor (
2535
2534
dst_trans_buffer, ACL_FLOAT, sizeof (float ), dst->ne , src_trans_nb,
2536
2535
GGML_MAX_DIMS);
2537
-
2536
+
2538
2537
aclnn_cast (ctx, acl_src, acl_src_trans_tensor, ACL_FLOAT);
2539
-
2538
+
2540
2539
GGML_CANN_CALL_ACLNN_OP (RotaryPositionEmbedding, acl_src_trans_tensor, acl_cos_reshape_tensor,
2541
2540
acl_sin_reshape_tensor, acl_mode, acl_dst_trans_tensor);
2542
-
2541
+
2543
2542
aclnn_cast (ctx, acl_dst_trans_tensor, acl_dst, ACL_FLOAT16);
2544
-
2543
+
2545
2544
ACL_CHECK (aclDestroyTensor (acl_src_trans_tensor));
2546
2545
ACL_CHECK (aclDestroyTensor (acl_dst_trans_tensor));
2547
2546
break ;
0 commit comments