
Commit 3f33cb4 (parent: 526739b)
Author: noemotiovon

[CANN]Opt ROPE optimization

Signed-off-by: noemotiovon <[email protected]>

File tree: 1 file changed (+38 / -66 lines)


ggml/src/ggml-cann/aclnn_ops.cpp (+38 / -66)
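In brief: aclnn_cache_init builds the RoPE sin/cos cache on the NPU. The commit writes the arange directly into the theta_scale buffer and raises the base to it with aclnnPowScalarTensor (replacing a separate arange buffer plus aclnn_pow_tensor_tensor), and lays the position tensor out along dim 2 so the broadcasted multiply lands in the final layout, eliminating the explicit permute. As orientation for what the cache holds, here is a hedged CPU sketch of the core math (my reading of the surrounding code, not part of the commit; the freq_factors division, attn_factor scaling, and the interleaving step visible in the later hunks are applied on top of this):

#include <cmath>
#include <cstdint>
#include <vector>

// Hedged CPU reference for the cache aclnn_cache_init builds on the NPU
// (illustrative only). In ggml the caller passes
// theta_scale = powf(freq_base, -2.0f / n_dims); pos is src1 (int32).
static void rope_cache_reference(const int32_t* pos, int64_t n_pos,
                                 int64_t ne00, float theta_scale,
                                 float freq_scale,
                                 std::vector<float>& sin_cache,
                                 std::vector<float>& cos_cache) {
    const int64_t half = ne00 / 2;  // one angle per rotated pair
    sin_cache.assign((size_t) (n_pos * half), 0.0f);
    cos_cache.assign((size_t) (n_pos * half), 0.0f);
    for (int64_t p = 0; p < n_pos; ++p) {
        for (int64_t k = 0; k < half; ++k) {
            // theta(p, k) = pos[p] * freq_scale * theta_scale^k
            const float theta = (float) pos[p] * freq_scale *
                                std::pow(theta_scale, (float) k);
            sin_cache[(size_t) (p * half + k)] = std::sin(theta);
            cos_cache[(size_t) (p * half + k)] = std::cos(theta);
        }
    }
}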
@@ -64,6 +64,7 @@
 #include <aclnnop/aclnn_reflection_pad1d.h>
 #include <aclnnop/aclnn_eq_tensor.h>
 #include <aclnnop/aclnn_gt_scalar.h>
+#include <aclnnop/aclnn_pow.h>
 #include <float.h>
 
 #include <cmath>
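The new header pulls in aclnnPowScalarTensor, called below through GGML_CANN_CALL_ACLNN_OP. ACLNN operators launch in two phases: a workspace-size query followed by the actual enqueue. A minimal sketch of that pattern for this operator, assuming the standard CANN runtime calls (the real wrapper macro lives elsewhere in the CANN backend and manages workspace buffers differently):

// Hedged sketch of the two-phase ACLNN launch that a wrapper like
// GGML_CANN_CALL_ACLNN_OP(PowScalarTensor, ...) must perform; workspace
// handling is simplified (the backend reuses pooled buffers instead).
static void pow_scalar_tensor(aclrtStream stream, aclScalar* base,
                              aclTensor* exponent, aclTensor* out) {
    uint64_t ws_size = 0;
    aclOpExecutor* executor = nullptr;
    // Phase 1: query how much device workspace this launch needs.
    ACL_CHECK(aclnnPowScalarTensorGetWorkspaceSize(base, exponent, out,
                                                   &ws_size, &executor));
    void* ws = nullptr;
    if (ws_size > 0) {
        ACL_CHECK(aclrtMalloc(&ws, ws_size, ACL_MEM_MALLOC_HUGE_FIRST));
    }
    // Phase 2: enqueue the kernel on the stream (free ws after it syncs).
    ACL_CHECK(aclnnPowScalarTensor(ws, ws_size, executor, stream));
}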
@@ -2159,69 +2160,60 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     ggml_tensor* src1 = dst->src[1];  // position
     ggml_tensor* src2 = dst->src[2];  // freq_factors
 
-    // arange, [0,1,...,ne0/2]
-    int64_t arange_length = src0->ne[0] / 2;
-    ggml_cann_pool_alloc arange_allocator(ctx.pool(),
-                                          arange_length * sizeof(float_t));
-    void* arange_buffer = arange_allocator.get();
-    int64_t arange_ne[] = {arange_length, 1, 1, 1};
-    size_t arange_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
-                          arange_length * sizeof(float_t)};
-
-    aclTensor* acl_arange_tensor =
-        ggml_cann_create_tensor(arange_buffer, ACL_FLOAT, sizeof(float_t),
-                                arange_ne, arange_nb, GGML_MAX_DIMS);
-    float start = 0;
-    float step = 1;
-    float stop = src0->ne[0] / 2;
-    float n_elements = src0->ne[0] / 2;
-    aclnn_arange(ctx, acl_arange_tensor, start, stop, step, n_elements);
+    GGML_TENSOR_BINARY_OP_LOCALS
 
-    // power
-    // aclnnPowScalarTensor(): @param self is tensor which should be scalar, so
-    // use aclnn_pow_tensor_tensor() until fixed. aclScalar* acl_theta_scale =
-    // aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
-    // aclnn_power_scalar_tensor(ctx, acl_theta_scale, acl_arange_tensor,
-    // acl_power_tensor);
+    // theta_scale arange, [0,1,...,ne0/2]
+    int64_t theta_scale_length = ne00 / 2;
     ggml_cann_pool_alloc theta_scale_allocator(ctx.pool(),
-                                               arange_length * sizeof(float_t));
+                                               theta_scale_length * sizeof(float_t));
     void* theta_scale_buffer = theta_scale_allocator.get();
-    aclTensor* acl_theta_scale_tensor = aclnn_values(
-        ctx, theta_scale_buffer, arange_length * sizeof(float_t), arange_ne,
-        GGML_MAX_DIMS, ACL_FLOAT, sizeof(float_t), theta_scale);
-    aclnn_pow_tensor_tensor(ctx, acl_theta_scale_tensor, acl_arange_tensor);
+    int64_t theta_scale_ne[] = {theta_scale_length, 1, 1, 1};
+    size_t theta_scale_nb[] = {sizeof(float_t), sizeof(float_t), sizeof(float_t),
+                               theta_scale_length * sizeof(float_t)};
+
+    aclTensor* acl_theat_scale_tensor =
+        ggml_cann_create_tensor(theta_scale_buffer, ACL_FLOAT, sizeof(float_t),
+                                theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
+    float start = 0;
+    float step = 1;
+    float stop = ne00 / 2;
+    float n_elements = ne00 / 2;
+    aclnn_arange(ctx, acl_theat_scale_tensor, start, stop, step, n_elements);
 
+    // power
+    aclScalar* acl_theta_scale = aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
+    GGML_CANN_CALL_ACLNN_OP(PowScalarTensor, acl_theta_scale, acl_theat_scale_tensor, acl_theat_scale_tensor);
+
     // freq_scale
     if (freq_scale != 1) {
-        aclnn_muls(ctx, acl_theta_scale_tensor, freq_scale, nullptr, true);
+        aclnn_muls(ctx, acl_theat_scale_tensor, freq_scale, nullptr, true);
     }
 
     // freq_factors
     if (src2) {
         aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor(
             src2->data, ggml_cann_type_mapping(src2->type),
-            ggml_type_size(src2->type), arange_ne, arange_nb, GGML_MAX_DIMS);
-        aclnn_div(ctx, acl_theta_scale_tensor, acl_freq_factors_tensor);
+            ggml_type_size(src2->type), theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
+        aclnn_div(ctx, acl_theat_scale_tensor, acl_freq_factors_tensor);
         ACL_CHECK(aclDestroyTensor(acl_freq_factors_tensor));
     }
 
     // position
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     int64_t position_length = src1->ne[0];
-    int64_t position_ne[] = {1, position_length, 1, 1};
-    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t),
-                            sizeof(int32_t) * position_length,
+    int64_t position_ne[] = {1, 1, position_length, 1};
+    size_t position_nb[] = {sizeof(int32_t), sizeof(int32_t), sizeof(int32_t),
                             sizeof(int32_t) * position_length};
     aclTensor* acl_position_tensor = ggml_cann_create_tensor(
         src1->data, ggml_cann_type_mapping(src1->type),
         ggml_type_size(src1->type), position_ne, position_nb, GGML_MAX_DIMS);
 
     // power * position
-    int64_t theta_length = arange_length * position_length;
+    int64_t theta_length = theta_scale_length * position_length;
     ggml_cann_pool_alloc theta_allocator(ctx.pool(),
                                          theta_length * sizeof(float_t));
     void* theta_buffer = theta_allocator.get();
-    int64_t theta_ne[] = {arange_length, position_length, 1, 1};
+    int64_t theta_ne[] = {theta_scale_length, 1, position_length, 1};
     size_t theta_nb[GGML_MAX_DIMS];
     theta_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
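Two layout decisions in this hunk do the real work: theta_scale occupies dim 0 ({ne00/2, 1, 1, 1}) while position now occupies dim 2 ({1, 1, position_length, 1}), so the broadcasted aclnn_mul in the next hunk produces theta with shape {ne00/2, 1, position_length, 1} directly; the old code reached that shape only through an explicit permute. The nb (stride) arrays follow the usual contiguous rule that the loop closing this hunk computes; a standalone sketch, with names of my own choosing:

#include <cstddef>
#include <cstdint>

// Contiguous byte strides for a 4-D ggml/ACL tensor, mirroring the
// theta_nb loop in the diff: nb[0] = element size, nb[i] = nb[i-1] * ne[i-1].
static void contiguous_strides(const int64_t ne[4], size_t elem_size,
                               size_t nb[4]) {
    nb[0] = elem_size;
    for (int i = 1; i < 4; ++i) {
        nb[i] = nb[i - 1] * (size_t) ne[i - 1];
    }
}

// Example: theta_ne = {ne00/2, 1, position_length, 1} with float data gives
// nb = {4, ne00/2*4, ne00/2*4, ne00/2*position_length*4}.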
@@ -2230,43 +2222,25 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     aclTensor* acl_theta_tensor =
         ggml_cann_create_tensor(theta_buffer, ACL_FLOAT, sizeof(float_t),
                                 theta_ne, theta_nb, GGML_MAX_DIMS);
-    aclnn_mul(ctx, acl_position_tensor, acl_theta_scale_tensor,
+    aclnn_mul(ctx, acl_position_tensor, acl_theat_scale_tensor,
               acl_theta_tensor);
 
-    // permute: [0,1,2,3]->[0,2,1,3]
-    int64_t permute_ne[] = {arange_length, 1, position_length, 1};
-    size_t permute_nb[GGML_MAX_DIMS];
-    permute_nb[0] = sizeof(float_t);
-    for (int i = 1; i < GGML_MAX_DIMS; i++) {
-        permute_nb[i] = permute_nb[i - 1] * permute_ne[i - 1];
-    }
-    ggml_cann_pool_alloc permute_allocator(ctx.pool(),
-                                           theta_length * sizeof(float_t));
-    void* permute_buffer = permute_allocator.get();
-    aclTensor* acl_permute_tensor = ggml_cann_create_tensor(
-        permute_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
-        GGML_MAX_DIMS, ACL_FORMAT_ND);
-    int64_t permute_dim[] = {0, 2, 1, 3};
-    int64_t num_dims = 4;
-    aclnn_permute(ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
-                  num_dims);
-
     // sin/cos
     ggml_cann_pool_alloc sin_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     aclTensor* acl_sin_tensor = ggml_cann_create_tensor(
-        sin_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        sin_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_sin(ctx, acl_permute_tensor, acl_sin_tensor);
+    aclnn_sin(ctx, acl_theta_tensor, acl_sin_tensor);
 
     ggml_cann_pool_alloc cos_allocator(ctx.pool(),
                                        theta_length * sizeof(float_t));
     void* cos_buffer = cos_allocator.get();
     aclTensor* acl_cos_tensor = ggml_cann_create_tensor(
-        cos_buffer, ACL_FLOAT, sizeof(float_t), permute_ne, permute_nb,
+        cos_buffer, ACL_FLOAT, sizeof(float_t), theta_ne, theta_nb,
         GGML_MAX_DIMS, ACL_FORMAT_ND);
-    aclnn_cos(ctx, acl_permute_tensor, acl_cos_tensor);
+    aclnn_cos(ctx, acl_theta_tensor, acl_cos_tensor);
 
     // attn_factor
     if (attn_factor != 1) {
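The permute can be deleted because it was byte-for-byte an identity copy: contiguous theta with the old shape {half, n_pos, 1, 1} stores element (k, p) at flat offset k + p * half, and the permuted {half, 1, n_pos, 1} buffer stored out(k, 0, p, 0) = in(k, p, 0, 0) at that same offset. Declaring theta with the permuted shape from the start therefore drops one theta_length * sizeof(float_t) allocation plus one transpose kernel, and sin/cos can read acl_theta_tensor directly. A small self-check of the offset equivalence (illustrative, not backend code):

#include <cassert>
#include <cstdint>

// Old path: theta {half, n_pos, 1, 1}, permuted by dims {0, 2, 1, 3} into a
// {half, 1, n_pos, 1} buffer. New path: theta is created with the latter
// shape directly. Both place angle (k, p) at the same flat offset.
static int64_t offset_old_permuted(int64_t k, int64_t p, int64_t half) {
    return k /* dim 0, stride 1 */ + p * half /* dim 2, stride half*1 */;
}
static int64_t offset_new_broadcast(int64_t k, int64_t p, int64_t half) {
    return k + p * half;  // contiguous {half, 1, n_pos, 1}
}
int main() {
    const int64_t half = 64, n_pos = 8;
    for (int64_t p = 0; p < n_pos; ++p)
        for (int64_t k = 0; k < half; ++k)
            assert(offset_old_permuted(k, p, half) ==
                   offset_new_broadcast(k, p, half));
    return 0;
}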
@@ -2282,19 +2256,17 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
     } else {
         int64_t num_repeats = 2;
         int64_t dim = 3;
-        int64_t output_size = arange_length * num_repeats;
+        int64_t output_size = theta_scale_length * num_repeats;
         aclnn_repeat_interleave(ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim,
                                 num_repeats, output_size);
         aclnn_repeat_interleave(ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim,
                                 num_repeats, output_size);
     }
 
     // release
-    ACL_CHECK(aclDestroyTensor(acl_arange_tensor));
-    ACL_CHECK(aclDestroyTensor(acl_theta_scale_tensor));
+    ACL_CHECK(aclDestroyTensor(acl_theat_scale_tensor));
     ACL_CHECK(aclDestroyTensor(acl_position_tensor));
     ACL_CHECK(aclDestroyTensor(acl_theta_tensor));
-    ACL_CHECK(aclDestroyTensor(acl_permute_tensor));
     ACL_CHECK(aclDestroyTensor(acl_sin_tensor));
     ACL_CHECK(aclDestroyTensor(acl_cos_tensor));
 }
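This branch (the other arm of the if/else lies outside the hunk) doubles the cached angles along the frequency axis so each sin/cos value covers both elements of its rotated pair; dim = 3 here is aclnn's last dimension, which as I read the backend's dimension mapping corresponds to the innermost ggml axis. A CPU sketch of interleaved repetition with num_repeats = 2:

#include <cstddef>
#include <vector>

// CPU sketch of repeat_interleave with num_repeats = 2 along the innermost
// axis: out[2*k] = out[2*k + 1] = in[k].
static std::vector<float> repeat_interleave_2(const std::vector<float>& in) {
    std::vector<float> out(in.size() * 2);
    for (size_t k = 0; k < in.size(); ++k) {
        out[2 * k]     = in[k];
        out[2 * k + 1] = in[k];
    }
    return out;
}
// e.g. {s0, s1, s2} -> {s0, s0, s1, s1, s2, s2}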
@@ -2353,13 +2325,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 
     // init cos/sin cache
     ggml_cann_pool_alloc sin_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     ggml_cann_pool_alloc cos_allocator(
-        ctx.pool(), src0->ne[0] * src0->ne[2] * sizeof(float_t));
+        ctx.pool(), ne00 * ne02 * sizeof(float_t));
     void* sin_buffer = sin_allocator.get();
     void* cos_buffer = cos_allocator.get();
 
-    int64_t sin_reshape_ne[4] = {src0->ne[0], 1, src0->ne[2], 1};
+    int64_t sin_reshape_ne[4] = {ne00, 1, ne02, 1};
     size_t sin_reshape_nb[GGML_MAX_DIMS];
     sin_reshape_nb[0] = sizeof(float_t);
     for (int i = 1; i < GGML_MAX_DIMS; i++) {
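The ne00/ne02 names used in both functions come from GGML_TENSOR_BINARY_OP_LOCALS, added at the top of aclnn_cache_init in the second hunk: the macro declares local aliases for the shape and stride fields of src0, src1 and dst, so src0->ne[0] becomes ne00 and src0->ne[2] becomes ne02. A simplified hand expansion of its effect (the real macro in ggml.h is generated and also marks the locals as possibly unused):

// Simplified view of what GGML_TENSOR_BINARY_OP_LOCALS provides inside an op,
// assuming src0, src1 and dst are in scope as in aclnn_cache_init.
const int64_t ne00 = src0->ne[0], ne01 = src0->ne[1],
              ne02 = src0->ne[2], ne03 = src0->ne[3];
const size_t  nb00 = src0->nb[0], nb01 = src0->nb[1],
              nb02 = src0->nb[2], nb03 = src0->nb[3];
const int64_t ne10 = src1->ne[0], ne11 = src1->ne[1],
              ne12 = src1->ne[2], ne13 = src1->ne[3];
const size_t  nb10 = src1->nb[0], nb11 = src1->nb[1],
              nb12 = src1->nb[2], nb13 = src1->nb[3];
const int64_t ne0  = dst->ne[0],  ne1  = dst->ne[1],
              ne2  = dst->ne[2],  ne3  = dst->ne[3];
const size_t  nb0  = dst->nb[0],  nb1  = dst->nb[1],
              nb2  = dst->nb[2],  nb3  = dst->nb[3];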
