64
64
#include < aclnnop/aclnn_reflection_pad1d.h>
65
65
#include < aclnnop/aclnn_eq_tensor.h>
66
66
#include < aclnnop/aclnn_gt_scalar.h>
67
+ #include < aclnnop/aclnn_pow.h>
67
68
#include < float.h>
68
69
69
70
#include < cmath>
@@ -2159,69 +2160,60 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2159
2160
ggml_tensor* src1 = dst->src [1 ]; // position
2160
2161
ggml_tensor* src2 = dst->src [2 ]; // freq_factors
2161
2162
2162
- // arange, [0,1,...,ne0/2]
2163
- int64_t arange_length = src0->ne [0 ] / 2 ;
2164
- ggml_cann_pool_alloc arange_allocator (ctx.pool (),
2165
- arange_length * sizeof (float_t ));
2166
- void * arange_buffer = arange_allocator.get ();
2167
- int64_t arange_ne[] = {arange_length, 1 , 1 , 1 };
2168
- size_t arange_nb[] = {sizeof (float_t ), sizeof (float_t ), sizeof (float_t ),
2169
- arange_length * sizeof (float_t )};
2170
-
2171
- aclTensor* acl_arange_tensor =
2172
- ggml_cann_create_tensor (arange_buffer, ACL_FLOAT, sizeof (float_t ),
2173
- arange_ne, arange_nb, GGML_MAX_DIMS);
2174
- float start = 0 ;
2175
- float step = 1 ;
2176
- float stop = src0->ne [0 ] / 2 ;
2177
- float n_elements = src0->ne [0 ] / 2 ;
2178
- aclnn_arange (ctx, acl_arange_tensor, start, stop, step, n_elements);
2163
+ GGML_TENSOR_BINARY_OP_LOCALS
2179
2164
2180
- // power
2181
- // aclnnPowScalarTensor(): @param self is tensor which should be scalar, so
2182
- // use aclnn_pow_tensor_tensor() until fixed. aclScalar* acl_theta_scale =
2183
- // aclCreateScalar(&theta_scale, aclDataType::ACL_FLOAT);
2184
- // aclnn_power_scalar_tensor(ctx, acl_theta_scale, acl_arange_tensor,
2185
- // acl_power_tensor);
2165
+ // theta_scale arange, [0,1,...,ne0/2-1]
2166
+ int64_t theta_scale_length = ne00 / 2 ;
2186
2167
ggml_cann_pool_alloc theta_scale_allocator (ctx.pool (),
2187
- arange_length * sizeof (float_t ));
2168
+ theta_scale_length * sizeof (float_t ));
2188
2169
void * theta_scale_buffer = theta_scale_allocator.get ();
2189
- aclTensor* acl_theta_scale_tensor = aclnn_values (
2190
- ctx, theta_scale_buffer, arange_length * sizeof (float_t ), arange_ne,
2191
- GGML_MAX_DIMS, ACL_FLOAT, sizeof (float_t ), theta_scale);
2192
- aclnn_pow_tensor_tensor (ctx, acl_theta_scale_tensor, acl_arange_tensor);
2170
+ int64_t theta_scale_ne[] = {theta_scale_length, 1 , 1 , 1 };
2171
+ size_t theta_scale_nb[] = {sizeof (float_t ), sizeof (float_t ), sizeof (float_t ),
2172
+ theta_scale_length * sizeof (float_t )};
2173
+
2174
+ aclTensor* acl_theat_scale_tensor =
2175
+ ggml_cann_create_tensor (theta_scale_buffer, ACL_FLOAT, sizeof (float_t ),
2176
+ theta_scale_ne, theta_scale_nb, GGML_MAX_DIMS);
2177
+ float start = 0 ;
2178
+ float step = 1 ;
2179
+ float stop = ne00 / 2 ;
2180
+ float n_elements = ne00 / 2 ;
2181
+ aclnn_arange (ctx, acl_theat_scale_tensor, start, stop, step, n_elements);
2193
2182
2183
+ // power
2184
+ aclScalar* acl_theta_scale = aclCreateScalar (&theta_scale, aclDataType::ACL_FLOAT);
2185
+ GGML_CANN_CALL_ACLNN_OP (PowScalarTensor, acl_theta_scale, acl_theat_scale_tensor, acl_theat_scale_tensor);
2186
+
2194
2187
// freq_scale
2195
2188
if (freq_scale != 1 ) {
2196
- aclnn_muls (ctx, acl_theta_scale_tensor , freq_scale, nullptr , true );
2189
+ aclnn_muls (ctx, acl_theat_scale_tensor , freq_scale, nullptr , true );
2197
2190
}
2198
2191
2199
2192
// freq_factors
2200
2193
if (src2) {
2201
2194
aclTensor* acl_freq_factors_tensor = ggml_cann_create_tensor (
2202
2195
src2->data , ggml_cann_type_mapping (src2->type ),
2203
- ggml_type_size (src2->type ), arange_ne, arange_nb , GGML_MAX_DIMS);
2204
- aclnn_div (ctx, acl_theta_scale_tensor , acl_freq_factors_tensor);
2196
+ ggml_type_size (src2->type ), theta_scale_ne, theta_scale_nb , GGML_MAX_DIMS);
2197
+ aclnn_div (ctx, acl_theat_scale_tensor , acl_freq_factors_tensor);
2205
2198
ACL_CHECK (aclDestroyTensor (acl_freq_factors_tensor));
2206
2199
}
2207
2200
2208
2201
// position
2209
2202
GGML_ASSERT (src1->type == GGML_TYPE_I32);
2210
2203
int64_t position_length = src1->ne [0 ];
2211
- int64_t position_ne[] = {1 , position_length, 1 , 1 };
2212
- size_t position_nb[] = {sizeof (int32_t ), sizeof (int32_t ),
2213
- sizeof (int32_t ) * position_length,
2204
+ int64_t position_ne[] = {1 , 1 , position_length, 1 };
2205
+ size_t position_nb[] = {sizeof (int32_t ), sizeof (int32_t ), sizeof (int32_t ),
2214
2206
sizeof (int32_t ) * position_length};
2215
2207
aclTensor* acl_position_tensor = ggml_cann_create_tensor (
2216
2208
src1->data , ggml_cann_type_mapping (src1->type ),
2217
2209
ggml_type_size (src1->type ), position_ne, position_nb, GGML_MAX_DIMS);
2218
2210
2219
2211
// power * position
2220
- int64_t theta_length = arange_length * position_length;
2212
+ int64_t theta_length = theta_scale_length * position_length;
2221
2213
ggml_cann_pool_alloc theta_allocator (ctx.pool (),
2222
2214
theta_length * sizeof (float_t ));
2223
2215
void * theta_buffer = theta_allocator.get ();
2224
- int64_t theta_ne[] = {arange_length, position_length, 1 , 1 };
2216
+ int64_t theta_ne[] = {theta_scale_length, 1 , position_length , 1 };
2225
2217
size_t theta_nb[GGML_MAX_DIMS];
2226
2218
theta_nb[0 ] = sizeof (float_t );
2227
2219
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
@@ -2230,43 +2222,25 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2230
2222
aclTensor* acl_theta_tensor =
2231
2223
ggml_cann_create_tensor (theta_buffer, ACL_FLOAT, sizeof (float_t ),
2232
2224
theta_ne, theta_nb, GGML_MAX_DIMS);
2233
- aclnn_mul (ctx, acl_position_tensor, acl_theta_scale_tensor ,
2225
+ aclnn_mul (ctx, acl_position_tensor, acl_theat_scale_tensor ,
2234
2226
acl_theta_tensor);
2235
2227
2236
- // permute: [0,1,2,3]->[0,2,1,3]
2237
- int64_t permute_ne[] = {arange_length, 1 , position_length, 1 };
2238
- size_t permute_nb[GGML_MAX_DIMS];
2239
- permute_nb[0 ] = sizeof (float_t );
2240
- for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
2241
- permute_nb[i] = permute_nb[i - 1 ] * permute_ne[i - 1 ];
2242
- }
2243
- ggml_cann_pool_alloc permute_allocator (ctx.pool (),
2244
- theta_length * sizeof (float_t ));
2245
- void * permute_buffer = permute_allocator.get ();
2246
- aclTensor* acl_permute_tensor = ggml_cann_create_tensor (
2247
- permute_buffer, ACL_FLOAT, sizeof (float_t ), permute_ne, permute_nb,
2248
- GGML_MAX_DIMS, ACL_FORMAT_ND);
2249
- int64_t permute_dim[] = {0 , 2 , 1 , 3 };
2250
- int64_t num_dims = 4 ;
2251
- aclnn_permute (ctx, acl_theta_tensor, acl_permute_tensor, permute_dim,
2252
- num_dims);
2253
-
2254
2228
// sin/cos
2255
2229
ggml_cann_pool_alloc sin_allocator (ctx.pool (),
2256
2230
theta_length * sizeof (float_t ));
2257
2231
void * sin_buffer = sin_allocator.get ();
2258
2232
aclTensor* acl_sin_tensor = ggml_cann_create_tensor (
2259
- sin_buffer, ACL_FLOAT, sizeof (float_t ), permute_ne, permute_nb ,
2233
+ sin_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb ,
2260
2234
GGML_MAX_DIMS, ACL_FORMAT_ND);
2261
- aclnn_sin (ctx, acl_permute_tensor , acl_sin_tensor);
2235
+ aclnn_sin (ctx, acl_theta_tensor , acl_sin_tensor);
2262
2236
2263
2237
ggml_cann_pool_alloc cos_allocator (ctx.pool (),
2264
2238
theta_length * sizeof (float_t ));
2265
2239
void * cos_buffer = cos_allocator.get ();
2266
2240
aclTensor* acl_cos_tensor = ggml_cann_create_tensor (
2267
- cos_buffer, ACL_FLOAT, sizeof (float_t ), permute_ne, permute_nb ,
2241
+ cos_buffer, ACL_FLOAT, sizeof (float_t ), theta_ne, theta_nb ,
2268
2242
GGML_MAX_DIMS, ACL_FORMAT_ND);
2269
- aclnn_cos (ctx, acl_permute_tensor , acl_cos_tensor);
2243
+ aclnn_cos (ctx, acl_theta_tensor , acl_cos_tensor);
2270
2244
2271
2245
// attn_factor
2272
2246
if (attn_factor != 1 ) {
@@ -2282,19 +2256,17 @@ static void aclnn_cache_init(ggml_backend_cann_context& ctx, ggml_tensor* dst,
2282
2256
} else {
2283
2257
int64_t num_repeats = 2 ;
2284
2258
int64_t dim = 3 ;
2285
- int64_t output_size = arange_length * num_repeats;
2259
+ int64_t output_size = theta_scale_length * num_repeats;
2286
2260
aclnn_repeat_interleave (ctx, acl_sin_tensor, acl_sin_repeat_tensor, dim,
2287
2261
num_repeats, output_size);
2288
2262
aclnn_repeat_interleave (ctx, acl_cos_tensor, acl_cos_repeat_tensor, dim,
2289
2263
num_repeats, output_size);
2290
2264
}
2291
2265
2292
2266
// release
2293
- ACL_CHECK (aclDestroyTensor (acl_arange_tensor));
2294
- ACL_CHECK (aclDestroyTensor (acl_theta_scale_tensor));
2267
+ ACL_CHECK (aclDestroyTensor (acl_theat_scale_tensor));
2295
2268
ACL_CHECK (aclDestroyTensor (acl_position_tensor));
2296
2269
ACL_CHECK (aclDestroyTensor (acl_theta_tensor));
2297
- ACL_CHECK (aclDestroyTensor (acl_permute_tensor));
2298
2270
ACL_CHECK (aclDestroyTensor (acl_sin_tensor));
2299
2271
ACL_CHECK (aclDestroyTensor (acl_cos_tensor));
2300
2272
}
@@ -2353,13 +2325,13 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
2353
2325
2354
2326
// init cos/sin cache
2355
2327
ggml_cann_pool_alloc sin_allocator (
2356
- ctx.pool (), src0-> ne [ 0 ] * src0-> ne [ 2 ] * sizeof (float_t ));
2328
+ ctx.pool (), ne00 * ne02 * sizeof (float_t ));
2357
2329
ggml_cann_pool_alloc cos_allocator (
2358
- ctx.pool (), src0-> ne [ 0 ] * src0-> ne [ 2 ] * sizeof (float_t ));
2330
+ ctx.pool (), ne00 * ne02 * sizeof (float_t ));
2359
2331
void * sin_buffer = sin_allocator.get ();
2360
2332
void * cos_buffer = cos_allocator.get ();
2361
2333
2362
- int64_t sin_reshape_ne[4 ] = {src0-> ne [ 0 ] , 1 , src0-> ne [ 2 ] , 1 };
2334
+ int64_t sin_reshape_ne[4 ] = {ne00 , 1 , ne02 , 1 };
2363
2335
size_t sin_reshape_nb[GGML_MAX_DIMS];
2364
2336
sin_reshape_nb[0 ] = sizeof (float_t );
2365
2337
for (int i = 1 ; i < GGML_MAX_DIMS; i++) {
0 commit comments