Skip to content

Commit bbfeb0e

Browse files
committed
feat: Improve the way memory allocation is managed
1 parent fe5b78c commit bbfeb0e

File tree

3 files changed

+354
-80
lines changed

3 files changed

+354
-80
lines changed

ggml/src/ggml-cann/aclnn_ops.cpp

+6-1
Original file line numberDiff line numberDiff line change
@@ -1209,6 +1209,11 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
12091209
GGML_CANN_CALL_ACLNN_OP(Sin, acl_src, acl_dst);
12101210
}
12111211

1212+
// Dispatches the ACLNN GeluV2 operator through GGML_CANN_CALL_ACLNN_OP,
// forwarding acl_src, the literal 0, and acl_dst in that order.
// NOTE(review): the middle argument 0 looks like GeluV2's "approximate"
// mode selector (0 presumably meaning the non-tanh variant) - confirm
// against the aclnnGeluV2 API reference.
// NOTE(review): ctx is not referenced by name in this body; presumably the
// dispatch macro picks it up from the enclosing scope - confirm.
void aclnn_geluv2(ggml_backend_cann_context& ctx, aclTensor* acl_src,
1213+
aclTensor* acl_dst) {
1214+
GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
1215+
}
1216+
12121217
void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx,
12131218
ggml_tensor* dst) {
12141219
const ggml_tensor* src = dst->src[0];
@@ -1783,7 +1788,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
17831788
src0->data, ACL_INT8, sizeof(int8_t), weight_ne, weight_nb,
17841789
GGML_MAX_DIMS + 1);
17851790
aclTensor* acl_scale_tensor = ggml_cann_create_tensor(
1786-
src0->data, ACL_FLOAT16, sizeof(float16_t), scale_ne, scale_nb,
1791+
src0->data, ACL_FLOAT16, sizeof(uint16_t), scale_ne, scale_nb,
17871792
GGML_MAX_DIMS + 1, ACL_FORMAT_ND, scale_offset);
17881793
aclTensor* dequant_tensor = ggml_cann_create_tensor(
17891794
dequant_buffer_allocator.get(), ACL_FLOAT, sizeof(float_t),

ggml/src/ggml-cann/aclnn_ops.h

+12-10
Original file line numberDiff line numberDiff line change
@@ -593,6 +593,9 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
593593
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
594594
aclTensor* acl_dst);
595595

596+
/**
 * @brief Invokes the ACLNN GeluV2 operator on a pair of ACL tensors.
 *
 * @param ctx     The CANN backend context.
 * @param acl_src Tensor handed to the operator as its source operand
 *                (presumably the input - confirm against the definition).
 * @param acl_dst Tensor handed to the operator as its destination operand
 *                (presumably receives the result - confirm).
 */
void aclnn_geluv2(ggml_backend_cann_context& ctx, aclTensor* acl_src,
597+
aclTensor* acl_dst);
598+
596599
/**
597600
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
598601
* output tensor.
@@ -840,14 +843,13 @@ void ggml_cann_unary_op(
840843
* @see ggml_cann_unary_op
841844
* @see GGML_CANN_CALL_ACLNN_OP
842845
*/
843-
/* Lambda-based form of the macro: wraps GGML_CANN_CALL_ACLNN_OP(OP_NAME,
 * acl_src, acl_dst) in a capture-less lambda taking (ctx, acl_src, acl_dst)
 * and hands that lambda, together with `ctx` and `dst` from the expansion
 * site, to ggml_cann_unary_op. Both `ctx` and `dst` must therefore be
 * visible where the macro is expanded. */
#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
844-
do { \
845-
auto lambda = [](ggml_backend_cann_context& ctx, \
846-
aclTensor* acl_src, \
847-
aclTensor* acl_dst) { \
848-
GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
849-
}; \
850-
ggml_cann_unary_op(lambda, ctx, dst); \
851-
} \
852-
while (0)
846+
/* Inline form of the macro. Expands to a single do/while(0) statement that:
 *   1. reads the first source tensor from `dst->src[0]`,
 *   2. creates ACL tensors for that source and for `dst` with
 *      ggml_cann_create_tensor,
 *   3. invokes GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst),
 *   4. destroys both ACL tensors with aclDestroyTensor, each wrapped in
 *      ACL_CHECK.
 * `dst` must be visible at the expansion site. The locals `src`, `acl_src`
 * and `acl_dst` are declared inside the block, so they shadow any
 * same-named variables of the caller for the duration of the statement.
 * NOTE(review): GGML_CANN_CALL_ACLNN_OP presumably also expects `ctx` to be
 * in scope at the expansion site - confirm against its definition. */
#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \
847+
do { \
848+
ggml_tensor * src = dst->src[0]; \
849+
aclTensor * acl_src = ggml_cann_create_tensor(src); \
850+
aclTensor * acl_dst = ggml_cann_create_tensor(dst); \
851+
GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \
852+
ACL_CHECK(aclDestroyTensor(acl_src)); \
853+
ACL_CHECK(aclDestroyTensor(acl_dst)); \
854+
} while (0)
853855
#endif // CANN_ACLNN_OPS

0 commit comments

Comments
 (0)