From 14ac344f5e005085a397d9739f5508e4f631edf3 Mon Sep 17 00:00:00 2001 From: dou <15529241576@163.com> Date: Thu, 10 Apr 2025 07:24:11 +0000 Subject: [PATCH 1/3] feat: Increase the way memory allocation is managed --- ggml/src/ggml-cann/aclnn_ops.cpp | 7 +- ggml/src/ggml-cann/aclnn_ops.h | 22 +- ggml/src/ggml-cann/ggml-cann.cpp | 405 +++++++++++++++++++++++++------ 3 files changed, 354 insertions(+), 80 deletions(-) diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index 37d4117972358..4357c9a2948ec 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -1209,6 +1209,11 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src, GGML_CANN_CALL_ACLNN_OP(Sin, acl_src, acl_dst); } +void aclnn_geluv2(ggml_backend_cann_context& ctx, aclTensor* acl_src, + aclTensor* acl_dst) { + GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst); +} + void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst) { const ggml_tensor* src = dst->src[0]; @@ -1783,7 +1788,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst) { src0->data, ACL_INT8, sizeof(int8_t), weight_ne, weight_nb, GGML_MAX_DIMS + 1); aclTensor* acl_scale_tensor = ggml_cann_create_tensor( - src0->data, ACL_FLOAT16, sizeof(float16_t), scale_ne, scale_nb, + src0->data, ACL_FLOAT16, sizeof(uint16_t), scale_ne, scale_nb, GGML_MAX_DIMS + 1, ACL_FORMAT_ND, scale_offset); aclTensor* dequant_tensor = ggml_cann_create_tensor( dequant_buffer_allocator.get(), ACL_FLOAT, sizeof(float_t), diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h index b2d1b3c36d238..2669debe43dc1 100644 --- a/ggml/src/ggml-cann/aclnn_ops.h +++ b/ggml/src/ggml-cann/aclnn_ops.h @@ -593,6 +593,9 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src, void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src, aclTensor* acl_dst); +void aclnn_geluv2(ggml_backend_cann_context& ctx, aclTensor* acl_src, + aclTensor* acl_dst); + /** * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one * output tensor. @@ -840,14 +843,13 @@ void ggml_cann_unary_op( * @see ggml_cann_unary_op * @see GGML_CANN_CALL_ACLNN_OP */ -#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \ - do { \ - auto lambda = [](ggml_backend_cann_context& ctx, \ - aclTensor* acl_src, \ - aclTensor* acl_dst) { \ - GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \ - }; \ - ggml_cann_unary_op(lambda, ctx, dst); \ - } \ - while (0) +#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \ + do { \ + ggml_tensor * src = dst->src[0]; \ + aclTensor * acl_src = ggml_cann_create_tensor(src); \ + aclTensor * acl_dst = ggml_cann_create_tensor(dst); \ + GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \ + ACL_CHECK(aclDestroyTensor(acl_src)); \ + ACL_CHECK(aclDestroyTensor(acl_dst)); \ + } while (0) #endif // CANN_ACLNN_OPS diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index cec36b36e7e92..5a1263dc79134 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include "ggml-impl.h" #include "ggml-backend-impl.h" @@ -119,9 +121,10 @@ static ggml_cann_device_info ggml_cann_init() { prop.location.type = ACL_MEM_LOCATION_TYPE_DEVICE; prop.location.id = id; prop.reserve = 0; - ACL_CHECK(aclrtMemGetAllocationGranularity( + err = aclrtMemGetAllocationGranularity( &prop, ACL_RT_MEM_ALLOC_GRANULARITY_RECOMMENDED, - &info.devices[id].vmm_granularity)); + &info.devices[id].vmm_granularity); + info.devices[id].vmm = err == ACL_SUCCESS; size_t free, total; ggml_backend_cann_get_device_memory(id, &free, &total); @@ -148,11 +151,223 @@ const ggml_cann_device_info& ggml_cann_info() { //#define DEBUG_CANN_MALLOC /** - * @brief A pool of CANN buffers(legacy). + * @brief A pool of CANN buffers(priority segment buffer). * * This class manages a pool of CANN buffers for a specific device. */ -struct ggml_cann_pool_leg : public ggml_cann_pool { +struct ggml_cann_pool_buf_prio : public ggml_cann_pool { + /** + * @brief The maximum reuse margin for a buffer. + */ + static const size_t max_reuse_margin = 1ull << 22; // 4MB + + /** + * @brief The minimum free margin for a buffer. + */ + static const size_t min_free_margin = 1ull << 20; // 1MB + + + /** + * @brief The alignment for buffer allocation. + */ + static const size_t alignment = 128; + + /** + * @brief The device ID associated with this buffer pool. + */ + int device; + + /** + * @brief Whether to disable clean during buffer allocation. + */ + bool disable_clean = false; + + /** + * @brief Structure representing a CANN buffer. + */ + struct ggml_cann_buffer { + void* ptr = nullptr; ///< Pointer to the buffer. + size_t size = 0; ///< Size of the buffer. + std::chrono::steady_clock::time_point last_used; ///< Last used time. + + bool operator>(const ggml_cann_buffer& other) const { + return size > other.size; + } + }; + + /** + * @brief Array of CANN buffers in the pool. + */ + std::unordered_map buffer_pool; + std::priority_queue, + std::greater<>> free_buffers ; + + /** + * @brief Total size of all buffers in the pool. + */ + size_t pool_size = 0; + + /** + * @brief Constructor to initialize the buffer pool for a specific device. + * + * @param device The device ID to associate with this buffer pool. + */ + explicit ggml_cann_pool_buf_prio(int device) : device(device) { + disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; + } + + /** + * @brief Destructor to free all buffers in the pool. + */ + ~ggml_cann_pool_buf_prio() { + ggml_cann_set_device(device); + for (auto& [b_ptr, b_size] : buffer_pool) { + aclrtFree(b_ptr); + pool_size -= b_size; + } + buffer_pool.clear(); + GGML_ASSERT(pool_size == 0); + } + + /** + * @brief Allocate a buffer of the given size. + * + * @param size The size of the buffer to allocate. + * @param actual_size A pointer to a variable to receive the actual size of + * the allocated buffer. + * @return A pointer to the allocated buffer. + */ + void* alloc(size_t size, size_t* actual_size) override { + size = GGML_PAD(size, alignment); + if (size == 0) { + size = alignment; + } + + void* ptr = nullptr; + auto now = std::chrono::steady_clock::now(); + + std::vector free_buffers_rest; + free_buffers_rest.reserve(free_buffers.size()); + while (!free_buffers.empty()) { + auto b = free_buffers.top(); + free_buffers.pop(); + + if (b.size >= size) { + // reuse the buffer if the size is enough + const size_t margin = b.size - size; + if (margin <= max_reuse_margin) { + *actual_size = b.size; + ptr = b.ptr; + #ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: reused %p, " + "pool_size = %5u MB, " + "size = %5u MB, " + "margin = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(margin, 1048576) / 1048576)); + #endif + break; + } + } + + bool should_clean = !disable_clean && + b.size > min_free_margin && + std::chrono::duration_cast(now - b.last_used).count() > 100; + if (should_clean) { + // free the buffer if the size is needed to be freed + ACL_CHECK(aclrtFree(b.ptr)); + pool_size -= b.size; + buffer_pool.erase(b.ptr); + #ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: clean %p, " + "pool_size = %5u MB, " + "size = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576)); + #endif + continue; + } + free_buffers_rest.push_back(b); + } + for (ggml_cann_buffer &b : free_buffers_rest) { + free_buffers.push(std::move(b)); + } + + #ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); + #endif + if (ptr != nullptr) { + return ptr; + } + + // allocate a new buffer if no buffer can be reused + ggml_cann_set_device(device); + ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); + *actual_size = size; + pool_size += size; + #ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: allocate %p, " + "pool_size = %5u MB, " + "size = %5u MB\n", + device, ptr, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(size, 1048576) / 1048576)); + #endif + buffer_pool.emplace(ptr, size); + return ptr; + } + + /** + * @brief Free a buffer and return it to the pool. + * + * @param ptr Pointer to the buffer to free. + * @param size Size of the buffer to free. + */ + void free(void* ptr, size_t size) override { + auto it = buffer_pool.find(ptr); + if (it == buffer_pool.end()) { + GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr); + } + + auto now = std::chrono::steady_clock::now(); + free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now}); + #ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: return %p, " + "pool_size = %5u MB\n", + device, ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); + #endif + } + }; + +/** + * @brief A pool of CANN buffers(segment buffer). + * + * This class manages a pool of CANN buffers for a specific device. + */ +struct ggml_cann_pool_buf : public ggml_cann_pool { + /** + * @brief The maximum reuse margin for a buffer. + */ + static const size_t max_reuse_margin = 1ull << 22; // 4MB + + /** + * @brief The minimum free margin for a buffer. + */ + static const size_t min_free_margin = 1ull << 20; // 1MB + + /** + * @brief The alignment for buffer allocation. + */ + static const size_t alignment = 128; + /** * @brief The maximum number of buffers in the pool. */ @@ -163,12 +378,19 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { */ int device; + /** + * @brief Whether to disable clean during buffer allocation. + */ + bool disable_clean = false; + /** * @brief Structure representing a CANN buffer. */ struct ggml_cann_buffer { void* ptr = nullptr; ///< Pointer to the buffer memory. size_t size = 0; ///< Size of the buffer. + bool used = false; ///< Whether the buffer is currently in use. + std::chrono::steady_clock::time_point last_used; ///< Last used time. }; /** @@ -186,17 +408,19 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { * * @param device The device ID to associate with this buffer pool. */ - explicit ggml_cann_pool_leg(int device) : device(device) {} + explicit ggml_cann_pool_buf(int device) : device(device) { + disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; + } /** * @brief Destructor to free all buffers in the pool. */ - ~ggml_cann_pool_leg() { + ~ggml_cann_pool_buf() { ggml_cann_set_device(device); for (int i = 0; i < MAX_BUFFERS; ++i) { ggml_cann_buffer& b = buffer_pool[i]; if (b.ptr != nullptr) { - ACL_CHECK(aclrtFree(b.ptr)); + aclrtFree(b.ptr); pool_size -= b.size; } } @@ -212,63 +436,93 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { * @return A pointer to the allocated buffer. */ void* alloc(size_t size, size_t* actual_size) override { - const size_t alignment = 128; size = GGML_PAD(size, alignment); if (size == 0) { size = alignment; } -#ifdef DEBUG_CANN_MALLOC - int nnz = 0; - size_t max_size = 0; -#endif - size_t best_diff = 1ull << 36; - int ibest = -1; - for (int i = 0; i < MAX_BUFFERS; ++i) { + + void* ptr = nullptr; + auto now = std::chrono::steady_clock::now(); + + int i = 0; + for (; i < MAX_BUFFERS; ++i) { ggml_cann_buffer& b = buffer_pool[i]; - if (b.ptr != nullptr) { + if (b.ptr == nullptr) { + break; + } + if (b.used) { + continue; + } + if (b.size >= size) { + // reuse the buffer if the size is enough + const size_t margin = b.size - size; + if (margin <= max_reuse_margin) { + *actual_size = b.size; + b.used = true; + ptr = b.ptr; #ifdef DEBUG_CANN_MALLOC - ++nnz; - if (b.size > max_size) max_size = b.size; + GGML_LOG_INFO( + "cann pool[%d]: reused %p, " + "pool_size = %5u MB, " + "size = %5u MB, " + "margin = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(margin, 1048576) / 1048576)); #endif - if (b.size >= size) { - size_t diff = b.size - size; - if (diff < best_diff) { - best_diff = diff; - ibest = i; - if (!best_diff) { - void* ptr = b.ptr; - *actual_size = b.size; - b.ptr = nullptr; - b.size = 0; - return ptr; - } - } + break; } } + + bool should_clean = !disable_clean && + b.size > min_free_margin && + std::chrono::duration_cast(now - b.last_used).count() > 100; + if (should_clean) { + // free the buffer if the size is needed to be freed + ACL_CHECK(aclrtFree(b.ptr)); + pool_size -= b.size; +#ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: clean %p, " + "pool_size = %5u MB, " + "size = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576)); +#endif + b.ptr = nullptr; + } } - if (ibest >= 0) { - ggml_cann_buffer& b = buffer_pool[ibest]; - void* ptr = b.ptr; - *actual_size = b.size; - b.ptr = nullptr; - b.size = 0; + if (ptr != nullptr) { return ptr; } - void* ptr; - ggml_cann_set_device(device); - ACL_CHECK( - aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); - *actual_size = size; - pool_size += size; + + if (i < MAX_BUFFERS) { + // allocate a new buffer if no buffer can be reused + ggml_cann_buffer& b = buffer_pool[i]; + ggml_cann_set_device(device); + ACL_CHECK(aclrtMalloc(&b.ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); + pool_size += size; + *actual_size = size; + b.size = size; + b.used = true; + if (i >= MAX_BUFFERS - 8) { + GGML_LOG_WARN("cann pool[%d]: slots almost full\n", device); + } #ifdef DEBUG_CANN_MALLOC - GGML_LOG_INFO( - "%s[%d]: %d buffers, max_size = %u MB, pool_size = %u MB, " - "requested %u MB\n", - __func__, device, nnz, (uint32_t)(max_size / 1024 / 1024), - (uint32_t)(pool_size / 1024 / 1024), - (uint32_t)(size / 1024 / 1024)); + GGML_LOG_INFO( + "cann pool[%d]: allocate %p, " + "pool_size = %5u MB, " + "size = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576), + (uint32_t)(GGML_PAD(b.size, 1048576) / 1048576)); #endif - return ptr; + return b.ptr; + } + + GGML_ABORT("cann pool[%d]: slots full\n", device); } /** @@ -280,16 +534,21 @@ struct ggml_cann_pool_leg : public ggml_cann_pool { void free(void* ptr, size_t size) override { for (int i = 0; i < MAX_BUFFERS; ++i) { ggml_cann_buffer& b = buffer_pool[i]; - if (b.ptr == nullptr) { - b.ptr = ptr; - b.size = size; - return; + if (b.ptr != ptr) { + continue; } + b.used = false; + b.last_used = std::chrono::steady_clock::now(); +#ifdef DEBUG_CANN_MALLOC + GGML_LOG_INFO( + "cann pool[%d]: return %p, " + "pool_size = %5u MB\n", + device, b.ptr, + (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); +#endif + return; } - // memory should always buffered. these memory may still needed by - // tasks in stream. - // TODO, fix me. - GGML_ABORT("Cann buffer pool full, increase MAX_CANN_BUFFERS\n"); + GGML_ABORT("cann pool[%d]: slots full\n", device); } }; @@ -347,8 +606,7 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { * @param device The device ID to associate with this buffer pool. */ explicit ggml_cann_pool_vmm(int device) - : device(device), - granularity(ggml_cann_info().devices[device].vmm_granularity) { + : device(device) { auto dev = ggml_cann_info().devices[device]; granularity = dev.vmm_granularity; max_size = dev.total_vram; @@ -471,7 +729,18 @@ struct ggml_cann_pool_vmm : public ggml_cann_pool { */ std::unique_ptr ggml_backend_cann_context::new_pool_for_device( int device) { - return std::unique_ptr(new ggml_cann_pool_vmm(device)); + bool disable_vmm = (getenv("GGML_CANN_DISABLE_VMM_POOL") != nullptr); + if (!disable_vmm && ggml_cann_info().devices[device].vmm) { + GGML_LOG_INFO("%s: device %d use vmm pool\n", __func__, device); + return std::unique_ptr(new ggml_cann_pool_vmm(device)); + } + bool enable_buf_prio = (getenv("GGML_CANN_ENABLE_BUF_PRIO_POOL") != nullptr); + if (enable_buf_prio) { + GGML_LOG_INFO("%s: device %d use buffer pool with priority queue\n", __func__, device); + return std::unique_ptr(new ggml_cann_pool_buf_prio(device)); + } + GGML_LOG_INFO("%s: device %d use buffer pool\n", __func__, device); + return std::unique_ptr(new ggml_cann_pool_buf(device)); } // cann buffer @@ -1020,8 +1289,11 @@ ggml_backend_cann_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, ggml_cann_set_device(buft_ctx->device); - size = std::max(size, (size_t)1); - + const size_t alignment = 128; + size = GGML_PAD(size, alignment); + if (size == 0) { + size = alignment; + } void* dev_ptr; aclError err = aclrtMalloc(&dev_ptr, size, ACL_MEM_MALLOC_HUGE_FIRST); if (err != ACL_SUCCESS) { @@ -1330,12 +1602,7 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx, GGML_CANN_CALL_UNARY_OP(Silu); break; case GGML_UNARY_OP_GELU_QUICK: { - auto lambda = [](ggml_backend_cann_context& ctx, - aclTensor* acl_src, - aclTensor* acl_dst) { - GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst); - }; - ggml_cann_unary_op(lambda, ctx, dst); + ggml_cann_unary_op(ctx, dst); } break; case GGML_UNARY_OP_TANH: GGML_CANN_CALL_UNARY_OP(Tanh); From cc36575fbc8e4b31507d44b3ca402aa94fc4ae9a Mon Sep 17 00:00:00 2001 From: dou <15529241576@163.com> Date: Fri, 11 Apr 2025 09:27:34 +0000 Subject: [PATCH 2/3] update some changes --- ggml/src/ggml-cann/aclnn_ops.cpp | 5 ----- ggml/src/ggml-cann/aclnn_ops.h | 22 ++++++++++------------ ggml/src/ggml-cann/ggml-cann.cpp | 7 ++++++- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index 4357c9a2948ec..f312a620cb69c 100644 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -1209,11 +1209,6 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src, GGML_CANN_CALL_ACLNN_OP(Sin, acl_src, acl_dst); } -void aclnn_geluv2(ggml_backend_cann_context& ctx, aclTensor* acl_src, - aclTensor* acl_dst) { - GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst); -} - void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst) { const ggml_tensor* src = dst->src[0]; diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h index 2669debe43dc1..b2d1b3c36d238 100644 --- a/ggml/src/ggml-cann/aclnn_ops.h +++ b/ggml/src/ggml-cann/aclnn_ops.h @@ -593,9 +593,6 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src, void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src, aclTensor* acl_dst); -void aclnn_geluv2(ggml_backend_cann_context& ctx, aclTensor* acl_src, - aclTensor* acl_dst); - /** * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one * output tensor. @@ -843,13 +840,14 @@ void ggml_cann_unary_op( * @see ggml_cann_unary_op * @see GGML_CANN_CALL_ACLNN_OP */ -#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \ - do { \ - ggml_tensor * src = dst->src[0]; \ - aclTensor * acl_src = ggml_cann_create_tensor(src); \ - aclTensor * acl_dst = ggml_cann_create_tensor(dst); \ - GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \ - ACL_CHECK(aclDestroyTensor(acl_src)); \ - ACL_CHECK(aclDestroyTensor(acl_dst)); \ - } while (0) +#define GGML_CANN_CALL_UNARY_OP(OP_NAME) \ + do { \ + auto lambda = [](ggml_backend_cann_context& ctx, \ + aclTensor* acl_src, \ + aclTensor* acl_dst) { \ + GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst); \ + }; \ + ggml_cann_unary_op(lambda, ctx, dst); \ + } \ + while (0) #endif // CANN_ACLNN_OPS diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 5a1263dc79134..4f80bbd062e1e 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -1602,7 +1602,12 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx, GGML_CANN_CALL_UNARY_OP(Silu); break; case GGML_UNARY_OP_GELU_QUICK: { - ggml_cann_unary_op(ctx, dst); + auto lambda = [](ggml_backend_cann_context& ctx, + aclTensor* acl_src, + aclTensor* acl_dst) { + GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst); + }; + ggml_cann_unary_op(lambda, ctx, dst); } break; case GGML_UNARY_OP_TANH: GGML_CANN_CALL_UNARY_OP(Tanh); From c21bc52f585e73b6f5d636c03fd3323d8d762704 Mon Sep 17 00:00:00 2001 From: dou <15529241576@163.com> Date: Mon, 14 Apr 2025 12:07:18 +0000 Subject: [PATCH 3/3] fix some errors --- ggml/src/ggml-cann/ggml-cann.cpp | 43 ++++++++++++++++---------------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index 4f80bbd062e1e..8f8acaf999cb1 100644 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -160,28 +160,27 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { * @brief The maximum reuse margin for a buffer. */ static const size_t max_reuse_margin = 1ull << 22; // 4MB - + /** * @brief The minimum free margin for a buffer. */ static const size_t min_free_margin = 1ull << 20; // 1MB - - + /** * @brief The alignment for buffer allocation. */ - static const size_t alignment = 128; - + static const size_t alignment = 128; + /** * @brief The device ID associated with this buffer pool. */ int device; - + /** * @brief Whether to disable clean during buffer allocation. */ bool disable_clean = false; - + /** * @brief Structure representing a CANN buffer. */ @@ -189,12 +188,12 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { void* ptr = nullptr; ///< Pointer to the buffer. size_t size = 0; ///< Size of the buffer. std::chrono::steady_clock::time_point last_used; ///< Last used time. - + bool operator>(const ggml_cann_buffer& other) const { return size > other.size; } }; - + /** * @brief Array of CANN buffers in the pool. */ @@ -202,12 +201,12 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { std::priority_queue, std::greater<>> free_buffers ; - + /** * @brief Total size of all buffers in the pool. */ size_t pool_size = 0; - + /** * @brief Constructor to initialize the buffer pool for a specific device. * @@ -216,7 +215,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { explicit ggml_cann_pool_buf_prio(int device) : device(device) { disable_clean = getenv("GGML_CANN_DISABLE_BUF_POOL_CLEAN") != nullptr; } - + /** * @brief Destructor to free all buffers in the pool. */ @@ -229,7 +228,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { buffer_pool.clear(); GGML_ASSERT(pool_size == 0); } - + /** * @brief Allocate a buffer of the given size. * @@ -243,16 +242,16 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { if (size == 0) { size = alignment; } - + void* ptr = nullptr; auto now = std::chrono::steady_clock::now(); - + std::vector free_buffers_rest; free_buffers_rest.reserve(free_buffers.size()); while (!free_buffers.empty()) { auto b = free_buffers.top(); free_buffers.pop(); - + if (b.size >= size) { // reuse the buffer if the size is enough const size_t margin = b.size - size; @@ -273,7 +272,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { break; } } - + bool should_clean = !disable_clean && b.size > min_free_margin && std::chrono::duration_cast(now - b.last_used).count() > 100; @@ -298,14 +297,14 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { for (ggml_cann_buffer &b : free_buffers_rest) { free_buffers.push(std::move(b)); } - + #ifdef DEBUG_CANN_MALLOC GGML_LOG_INFO("cann pool[%d] free pool_size = %5u MB\n\n", device, (uint32_t)(GGML_PAD(pool_size, 1048576) / 1048576)); #endif if (ptr != nullptr) { return ptr; } - + // allocate a new buffer if no buffer can be reused ggml_cann_set_device(device); ACL_CHECK(aclrtMalloc(&ptr, size, ACL_MEM_MALLOC_HUGE_FIRST)); @@ -322,7 +321,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { buffer_pool.emplace(ptr, size); return ptr; } - + /** * @brief Free a buffer and return it to the pool. * @@ -334,7 +333,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { if (it == buffer_pool.end()) { GGML_ABORT("cann pool[%d]: buffer %p not found in pool\n", device, ptr); } - + auto now = std::chrono::steady_clock::now(); free_buffers.emplace(ggml_cann_buffer{ptr, it->second, now}); #ifdef DEBUG_CANN_MALLOC @@ -346,7 +345,7 @@ struct ggml_cann_pool_buf_prio : public ggml_cann_pool { #endif } }; - + /** * @brief A pool of CANN buffers(segment buffer). *