CANN: add comment

hipudding · hipudding · commit 0434f7f4019d · 2025-04-15T02:04:50.000Z
diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml/src/ggml-cann/aclnn_ops.h
@@ -651,53 +651,95 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
  */
 void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
-using AnyAclResource = std::unique_ptr<void, std::function<void(void*)>>;
+/**
+ * @brief A generic wrapper for ACL resources with custom deleter support.
+ */
+using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
 
+/**
+ * @brief Trait structure used to define how to destroy a given ACL resource type.
+ * 
+ * @tparam T ACL resource type.
+ */
 template<typename T>
-struct AclResourceTraits;
+struct acl_resource_traits;
+
+/**
+ * @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
+ */
 template<>
-struct AclResourceTraits<aclTensor> {
+struct acl_resource_traits<aclTensor> {
     static void destroy(void* p) {
         ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
     }
 };
+
+/**
+ * @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
+ */
 template<>
-struct AclResourceTraits<aclIntArray> {
+struct acl_resource_traits<aclIntArray> {
     static void destroy(void* p) {
         ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
     }
 };
+
+/**
+ * @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
+ */
 template<>
-struct AclResourceTraits<aclScalar> {
+struct acl_resource_traits<aclScalar> {
     static void destroy(void* p) {
         ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
     }
 };
+
+/**
+ * @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
+ */
 template<>
-struct AclResourceTraits<aclTensorList> {
+struct acl_resource_traits<aclTensorList> {
     static void destroy(void* p) {
         ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
     }
 };
 
+/**
+ * @brief Creates a generic ACL resource wrapper with proper destruction logic.
+ * 
+ * @tparam T ACL resource type.
+ * @param ptr Raw pointer to ACL resource.
+ * @return any_acl_resource Smart pointer that handles destruction.
+ */
 template<typename T>
-AnyAclResource make_acl_resource(T* ptr) {
-    return AnyAclResource(
+any_acl_resource make_acl_resource(T* ptr) {
+    return any_acl_resource(
         static_cast<void*>(ptr),
         [](void* p) {
-            AclResourceTraits<T>::destroy(p);
+            acl_resource_traits<T>::destroy(p);
         }
     );
 }
 
+/**
+ * @brief Registers multiple ACL resources into a vector for lifetime management.
+ * 
+ * @tparam Args Variadic list of ACL resource types.
+ * @param vec Target vector to hold ACL resources.
+ * @param args Raw pointers to ACL resources.
+ */
 template<typename... Args>
-void register_acl_resources(std::vector<AnyAclResource>& vec, Args*... args) {
+void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
     (vec.emplace_back(make_acl_resource(args)), ...);
 }
 
+/**
+ * @brief Task class that wraps the execution of an aclnn function call.
+ */
 class aclnn_task : public cann_task {
     public:
-        aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr, uint64_t workspace_size, aclOpExecutor * executor,
+        aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
+                   uint64_t workspace_size, aclOpExecutor * executor,
                    aclrtStream stream) :
             aclnn_func_(aclnn_func),
             workspace_addr_(workspace_addr),
@@ -714,38 +756,33 @@ class aclnn_task : public cann_task {
         aclOpExecutor * executor_;
         aclrtStream     stream_;
 };
-    
-class resource_task : public cann_task {
+
+/**
+ * @brief Task class that releases ACL resources after usage.
+ */
+class release_resource_task : public cann_task {
 public:
-    resource_task(std::vector<AnyAclResource>&& resources){
+    release_resource_task(std::vector<any_acl_resource>&& resources){
         resource_ = std::move(resources);
     }
 
     virtual void run_task() override {
         resource_.clear();
     }
 private:
-    std::vector<AnyAclResource> resource_;
-};
-
-class free_ptr_task : public cann_task {
-public:
-    free_ptr_task(void* ptr) : ptr_(ptr) {}
-
-    virtual void run_task() override {
-        free(ptr_);
-    }
-private:
-    void* ptr_;
+    std::vector<any_acl_resource> resource_;
 };
 
+/**
+ * @brief Task class for performing asynchronous memory copy operations.
+ */
 class async_memcpy_task : public cann_task {
 public:
-    async_memcpy_task(void* dst, const void* src, size_t size, aclrtMemcpyKind kind, aclrtStream stream)
+    async_memcpy_task(void* dst, const void* src, size_t size,
+                      aclrtMemcpyKind kind, aclrtStream stream)
         : dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
 
     virtual void run_task() override {
-        
         ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
     }
 private:
@@ -756,6 +793,9 @@ class async_memcpy_task : public cann_task {
     aclrtStream stream_;
 };
 
+/**
+ * @brief Task class for performing asynchronous memory set operations.
+ */
 class async_memset_task : public cann_task {
     public:
     async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
@@ -810,16 +850,33 @@ class async_memset_task : public cann_task {
         }                                                                                   \
     } while (0)
 
+/**
+ * @brief Registers and releases multiple ACL resources, optionally deferring the release 
+ *        using a task.
+ * 
+ * @tparam Args Types of the ACL resources.
+ * @param ctx Backend context which manages task submission and async mode.
+ * @param args Pointers to ACL resources to be released.
+ */
 template <typename... Args> 
 void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
-    std::vector<AnyAclResource> resources;
+    std::vector<any_acl_resource> resources;
     register_acl_resources(resources, std::forward<Args>(args)...);
     if(ctx.async_mode) {
-        auto task = std::make_unique<resource_task>(std::move(resources));
+        auto task = std::make_unique<release_resource_task>(std::move(resources));
         ctx.task_queue.submit_task(std::move(task));
     }
 }
 
+/**
+ * @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
+ * 
+ * @param ctx Backend context containing stream and async configuration.
+ * @param dst Destination memory address.
+ * @param src Source memory address.
+ * @param len Size of memory to copy (in bytes).
+ * @param kind Type of memory copy (host-to-device, device-to-host, etc.).
+ */
 inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
                                    const void * src, size_t len, aclrtMemcpyKind kind) {
     if (ctx.async_mode) {
@@ -840,6 +897,14 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
     }
 }
 
+/**
+ * @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
+ * 
+ * @param ctx Backend context containing stream and async configuration.
+ * @param buffer Memory buffer to be set.
+ * @param size Size of the memory buffer (in bytes).
+ * @param value Value to set in the buffer.
+ */
 inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
                                    size_t size, int value) {
     if (ctx.async_mode) {
diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h
@@ -212,21 +212,45 @@ struct ggml_cann_pool_alloc {
     ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete;
 };
 
+/**
+ * @brief Function pointer type for ACLNN operator calls.
+ */
 using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStream);
 
+/**
+ * @brief Base class for all CANN tasks to be submitted to the task queue.
+ * 
+ * Users should override the run_task() method with actual task logic.
+ */
 class cann_task {
 public:
     virtual void run_task() {}
 };
 
+/**
+ * @brief A lock-free, ring-buffer-based task queue for asynchronously executing cann_task instances.
+ */
 class cann_task_queue {
 public:
+    /**
+     * @brief Constructs a task queue with a fixed power-of-two capacity for a specific device.
+     * 
+     * @param capacity Queue capacity. Must be a power of 2.
+     * @param device Target device ID (used for context setting).
+     */
     explicit cann_task_queue(size_t capacity, int32_t device)
-        : buffer_(capacity), capacity_(capacity), head_(0), tail_(0), running_(false), device_(device), consuming_(false) {
+        : buffer_(capacity), capacity_(capacity), head_(0), tail_(0),
+          running_(false), device_(device), consuming_(false) {
         GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
         mask_ = capacity_ - 1;
     }
 
+    /**
+     * @brief Attempts to enqueue a task into the queue.
+     * 
+     * @param item Unique pointer to the task.
+     * @return true if the task was successfully enqueued, false if the queue was full.
+     */
     bool enqueue(std::unique_ptr<cann_task>&& item) {
         size_t tail = tail_.load(std::memory_order_relaxed);
         size_t next_tail = (tail + 1) & mask_;
@@ -243,6 +267,12 @@ class cann_task_queue {
         return true;
     }
 
+    /**
+     * @brief Dequeues all available tasks in bulk into an output vector.
+     * 
+     * @param output Output vector that will contain the dequeued tasks.
+     * @return Number of tasks dequeued.
+     */
     size_t dequeue_bulk(std::vector<std::unique_ptr<cann_task>>& output) {
         output.clear();
         size_t head = head_.load(std::memory_order_relaxed);
@@ -266,6 +296,11 @@ class cann_task_queue {
         return count;
     }
 
+    /**
+     * @brief Submits a task to the queue, and starts the worker thread if not already running.
+     * 
+     * @param task Task to be submitted.
+     */
     void submit_task(std::unique_ptr<cann_task>&& task) {
         while(!enqueue(std::move(task))) continue;
         
@@ -276,18 +311,29 @@ class cann_task_queue {
         
     }
 
+    /**
+     * @brief Checks whether the queue is empty.
+     * 
+     * @return true if the queue is empty, false otherwise.
+     */
     bool empty() const {
         return head_.load(std::memory_order_acquire) ==
                tail_.load(std::memory_order_acquire);
     }
 
+    /**
+     * @brief Waits until the queue is completely empty and no tasks are being processed.
+     */
     void wait() {
         if (!running_)
             return;
 
         while (!(empty() && consuming_)) {}
     }
 
+    /**
+     * @brief Stops the task queue and joins the worker thread.
+     */
     void stop() {
         running_ = false;
         wait();
@@ -298,6 +344,9 @@ class cann_task_queue {
     }
 
 private:
+    /**
+     * @brief Worker thread function that continuously dequeues and executes tasks.
+     */
     void execute() {
         std::vector<std::unique_ptr<cann_task>> tasks;
         ggml_cann_set_device(device_);
diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp
@@ -1502,9 +1502,7 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
 /**
  * @brief Sets tensor data asynchronously in the CANN backend.
  *
- * This function asynchronously sets tensor data in the CANN backend. Depending
- * on the tensor type, it may perform data transformations before copying data
- * to the device.
+ * This function asynchronously sets tensor data in the CANN backend. 
  *
  * @param backend Pointer to the CANN backend structure.
  * @param tensor Pointer to the tensor structure to set data for.
@@ -1530,6 +1528,17 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
         ACL_MEMCPY_HOST_TO_DEVICE);
 }
 
+/**
+ * @brief Gets tensor data asynchronously in the CANN backend.
+ *
+ * This function asynchronously gets tensor data in the CANN backend. 
+ *
+ * @param backend Pointer to the CANN backend structure.
+ * @param tensor Pointer to the tensor structure to get data from.
+ * @param data Pointer to the host buffer that receives the data copied from the tensor.
+ * @param offset Offset in bytes within the host data.
+ * @param size Size of the data to copy in bytes.
+ */
 static void ggml_backend_cann_get_tensor_async(
     ggml_backend_t backend, const ggml_tensor *tensor, void *data,
     size_t offset, size_t size) {