Skip to content

Commit 0434f7f

Browse files
committed
CANN: add comment
1 parent 92821f6 commit 0434f7f

File tree

3 files changed

+157
-34
lines changed

3 files changed

+157
-34
lines changed

Diff for: ggml/src/ggml-cann/aclnn_ops.h

+95-30
Original file line numberDiff line numberDiff line change
@@ -651,53 +651,95 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
651651
*/
652652
void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
653653

654-
using AnyAclResource = std::unique_ptr<void, std::function<void(void*)>>;
654+
/**
655+
* @brief A generic wrapper for ACL resources with custom deleter support.
656+
*/
657+
using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
655658

659+
/**
660+
* @brief Trait structure used to define how to destroy a given ACL resource type.
661+
*
662+
* @tparam T ACL resource type.
663+
*/
656664
template<typename T>
657-
struct AclResourceTraits;
665+
struct acl_resource_traits;
666+
667+
/**
668+
* @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
669+
*/
658670
template<>
659-
struct AclResourceTraits<aclTensor> {
671+
struct acl_resource_traits<aclTensor> {
660672
static void destroy(void* p) {
661673
ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
662674
}
663675
};
676+
677+
/**
678+
* @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
679+
*/
664680
template<>
665-
struct AclResourceTraits<aclIntArray> {
681+
struct acl_resource_traits<aclIntArray> {
666682
static void destroy(void* p) {
667683
ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
668684
}
669685
};
686+
687+
/**
688+
* @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
689+
*/
670690
template<>
671-
struct AclResourceTraits<aclScalar> {
691+
struct acl_resource_traits<aclScalar> {
672692
static void destroy(void* p) {
673693
ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
674694
}
675695
};
696+
697+
/**
698+
* @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
699+
*/
676700
template<>
677-
struct AclResourceTraits<aclTensorList> {
701+
struct acl_resource_traits<aclTensorList> {
678702
static void destroy(void* p) {
679703
ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
680704
}
681705
};
682706

707+
/**
708+
* @brief Creates a generic ACL resource wrapper with proper destruction logic.
709+
*
710+
* @tparam T ACL resource type.
711+
* @param ptr Raw pointer to ACL resource.
712+
* @return any_acl_resource Smart pointer that handles destruction.
713+
*/
683714
template<typename T>
684-
AnyAclResource make_acl_resource(T* ptr) {
685-
return AnyAclResource(
715+
any_acl_resource make_acl_resource(T* ptr) {
716+
return any_acl_resource(
686717
static_cast<void*>(ptr),
687718
[](void* p) {
688-
AclResourceTraits<T>::destroy(p);
719+
acl_resource_traits<T>::destroy(p);
689720
}
690721
);
691722
}
692723

724+
/**
725+
* @brief Registers multiple ACL resources into a vector for lifetime management.
726+
*
727+
* @tparam Args Variadic list of ACL resource types.
728+
* @param vec Target vector to hold ACL resources.
729+
* @param args Raw pointers to ACL resources.
730+
*/
693731
template<typename... Args>
694-
void register_acl_resources(std::vector<AnyAclResource>& vec, Args*... args) {
732+
void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
695733
(vec.emplace_back(make_acl_resource(args)), ...);
696734
}
697735

736+
/**
737+
* @brief Task class that wraps the execution of an aclnn function call.
738+
*/
698739
class aclnn_task : public cann_task {
699740
public:
700-
aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr, uint64_t workspace_size, aclOpExecutor * executor,
741+
aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
742+
uint64_t workspace_size, aclOpExecutor * executor,
701743
aclrtStream stream) :
702744
aclnn_func_(aclnn_func),
703745
workspace_addr_(workspace_addr),
@@ -714,38 +756,33 @@ class aclnn_task : public cann_task {
714756
aclOpExecutor * executor_;
715757
aclrtStream stream_;
716758
};
717-
718-
class resource_task : public cann_task {
759+
760+
/**
761+
* @brief Task class that releases ACL resources after usage.
762+
*/
763+
class release_resource_task : public cann_task {
719764
public:
720-
resource_task(std::vector<AnyAclResource>&& resources){
765+
release_resource_task(std::vector<any_acl_resource>&& resources){
721766
resource_ = std::move(resources);
722767
}
723768

724769
virtual void run_task() override {
725770
resource_.clear();
726771
}
727772
private:
728-
std::vector<AnyAclResource> resource_;
729-
};
730-
731-
class free_ptr_task : public cann_task {
732-
public:
733-
free_ptr_task(void* ptr) : ptr_(ptr) {}
734-
735-
virtual void run_task() override {
736-
free(ptr_);
737-
}
738-
private:
739-
void* ptr_;
773+
std::vector<any_acl_resource> resource_;
740774
};
741775

776+
/**
777+
* @brief Task class for performing asynchronous memory copy operations.
778+
*/
742779
class async_memcpy_task : public cann_task {
743780
public:
744-
async_memcpy_task(void* dst, const void* src, size_t size, aclrtMemcpyKind kind, aclrtStream stream)
781+
async_memcpy_task(void* dst, const void* src, size_t size,
782+
aclrtMemcpyKind kind, aclrtStream stream)
745783
: dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
746784

747785
virtual void run_task() override {
748-
749786
ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
750787
}
751788
private:
@@ -756,6 +793,9 @@ class async_memcpy_task : public cann_task {
756793
aclrtStream stream_;
757794
};
758795

796+
/**
797+
* @brief Task class for performing asynchronous memory set operations.
798+
*/
759799
class async_memset_task : public cann_task {
760800
public:
761801
async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
@@ -810,16 +850,33 @@ class async_memset_task : public cann_task {
810850
} \
811851
} while (0)
812852

853+
/**
854+
* @brief Registers and releases multiple ACL resources, optionally deferring the release
855+
* using a task.
856+
*
857+
* @tparam Args Types of the ACL resources.
858+
* @param ctx Backend context which manages task submission and async mode.
859+
* @param args Pointers to ACL resources to be released.
860+
*/
813861
template <typename... Args>
814862
void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
815-
std::vector<AnyAclResource> resources;
863+
std::vector<any_acl_resource> resources;
816864
register_acl_resources(resources, std::forward<Args>(args)...);
817865
if(ctx.async_mode) {
818-
auto task = std::make_unique<resource_task>(std::move(resources));
866+
auto task = std::make_unique<release_resource_task>(std::move(resources));
819867
ctx.task_queue.submit_task(std::move(task));
820868
}
821869
}
822870

871+
/**
872+
* @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
873+
*
874+
* @param ctx Backend context containing stream and async configuration.
875+
* @param dst Destination memory address.
876+
* @param src Source memory address.
877+
* @param len Size of memory to copy (in bytes).
878+
* @param kind Type of memory copy (host-to-device, device-to-host, etc).
879+
*/
823880
inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
824881
const void * src, size_t len, aclrtMemcpyKind kind) {
825882
if (ctx.async_mode) {
@@ -840,6 +897,14 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
840897
}
841898
}
842899

900+
/**
901+
* @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
902+
*
903+
* @param ctx Backend context containing stream and async configuration.
904+
* @param buffer Memory buffer to be set.
905+
* @param size Size of the memory buffer (in bytes).
906+
* @param value Value to set in the buffer.
907+
*/
843908
inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
844909
size_t size, int value) {
845910
if (ctx.async_mode) {

Diff for: ggml/src/ggml-cann/common.h

+50-1
Original file line numberDiff line numberDiff line change
@@ -212,21 +212,45 @@ struct ggml_cann_pool_alloc {
212212
ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete;
213213
};
214214

215+
/**
216+
* @brief Function pointer type for ACLNN operator calls.
217+
*/
215218
using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStream);
216219

220+
/**
221+
* @brief Base class for all CANN tasks to be submitted to the task queue.
222+
*
223+
* Users should override the run_task() method with actual task logic.
224+
*/
217225
class cann_task {
218226
public:
219227
virtual void run_task() {}
220228
};
221229

230+
/**
231+
* @brief A lock-free ring-buffer based task queue for asynchronously executing cann_task instances.
232+
*/
222233
class cann_task_queue {
223234
public:
235+
/**
236+
* @brief Constructs a task queue with a fixed power-of-two capacity for a specific device.
237+
*
238+
* @param capacity Queue capacity. Must be a power of 2.
239+
* @param device Target device ID (used for context setting).
240+
*/
224241
explicit cann_task_queue(size_t capacity, int32_t device)
225-
: buffer_(capacity), capacity_(capacity), head_(0), tail_(0), running_(false), device_(device), consuming_(false) {
242+
: buffer_(capacity), capacity_(capacity), head_(0), tail_(0),
243+
running_(false), device_(device), consuming_(false) {
226244
GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
227245
mask_ = capacity_ - 1;
228246
}
229247

248+
/**
249+
* @brief Attempts to enqueue a task into the queue.
250+
*
251+
* @param item Unique pointer to the task.
252+
* @return true if the task was successfully enqueued, false if the queue was full.
253+
*/
230254
bool enqueue(std::unique_ptr<cann_task>&& item) {
231255
size_t tail = tail_.load(std::memory_order_relaxed);
232256
size_t next_tail = (tail + 1) & mask_;
@@ -243,6 +267,12 @@ class cann_task_queue {
243267
return true;
244268
}
245269

270+
/**
271+
* @brief Dequeues all available tasks in bulk into an output vector.
272+
*
273+
* @param output Output vector that will contain the dequeued tasks.
274+
* @return Number of tasks dequeued.
275+
*/
246276
size_t dequeue_bulk(std::vector<std::unique_ptr<cann_task>>& output) {
247277
output.clear();
248278
size_t head = head_.load(std::memory_order_relaxed);
@@ -266,6 +296,11 @@ class cann_task_queue {
266296
return count;
267297
}
268298

299+
/**
300+
* @brief Submits a task to the queue, and starts the worker thread if not already running.
301+
*
302+
* @param task Task to be submitted.
303+
*/
269304
void submit_task(std::unique_ptr<cann_task>&& task) {
270305
while(!enqueue(std::move(task))) continue;
271306

@@ -276,18 +311,29 @@ class cann_task_queue {
276311

277312
}
278313

314+
/**
315+
* @brief Checks whether the queue is empty.
316+
*
317+
* @return true if the queue is empty, false otherwise.
318+
*/
279319
bool empty() const {
280320
return head_.load(std::memory_order_acquire) ==
281321
tail_.load(std::memory_order_acquire);
282322
}
283323

324+
/**
325+
* @brief Waits until the queue is completely empty and no tasks are being processed.
326+
*/
284327
void wait() {
285328
if (!running_)
286329
return;
287330

288331
while (!(empty() && consuming_)) {}
289332
}
290333

334+
/**
335+
* @brief Stops the task queue and joins the worker thread.
336+
*/
291337
void stop() {
292338
running_ = false;
293339
wait();
@@ -298,6 +344,9 @@ class cann_task_queue {
298344
}
299345

300346
private:
347+
/**
348+
* @brief Worker thread function that continuously dequeues and executes tasks.
349+
*/
301350
void execute() {
302351
std::vector<std::unique_ptr<cann_task>> tasks;
303352
ggml_cann_set_device(device_);

Diff for: ggml/src/ggml-cann/ggml-cann.cpp

+12-3
Original file line numberDiff line numberDiff line change
@@ -1502,9 +1502,7 @@ static void ggml_backend_cann_free(ggml_backend_t backend) {
15021502
/**
15031503
* @brief Sets tensor data asynchronously in the CANN backend.
15041504
*
1505-
* This function asynchronously sets tensor data in the CANN backend. Depending
1506-
* on the tensor type, it may perform data transformations before copying data
1507-
* to the device.
1505+
* This function asynchronously sets tensor data in the CANN backend.
15081506
*
15091507
* @param backend Pointer to the CANN backend structure.
15101508
* @param tensor Pointer to the tensor structure to set data for.
@@ -1530,6 +1528,17 @@ static void ggml_backend_cann_set_tensor_async(ggml_backend_t backend,
15301528
ACL_MEMCPY_HOST_TO_DEVICE);
15311529
}
15321530

1531+
/**
1532+
* @brief Gets tensor data asynchronously in the CANN backend.
1533+
*
1534+
* This function asynchronously gets tensor data in the CANN backend.
1535+
*
1536+
* @param backend Pointer to the CANN backend structure.
1537+
* @param tensor Pointer to the tensor structure to get data from.
1538+
* @param data Pointer to the host buffer that receives the data copied from the tensor.
1539+
* @param offset Offset in bytes within the tensor data at which the copy starts.
1540+
* @param size Size of the data to copy in bytes.
1541+
*/
15331542
static void ggml_backend_cann_get_tensor_async(
15341543
ggml_backend_t backend, const ggml_tensor *tensor, void *data,
15351544
size_t offset, size_t size) {

0 commit comments

Comments
 (0)