@@ -651,53 +651,95 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
651
651
*/
652
652
void ggml_cann_elu (ggml_backend_cann_context& ctx, ggml_tensor* dst);
653
653
654
/**
 * @brief Type-erased owning handle for an ACL resource.
 *
 * The resource is stored as an untyped pointer together with a callable
 * deleter; the deleter is invoked with that pointer when a non-null handle
 * is destroyed or reset.
 */
using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
655
658
659
/**
 * @brief Customization point describing how one kind of ACL resource is destroyed.
 *
 * Only declared here; each supported resource type provides an explicit
 * specialization exposing a static `destroy(void*)` function.
 *
 * @tparam T ACL resource type.
 */
template <typename T>
struct acl_resource_traits;
666
+
667
+ /* *
668
+ * @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
669
+ */
658
670
template <>
659
- struct AclResourceTraits <aclTensor> {
671
+ struct acl_resource_traits <aclTensor> {
660
672
static void destroy (void * p) {
661
673
ACL_CHECK (aclDestroyTensor (static_cast <aclTensor*>(p)));
662
674
}
663
675
};
676
+
677
+ /* *
678
+ * @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
679
+ */
664
680
template <>
665
- struct AclResourceTraits <aclIntArray> {
681
+ struct acl_resource_traits <aclIntArray> {
666
682
static void destroy (void * p) {
667
683
ACL_CHECK (aclDestroyIntArray (static_cast <aclIntArray*>(p)));
668
684
}
669
685
};
686
+
687
+ /* *
688
+ * @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
689
+ */
670
690
template <>
671
- struct AclResourceTraits <aclScalar> {
691
+ struct acl_resource_traits <aclScalar> {
672
692
static void destroy (void * p) {
673
693
ACL_CHECK (aclDestroyScalar (static_cast <aclScalar*>(p)));
674
694
}
675
695
};
696
+
697
+ /* *
698
+ * @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
699
+ */
676
700
template <>
677
- struct AclResourceTraits <aclTensorList> {
701
+ struct acl_resource_traits <aclTensorList> {
678
702
static void destroy (void * p) {
679
703
ACL_CHECK (aclDestroyTensorList (static_cast <aclTensorList*>(p)));
680
704
}
681
705
};
682
706
707
+ /* *
708
+ * @brief Creates a generic ACL resource wrapper with proper destruction logic.
709
+ *
710
+ * @tparam T ACL resource type.
711
+ * @param ptr Raw pointer to ACL resource.
712
+ * @return any_acl_resource Smart pointer that handles destruction.
713
+ */
683
714
template <typename T>
684
- AnyAclResource make_acl_resource (T* ptr) {
685
- return AnyAclResource (
715
+ any_acl_resource make_acl_resource (T* ptr) {
716
+ return any_acl_resource (
686
717
static_cast <void *>(ptr),
687
718
[](void * p) {
688
- AclResourceTraits <T>::destroy (p);
719
+ acl_resource_traits <T>::destroy (p);
689
720
}
690
721
);
691
722
}
692
723
724
+ /* *
725
+ * @brief Registers multiple ACL resources into a vector for lifetime management.
726
+ *
727
+ * @tparam Args Variadic list of ACL resource types.
728
+ * @param vec Target vector to hold ACL resources.
729
+ * @param args Raw pointers to ACL resources.
730
+ */
693
731
template <typename ... Args>
694
- void register_acl_resources (std::vector<AnyAclResource >& vec, Args*... args) {
732
+ void register_acl_resources (std::vector<any_acl_resource >& vec, Args*... args) {
695
733
(vec.emplace_back (make_acl_resource (args)), ...);
696
734
}
697
735
736
+ /* *
737
+ * @brief Task class that wraps the execution of an aclnn function call.
738
+ */
698
739
class aclnn_task : public cann_task {
699
740
public:
700
- aclnn_task (aclnn_func_t aclnn_func, void * workspace_addr, uint64_t workspace_size, aclOpExecutor * executor,
741
+ aclnn_task (aclnn_func_t aclnn_func, void * workspace_addr,
742
+ uint64_t workspace_size, aclOpExecutor * executor,
701
743
aclrtStream stream) :
702
744
aclnn_func_ (aclnn_func),
703
745
workspace_addr_ (workspace_addr),
@@ -714,38 +756,33 @@ class aclnn_task : public cann_task {
714
756
aclOpExecutor * executor_;
715
757
aclrtStream stream_;
716
758
};
717
-
718
- class resource_task : public cann_task {
759
+
760
+ /* *
761
+ * @brief Task class that releases ACL resources after usage.
762
+ */
763
+ class release_resource_task : public cann_task {
719
764
public:
720
- resource_task (std::vector<AnyAclResource >&& resources){
765
+ release_resource_task (std::vector<any_acl_resource >&& resources){
721
766
resource_ = std::move (resources);
722
767
}
723
768
724
769
virtual void run_task () override {
725
770
resource_.clear ();
726
771
}
727
772
private:
728
- std::vector<AnyAclResource> resource_;
729
- };
730
-
731
- class free_ptr_task : public cann_task {
732
- public:
733
- free_ptr_task (void * ptr) : ptr_(ptr) {}
734
-
735
- virtual void run_task () override {
736
- free (ptr_);
737
- }
738
- private:
739
- void * ptr_;
773
+ std::vector<any_acl_resource> resource_;
740
774
};
741
775
776
+ /* *
777
+ * @brief Task class for performing asynchronous memory copy operations.
778
+ */
742
779
class async_memcpy_task : public cann_task {
743
780
public:
744
- async_memcpy_task (void * dst, const void * src, size_t size, aclrtMemcpyKind kind, aclrtStream stream)
781
+ async_memcpy_task (void * dst, const void * src, size_t size,
782
+ aclrtMemcpyKind kind, aclrtStream stream)
745
783
: dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
746
784
747
785
virtual void run_task () override {
748
-
749
786
ACL_CHECK (aclrtMemcpyAsync (dst_, size_, src_, size_, kind_, stream_));
750
787
}
751
788
private:
@@ -756,6 +793,9 @@ class async_memcpy_task : public cann_task {
756
793
aclrtStream stream_;
757
794
};
758
795
796
+ /* *
797
+ * @brief Task class for performing asynchronous memory set operations.
798
+ */
759
799
class async_memset_task : public cann_task {
760
800
public:
761
801
async_memset_task (void * buffer, size_t size, int32_t value, aclrtStream stream)
@@ -810,16 +850,33 @@ class async_memset_task : public cann_task {
810
850
} \
811
851
} while (0 )
812
852
853
+ /* *
854
+ * @brief Registers and releases multiple ACL resources, optionally deferring the release
855
+ * using a task.
856
+ *
857
+ * @tparam Args Types of the ACL resources.
858
+ * @param ctx Backend context which manages task submission and async mode.
859
+ * @param args Pointers to ACL resources to be released.
860
+ */
813
861
template <typename ... Args>
814
862
void ggml_cann_release_resources (ggml_backend_cann_context & ctx, Args &&... args) {
815
- std::vector<AnyAclResource > resources;
863
+ std::vector<any_acl_resource > resources;
816
864
register_acl_resources (resources, std::forward<Args>(args)...);
817
865
if (ctx.async_mode ) {
818
- auto task = std::make_unique<resource_task >(std::move (resources));
866
+ auto task = std::make_unique<release_resource_task >(std::move (resources));
819
867
ctx.task_queue .submit_task (std::move (task));
820
868
}
821
869
}
822
870
871
+ /* *
872
+ * @brief Performs an asynchronous memory copy operation, optionally deferred via task submission.
873
+ *
874
+ * @param ctx Backend context containing stream and async configuration.
875
+ * @param dst Destination memory address.
876
+ * @param src Source memory address.
877
+ * @param len Size of memory to copy (in bytes).
878
+ * @param kind Type of memory copy (host-to-device, device-to-host, etc).
879
+ */
823
880
inline void ggml_cann_async_memcpy (ggml_backend_cann_context & ctx, void * dst,
824
881
const void * src, size_t len, aclrtMemcpyKind kind) {
825
882
if (ctx.async_mode ) {
@@ -840,6 +897,14 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
840
897
}
841
898
}
842
899
900
+ /* *
901
+ * @brief Performs an asynchronous memory set operation, optionally deferred via task submission.
902
+ *
903
+ * @param ctx Backend context containing stream and async configuration.
904
+ * @param buffer Memory buffer to be set.
905
+ * @param size Size of the memory buffer (in bytes).
906
+ * @param value Value to set in the buffer.
907
+ */
843
908
inline void ggml_cann_async_memset (ggml_backend_cann_context & ctx, void * buffer,
844
909
size_t size, int value) {
845
910
if (ctx.async_mode ) {
0 commit comments