Skip to content

Commit

Permalink
[ GPU/OpenCL ] change transpose_cl to inherit LayerImplCl
Browse files Browse the repository at this point in the history
- This commit updates transpose_cl.cpp/h to inherit LayerImplCl.
- This commit implements registerClKernels() of transpose_cl layer.
- This commit updates cl_context.cpp (applying transpose_cl's update)
- This is the last commit to complete #2723.
- This can close #2723.

Self evaluation:

Build test: [X]Passed [ ]Failed [ ]Skipped
Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Eunju Yang <[email protected]>
  • Loading branch information
EunjuYang authored and jijoongmoon committed Jan 22, 2025
1 parent 78889bc commit 410d6fc
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 18 deletions.
9 changes: 5 additions & 4 deletions nntrainer/cl_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,11 @@ static void add_default_object(ClContext &cc) {
ConcatLayerCl::type, ml::train::LayerType::LAYER_CONCAT);
}

// @todo transposlayercl also needs to be updated.
cc.registerFactory(nntrainer::createLayer<TransposeLayerCl>,
TransposeLayerCl::type,
ml::train::LayerType::LAYER_TRANSPOSE);
if (TransposeLayerCl::registerClKernels()) {
cc.registerFactory(nntrainer::createLayer<TransposeLayerCl>,
TransposeLayerCl::type,
ml::train::LayerType::LAYER_TRANSPOSE);
}
}

static void registerer(ClContext &cc) noexcept {
Expand Down
9 changes: 4 additions & 5 deletions nntrainer/layers/cl_layers/addition_layer_cl.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,11 +95,10 @@ class AdditionLayerCL : public LayerImplCl {
const std::string getType() const override { return AdditionLayerCL::type; };

/**
* @brief Register Cl kernels for `AdditionLayerCL`.
* @details `AdditionLayerCL` has no layer-specific kernels; it only uses
* cl_blas_kernels, so the registration step is bypassed and this function
* always reports success. If layer-specific kernels are added later, this
* function should be updated to register them.
* @return true always
*/
static bool registerClKernels() { return true; }

Expand Down
21 changes: 12 additions & 9 deletions nntrainer/layers/cl_layers/transpose_cl.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <common_properties.h>
#include <layer_devel.h>
#include <layer_impl_cl.h>
#include <opencl_buffer.h>
#include <opencl_kernel.h>

Expand All @@ -25,13 +26,13 @@ namespace nntrainer {
* @brief A transpose layer.
*
*/
class TransposeLayerCl final : public Layer {
class TransposeLayerCl final : public LayerImplCl {
public:
/**
* @brief Construct a new transpose layer object
*
*/
TransposeLayerCl() : Layer(), transpose_props(props::Print()) {}
TransposeLayerCl() : LayerImplCl(), transpose_props(props::Print()) {}

/**
* @brief Destroy the transpose layer object
Expand Down Expand Up @@ -82,15 +83,17 @@ class TransposeLayerCl final : public Layer {
*/
void setProperty(const std::vector<std::string> &values) override;

inline static const std::string type = "transpose";
/**
* @brief Register Cl kernels for `TransposeLayerCl`.
* @details `TransposeLayerCl` does not rely on any layer-specific kernels,
* so the registration step is bypassed and this function always reports
* success.
* @return true always
*/
static bool registerClKernels() { return true; }

static opencl::Kernel kernel_transpose_axis0;
static opencl::Kernel kernel_transpose_fp16_axis0;
static opencl::Kernel kernel_transpose_axis1;
static opencl::Kernel kernel_transpose_fp16_axis1;
static opencl::Kernel kernel_transpose_axis2;
static opencl::Kernel kernel_transpose_fp16_axis2;
inline static const std::string type = "transpose";

private:
std::tuple<props::Print> transpose_props; /**< transpose layer properties :
Print (the only property held by this tuple) */
};
Expand Down

0 comments on commit 410d6fc

Please sign in to comment.