[Tensor] Scale factors support for unsigned int type

This pull request modifies the unsigned integer Tensor classes (UInt8Tensor, UInt16Tensor, UInt32Tensor) so that their quantization scale factors are stored in the same memory buffer, directly after the tensor data.

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
nnstreamer · Feb 11, 2025 · c1acf13 · c1acf13
1 parent bc06bd8
commit c1acf13
Show file tree

Hide file tree

Showing 5 changed files with 165 additions and 43 deletions.
diff --git a/nntrainer/tensor/tensor.cpp b/nntrainer/tensor/tensor.cpp
@@ -76,23 +76,29 @@ Tensor::Tensor(
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<UInt8Tensor>(new UInt8Tensor(d, t_type.format),
-                                         std::default_delete<UInt8Tensor>());
+  std::vector<float> const &scales, unsigned int zero_point,
+  ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
+  itensor = std::shared_ptr<UInt8Tensor>(
+    new UInt8Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    std::default_delete<UInt8Tensor>());
 }
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<UInt16Tensor>(new UInt16Tensor(d, t_type.format),
-                                          std::default_delete<UInt16Tensor>());
+  std::vector<float> const &scales, unsigned int zero_point,
+  ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
+  itensor = std::shared_ptr<UInt16Tensor>(
+    new UInt16Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    std::default_delete<UInt16Tensor>());
 }
 
 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
-  ml::train::TensorDim::TensorType t_type) {
-  itensor = std::shared_ptr<UInt32Tensor>(new UInt32Tensor(d, t_type.format),
-                                          std::default_delete<UInt32Tensor>());
+  std::vector<float> const &scales, unsigned int zero_point,
+  ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
+  itensor = std::shared_ptr<UInt32Tensor>(
+    new UInt32Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    std::default_delete<UInt32Tensor>());
 }
 
 Tensor::Tensor(std::string name_, Tformat fm, Tdatatype d_type) {

diff --git a/nntrainer/tensor/tensor.h b/nntrainer/tensor/tensor.h
@@ -253,7 +253,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
-         ml::train::TensorDim::TensorType t_type);
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
   /**
    * @brief     Constructor of Tensor
@@ -262,8 +263,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint8_t>>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+           t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -272,16 +275,19 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint8_t>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+           t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors, forwarded to the UInt16Tensor constructor
+   * @param[in] zero_point zero point, forwarded to the UInt16Tensor constructor
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ quantization scheme, forwarded to the UInt16Tensor
+   * constructor
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
-         ml::train::TensorDim::TensorType t_type);
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
   /**
    * @brief     Constructor of Tensor
@@ -290,8 +296,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint16_t>>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+           t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -300,16 +308,19 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint16_t>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+           t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
    * @param[in] d data for the Tensor. It needs to set format properly.
+   * @param[in] scales scale factors, forwarded to the UInt32Tensor constructor
+   * @param[in] zero_point zero point, forwarded to the UInt32Tensor constructor
    * @param[in] t_type Tensor Type
+   * @param[in] qscheme_ quantization scheme, forwarded to the UInt32Tensor
+   * constructor
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
-         ml::train::TensorDim::TensorType t_type);
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_);
 
   /**
    * @brief     Constructor of Tensor
@@ -318,8 +329,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint32_t>>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+           t_type, qscheme_){};
 
   /**
    * @brief     Constructor of Tensor
@@ -328,8 +341,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint32_t>> const &d,
-         ml::train::TensorDim::TensorType t_type) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, t_type){};
+         std::vector<float> const &scales, unsigned int zero_point,
+         ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+           t_type, qscheme_){};
 
   /**
    * @brief     Constructor of CharTensor (QINT8)

diff --git a/nntrainer/tensor/uint_tensor.cpp b/nntrainer/tensor/uint_tensor.cpp
@@ -15,20 +15,21 @@
 #ifdef __UINT_TENSOR_H__
 
 template <typename T>
-UIntTensor<T>::UIntTensor(std::string name_, Tformat fm) :
-  TensorBase(name_, fm, checkTensorDataType()) {}
+UIntTensor<T>::UIntTensor(std::string name_, Tformat fm, QScheme qscheme_) :
+  TensorBase(name_, fm, checkTensorDataType()), qscheme(qscheme_) {}
 
 template <typename T>
 UIntTensor<T>::UIntTensor(const TensorDim &d, bool alloc_now, Initializer init,
-                          std::string name) :
-  TensorBase(d, alloc_now, init, name) {
+                          std::string name, QScheme qscheme_) :
+  TensorBase(d, alloc_now, init, name), qscheme(qscheme_) {
   if (alloc_now)
     allocate();
 }
 
 template <typename T>
-UIntTensor<T>::UIntTensor(const TensorDim &d, const void *buf) :
-  UIntTensor(d, true) {
+UIntTensor<T>::UIntTensor(const TensorDim &d, const void *buf,
+                          QScheme qscheme_) :
+  UIntTensor(d, true, Initializer::NONE, "", qscheme_) {
   if (d.getDataLen() != 0) {
     if (buf != nullptr)
       copy(buf);
@@ -37,7 +38,9 @@ UIntTensor<T>::UIntTensor(const TensorDim &d, const void *buf) :
 
 template <typename T>
 UIntTensor<T>::UIntTensor(
-  std::vector<std::vector<std::vector<std::vector<T>>>> const &d, Tformat fm) {
+  std::vector<std::vector<std::vector<std::vector<T>>>> const &d,
+  std::vector<float> const &scales, unsigned int zero_point, Tformat fm,
+  QScheme qscheme_) {
   if (d.empty() || d[0].empty() || d[0][0].empty() || d[0][0][0].empty()) {
     throw std::out_of_range(
       "[Tensor] trying to initialize UIntTensor from empty vector");
@@ -60,7 +63,9 @@ UIntTensor<T>::UIntTensor(
   contiguous = true;
   initializer = Initializer::NONE;
 
-  MemoryData *mem_data = new MemoryData((void *)(new T[dim.getDataLen()]()));
+  MemoryData *mem_data =
+    new MemoryData((void *)(new T[dim.getDataLen() +
+                                  sizeof(float) / sizeof(T) * scale_size()]()));
   data = std::shared_ptr<MemoryData>(mem_data, [](MemoryData *mem_data) {
     delete[] mem_data->getAddr<T>();
     delete mem_data;
@@ -110,7 +115,9 @@ template <typename T> void UIntTensor<T>::allocate() {
     /// allocate new memory for the tensor data
     MemoryData *mem_data;
 
-    mem_data = new MemoryData((void *)(new T[dim.getDataLen()]{}));
+    mem_data = new MemoryData(
+      (void *)(new T[dim.getDataLen() +
+                     sizeof(float) / sizeof(T) * scale_size()]{}));
     data = std::shared_ptr<MemoryData>(mem_data, [](auto *mem_data) {
       delete[] mem_data->template getAddr<T>();
       delete mem_data;
@@ -141,6 +148,25 @@ template <typename T> void *UIntTensor<T>::getData(size_t idx) const {
   return data->getAddr<T>() + offset + idx;
 }
 
+/**
+ * @brief Get the address located right after the tensor elements
+ * @retval nullptr if no memory data is attached to this tensor
+ * @retval otherwise getData() advanced by size() elements of type T
+ */
+template <typename T> void *UIntTensor<T>::getScale() const {
+  if (data) {
+    data->validate();
+    T *const first_element = static_cast<T *>(getData());
+    return first_element + size();
+  }
+
+  return nullptr;
+}
+
+/**
+ * @brief Get the address of the idx-th float located after the tensor elements
+ * @param[in] idx index of the requested scale, valid range [0, scale_size())
+ * @retval nullptr if no memory data is attached to this tensor
+ * @retval otherwise address of float number idx counted from
+ *         getData() + size() elements of type T
+ * @throws std::invalid_argument if idx is not smaller than scale_size()
+ * @note NOTE(review): for element types narrower than float the returned
+ *       address is presumably float-aligned only when size() * sizeof(T) is a
+ *       multiple of sizeof(float) -- confirm callers can rely on that.
+ */
+template <typename T> void *UIntTensor<T>::getScale(size_t idx) const {
+  // idx == scale_size() would address one element past the last scale, so the
+  // upper bound has to be exclusive.
+  NNTR_THROW_IF(idx >= scale_size(), std::invalid_argument)
+    << "Tensor::getScale() index is not valid";
+
+  if (!data)
+    return nullptr;
+
+  data->validate();
+  T *const scale_begin = static_cast<T *>(getData()) + size();
+  return reinterpret_cast<float *>(scale_begin) + idx;
+}
+
 template <typename T> void *UIntTensor<T>::getAddress(unsigned int i) {
   size_t index = getIndex(batch(), channel(), height(), width());
   if (i > index) {
@@ -344,6 +370,34 @@ template <typename T> void UIntTensor<T>::print(std::ostream &out) const {
   }
 }
 
+/**
+ * @brief Write the quantization scheme of this tensor to a stream
+ * @param[in] file output stream handed to checkedWrite()
+ * @note Exactly sizeof(uint8_t) bytes of the qscheme member are written.
+ *       NOTE(review): presumably QScheme has a one-byte underlying type --
+ *       confirm against its declaration.
+ */
+template <typename T>
+void UIntTensor<T>::save_quantization_info(std::ostream &file) {
+  // The failure message must name this class so that errors are attributed to
+  // the unsigned int tensor and not to CharTensor.
+  checkedWrite(file, reinterpret_cast<char *>(&qscheme), sizeof(uint8_t),
+               "[UIntTensor::save] failed to write quantization information");
+}
+
+/**
+ * @brief Read the quantization scheme of this tensor from a file stream
+ * @param[in] file input file stream handed to checkedRead()
+ * @note Exactly sizeof(uint8_t) bytes are read into the qscheme member.
+ *       NOTE(review): presumably QScheme has a one-byte underlying type --
+ *       confirm against its declaration.
+ */
+template <typename T>
+void UIntTensor<T>::read_quantization_info(std::ifstream &file) {
+  // The failure message must name this class so that errors are attributed to
+  // the unsigned int tensor and not to CharTensor.
+  checkedRead(file, reinterpret_cast<char *>(&qscheme), sizeof(uint8_t),
+              "[UIntTensor::read] failed to read quantization information");
+}
+
+/**
+ * @brief Number of scale factors implied by the quantization scheme
+ * @retval 1 for QScheme::PER_TENSOR_AFFINE
+ * @retval width() for QScheme::PER_CHANNEL_AFFINE
+ * @retval 0 for every other scheme
+ */
+template <typename T> size_t UIntTensor<T>::scale_size() const {
+  if (qscheme == QScheme::PER_TENSOR_AFFINE)
+    return 1;
+
+  if (qscheme == QScheme::PER_CHANNEL_AFFINE)
+    return width();
+
+  return 0;
+}
+
+/**
+ * @brief Accessor for the quantization scheme stored in this tensor
+ * @retval current value of the qscheme member
+ */
+template <typename T> QScheme UIntTensor<T>::q_scheme() const {
+  return this->qscheme;
+}
+
 template <typename T> void UIntTensor<T>::copy(const void *buf) {
   NNTR_THROW_IF(!contiguous, std::invalid_argument)
     << getName() << " is not contiguous, cannot copy.";

diff --git a/nntrainer/tensor/uint_tensor.h b/nntrainer/tensor/uint_tensor.h
@@ -31,7 +31,8 @@ template <typename T> class UIntTensor : public TensorBase {
   /**
    * @brief     Basic Constructor of Tensor
    */
-  UIntTensor(std::string name_ = "", Tformat fm = Tformat::NCHW);
+  UIntTensor(std::string name_ = "", Tformat fm = Tformat::NCHW,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new UIntTensor object
@@ -42,15 +43,17 @@ template <typename T> class UIntTensor : public TensorBase {
    * @param name Name of the tensor
    */
   UIntTensor(const TensorDim &d, bool alloc_now,
-             Initializer init = Initializer::NONE, std::string name = "");
+             Initializer init = Initializer::NONE, std::string name = "",
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new UIntTensor object
    *
    * @param d Tensor dim for this tensor
    * @param buf buffer
    */
-  UIntTensor(const TensorDim &d, const void *buf = nullptr);
+  UIntTensor(const TensorDim &d, const void *buf = nullptr,
+             QScheme qscheme_ = QScheme::PER_TENSOR_AFFINE);
 
   /**
    * @brief Construct a new UIntTensor object
@@ -59,7 +62,8 @@ template <typename T> class UIntTensor : public TensorBase {
    * @param fm format for the Tensor
    */
   UIntTensor(std::vector<std::vector<std::vector<std::vector<T>>>> const &d,
-             Tformat fm);
+             std::vector<float> const &scales, unsigned int zero_point,
+             Tformat fm, QScheme qscheme_);
 
   /**
    * @brief Construct a new UIntTensor object
@@ -106,6 +110,16 @@ template <typename T> class UIntTensor : public TensorBase {
    */
   void *getData(size_t idx) const override;
 
+  /**
+   * @copydoc Tensor::getScale()
+   */
+  void *getScale() const override;
+
+  /**
+   * @copydoc Tensor::getScale(size_t idx)
+   */
+  void *getScale(size_t idx) const override;
+
   /**
    * @brief     i data index
    * @retval    address of ith data
@@ -221,7 +235,32 @@ template <typename T> class UIntTensor : public TensorBase {
    */
   void print(std::ostream &out) const override;
 
+  /**
+   * @copydoc TensorBase::save_quantization_info()
+   */
+  void save_quantization_info(std::ostream &file) override;
+
+  /**
+   * @copydoc TensorBase::read_quantization_info()
+   */
+  void read_quantization_info(std::ifstream &file) override;
+
+  /**
+   * @copydoc Tensor::scale_size()
+   */
+  size_t scale_size() const override;
+
+  /**
+   * @copydoc Tensor::q_scheme()
+   */
+  QScheme q_scheme() const;
+
 private:
+  /**
+   * @brief quantization scheme
+   */
+  QScheme qscheme;
+
   /**
    * @brief copy a buffer to @a this, the caller has to ensure that @a this is
    * initialized otherwise undefined behavior