[Tensor] Zero point support for unsigned int type 
This pull request updates the unsigned integer Tensor classes (UINT8, UINT16, UINT32) to manage zero points in their memory alongside the quantized data and scale factors.
Note that the number of zero points matches the number of scale factors.
For now, the zero points simply reuse the scale factor size; this will be revised in a follow-up.
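
For illustration only (not code from this commit), the updated constructors take one zero point per scale factor. The shapes, values, and include path below are hypothetical:

```cpp
// Hypothetical sketch of the new constructor overload; shapes, values, and
// the include path are assumptions made for illustration.
#include <cstdint>
#include <vector>

#include <tensor.h> // nntrainer Tensor header; adjust the path to your build setup

int main() {
  // 1 (batch) x 2 (channel) x 1 (height) x 2 (width) quantized values
  std::vector<std::vector<std::vector<std::vector<uint8_t>>>> data = {
    {{{10, 20}}, {{30, 40}}}};
  std::vector<float> scales = {0.5f, 0.25f};         // one scale per channel (assumed)
  std::vector<unsigned int> zero_points = {128, 64}; // must match scales.size()

  nntrainer::Tensor t(data, scales, zero_points,
                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT8},
                      nntrainer::QScheme::PER_CHANNEL_AFFINE);
  return 0;
}
```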

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
djeong20 authored and jijoongmoon committed Feb 20, 2025
1 parent 4f86abc commit c1e5c60
Showing 7 changed files with 482 additions and 162 deletions.
41 changes: 27 additions & 14 deletions nntrainer/tensor/tensor.cpp
@@ -35,10 +35,8 @@ Tensor::Tensor(
   QScheme qscheme_) {
   switch (qscheme_) {
   case QScheme::PER_TENSOR_AFFINE:
-    std::cout << "per tensor\n";
     break;
   case QScheme::PER_CHANNEL_AFFINE:
-    std::cout << "per channel\n";
     break;
   default:
     break;
@@ -76,28 +74,31 @@ Tensor::Tensor(

 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
-  std::vector<float> const &scales, unsigned int zero_point,
+  std::vector<float> const &scales,
+  std::vector<unsigned int> const &zero_points,
   ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
   itensor = std::shared_ptr<UInt8Tensor>(
-    new UInt8Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    new UInt8Tensor(d, scales, zero_points, t_type.format, qscheme_),
     std::default_delete<UInt8Tensor>());
 }

 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
-  std::vector<float> const &scales, unsigned int zero_point,
+  std::vector<float> const &scales,
+  std::vector<unsigned int> const &zero_points,
   ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
   itensor = std::shared_ptr<UInt16Tensor>(
-    new UInt16Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    new UInt16Tensor(d, scales, zero_points, t_type.format, qscheme_),
     std::default_delete<UInt16Tensor>());
 }

 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
-  std::vector<float> const &scales, unsigned int zero_point,
+  std::vector<float> const &scales,
+  std::vector<unsigned int> const &zero_points,
   ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
   itensor = std::shared_ptr<UInt32Tensor>(
-    new UInt32Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    new UInt32Tensor(d, scales, zero_points, t_type.format, qscheme_),
     std::default_delete<UInt32Tensor>());
 }

@@ -1182,10 +1183,16 @@ void Tensor::save(std::ostream &file) {
   itensor->save_quantization_info(file);

   /// @note Scale factors are temporary fixed to float for now
-  std::streamsize sz =
-    static_cast<std::streamsize>(bytes() + scale_size() * sizeof(float));
+  size_t tensor_bytes = bytes() + scale_size() * sizeof(float);
+
+  if (getDataType() == Tdatatype::UINT8 || getDataType() == Tdatatype::UINT16 ||
+      getDataType() == Tdatatype::UINT32) {
+    tensor_bytes += scale_size() * sizeof(unsigned int);
+  }
+
+  std::streamsize sz = static_cast<std::streamsize>(tensor_bytes);
   NNTR_THROW_IF(sz < 0, std::invalid_argument)
-    << "save size: " << bytes() + scale_size() * sizeof(float)
+    << "save size: " << tensor_bytes
     << " is too big. It cannot be represented by std::streamsize";

   checkedWrite(file, getData<char>(), sz, "[Tensor::save] operation failed");
@@ -1200,11 +1207,17 @@ void Tensor::read(std::ifstream &file) {
   itensor->read_quantization_info(file);

   /// @note Scale factors are temporary fixed to float for now
-  std::streamsize sz =
-    static_cast<std::streamsize>(bytes() + scale_size() * sizeof(float));
+  size_t tensor_bytes = bytes() + scale_size() * sizeof(float);
+
+  if (getDataType() == Tdatatype::UINT8 || getDataType() == Tdatatype::UINT16 ||
+      getDataType() == Tdatatype::UINT32) {
+    tensor_bytes += scale_size() * sizeof(unsigned int);
+  }
+
+  std::streamsize sz = static_cast<std::streamsize>(tensor_bytes);

   NNTR_THROW_IF(sz < 0, std::invalid_argument)
-    << "read size: " << bytes() + scale_size() * sizeof(float)
+    << "read size: " << tensor_bytes
     << " is too big. It cannot be represented by std::streamsize";

   checkedRead(file, getData<char>(), sz, "[Tensor::read] operation failed");
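
With the change above, the serialized blob for UINT8/UINT16/UINT32 tensors covers the quantized data, the float scale factors, and one unsigned int zero point per scale factor. A minimal sketch of that size computation, with hypothetical names (not functions from the codebase):

```cpp
#include <cstddef>

// Sketch of the buffer size used by Tensor::save/read after this change.
// 'quantized_blob_size' and its parameters are hypothetical stand-ins.
size_t quantized_blob_size(size_t data_bytes, size_t scale_count,
                           bool is_unsigned_int_type) {
  size_t total = data_bytes + scale_count * sizeof(float); // data + float scales
  if (is_unsigned_int_type)                                // UINT8 / UINT16 / UINT32
    total += scale_count * sizeof(unsigned int);           // one zero point per scale
  return total;
}
```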
53 changes: 38 additions & 15 deletions nntrainer/tensor/tensor.h
@@ -253,7 +253,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_);

   /**
@@ -263,9 +264,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint8_t>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -275,9 +277,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint8_t>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -286,7 +289,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_);

   /**
@@ -296,9 +300,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint16_t>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -308,9 +313,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint16_t>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -319,7 +325,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_);

   /**
@@ -329,9 +336,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint32_t>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -341,9 +349,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint32_t>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -551,6 +560,20 @@ class Tensor {
     return (T *)itensor->getScale(idx);
   }

+  /**
+   * @brief return zero point pointer of Tensor
+   * @retval unsigned int pointer
+   */
+  unsigned int *getZeroPoint() const { return itensor->getZeroPoint(); }
+
+  /**
+   * @brief return zero point pointer of Tensor
+   * @retval unsigned int pointer
+   */
+  unsigned int *getZeroPoint(size_t idx) const {
+    return itensor->getZeroPoint(idx);
+  }
+
   /**
    * @brief i data index
    * @retval template T pointer (address of ith data)
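
The new accessors mirror getScale(). A hedged usage sketch — the tensor `t`, its contents, and the dequantization step are assumptions, not code from this commit:

```cpp
// Assumes 't' is a per-channel quantized UINT8 nntrainer::Tensor constructed
// with matching scales and zero points, as in the constructors above.
unsigned int *zps = t.getZeroPoint();  // pointer to the first zero point
unsigned int zp1 = *t.getZeroPoint(1); // zero point at index 1
float scale0 = *t.getScale<float>();   // first scale factor

// Standard affine dequantization of the first element (illustrative only):
float real0 =
  (static_cast<float>(*t.getData<uint8_t>()) - static_cast<float>(zps[0])) *
  scale0;
```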
18 changes: 18 additions & 0 deletions nntrainer/tensor/tensor_base.h
@@ -209,6 +209,24 @@ class TensorBase {
       getStringDataType());
   }

+  /**
+   * @copydoc Tensor::getZeroPoint()
+   */
+  virtual unsigned int *getZeroPoint() const {
+    throw std::invalid_argument(
+      "Tensor::getZeroPoint() is not supported in tensor data type " +
+      getStringDataType());
+  }
+
+  /**
+   * @copydoc Tensor::getZeroPoint(size_t idx)
+   */
+  virtual unsigned int *getZeroPoint(size_t idx) const {
+    throw std::invalid_argument(
+      "Tensor::getZeroPoint() is not supported in tensor data type " +
+      getStringDataType());
+  }
+
   /**
    * @brief i data index
    * @retval address of ith data
18 changes: 13 additions & 5 deletions nntrainer/tensor/tensor_pool.cpp
Expand Up @@ -186,17 +186,25 @@ void TensorPool::finalize(const MemoryPlanner &planner,
* 3. requestMemory for all the tensors and set their tokens
* @note +1 is to make the validity_end exlusive in the interval range
*/
size_t tensor_bytes =
spec.tensor->bytes() + spec.tensor->scale_size() * sizeof(float);

/// @note this is a temporal way to reserve memory space for zero point
if (spec.tensor->getDataType() == Tdatatype::UINT8 ||
spec.tensor->getDataType() == Tdatatype::UINT16 ||
spec.tensor->getDataType() == Tdatatype::UINT32) {
tensor_bytes += spec.tensor->scale_size() * sizeof(unsigned int);
}

details->token = mem_pool->requestMemory(
spec.tensor->bytes() + spec.tensor->scale_size() * sizeof(float),
validity_start, validity_end + 1, details->exec_order, details->lifespan,
spec.is_weight_grad);
tensor_bytes, validity_start, validity_end + 1, details->exec_order,
details->lifespan, spec.is_weight_grad);
#ifdef DEBUG
if (details->token == 0)
throw std::runtime_error("Received invalid token from memory pool");
#endif

bytes_requested +=
spec.tensor->bytes() + spec.tensor->scale_size() * sizeof(float);
bytes_requested += tensor_bytes;
}

/** 4. finalizeLayout for the memory pool. */