[Tensor] Zero point support for unsigned int type 
This pull request updates the unsigned integer Tensor classes (UINT8, UINT16, UINT32) to manage zero points in their memory alongside the quantized data and scale factors.
Note that the number of zero points matches the number of scale factors.
For now, the zero points simply reuse the scale factor size; this will be revised in a follow-up.
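
For illustration only (not code from this commit), the updated constructors take one zero point per scale factor. The shapes, values, and include path below are hypothetical:

```cpp
// Hypothetical sketch of the new constructor overload; shapes, values, and
// the include path are assumptions made for illustration.
#include <cstdint>
#include <vector>

#include <tensor.h> // nntrainer Tensor header; adjust the path to your build setup

int main() {
  // 1 (batch) x 2 (channel) x 1 (height) x 2 (width) quantized values
  std::vector<std::vector<std::vector<std::vector<uint8_t>>>> data = {
    {{{10, 20}}, {{30, 40}}}};
  std::vector<float> scales = {0.5f, 0.25f};         // one scale per channel (assumed)
  std::vector<unsigned int> zero_points = {128, 64}; // must match scales.size()

  nntrainer::Tensor t(data, scales, zero_points,
                      {nntrainer::Tformat::NCHW, nntrainer::Tdatatype::UINT8},
                      nntrainer::QScheme::PER_CHANNEL_AFFINE);
  return 0;
}
```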

**Self-evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test:   [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghyeon Jeong <[email protected]>
djeong20 authored and jijoongmoon committed Feb 20, 2025
1 parent 4f86abc commit c1e5c60
Showing 7 changed files with 482 additions and 162 deletions.
41 changes: 27 additions & 14 deletions nntrainer/tensor/tensor.cpp
@@ -35,10 +35,8 @@ Tensor::Tensor(
   QScheme qscheme_) {
   switch (qscheme_) {
   case QScheme::PER_TENSOR_AFFINE:
-    std::cout << "per tensor\n";
     break;
   case QScheme::PER_CHANNEL_AFFINE:
-    std::cout << "per channel\n";
     break;
   default:
     break;
@@ -76,28 +74,31 @@ Tensor::Tensor(

 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
-  std::vector<float> const &scales, unsigned int zero_point,
+  std::vector<float> const &scales,
+  std::vector<unsigned int> const &zero_points,
   ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
   itensor = std::shared_ptr<UInt8Tensor>(
-    new UInt8Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    new UInt8Tensor(d, scales, zero_points, t_type.format, qscheme_),
     std::default_delete<UInt8Tensor>());
 }

 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
-  std::vector<float> const &scales, unsigned int zero_point,
+  std::vector<float> const &scales,
+  std::vector<unsigned int> const &zero_points,
   ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
   itensor = std::shared_ptr<UInt16Tensor>(
-    new UInt16Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    new UInt16Tensor(d, scales, zero_points, t_type.format, qscheme_),
     std::default_delete<UInt16Tensor>());
 }

 Tensor::Tensor(
   std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
-  std::vector<float> const &scales, unsigned int zero_point,
+  std::vector<float> const &scales,
+  std::vector<unsigned int> const &zero_points,
   ml::train::TensorDim::TensorType t_type, QScheme qscheme_) {
   itensor = std::shared_ptr<UInt32Tensor>(
-    new UInt32Tensor(d, scales, zero_point, t_type.format, qscheme_),
+    new UInt32Tensor(d, scales, zero_points, t_type.format, qscheme_),
     std::default_delete<UInt32Tensor>());
 }

@@ -1182,10 +1183,16 @@ void Tensor::save(std::ostream &file) {
   itensor->save_quantization_info(file);

   /// @note Scale factors are temporary fixed to float for now
-  std::streamsize sz =
-    static_cast<std::streamsize>(bytes() + scale_size() * sizeof(float));
+  size_t tensor_bytes = bytes() + scale_size() * sizeof(float);
+
+  if (getDataType() == Tdatatype::UINT8 || getDataType() == Tdatatype::UINT16 ||
+      getDataType() == Tdatatype::UINT32) {
+    tensor_bytes += scale_size() * sizeof(unsigned int);
+  }
+
+  std::streamsize sz = static_cast<std::streamsize>(tensor_bytes);
   NNTR_THROW_IF(sz < 0, std::invalid_argument)
-    << "save size: " << bytes() + scale_size() * sizeof(float)
+    << "save size: " << tensor_bytes
     << " is too big. It cannot be represented by std::streamsize";

   checkedWrite(file, getData<char>(), sz, "[Tensor::save] operation failed");
@@ -1200,11 +1207,17 @@ void Tensor::read(std::ifstream &file) {
   itensor->read_quantization_info(file);

   /// @note Scale factors are temporary fixed to float for now
-  std::streamsize sz =
-    static_cast<std::streamsize>(bytes() + scale_size() * sizeof(float));
+  size_t tensor_bytes = bytes() + scale_size() * sizeof(float);
+
+  if (getDataType() == Tdatatype::UINT8 || getDataType() == Tdatatype::UINT16 ||
+      getDataType() == Tdatatype::UINT32) {
+    tensor_bytes += scale_size() * sizeof(unsigned int);
+  }
+
+  std::streamsize sz = static_cast<std::streamsize>(tensor_bytes);

   NNTR_THROW_IF(sz < 0, std::invalid_argument)
-    << "read size: " << bytes() + scale_size() * sizeof(float)
+    << "read size: " << tensor_bytes
     << " is too big. It cannot be represented by std::streamsize";

   checkedRead(file, getData<char>(), sz, "[Tensor::read] operation failed");
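
With the change above, the serialized blob for UINT8/UINT16/UINT32 tensors covers the quantized data, the float scale factors, and one unsigned int zero point per scale factor. A minimal sketch of that size computation, with hypothetical names (not functions from the codebase):

```cpp
#include <cstddef>

// Sketch of the buffer size used by Tensor::save/read after this change.
// 'quantized_blob_size' and its parameters are hypothetical stand-ins.
size_t quantized_blob_size(size_t data_bytes, size_t scale_count,
                           bool is_unsigned_int_type) {
  size_t total = data_bytes + scale_count * sizeof(float); // data + float scales
  if (is_unsigned_int_type)                                // UINT8 / UINT16 / UINT32
    total += scale_count * sizeof(unsigned int);           // one zero point per scale
  return total;
}
```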
53 changes: 38 additions & 15 deletions nntrainer/tensor/tensor.h
@@ -253,7 +253,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint8_t>>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_);

   /**
@@ -263,9 +264,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint8_t>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -275,9 +277,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint8_t>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -286,7 +289,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint16_t>>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_);

   /**
@@ -296,9 +300,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint16_t>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -308,9 +313,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint16_t>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -319,7 +325,8 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<std::vector<uint32_t>>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_);

   /**
@@ -329,9 +336,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<std::vector<uint32_t>>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -341,9 +349,10 @@ class Tensor {
    * @param[in] t_type Tensor Type
    */
   Tensor(std::vector<std::vector<uint32_t>> const &d,
-         std::vector<float> const &scales, unsigned int zero_point,
+         std::vector<float> const &scales,
+         std::vector<unsigned int> const &zero_points,
          ml::train::TensorDim::TensorType t_type, QScheme qscheme_) :
-    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_point,
+    Tensor(std::vector<std::decay<decltype(d)>::type>{d}, scales, zero_points,
            t_type, qscheme_){};

   /**
@@ -551,6 +560,20 @@ class Tensor {
     return (T *)itensor->getScale(idx);
   }

+  /**
+   * @brief return zero point pointer of Tensor
+   * @retval unsigned int pointer
+   */
+  unsigned int *getZeroPoint() const { return itensor->getZeroPoint(); }
+
+  /**
+   * @brief return zero point pointer of Tensor
+   * @retval unsigned int pointer
+   */
+  unsigned int *getZeroPoint(size_t idx) const {
+    return itensor->getZeroPoint(idx);
+  }
+
   /**
    * @brief i data index
    * @retval template T pointer (address of ith data)
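
The new accessors mirror getScale(). A hedged usage sketch — the tensor `t`, its contents, and the dequantization step are assumptions, not code from this commit:

```cpp
// Assumes 't' is a per-channel quantized UINT8 nntrainer::Tensor constructed
// with matching scales and zero points, as in the constructors above.
unsigned int *zps = t.getZeroPoint();  // pointer to the first zero point
unsigned int zp1 = *t.getZeroPoint(1); // zero point at index 1
float scale0 = *t.getScale<float>();   // first scale factor

// Standard affine dequantization of the first element (illustrative only):
float real0 =
  (static_cast<float>(*t.getData<uint8_t>()) - static_cast<float>(zps[0])) *
  scale0;
```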
18 changes: 18 additions & 0 deletions nntrainer/tensor/tensor_base.h
@@ -209,6 +209,24 @@ class TensorBase {
       getStringDataType());
   }

+  /**
+   * @copydoc Tensor::getZeroPoint()
+   */
+  virtual unsigned int *getZeroPoint() const {
+    throw std::invalid_argument(
+      "Tensor::getZeroPoint() is not supported in tensor data type " +
+      getStringDataType());
+  }
+
+  /**
+   * @copydoc Tensor::getZeroPoint(size_t idx)
+   */
+  virtual unsigned int *getZeroPoint(size_t idx) const {
+    throw std::invalid_argument(
+      "Tensor::getZeroPoint() is not supported in tensor data type " +
+      getStringDataType());
+  }
+
   /**
    * @brief i data index
    * @retval address of ith data
18 changes: 13 additions & 5 deletions nntrainer/tensor/tensor_pool.cpp
Expand Up @@ -186,17 +186,25 @@ void TensorPool::finalize(const MemoryPlanner &planner,
* 3. requestMemory for all the tensors and set their tokens
* @note +1 is to make the validity_end exlusive in the interval range
*/
size_t tensor_bytes =
spec.tensor->bytes() + spec.tensor->scale_size() * sizeof(float);

/// @note this is a temporal way to reserve memory space for zero point
if (spec.tensor->getDataType() == Tdatatype::UINT8 ||
spec.tensor->getDataType() == Tdatatype::UINT16 ||
spec.tensor->getDataType() == Tdatatype::UINT32) {
tensor_bytes += spec.tensor->scale_size() * sizeof(unsigned int);
}

details->token = mem_pool->requestMemory(
spec.tensor->bytes() + spec.tensor->scale_size() * sizeof(float),
validity_start, validity_end + 1, details->exec_order, details->lifespan,
spec.is_weight_grad);
tensor_bytes, validity_start, validity_end + 1, details->exec_order,
details->lifespan, spec.is_weight_grad);
#ifdef DEBUG
if (details->token == 0)
throw std::runtime_error("Received invalid token from memory pool");
#endif

bytes_requested +=
spec.tensor->bytes() + spec.tensor->scale_size() * sizeof(float);
bytes_requested += tensor_bytes;
}

/** 4. finalizeLayout for the memory pool. */