From 311cf7bef08783f3641dd074e493786de456020a Mon Sep 17 00:00:00 2001
From: Mateusz Bencer <m.bencer@partner.samsung.com>
Date: Fri, 29 Nov 2024 20:31:19 +0100
Subject: [PATCH] [onert] Share tensors memory for designated operands

This commit improves the tensors memory management to handle shared memory
buffers. It means that more than one tensor can point to the same buffer.
Which tensors share a buffer is determined by the operand index map
calculated in the previous step.
Note that cases like sharing memory from constant tensors require
additional checks.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer <m.bencer@partner.samsung.com>
---
 .../backend/basic/BackendContextHelpers.h     | 52 +++++++---
 .../backend/basic/StaticTensorManager.h       |  1 +
 .../src/backend/basic/StaticTensorManager.cc  | 99 ++++++++++++++++---
 3 files changed, 125 insertions(+), 27 deletions(-)

diff --git a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
index 46e57e925e6..ce905ce7a46 100644
--- a/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
+++ b/runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -177,16 +177,31 @@ void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const i
 }
 
 template <typename T_TensorBuilder>
-ITensorRegistry *
-genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
-           const util::Set<ir::OperandIndex> &external_operands,
-           const std::shared_ptr<ITensorRegistry> &tensor_registry,
-           const std::vector<onert::ir::OperationIndex> &op_order,
-           const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
+ITensorRegistry *genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder,
+                            const ir::Graph &graph,
+                            const util::Set<ir::OperandIndex> &external_operands,
+                            const std::shared_ptr<ITensorRegistry> &tensor_registry,
+                            const std::vector<onert::ir::OperationIndex> &op_order,
+                            const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_idx)
 {
+  // process source tensors for shared memory at first
+  std::vector<ir::OperandIndex> registered_source_ind;
+  for (const auto &[_, source_ind] : shared_memory_operand_idx)
+  {
+    if (external_operands.contains(source_ind))
+      continue;
+    if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
+      continue;
+    tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
+    registered_source_ind.emplace_back(source_ind);
+  }
+
   graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
     if (external_operands.contains(ind))
       return;
+    if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) !=
+        std::end(registered_source_ind)) // skip tensors already registered
+      return;
     tensor_builder->registerTensorInfo(ind, obj.info());
   });
 
@@ -219,10 +234,14 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
 inline void initConsts(const ir::Operands &operands,
                        const util::Set<ir::OperandIndex> &external_operands,
                        ITensorRegistry *tensor_registry,
-                       const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
+                       const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
 {
   operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
-    if (external_operands.contains(ind) || !operand.isConstant())
+    const bool has_const_shared_memory =
+      shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
+      operands.at(shared_memory_operands_map.at(ind)).isConstant();
+    const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
+    if (external_operands.contains(ind) || !can_be_initialized_as_const)
       return;
 
     auto tensor = tensor_registry->getNativeITensor(ind);
@@ -230,14 +249,23 @@ inline void initConsts(const ir::Operands &operands,
     VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;
 
-    auto data = operand.shareData();
-    assert(data && data->base());
     ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);
-
     if (ext_tensor == nullptr)
       throw std::runtime_error{"This tensor is not external tensor"};
-    ext_tensor->setData(data);
+    if (has_const_shared_memory)
+    {
+      const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind));
+      auto memory_source_data = source_operand_ind.shareData();
+      assert(memory_source_data && memory_source_data->base());
+      ext_tensor->setData(memory_source_data);
+    }
+    else
+    {
+      auto data = operand.shareData();
+      assert(data && data->base());
+      ext_tensor->setData(data);
+    }
   });
 }
diff --git a/runtime/onert/core/include/backend/basic/StaticTensorManager.h b/runtime/onert/core/include/backend/basic/StaticTensorManager.h
index a92af7bd45d..f9157cb2a42 100644
--- a/runtime/onert/core/include/backend/basic/StaticTensorManager.h
+++ b/runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -60,6 +60,7 @@ class StaticTensorManager
   ir::OperandIndexMap<bool> _as_constants;
   DynamicTensorManager *_dynamic_tensor_manager;
   ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
+  ir::OperandIndexMap<uint32_t> _source_operand_inds_ref_counter;
 };
 
 } // namespace basic
diff --git a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
index 2e5fadd8d37..f6f69d6af06 100644
--- a/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
+++ b/runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
  * limitations under the License.
  */
 
+#include
+
 #include "backend/basic/StaticTensorManager.h"
 
 #include "backend/basic/DynamicTensorManager.h"
@@ -54,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)
 
   for (auto &&[ind, tensor] : _tensors->native_tensors())
   {
-    if (!_as_constants[ind] && !tensor->is_dynamic())
+    bool buffer_set = false;
+    if (!tensor->is_dynamic())
     {
-      auto *buffer = _nonconst_mgr->getBuffer(ind);
-      tensor->setBuffer(buffer);
-
-      VERBOSE(CPU_StaticTensorManager)
-        << "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
+      if (_shared_memory_operand_indexes.find(ind) != std::end(_shared_memory_operand_indexes))
+      {
+        const auto &shared_memory_ind = _shared_memory_operand_indexes[ind];
+        if (!_as_constants[shared_memory_ind])
+        {
+          tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
+          buffer_set = true;
+        }
+      }
+      else if (!_as_constants[ind])
+      {
+        tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
+        buffer_set = true;
+      }
+      if (buffer_set)
+      {
+        VERBOSE(CPU_StaticTensorManager)
+          << "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
+      }
     }
   }
 }
@@ -71,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
                                       const ir::OperandInfo &tensor_info, bool as_const)
 {
   assert(!_tensors->getNativeTensor(ind));
+  std::unique_ptr<Tensor> tensor = nullptr;
   if (as_const)
   {
-    auto tensor = std::make_unique<ExternalTensor>(tensor_info);
-    _tensors->setNativeTensor(ind, std::move(tensor));
+    tensor = std::make_unique<ExternalTensor>(tensor_info);
   }
   else
   {
-    auto tensor =
-      std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
-    _tensors->setNativeTensor(ind, std::move(tensor));
+    const auto source_operand_ind = _shared_memory_operand_indexes.find(ind);
+    if (source_operand_ind != std::end(_shared_memory_operand_indexes) &&
+        _as_constants[source_operand_ind->second])
+    {
+      as_const = _as_constants[source_operand_ind->second];
+      auto new_tensor_info = tensor_info;
+      new_tensor_info.setAsConstant();
+      tensor = std::make_unique<ExternalTensor>(new_tensor_info);
+    }
+    else
+    {
+      tensor =
+        std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
+    }
   }
+  assert(tensor);
+  _tensors->setNativeTensor(ind, std::move(tensor));
   _as_constants[ind] = as_const;
 }
 
@@ -92,8 +122,26 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
   // This method is called only when a tensor has proper shape
   assert(!_tensors->getNativeTensor(ind)->is_dynamic());
 
-  if (!_as_constants[ind])
-    _nonconst_mgr->claimPlan(ind, size);
+  ir::OperandIndex claim_ind;
+  const auto source_ind = _shared_memory_operand_indexes.find(ind);
+  if (source_ind == std::end(_shared_memory_operand_indexes))
+  {
+    claim_ind = ind;
+  }
+  else
+  {
+    claim_ind = source_ind->second;
+  }
+  if (_as_constants[claim_ind])
+  {
+    return;
+  }
+  ++_source_operand_inds_ref_counter[claim_ind];
+  // notify only first usage
+  if (1 == _source_operand_inds_ref_counter[claim_ind])
+  {
+    _nonconst_mgr->claimPlan(claim_ind, size);
+  }
 }
 
 void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -103,8 +151,29 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
   // This method is called only when a tensor has proper shape
   assert(!_tensors->getNativeTensor(ind)->is_dynamic());
 
-  if (!_as_constants[ind])
-    _nonconst_mgr->releasePlan(ind);
+  ir::OperandIndex release_ind;
+  const auto source_operand_ind_ind = _shared_memory_operand_indexes.find(ind);
+  if (source_operand_ind_ind == std::end(_shared_memory_operand_indexes))
+  {
+    release_ind = ind;
+  }
+  else
+  {
+    release_ind = source_operand_ind_ind->second;
+  }
+  if (_as_constants[release_ind])
+  {
+    return;
+  }
+  if (_source_operand_inds_ref_counter[release_ind] > 0)
+  {
+    --_source_operand_inds_ref_counter[release_ind];
+  }
+  // notify only last usage
+  if (0 == _source_operand_inds_ref_counter[release_ind])
+  {
+    _nonconst_mgr->releasePlan(release_ind);
+  }
 }
 
 void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
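
Editor's note: the scheme at the heart of the StaticTensorManager changes above is easiest to see in isolation. Every operand resolves to a memory-source operand (itself, unless the shared-memory map says otherwise), and the source's buffer is claimed from the memory planner only by its first user and released only after its last user. The following is a minimal standalone sketch of that idea, not onert code: the Planner and SharedPlanManager types and the plain integer indices are invented for illustration, and the patch's extra handling of constant source operands is omitted.

// Standalone sketch of reference-counted claim/release for operands that
// share one underlying buffer (simplified stand-in types, not onert code).
#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <utility>

// Hypothetical stand-in for the memory planner (claimPlan/releasePlan).
struct Planner
{
  void claim(uint32_t ind, uint32_t size)
  {
    std::cout << "claim buffer of source operand " << ind << " (" << size << " bytes)\n";
  }
  void release(uint32_t ind) { std::cout << "release buffer of source operand " << ind << "\n"; }
};

class SharedPlanManager
{
public:
  explicit SharedPlanManager(std::unordered_map<uint32_t, uint32_t> shared_to_source)
    : _shared_to_source(std::move(shared_to_source))
  {
  }

  void claim(uint32_t ind, uint32_t size)
  {
    const uint32_t source = sourceOf(ind);
    // Only the first user of a source operand triggers a real claim.
    if (++_ref_count[source] == 1)
      _planner.claim(source, size);
  }

  void release(uint32_t ind)
  {
    const uint32_t source = sourceOf(ind);
    auto &count = _ref_count[source];
    if (count > 0)
      --count;
    // Only the last user of a source operand triggers a real release.
    if (count == 0)
      _planner.release(source);
  }

private:
  // An operand not present in the map is its own memory source.
  uint32_t sourceOf(uint32_t ind) const
  {
    const auto it = _shared_to_source.find(ind);
    return it == _shared_to_source.end() ? ind : it->second;
  }

  Planner _planner;
  std::unordered_map<uint32_t, uint32_t> _shared_to_source;
  std::unordered_map<uint32_t, uint32_t> _ref_count;
};

int main()
{
  // Operands 2 and 3 share the buffer of operand 1 (e.g. Reshape outputs).
  SharedPlanManager mgr({{2, 1}, {3, 1}});
  mgr.claim(2, 64); // first user -> real claim of operand 1's buffer
  mgr.claim(3, 64); // second user -> no extra claim
  mgr.claim(4, 16); // independent operand -> its own claim
  mgr.release(3);   // one user still left -> no release
  mgr.release(2);   // last user -> real release of operand 1's buffer
  mgr.release(4);
}

Running the sketch claims operand 1's buffer once for its two sharing users and releases it only after both have been released, which is the invariant the ref counter added to claimPlan/releasePlan maintains.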