[onert] Share tensors memory for designated operands
This commit improves tensor memory management to support shared memory buffers.
This means that more than one tensor can point to the same buffer; which tensors share a buffer is determined by the operand index map computed in the previous step.
Note that cases such as sharing memory from constant tensors require additional checks.

ONE-DCO-1.0-Signed-off-by: Mateusz Bencer [email protected]
mbencer committed Nov 29, 2024
1 parent c6c8c0d commit 311cf7b
Showing 3 changed files with 125 additions and 27 deletions.
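To make the mechanism concrete, here is a minimal standalone sketch of the idea (plain C++ with stand-in names such as OperandIndex and shared_memory_operand_idx; an illustration only, not onert code): the operand index map computed in an earlier pass records which operands reuse another operand's buffer, so several tensors can end up pointing at the same memory.

#include <cstdint>
#include <iostream>
#include <unordered_map>
#include <vector>

using OperandIndex = std::uint32_t; // stand-in for ir::OperandIndex

int main()
{
  // operands 5 and 7 reuse the memory of operand 2 (decided in an earlier pass)
  std::unordered_map<OperandIndex, OperandIndex> shared_memory_operand_idx{{5, 2}, {7, 2}};

  // only operands that are not keys of the map own storage
  std::unordered_map<OperandIndex, std::vector<std::uint8_t>> own_storage;
  for (OperandIndex ind : {1u, 2u, 5u, 7u})
    if (shared_memory_operand_idx.count(ind) == 0)
      own_storage[ind].resize(64);

  // a shared operand resolves to its source operand's buffer
  auto buffer_of = [&](OperandIndex ind) {
    const auto it = shared_memory_operand_idx.find(ind);
    return own_storage[it == shared_memory_operand_idx.end() ? ind : it->second].data();
  };
  std::cout << (buffer_of(5) == buffer_of(2) && buffer_of(7) == buffer_of(2)) << std::endl; // prints 1
}
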
52 changes: 40 additions & 12 deletions runtime/onert/core/include/backend/basic/BackendContextHelpers.h
@@ -177,16 +177,31 @@ void planTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const i
}

template <typename T_TensorBuilder>
ITensorRegistry *
genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder, const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operand_idx*/)
ITensorRegistry *genTensors(const std::shared_ptr<T_TensorBuilder> &tensor_builder,
const ir::Graph &graph,
const util::Set<ir::OperandIndex> &external_operands,
const std::shared_ptr<ITensorRegistry> &tensor_registry,
const std::vector<onert::ir::OperationIndex> &op_order,
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operand_idx)
{
// process source tensors for shared memory at first
std::vector<ir::OperandIndex> registered_source_ind;
for (const auto &[_, source_ind] : shared_memory_operand_idx)
{
if (external_operands.contains(source_ind))
continue;
if (tensor_builder->isRegistered(source_ind)) // some tensors can have the same source
continue;
tensor_builder->registerTensorInfo(source_ind, graph.operands().at(source_ind).info());
registered_source_ind.emplace_back(source_ind);
}

graph.operands().iterate([&](const ir::OperandIndex &ind, const ir::Operand &obj) {
if (external_operands.contains(ind))
return;
if (std::find(std::begin(registered_source_ind), std::end(registered_source_ind), ind) !=
std::end(registered_source_ind)) // skip tensors already registered
return;
tensor_builder->registerTensorInfo(ind, obj.info());
});
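A hedged, self-contained sketch of the registration order above (stand-in types only; the vectors and lambda below are illustrative stubs, not the onert tensor builder): memory-source operands are registered first, and the general pass then skips anything already registered, so a source shared by several operands is handled exactly once.

#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

using OperandIndex = std::uint32_t; // stand-in for ir::OperandIndex

int main()
{
  std::map<OperandIndex, OperandIndex> shared_memory_operand_idx{{5, 2}, {7, 2}};
  std::vector<OperandIndex> all_operands{1, 2, 5, 7};
  std::vector<OperandIndex> registered; // stands in for tensor_builder registration

  auto is_registered = [&](OperandIndex ind) {
    return std::find(registered.begin(), registered.end(), ind) != registered.end();
  };

  // 1) register memory-source operands first (both entries share source 2, registered once)
  for (const auto &[_, source_ind] : shared_memory_operand_idx)
    if (!is_registered(source_ind))
      registered.push_back(source_ind);

  // 2) general pass over all operands: skip what step 1 already registered
  for (OperandIndex ind : all_operands)
    if (!is_registered(ind))
      registered.push_back(ind);

  // registered == {2, 1, 5, 7}: the shared source comes first and appears only once
}
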

@@ -219,25 +234,38 @@ template <typename T_BackendContext> ITensorRegistry *genTensors(T_BackendContex
inline void initConsts(const ir::Operands &operands,
const util::Set<ir::OperandIndex> &external_operands,
ITensorRegistry *tensor_registry,
const ir::OperandIndexMap<ir::OperandIndex> & /*shared_memory_operands_map*/)
const ir::OperandIndexMap<ir::OperandIndex> &shared_memory_operands_map)
{
operands.iterate([&](const ir::OperandIndex &ind, const ir::Operand &operand) {
if (external_operands.contains(ind) || !operand.isConstant())
const bool has_const_shared_memory =
shared_memory_operands_map.find(ind) != std::end(shared_memory_operands_map) &&
operands.at(shared_memory_operands_map.at(ind)).isConstant();
const bool can_be_initialized_as_const = operand.isConstant() || has_const_shared_memory;
if (external_operands.contains(ind) || !can_be_initialized_as_const)
return;

auto tensor = tensor_registry->getNativeITensor(ind);
assert(tensor != nullptr);

VERBOSE(FillOperandData) << "Fill data for " << ind << std::endl;

auto data = operand.shareData();
assert(data && data->base());
ExternalTensor *ext_tensor = dynamic_cast<ExternalTensor *>(tensor);

if (ext_tensor == nullptr)
throw std::runtime_error{"This tensor is not external tensor"};

ext_tensor->setData(data);
if (has_const_shared_memory)
{
const auto &source_operand_ind = operands.at(shared_memory_operands_map.at(ind));
auto memory_source_data = source_operand_ind.shareData();
assert(memory_source_data && memory_source_data->base());
ext_tensor->setData(memory_source_data);
}
else
{
auto data = operand.shareData();
assert(data && data->base());
ext_tensor->setData(data);
}
});
}
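A minimal sketch of the constant-initialization rule above, under the assumption of simplified stand-ins (Operand and bound_data below are hypothetical, not the real ExternalTensor API): a tensor qualifies for constant initialization if it is constant itself or shares memory with a constant source, and in the shared case it is bound to the source operand's data.

#include <cstdint>
#include <map>
#include <memory>
#include <vector>

using OperandIndex = std::uint32_t;
using Data = std::shared_ptr<std::vector<std::uint8_t>>; // stand-in for shareData()

struct Operand // stand-in for ir::Operand
{
  bool is_const;
  Data data;
};

int main()
{
  std::map<OperandIndex, Operand> operands;
  operands[2] = {true, std::make_shared<std::vector<std::uint8_t>>(16, 0xAB)};
  operands[5] = {false, nullptr}; // non-const, but shares memory with constant operand 2
  std::map<OperandIndex, OperandIndex> shared_map{{5, 2}};

  std::map<OperandIndex, Data> bound_data; // stands in for ExternalTensor::setData
  for (auto &[ind, operand] : operands)
  {
    const auto it = shared_map.find(ind);
    const bool has_const_shared_memory = it != shared_map.end() && operands.at(it->second).is_const;
    if (!operand.is_const && !has_const_shared_memory)
      continue; // plain non-constant tensors are not initialized here
    // shared case: bind the source operand's data instead of the tensor's own data
    bound_data[ind] = has_const_shared_memory ? operands.at(it->second).data : operand.data;
  }
  // bound_data[2] and bound_data[5] now point at the same constant storage
}
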

1 change: 1 addition & 0 deletions runtime/onert/core/include/backend/basic/StaticTensorManager.h
@@ -60,6 +60,7 @@ class StaticTensorManager
ir::OperandIndexMap<bool> _as_constants;
DynamicTensorManager *_dynamic_tensor_manager;
ir::OperandIndexMap<ir::OperandIndex> _shared_memory_operand_indexes;
ir::OperandIndexMap<uint32_t> _source_operand_inds_ref_counter;
};

} // namespace basic
99 changes: 84 additions & 15 deletions runtime/onert/core/src/backend/basic/StaticTensorManager.cc
@@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <algorithm>

#include "backend/basic/StaticTensorManager.h"

#include "backend/basic/DynamicTensorManager.h"
@@ -54,13 +56,28 @@ void StaticTensorManager::allocateNonconsts(void)

for (auto &&[ind, tensor] : _tensors->native_tensors())
{
if (!_as_constants[ind] && !tensor->is_dynamic())
bool buffer_set = false;
if (!tensor->is_dynamic())
{
auto *buffer = _nonconst_mgr->getBuffer(ind);
tensor->setBuffer(buffer);

VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(buffer) << std::endl;
if (_shared_memory_operand_indexes.find(ind) != std::end(_shared_memory_operand_indexes))
{
const auto &shared_memory_ind = _shared_memory_operand_indexes[ind];
if (!_as_constants[shared_memory_ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(shared_memory_ind));
buffer_set = true;
}
}
else if (!_as_constants[ind])
{
tensor->setBuffer(_nonconst_mgr->getBuffer(ind));
buffer_set = true;
}
if (buffer_set)
{
VERBOSE(CPU_StaticTensorManager)
<< "TENSOR " << ind << " : " << static_cast<void *>(tensor->buffer()) << std::endl;
}
}
}
}
@@ -71,17 +88,30 @@ void StaticTensorManager::buildTensor(const ir::OperandIndex &ind,
const ir::OperandInfo &tensor_info, bool as_const)
{
assert(!_tensors->getNativeTensor(ind));
std::unique_ptr<Tensor> tensor = nullptr;
if (as_const)
{
auto tensor = std::make_unique<ExternalTensor>(tensor_info);
_tensors->setNativeTensor(ind, std::move(tensor));
tensor = std::make_unique<ExternalTensor>(tensor_info);
}
else
{
auto tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
_tensors->setNativeTensor(ind, std::move(tensor));
const auto source_operand_ind = _shared_memory_operand_indexes.find(ind);
if (source_operand_ind != std::end(_shared_memory_operand_indexes) &&
_as_constants[source_operand_ind->second])
{
as_const = _as_constants[source_operand_ind->second];
auto new_tensor_info = tensor_info;
new_tensor_info.setAsConstant();
tensor = std::make_unique<ExternalTensor>(new_tensor_info);
}
else
{
tensor =
std::make_unique<Tensor>(tensor_info, _dynamic_tensor_manager->dynamic_mem_mgr().get());
}
}
assert(tensor);
_tensors->setNativeTensor(ind, std::move(tensor));
_as_constants[ind] = as_const;
}
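A hedged sketch of the tensor-construction decision above (OwnedTensor and ExternalDataTensor are hypothetical stand-ins for onert's Tensor and ExternalTensor): a non-constant operand whose memory source is constant is built as an external-data tensor and inherits the source's constness; everything else keeps the original behaviour.

#include <cstdint>
#include <map>
#include <memory>
#include <string>

using OperandIndex = std::uint32_t;

struct TensorBase { virtual ~TensorBase() = default; virtual std::string kind() const = 0; };
struct OwnedTensor : TensorBase { std::string kind() const override { return "owned"; } };
struct ExternalDataTensor : TensorBase { std::string kind() const override { return "external"; } };

std::unique_ptr<TensorBase> build_tensor(OperandIndex ind, bool as_const,
                                         const std::map<OperandIndex, OperandIndex> &shared_map,
                                         std::map<OperandIndex, bool> &as_constants)
{
  std::unique_ptr<TensorBase> tensor;
  if (as_const)
  {
    tensor = std::make_unique<ExternalDataTensor>(); // constants never own planned memory
  }
  else
  {
    const auto src = shared_map.find(ind);
    if (src != shared_map.end() && as_constants[src->second])
    {
      as_const = true; // inherit constness from the memory source
      tensor = std::make_unique<ExternalDataTensor>();
    }
    else
    {
      tensor = std::make_unique<OwnedTensor>();
    }
  }
  as_constants[ind] = as_const;
  return tensor;
}

int main()
{
  std::map<OperandIndex, OperandIndex> shared_map{{5, 2}};
  std::map<OperandIndex, bool> as_constants;
  build_tensor(2, /*as_const=*/true, shared_map, as_constants);
  const auto t5 = build_tensor(5, /*as_const=*/false, shared_map, as_constants);
  // t5 is "external" and as_constants[5] is now true, because operand 5 shares operand 2's memory
  return t5->kind() == "external" ? 0 : 1;
}
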

@@ -92,8 +122,26 @@ void StaticTensorManager::claimPlan(const ir::OperandIndex &ind, uint32_t size)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->claimPlan(ind, size);
ir::OperandIndex claim_ind;
const auto source_ind = _shared_memory_operand_indexes.find(ind);
if (source_ind == std::end(_shared_memory_operand_indexes))
{
claim_ind = ind;
}
else
{
claim_ind = source_ind->second;
}
if (_as_constants[claim_ind])
{
return;
}
++_source_operand_inds_ref_counter[claim_ind];
// notify only first usage
if (1 == _source_operand_inds_ref_counter[claim_ind])
{
_nonconst_mgr->claimPlan(claim_ind, size);
}
}

void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
@@ -103,8 +151,29 @@ void StaticTensorManager::releasePlan(const ir::OperandIndex &ind)
// This method is called only when a tensor has proper shape
assert(!_tensors->getNativeTensor(ind)->is_dynamic());

if (!_as_constants[ind])
_nonconst_mgr->releasePlan(ind);
ir::OperandIndex release_ind;
const auto source_operand_ind_ind = _shared_memory_operand_indexes.find(ind);
if (source_operand_ind_ind == std::end(_shared_memory_operand_indexes))
{
release_ind = ind;
}
else
{
release_ind = source_operand_ind_ind->second;
}
if (_as_constants[release_ind])
{
return;
}
if (_source_operand_inds_ref_counter[release_ind] > 0)
{
--_source_operand_inds_ref_counter[release_ind];
}
// notify only last usage
if (0 == _source_operand_inds_ref_counter[release_ind])
{
_nonconst_mgr->releasePlan(release_ind);
}
}

void StaticTensorManager::iterate(const std::function<void(const ir::OperandIndex &)> &fn)
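To close, a hedged sketch of the claim/release reference counting introduced above (SharedPlanner is a toy stand-in, not onert's MemoryManager): every tensor claims and releases through its memory-source index, the underlying plan is claimed only by the first user and released only by the last, and constant sources are skipped altogether.

#include <cstdint>
#include <iostream>
#include <map>

using OperandIndex = std::uint32_t;

struct SharedPlanner // toy stand-in for the static memory planner
{
  std::map<OperandIndex, OperandIndex> shared_map;   // tensor -> memory-source operand
  std::map<OperandIndex, bool> as_constants;         // constant sources are never planned
  std::map<OperandIndex, std::uint32_t> ref_counter; // users per source operand

  OperandIndex source_of(OperandIndex ind) const
  {
    const auto it = shared_map.find(ind);
    return it == shared_map.end() ? ind : it->second;
  }

  void claim(OperandIndex ind, std::uint32_t size)
  {
    const auto src = source_of(ind);
    if (as_constants[src])
      return;
    if (++ref_counter[src] == 1) // only the first user claims the real plan
      std::cout << "claimPlan(" << src << ", " << size << ")" << std::endl;
  }

  void release(OperandIndex ind)
  {
    const auto src = source_of(ind);
    if (as_constants[src])
      return;
    if (ref_counter[src] > 0 && --ref_counter[src] == 0) // only the last user releases it
      std::cout << "releasePlan(" << src << ")" << std::endl;
  }
};

int main()
{
  SharedPlanner planner;
  planner.shared_map = {{5, 2}, {7, 2}};
  planner.claim(2, 64); // claimPlan(2, 64)
  planner.claim(5, 64); // no-op: the shared plan is already claimed
  planner.claim(7, 64); // no-op
  planner.release(5);   // no-op: operands 2 and 7 still use the plan
  planner.release(7);   // no-op
  planner.release(2);   // releasePlan(2)
}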
