[Transformations][GPU] Constant tensor deduplication pass (#29052)

dnkurek · akladiev · web-flow · commit 7f95394aa858 · 2025-04-23T06:39:42.000Z
### Details: - Deduplicate constant tensors in order to reduce memory usage and improve cache usage ### Tickets: - CVS-156968 --------- Co-authored-by: Alina Kladieva <alina.kladieva@intel.com>
diff --git a/src/common/transformations/include/transformations/common_optimizations/constants_reduce.hpp b/src/common/transformations/include/transformations/common_optimizations/constants_reduce.hpp
@@ -0,0 +1,18 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/matcher_pass.hpp"
+#include "transformations_visibility.hpp"
+
+namespace ov::pass {
+
+/**
+ * @brief Model-level pass that deduplicates Constant nodes.
+ *
+ * Constants whose element type, shape and raw bytes are identical are collapsed: every
+ * consumer of a duplicate is rewired to the first matching constant encountered. Only
+ * constants whose byte size does not exceed the threshold defined in the implementation
+ * file are examined.
+ */
+class TRANSFORMATIONS_API ConstantsReduce : public ModelPass {
+public:
+    OPENVINO_MODEL_PASS_RTTI("ConstantsReduce");
+
+    /**
+     * @brief Runs the deduplication over all operations of the model.
+     * @param m Model to transform in place.
+     * @return true if at least one duplicate constant was replaced, false otherwise.
+     */
+    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
+};
+
+}  // namespace ov::pass
diff --git a/src/common/transformations/src/transformations/common_optimizations/constants_reduce.cpp b/src/common/transformations/src/transformations/common_optimizations/constants_reduce.cpp
@@ -0,0 +1,119 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "transformations/common_optimizations/constants_reduce.hpp"
+
+#include "itt.hpp"
+#include "openvino/op/constant.hpp"
+#include "openvino/util/log.hpp"
+
+// Largest constant payload, in bytes, that the pass will hash and compare; constants
+// above this size are skipped. Typed as std::size_t so the comparison with
+// get_byte_size() involves no signed/unsigned conversion.
+constexpr std::size_t LARGE_TENSOR_BYTE_SIZE = 64;
+
+namespace ov::pass {
+
+using BlobCacheKey = std::shared_ptr<ov::Node>;
+
+/// Content-based hash used by the constant deduplication cache.
+///
+/// Folds the element type, the byte size, the shape (dimension order matters) and every
+/// payload byte of the Constant into one value. Two constants that agree on all of those
+/// properties therefore always produce the same hash.
+///
+/// Precondition: the key refers to an op::v0::Constant; the result of the cast is
+/// dereferenced without a null check.
+struct KeyHash {
+    std::size_t operator()(const BlobCacheKey& key) const {
+        // Order-sensitive combiner (the boost::hash_combine recipe). Unlike a plain XOR it
+        // distinguishes {1, 4} from {4, 1} and lets every input influence all bits of the
+        // seed, not just the low-order ones.
+        const auto combine = [](std::size_t& seed, std::size_t value) {
+            seed ^= value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+        };
+
+        const auto node = ov::as_type_ptr<op::v0::Constant>(key);
+
+        const std::size_t size = node->get_byte_size();
+
+        std::size_t hash = node->get_output_element_type(0).hash();
+        combine(hash, size);
+
+        // Bind by reference so the shape is not copied on every hash computation.
+        const auto& shape = node->get_shape();
+        for (const auto dim : shape) {
+            combine(hash, std::hash<size_t>{}(dim));
+        }
+
+        // Bytes are widened through unsigned char so the result does not depend on whether
+        // plain char is signed on the target platform.
+        const char* data = node->get_data_ptr<char>();
+        for (std::size_t i = 0; i < size; i++) {
+            combine(hash, static_cast<unsigned char>(data[i]));
+        }
+
+        return hash;
+    }
+};
+
+/// Equality predicate used by the constant deduplication cache.
+///
+/// Two keys are equal when both Constants have the same element type, the same shape, the
+/// same byte size and bit-identical payloads.
+///
+/// Precondition: both keys refer to op::v0::Constant nodes; the results of the casts are
+/// dereferenced without a null check.
+struct KeyEqual {
+    bool operator()(const BlobCacheKey& lhs, const BlobCacheKey& rhs) const {
+        const auto lhs_node = ov::as_type_ptr<op::v0::Constant>(lhs);
+        const auto rhs_node = ov::as_type_ptr<op::v0::Constant>(rhs);
+
+        // Cheap metadata checks first; the getters are compared in place so no Shape
+        // copies are made per comparison.
+        if (lhs_node->get_output_element_type(0) != rhs_node->get_output_element_type(0))
+            return false;
+
+        if (lhs_node->get_shape() != rhs_node->get_shape())
+            return false;
+
+        const std::size_t byte_size = lhs_node->get_byte_size();
+        if (byte_size != rhs_node->get_byte_size())
+            return false;
+
+        // Nothing to compare for empty payloads; this also keeps memcmp from being called
+        // with pointers that may not reference valid storage.
+        if (byte_size == 0)
+            return true;
+
+        const char* lhs_data = lhs_node->get_data_ptr<char>();
+        const char* rhs_data = rhs_node->get_data_ptr<char>();
+
+        // Constants sharing the same buffer are equal without looking at the bytes.
+        if (lhs_data == rhs_data)
+            return true;
+
+        return std::memcmp(lhs_data, rhs_data, byte_size) == 0;
+    }
+};
+
+/// Deduplicates small Constant nodes of the model.
+///
+/// Walks the operations returned by Model::get_ops(). The first constant seen with a given
+/// (element type, shape, bytes) combination is remembered; every later constant with the
+/// same content has all consumers of its output 0 rewired to the remembered node.
+/// Constants larger than LARGE_TENSOR_BYTE_SIZE bytes are never hashed or compared.
+///
+/// @param m Model to transform in place.
+/// @return true if at least one duplicate constant was replaced.
+bool ConstantsReduce::run_on_model(const std::shared_ptr<ov::Model>& m) {
+    RUN_ON_FUNCTION_SCOPE(ConstantsReduce);
+
+    // Key and value are the same node: the map is used as a content-addressed set whose
+    // stored node is the representative all duplicates get redirected to.
+    std::unordered_map<BlobCacheKey, std::shared_ptr<ov::Node>, KeyHash, KeyEqual> blobMemCache;
+
+    unsigned int copies = 0;
+
+    for (const auto& node : m->get_ops()) {
+        // A single checked cast both filters non-constants and yields the typed pointer.
+        const auto const_node = ov::as_type_ptr<ov::op::v0::Constant>(node);
+        if (!const_node)
+            continue;
+
+        // Limit size of node reading to avoid reading large tensors
+        if (const_node->get_byte_size() > LARGE_TENSOR_BYTE_SIZE)
+            continue;
+
+        // One hash-map operation per constant: either registers this node as the
+        // representative for its content or returns the one registered earlier.
+        const auto [entry, inserted] = blobMemCache.try_emplace(node, node);
+        if (inserted)
+            continue;
+
+        copies++;
+        const auto& representative = entry->second;
+
+        // get_target_inputs() returns a snapshot, so rewiring while iterating is safe.
+        for (auto input : const_node->output(0).get_target_inputs()) {
+            input.replace_source_output(representative);
+        }
+    }
+    OPENVINO_DEBUG("Reduced ", copies, " constant node duplications from model");
+
+    // Return true if we have made any replacements
+    return copies > 0;
+}
+
+}  // namespace ov::pass
diff --git a/src/common/transformations/tests/common_optimizations/constants_reduce.cpp b/src/common/transformations/tests/common_optimizations/constants_reduce.cpp
@@ -0,0 +1,115 @@
+// Copyright (C) 2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#define _USE_MATH_DEFINES
+
+#include "transformations/common_optimizations/constants_reduce.hpp"
+
+#include <gtest/gtest.h>
+#include <math.h>
+
+#include <memory>
+
+#include "common_test_utils/ov_test_utils.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/opsets/opset8.hpp"
+#include "openvino/pass/manager.hpp"
+
+using namespace testing;
+using namespace ov;
+
+// Two bit-identical constants feeding two chained Adds must collapse into one node.
+TEST(TransformationTests, ConstantsReduce) {
+    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});
+
+    // Each call builds a separate node with the same type, shape and values.
+    const auto make_duplicate = [] {
+        return opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
+    };
+    const auto first_const = make_duplicate();
+    const auto second_const = make_duplicate();
+
+    const auto first_sum = std::make_shared<opset8::Add>(input, first_const);
+    const auto second_sum = std::make_shared<opset8::Add>(first_sum, second_const);
+
+    const auto output = std::make_shared<ov::op::v0::Result>(second_sum);
+    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});
+
+    pass::Manager manager;
+    manager.register_pass<ov::pass::ConstantsReduce>();
+    manager.run_passes(model);
+
+    // Only a single shared constant may remain.
+    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 1);
+}
+
+// Four identical constants plus one different constant along a chain of Adds:
+// the identical ones merge, the different one stays.
+TEST(TransformationTests, ConstantsReduceChain) {
+    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});
+
+    NodeVector addends;
+    // Intentionally equal to each other
+    for (int i = 0; i < 4; ++i) {
+        addends.push_back(opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0}));
+    }
+    // Intentionally different (first element)
+    addends.push_back(opset8::Constant::create(element::f32, Shape{1, 4}, {2.0, 2.0, 3.0, 4.0}));
+
+    // Build input + c1 + c2 + c3 + c4 + c5 from left to right.
+    std::shared_ptr<Node> tail = input;
+    for (const auto& addend : addends) {
+        tail = std::make_shared<opset8::Add>(tail, addend);
+    }
+
+    const auto output = std::make_shared<ov::op::v0::Result>(tail);
+    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});
+
+    pass::Manager manager;
+    manager.register_pass<ov::pass::ConstantsReduce>();
+    manager.run_passes(model);
+
+    // One shared constant for the equal group, one for the odd one out.
+    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 2);
+}
+
+// Five identical constants along a chain of Adds must all merge into one node.
+TEST(TransformationTests, ConstantsReduceChain2) {
+    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});
+
+    // Intentionally equal to each other
+    NodeVector addends;
+    for (int i = 0; i < 5; ++i) {
+        addends.push_back(opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0}));
+    }
+
+    // Build input + c1 + c2 + c3 + c4 + c5 from left to right.
+    std::shared_ptr<Node> tail = input;
+    for (const auto& addend : addends) {
+        tail = std::make_shared<opset8::Add>(tail, addend);
+    }
+
+    const auto output = std::make_shared<ov::op::v0::Result>(tail);
+    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});
+
+    pass::Manager manager;
+    manager.register_pass<ov::pass::ConstantsReduce>();
+    manager.run_passes(model);
+
+    // Every Add now reads from the same constant.
+    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 1);
+}
+
+// Negative case: constants that differ in a single element must both be kept.
+TEST(TransformationTests, ConstantsReduceNeg) {
+    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});
+
+    // Same type and shape, last value differs (4.0 vs 4.5).
+    const auto first_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
+    const auto second_const = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.5});
+
+    const auto first_sum = std::make_shared<opset8::Add>(input, first_const);
+    const auto second_sum = std::make_shared<opset8::Add>(first_sum, second_const);
+
+    const auto output = std::make_shared<ov::op::v0::Result>(second_sum);
+    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});
+
+    pass::Manager manager;
+    manager.register_pass<ov::pass::ConstantsReduce>();
+    manager.run_passes(model);
+
+    // Nothing to merge, both constants survive.
+    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 2);
+}
diff --git a/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp b/src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp
@@ -109,6 +109,7 @@
 #include "transformations/common_optimizations/transpose_sinking.hpp"
 #include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
 #include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
+#include "transformations/common_optimizations/constants_reduce.hpp"
 #include "transformations/control_flow/unroll_tensor_iterator.hpp"
 #include "transformations/convert_pooling_to_reduce.hpp"
 #include "transformations/convert_precision.hpp"
@@ -1227,6 +1228,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
         // Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero.
         manager.register_pass<ov::pass::EliminatePad>();
 
+        manager.register_pass<ov::pass::ConstantsReduce>();
+
         // This is supposed to be the last pass to ensure that we don't have name collisions until
         // GPU plugin stops using friendly names for program creation
         manager.register_pass<ov::pass::ResolveNameCollisions>(true);