Skip to content

Commit 7f95394

Browse files
dnkurek and akladiev authored
[Transformations][GPU] Constant tensor deduplication pass (#29052)
### Details:
- Deduplicate constant tensors in order to reduce memory usage and improve cache usage

### Tickets:
- CVS-156968

---------

Co-authored-by: Alina Kladieva <[email protected]>
1 parent 48b6c68 commit 7f95394

File tree

4 files changed

+255
-0
lines changed

4 files changed

+255
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#pragma once
6+
7+
#include "openvino/pass/matcher_pass.hpp"
8+
#include "transformations_visibility.hpp"
9+
10+
namespace ov::pass {
11+
12+
/**
 * @brief Model-level pass that deduplicates Constant nodes.
 *
 * Small Constant nodes that share element type, shape and buffer contents are
 * collapsed so that every consumer reads from a single node.
 */
class TRANSFORMATIONS_API ConstantsReduce : public ModelPass {
public:
    OPENVINO_MODEL_PASS_RTTI("ConstantsReduce");

    /**
     * @brief Runs the deduplication over the given model.
     * @param m Model whose Constant nodes are inspected and rewired in place.
     * @return true if at least one duplicated Constant was replaced, false otherwise.
     */
    bool run_on_model(const std::shared_ptr<ov::Model>& m) override;
};
17+
18+
} // namespace ov::pass
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#include "transformations/common_optimizations/constants_reduce.hpp"
6+
7+
#include "itt.hpp"
8+
#include "openvino/op/constant.hpp"
9+
#include "openvino/util/log.hpp"
10+
11+
#define LARGE_TENSOR_BYTE_SIZE 64
12+
13+
namespace ov::pass {
14+
15+
using BlobCacheKey = std::shared_ptr<ov::Node>;
16+
17+
struct KeyHash {
18+
std::size_t operator()(const BlobCacheKey& key) const {
19+
std::size_t hash = 0;
20+
21+
auto node = ov::as_type_ptr<op::v0::Constant>(key);
22+
23+
auto type = node->get_output_element_type(0);
24+
auto shape = node->get_shape();
25+
std::size_t size = node->get_byte_size();
26+
const char* data = node->get_data_ptr<char>();
27+
28+
for (auto dim : shape) {
29+
hash ^= std::hash<size_t>{}(dim);
30+
}
31+
32+
for (std::size_t i = 0; i < size; i++) {
33+
hash ^= ((hash << 5) + hash) + data[i];
34+
}
35+
36+
hash ^= type.hash();
37+
hash ^= size;
38+
39+
return hash;
40+
}
41+
};
42+
43+
struct KeyEqual {
44+
bool operator()(const BlobCacheKey& lhs, const BlobCacheKey& rhs) const {
45+
auto lhs_node = ov::as_type_ptr<op::v0::Constant>(lhs);
46+
auto rhs_node = ov::as_type_ptr<op::v0::Constant>(rhs);
47+
48+
auto lhs_type = lhs_node->get_output_element_type(0);
49+
auto rhs_type = rhs_node->get_output_element_type(0);
50+
51+
if (lhs_type != rhs_type)
52+
return false;
53+
54+
auto lhs_shape = lhs_node->get_shape();
55+
auto rhs_shape = rhs_node->get_shape();
56+
57+
if (lhs_shape != rhs_shape)
58+
return false;
59+
60+
std::size_t lhs_size = lhs_node->get_byte_size();
61+
std::size_t rhs_size = rhs_node->get_byte_size();
62+
63+
if (lhs_size != rhs_size)
64+
return false;
65+
66+
// Retrieve buffer pointers
67+
const char* lhs_data = lhs_node->get_data_ptr<char>();
68+
const char* rhs_data = rhs_node->get_data_ptr<char>();
69+
70+
if (lhs_data == rhs_data)
71+
return true;
72+
73+
return std::memcmp(lhs_data, rhs_data, lhs_size) == 0;
74+
}
75+
};
76+
77+
/**
 * @brief Replaces duplicated small Constant nodes with a single shared node.
 *
 * Every Constant returned by m->get_ops() whose buffer is at most
 * LARGE_TENSOR_BYTE_SIZE bytes is looked up in a cache keyed by element type,
 * shape and buffer contents. The first node seen for a given key is kept;
 * consumers of any later equal node are rewired to the kept one.
 *
 * @param m Model to process; its graph is modified in place.
 * @return true if at least one duplicated Constant was replaced, false otherwise.
 */
bool ConstantsReduce::run_on_model(const std::shared_ptr<ov::Model>& m) {
    RUN_ON_FUNCTION_SCOPE(ConstantsReduce);

    std::unordered_map<BlobCacheKey, std::shared_ptr<ov::Node>, KeyHash, KeyEqual> blobMemCache;

    // NOTE(review): only the nodes reported by get_ops() are visited; constants
    // living inside nested sub-graph bodies are presumably not covered - confirm.
    const auto ops = m->get_ops();

    unsigned int copies = 0;

    for (const auto& node : ops) {
        // A single checked cast both filters non-constants and yields the typed pointer.
        const auto const_node = ov::as_type_ptr<ov::op::v0::Constant>(node);
        if (!const_node)
            continue;

        // Hashing and comparing read the whole buffer, so only small tensors are considered.
        if (const_node->get_byte_size() > LARGE_TENSOR_BYTE_SIZE)
            continue;

        // One lookup per constant: every cache access re-hashes the tensor bytes,
        // so find() followed by operator[] would repeat that work.
        const auto [cached, inserted] = blobMemCache.try_emplace(node, node);
        if (inserted)
            continue;

        copies++;

        // get_target_inputs() returns a snapshot, so rewiring while iterating is safe.
        const auto consumers = const_node->output(0).get_target_inputs();
        for (const auto& consumer : consumers) {
            consumer.replace_source_output(cached->second);
        }
    }
    OPENVINO_DEBUG("Reduced ", copies, " constant node duplications from model");

    // Return true if we have made any replacements
    return copies > 0;
}
118+
119+
} // namespace ov::pass
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
// Copyright (C) 2025 Intel Corporation
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
5+
#define _USE_MATH_DEFINES
6+
7+
#include "transformations/common_optimizations/constants_reduce.hpp"
8+
9+
#include <gtest/gtest.h>
10+
#include <math.h>
11+
12+
#include <memory>
13+
14+
#include "common_test_utils/ov_test_utils.hpp"
15+
#include "openvino/core/model.hpp"
16+
#include "openvino/opsets/opset8.hpp"
17+
#include "openvino/pass/manager.hpp"
18+
19+
using namespace testing;
20+
using namespace ov;
21+
22+
// Two identical constants feeding consecutive Add nodes must collapse into one.
TEST(TransformationTests, ConstantsReduce) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Same element type, shape and values on purpose.
    const auto addend_a = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    const auto addend_b = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    const auto sum_a = std::make_shared<opset8::Add>(input, addend_a);
    const auto sum_b = std::make_shared<opset8::Add>(sum_a, addend_b);

    const auto output = std::make_shared<ov::op::v0::Result>(sum_b);
    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // Only a single constant may survive the pass.
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 1);
}
41+
42+
// A chain of Adds with four identical constants and one distinct constant
// must end up with exactly two Constant nodes.
TEST(TransformationTests, ConstantsReduceChain) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Factory for the repeated (intentionally identical) addend.
    const auto make_repeated = [] {
        return opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    };
    const auto repeated_a = make_repeated();
    const auto repeated_b = make_repeated();
    const auto repeated_c = make_repeated();
    const auto repeated_d = make_repeated();

    // The first value differs, so this one must not be merged with the others.
    const auto distinct = opset8::Constant::create(element::f32, Shape{1, 4}, {2.0, 2.0, 3.0, 4.0});

    const auto sum_a = std::make_shared<opset8::Add>(input, repeated_a);
    const auto sum_b = std::make_shared<opset8::Add>(sum_a, repeated_b);
    const auto sum_c = std::make_shared<opset8::Add>(sum_b, repeated_c);
    const auto sum_d = std::make_shared<opset8::Add>(sum_c, repeated_d);
    const auto sum_e = std::make_shared<opset8::Add>(sum_d, distinct);

    const auto output = std::make_shared<ov::op::v0::Result>(sum_e);
    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // One shared constant for the repeated values plus the distinct one.
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 2);
}
69+
70+
// A chain of Adds where all five constants are identical must end up with a
// single Constant node.
TEST(TransformationTests, ConstantsReduceChain2) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // Factory for the repeated (intentionally identical) addend.
    const auto make_repeated = [] {
        return opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    };
    const auto repeated_a = make_repeated();
    const auto repeated_b = make_repeated();
    const auto repeated_c = make_repeated();
    const auto repeated_d = make_repeated();
    const auto repeated_e = make_repeated();

    const auto sum_a = std::make_shared<opset8::Add>(input, repeated_a);
    const auto sum_b = std::make_shared<opset8::Add>(sum_a, repeated_b);
    const auto sum_c = std::make_shared<opset8::Add>(sum_b, repeated_c);
    const auto sum_d = std::make_shared<opset8::Add>(sum_c, repeated_d);
    const auto sum_e = std::make_shared<opset8::Add>(sum_d, repeated_e);

    const auto output = std::make_shared<ov::op::v0::Result>(sum_e);
    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // Every addend is the same, so a single constant must remain.
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 1);
}
96+
97+
// Negative case: constants that differ in a single value must both be kept.
TEST(TransformationTests, ConstantsReduceNeg) {
    const auto input = std::make_shared<opset8::Parameter>(element::f32, Shape{1, 4});

    // The last element differs (4.0 vs 4.5), so the buffers are not equal.
    const auto addend_a = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.0});
    const auto addend_b = opset8::Constant::create(element::f32, Shape{1, 4}, {1.0, 2.0, 3.0, 4.5});
    const auto sum_a = std::make_shared<opset8::Add>(input, addend_a);
    const auto sum_b = std::make_shared<opset8::Add>(sum_a, addend_b);

    const auto output = std::make_shared<ov::op::v0::Result>(sum_b);
    const auto model = std::make_shared<Model>(ResultVector{output}, ParameterVector{input});

    pass::Manager manager;
    manager.register_pass<ov::pass::ConstantsReduce>();
    manager.run_passes(model);

    // Nothing to merge: both constants must still be present.
    ASSERT_EQ(count_ops_of_type<opset8::Constant>(model), 2);
}

src/plugins/intel_gpu/src/plugin/transformations_pipeline.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,7 @@
109109
#include "transformations/common_optimizations/transpose_sinking.hpp"
110110
#include "transformations/common_optimizations/weights_dequantize_to_fake_quantize.hpp"
111111
#include "transformations/common_optimizations/wrap_interpolate_into_transposes.hpp"
112+
#include "transformations/common_optimizations/constants_reduce.hpp"
112113
#include "transformations/control_flow/unroll_tensor_iterator.hpp"
113114
#include "transformations/convert_pooling_to_reduce.hpp"
114115
#include "transformations/convert_precision.hpp"
@@ -1227,6 +1228,8 @@ void TransformationsPipeline::apply(std::shared_ptr<ov::Model> func) {
12271228
// Remove Pad in front of MaxPool if both the pads_begin and pads_end are zero.
12281229
manager.register_pass<ov::pass::EliminatePad>();
12291230

1231+
manager.register_pass<ov::pass::ConstantsReduce>();
1232+
12301233
// This is supposed to be the last pass to ensure that we don't have name collisions until
12311234
// GPU plugin stops using friendly names for program creation
12321235
manager.register_pass<ov::pass::ResolveNameCollisions>(true);

0 commit comments

Comments
 (0)