17
17
18
18
#pragma once
19
19
20
- #include " common/exception.h"
21
20
#include " common/status.h"
22
21
#include " exprs/runtime_filter.h"
23
22
#include " runtime/runtime_filter_mgr.h"
24
23
#include " runtime/runtime_state.h"
25
- #include " vec/columns/column_nullable.h"
26
- #include " vec/columns/columns_number.h"
27
- #include " vec/common/assert_cast.h"
28
24
#include " vec/core/block.h" // IWYU pragma: keep
29
25
#include " vec/exprs/vexpr_context.h"
30
26
#include " vec/runtime/shared_hash_table_controller.h"
31
27
32
28
namespace doris {
33
29
// This class is used by the hash join node.
34
- class VRuntimeFilterSlots {
30
+ class RuntimeFilterSlots {
35
31
public:
36
- VRuntimeFilterSlots (
37
- const std::vector<std::shared_ptr<vectorized::VExprContext>>& build_expr_ctxs,
38
- const std::vector<std::shared_ptr<IRuntimeFilter>>& runtime_filters)
39
- : _build_expr_context(build_expr_ctxs), _runtime_filters(runtime_filters) {}
40
-
41
- Status send_filter_size (RuntimeState* state, uint64_t hash_table_size,
42
- std::shared_ptr<pipeline::CountedFinishDependency> dependency) {
43
- if (_runtime_filters.empty ()) {
44
- return Status::OK ();
45
- }
46
- for (auto runtime_filter : _runtime_filters) {
47
- if (runtime_filter->need_sync_filter_size ()) {
48
- runtime_filter->set_finish_dependency (dependency);
49
- }
50
- }
51
-
52
- // send_filter_size may call dependency->sub(), so we call set_finish_dependency firstly for all rf to avoid dependency set_ready repeatedly
53
- for (auto runtime_filter : _runtime_filters) {
54
- if (runtime_filter->need_sync_filter_size ()) {
55
- RETURN_IF_ERROR (runtime_filter->send_filter_size (state, hash_table_size));
56
- }
57
- }
58
- return Status::OK ();
59
- }
60
-
61
- // use synced size when this rf has global merged
62
- static uint64_t get_real_size (IRuntimeFilter* filter, uint64_t hash_table_size) {
63
- return filter->need_sync_filter_size () ? filter->get_synced_size () : hash_table_size;
32
+ RuntimeFilterSlots (const vectorized::VExprContextSPtrs& build_expr_ctxs,
33
+ RuntimeProfile* profile,
34
+ const std::vector<std::shared_ptr<IRuntimeFilter>>& runtime_filters,
35
+ bool should_build_hash_table)
36
+ : _build_expr_context(build_expr_ctxs),
37
+ _runtime_filters (runtime_filters),
38
+ _should_build_hash_table(should_build_hash_table),
39
+ _runtime_filters_profile(new RuntimeProfile(" RuntimeFilterSlots" )) {
40
+ profile->add_child (_runtime_filters_profile.get (), true , nullptr );
41
+ _publish_runtime_filter_timer =
42
+ ADD_TIMER_WITH_LEVEL (_runtime_filters_profile, " PublishTime" , 1 );
43
+ _runtime_filter_compute_timer =
44
+ ADD_TIMER_WITH_LEVEL (_runtime_filters_profile, " BuildTime" , 1 );
64
45
}
65
46
66
- /* *
67
- Disable meaningless filters, such as filters:
68
- RF1: col1 in (1, 3, 5)
69
- RF2: col1 min: 1, max: 5
70
- We consider RF2 is meaningless, because RF1 has already filtered out all values that RF2 can filter.
71
- */
72
- Status disable_meaningless_filters (RuntimeState* state) {
73
- // process ignore duplicate IN_FILTER
74
- std::unordered_set<int > has_in_filter;
75
- for (auto filter : _runtime_filters) {
76
- if (filter->get_ignored () || filter->get_disabled ()) {
77
- continue ;
78
- }
79
- if (filter->get_real_type () != RuntimeFilterType::IN_FILTER) {
80
- continue ;
81
- }
82
- if (!filter->need_sync_filter_size () &&
83
- filter->type () == RuntimeFilterType::IN_OR_BLOOM_FILTER) {
84
- continue ;
85
- }
86
- if (has_in_filter.contains (filter->expr_order ())) {
87
- filter->set_disabled ();
88
- continue ;
89
- }
90
- has_in_filter.insert (filter->expr_order ());
91
- }
92
-
93
- // process ignore filter when it has IN_FILTER on same expr
94
- for (auto filter : _runtime_filters) {
95
- if (filter->get_ignored () || filter->get_disabled ()) {
96
- continue ;
97
- }
98
- if (filter->get_real_type () == RuntimeFilterType::IN_FILTER ||
99
- !has_in_filter.contains (filter->expr_order ())) {
100
- continue ;
101
- }
102
- filter->set_disabled ();
103
- }
104
- return Status::OK ();
105
- }
106
-
107
- Status ignore_all_filters () {
108
- for (auto filter : _runtime_filters) {
109
- filter->set_ignored ();
110
- }
111
- return Status::OK ();
112
- }
47
+ Status send_filter_size (RuntimeState* state, uint64_t hash_table_size,
48
+ std::shared_ptr<pipeline::CountedFinishDependency> dependency);
113
49
114
- Status disable_all_filters () {
50
+ Status disable_all_filters (
51
+ RuntimeState* state,
52
+ std::shared_ptr<pipeline::CountedFinishDependency> finish_dependency) {
53
+ RETURN_IF_ERROR (send_filter_size (state, 0 , finish_dependency));
115
54
for (auto filter : _runtime_filters) {
116
55
filter->set_disabled ();
117
56
}
57
+ RETURN_IF_ERROR (_publish (state));
58
+ _runtime_filters_disabled = true ;
118
59
return Status::OK ();
119
60
}
120
61
121
- Status init_filters (RuntimeState* state, uint64_t local_hash_table_size) {
122
- // process IN_OR_BLOOM_FILTER's real type
123
- for (auto filter : _runtime_filters) {
124
- if (filter->type () == RuntimeFilterType::IN_OR_BLOOM_FILTER &&
125
- get_real_size (filter.get (), local_hash_table_size) >
126
- state->runtime_filter_max_in_num ()) {
127
- RETURN_IF_ERROR (filter->change_to_bloom_filter ());
128
- }
129
-
130
- if (filter->get_real_type () == RuntimeFilterType::BLOOM_FILTER) {
131
- RETURN_IF_ERROR (filter->init_bloom_filter (
132
- get_real_size (filter.get (), local_hash_table_size)));
133
- }
134
- }
135
- return Status::OK ();
136
- }
137
-
138
- void insert (const vectorized::Block* block) {
139
- for (auto & filter : _runtime_filters) {
140
- int result_column_id =
141
- _build_expr_context[filter->expr_order ()]->get_last_result_column_id ();
142
- const auto & column = block->get_by_position (result_column_id).column ;
143
- if (filter->get_ignored () || filter->get_disabled ()) {
144
- continue ;
145
- }
146
- filter->insert_batch (column, 1 );
147
- }
148
- }
149
-
150
- // publish runtime filter
151
- Status publish (RuntimeState* state, bool publish_local) {
152
- for (auto & filter : _runtime_filters) {
153
- RETURN_IF_ERROR (filter->publish (state, publish_local));
154
- }
155
- return Status::OK ();
156
- }
62
+ Status process (RuntimeState* state, const vectorized::Block* block,
63
+ std::shared_ptr<pipeline::CountedFinishDependency> finish_dependency);
157
64
158
65
void copy_to_shared_context (vectorized::SharedHashTableContextPtr& context) {
159
66
for (auto & filter : _runtime_filters) {
@@ -173,11 +80,30 @@ class VRuntimeFilterSlots {
173
80
return Status::OK ();
174
81
}
175
82
176
- bool empty () { return _runtime_filters.empty (); }
83
+ protected:
84
+ Status _disable_meaningless_filters (RuntimeState* state);
85
+ Status _init_filters (RuntimeState* state, uint64_t local_hash_table_size);
86
+ void _insert (const vectorized::Block* block, size_t start);
87
+ Status _publish (RuntimeState* state) {
88
+ if (_runtime_filters_disabled) {
89
+ return Status::OK ();
90
+ }
91
+ SCOPED_TIMER (_publish_runtime_filter_timer);
92
+ for (auto & filter : _runtime_filters) {
93
+ RETURN_IF_ERROR (filter->publish (state, !_should_build_hash_table));
94
+ }
95
+ return Status::OK ();
96
+ }
177
97
178
- private:
179
98
const std::vector<std::shared_ptr<vectorized::VExprContext>>& _build_expr_context;
180
99
std::vector<std::shared_ptr<IRuntimeFilter>> _runtime_filters;
100
+ bool _should_build_hash_table;
101
+
102
+ RuntimeProfile::Counter* _publish_runtime_filter_timer = nullptr ;
103
+ RuntimeProfile::Counter* _runtime_filter_compute_timer = nullptr ;
104
+ std::unique_ptr<RuntimeProfile> _runtime_filters_profile;
105
+
106
+ bool _runtime_filters_disabled = false ;
181
107
};
182
108
183
109
} // namespace doris
0 commit comments