Skip to content

Commit 370fc30

Browse files
committed
Dominator analysis: analyse on basic-block granularity
Previously the dominator analysis used instruction granularity, which, especially when combined with a naive dominator set (rather than a dominator tree), can waste a great deal of time and memory computing and storing the dominator sets. Now that cfg_dominatorst works at basic-block granularity, natural_loopst does as well.
1 parent 5e5c46c commit 370fc30

15 files changed

+599
-156
lines changed

jbmc/src/java_bytecode/java_local_variable_table.cpp

+19-6
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ struct procedure_local_cfg_baset<
4141
java_bytecode_convert_methodt::method_offsett>
4242
entry_mapt;
4343
entry_mapt entry_map;
44+
std::vector<java_bytecode_convert_methodt::method_offsett> possible_keys;
4445

4546
procedure_local_cfg_baset() {}
4647

@@ -52,6 +53,7 @@ struct procedure_local_cfg_baset<
5253
{
5354
// Map instruction PCs onto node indices:
5455
entry_map[inst.first]=this->add_node();
56+
possible_keys.push_back(inst.first);
5557
// Map back:
5658
(*this)[entry_map[inst.first]].PC=inst.first;
5759
}
@@ -122,6 +124,11 @@ struct procedure_local_cfg_baset<
122124
{
123125
return args.second.empty();
124126
}
127+
128+
const std::vector<java_bytecode_convert_methodt::method_offsett> &keys()
129+
{
130+
return possible_keys;
131+
}
125132
};
126133

127134
// Grab some class typedefs for brevity:
@@ -463,13 +470,19 @@ static java_bytecode_convert_methodt::method_offsett get_common_dominator(
463470
candidate_dominators;
464471
for(auto v : merge_vars)
465472
{
466-
const auto &dominator_nodeidx=
473+
const auto &var_start_basic_block =
467474
dominator_analysis.cfg.entry_map.at(v->var.start_pc);
468-
const auto &this_var_doms=
469-
dominator_analysis.cfg[dominator_nodeidx].dominators;
470-
for(const auto this_var_dom : this_var_doms)
471-
if(this_var_dom<=first_pc)
472-
candidate_dominators.push_back(this_var_dom);
475+
const auto &this_var_dom_blocks =
476+
dominator_analysis.cfg[var_start_basic_block].dominators;
477+
for(const auto this_var_dom_block_index : this_var_dom_blocks)
478+
{
479+
const auto &this_var_dom_block =
480+
dominator_analysis.cfg[this_var_dom_block_index].block;
481+
// Only consider placing variable declarations at the head of
482+
// a basic block (which conveniently is always a safe choice, even
483+
// for a live range starting midway through a block)
484+
candidate_dominators.push_back(this_var_dom_block.front());
485+
}
473486
}
474487
std::sort(candidate_dominators.begin(), candidate_dominators.end());
475488

src/analyses/cfg_dominators.h

+77-29
Original file line numberDiff line numberDiff line change
@@ -37,14 +37,14 @@ template <class P, class T, bool post_dom>
3737
class cfg_dominators_templatet
3838
{
3939
public:
40-
typedef std::set<T> target_sett;
40+
typedef std::set<std::size_t> target_sett;
4141

4242
struct nodet
4343
{
4444
target_sett dominators;
4545
};
4646

47-
typedef procedure_local_cfg_baset<nodet, P, T> cfgt;
47+
typedef cfg_basic_blockst<procedure_local_cfg_baset, nodet, P, T> cfgt;
4848
cfgt cfg;
4949

5050
void operator()(P &program);
@@ -63,43 +63,71 @@ class cfg_dominators_templatet
6363
return cfg.get_node(program_point);
6464
}
6565

66-
/// Returns true if the program point corresponding to \p rhs_node is
67-
/// dominated by program point \p lhs. Saves node lookup compared to the
68-
/// dominates overload that takes two program points, so this version is
69-
/// preferable if you intend to check more than one potential dominator.
70-
/// Note by definition all program points dominate themselves.
71-
bool dominates(T lhs, const nodet &rhs_node) const
66+
/// Get the basic-block graph node index for \p program_point
67+
std::size_t get_node_index(const T &program_point) const
68+
{
69+
return cfg.get_node_index(program_point);
70+
}
71+
72+
/// Returns true if basic block \p lhs [post]dominates \p rhs
73+
bool basic_block_dominates(std::size_t lhs, std::size_t rhs) const
7274
{
73-
return rhs_node.dominators.count(lhs);
75+
return cfg[rhs].dominators.count(lhs);
7476
}
7577

78+
/// Returns true if program point \p lhs [post]dominates \p rhs, given that both belong to the same basic block \p block
79+
bool dominates_same_block(T lhs, T rhs, std::size_t block) const;
80+
7681
/// Returns true if program point \p lhs dominates \p rhs.
7782
/// Note by definition all program points dominate themselves.
7883
bool dominates(T lhs, T rhs) const
7984
{
80-
return dominates(lhs, get_node(rhs));
85+
const auto lhs_block = cfg.entry_map.at(lhs);
86+
const auto rhs_block = cfg.entry_map.at(rhs);
87+
88+
if(lhs == rhs)
89+
return true;
90+
91+
if(lhs_block != rhs_block)
92+
return basic_block_dominates(lhs_block, rhs_block);
93+
else
94+
return dominates_same_block(lhs, rhs, lhs_block);
8195
}
8296

83-
/// Returns true if the program point for \p program_point_node is reachable
97+
/// Returns true if the basic block \p basic_block_node is reachable
8498
/// from the entry point. Saves a lookup compared to the overload taking a
8599
/// program point, so use this overload if you already have the node.
86-
bool program_point_reachable(const nodet &program_point_node) const
100+
bool basic_block_reachable(const nodet &basic_block_node) const
87101
{
88102
// Dominator analysis walks from the entry point, so a side-effect is to
89103
// identify unreachable basic blocks (those which don't dominate even
90104
// themselves).
91-
return !program_point_node.dominators.empty();
105+
return !basic_block_node.dominators.empty();
92106
}
93107

94-
/// Returns true if the program point for \p program_point_node is reachable
108+
/// Returns true if the basic block with node index \p block is reachable
95109
/// from the entry point. Saves a lookup compared to the overload taking a
96-
/// program point, so use this overload if you already have the node.
110+
/// program point, so use this overload if you already have the node index.
111+
bool basic_block_reachable(std::size_t block) const
112+
{
113+
return basic_block_reachable(cfg[block]);
114+
}
115+
116+
/// Returns true if the program point \p program_point is reachable
117+
/// from the entry point.
97118
bool program_point_reachable(T program_point) const
98119
{
99120
// Dominator analysis walks from the entry point, so a side-effect is to
100121
// identify unreachable program points (those which don't dominate even
101122
// themselves).
102-
return program_point_reachable(get_node(program_point));
123+
return basic_block_reachable(get_node_index(program_point));
124+
}
125+
126+
/// Returns the set of dominator blocks for a given basic block, including
127+
/// itself. The result is a set of indices usable with this class' operator[].
128+
const target_sett &basic_block_dominators(std::size_t block) const
129+
{
130+
return cfg[block].dominators;
103131
}
104132

105133
T entry_node;
@@ -140,7 +168,7 @@ void cfg_dominators_templatet<P, T, post_dom>::initialise(P &program)
140168
template <class P, class T, bool post_dom>
141169
void cfg_dominators_templatet<P, T, post_dom>::fixedpoint(P &program)
142170
{
143-
std::list<T> worklist;
171+
std::list<typename cfgt::node_indext> worklist;
144172

145173
if(cfgt::nodes_empty(program))
146174
return;
@@ -149,23 +177,24 @@ void cfg_dominators_templatet<P, T, post_dom>::fixedpoint(P &program)
149177
entry_node = cfgt::get_last_node(program);
150178
else
151179
entry_node = cfgt::get_first_node(program);
152-
typename cfgt::nodet &n = cfg.get_node(entry_node);
153-
n.dominators.insert(entry_node);
180+
const auto entry_node_index = cfg.get_node_index(entry_node);
181+
typename cfgt::nodet &n = cfg[entry_node_index];
182+
n.dominators.insert(entry_node_index);
154183

155184
for(typename cfgt::edgest::const_iterator
156185
s_it=(post_dom?n.in:n.out).begin();
157186
s_it!=(post_dom?n.in:n.out).end();
158187
++s_it)
159-
worklist.push_back(cfg[s_it->first].PC);
188+
worklist.push_back(s_it->first);
160189

161190
while(!worklist.empty())
162191
{
163192
// get node from worklist
164-
T current=worklist.front();
193+
const auto current = worklist.front();
165194
worklist.pop_front();
166195

167196
bool changed=false;
168-
typename cfgt::nodet &node = cfg.get_node(current);
197+
typename cfgt::nodet &node = cfg[current];
169198
if(node.dominators.empty())
170199
{
171200
for(const auto &edge : (post_dom ? node.out : node.in))
@@ -222,12 +251,33 @@ void cfg_dominators_templatet<P, T, post_dom>::fixedpoint(P &program)
222251
{
223252
for(const auto &edge : (post_dom ? node.in : node.out))
224253
{
225-
worklist.push_back(cfg[edge.first].PC);
254+
worklist.push_back(edge.first);
226255
}
227256
}
228257
}
229258
}
230259

260+
template <class P, class T, bool post_dom>
261+
bool cfg_dominators_templatet<P, T, post_dom>::dominates_same_block(
262+
T lhs,
263+
T rhs,
264+
std::size_t block) const
265+
{
266+
// Special case when the program points belong to the same block: lhs
267+
// dominates rhs iff it is <= rhs in program order (or the reverse if we're
268+
// a postdominator analysis)
269+
270+
for(const auto &instruction : cfg[block].block)
271+
{
272+
if(instruction == lhs)
273+
return !post_dom;
274+
else if(instruction == rhs)
275+
return post_dom;
276+
}
277+
278+
UNREACHABLE; // Entry map is inconsistent with block members?
279+
}
280+
231281
/// Pretty-print a single node in the dominator tree. Supply a specialisation if
232282
/// operator<< is not sufficient.
233283
/// \par parameters: `node` to print and stream `out` to pretty-print it to
@@ -248,22 +298,20 @@ inline void dominators_pretty_print_node(
248298
template <class P, class T, bool post_dom>
249299
void cfg_dominators_templatet<P, T, post_dom>::output(std::ostream &out) const
250300
{
251-
for(const auto &n : cfg.keys())
301+
for(typename cfgt::node_indext i = 0; i < cfg.size(); ++i)
252302
{
253-
const auto &node = cfg.get_node(n);
254-
255-
dominators_pretty_print_node(n, out);
303+
out << "Block " << dominators_pretty_print_node(cfg[i].block.at(0), out);
256304
if(post_dom)
257305
out << " post-dominated by ";
258306
else
259307
out << " dominated by ";
260308
bool first=true;
261-
for(const auto &d : node.dominators)
309+
for(const auto &d : cfg[i].dominators)
262310
{
263311
if(!first)
264312
out << ", ";
265313
first=false;
266-
dominators_pretty_print_node(d, out);
314+
dominators_pretty_print_node(cfg[d].block.at(0), out);
267315
}
268316
out << "\n";
269317
}

src/analyses/dependence_graph.cpp

+4-12
Original file line numberDiff line numberDiff line change
@@ -97,19 +97,11 @@ void dep_graph_domaint::control_dependencies(
9797

9898
// we could hard-code assume and goto handling here to improve
9999
// performance
100-
const cfg_post_dominatorst::cfgt::nodet &m =
101-
pd.get_node(control_dep_candidate);
102-
103-
// successors of M
104-
for(const auto &edge : m.out)
100+
for(const auto &candidate_successor :
101+
goto_programt::get_well_formed_instruction_successors(
102+
control_dep_candidate))
105103
{
106-
// Could use pd.dominates(to, control_dep_candidate) but this would impose
107-
// another dominator node lookup per call to this function, which is too
108-
// expensive.
109-
const cfg_post_dominatorst::cfgt::nodet &m_s=
110-
pd.cfg[edge.first];
111-
112-
if(m_s.dominators.find(to)!=m_s.dominators.end())
104+
if(pd.dominates(to, candidate_successor))
113105
post_dom_one=true;
114106
else
115107
post_dom_all=false;

0 commit comments

Comments
 (0)