Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dominator analysis: analyse on basic block granularity #5042

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions jbmc/src/java_bytecode/java_local_variable_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ struct procedure_local_cfg_baset<
java_bytecode_convert_methodt::method_offsett>
entry_mapt;
entry_mapt entry_map;
std::vector<java_bytecode_convert_methodt::method_offsett> possible_keys;

procedure_local_cfg_baset() {}

Expand All @@ -52,6 +53,7 @@ struct procedure_local_cfg_baset<
{
// Map instruction PCs onto node indices:
entry_map[inst.first]=this->add_node();
possible_keys.push_back(inst.first);
// Map back:
(*this)[entry_map[inst.first]].PC=inst.first;
}
Expand Down Expand Up @@ -122,6 +124,11 @@ struct procedure_local_cfg_baset<
{
return args.second.empty();
}

const std::vector<java_bytecode_convert_methodt::method_offsett> &keys()
{
return possible_keys;
}
};

// Grab some class typedefs for brevity:
Expand Down Expand Up @@ -463,13 +470,19 @@ static java_bytecode_convert_methodt::method_offsett get_common_dominator(
candidate_dominators;
for(auto v : merge_vars)
{
const auto &dominator_nodeidx=
const auto &var_start_basic_block =
dominator_analysis.cfg.entry_map.at(v->var.start_pc);
const auto &this_var_doms=
dominator_analysis.cfg[dominator_nodeidx].dominators;
for(const auto this_var_dom : this_var_doms)
if(this_var_dom<=first_pc)
candidate_dominators.push_back(this_var_dom);
const auto &this_var_dom_blocks =
dominator_analysis.cfg[var_start_basic_block].dominators;
for(const auto this_var_dom_block_index : this_var_dom_blocks)
{
const auto &this_var_dom_block =
dominator_analysis.cfg[this_var_dom_block_index].block;
// Only consider placing variable declarations at the head of
// a basic block (which conveniently is always a safe choice, even

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know what this means (i.e. why is it a safe choice and why would it not be a safe choice?)

// for a live range starting midway through a block)
candidate_dominators.push_back(this_var_dom_block.front());
}
}
std::sort(candidate_dominators.begin(), candidate_dominators.end());

Expand Down
106 changes: 77 additions & 29 deletions src/analyses/cfg_dominators.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ template <class P, class T, bool post_dom>
class cfg_dominators_templatet
{
public:
typedef std::set<T> target_sett;
typedef std::set<std::size_t> target_sett;

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm guessing this is referring to the node indices? In that case, shouldn't this use node_indext?

⛏️ (As a side note, for cases like this I'd also prefer a "strong typedef" i.e. wrapping it in a struct just to avoid accidentally doing things with them they were not meant to do - like comparing them to unrelated indices or adding them together, things like that).


struct nodet
{
target_sett dominators;
};

typedef procedure_local_cfg_baset<nodet, P, T> cfgt;
typedef cfg_basic_blockst<procedure_local_cfg_baset, nodet, P, T> cfgt;
cfgt cfg;

void operator()(P &program);
Expand All @@ -63,43 +63,71 @@ class cfg_dominators_templatet
return cfg.get_node(program_point);
}

/// Returns true if the program point corresponding to \p rhs_node is
/// dominated by program point \p lhs. Saves node lookup compared to the
/// dominates overload that takes two program points, so this version is
/// preferable if you intend to check more than one potential dominator.
/// Note by definition all program points dominate themselves.
bool dominates(T lhs, const nodet &rhs_node) const
/// Get the basic-block graph node index for \p program_point
std::size_t get_node_index(const T &program_point) const
{
return cfg.get_node_index(program_point);
}

/// Returns true if basic block \p lhs [post]dominates \p rhs
bool basic_block_dominates(std::size_t lhs, std::size_t rhs) const
{
return rhs_node.dominators.count(lhs);
return cfg[rhs].dominators.count(lhs);

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⛏️ > 0 for additional verbosity? Never a big fan of implicit to-bool conversion myself

}

/// Returns true if program point \p lhs [post]dominates \p rhs
bool dominates_same_block(T lhs, T rhs, std::size_t block) const;

/// Returns true if program point \p lhs dominates \p rhs.
/// Note by definition all program points dominate themselves.
bool dominates(T lhs, T rhs) const
{
return dominates(lhs, get_node(rhs));
const auto lhs_block = cfg.entry_map.at(lhs);
const auto rhs_block = cfg.entry_map.at(rhs);

if(lhs == rhs)
return true;

if(lhs_block != rhs_block)
return basic_block_dominates(lhs_block, rhs_block);
else
return dominates_same_block(lhs, rhs, lhs_block);
}

/// Returns true if the program point for \p program_point_node is reachable
/// Returns true if the basic block \p basic_block_node is reachable
/// from the entry point. Saves a lookup compared to the overload taking a
/// program point, so use this overload if you already have the node.
bool program_point_reachable(const nodet &program_point_node) const
bool basic_block_reachable(const nodet &basic_block_node) const
{
// Dominator analysis walks from the entry point, so a side-effect is to
// identify unreachable program points (those which don't dominate even
// themselves).
return !program_point_node.dominators.empty();
return !basic_block_node.dominators.empty();
}

/// Returns true if the program point for \p program_point_node is reachable
/// Returns true if the basic block \p basic_block_node is reachable
/// from the entry point. Saves a lookup compared to the overload taking a
/// program point, so use this overload if you already have the node.
/// program point, so use this overload if you already have the node index.
bool basic_block_reachable(std::size_t block) const
{
return basic_block_reachable(cfg[block]);
}

/// Returns true if the program point for \p program_point_node is reachable
/// from the entry point.
bool program_point_reachable(T program_point) const
{
// Dominator analysis walks from the entry point, so a side-effect is to
// identify unreachable program points (those which don't dominate even
// themselves).
return program_point_reachable(get_node(program_point));
return basic_block_reachable(get_node_index(program_point));
}

/// Returns the set of dominator blocks for a given basic block, including
/// itself. The result is a set of indices usable with this class' operator[].
const target_sett &basic_block_dominators(std::size_t block) const
{
return cfg[block].dominators;
}

T entry_node;
Expand Down Expand Up @@ -140,7 +168,7 @@ void cfg_dominators_templatet<P, T, post_dom>::initialise(P &program)
template <class P, class T, bool post_dom>
void cfg_dominators_templatet<P, T, post_dom>::fixedpoint(P &program)
{
std::list<T> worklist;
std::list<typename cfgt::node_indext> worklist;

if(cfgt::nodes_empty(program))
return;
Expand All @@ -149,23 +177,24 @@ void cfg_dominators_templatet<P, T, post_dom>::fixedpoint(P &program)
entry_node = cfgt::get_last_node(program);
else
entry_node = cfgt::get_first_node(program);
typename cfgt::nodet &n = cfg.get_node(entry_node);
n.dominators.insert(entry_node);
const auto entry_node_index = cfg.get_node_index(entry_node);
typename cfgt::nodet &n = cfg[entry_node_index];
n.dominators.insert(entry_node_index);

for(typename cfgt::edgest::const_iterator
s_it=(post_dom?n.in:n.out).begin();
s_it!=(post_dom?n.in:n.out).end();
++s_it)
worklist.push_back(cfg[s_it->first].PC);
worklist.push_back(s_it->first);

while(!worklist.empty())
{
// get node from worklist
T current=worklist.front();
const auto current = worklist.front();
worklist.pop_front();

bool changed=false;
typename cfgt::nodet &node = cfg.get_node(current);
typename cfgt::nodet &node = cfg[current];
if(node.dominators.empty())
{
for(const auto &edge : (post_dom ? node.out : node.in))
Expand Down Expand Up @@ -222,12 +251,33 @@ void cfg_dominators_templatet<P, T, post_dom>::fixedpoint(P &program)
{
for(const auto &edge : (post_dom ? node.in : node.out))
{
worklist.push_back(cfg[edge.first].PC);
worklist.push_back(edge.first);
}
}
}
}

template <class P, class T, bool post_dom>
bool cfg_dominators_templatet<P, T, post_dom>::dominates_same_block(
T lhs,
T rhs,
std::size_t block) const
{
// Special case when the program points belong to the same block: lhs
// dominates rhs iff it is <= rhs in program order (or the reverse if we're
// a postdominator analysis)

for(const auto &instruction : cfg[block].block)
{
if(instruction == lhs)
return !post_dom;
else if(instruction == rhs)
return post_dom;
}

UNREACHABLE; // Entry map is inconsistent with block members?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⛏️ I think in this case it might be better to just use a regular invariant and put the comment on that instead

}

/// Pretty-print a single node in the dominator tree. Supply a specialisation if
/// operator<< is not sufficient.
/// \par parameters: `node` to print and stream `out` to pretty-print it to
Expand All @@ -248,22 +298,20 @@ inline void dominators_pretty_print_node(
template <class P, class T, bool post_dom>
void cfg_dominators_templatet<P, T, post_dom>::output(std::ostream &out) const
{
for(const auto &node : cfg.entries())
for(typename cfgt::node_indext i = 0; i < cfg.size(); ++i)
{
auto n=node.first;

dominators_pretty_print_node(n, out);
out << "Block " << dominators_pretty_print_node(cfg[i].block.at(0), out);
if(post_dom)
out << " post-dominated by ";
else
out << " dominated by ";
bool first=true;
for(const auto &d : cfg[node.second].dominators)
for(const auto &d : cfg[i].dominators)
{
if(!first)
out << ", ";
first=false;
dominators_pretty_print_node(d, out);
dominators_pretty_print_node(cfg[d].block.at(0), out);
}
out << "\n";
}
Expand Down
16 changes: 4 additions & 12 deletions src/analyses/dependence_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,19 +97,11 @@ void dep_graph_domaint::control_dependencies(

// we could hard-code assume and goto handling here to improve
// performance
const cfg_post_dominatorst::cfgt::nodet &m =
pd.get_node(control_dep_candidate);

// successors of M
for(const auto &edge : m.out)
for(const auto &candidate_successor :
goto_programt::get_well_formed_instruction_successors(
control_dep_candidate))
{
// Could use pd.dominates(to, control_dep_candidate) but this would impose
// another dominator node lookup per call to this function, which is too
// expensive.
const cfg_post_dominatorst::cfgt::nodet &m_s=
pd.cfg[edge.first];

if(m_s.dominators.find(to)!=m_s.dominators.end())
if(pd.dominates(to, candidate_successor))
post_dom_one=true;
else
post_dom_all=false;
Expand Down
Loading