diff --git a/jbmc/src/java_bytecode/java_local_variable_table.cpp b/jbmc/src/java_bytecode/java_local_variable_table.cpp index 12b21390af6..0c8a62fe7d4 100644 --- a/jbmc/src/java_bytecode/java_local_variable_table.cpp +++ b/jbmc/src/java_bytecode/java_local_variable_table.cpp @@ -464,10 +464,7 @@ static java_bytecode_convert_methodt::method_offsett get_common_dominator( candidate_dominators; for(auto v : merge_vars) { - const auto &dominator_nodeidx= - dominator_analysis.cfg.entry_map.at(v->var.start_pc); - const auto &this_var_doms= - dominator_analysis.cfg[dominator_nodeidx].dominators; + const auto &this_var_doms = dominator_analysis.dominators(v->var.start_pc); for(const auto this_var_dom : this_var_doms) if(this_var_dom<=first_pc) candidate_dominators.push_back(this_var_dom); diff --git a/src/analyses/cfg_dominators.h b/src/analyses/cfg_dominators.h index ecfdc16114f..cb2684f173c 100644 --- a/src/analyses/cfg_dominators.h +++ b/src/analyses/cfg_dominators.h @@ -12,15 +12,16 @@ Author: Georg Weissenbacher, georg@weissenbacher.name #ifndef CPROVER_ANALYSES_CFG_DOMINATORS_H #define CPROVER_ANALYSES_CFG_DOMINATORS_H -#include +#include +#include +#include #include #include -#include -#include +#include +#include #include #include -#include /// Dominator graph. This computes a control-flow graph (see \ref cfgt) and /// decorates it with dominator sets per program point, following @@ -37,15 +38,19 @@ template class cfg_dominators_templatet { public: - typedef std::set target_sett; - struct nodet { - target_sett dominators; + optionalt dominator; + // Not on the stack; not seen yet + static const int NODE_NOT_VISITED = -1; + // On the stack but not yet numbered + static const int NODE_INDEX_PENDING = -2; + int postorder_index = NODE_NOT_VISITED; }; typedef procedure_local_cfg_baset cfgt; cfgt cfg; + using cfg_nodet = typename cfgt::nodet; void operator()(P &program); @@ -69,21 +74,147 @@ class cfg_dominators_templatet return cfg.get_node_index(program_point); } - /// Returns true if the program point corresponding to \p rhs_node is - /// dominated by program point \p lhs. Saves node lookup compared to the - /// dominates overload that takes two program points, so this version is - /// preferable if you intend to check more than one potential dominator. - /// Note by definition all program points dominate themselves. - bool dominates(T lhs, const nodet &rhs_node) const + /// Iterator that allows you to walk lazily up dominators from an entry node. + class dominators_iterablet + { + // Type of our containing \ref dense_integer_mapt + using cfg_dominatorst = cfg_dominators_templatet; + + public: + class dominators_iteratort + : public std::iterator + { + // Type of the std::iterator support class we inherit + using base_typet = std::iterator; + + public: + dominators_iteratort( + const cfg_dominatorst &dominator_analysis, + std::size_t index) + : dominator_analysis(dominator_analysis), current_index(index) + { + } + + private: + explicit dominators_iteratort(const cfg_dominatorst &dominator_analysis) + : dominator_analysis(dominator_analysis), current_index{} + { + } + + public: + static dominators_iteratort end(const cfg_dominatorst &dominator_analysis) + { + return dominators_iteratort{dominator_analysis}; + } + + dominators_iteratort operator++() + { + auto i = *this; + advance(); + return i; + } + dominators_iteratort operator++(int junk) + { + advance(); + return *this; + } + const cfg_nodet &get_node() const + { + return dominator_analysis.cfg[*current_index]; + } + typename base_typet::reference operator*() const + { + return get_node().PC; + } + typename base_typet::pointer operator->() const + { + return &**this; + } + bool operator==(const dominators_iteratort &rhs) const + { + return current_index == rhs.current_index; + } + bool operator!=(const dominators_iteratort &rhs) const + { + return current_index != rhs.current_index; + } + + private: + void advance() + { + INVARIANT(current_index.has_value(), "can't advance an end() iterator"); + const auto ¤t_node = dominator_analysis.cfg[*current_index]; + const auto &next_dominator = current_node.dominator; + INVARIANT( + current_node.postorder_index == + cfg_dominatorst::nodet::NODE_NOT_VISITED || + next_dominator.has_value(), + "reachable nodes' dominator chains should lead to the root"); + + if(!next_dominator || *next_dominator == *current_index) + { + // Cycle; this is the root node + current_index = optionalt{}; + } + else + { + current_index = next_dominator; + } + } + + const cfg_dominatorst &dominator_analysis; + optionalt current_index; + }; + + dominators_iterablet( + const cfg_dominatorst &dominator_analysis, + optionalt index) + : dominator_analysis(dominator_analysis), first_instruction_index(index) + { + } + + dominators_iteratort begin() const + { + if(first_instruction_index.has_value()) + return dominators_iteratort{dominator_analysis, + *first_instruction_index}; + else + return dominators_iteratort::end(dominator_analysis); + } + + dominators_iteratort end() const + { + return dominators_iteratort::end(dominator_analysis); + } + + private: + const cfg_dominatorst &dominator_analysis; + optionalt first_instruction_index; + }; + + template + dominators_iterablet dominators(U start_instruction) const { - return rhs_node.dominators.count(lhs); + return dominators_iterablet{*this, cfg.get_node_index(start_instruction)}; } /// Returns true if program point \p lhs dominates \p rhs. /// Note by definition all program points dominate themselves. bool dominates(T lhs, T rhs) const { - return dominates(lhs, get_node(rhs)); + const auto rhs_dominators = dominators(rhs); + return std::any_of( + rhs_dominators.begin(), rhs_dominators.end(), [&lhs](const T dominator) { + return lhs == dominator; + }); + } + + /// Returns true if program point \p lhs dominates the instruction + /// corresponding to \p rhs_node. + /// Note by definition all program points dominate themselves. + bool dominates(T lhs, const typename cfgt::nodet &rhs_node) const + { + return dominates(lhs, rhs_node.PC); } /// Returns true if the program point for \p program_point_node is reachable @@ -94,7 +225,7 @@ class cfg_dominators_templatet // Dominator analysis walks from the entry point, so a side-effect is to // identify unreachable program points (those which don't dominate even // themselves). - return !program_point_node.dominators.empty(); + return program_point_node.dominator.has_value(); } /// Returns true if the program point for \p program_point_node is reachable @@ -115,6 +246,26 @@ class cfg_dominators_templatet protected: void initialise(P &program); void fixedpoint(P &program); + + /// Assigns post-order numbering to each node in the tree. + /// Full explanation here: https://en.wikipedia.org/wiki/Tree_traversal. + void assign_postordering(std::size_t start_node_index); + + /// Sort our CFG nodes in reverse post-order. Just gets every node that we've + /// processed (anything not NODE_NOT_VISITED) then orders from highest to + /// lowest. This excludes the entry-node though. + std::vector + get_reverse_postordered_instructions(std::size_t entry_node_index) const; + + /// Walks up the dominators of left/right nodes until it finds one that is + /// common to both sides. Used to work out the nearest common dominator + /// when a node that has multiple incoming edges. + /// + /// Noted by figure 3 in "A Simple, Fast Dominance Algorithm" by Cooper, + /// Harvey and Kennedy. + const cfg_nodet &intersect( + const cfg_nodet &left_input_node, + const cfg_nodet &right_input_node); }; /// Print the result of the dominator computation @@ -142,96 +293,197 @@ void cfg_dominators_templatet::initialise(P &program) cfg(program); } +template +void cfg_dominators_templatet::assign_postordering( + std::size_t start_node_index) +{ + struct stack_entryt + { + // cfg index, not the post-order index we're assigning + std::size_t node_index; + typename cfgt::nodet::edgest::const_iterator child_iterator; + typename cfgt::nodet::edgest::const_iterator child_end; + }; + + std::size_t next_postorder_index = 0; + std::vector stack; + + auto place_node_on_stack_if_not_visited = [this, + &stack](std::size_t node_index) { + // If we've already visited this node, don't re-calculate. + auto &node = cfg[node_index]; + if(node.postorder_index != nodet::NODE_NOT_VISITED) + return; + + // Otherwise set that we've processed it, and put the node on the stack. + const auto &node_successors = post_dom ? node.in : node.out; + stack.push_back( + {node_index, node_successors.begin(), node_successors.end()}); + node.postorder_index = nodet::NODE_INDEX_PENDING; + }; + + place_node_on_stack_if_not_visited(start_node_index); + INVARIANT(stack.size() == 1, "entry node should not be visited yet"); + + /// Do the actual post-order processing. + while(!stack.empty()) + { + auto &stack_top = stack.back(); + auto ¤t_node = cfg[stack_top.node_index]; + + INVARIANT( + current_node.postorder_index == nodet::NODE_INDEX_PENDING, + "a node on the stack should not be numbered yet"); + + // If we've already numbered all of our children, number ourselves + // and remove the node from further processing. + if(stack_top.child_iterator == stack_top.child_end) + { + // Node children all visited; number this node + current_node.postorder_index = next_postorder_index++; + stack.pop_back(); + } + else + { + // Otherwise put children on the stack to be visited. + // Alter top of stack before it is perhaps reallocated on extension: + const auto next_child_index = stack_top.child_iterator->first; + ++stack_top.child_iterator; + place_node_on_stack_if_not_visited(next_child_index); + } + } +} + +template +std::vector +cfg_dominators_templatet::get_reverse_postordered_instructions( + std::size_t entry_node_index) const +{ + auto reverse_postordering = [this](T lhs, T rhs) { + return cfg.get_node(lhs).postorder_index > + cfg.get_node(rhs).postorder_index; + }; + + std::vector order; + + // Note that this will only select and order nodes that are reachable from + // the entry point. + for(std::size_t i = 0; i < cfg.size(); ++i) + { + if( + i != entry_node_index && + cfg[i].postorder_index != nodet::NODE_NOT_VISITED) + { + order.push_back(cfg[i].PC); + } + } + + std::sort(order.begin(), order.end(), reverse_postordering); + + return order; +} + /// Computes the MOP for the dominator analysis template void cfg_dominators_templatet::fixedpoint(P &program) { - std::list worklist; - if(cfgt::nodes_empty(program)) return; + // Initialise entry node to be its own dominator: if(post_dom) entry_node = cfgt::get_last_node(program); else entry_node = cfgt::get_first_node(program); typename cfgt::nodet &n = cfg.get_node(entry_node); - n.dominators.insert(entry_node); + const auto entry_node_index = cfg.get_node_index(entry_node); + n.dominator = entry_node_index; - for(typename cfgt::edgest::const_iterator - s_it=(post_dom?n.in:n.out).begin(); - s_it!=(post_dom?n.in:n.out).end(); - ++s_it) - worklist.push_back(cfg[s_it->first].PC); + // Calculate a post-ordering on the CFG: + assign_postordering(entry_node_index); - while(!worklist.empty()) - { - // get node from worklist - T current=worklist.front(); - worklist.pop_front(); + const auto visit_order = + get_reverse_postordered_instructions(entry_node_index); - bool changed=false; - typename cfgt::nodet &node = cfg.get_node(current); - if(node.dominators.empty()) - { - for(const auto &edge : (post_dom ? node.out : node.in)) - if(!cfg[edge.first].dominators.empty()) - { - node.dominators=cfg[edge.first].dominators; - node.dominators.insert(current); - changed=true; - } - } + bool any_change = true; - // compute intersection of predecessors - for(const auto &edge : (post_dom ? node.out : node.in)) + while(any_change) + { + any_change = false; + for(const auto current : visit_order) { - const target_sett &other=cfg[edge.first].dominators; - if(other.empty()) - continue; - - typename target_sett::const_iterator n_it=node.dominators.begin(); - typename target_sett::const_iterator o_it=other.begin(); + auto &node = cfg.get_node(current); + const cfg_nodet *dominator_node = nullptr; - // in-place intersection. not safe to use set_intersect - while(n_it!=node.dominators.end() && o_it!=other.end()) + // compute intersection of predecessors + for(const auto &edge : (post_dom ? node.out : node.in)) { - if(*n_it==current) - ++n_it; - else if(*n_it<*o_it) - { - changed=true; - node.dominators.erase(n_it++); - } - else if(*o_it<*n_it) - ++o_it; + const typename cfgt::nodet &other = cfg[edge.first]; + if(!other.dominator) + continue; + + if(!dominator_node) + dominator_node = &other; else - { - ++n_it; - ++o_it; - } + dominator_node = &intersect(*dominator_node, other); } - while(n_it!=node.dominators.end()) + // If the calculated dominator is different than the one already + // existing on the node, or hasn't got an existing dominator, update and + // then re-calculate dominators. + if( + dominator_node != nullptr && + (!node.dominator || dominator_node->postorder_index != + cfg[*node.dominator].postorder_index)) { - if(*n_it==current) - ++n_it; - else - { - changed=true; - node.dominators.erase(n_it++); - } + node.dominator = cfg.get_node_index(dominator_node->PC); + // Note when any node's immediate dominator changes this way we re-visit + // the *whole* graph again in post-order, because changing one node's + // immediate dominator may change the dominator *set* of an instruction + // some distance away. + + // We could optimise this to only re-visit instructions reachable from + // the change site. + any_change = true; } } + } +} + +template +const typename cfg_dominators_templatet::cfgt::nodet & +cfg_dominators_templatet::intersect( + const cfg_nodet &left_input_node, + const cfg_nodet &right_input_node) +{ + auto left_node_dominators = dominators(left_input_node.PC); + auto right_node_dominators = dominators(right_input_node.PC); + auto left_it = left_node_dominators.begin(); + auto right_it = right_node_dominators.begin(); - if(changed) // fixed point for node reached? + while(left_it.get_node().postorder_index != + right_it.get_node().postorder_index) + { + while(left_it.get_node().postorder_index < + right_it.get_node().postorder_index) { - for(const auto &edge : (post_dom ? node.in : node.out)) - { - worklist.push_back(cfg[edge.first].PC); - } + ++left_it; + INVARIANT( + left_it != left_node_dominators.end(), + "should only move the iterator that is further from the root"); + } + + while(right_it.get_node().postorder_index < + left_it.get_node().postorder_index) + { + ++right_it; + INVARIANT( + right_it != right_node_dominators.end(), + "should only move the iterator that is further from the root"); } } + + return left_it.get_node(); } /// Pretty-print a single node in the dominator tree. Supply a specialisation if @@ -264,7 +516,7 @@ void cfg_dominators_templatet::output(std::ostream &out) const else out << " dominated by "; bool first=true; - for(const auto &d : cfg[node.second].dominators) + for(auto &d : dominators(n)) { if(!first) out << ", "; diff --git a/src/analyses/dependence_graph.cpp b/src/analyses/dependence_graph.cpp index ebbe9055987..3028dff9875 100644 --- a/src/analyses/dependence_graph.cpp +++ b/src/analyses/dependence_graph.cpp @@ -109,7 +109,7 @@ void dep_graph_domaint::control_dependencies( const cfg_post_dominatorst::cfgt::nodet &m_s= pd.cfg[edge.first]; - if(m_s.dominators.find(to)!=m_s.dominators.end()) + if(pd.dominates(to, m_s)) post_dom_one=true; else post_dom_all=false; diff --git a/src/analyses/sese_regions.cpp b/src/analyses/sese_regions.cpp index 5ff1aa7452b..674f4aa3ec3 100644 --- a/src/analyses/sese_regions.cpp +++ b/src/analyses/sese_regions.cpp @@ -140,45 +140,23 @@ void sese_region_analysist::compute_sese_regions( (*successors.begin())->incoming_edges.size() == 1) continue; - const auto &instruction_postdoms = postdominators.get_node(it).dominators; - - // Ideally we would start with the immediate postdominator and walk down, - // but our current dominator analysis doesn't make it easy to determine an - // immediate dominator. - - // Ideally I would use `optionalt` here, but it triggers a - // GCC-5 bug. - std::size_t closest_exit_index = dominators.cfg.size(); - for(const auto &possible_exit : instruction_postdoms) + for(const auto &possible_exit : postdominators.dominators(it)) { - const auto possible_exit_index = dominators.get_node_index(possible_exit); - const auto &possible_exit_node = dominators.cfg[possible_exit_index]; - const auto possible_exit_dominators = - possible_exit_node.dominators.size(); - if( - it != possible_exit && dominators.dominates(it, possible_exit_node) && + it != possible_exit && dominators.dominates(it, possible_exit) && get_innermost_loop(innermost_loop_ids, it) == get_innermost_loop(innermost_loop_ids, possible_exit)) { - // If there are several candidate region exit nodes, prefer the one with - // the least dominators, i.e. the closest to the region entrance. - if( - closest_exit_index == dominators.cfg.size() || - dominators.cfg[closest_exit_index].dominators.size() > - possible_exit_dominators) - { - closest_exit_index = possible_exit_index; - } - } - } + // The first candidate that meets out criteria is the best, as + // postdominators are iterated over closest first (i.e. starting with + // the immediate postdominator). - if(closest_exit_index < dominators.cfg.size()) - { - auto emplace_result = - sese_regions.emplace(it, dominators.cfg[closest_exit_index].PC); - INVARIANT( - emplace_result.second, "should only visit each region entry once"); + auto emplace_result = sese_regions.emplace(it, possible_exit); + INVARIANT( + emplace_result.second, "should only visit each region entry once"); + + break; + } } } } diff --git a/src/goto-analyzer/unreachable_instructions.cpp b/src/goto-analyzer/unreachable_instructions.cpp index 544b6c8be6f..88ceee7a875 100644 --- a/src/goto-analyzer/unreachable_instructions.cpp +++ b/src/goto-analyzer/unreachable_instructions.cpp @@ -38,7 +38,7 @@ static void unreachable_instructions( ++it) { const cfg_dominatorst::cfgt::nodet &n=dominators.cfg[it->second]; - if(n.dominators.empty()) + if(!n.dominator) dest.insert(std::make_pair(it->first->location_number, it->first)); } diff --git a/src/goto-instrument/full_slicer.cpp b/src/goto-instrument/full_slicer.cpp index d034045d2e5..ad40401296e 100644 --- a/src/goto-instrument/full_slicer.cpp +++ b/src/goto-instrument/full_slicer.cpp @@ -153,32 +153,9 @@ void full_slicert::add_jumps( { // check whether the nearest post-dominator is different from // lex_succ - goto_programt::const_targett nearest=lex_succ; - std::size_t post_dom_size=0; - for(cfg_dominatorst::target_sett::const_iterator d_it = - j_PC_node.dominators.begin(); - d_it != j_PC_node.dominators.end(); - ++d_it) + if(j_PC_node.dominator && pd.cfg[*j_PC_node.dominator].PC != lex_succ) { - const auto &node = cfg.get_node(*d_it); - if(node.node_required) - { - const irep_idt &id2 = node.function_id; - INVARIANT(id==id2, - "goto/jump expected to be within a single function"); - - const auto &postdom_node = pd.get_node(*d_it); - - if(postdom_node.dominators.size() > post_dom_size) - { - nearest=*d_it; - post_dom_size = postdom_node.dominators.size(); - } - } - } - if(nearest!=lex_succ) - { - add_to_queue(queue, *it, nearest); + add_to_queue(queue, *it, lex_succ); jumps.erase(it); } }