diff --git a/jbmc/src/java_bytecode/java_local_variable_table.cpp b/jbmc/src/java_bytecode/java_local_variable_table.cpp index c7fde311bd0..a2b27cd7a13 100644 --- a/jbmc/src/java_bytecode/java_local_variable_table.cpp +++ b/jbmc/src/java_bytecode/java_local_variable_table.cpp @@ -41,6 +41,7 @@ struct procedure_local_cfg_baset< java_bytecode_convert_methodt::method_offsett> entry_mapt; entry_mapt entry_map; + std::vector possible_keys; procedure_local_cfg_baset() {} @@ -52,6 +53,7 @@ struct procedure_local_cfg_baset< { // Map instruction PCs onto node indices: entry_map[inst.first]=this->add_node(); + possible_keys.push_back(inst.first); // Map back: (*this)[entry_map[inst.first]].PC=inst.first; } @@ -122,6 +124,11 @@ struct procedure_local_cfg_baset< { return args.second.empty(); } + + const std::vector &keys() + { + return possible_keys; + } }; // Grab some class typedefs for brevity: @@ -463,13 +470,19 @@ static java_bytecode_convert_methodt::method_offsett get_common_dominator( candidate_dominators; for(auto v : merge_vars) { - const auto &dominator_nodeidx= + const auto &var_start_basic_block = dominator_analysis.cfg.entry_map.at(v->var.start_pc); - const auto &this_var_doms= - dominator_analysis.cfg[dominator_nodeidx].dominators; - for(const auto this_var_dom : this_var_doms) - if(this_var_dom<=first_pc) - candidate_dominators.push_back(this_var_dom); + const auto &this_var_dom_blocks = + dominator_analysis.cfg[var_start_basic_block].dominators; + for(const auto this_var_dom_block_index : this_var_dom_blocks) + { + const auto &this_var_dom_block = + dominator_analysis.cfg[this_var_dom_block_index].block; + // Only consider placing variable declarations at the head of + // a basic block (which conveniently is always a safe choice, even + // for a live range starting midway through a block) + candidate_dominators.push_back(this_var_dom_block.front()); + } } std::sort(candidate_dominators.begin(), candidate_dominators.end()); diff --git a/src/analyses/cfg_dominators.h b/src/analyses/cfg_dominators.h index de50f48f568..bb0d1722fd1 100644 --- a/src/analyses/cfg_dominators.h +++ b/src/analyses/cfg_dominators.h @@ -37,14 +37,14 @@ template class cfg_dominators_templatet { public: - typedef std::set target_sett; + typedef std::set target_sett; struct nodet { target_sett dominators; }; - typedef procedure_local_cfg_baset cfgt; + typedef cfg_basic_blockst cfgt; cfgt cfg; void operator()(P &program); @@ -63,43 +63,71 @@ class cfg_dominators_templatet return cfg.get_node(program_point); } - /// Returns true if the program point corresponding to \p rhs_node is - /// dominated by program point \p lhs. Saves node lookup compared to the - /// dominates overload that takes two program points, so this version is - /// preferable if you intend to check more than one potential dominator. - /// Note by definition all program points dominate themselves. - bool dominates(T lhs, const nodet &rhs_node) const + /// Get the basic-block graph node index for \p program_point + std::size_t get_node_index(const T &program_point) const + { + return cfg.get_node_index(program_point); + } + + /// Returns true if basic block \p lhs [post]dominates \p rhs + bool basic_block_dominates(std::size_t lhs, std::size_t rhs) const { - return rhs_node.dominators.count(lhs); + return cfg[rhs].dominators.count(lhs); } + /// Returns true if program point \p lhs [post]dominates \p rhs + bool dominates_same_block(T lhs, T rhs, std::size_t block) const; + /// Returns true if program point \p lhs dominates \p rhs. /// Note by definition all program points dominate themselves. bool dominates(T lhs, T rhs) const { - return dominates(lhs, get_node(rhs)); + const auto lhs_block = cfg.entry_map.at(lhs); + const auto rhs_block = cfg.entry_map.at(rhs); + + if(lhs == rhs) + return true; + + if(lhs_block != rhs_block) + return basic_block_dominates(lhs_block, rhs_block); + else + return dominates_same_block(lhs, rhs, lhs_block); } - /// Returns true if the program point for \p program_point_node is reachable + /// Returns true if the basic block \p basic_block_node is reachable /// from the entry point. Saves a lookup compared to the overload taking a /// program point, so use this overload if you already have the node. - bool program_point_reachable(const nodet &program_point_node) const + bool basic_block_reachable(const nodet &basic_block_node) const { // Dominator analysis walks from the entry point, so a side-effect is to // identify unreachable program points (those which don't dominate even // themselves). - return !program_point_node.dominators.empty(); + return !basic_block_node.dominators.empty(); } - /// Returns true if the program point for \p program_point_node is reachable + /// Returns true if the basic block \p basic_block_node is reachable /// from the entry point. Saves a lookup compared to the overload taking a - /// program point, so use this overload if you already have the node. + /// program point, so use this overload if you already have the node index. + bool basic_block_reachable(std::size_t block) const + { + return basic_block_reachable(cfg[block]); + } + + /// Returns true if the program point for \p program_point_node is reachable + /// from the entry point. bool program_point_reachable(T program_point) const { // Dominator analysis walks from the entry point, so a side-effect is to // identify unreachable program points (those which don't dominate even // themselves). - return program_point_reachable(get_node(program_point)); + return basic_block_reachable(get_node_index(program_point)); + } + + /// Returns the set of dominator blocks for a given basic block, including + /// itself. The result is a set of indices usable with this class' operator[]. + const target_sett &basic_block_dominators(std::size_t block) const + { + return cfg[block].dominators; } T entry_node; @@ -140,7 +168,7 @@ void cfg_dominators_templatet::initialise(P &program) template void cfg_dominators_templatet::fixedpoint(P &program) { - std::list worklist; + std::list worklist; if(cfgt::nodes_empty(program)) return; @@ -149,23 +177,24 @@ void cfg_dominators_templatet::fixedpoint(P &program) entry_node = cfgt::get_last_node(program); else entry_node = cfgt::get_first_node(program); - typename cfgt::nodet &n = cfg.get_node(entry_node); - n.dominators.insert(entry_node); + const auto entry_node_index = cfg.get_node_index(entry_node); + typename cfgt::nodet &n = cfg[entry_node_index]; + n.dominators.insert(entry_node_index); for(typename cfgt::edgest::const_iterator s_it=(post_dom?n.in:n.out).begin(); s_it!=(post_dom?n.in:n.out).end(); ++s_it) - worklist.push_back(cfg[s_it->first].PC); + worklist.push_back(s_it->first); while(!worklist.empty()) { // get node from worklist - T current=worklist.front(); + const auto current = worklist.front(); worklist.pop_front(); bool changed=false; - typename cfgt::nodet &node = cfg.get_node(current); + typename cfgt::nodet &node = cfg[current]; if(node.dominators.empty()) { for(const auto &edge : (post_dom ? node.out : node.in)) @@ -222,12 +251,33 @@ void cfg_dominators_templatet::fixedpoint(P &program) { for(const auto &edge : (post_dom ? node.in : node.out)) { - worklist.push_back(cfg[edge.first].PC); + worklist.push_back(edge.first); } } } } +template +bool cfg_dominators_templatet::dominates_same_block( + T lhs, + T rhs, + std::size_t block) const +{ + // Special case when the program points belong to the same block: lhs + // dominates rhs iff it is <= rhs in program order (or the reverse if we're + // a postdominator analysis) + + for(const auto &instruction : cfg[block].block) + { + if(instruction == lhs) + return !post_dom; + else if(instruction == rhs) + return post_dom; + } + + UNREACHABLE; // Entry map is inconsistent with block members? +} + /// Pretty-print a single node in the dominator tree. Supply a specialisation if /// operator<< is not sufficient. /// \par parameters: `node` to print and stream `out` to pretty-print it to @@ -248,22 +298,20 @@ inline void dominators_pretty_print_node( template void cfg_dominators_templatet::output(std::ostream &out) const { - for(const auto &node : cfg.entries()) + for(typename cfgt::node_indext i = 0; i < cfg.size(); ++i) { - auto n=node.first; - - dominators_pretty_print_node(n, out); + out << "Block " << dominators_pretty_print_node(cfg[i].block.at(0), out); if(post_dom) out << " post-dominated by "; else out << " dominated by "; bool first=true; - for(const auto &d : cfg[node.second].dominators) + for(const auto &d : cfg[i].dominators) { if(!first) out << ", "; first=false; - dominators_pretty_print_node(d, out); + dominators_pretty_print_node(cfg[d].block.at(0), out); } out << "\n"; } diff --git a/src/analyses/dependence_graph.cpp b/src/analyses/dependence_graph.cpp index ebbe9055987..675588ca836 100644 --- a/src/analyses/dependence_graph.cpp +++ b/src/analyses/dependence_graph.cpp @@ -97,19 +97,11 @@ void dep_graph_domaint::control_dependencies( // we could hard-code assume and goto handling here to improve // performance - const cfg_post_dominatorst::cfgt::nodet &m = - pd.get_node(control_dep_candidate); - - // successors of M - for(const auto &edge : m.out) + for(const auto &candidate_successor : + goto_programt::get_well_formed_instruction_successors( + control_dep_candidate)) { - // Could use pd.dominates(to, control_dep_candidate) but this would impose - // another dominator node lookup per call to this function, which is too - // expensive. - const cfg_post_dominatorst::cfgt::nodet &m_s= - pd.cfg[edge.first]; - - if(m_s.dominators.find(to)!=m_s.dominators.end()) + if(pd.dominates(to, candidate_successor)) post_dom_one=true; else post_dom_all=false; diff --git a/src/analyses/natural_loops.h b/src/analyses/natural_loops.h index bf42238e3b4..02c26d06e2f 100644 --- a/src/analyses/natural_loops.h +++ b/src/analyses/natural_loops.h @@ -31,17 +31,19 @@ Author: Georg Weissenbacher, georg@weissenbacher.name template class natural_loops_templatet; -/// A natural loop, specified as a set of instructions +/// A natural loop, specified as a set of basic block indices that correspond +/// to graph nodes in the linked \ref natural_loop_templatet's dominator +/// analysis. template class natural_loop_templatet { typedef natural_loops_templatet natural_loopst; - // For natural_loopst to directly manipulate loop_instructions, cf. clients - // which should use the public iterface: + // For natural_loopst to directly manipulate loop_blocks, cf. clients + // which should use the public interface: friend natural_loopst; - typedef std::set loop_instructionst; - loop_instructionst loop_instructions; + typedef std::set loop_blockst; + loop_blockst loop_blocks; public: explicit natural_loop_templatet(natural_loopst &natural_loops) @@ -49,6 +51,12 @@ class natural_loop_templatet { } + /// Returns true if \p block_index is in this loop + bool contains(std::size_t block_index) const + { + return loop_blocks.count(block_index); + } + /// Returns true if \p instruction is in this loop bool contains(const T instruction) const { @@ -67,38 +75,40 @@ class natural_loop_templatet } // NOLINTNEXTLINE(readability/identifiers) - typedef typename loop_instructionst::const_iterator const_iterator; + typedef typename loop_blockst::const_iterator const_iterator; - /// Iterator over this loop's instructions + /// Iterator over this loop's blocks const_iterator begin() const { - return loop_instructions.begin(); + return loop_blocks.begin(); } - /// Iterator over this loop's instructions + /// Iterator over this loop's blocks const_iterator end() const { - return loop_instructions.end(); + return loop_blocks.end(); } - /// Number of instructions in this loop + /// Number of blocks in this loop std::size_t size() const { - return loop_instructions.size(); + return loop_blocks.size(); } - /// Returns true if this loop contains no instructions + /// Returns true if this loop contains no blocks bool empty() const { - return loop_instructions.empty(); + return loop_blocks.empty(); + } + + const loop_blockst &blocks() const + { + return loop_blocks; } - /// Adds \p instruction to this loop. The caller must verify that the added - /// instruction does not alter loop structure; if it does they must discard - /// and recompute the related \ref natural_loopst instance. - void insert_instruction(const T instruction) + const std::vector &get_basic_block(std::size_t index) const { - loop_instructions.insert(instruction); + return natural_loops.get_basic_block(index); } private: @@ -141,16 +151,63 @@ class natural_loops_templatet return cfg_dominators; } + /// Returns true if \p instruction is the header of any loop + bool is_loop_header(const T instruction) const + { + return loop_map.count(instruction); + } + + /// Returns true if \p block_index is the header block of any loop + bool is_loop_header_block(std::size_t block_index) const + { + return is_loop_header(get_basic_block(block_index).front()); + } + + /// Returns the basic block with the given \p index + const std::vector &get_basic_block(std::size_t index) const + { + return cfg_dominators.cfg[index].block; + } + /// Returns true if \p instruction is in \p loop bool loop_contains(const natural_loopt &loop, const T instruction) const { - return loop.loop_instructions.count(instruction); + const auto instruction_block = cfg_dominators.get_node_index(instruction); + return loop.loop_blocks.count(instruction_block); } - /// Returns true if \p instruction is the header of any loop - bool is_loop_header(const T instruction) const + /// Inserts instruction \p new_instruction after \p insert_after. The new + /// instruction must join the same basic block as \p insert_after (i.e. + /// \p insert_after must not be a block terminator), such that the insertion + /// has no effect on loop structure. + void insert_instruction_after(const T new_instruction, const T insert_after) { - return loop_map.count(instruction); + cfg_dominators.cfg.insert_instruction_after(new_instruction, insert_after); + } + + /// Splits the basic block containing \p instruction immediately after + /// \p instruction. The caller should fix up the new block; the new block will + /// be added to any loops that contained the old one. + void split_basic_block_after(const T instruction) + { + auto new_block_index = + cfg_dominators.cfg.split_basic_block_after(instruction); + auto old_block_index = cfg_dominators.cfg.entry_map.at(instruction); + for(auto &loop : loop_map) + { + if(loop.second.loop_blocks.count(old_block_index)) + loop.second.loop_blocks.insert(new_block_index); + } + } + + /// Adds a new basic block graph edge, which must not alter loop structure, + /// as we make no attempt to maintain analysis consistency here. + void + add_basic_block_graph_edge(const T from_block_tail, const T to_block_head) + { + // We assume for now that these don't alter loop structure. + cfg_dominators.cfg.add_basic_block_graph_edge( + from_block_tail, to_block_head); } natural_loops_templatet() @@ -238,30 +295,31 @@ void natural_loops_templatet::compute_natural_loop(T m, T n) { assert(n->location_number<=m->location_number); - std::stack stack; + std::stack stack; auto insert_result = loop_map.emplace(n, natural_loopt{*this}); INVARIANT(insert_result.second, "each loop head should only be visited once"); natural_loopt &loop = insert_result.first->second; - loop.insert_instruction(n); - loop.insert_instruction(m); + const auto n_block = cfg_dominators.cfg.entry_map.at(n); + const auto m_block = cfg_dominators.cfg.entry_map.at(m); + loop.loop_blocks.insert(n_block); + loop.loop_blocks.insert(m_block); - if(n!=m) - stack.push(m); + if(n_block != m_block) + stack.push(m_block); while(!stack.empty()) { - T p=stack.top(); + std::size_t p = stack.top(); stack.pop(); - const nodet &node = cfg_dominators.get_node(p); + const nodet &node = cfg_dominators.cfg[p]; for(const auto &edge : node.in) { - T q=cfg_dominators.cfg[edge.first].PC; - std::pair result = - loop.loop_instructions.insert(q); + std::size_t q = edge.first; + auto result = loop.loop_blocks.insert(q); if(result.second) stack.push(q); } @@ -277,12 +335,17 @@ void natural_loops_templatet::output(std::ostream &out) const unsigned n=loop.first->location_number; out << n << " is head of { "; - for(typename natural_loopt::const_iterator l_it=loop.second.begin(); - l_it!=loop.second.end(); ++l_it) + bool first = true; + for(const auto loop_block : loop.second.loop_blocks) { - if(l_it!=loop.second.begin()) - out << ", "; - out << (*l_it)->location_number; + for(const auto &instruction : this->get_basic_block(loop_block)) + { + if(!first) + out << ", "; + else + first = false; + out << instruction->location_number; + } } out << " }\n"; } diff --git a/src/goto-analyzer/unreachable_instructions.cpp b/src/goto-analyzer/unreachable_instructions.cpp index 544b6c8be6f..b6afa88fe95 100644 --- a/src/goto-analyzer/unreachable_instructions.cpp +++ b/src/goto-analyzer/unreachable_instructions.cpp @@ -32,15 +32,13 @@ static void unreachable_instructions( cfg_dominatorst dominators; dominators(goto_program); - for(cfg_dominatorst::cfgt::entry_mapt::const_iterator - it=dominators.cfg.entry_map.begin(); - it!=dominators.cfg.entry_map.end(); - ++it) + for(auto instruction = goto_program.instructions.begin(); + instruction != goto_program.instructions.end(); + ++instruction) { - const cfg_dominatorst::cfgt::nodet &n=dominators.cfg[it->second]; + const cfg_dominatorst::cfgt::nodet &n = dominators.get_node(instruction); if(n.dominators.empty()) - dest.insert(std::make_pair(it->first->location_number, - it->first)); + dest.insert(std::make_pair(instruction->location_number, instruction)); } } diff --git a/src/goto-instrument/accelerate/accelerate.cpp b/src/goto-instrument/accelerate/accelerate.cpp index 616407f2c33..2cbcd57f7f8 100644 --- a/src/goto-instrument/accelerate/accelerate.cpp +++ b/src/goto-instrument/accelerate/accelerate.cpp @@ -38,8 +38,9 @@ goto_programt::targett acceleratet::find_back_jump( natural_loops.loop_map.at(loop_header); goto_programt::targett back_jump=loop_header; - for(const auto &t : loop) + for(const auto loop_block : loop) { + goto_programt::targett t = loop.get_basic_block(loop_block).back(); if( t->is_goto() && t->get_condition().is_true() && t->targets.size() == 1 && t->targets.front() == loop_header && @@ -57,19 +58,26 @@ bool acceleratet::contains_nested_loops(goto_programt::targett &loop_header) natural_loops_mutablet::natural_loopt &loop = natural_loops.loop_map.at(loop_header); - for(const auto &t : loop) + for(const auto loop_block : loop) { - if(t->is_backwards_goto()) + const auto &block = loop.get_basic_block(loop_block); + + const auto block_header = block.front(); + const auto block_tail = block.back(); + + if(block_tail->is_backwards_goto()) { - if(t->targets.size()!=1 || - t->get_target()!=loop_header) + if( + block_tail->targets.size() != 1 || + block_tail->get_target() != loop_header) { return true; } } // Header of some other loop? - if(t != loop_header && natural_loops.is_loop_header(t)) + if( + block_header != loop_header && natural_loops.is_loop_header(block_header)) { return true; } @@ -152,7 +160,7 @@ int acceleratet::accelerate_loop(goto_programt::targett &loop_header) goto_programt::targett new_inst=loop_header; ++new_inst; - loop.insert_instruction(new_inst); + natural_loops.insert_instruction_after(new_inst, loop_header); std::cout << "Overflow loc is " << overflow_loc->location_number << '\n'; std::cout << "Back jump is " << back_jump->location_number << '\n'; @@ -239,32 +247,52 @@ void acceleratet::make_overflow_loc( natural_loops.loop_map.at(loop_header); overflow_instrumentert instrumenter(program, overflow_var, symbol_table); - for(const auto &loop_instruction : loop) + for(const auto block_index : loop) { - overflow_locs[loop_instruction] = goto_programt::targetst(); - goto_programt::targetst &added = overflow_locs[loop_instruction]; + const auto &basic_block = natural_loops.get_basic_block(block_index); + std::vector all_added; + for(const auto instruction : basic_block) + { + overflow_locs[instruction] = goto_programt::targetst(); + goto_programt::targetst &added = overflow_locs[instruction]; - instrumenter.add_overflow_checks(loop_instruction, added); - for(const auto &new_instruction : added) - loop.insert_instruction(new_instruction); + instrumenter.add_overflow_checks(instruction, added); + all_added.insert(all_added.end(), added.begin(), added.end()); + } + // Insert newly-added instructions into the basic block representation. + // The block header must not have been replaced. + for(const auto new_instruction : all_added) + { + auto predecessor = std::prev(new_instruction); + INVARIANT( + std::find(basic_block.begin(), basic_block.end(), predecessor) != + basic_block.end(), + "newly added instructions should not come at the start of a block"); + // Note this will add to basic_block + natural_loops.insert_instruction_after(new_instruction, predecessor); + } } goto_programt::targett t = program.insert_after( loop_header, goto_programt::make_assignment(code_assignt(overflow_var, false_exprt()))); t->swap(*loop_header); - loop.insert_instruction(t); + natural_loops.insert_instruction_after(t, loop_header); overflow_locs[loop_header].push_back(t); overflow_loc = program.insert_after(loop_end, goto_programt::make_skip()); overflow_loc->swap(*loop_end); - loop.insert_instruction(overflow_loc); + natural_loops.insert_instruction_after(overflow_loc, loop_end); goto_programt::targett t2 = program.insert_after( loop_end, goto_programt::make_goto(overflow_loc, not_exprt(overflow_var))); t2->swap(*loop_end); overflow_locs[overflow_loc].push_back(t2); - loop.insert_instruction(t2); + natural_loops.insert_instruction_after(t2, loop_end); + + natural_loops.split_basic_block_after(loop_end); + natural_loops.split_basic_block_after(t2); + natural_loops.add_basic_block_graph_edge(loop_end, overflow_loc); goto_programt::targett tmp=overflow_loc; overflow_loc=loop_end; diff --git a/src/goto-instrument/accelerate/acceleration_utils.cpp b/src/goto-instrument/accelerate/acceleration_utils.cpp index c72284d68a4..3cb53b2b7c3 100644 --- a/src/goto-instrument/accelerate/acceleration_utils.cpp +++ b/src/goto-instrument/accelerate/acceleration_utils.cpp @@ -95,8 +95,11 @@ void acceleration_utilst::find_modified( const natural_loops_mutablet::natural_loopt &loop, expr_sett &modified) { - for(const auto &step : loop) - find_modified(step, modified); + for(const auto block_index : loop) + { + for(const auto instruction : loop.get_basic_block(block_index)) + find_modified(instruction, modified); + } } void acceleration_utilst::find_modified( diff --git a/src/goto-instrument/accelerate/disjunctive_polynomial_acceleration.cpp b/src/goto-instrument/accelerate/disjunctive_polynomial_acceleration.cpp index d03139cb0d8..14dacee36d8 100644 --- a/src/goto-instrument/accelerate/disjunctive_polynomial_acceleration.cpp +++ b/src/goto-instrument/accelerate/disjunctive_polynomial_acceleration.cpp @@ -752,9 +752,10 @@ void disjunctive_polynomial_accelerationt::cone_of_influence( void disjunctive_polynomial_accelerationt::find_distinguishing_points() { - for(const auto &loop_instruction : loop) + for(const auto basic_block_index : loop) { - const auto succs = goto_program.get_successors(loop_instruction); + const auto &basic_block = loop.get_basic_block(basic_block_index); + const auto succs = goto_program.get_successors(basic_block.back()); if(succs.size() > 1) { diff --git a/src/goto-instrument/accelerate/sat_path_enumerator.cpp b/src/goto-instrument/accelerate/sat_path_enumerator.cpp index 61aa54e79ee..a11317ab378 100644 --- a/src/goto-instrument/accelerate/sat_path_enumerator.cpp +++ b/src/goto-instrument/accelerate/sat_path_enumerator.cpp @@ -112,7 +112,8 @@ void sat_path_enumeratort::find_distinguishing_points() it != loop.end(); ++it) { - const auto succs=goto_program.get_successors(*it); + const auto tail = loop.get_basic_block(*it).back(); + const auto succs = goto_program.get_successors(tail); if(succs.size()>1) { diff --git a/src/goto-instrument/code_contracts.cpp b/src/goto-instrument/code_contracts.cpp index dfc5e8fd20e..410e4faafea 100644 --- a/src/goto-instrument/code_contracts.cpp +++ b/src/goto-instrument/code_contracts.cpp @@ -80,10 +80,14 @@ static void check_apply_invariants( it=loop.begin(); it!=loop.end(); ++it) - if((*it)->is_goto() && - (*it)->get_target()==loop_head && - (*it)->location_number>loop_end->location_number) - loop_end=*it; + { + const auto &basic_block = loop.get_basic_block(*it); + const auto block_tail = basic_block.back(); + if( + block_tail->is_goto() && block_tail->get_target() == loop_head && + block_tail->location_number > loop_end->location_number) + loop_end = block_tail; + } // see whether we have an invariant exprt invariant = static_cast( diff --git a/src/goto-instrument/dump_c.cpp b/src/goto-instrument/dump_c.cpp index 6013a17ad7e..de05e7288ed 100644 --- a/src/goto-instrument/dump_c.cpp +++ b/src/goto-instrument/dump_c.cpp @@ -663,6 +663,9 @@ void dump_ct::cleanup_decl( tmp.add(goto_programt::make_end_function()); + // goto_program2codet requires valid location numbers: + tmp.update(); + std::unordered_set typedef_names; for(const auto &td : typedef_map) typedef_names.insert(td.first); diff --git a/src/goto-instrument/full_slicer.cpp b/src/goto-instrument/full_slicer.cpp index d034045d2e5..54966190758 100644 --- a/src/goto-instrument/full_slicer.cpp +++ b/src/goto-instrument/full_slicer.cpp @@ -141,38 +141,57 @@ void full_slicert::add_jumps( const irep_idt &id = j.function_id; const cfg_post_dominatorst &pd=post_dominators.at(id); - const auto &j_PC_node = pd.get_node(j.PC); - // find the nearest post-dominator in slice - if(!pd.dominates(lex_succ, j_PC_node)) + if(!pd.dominates(lex_succ, j.PC)) { add_to_queue(queue, *it, lex_succ); jumps.erase(it); } else { + const auto pc_basic_block_index = pd.cfg.entry_map.at(j.PC); + // check whether the nearest post-dominator is different from // lex_succ goto_programt::const_targett nearest=lex_succ; std::size_t post_dom_size=0; - for(cfg_dominatorst::target_sett::const_iterator d_it = - j_PC_node.dominators.begin(); - d_it != j_PC_node.dominators.end(); - ++d_it) + for(const auto postdom_block_index : + pd.basic_block_dominators(pc_basic_block_index)) { - const auto &node = cfg.get_node(*d_it); - if(node.node_required) - { - const irep_idt &id2 = node.function_id; - INVARIANT(id==id2, - "goto/jump expected to be within a single function"); + const auto &postdom_node = pd.cfg[postdom_block_index]; + const auto &postdom_block = postdom_node.block; + auto instruction_it = postdom_block.begin(); - const auto &postdom_node = pd.get_node(*d_it); + // If this is the same block as j.PC, only consider instructions that + // come after it: + if(postdom_block_index == pc_basic_block_index) + { + while(*instruction_it != j.PC) + { + INVARIANT( + instruction_it != postdom_block.end(), + "entry map and basic block members should be consistent"); + ++instruction_it; + } + } - if(postdom_node.dominators.size() > post_dom_size) + for(; instruction_it != postdom_block.end(); ++instruction_it) + { + const auto &node = cfg.get_node(*instruction_it); + if(node.node_required) { - nearest=*d_it; - post_dom_size = postdom_node.dominators.size(); + const irep_idt &id2 = node.function_id; + INVARIANT( + id == id2, "goto/jump expected to be within a single function"); + + if(postdom_node.dominators.size() > post_dom_size) + { + nearest = *instruction_it; + post_dom_size = postdom_node.dominators.size(); + } + + // Later instructions in the same block can only be worse: + break; } } } @@ -271,10 +290,9 @@ void full_slicert::operator()( // declarations or dead instructions may be necessary as well decl_deadt decl_dead; - for(const auto &instruction_and_index : cfg.entries()) + for(const auto &instruction : cfg.keys()) { - const auto &instruction = instruction_and_index.first; - const auto instruction_node_index = instruction_and_index.second; + const auto instruction_node_index = cfg.get_node_index(instruction); if(criterion(cfg[instruction_node_index].function_id, instruction)) add_to_queue(queue, instruction_node_index, instruction); else if(implicit(instruction)) diff --git a/src/goto-instrument/goto_instrument_parse_options.cpp b/src/goto-instrument/goto_instrument_parse_options.cpp index 030a0b205bd..e5c743e0922 100644 --- a/src/goto-instrument/goto_instrument_parse_options.cpp +++ b/src/goto-instrument/goto_instrument_parse_options.cpp @@ -669,6 +669,10 @@ int goto_instrument_parse_optionst::doit() // applied restore_returns(goto_model); + // dump_c (actually goto_program2code) requires valid instruction + // location numbers: + goto_model.goto_functions.update(); + if(cmdline.args.size()==2) { #ifdef _MSC_VER @@ -1497,6 +1501,10 @@ void goto_instrument_parse_optionst::instrument_goto_program() do_indirect_call_and_rtti_removal(); log.status() << "Performing a reachability slice" << messaget::eom; + + // reachability_slicer requires that the model has unique location numbers: + goto_model.goto_functions.update(); + if(cmdline.isset("property")) reachability_slicer(goto_model, cmdline.get_values("property")); else @@ -1523,7 +1531,11 @@ void goto_instrument_parse_optionst::instrument_goto_program() if(cmdline.isset("property")) property_slicer(goto_model, cmdline.get_values("property")); else + { + // full_slicer requires that the model has unique location numbers: + goto_model.goto_functions.update(); full_slicer(goto_model); + } } // splice option @@ -1544,6 +1556,9 @@ void goto_instrument_parse_optionst::instrument_goto_program() { do_indirect_call_and_rtti_removal(); + // reachability_slicer requires that the model has unique location numbers: + goto_model.goto_functions.update(); + log.status() << "Slicing away initializations of unused global variables" << messaget::eom; slice_global_inits(goto_model); @@ -1572,6 +1587,8 @@ void goto_instrument_parse_optionst::instrument_goto_program() aggressive_slicer.doit(); + goto_model.goto_functions.update(); + log.status() << "Performing a reachability slice" << messaget::eom; if(cmdline.isset("property")) reachability_slicer(goto_model, cmdline.get_values("property")); diff --git a/src/goto-instrument/goto_program2code.cpp b/src/goto-instrument/goto_program2code.cpp index a20d06a5dfe..34c21aa454f 100644 --- a/src/goto-instrument/goto_program2code.cpp +++ b/src/goto-instrument/goto_program2code.cpp @@ -68,16 +68,21 @@ void goto_program2codet::build_loop_map() // edge -- we also ignore such code goto_programt::const_targett loop_start=l_it->first; goto_programt::const_targett loop_end=loop_start; - for(natural_loopst::natural_loopt::const_iterator - it=l_it->second.begin(); - it!=l_it->second.end(); - ++it) - if((*it)->is_goto()) + for(const auto block_index : l_it->second.blocks()) + { + for(const auto instruction : loops.get_basic_block(block_index)) { - if((*it)->get_target()==loop_start && - (*it)->location_number>loop_end->location_number) - loop_end=*it; + if(instruction->is_goto()) + { + if( + instruction->get_target() == loop_start && + instruction->location_number > loop_end->location_number) + { + loop_end = instruction; + } + } } + } if(!loop_map.insert(std::make_pair(loop_start, loop_end)).second) UNREACHABLE; @@ -763,10 +768,8 @@ bool goto_program2codet::set_block_end_points( case_end!=upper_bound; ++case_end) { - const auto &case_end_node = dominators.get_node(case_end); - // ignore dead instructions for the following checks - if(!dominators.program_point_reachable(case_end_node)) + if(!dominators.program_point_reachable(case_end)) { // simplification may have figured out that a case is unreachable // this is possibly getting too weird, abort to be safe @@ -777,7 +780,7 @@ bool goto_program2codet::set_block_end_points( } // find the last instruction dominated by the case start - if(!dominators.dominates(it->case_start, case_end_node)) + if(!dominators.dominates(it->case_start, case_end)) break; if(!processed_locations.insert(case_end->location_number).second) diff --git a/src/goto-instrument/havoc_loops.cpp b/src/goto-instrument/havoc_loops.cpp index 6589c6a6417..3fa16780afe 100644 --- a/src/goto-instrument/havoc_loops.cpp +++ b/src/goto-instrument/havoc_loops.cpp @@ -80,10 +80,10 @@ void havoc_loopst::havoc_loop( get_loop_exit(loop); // divert all gotos to the loop head to the loop exit - for(loopt::const_iterator - l_it=loop.begin(); l_it!=loop.end(); l_it++) + for(const auto loop_block : loop.blocks()) { - goto_programt::instructiont &instruction=**l_it; + goto_programt::instructiont &instruction = + *natural_loops.get_basic_block(loop_block).back(); if(instruction.is_goto()) { for(goto_programt::targetst::iterator @@ -105,8 +105,9 @@ void havoc_loopst::get_modifies( const loopt &loop, modifiest &modifies) { - for(const auto &instruction_it : loop) - function_modifies.get_modifies(local_may_alias, instruction_it, modifies); + for(const auto basic_block : loop) + for(const auto instruction_it : loop.get_basic_block(basic_block)) + function_modifies.get_modifies(local_may_alias, instruction_it, modifies); } void havoc_loopst::havoc_loops() diff --git a/src/goto-instrument/loop_utils.cpp b/src/goto-instrument/loop_utils.cpp index efb14b4aaf8..d63c4359a5d 100644 --- a/src/goto-instrument/loop_utils.cpp +++ b/src/goto-instrument/loop_utils.cpp @@ -26,7 +26,12 @@ goto_programt::targett get_loop_exit(const loopt &loop) for(loopt::const_iterator l_it=loop.begin(); l_it!=loop.end(); l_it++) - loop_map[(*l_it)->location_number]=*l_it; + { + for(const auto &instruction : loop.get_basic_block(*l_it)) + { + loop_map[instruction->location_number] = instruction; + } + } // get the one with the highest number goto_programt::targett last=(--loop_map.end())->second; @@ -92,17 +97,21 @@ void get_modifies( for(loopt::const_iterator i_it=loop.begin(); i_it!=loop.end(); i_it++) { - const goto_programt::instructiont &instruction=**i_it; - - if(instruction.is_assign()) - { - const exprt &lhs=to_code_assign(instruction.code).lhs(); - get_modifies_lhs(local_may_alias, *i_it, lhs, modifies); - } - else if(instruction.is_function_call()) + const auto basic_block = *i_it; + for(const auto instruction_it : loop.get_basic_block(basic_block)) { - const exprt &lhs=to_code_function_call(instruction.code).lhs(); - get_modifies_lhs(local_may_alias, *i_it, lhs, modifies); + const goto_programt::instructiont &instruction = *instruction_it; + + if(instruction.is_assign()) + { + const exprt &lhs = to_code_assign(instruction.code).lhs(); + get_modifies_lhs(local_may_alias, instruction_it, lhs, modifies); + } + else if(instruction.is_function_call()) + { + const exprt &lhs = to_code_function_call(instruction.code).lhs(); + get_modifies_lhs(local_may_alias, instruction_it, lhs, modifies); + } } } } diff --git a/src/goto-instrument/points_to.cpp b/src/goto-instrument/points_to.cpp index 237590b881b..3455243eb8d 100644 --- a/src/goto-instrument/points_to.cpp +++ b/src/goto-instrument/points_to.cpp @@ -21,9 +21,9 @@ void points_tot::fixedpoint() { added=false; - for(const auto &instruction_and_entry : cfg.entries()) + for(const auto &key : cfg.keys()) { - if(transform(cfg[instruction_and_entry.second])) + if(transform(cfg.get_node(key))) added=true; } } diff --git a/src/goto-instrument/reachability_slicer.cpp b/src/goto-instrument/reachability_slicer.cpp index 18b2e1c323a..b5e2cfe2ede 100644 --- a/src/goto-instrument/reachability_slicer.cpp +++ b/src/goto-instrument/reachability_slicer.cpp @@ -39,12 +39,13 @@ reachability_slicert::get_sources( const slicing_criteriont &criterion) { std::vector sources; - for(const auto &e_it : cfg.entries()) + for(const auto &instruction : cfg.keys()) { + const auto cfg_node_index = cfg.get_node_index(instruction); if( - criterion(cfg[e_it.second].function_id, e_it.first) || - is_threaded(e_it.first)) - sources.push_back(e_it.second); + criterion(cfg[cfg_node_index].function_id, instruction) || + is_threaded(instruction)) + sources.push_back(cfg_node_index); } if(sources.empty()) diff --git a/src/goto-programs/cfg.h b/src/goto-programs/cfg.h index cfeb28ce892..c3d5ba5820e 100644 --- a/src/goto-programs/cfg.h +++ b/src/goto-programs/cfg.h @@ -12,8 +12,9 @@ Author: Daniel Kroening, kroening@kroening.com #ifndef CPROVER_GOTO_PROGRAMS_CFG_H #define CPROVER_GOTO_PROGRAMS_CFG_H -#include +#include #include +#include #include "goto_functions.h" @@ -31,6 +32,30 @@ struct cfg_base_nodet:public graph_nodet, public T I PC; }; +/// Functor to convert cfg nodes into dense integers, used by \ref cfg_baset. +/// Default implementation: the identity function. +template +class cfg_instruction_to_dense_integert +{ +public: + template + std::size_t operator()(U &&u) const + { + return std::forward(identity_functort{}(u)); + } +}; + +/// GOTO-instruction to location number functor. +template <> +class cfg_instruction_to_dense_integert +{ +public: + std::size_t operator()(const goto_programt::const_targett &t) const + { + return t->location_number; + } +}; + /// A multi-procedural control flow graph (CFG) whose nodes store references to /// instructions in a GOTO program. /// @@ -69,28 +94,16 @@ class cfg_baset:public grapht< cfg_base_nodet > class entry_mapt final { - typedef std::map data_typet; + typedef dense_integer_mapt< + goto_programt::const_targett, + entryt, + cfg_instruction_to_dense_integert> + data_typet; data_typet data; public: grapht< cfg_base_nodet > &container; - // NOLINTNEXTLINE(readability/identifiers) - typedef typename data_typet::iterator iterator; - // NOLINTNEXTLINE(readability/identifiers) - typedef typename data_typet::const_iterator const_iterator; - - template - const_iterator find(U &&u) const { return data.find(std::forward(u)); } - - iterator begin() { return data.begin(); } - const_iterator begin() const { return data.begin(); } - const_iterator cbegin() const { return data.cbegin(); } - - iterator end() { return data.end(); } - const_iterator end() const { return data.end(); } - const_iterator cend() const { return data.cend(); } - explicit entry_mapt(grapht< cfg_base_nodet > &_container): container(_container) { @@ -100,10 +113,10 @@ class cfg_baset:public grapht< cfg_base_nodet > { auto e=data.insert(std::make_pair(t, 0)); - if(e.second) - e.first->second=container.add_node(); + if(e) + data.at(t) = container.add_node(); - return e.first->second; + return data.at(t); } entryt &at(const goto_programt::const_targett &t) @@ -114,6 +127,25 @@ class cfg_baset:public grapht< cfg_base_nodet > { return data.at(t); } + + std::size_t count(const goto_programt::const_targett &t) const + { + return data.count(t); + } + + typedef typename data_typet::possible_keyst keyst; + const keyst &keys() const + { + // We always define exactly the keys the entry map was set up for, so + // data's possible key set is exactly our key set + return data.possible_keys(); + } + + template + void setup_for_keys(Iter begin, Iter end) + { + data.setup_for_keys(begin, end); + } }; entry_mapt entry_map; @@ -173,12 +205,30 @@ class cfg_baset:public grapht< cfg_base_nodet > void operator()( const goto_functionst &goto_functions) { + std::vector possible_keys; + for(const auto &id_and_function : goto_functions.function_map) + { + const auto &instructions = id_and_function.second.body.instructions; + possible_keys.reserve( + possible_keys.size() + + std::distance(instructions.begin(), instructions.end())); + for(auto it = instructions.begin(); it != instructions.end(); ++it) + possible_keys.push_back(it); + } + entry_map.setup_for_keys(possible_keys.begin(), possible_keys.end()); compute_edges(goto_functions); } void operator()(P &goto_program) { goto_functionst goto_functions; + std::vector possible_keys; + const auto &instructions = goto_program.instructions; + possible_keys.reserve( + std::distance(instructions.begin(), instructions.end())); + for(auto it = instructions.begin(); it != instructions.end(); ++it) + possible_keys.push_back(it); + entry_map.setup_for_keys(possible_keys.begin(), possible_keys.end()); compute_edges(goto_functions, goto_program); } @@ -187,29 +237,28 @@ class cfg_baset:public grapht< cfg_base_nodet > /// in that particular case you should just use `cfg.get_node(i)`). Storing /// node indices saves a map lookup, so it can be worthwhile when you expect /// to repeatedly look up the same program point. - entryt get_node_index(const I &program_point) const + entryt get_node_index(const goto_programt::const_targett &program_point) const { return entry_map.at(program_point); } /// Get the CFG graph node relating to \p program_point. - nodet &get_node(const I &program_point) + nodet &get_node(const goto_programt::const_targett &program_point) { return (*this)[get_node_index(program_point)]; } /// Get the CFG graph node relating to \p program_point. - const nodet &get_node(const I &program_point) const + const nodet &get_node(const goto_programt::const_targett &program_point) const { return (*this)[get_node_index(program_point)]; } - /// Get a map from program points to their corresponding node indices. Use - /// the indices with `operator[]` similar to those returned by - /// \ref get_node_index. - const entry_mapt &entries() const + /// Get a vector of keys present in this cfg. Use these with \ref get_node or + /// \ref get_node_index to get the corresponding CFG nodes. + const typename entry_mapt::keyst &keys() const { - return entry_map; + return entry_map.keys(); } static I get_first_node(P &program) @@ -496,4 +545,260 @@ void cfg_baset::compute_edges( compute_edges(goto_functions, it->second.body); } +template +struct basic_block_graph_nodet : public graph_nodet, public T +{ + typedef typename graph_nodet::edget edget; + typedef typename graph_nodet::edgest edgest; + + basic_block_graph_nodet() = default; + explicit basic_block_graph_nodet(std::vector &&block) + : block(std::move(block)) + { + } + + std::vector block; +}; + +template +struct constify_cfg_instruction_typet +{ + typedef T type; +}; + +template <> +struct constify_cfg_instruction_typet +{ + typedef goto_programt::const_targett type; +}; + +template < + template class CFGT, + typename T, + typename P = const goto_programt, + typename I = goto_programt::const_targett> +class cfg_basic_blockst : public grapht> +{ +public: + typedef CFGT base_cfgt; + typedef grapht> graph_typet; + typedef typename graph_typet::nodet nodet; + typedef typename graph_typet::node_indext node_indext; + typedef typename constify_cfg_instruction_typet::type ConstI; + typedef dense_integer_mapt< + ConstI, + node_indext, + cfg_instruction_to_dense_integert> + entry_mapt; + entry_mapt entry_map; + + void operator()(P &program) + { + // Compute underlying program graph: + base_cfgt base_cfg; + base_cfg(program); + + if(base_cfg.empty()) + return; + + entry_map.setup_for_keys(base_cfg.keys().begin(), base_cfg.keys().end()); + + std::vector worklist; + // Queue all instructions without predecessors to start block construction: + for(node_indext i = 0; i < base_cfg.size(); ++i) + if(base_cfg[i].in.size() == 0) + worklist.push_back(i); + + // Consider the entry-point, if not already added: + auto entry_point_index = base_cfg.entry_map[get_first_node(program)]; + if(base_cfg[entry_point_index].in.size() != 0) + worklist.push_back(entry_point_index); + + while(true) + { + // Extract basic blocks: + while(!worklist.empty()) + { + node_indext block_head = worklist.back(); + worklist.pop_back(); + std::vector block = {base_cfg[block_head].PC}; + node_indext block_current = block_head; + + // Find other instructions in the same block: + while(base_cfg[block_current].out.size() == 1) + { + node_indext block_next = base_cfg[block_current].out.begin()->first; + // Note the successor instruction could already have a block if + // (a) it is the entry point, which has an extra imaginary predecessor + // due to control flow from another function, or (b) we're picking + // up unreachable code and our arbitrarily picked starting point + // fell in the middle of a basic block. + if( + base_cfg[block_next].in.size() == 1 && + !entry_map.count(base_cfg[block_next].PC)) + { + // Part of the same basic block + block.push_back(base_cfg[block_next].PC); + block_current = block_next; + } + else + { + // Start of a new basic block + break; + } + } + + // Store the new basic block: + node_indext new_block_index = this->add_node(std::move(block)); + + // Record the instruction -> block index mapping: + for(const auto &instruction : (*this)[new_block_index].block) + { + auto insert_result = entry_map.insert({instruction, new_block_index}); + INVARIANT( + insert_result, + "should only visit each instruction once. Are the keys unique? " + "If your key is goto_programt::targett, do you need to run " + "goto_programt/functionst::update()?"); + } + + // Queue all the block tail's successors that we haven't visited yet: + for(const auto successor_entry : base_cfg[block_current].out) + { + if(!entry_map.count(base_cfg[successor_entry.first].PC)) + worklist.push_back(successor_entry.first); + } + } + + // Check that we covered the whole CFG: if not then there are unreachable + // cycles; pick one arbitrarily and try again. + if(entry_map.size() == base_cfg.size()) + break; + + for(node_indext i = 0; i < base_cfg.size(); ++i) + { + if(!entry_map.count(base_cfg[i].PC)) + { + worklist.push_back(i); + break; + } + } + + INVARIANT( + worklist.size() == 1, + "if the entry-map is not fully populated there must be some missing " + "instruction"); + } + + // Populate basic-block edges: + for(node_indext block_index = 0; block_index < this->size(); ++block_index) + { + auto &block_node = (*this)[block_index]; + I block_tail = block_node.block.back(); + const auto &base_successors = + base_cfg[base_cfg.entry_map[block_tail]].out; + for(const auto base_successor_entry : base_successors) + { + const auto &base_successor_node = base_cfg[base_successor_entry.first]; + this->add_edge(block_index, entry_map.at(base_successor_node.PC)); + } + } + } + + /// Get the graph node index for \p program_point. Use this with operator[] + /// to get the related graph node (e.g. `cfg[cfg.get_node_index(i)]`, though + /// in that particular case you should just use `cfg.get_node(i)`). Storing + /// node indices saves a map lookup, so it can be worthwhile when you expect + /// to repeatedly look up the same program point. + node_indext get_node_index(const I &program_point) const + { + return entry_map.at(program_point); + } + + /// Get the CFG graph node relating to \p program_point. + nodet &get_node(const I &program_point) + { + return (*this)[get_node_index(program_point)]; + } + + /// Get the CFG graph node relating to \p program_point. + const nodet &get_node(const I &program_point) const + { + return (*this)[get_node_index(program_point)]; + } + + void insert_instruction_after(const I new_instruction, const I insert_after) + { + const auto existing_block_index = entry_map.at(insert_after); + auto &existing_block = (*this)[existing_block_index].block; + const auto insert_after_iterator = + std::find(existing_block.begin(), existing_block.end(), insert_after); + INVARIANT( + insert_after_iterator != existing_block.end(), + "entry_map should be consistent with block members"); + existing_block.insert(std::next(insert_after_iterator), new_instruction); + entry_map[new_instruction] = existing_block_index; + } + + node_indext split_basic_block_after(const I instruction) + { + // Split the existing basic block in half: + const auto existing_block_index = entry_map.at(instruction); + auto &existing_block = (*this)[existing_block_index].block; + const auto split_after_iterator = + std::find(existing_block.begin(), existing_block.end(), instruction); + INVARIANT( + split_after_iterator != existing_block.end(), + "entry_map should be consistent with block members"); + std::vector new_block; + new_block.insert( + new_block.end(), split_after_iterator, existing_block.end()); + const auto new_block_index = this->add_node(std::move(new_block)); + + // Move all existing block -> successor edges to the new one: + auto existing_successors = (*this)[existing_block_index].out; + for(const auto &successor : existing_successors) + { + this->remove_edge(existing_block_index, successor.first); + this->add_edge(new_block_index, successor.first); + } + + // Create an edge existing -> new: + this->add_edge(existing_block_index, new_block_index); + + // Update the entry map: + for(const auto moved_instruction : (*this)[new_block_index].block) + entry_map[moved_instruction] = new_block_index; + + return new_block_index; + } + + void + add_basic_block_graph_edge(const I from_block_tail, const I to_block_head) + { + const auto from_block_index = entry_map.at(from_block_tail); + const auto to_block_index = entry_map.at(to_block_head); + INVARIANT( + (*this)[from_block_index].block.back() == from_block_tail, + "from_block_tail should be a block tail"); + INVARIANT( + (*this)[to_block_index].block.front() == to_block_head, + "to_block_head should be a block head"); + this->add_edge(from_block_index, to_block_index); + } + + static I get_first_node(P &program) + { + return base_cfgt::get_first_node(program); + } + static I get_last_node(P &program) + { + return base_cfgt::get_last_node(program); + } + static bool nodes_empty(P &program) + { + return base_cfgt::nodes_empty(program); + } +}; + #endif // CPROVER_GOTO_PROGRAMS_CFG_H diff --git a/src/goto-programs/goto_program.h b/src/goto-programs/goto_program.h index 0192b0c8cbf..36431b75323 100644 --- a/src/goto-programs/goto_program.h +++ b/src/goto-programs/goto_program.h @@ -600,6 +600,10 @@ class goto_programt template std::list get_successors(Target target) const; + template + static std::list + get_well_formed_instruction_successors(Target target); + void compute_incoming_edges(); /// Insertion that preserves jumps to "target". @@ -1068,34 +1072,24 @@ class goto_programt } }; -/// Get control-flow successors of a given instruction. The instruction is -/// represented by a pointer `target` of type `Target`. An instruction has -/// either 0, 1, or 2 successors (more than two successors is deprecated). For -/// example, an `ASSUME` instruction with the `guard` being a `false_exprt` has -/// 0 successors, and `ASSIGN` instruction has 1 successor, and a `GOTO` -/// instruction with the `guard` not being a `true_exprt` has 2 successors. -/// -/// \tparam Target: type used to represent a pointer to an instruction in a goto -/// program -/// \param target: pointer to the instruction of which to get the successors of -/// \return List of control-flow successors of the pointed-to goto program -/// instruction template -std::list goto_programt::get_successors( - Target target) const +std::list get_successors( + Target target, + std::function is_instruction_out_of_range) { - if(target==instructions.end()) + if(is_instruction_out_of_range(target)) return std::list(); const auto next=std::next(target); + const auto next_out_of_range = is_instruction_out_of_range(next); - const instructiont &i=*target; + const goto_programt::instructiont &i = *target; if(i.is_goto()) { std::list successors(i.targets.begin(), i.targets.end()); - if(!i.get_condition().is_true() && next != instructions.end()) + if(!i.get_condition().is_true() && !next_out_of_range) successors.push_back(next); return successors; @@ -1105,7 +1099,7 @@ std::list goto_programt::get_successors( { std::list successors(i.targets.begin(), i.targets.end()); - if(next!=instructions.end()) + if(!next_out_of_range) successors.push_back(next); return successors; @@ -1125,12 +1119,15 @@ std::list goto_programt::get_successors( if(i.is_assume()) { - return !i.get_condition().is_false() && next != instructions.end() + return !i.get_condition().is_false() && !next_out_of_range ? std::list{next} : std::list(); } - if(next!=instructions.end()) + if(i.is_end_function()) + return std::list(); + + if(!next_out_of_range) { return std::list{next}; } @@ -1138,6 +1135,40 @@ std::list goto_programt::get_successors( return std::list(); } +/// Get control-flow successors of a given instruction. The instruction is +/// represented by a pointer `target` of type `Target`. An instruction has +/// either 0, 1, or 2 successors (more than two successors is deprecated). For +/// example, an `ASSUME` instruction with the `guard` being a `false_exprt` has +/// 0 successors, and `ASSIGN` instruction has 1 successor, and a `GOTO` +/// instruction with the `guard` not being a `true_exprt` has 2 successors. +/// +/// \tparam Target: type used to represent a pointer to an instruction in a goto +/// program +/// \param target: pointer to the instruction of which to get the successors of +/// \return List of control-flow successors of the pointed-to goto program +/// instruction +template +std::list goto_programt::get_successors(Target target) const +{ + const std::function instruction_out_of_range = + [this](const Target &target) { return target == instructions.end(); }; + return ::get_successors(target, instruction_out_of_range); +} + +/// Like \ref get_successors, except we assume that neither \p target nor its +/// successor is out of range (i.e., equal to instructions.end()). This holds +/// so long as \p target is a valid iterator and its containing function is +/// complete (it has an END_OF_FUNCTION terminator, and all its instruction +/// targets are valid iterators). +template +std::list +goto_programt::get_well_formed_instruction_successors(Target target) +{ + const std::function instructions_never_out_of_range = + [](const Target &) { return false; }; + return ::get_successors(target, instructions_never_out_of_range); +} + inline bool order_const_target( const goto_programt::const_targett i1, const goto_programt::const_targett i2) diff --git a/src/util/dense_integer_map.h b/src/util/dense_integer_map.h new file mode 100644 index 00000000000..ce1eeeb0cbc --- /dev/null +++ b/src/util/dense_integer_map.h @@ -0,0 +1,160 @@ +/*******************************************************************\ + +Module: Dense integer map + +Author: Diffblue Ltd + +\*******************************************************************/ + +/// \file +/// Dense integer map + +#ifndef CPROVER_UTIL_DENSE_INTEGER_MAP_H +#define CPROVER_UTIL_DENSE_INTEGER_MAP_H + +#include +#include +#include + +#include + +/// Identity functor. When we use C++20 this can be replaced with std::identity. +class identity_functort +{ +public: + template + constexpr auto operator()(T &&t) const -> decltype(std::forward(t)) + { + return std::forward(t); + } +}; + +/// A map type that is backed by a vector, which relies on the ability to (a) +/// see the keys that might be used in advance of their usage, and (b) map those +/// keys onto a dense range of integers. You should specialise +/// key_to_dense_integer for your key type before using. If it maps your keys +/// onto too sparse a range, considerable memory will be wasted. +/// +/// The type is optimised for fast lookups at the expense of flexibility. +/// So far I've only implemented the support needed for use in +/// cfg_basic_blockst. Due to the vector storage the precise interface of +/// std::map is hard to achieve, but something close is practically achievable +/// for the interested developer. +template +class dense_integer_mapt +{ +public: + typedef std::vector possible_keyst; + +private: + std::size_t offset; + typedef std::vector backing_storet; + std::vector map; + std::vector value_set; + std::size_t n_values_set; + possible_keyst possible_keys_vector; + + std::size_t key_to_index(const K &key) const + { + std::size_t key_integer = KeyToDenseInteger{}(key); + INVARIANT(key_integer >= offset, "index should be in range"); + return key_integer - offset; + } + + void mark_index_set(std::size_t index) + { + if(!value_set[index]) + { + ++n_values_set; + value_set[index] = true; + } + } + +public: + dense_integer_mapt() : offset(0), n_values_set(0) + { + } + + template + void setup_for_keys(Iter first, Iter last) + { + std::size_t highest_key = std::numeric_limits::min(); + std::size_t lowest_key = std::numeric_limits::max(); + std::unordered_set seen_keys; + for(Iter it = first; it != last; ++it) + { + std::size_t integer_key = KeyToDenseInteger{}(*it); + highest_key = std::max(integer_key, highest_key); + lowest_key = std::min(integer_key, lowest_key); + INVARIANT( + seen_keys.insert(integer_key).second, + "keys for use in dense_integer_mapt must be unique"); + } + + if(highest_key < lowest_key) + { + offset = 0; + } + else + { + offset = lowest_key; + map.resize((highest_key - lowest_key) + 1); + value_set.resize((highest_key - lowest_key) + 1); + } + + possible_keys_vector.insert(possible_keys_vector.end(), first, last); + } + + const V &at(const K &key) const + { + std::size_t index = key_to_index(key); + INVARIANT(value_set[index], "map value should be set"); + return map.at(index); + } + V &at(const K &key) + { + std::size_t index = key_to_index(key); + INVARIANT(value_set[index], "map value should be set"); + return map.at(index); + } + + const V &operator[](const K &key) const + { + std::size_t index = key_to_index(key); + mark_index_set(index); + return map.at(index); + } + V &operator[](const K &key) + { + std::size_t index = key_to_index(key); + mark_index_set(index); + return map.at(index); + } + + bool insert(const std::pair &pair) + { + std::size_t index = key_to_index(pair.first); + if(value_set[index]) + return false; + mark_index_set(index); + map.at(index) = pair.second; + return true; + } + + std::size_t count(const K &key) const + { + return value_set[key_to_index(key)]; + } + + std::size_t size() const + { + return n_values_set; + } + + const possible_keyst &possible_keys() const + { + return possible_keys_vector; + } +}; + +#endif // CPROVER_UTIL_DENSE_INTEGER_MAP_H