From 941eed46cedeebc83740d8c8757f46189561d2da Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Fri, 7 Feb 2020 13:19:03 +0000 Subject: [PATCH 1/6] Add c-specific refinement utils For handling byte-extract expression, string-constants etc., in places that expect string arrays. Also slightly extend the definition of char-type. --- .../strings/string_refinement_util.cpp | 79 ++++++++++++++++++- src/solvers/strings/string_refinement_util.h | 5 ++ 2 files changed, 81 insertions(+), 3 deletions(-) diff --git a/src/solvers/strings/string_refinement_util.cpp b/src/solvers/strings/string_refinement_util.cpp index dd0cd19cfd0..4f8316f44d2 100644 --- a/src/solvers/strings/string_refinement_util.cpp +++ b/src/solvers/strings/string_refinement_util.cpp @@ -14,19 +14,27 @@ Author: Diffblue Ltd. #include #include #include +#include #include #include #include #include #include #include -#include +#include +#include #include bool is_char_type(const typet &type) { - return type.id() == ID_unsignedbv && to_unsignedbv_type(type).get_width() <= - STRING_REFINEMENT_MAX_CHAR_WIDTH; + if(type.id() == ID_unsignedbv) + return to_unsignedbv_type(type).get_width() <= + STRING_REFINEMENT_MAX_CHAR_WIDTH; + + if(type.id() == ID_signedbv) + return to_signedbv_type(type).get_width() <= 8; + + return false; } bool is_char_array_type(const typet &type, const namespacet &ns) @@ -192,3 +200,68 @@ array_exprt interval_sparse_arrayt::concretize( size, it == entries.end() ? default_value : it->second); return array; } + +exprt maybe_byte_extract_expr(const exprt &expr) +{ + if(!can_cast_expr(expr)) + return expr; + + const auto &byte_extract_expr = to_byte_extract_expr(expr); + const auto &offset = byte_extract_expr.offset(); + PRECONDITION(offset.id() == ID_constant); + if(offset.id() != ID_constant) + { + return expr; + } + const auto &constant_offset = to_constant_expr(offset); + const auto &op = byte_extract_expr.op(); + auto numeric_offset = + string2optional_int(id2string(constant_offset.get_value())); + PRECONDITION(numeric_offset.has_value()); + if(*numeric_offset == 0) + { + return op; + } + + array_exprt::operandst offset_operands; + offset_operands.insert( + offset_operands.end(), + op.operands().begin() + *numeric_offset, + op.operands().end()); + const auto extracted_array_suffix = + array_exprt{offset_operands, to_array_type(op.type())}; + + return extracted_array_suffix; +} + +exprt maybe_remove_string_exprs(const exprt &expr) +{ + return [&]() { + if( + auto const &maybe_string_constant = + expr_try_dynamic_cast(expr)) + { + return static_cast(maybe_string_constant->to_array_expr()); + } + else + { + return expr; + } + }(); +} + +exprt massage_weird_arrays_into_non_weird_arrays(const exprt &expr) +{ + auto const byte_extracted = maybe_byte_extract_expr(expr); + auto const string_removed = maybe_remove_string_exprs(byte_extracted); + PRECONDITION(string_removed.type().id() == ID_array); + if(string_removed.id() == ID_if) + { + return if_exprt{ + string_removed.op0(), + massage_weird_arrays_into_non_weird_arrays(string_removed.op1()), + massage_weird_arrays_into_non_weird_arrays(string_removed.op2()), + string_removed.type()}; + } + return string_removed; +} diff --git a/src/solvers/strings/string_refinement_util.h b/src/solvers/strings/string_refinement_util.h index dee45c60a75..43c93ab8b59 100644 --- a/src/solvers/strings/string_refinement_util.h +++ b/src/solvers/strings/string_refinement_util.h @@ -140,4 +140,9 @@ class interval_sparse_arrayt final : public sparse_arrayt } }; +exprt maybe_remove_string_exprs(const exprt &expr); + +exprt maybe_byte_extract_expr(const exprt &expr); + +exprt massage_weird_arrays_into_non_weird_arrays(const exprt &expr); #endif // CPROVER_SOLVERS_REFINEMENT_STRING_REFINEMENT_UTIL_H From 8d8263948a68386d664bb9d4b99a95100ce1d9bb Mon Sep 17 00:00:00 2001 From: Hannes Steffenhagen Date: Fri, 7 Feb 2020 13:20:06 +0000 Subject: [PATCH 2/6] Extend index-of for c into a new c-specific axiom-generating function. Basically, the addition encodes the existence of terminating-zero and the properties this entails. --- .../strings/string_constraint_generator.h | 6 + .../string_constraint_generator_indexof.cpp | 115 ++++++++++++++++++ .../string_constraint_generator_main.cpp | 2 + src/util/irep_ids.def | 1 + 4 files changed, 124 insertions(+) diff --git a/src/solvers/strings/string_constraint_generator.h b/src/solvers/strings/string_constraint_generator.h index a263eb8d577..0143eff61c8 100644 --- a/src/solvers/strings/string_constraint_generator.h +++ b/src/solvers/strings/string_constraint_generator.h @@ -193,12 +193,18 @@ class string_constraint_generatort final const array_string_exprt &str, const exprt &c, const exprt &from_index); + std::pair add_axioms_for_c_index_of( + const array_string_exprt &str, + const exprt &c, + const exprt &from_index); std::pair add_axioms_for_index_of_string( const array_string_exprt &haystack, const array_string_exprt &needle, const exprt &from_index); std::pair add_axioms_for_index_of(const function_application_exprt &f); + std::pair + add_axioms_for_c_index_of(const function_application_exprt &f); std::pair add_axioms_for_last_index_of_string( const array_string_exprt &haystack, const array_string_exprt &needle, diff --git a/src/solvers/strings/string_constraint_generator_indexof.cpp b/src/solvers/strings/string_constraint_generator_indexof.cpp index 9a3e1f33633..a79d1a769c0 100644 --- a/src/solvers/strings/string_constraint_generator_indexof.cpp +++ b/src/solvers/strings/string_constraint_generator_indexof.cpp @@ -82,6 +82,90 @@ string_constraint_generatort::add_axioms_for_index_of( return {index, std::move(constraints)}; } +/// Add axioms stating that the returned value is the index within `haystack` +/// (`str`) of the first occurrence of `needle` (`c`) starting the search at +/// `from_index`, or is `-1` if no such character occurs at or after position +/// `from_index`. +/// \todo Make argument names match whose of add_axioms_for_index_of_string +/// +/// These axioms are: +/// 1a. \f$-1 \le {\tt index} < {\tt terminating_zero} \f$ +/// 1b. \f${\tt terminating_zero} < |{\tt haystack}| \f$ +/// 2a. \f$ \lnot contains \Leftrightarrow {\tt index} = -1 \f$ +/// 2b. \f$ {\tt haystack}{\tt terminating_zero} = '\0' \f$ +/// 3. \f$ contains \Rightarrow {\tt from\_index} \le {\tt index} +/// \land {\tt haystack}[{\tt index}] = {\tt needle} \f$ +/// 4. \f$ \forall i \in [{\tt from\_index}, {\tt index}).\ contains +/// \Rightarrow {\tt haystack}[i] \ne {\tt needle} \f$ +/// 5. \f$ \forall m, n \in [{\tt from\_index}, {\tt terminating_zero+1}) +/// .\ \lnot contains \Rightarrow {\tt haystack}[m] \ne {\tt needle} +/// \f$ +/// \param str: an array of characters expression +/// \param c: a character expression +/// \param from_index: an integer expression +/// \return integer expression `index` +std::pair +string_constraint_generatort::add_axioms_for_c_index_of( + const array_string_exprt &str, + const exprt &c, + const exprt &from_index) +{ + string_constraintst constraints; + const typet &index_type = str.length_type(); + symbol_exprt index = fresh_symbol("index_of", index_type); + symbol_exprt contains = fresh_symbol("contains_in_index_of"); + symbol_exprt terminating_zero = fresh_symbol("zero_in_index_of", index_type); + + exprt minus1 = from_integer(-1, index_type); + and_exprt a1( + binary_relation_exprt(index, ID_ge, minus1), + binary_relation_exprt(index, ID_le, terminating_zero), + binary_relation_exprt( + terminating_zero, ID_lt, array_pool.get_or_create_length(str))); + constraints.existential.push_back(a1); + + equal_exprt a2(not_exprt(contains), equal_exprt(index, minus1)); + constraints.existential.push_back(a2); + + const exprt lower_bound(zero_if_negative(from_index)); + // make sure that terminating zero exists (and is the smallest index after + // from that has a 0 character) + constraints.existential.push_back( + equal_exprt{str[terminating_zero], from_integer(0, c.type())}); + symbol_exprt k = fresh_symbol("QA_index_of", index_type); + const string_constraintt a0 = string_constraintt{ + k, + lower_bound, + zero_if_negative(terminating_zero), + not_exprt{equal_exprt{str[k], from_integer(0, c.type())}}}; + constraints.universal.push_back(a0); + + implies_exprt a3( + contains, + and_exprt( + binary_relation_exprt(from_index, ID_le, index), + equal_exprt(str[index], c))); + constraints.existential.push_back(a3); + + symbol_exprt n = fresh_symbol("QA_index_of", index_type); + string_constraintt a4{n, + lower_bound, + zero_if_negative(index), + implies_exprt{contains, notequal_exprt{str[n], c}}}; + constraints.universal.push_back(a4); + + symbol_exprt m = fresh_symbol("QA_index_of", index_type); + string_constraintt a5( + m, + lower_bound, + zero_if_negative( + plus_exprt{terminating_zero, from_integer(1, terminating_zero.type())}), + implies_exprt(not_exprt(contains), not_exprt(equal_exprt(str[m], c)))); + constraints.universal.push_back(a5); + + return {index, std::move(constraints)}; +} + /// Add axioms stating that the returned value `index` is the index within /// `haystack` of the first occurrence of `needle` starting the search at /// `from_index`, or `-1` if needle does not occur at or after position @@ -329,6 +413,37 @@ string_constraint_generatort::add_axioms_for_index_of( } } +/// Index of the first occurence of a target inside the string +/// +/// If the target is a character: +// NOLINTNEXTLINE +/// \copybrief add_axioms_for_index_of(const array_string_exprt&,const exprt&,const exprt&) +// NOLINTNEXTLINE +/// \link add_axioms_for_index_of(const array_string_exprt&,const exprt&,const exprt&) +/// (More...) \endlink +/// +/// If the target is a refined_string: +/// \copybrief add_axioms_for_index_of_string +/// \link add_axioms_for_index_of_string (More...) +/// \endlink +/// \warning slow for string targets +/// \param f: function application with arguments refined_string `haystack`, +/// refined_string or character `needle`, and optional integer `from_index` +/// with default value `0` +/// \return integer expression +std::pair +string_constraint_generatort::add_axioms_for_c_index_of( + const function_application_exprt &f) +{ + auto const &str = f.arguments().at(0); + auto const &c = f.arguments().at(1); + auto const str_array = get_string_expr(array_pool, str); + return add_axioms_for_c_index_of( + str_array, + typecast_exprt{c, str_array.content().type().subtype()}, + from_integer(0, str_array.length_type())); +} + /// Add axioms stating that the returned value is the index within `haystack` /// (`str`) of the last occurrence of `needle` (`c`) starting the search /// backward at `from_index`, or `-1` if no such character occurs at or before diff --git a/src/solvers/strings/string_constraint_generator_main.cpp b/src/solvers/strings/string_constraint_generator_main.cpp index 83ef18cf436..c919c046b3b 100644 --- a/src/solvers/strings/string_constraint_generator_main.cpp +++ b/src/solvers/strings/string_constraint_generator_main.cpp @@ -237,6 +237,8 @@ string_constraint_generatort::add_axioms_for_function_application( return add_axioms_for_contains(expr); else if(id == ID_cprover_string_index_of_func) return add_axioms_for_index_of(expr); + else if(id == ID_cprover_string_c_index_of_func) + return add_axioms_for_c_index_of(expr); else if(id == ID_cprover_string_last_index_of_func) return add_axioms_for_last_index_of(expr); else if(id == ID_cprover_string_parse_int_func) diff --git a/src/util/irep_ids.def b/src/util/irep_ids.def index 2e286e0ba64..1fcaecac67b 100644 --- a/src/util/irep_ids.def +++ b/src/util/irep_ids.def @@ -610,6 +610,7 @@ IREP_ID_ONE(cprover_string_empty_string_func) IREP_ID_ONE(cprover_string_endswith_func) IREP_ID_ONE(cprover_string_format_func) IREP_ID_ONE(cprover_string_index_of_func) +IREP_ID_ONE(cprover_string_c_index_of_func) IREP_ID_ONE(cprover_string_insert_func) IREP_ID_ONE(cprover_string_is_prefix_func) IREP_ID_ONE(cprover_string_is_suffix_func) From 3f76f13d11e34e91d8aaaeb3934df9aec1c5e779 Mon Sep 17 00:00:00 2001 From: xbauch Date: Fri, 7 Feb 2020 13:20:37 +0000 Subject: [PATCH 3/6] Implement strchr in terms of index-of by 1: associating the `src` with a new infinite array, 2: associating the array with its length variable, 3: applying the c_index_of function 4: translating the returned index to the expected char-pointer/NULL. --- src/cbmc/cbmc_parse_options.cpp | 12 +- src/goto-programs/string_instrumentation.cpp | 305 +++++++++++++++++-- src/goto-programs/string_instrumentation.h | 9 +- 3 files changed, 304 insertions(+), 22 deletions(-) diff --git a/src/cbmc/cbmc_parse_options.cpp b/src/cbmc/cbmc_parse_options.cpp index e6d75a97e18..b06661aca9d 100644 --- a/src/cbmc/cbmc_parse_options.cpp +++ b/src/cbmc/cbmc_parse_options.cpp @@ -362,6 +362,9 @@ void cbmc_parse_optionst::get_command_line_options(optionst &options) { options.set_option("refine-strings", true); options.set_option("string-printable", cmdline.isset("string-printable")); + options.set_option( + "max-nondet-string-length", + cmdline.get_value("max-nondet-string-length")); } if(cmdline.isset("max-node-refinement")) @@ -830,8 +833,13 @@ bool cbmc_parse_optionst::process_goto_program( link_to_library( goto_model, log.get_message_handler(), cprover_c_library_factory); - if(options.get_bool_option("string-abstraction")) - string_instrumentation(goto_model, log.get_message_handler()); + if( + options.get_bool_option("string-abstraction") || + options.get_bool_option("refine-strings")) + string_instrumentation( + goto_model, + log.get_message_handler(), + options.get_option("max-nondet-string-length")); // remove function pointers log.status() << "Removal of function pointers and virtual functions" diff --git a/src/goto-programs/string_instrumentation.cpp b/src/goto-programs/string_instrumentation.cpp index 5ad323737bc..74203d0b697 100644 --- a/src/goto-programs/string_instrumentation.cpp +++ b/src/goto-programs/string_instrumentation.cpp @@ -17,10 +17,15 @@ Author: Daniel Kroening, kroening@kroening.com #include #include #include +#include +#include #include +#include #include #include +#include #include +#include #include #include @@ -57,10 +62,12 @@ class string_instrumentationt:public messaget public: string_instrumentationt( symbol_tablet &_symbol_table, - message_handlert &_message_handler): - messaget(_message_handler), - symbol_table(_symbol_table), - ns(_symbol_table) + message_handlert &_message_handler, + size_t max_nondet_string_length) + : messaget(_message_handler), + symbol_table(_symbol_table), + ns(_symbol_table), + max_nondet_string_length(max_nondet_string_length) { } @@ -70,6 +77,8 @@ class string_instrumentationt:public messaget protected: symbol_tablet &symbol_table; namespacet ns; + size_t max_nondet_string_length; + size_t symbol_counter = 0; void make_type(exprt &dest, const typet &type) { @@ -97,6 +106,17 @@ class string_instrumentationt:public messaget goto_programt &dest, goto_programt::targett it, const code_function_callt &); + + auxiliary_symbolt new_aux_symbol( + const std::string &name, + const typet &type, + symbol_table_baset &symbol_table); + + refined_string_exprt + char_ptr_to_refined_string(const exprt &char_ptr, goto_programt &program); + + void do_strchr_via_c_index_of(goto_functiont &strchr_function); + void do_strchr( goto_programt &dest, goto_programt::targett it, @@ -157,29 +177,46 @@ class string_instrumentationt:public messaget void string_instrumentation( symbol_tablet &symbol_table, message_handlert &message_handler, - goto_programt &dest) + goto_programt &dest, + size_t max_nondet_string_length) { - string_instrumentationt string_instrumentation(symbol_table, message_handler); + string_instrumentationt string_instrumentation( + symbol_table, message_handler, max_nondet_string_length); string_instrumentation(dest); } void string_instrumentation( symbol_tablet &symbol_table, message_handlert &message_handler, - goto_functionst &dest) + goto_functionst &dest, + size_t max_nondet_string_length) { - string_instrumentationt string_instrumentation(symbol_table, message_handler); + string_instrumentationt string_instrumentation( + symbol_table, message_handler, max_nondet_string_length); string_instrumentation(dest); } void string_instrumentation( goto_modelt &goto_model, - message_handlert &message_handler) + message_handlert &message_handler, + const std::string &maybe_max_nondet_string_length_string) { + size_t max_nondet_string_length = 1000; // default + if(!maybe_max_nondet_string_length_string.empty()) + { + auto maybe_max_nondet_string_length = + string2optional_size_t(maybe_max_nondet_string_length_string); + if(!maybe_max_nondet_string_length.has_value()) + { + throw invalid_command_line_argument_exceptiont{"", ""}; + } + max_nondet_string_length = *maybe_max_nondet_string_length; + } string_instrumentation( goto_model.symbol_table, message_handler, - goto_model.goto_functions); + goto_model.goto_functions, + max_nondet_string_length); } void string_instrumentationt::operator()(goto_functionst &dest) @@ -189,6 +226,10 @@ void string_instrumentationt::operator()(goto_functionst &dest) it!=dest.function_map.end(); it++) { + if(it->first == "strchr") + { + do_strchr_via_c_index_of(it->second); + } (*this)(it->second.body); } } @@ -228,8 +269,6 @@ void string_instrumentationt::do_function_call( else if(identifier=="strxfrm") { } - else if(identifier=="strchr") - do_strchr(dest, target, call); else if(identifier=="strcspn") { } @@ -610,6 +649,199 @@ void string_instrumentationt::do_strncmp( { } +auxiliary_symbolt string_instrumentationt::new_aux_symbol( + const std::string &name, + const typet &type, + symbol_table_baset &symbol_table) +{ + auto decorated_name = std::string{name}; + decorated_name += std::to_string(++symbol_counter); + auxiliary_symbolt new_symbol; + new_symbol.base_name = decorated_name; + new_symbol.pretty_name = decorated_name; + new_symbol.is_static_lifetime = false; + new_symbol.is_state_var = false; + new_symbol.mode = ID_C; + new_symbol.name = decorated_name; + new_symbol.type = type; + symbol_table.add(new_symbol); + + return new_symbol; +} + +/// Helper function to produce the necessary associations related to +/// char-pointers needed for string solver. +refined_string_exprt string_instrumentationt::char_ptr_to_refined_string( + const exprt &char_ptr, + goto_programt &program) +{ + // int string_length = nondet(); + const auto length_symbol = new_aux_symbol( + "cprover_string_index_of_func::string_length", index_type(), symbol_table); + const auto length_symbol_expr = length_symbol.symbol_expr(); + program.add(goto_programt::make_decl(length_symbol_expr)); + program.add(goto_programt::make_assignment( + length_symbol_expr, + side_effect_expr_nondett{length_symbol.type, source_locationt{}})); + + // assume(string_length <= max_nondet_string_length); + program.add(goto_programt::make_assumption(binary_relation_exprt{ + length_symbol_expr, + ID_le, + from_integer(max_nondet_string_length, index_type())})); + + // char *string_content = src; + const auto content_symbol = new_aux_symbol( + "cprover_string_index_of_func::string_content", + char_ptr.type(), + symbol_table); + const auto content_symbol_expr = content_symbol.symbol_expr(); + program.add(goto_programt::make_decl(content_symbol_expr)); + program.add(goto_programt::make_assignment(content_symbol_expr, char_ptr)); + + // char (*nondet_infinite_array_ponter)[\infty]; + const symbolt nondet_infinite_array_pointer = new_aux_symbol( + "cprover_string_index_of_func::nondet_infinite_array_pointer", + pointer_type(array_typet{char_type(), infinity_exprt(index_type())}), + symbol_table); + const symbol_exprt nondet_infinite_array_pointer_expr = + nondet_infinite_array_pointer.symbol_expr(); + program.add(goto_programt::make_decl(nondet_infinite_array_pointer_expr)); + program.add(goto_programt::make_assignment( + nondet_infinite_array_pointer_expr, + typecast_exprt{address_of_exprt{char_ptr}, + nondet_infinite_array_pointer_expr.type()})); + + // cprover_associate_length_to_array_func(nondet_infinite_array_pointer, + // string_length); + const auto refined_string_expr = refined_string_exprt{ + length_symbol_expr, + content_symbol_expr, + refined_string_typet{index_type(), to_pointer_type(char_ptr.type())}}; + const symbolt return_array_length = new_aux_symbol( + "cprover_string_index_of_func::return_array_length", + index_type(), + symbol_table); + dereference_exprt nondet_array_expr{nondet_infinite_array_pointer_expr}; + const auto cprover_associate_length_to_array_func_symbol = symbol_exprt{ + ID_cprover_associate_length_to_array_func, + mathematical_function_typet{{nondet_array_expr.type(), length_symbol.type}, + return_array_length.type}}; + const auto apply_associate_length_to_array = function_application_exprt{ + cprover_associate_length_to_array_func_symbol, + {nondet_array_expr, refined_string_expr.length()}, + index_type()}; + const auto return_length_expr = return_array_length.symbol_expr(); + program.add(goto_programt::make_decl(return_length_expr)); + program.add(goto_programt::make_assignment( + return_length_expr, apply_associate_length_to_array)); + + // cprover_associate_array_to_pointer_func(nondet_infinite_array_pointer, + // src); + const address_of_exprt array_pointer( + index_exprt(nondet_array_expr, from_integer(0, index_type()))); + program.add(goto_programt::make_assignment(array_pointer, char_ptr)); + const symbolt return_sym_pointer = + new_aux_symbol("return_array_pointer", index_type(), symbol_table); + const auto cprover_associate_array_to_pointer_func_symbol = + symbol_exprt{ID_cprover_associate_array_to_pointer_func, + mathematical_function_typet{ + {nondet_array_expr.type(), array_pointer.type()}, + return_sym_pointer.type}}; + const auto apply_associate_pointer_to_array = + function_application_exprt{cprover_associate_array_to_pointer_func_symbol, + {nondet_array_expr, char_ptr}}; + const auto return_pointer_expr = return_sym_pointer.symbol_expr(); + program.add(goto_programt::make_decl(return_pointer_expr)); + program.add(goto_programt::make_assignment( + return_pointer_expr, apply_associate_pointer_to_array)); + + return refined_string_exprt{refined_string_expr.length(), char_ptr}; +} + +// Implement strchr in terms of indexOf for the purposes of using the +// string-solver. +// +// char * strchr(char *src, int c) { +// int string_length = nondet(); +// assume (string_length < =max_nondet_string_length); +// char *string_content = src; +// char (*nondet_infinite_array_pointer)[\infty]; +// nondet_infinite_array_pointer = string_content; +// +// cprover_associate_length_to_array_func(nondet_infinite_array_pointer, +// string_length); +// cprover_associate_array_to_pointer_func(nondet_infinite_array_pointer, +// src); +// struct refined_string_struct { +// int length = string_length; +// char *content = src; +// } refined_string; +// +// maybe_return = cprover_string_index_of_func(refined_string, c); +// if (maybe_return >= 0) +// return src + maybe_return; +// else +// return NULL: +// } +void string_instrumentationt::do_strchr_via_c_index_of( + goto_functiont &strchr_function) +{ + // struct refined_string_struct { + // int length = string_length; + // char *content = src; + // } refined_string; + const auto ¶ms = to_code_type(strchr_function.type).parameters(); + const auto &str_param = params.at(0); + const auto &ch_param = params.at(1); + const auto refined_string_type = + refined_string_typet{index_type(), to_pointer_type(str_param.type())}; + const auto str_param_symbol = + symbol_exprt{str_param.get_identifier(), str_param.type()}; + const auto ch_param_symbol = + symbol_exprt{ch_param.get_identifier(), ch_param.type()}; + goto_programt new_body; + const auto refined_string = + char_ptr_to_refined_string(str_param_symbol, new_body); + + // maybe_return = cprover_string_index_of_func(refined_string, c); + const auto cprover_string_index_of_func_symbol = symbol_exprt{ + ID_cprover_string_c_index_of_func, + mathematical_function_typet( + {refined_string.type(), ch_param_symbol.type()}, index_type())}; + const auto apply_index_of = function_application_exprt{ + cprover_string_index_of_func_symbol, {refined_string, ch_param_symbol}}; + const auto maybe_return = + new_aux_symbol("strchr::indexOf::maybe_return", index_type(), symbol_table); + const auto maybe_return_expr = maybe_return.symbol_expr(); + new_body.add(goto_programt::make_decl(maybe_return_expr)); + new_body.add( + goto_programt::make_assignment(maybe_return_expr, apply_index_of)); + + // if (maybe_return >= 0) + // return src + maybe_return; + // else + // return NULL: + auto found_case = new_body.add(goto_programt::make_return(code_returnt{ + plus_exprt{str_param_symbol, + typecast_exprt{maybe_return.symbol_expr(), size_type()}}})); + auto jump_to_found_case = new_body.insert_before( + found_case, + goto_programt::make_goto( + found_case, + binary_relation_exprt{ + maybe_return.symbol_expr(), ID_ge, from_integer(0, index_type())})); + auto return_null = new_body.insert_after( + jump_to_found_case, + goto_programt::make_return( + code_returnt{null_pointer_exprt{to_pointer_type(str_param.type())}})); + auto end_function = new_body.add(goto_programt::make_end_function()); + new_body.insert_after( + return_null, goto_programt::make_goto(end_function, true_exprt{})); + + strchr_function.body = std::move(new_body); +} + void string_instrumentationt::do_strchr( goto_programt &dest, goto_programt::targett target, @@ -625,11 +857,50 @@ void string_instrumentationt::do_strchr( goto_programt tmp; - goto_programt::targett assertion = tmp.add(goto_programt::make_assertion( - is_zero_string(arguments[0]), target->source_location)); - assertion->source_location.set_property_class("string"); - assertion->source_location.set_comment( - "zero-termination of string argument of strchr"); + auto new_aux_symbol = + []( + const irep_idt &name, + const typet &type, + symbol_table_baset &symbol_table) -> auxiliary_symbolt { + auxiliary_symbolt new_symbol; + new_symbol.base_name = name; + new_symbol.pretty_name = name; + new_symbol.is_static_lifetime = false; + new_symbol.is_state_var = false; + new_symbol.mode = ID_C; + new_symbol.name = name; + new_symbol.type = type; + symbol_table.add(new_symbol); + + return new_symbol; + }; + + const auto length_symbol = + new_aux_symbol("strchr_arg_length", size_type(), symbol_table); + + std::vector argument_types; + for(const auto &arg : arguments) + argument_types.push_back(arg.type()); + + const auto func_symbol = new_aux_symbol( + ID_cprover_string_index_of_func, + mathematical_function_typet(std::move(argument_types), size_type()), + symbol_table); + + const auto &string_arg = arguments.at(0); + const auto refined_string_type = + refined_string_typet{size_type(), to_pointer_type(string_arg.type())}; + + exprt::operandst refined_arguments; + refined_arguments.push_back(struct_exprt{ + {length_symbol.symbol_expr(), string_arg}, refined_string_type}); + refined_arguments.push_back(arguments.at(1)); + + const auto apply_index_of = function_application_exprt( + func_symbol.symbol_expr(), refined_arguments, size_type()); + + tmp.add(goto_programt::make_assignment( + target->get_function_call().lhs(), plus_exprt{string_arg, apply_index_of})); target->turn_into_skip(); dest.insert_before_swap(target, tmp); diff --git a/src/goto-programs/string_instrumentation.h b/src/goto-programs/string_instrumentation.h index cff956b4f20..6d70b439299 100644 --- a/src/goto-programs/string_instrumentation.h +++ b/src/goto-programs/string_instrumentation.h @@ -41,16 +41,19 @@ class incorrect_source_program_exceptiont : public cprover_exception_baset void string_instrumentation( symbol_tablet &, message_handlert &, - goto_programt &); + goto_programt &, + size_t); void string_instrumentation( symbol_tablet &, message_handlert &, - goto_functionst &); + goto_functionst &, + size_t); void string_instrumentation( goto_modelt &, - message_handlert &); + message_handlert &, + const std::string &); predicate_exprt is_zero_string(const exprt &what, bool write = false); exprt zero_string_length(const exprt &what, bool write=false); From 80c88cc83b4f0e81170356d52bd041a57321cc97 Mon Sep 17 00:00:00 2001 From: xbauch Date: Fri, 7 Feb 2020 13:23:24 +0000 Subject: [PATCH 4/6] Handle the c-specific cases in string refinement extracting string arrays from byte-expressions and string constants, hiding the `symex::args` renaming, etc. --- src/solvers/strings/array_pool.cpp | 47 +++++++++++++------ .../string_constraint_generator_main.cpp | 6 ++- src/solvers/strings/string_refinement.cpp | 42 ++++++++++++++--- src/solvers/strings/string_refinement.h | 1 + src/util/string_constant.h | 6 +++ 5 files changed, 78 insertions(+), 24 deletions(-) diff --git a/src/solvers/strings/array_pool.cpp b/src/solvers/strings/array_pool.cpp index ab7d3c7cf5b..55b2dff3271 100644 --- a/src/solvers/strings/array_pool.cpp +++ b/src/solvers/strings/array_pool.cpp @@ -7,6 +7,8 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ #include "array_pool.h" +#include "string_refinement_util.h" +#include symbol_exprt symbol_generatort:: operator()(const irep_idt &prefix, const typet &type) @@ -102,13 +104,15 @@ array_string_exprt array_poolt::make_char_array_for_char_pointer( const bool is_constant_array = char_pointer.id() == ID_address_of && to_address_of_expr(char_pointer).object().id() == ID_index && - to_index_expr(to_address_of_expr(char_pointer).object()).array().id() == - ID_array; + (to_index_expr(to_address_of_expr(char_pointer).object()).array().id() == + ID_array || + to_index_expr(to_address_of_expr(char_pointer).object()).array().id() == + ID_string_constant); if(is_constant_array) { - return to_array_string_expr( - to_index_expr(to_address_of_expr(char_pointer).object()).array()); + return to_array_string_expr(massage_weird_arrays_into_non_weird_arrays( + to_index_expr(to_address_of_expr(char_pointer).object()).array())); } const std::string symbol_name = "char_array_" + id2string(char_pointer.id()); const auto array_sym = @@ -135,12 +139,19 @@ static void attempt_assign_length_from_type( std::unordered_map &length_of_array, symbol_generatort &symbol_generator) { - DATA_INVARIANT( - array_expr.id() != ID_if, - "attempt_assign_length_from_type does not handle if exprts"); - // This invariant seems always true, but I don't know why. - // If we find a case where this is violated, try calling - // attempt_assign_length_from_type on the true and false cases. + if(array_expr.id() == ID_if) + { + const auto &if_expr = to_if_expr(array_expr); + attempt_assign_length_from_type( + to_array_string_expr(if_expr.true_case()), + length_of_array, + symbol_generator); + attempt_assign_length_from_type( + to_array_string_expr(if_expr.false_case()), + length_of_array, + symbol_generator); + return; + } const exprt &size_from_type = to_array_type(array_expr.type()).size(); const exprt &size_to_assign = size_from_type != infinity_exprt(size_from_type.type()) @@ -151,7 +162,7 @@ static void attempt_assign_length_from_type( length_of_array.emplace(array_expr, size_to_assign); INVARIANT( emplace_result.second, - "attempt_assign_length_from_type should only be called when no entry" + "attempt_assign_length_from_type should only be called when no entry " "for the array_string_exprt exists in the length_of_array map"); } @@ -159,11 +170,17 @@ void array_poolt::insert( const exprt &pointer_expr, const array_string_exprt &array_expr) { - const auto it_bool = + const auto association_it = arrays_of_pointers.find(pointer_expr); + if(association_it != arrays_of_pointers.end()) + { + INVARIANT( + association_it->second == array_expr, + "should not associate two different arrays to the same pointer"); + } + else + { arrays_of_pointers.insert(std::make_pair(pointer_expr, array_expr)); - - INVARIANT( - it_bool.second, "should not associate two arrays to the same pointer"); + } if(length_of_array.find(array_expr) == length_of_array.end()) { diff --git a/src/solvers/strings/string_constraint_generator_main.cpp b/src/solvers/strings/string_constraint_generator_main.cpp index c919c046b3b..84e7a271a85 100644 --- a/src/solvers/strings/string_constraint_generator_main.cpp +++ b/src/solvers/strings/string_constraint_generator_main.cpp @@ -19,6 +19,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include "string_constraint_generator.h" #include "string_refinement_invariant.h" +#include "string_refinement_util.h" #include #include @@ -68,8 +69,9 @@ exprt string_constraint_generatort::associate_array_to_pointer( /// \todo: we allow expression of the form of `arr[0]` instead of `arr` as /// the array argument but this could go away. array_string_exprt array_expr = to_array_string_expr( - f.arguments()[0].id() == ID_index ? to_index_expr(f.arguments()[0]).array() - : f.arguments()[0]); + f.arguments()[0].id() == ID_index + ? to_index_expr(f.arguments()[0]).array() + : massage_weird_arrays_into_non_weird_arrays(f.arguments()[0])); const exprt &pointer_expr = f.arguments()[1]; array_pool.insert(simplify_expr(pointer_expr, ns), array_expr); diff --git a/src/solvers/strings/string_refinement.cpp b/src/solvers/strings/string_refinement.cpp index 32960538db9..d77e679155c 100644 --- a/src/solvers/strings/string_refinement.cpp +++ b/src/solvers/strings/string_refinement.cpp @@ -24,9 +24,11 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include #include +#include #include #include #include +#include #include #include @@ -327,6 +329,16 @@ static void add_equations_for_symbol_resolution( continue; } + if( + rhs.id() == ID_symbol && + has_prefix( + id2string(to_symbol_expr(rhs).get_identifier()), "symex::args::")) + { + // Symex introduces a new name for non-constant arguments that it uses in + // trace building, we do not need it here for symbol resolve. + continue; + } + if(is_char_pointer_type(rhs.type())) { symbol_solver.make_union(lhs, simplify_expr(rhs, ns)); @@ -1066,7 +1078,7 @@ static exprt get_char_array_and_concretize( const namespacet &ns, messaget::mstreamt &stream, const array_string_exprt &arr, - array_poolt &array_pool) + const array_poolt &array_pool) { stream << "- " << format(arr) << ":\n"; stream << std::string(4, ' ') << "- type: " << format(arr.type()) @@ -1115,7 +1127,7 @@ void debug_model( const namespacet &ns, const std::function &super_get, const std::vector &symbols, - array_poolt &array_pool) + const array_poolt &array_pool) { stream << "debug_model:" << '\n'; for(const auto &pointer_array : generator.array_pool.get_arrays_of_pointers()) @@ -1201,7 +1213,8 @@ static optionalt substitute_array_access( symbol_generatort &symbol_generator, const bool left_propagate) { - const exprt &array = index_expr.array(); + const auto array = + massage_weird_arrays_into_non_weird_arrays(index_expr.array()); if(auto array_of = expr_try_dynamic_cast(array)) return array_of->op(); if(auto array_with = expr_try_dynamic_cast(array)) @@ -1617,8 +1630,20 @@ static void initial_index_set( } if(auto ite = expr_try_dynamic_cast(s)) { - initial_index_set(index_set, ns, qvar, upper_bound, ite->true_case(), i); - initial_index_set(index_set, ns, qvar, upper_bound, ite->false_case(), i); + initial_index_set( + index_set, + ns, + qvar, + upper_bound, + massage_weird_arrays_into_non_weird_arrays(ite->true_case()), + i); + initial_index_set( + index_set, + ns, + qvar, + upper_bound, + massage_weird_arrays_into_non_weird_arrays(ite->false_case()), + i); return; } const minus_exprt u_minus_1(upper_bound, from_integer(1, upper_bound.type())); @@ -1641,7 +1666,8 @@ static void initial_index_set( if(it->id() == ID_index && is_char_type(it->type())) { const auto &index_expr = to_index_expr(*it); - const auto &s = index_expr.array(); + const auto s = + massage_weird_arrays_into_non_weird_arrays(index_expr.array()); initial_index_set(index_set, ns, qvar, bound, s, index_expr.index()); it.next_sibling_or_parent(); } @@ -1835,7 +1861,9 @@ exprt string_refinementt::get(const exprt &expr) const else UNREACHABLE; } - const auto array = supert::get(current.get()); + const auto array = + massage_weird_arrays_into_non_weird_arrays(supert::get(current.get())); + const auto index = get(index_expr->index()); // If the underlying solver does not know about the existence of an array, diff --git a/src/solvers/strings/string_refinement.h b/src/solvers/strings/string_refinement.h index 98b4d9c785d..1cdd7ba3094 100644 --- a/src/solvers/strings/string_refinement.h +++ b/src/solvers/strings/string_refinement.h @@ -56,6 +56,7 @@ Author: Alberto Griggio, alberto.griggio@gmail.com // it is not turned on by default and not all options are available. #define OPT_STRING_REFINEMENT_CBMC \ "(refine-strings)" \ + "(max-nondet-string-length):" \ "(string-printable)" #define HELP_STRING_REFINEMENT_CBMC \ diff --git a/src/util/string_constant.h b/src/util/string_constant.h index eab8d29badc..ac8b5ce5297 100644 --- a/src/util/string_constant.h +++ b/src/util/string_constant.h @@ -50,4 +50,10 @@ inline string_constantt &to_string_constant(typet &type) return to_string_constant((exprt &)type); } +template <> +inline bool can_cast_expr(const exprt &base) +{ + return base.id() == ID_string_constant; +} + #endif // CPROVER_ANSI_C_STRING_CONSTANT_H From ddf72d1ffaf22c54562cda4c09b1e1d709031070 Mon Sep 17 00:00:00 2001 From: xbauch Date: Fri, 7 Feb 2020 13:23:44 +0000 Subject: [PATCH 5/6] Add tests for strchr using the string-refinement solver. --- regression/cbmc/string-refinement-strchr1/test.c | 15 +++++++++++++++ .../cbmc/string-refinement-strchr1/test.desc | 12 ++++++++++++ regression/cbmc/string-refinement-strchr2/test.c | 13 +++++++++++++ .../cbmc/string-refinement-strchr2/test.desc | 10 ++++++++++ 4 files changed, 50 insertions(+) create mode 100644 regression/cbmc/string-refinement-strchr1/test.c create mode 100644 regression/cbmc/string-refinement-strchr1/test.desc create mode 100644 regression/cbmc/string-refinement-strchr2/test.c create mode 100644 regression/cbmc/string-refinement-strchr2/test.desc diff --git a/regression/cbmc/string-refinement-strchr1/test.c b/regression/cbmc/string-refinement-strchr1/test.c new file mode 100644 index 00000000000..3c34ad8927e --- /dev/null +++ b/regression/cbmc/string-refinement-strchr1/test.c @@ -0,0 +1,15 @@ +#include +#include + +int main() +{ + char test[] = "foo"; + + char *first_o = strchr(test, 'o'); + assert(*first_o == test[1]); + assert(*first_o == test[0]); + char *first_t = strchr(test, 't'); + assert(*first_t == test[1]); + assert(*first_t == test[0]); + return 0; +} diff --git a/regression/cbmc/string-refinement-strchr1/test.desc b/regression/cbmc/string-refinement-strchr1/test.desc new file mode 100644 index 00000000000..7984871b3bc --- /dev/null +++ b/regression/cbmc/string-refinement-strchr1/test.desc @@ -0,0 +1,12 @@ +CORE broken-smt-backend +test.c +--refine-strings --max-nondet-string-length 10 +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.\d+\] line \d+ assertion \*first_o == test\[1\]: SUCCESS$ +^\[main.assertion.\d+\] line \d+ assertion \*first_o == test\[0\]: FAILURE$ +^\[main.assertion.\d+\] line \d+ assertion \*first_t == test\[1\]: FAILURE$ +^\[main.assertion.\d+\] line \d+ assertion \*first_t == test\[0\]: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring diff --git a/regression/cbmc/string-refinement-strchr2/test.c b/regression/cbmc/string-refinement-strchr2/test.c new file mode 100644 index 00000000000..37a3837e721 --- /dev/null +++ b/regression/cbmc/string-refinement-strchr2/test.c @@ -0,0 +1,13 @@ +#include +#include + +int main() +{ + int i; + char *test = i ? "fo\0tis" : "notfoti"; + + char *first_o = strchr(test, 'o'); + assert(first_o != NULL); + assert(*first_o == test[0]); + return 0; +} diff --git a/regression/cbmc/string-refinement-strchr2/test.desc b/regression/cbmc/string-refinement-strchr2/test.desc new file mode 100644 index 00000000000..65615497f6e --- /dev/null +++ b/regression/cbmc/string-refinement-strchr2/test.desc @@ -0,0 +1,10 @@ +CORE broken-smt-backend +test.c +--refine-strings +^EXIT=10$ +^SIGNAL=0$ +^\[main.assertion.\d+\] line \d+ assertion first_o != (NULL|\(\(void \*\)0\)): SUCCESS$ +^\[main.assertion.\d+\] line \d+ assertion \*first_o == test\[0\]: FAILURE$ +^VERIFICATION FAILED$ +-- +^warning: ignoring From 0879ad07d95039f15f2abd82db01870ee02dbde7 Mon Sep 17 00:00:00 2001 From: xbauch Date: Fri, 7 Feb 2020 13:23:57 +0000 Subject: [PATCH 6/6] Add case for byte-extracting arrays from offset 0, i.e. the whole array should be the result. --- src/util/pointer_offset_size.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/util/pointer_offset_size.cpp b/src/util/pointer_offset_size.cpp index bae960a1021..73fa82a28aa 100644 --- a/src/util/pointer_offset_size.cpp +++ b/src/util/pointer_offset_size.cpp @@ -632,6 +632,17 @@ optionalt get_subexpression_at_offset( return typecast_exprt(expr, target_type_raw); } + if( + offset_bytes == 0 && expr.type().id() == ID_pointer && + target_type_raw.id() == ID_array) + { + // subexpression at offset zero is the whole thing even for arrays + return byte_extract_exprt{byte_extract_id(), + expr, + from_integer(offset_bytes, index_type()), + target_type_raw}; + } + const typet &source_type = ns.follow(expr.type()); const auto target_size_bits = pointer_offset_bits(target_type_raw, ns); if(!target_size_bits.has_value())