diff --git a/.gitignore b/.gitignore index bbe21138d4b..26e7e5de234 100644 --- a/.gitignore +++ b/.gitignore @@ -60,8 +60,7 @@ regression/**/tests-*.log regression/**/*.goto-cc-saved regression/**/*.gb regression/**/*.smt2 -jbmc/regression/**/tests.log -jbmc/regression/**/tests-symex-driven-loading.log +jbmc/regression/**/*.log # regression/coverage file /regression/coverage_** diff --git a/jbmc/regression/jbmc-strings/CMakeLists.txt b/jbmc/regression/jbmc-strings/CMakeLists.txt index c7b8343e59c..17fa555d5b4 100644 --- a/jbmc/regression/jbmc-strings/CMakeLists.txt +++ b/jbmc/regression/jbmc-strings/CMakeLists.txt @@ -2,6 +2,13 @@ add_test_pl_tests( "$ --validate-goto-model --validate-ssa-equation" ) +add_test_pl_profile( + "jbmc-strings-fixed-size-arrays" + "$ --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" + "-C;-s;fixed-size-strings;-X;limited-size-strings-expected-failure" + "CORE" +) + add_test_pl_profile( "jbmc-strings-symex-driven-lazy-loading" "$ --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" diff --git a/jbmc/regression/jbmc-strings/Makefile b/jbmc/regression/jbmc-strings/Makefile index 98ddf3b06e8..b2d5da20428 100644 --- a/jbmc/regression/jbmc-strings/Makefile +++ b/jbmc/regression/jbmc-strings/Makefile @@ -4,18 +4,22 @@ include ../../src/config.inc test: @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -s symex-driven-loading testfuture: @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" -CF + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -CF -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -CF -s symex-driven-loading testall: @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" -CFTK + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -CFTK -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -CFTK -s symex-driven-loading tests.log: ../$(CPROVER_DIR)/regression/test.pl @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -s symex-driven-loading show: diff --git a/jbmc/regression/jbmc-strings/StringContains03/test.desc b/jbmc/regression/jbmc-strings/StringContains03/test.desc index 20cd76bef3c..b5bf5768ff6 100644 --- a/jbmc/regression/jbmc-strings/StringContains03/test.desc +++ b/jbmc/regression/jbmc-strings/StringContains03/test.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.check ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/StringIndexOf/test.desc b/jbmc/regression/jbmc-strings/StringIndexOf/test.desc index eef902a48e9..921d42abca8 100644 --- a/jbmc/regression/jbmc-strings/StringIndexOf/test.desc +++ b/jbmc/regression/jbmc-strings/StringIndexOf/test.desc @@ -1,6 +1,9 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.check --unwind 4 --max-nondet-string-length 3 --java-assume-inputs-non-null ^EXIT=0$ ^SIGNAL=0$ ^VERIFICATION SUCCESSFUL$ +-- +-- +Actually works fine with limited size strings, just takes a very long time. diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc b/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc index 33f099a0a20..5366ce8b533 100644 --- a/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc +++ b/jbmc/regression/jbmc-strings/StringToLowerCase/test_det.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.det ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc b/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc index de8979f7726..e5e70afe6ae 100644 --- a/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc +++ b/jbmc/regression/jbmc-strings/StringToLowerCase/test_nondet.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.nonDet --max-nondet-string-length 1000 ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/StringToUpperCase/test_dependency.desc b/jbmc/regression/jbmc-strings/StringToUpperCase/test_dependency.desc index 1c42f6c95ac..3cde1169b4a 100644 --- a/jbmc/regression/jbmc-strings/StringToUpperCase/test_dependency.desc +++ b/jbmc/regression/jbmc-strings/StringToUpperCase/test_dependency.desc @@ -8,3 +8,4 @@ assertion at file Test.java line 50 .*: FAILURE -- -- Check that when there are dependencies, axioms are added correctly. + diff --git a/jbmc/regression/jbmc-strings/StringToUpperCase/test_det.desc b/jbmc/regression/jbmc-strings/StringToUpperCase/test_det.desc index b9d6380416f..44226164aaa 100644 --- a/jbmc/regression/jbmc-strings/StringToUpperCase/test_det.desc +++ b/jbmc/regression/jbmc-strings/StringToUpperCase/test_det.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.det ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/StringToUpperCase/test_nondet.desc b/jbmc/regression/jbmc-strings/StringToUpperCase/test_nondet.desc index b050eec9aa0..dcd670d7206 100644 --- a/jbmc/regression/jbmc-strings/StringToUpperCase/test_nondet.desc +++ b/jbmc/regression/jbmc-strings/StringToUpperCase/test_nondet.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.nonDet --max-nondet-string-length 1000 ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/long_string/test.desc b/jbmc/regression/jbmc-strings/long_string/test.desc index 44790d0ba65..07de09699d6 100644 --- a/jbmc/regression/jbmc-strings/long_string/test.desc +++ b/jbmc/regression/jbmc-strings/long_string/test.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.check --max-nondet-string-length 2000000 ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/long_string/test_abort.desc b/jbmc/regression/jbmc-strings/long_string/test_abort.desc index 8e9e06f4e15..f2bfaa5e49d 100644 --- a/jbmc/regression/jbmc-strings/long_string/test_abort.desc +++ b/jbmc/regression/jbmc-strings/long_string/test_abort.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.checkAbort --trace --max-nondet-string-length 100000000 ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/max-length/test2.desc b/jbmc/regression/jbmc-strings/max-length/test2.desc index 363f983d857..339023d7035 100644 --- a/jbmc/regression/jbmc-strings/max-length/test2.desc +++ b/jbmc/regression/jbmc-strings/max-length/test2.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --max-nondet-string-length 500000 --function Test.checkMaxInputLength ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/max-length/test3.desc b/jbmc/regression/jbmc-strings/max-length/test3.desc index 24365a184e7..532ee79e104 100644 --- a/jbmc/regression/jbmc-strings/max-length/test3.desc +++ b/jbmc/regression/jbmc-strings/max-length/test3.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --max-nondet-string-length 4001 --function Test.checkMaxLength ^EXIT=10$ diff --git a/jbmc/regression/jbmc-strings/max-length/test4.desc b/jbmc/regression/jbmc-strings/max-length/test4.desc index 18902c95c3b..a6a5ad9135b 100644 --- a/jbmc/regression/jbmc-strings/max-length/test4.desc +++ b/jbmc/regression/jbmc-strings/max-length/test4.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --max-nondet-string-length 4000 --function Test.checkMaxLength ^SIGNAL=0$ diff --git a/jbmc/regression/jbmc-strings/string-non-empty-option/test_non_empty.desc b/jbmc/regression/jbmc-strings/string-non-empty-option/test_non_empty.desc index e4af0572e69..ed5a5f12201 100644 --- a/jbmc/regression/jbmc-strings/string-non-empty-option/test_non_empty.desc +++ b/jbmc/regression/jbmc-strings/string-non-empty-option/test_non_empty.desc @@ -1,4 +1,4 @@ -CORE +CORE limited-size-strings-expected-failure Test --function Test.checkMinLength --string-non-empty --max-nondet-string-length 2147483647 ^EXIT=10$ diff --git a/jbmc/regression/strings-smoke-tests/CMakeLists.txt b/jbmc/regression/strings-smoke-tests/CMakeLists.txt index e3584329528..683ba51aea4 100644 --- a/jbmc/regression/strings-smoke-tests/CMakeLists.txt +++ b/jbmc/regression/strings-smoke-tests/CMakeLists.txt @@ -2,6 +2,13 @@ add_test_pl_tests( "$ --validate-goto-model --validate-ssa-equation" ) +add_test_pl_profile( + "strings-smoke-tests-fixed-size-arrays" + "$ --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" + "-C;-s;fixed-size-strings;-X;limited-size-strings-expected-failure" + "CORE" +) + add_test_pl_profile( "strings-smoke-tests-symex-driven-lazy-loading" "$ --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" diff --git a/jbmc/regression/strings-smoke-tests/Makefile b/jbmc/regression/strings-smoke-tests/Makefile index 98ddf3b06e8..b2d5da20428 100644 --- a/jbmc/regression/strings-smoke-tests/Makefile +++ b/jbmc/regression/strings-smoke-tests/Makefile @@ -4,18 +4,22 @@ include ../../src/config.inc test: @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -s symex-driven-loading testfuture: @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" -CF + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -CF -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -CF -s symex-driven-loading testall: @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" -CFTK + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -CFTK -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -CFTK -s symex-driven-loading tests.log: ../$(CPROVER_DIR)/regression/test.pl @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation" + @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --max-intermediate-string-length 100 --use-fixed-size-arrays-for-bounded-strings --no-array-field-sensitivity" -X limited-size-strings-expected-failure -s fixed-size-strings @../$(CPROVER_DIR)/regression/test.pl -e -p -c "../../../src/jbmc/jbmc --validate-goto-model --validate-ssa-equation --symex-driven-lazy-loading" -X symex-driven-lazy-loading-expected-failure -s symex-driven-loading show: diff --git a/jbmc/src/java_bytecode/java_bytecode_language.cpp b/jbmc/src/java_bytecode/java_bytecode_language.cpp index 3903f3053d5..5fd6ee0eecd 100644 --- a/jbmc/src/java_bytecode/java_bytecode_language.cpp +++ b/jbmc/src/java_bytecode/java_bytecode_language.cpp @@ -261,6 +261,16 @@ java_bytecode_language_optionst::java_bytecode_language_optionst( void java_bytecode_languaget::set_language_options(const optionst &options) { object_factory_parameters.set(options); + if(options.is_set("max-intermediate-string-length")) + { + string_preprocess.set_max_intermediate_string_length( + options.get_unsigned_int_option("max-intermediate-string-length")); + } + if(options.is_set("use-fixed-size-arrays-for-bounded-strings")) + { + string_preprocess.set_use_fixed_size_arrays_for_bounded_strings(true); + } + language_options = java_bytecode_language_optionst{options, *this}; const auto &new_points = build_extra_entry_points(options); language_options->extra_methods.insert( diff --git a/jbmc/src/java_bytecode/java_object_factory.cpp b/jbmc/src/java_bytecode/java_object_factory.cpp index fc559b500ee..23fd4b3ffe1 100644 --- a/jbmc/src/java_bytecode/java_object_factory.cpp +++ b/jbmc/src/java_bytecode/java_object_factory.cpp @@ -327,6 +327,9 @@ static irep_idt integer_interval_to_string(const integer_intervalt &interval) /// data array) /// \param min_nondet_string_length: minimum length of strings to initialize /// \param max_nondet_string_length: maximum length of strings to initialize +/// \param use_fixed_size_array_for_bounded_string: if true, allocate a fixed +// size array when max_nondet_string_length is set and reasonably small +// (currently hard-coded to 256 chars) /// \param loc: location in the source /// \param function_id: function ID to associate with auxiliary variables /// \param symbol_table: the symbol table @@ -362,6 +365,7 @@ void initialize_nondet_string_fields( code_blockt &code, const std::size_t &min_nondet_string_length, const std::size_t &max_nondet_string_length, + const bool use_fixed_size_array_for_bounded_string, const source_locationt &loc, const irep_idt &function_id, symbol_table_baset &symbol_table, @@ -413,8 +417,13 @@ void initialize_nondet_string_fields( code_assumet(binary_relation_exprt(length_expr, ID_le, max_length))); } - const exprt data_expr = - make_nondet_infinite_char_array(symbol_table, loc, function_id, code); + const exprt data_expr = make_nondet_char_array( + symbol_table, + loc, + function_id, + code, + use_fixed_size_array_for_bounded_string ? max_nondet_string_length + : optionalt{}); struct_expr.operands()[struct_type.component_number("length")] = length_expr; const address_of_exprt array_pointer( @@ -832,6 +841,7 @@ void java_object_factoryt::gen_nondet_struct_init( assignments, object_factory_parameters.min_nondet_string_length, object_factory_parameters.max_nondet_string_length, + object_factory_parameters.use_fixed_size_arrays_for_bounded_strings, location, object_factory_parameters.function_id, symbol_table, diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp index c5a7569d739..726bf6f5372 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.cpp +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.cpp @@ -17,6 +17,7 @@ Date: April 2017 /// java.lang.StringBuilder, java.lang.StringBuffer. #include +#include #include #include #include @@ -493,8 +494,21 @@ refined_string_exprt java_string_library_preprocesst::make_nondet_string_expr( const side_effect_expr_nondett nondet_length(str.length().type(), loc); code.add(code_assignt(str.length(), nondet_length), loc); - const exprt nondet_array_expr = - make_nondet_infinite_char_array(symbol_table, loc, function_id, code); + const exprt nondet_array_expr = make_nondet_char_array( + symbol_table, + loc, + function_id, + code, + use_fixed_size_arrays_for_bounded_strings ? max_intermediate_string_length + : optionalt{}); + + if(max_intermediate_string_length) + { + code.add(code_assumet(binary_relation_exprt( + str.length(), + ID_le, + from_integer(*max_intermediate_string_length, java_int_type())))); + } const address_of_exprt array_pointer( index_exprt(nondet_array_expr, from_integer(0, java_int_type()))); @@ -576,23 +590,26 @@ codet java_string_library_preprocesst::code_return_function_application( make_function_application(function_id, arguments, type, symbol_table)); } -/// Declare a fresh symbol of type array of character with infinite size. +/// Declare a fresh symbol of type array of characters suitable for a string +/// of the given maximum length /// \param symbol_table: the symbol table /// \param loc: source location /// \param function_id: name of the function containing the array /// \param [out] code: code block where the declaration gets added +/// \param max_length: maximum string length /// \return created symbol expression -exprt make_nondet_infinite_char_array( +exprt make_nondet_char_array( symbol_table_baset &symbol_table, const source_locationt &loc, const irep_idt &function_id, - code_blockt &code) + code_blockt &code, + optionalt max_length) { - const array_typet array_type( - java_char_type(), infinity_exprt(java_int_type())); + const array_typet array_type = + make_char_array_type(java_char_type(), java_int_type(), max_length); const symbolt data_sym = fresh_java_symbol( pointer_type(array_type), - "nondet_infinite_array_pointer", + "nondet_array_pointer", loc, function_id, symbol_table); diff --git a/jbmc/src/java_bytecode/java_string_library_preprocess.h b/jbmc/src/java_bytecode/java_string_library_preprocess.h index 1477eb249e5..7c8f85fe9f1 100644 --- a/jbmc/src/java_bytecode/java_string_library_preprocess.h +++ b/jbmc/src/java_bytecode/java_string_library_preprocess.h @@ -44,6 +44,15 @@ class java_string_library_preprocesst:public messaget void initialize_known_type_table(); void initialize_conversion_table(); + void set_max_intermediate_string_length( + optionalt max_intermediate_string_length_) + { + this->max_intermediate_string_length = max_intermediate_string_length_; + } + void set_use_fixed_size_arrays_for_bounded_strings(bool value) + { + use_fixed_size_arrays_for_bounded_strings = value; + } bool implements_function(const irep_idt &function_id) const; void get_all_function_names(std::unordered_set &methods) const; @@ -145,6 +154,13 @@ class java_string_library_preprocesst:public messaget // A set tells us what java types should be considered as string objects std::unordered_set string_types; + // Maximum length enforced on freshly-allocated strings + optionalt max_intermediate_string_length; + + // If true, allocate fixed-size arrays to hold small (currently hard-coded + // to 256 chars) bounded strings + bool use_fixed_size_arrays_for_bounded_strings = false; + code_blockt make_float_to_string_code( const java_method_typet &type, const source_locationt &loc, @@ -309,11 +325,12 @@ class java_string_library_preprocesst:public messaget symbol_table_baset &symbol_table); }; -exprt make_nondet_infinite_char_array( +exprt make_nondet_char_array( symbol_table_baset &symbol_table, const source_locationt &loc, const irep_idt &function_id, - code_blockt &code); + code_blockt &code, + optionalt max_length); void add_pointer_to_array_association( const exprt &pointer, diff --git a/jbmc/src/jbmc/jbmc_parse_options.cpp b/jbmc/src/jbmc/jbmc_parse_options.cpp index 3677bf0084f..58f3718b06c 100644 --- a/jbmc/src/jbmc/jbmc_parse_options.cpp +++ b/jbmc/src/jbmc/jbmc_parse_options.cpp @@ -299,6 +299,16 @@ void jbmc_parse_optionst::get_command_line_options(optionst &options) options.set_option("refine-arithmetic", true); } + if(cmdline.isset("max-intermediate-string-length")) + { + options.set_option( + "max-intermediate-string-length", + cmdline.get_value("max-intermediate-string-length")); + } + + if(cmdline.isset("use-fixed-size-arrays-for-bounded-strings")) + options.set_option("use-fixed-size-arrays-for-bounded-strings", true); + if(cmdline.isset("no-refine-strings")) options.set_option("refine-strings", false); @@ -457,6 +467,9 @@ void jbmc_parse_optionst::get_command_line_options(optionst &options) if(cmdline.isset("show-goto-symex-steps")) options.set_option("show-goto-symex-steps", true); + + if(!string_solver_options_valid(options, log)) + exit(CPROVER_EXIT_USAGE_ERROR); } /// invoke main modules diff --git a/jbmc/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp b/jbmc/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp index 597b447e245..9c61dd0bd10 100644 --- a/jbmc/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp +++ b/jbmc/unit/java_bytecode/java_object_factory/gen_nondet_string_init.cpp @@ -89,20 +89,20 @@ SCENARIO( "tmp_object_factory = NONDET(int);", CPROVER_PREFIX "assume(tmp_object_factory >= 0);", CPROVER_PREFIX "assume(tmp_object_factory <= 20);", - "char (*nondet_infinite_array_pointer)[INFINITY()];", - "nondet_infinite_array_pointer = " + "char (*nondet_array_pointer)[INFINITY()];", + "nondet_array_pointer = " "ALLOCATE(char [INFINITY()], INFINITY(), false);", - "*nondet_infinite_array_pointer = NONDET(char [INFINITY()]);", + "*nondet_array_pointer = NONDET(char [INFINITY()]);", "int return_array;", "return_array = cprover_associate_array_to_pointer_func" - "(*nondet_infinite_array_pointer, *nondet_infinite_array_pointer);", + "(*nondet_array_pointer, *nondet_array_pointer);", "int return_array;", "return_array = cprover_associate_length_to_array_func" - "(*nondet_infinite_array_pointer, tmp_object_factory);", + "(*nondet_array_pointer, tmp_object_factory);", "arg = { .@java.lang.Object={ .@class_identifier" "=\"java::java.lang.String\" }," " .length=tmp_object_factory, " - ".data=*nondet_infinite_array_pointer };"}; + ".data=*nondet_array_pointer };"}; // clang-format on for(std::size_t i = 0; diff --git a/jbmc/unit/solvers/strings/string_constraint_instantiation/instantiate_not_contains.cpp b/jbmc/unit/solvers/strings/string_constraint_instantiation/instantiate_not_contains.cpp index 6f1ec9f1ca8..b08f6a83712 100644 --- a/jbmc/unit/solvers/strings/string_constraint_instantiation/instantiate_not_contains.cpp +++ b/jbmc/unit/solvers/strings/string_constraint_instantiation/instantiate_not_contains.cpp @@ -187,7 +187,7 @@ SCENARIO( config.set_arch("none"); symbol_generatort symbol_generator; - array_poolt array_pool(symbol_generator); + array_poolt array_pool(symbol_generator, {}, false); // Creating strings const auto ab_array = make_string_exprt("ab"); @@ -210,7 +210,7 @@ SCENARIO( t.length_type()); // Generating the corresponding axioms and simplifying, recording info - string_constraint_generatort generator(empty_ns); + string_constraint_generatort generator(empty_ns, {}, false); const auto pair = generator.add_axioms_for_function_application(func); const string_constraintst &constraints = pair.second; @@ -305,7 +305,7 @@ SCENARIO( a_array}; // Create witness for axiom - string_constraint_generatort generator(empty_ns); + string_constraint_generatort generator(empty_ns, {}, false); std::unordered_map witnesses; witnesses.emplace(vacuous, generator.fresh_symbol("w", t.witness_type())); @@ -355,7 +355,7 @@ SCENARIO( b_array}; // Create witness for axiom - string_constraint_generatort generator(empty_ns); + string_constraint_generatort generator(empty_ns, {}, false); std::unordered_map witnesses; witnesses.emplace(trivial, generator.fresh_symbol("w", t.witness_type())); @@ -406,7 +406,7 @@ SCENARIO( empty_array}; // Create witness for axiom - string_constraint_generatort generator(empty_ns); + string_constraint_generatort generator(empty_ns, {}, false); std::unordered_map witnesses; witnesses.emplace(trivial, generator.fresh_symbol("w", t.witness_type())); @@ -459,7 +459,7 @@ SCENARIO( ab_array}; // Create witness for axiom - string_constraint_generatort generator(empty_ns); + string_constraint_generatort generator(empty_ns, {}, false); std::unordered_map witnesses; witnesses.emplace(trivial, generator.fresh_symbol("w", t.witness_type())); @@ -510,7 +510,7 @@ SCENARIO( cd_array}; // Create witness for axiom - string_constraint_generatort generator(empty_ns); + string_constraint_generatort generator(empty_ns, {}, false); std::unordered_map witnesses; witnesses.emplace(trivial, generator.fresh_symbol("w", t.witness_type())); diff --git a/jbmc/unit/solvers/strings/string_refinement/dependency_graph.cpp b/jbmc/unit/solvers/strings/string_refinement/dependency_graph.cpp index 93d7d0c06dc..890b1b694d9 100644 --- a/jbmc/unit/solvers/strings/string_refinement/dependency_graph.cpp +++ b/jbmc/unit/solvers/strings/string_refinement/dependency_graph.cpp @@ -87,7 +87,7 @@ SCENARIO("dependency_graph", "[core][solvers][refinement][string_refinement]") WHEN("We add dependencies") { symbol_generatort generator; - array_poolt array_pool(generator); + array_poolt array_pool(generator, {}, false); optionalt new_equation1 = add_node(dependencies, equation1, array_pool, generator); diff --git a/src/goto-checker/solver_factory.cpp b/src/goto-checker/solver_factory.cpp index 90b67d8de06..0c84f9ff84f 100644 --- a/src/goto-checker/solver_factory.cpp +++ b/src/goto-checker/solver_factory.cpp @@ -263,6 +263,11 @@ solver_factoryt::get_string_refinement() options.get_unsigned_int_option("max-node-refinement"); info.refine_arrays = options.get_bool_option("refine-arrays"); info.refine_arithmetic = options.get_bool_option("refine-arithmetic"); + if(options.is_set("max-intermediate-string-length")) + info.maximum_intermediate_string_length = + options.get_unsigned_int_option("max-intermediate-string-length"); + if(options.is_set("use-fixed-size-arrays-for-bounded-strings")) + info.use_fixed_size_arrays_for_bounded_strings = true; info.message_handler = &message_handler; auto decision_procedure = util_make_unique(info); diff --git a/src/solvers/Makefile b/src/solvers/Makefile index 5f3238de90c..357f20f59db 100644 --- a/src/solvers/Makefile +++ b/src/solvers/Makefile @@ -162,6 +162,7 @@ SRC = $(BOOLEFORCE_SRC) \ strings/array_pool.cpp \ strings/equation_symbol_mapping.cpp \ strings/format_specifier.cpp \ + strings/max_concrete_char_array.cpp \ strings/string_builtin_function.cpp \ strings/string_dependencies.cpp \ strings/string_concatenation_builtin_function.cpp \ diff --git a/src/solvers/prop/prop_conv_solver.h b/src/solvers/prop/prop_conv_solver.h index 8bd6a728aaa..3979fb951b0 100644 --- a/src/solvers/prop/prop_conv_solver.h +++ b/src/solvers/prop/prop_conv_solver.h @@ -126,7 +126,7 @@ class prop_conv_solvert : public conflict_providert, propt ∝ - messaget log; + mutable messaget log; static const char *context_prefix; diff --git a/src/solvers/strings/array_pool.cpp b/src/solvers/strings/array_pool.cpp index ab7d3c7cf5b..58158d237ee 100644 --- a/src/solvers/strings/array_pool.cpp +++ b/src/solvers/strings/array_pool.cpp @@ -7,6 +7,7 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com \*******************************************************************/ #include "array_pool.h" +#include "max_concrete_char_array.h" symbol_exprt symbol_generatort:: operator()(const irep_idt &prefix, const typet &type) @@ -54,7 +55,11 @@ array_poolt::get_length_if_exists(const array_string_exprt &s) const array_string_exprt array_poolt::fresh_string(const typet &index_type, const typet &char_type) { - array_typet array_type{char_type, infinity_exprt(index_type)}; + array_typet array_type = make_char_array_type( + char_type, + index_type, + use_fixed_size_arrays_for_bounded_strings ? maximum_fresh_string_length + : optionalt{}); symbol_exprt content = fresh_symbol("string_content", array_type); array_string_exprt str = to_array_string_expr(content); arrays_of_pointers.emplace( @@ -122,6 +127,19 @@ array_string_exprt array_poolt::make_char_array_for_char_pointer( return to_array_string_expr(insert_result.first->second); } +static bool is_array_of_constants(const exprt &e) +{ + const array_exprt *array = expr_try_dynamic_cast(e); + if(!array) + return false; + for(const auto &op : array->operands()) + { + if(!can_cast_expr(op)) + return false; + } + return true; +} + /// Given an array_string_exprt, get the size of the underlying array. If that /// size is undefined, create a new symbol for the size. /// Then add an entry from `array_expr` to that size in the `length_of_array` @@ -141,10 +159,9 @@ static void attempt_assign_length_from_type( // This invariant seems always true, but I don't know why. // If we find a case where this is violated, try calling // attempt_assign_length_from_type on the true and false cases. - const exprt &size_from_type = to_array_type(array_expr.type()).size(); const exprt &size_to_assign = - size_from_type != infinity_exprt(size_from_type.type()) - ? size_from_type + is_array_of_constants(array_expr) + ? to_array_type(array_expr.type()).size() : symbol_generator("string_length", array_expr.length_type()); const auto emplace_result = diff --git a/src/solvers/strings/array_pool.h b/src/solvers/strings/array_pool.h index 6e6e3fd20fa..4b497bb9b46 100644 --- a/src/solvers/strings/array_pool.h +++ b/src/solvers/strings/array_pool.h @@ -42,8 +42,14 @@ class symbol_generatort final class array_poolt final { public: - explicit array_poolt(symbol_generatort &symbol_generator) - : fresh_symbol(symbol_generator) + explicit array_poolt( + symbol_generatort &symbol_generator, + optionalt maximum_string_length, + bool use_fixed_size_arrays_for_bounded_strings) + : fresh_symbol(symbol_generator), + maximum_fresh_string_length(maximum_string_length), + use_fixed_size_arrays_for_bounded_strings( + use_fixed_size_arrays_for_bounded_strings) { } @@ -101,6 +107,13 @@ class array_poolt final /// Generate fresh symbols symbol_generatort &fresh_symbol; + /// Maximum length of fresh strings introduced by this class + optionalt maximum_fresh_string_length; + + /// If true, fresh strings with short, bounded size (<= 256 chars) will be + /// allocated as fixed-size arrays. + bool use_fixed_size_arrays_for_bounded_strings = false; + array_string_exprt make_char_array_for_char_pointer( const exprt &char_pointer, const typet &char_array_type); diff --git a/src/solvers/strings/max_concrete_char_array.cpp b/src/solvers/strings/max_concrete_char_array.cpp new file mode 100644 index 00000000000..d1394248de0 --- /dev/null +++ b/src/solvers/strings/max_concrete_char_array.cpp @@ -0,0 +1,34 @@ +/*******************************************************************\ + +Module: Size limit to choose between variable- and fixed-size char arrays + +Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Size limit to choose between variable- and fixed-size char arrays + +#include "max_concrete_char_array.h" + +#include +#include + +/// Pure guess; TODO study what this should be. +/// Char arrays with max length <= this will be allocated with a fixed size +/// (assuming the perhaps-wasted space is worth the less-complex formula +/// generated for fixed-size arrays), while larger arrays will be allocated +/// with indefinite size assuming generally the wasted space is not worth it. +const std::size_t max_concrete_char_array_length = 256; + +array_typet make_char_array_type( + const typet &element_type, + const typet &index_type, + optionalt max_length) +{ + exprt array_size = max_length && *max_length <= max_concrete_char_array_length + ? (exprt)from_integer(*max_length, index_type) + : (exprt)infinity_exprt{index_type}; + + return array_typet{element_type, array_size}; +} diff --git a/src/solvers/strings/max_concrete_char_array.h b/src/solvers/strings/max_concrete_char_array.h new file mode 100644 index 00000000000..c8f33e57a25 --- /dev/null +++ b/src/solvers/strings/max_concrete_char_array.h @@ -0,0 +1,34 @@ +/*******************************************************************\ + +Module: Size limit to choose between variable- and fixed-size char arrays + +Author: Diffblue Ltd. + +\*******************************************************************/ + +/// \file +/// Size limit to choose between variable- and fixed-size char arrays + +#ifndef CPROVER_SOLVERS_STRINGS_MAX_CONCRETE_CHAR_ARRAY_H +#define CPROVER_SOLVERS_STRINGS_MAX_CONCRETE_CHAR_ARRAY_H + +#include +#include +#include + +extern const std::size_t max_concrete_char_array_length; + +/// Create a char array type suitable for a variable-length string of +/// perhaps-limited size. This may have a constant- or variable-length type, +/// depending on the size limit given. If no limit is given it will certainly +/// have variable-length type. +/// \param element_type character type +/// \param index_type type for the array size, must match the type of indices +/// this array +/// \param max_length optional maximum length +array_typet make_char_array_type( + const typet &element_type, + const typet &index_type, + optionalt max_length); + +#endif // CPROVER_SOLVERS_STRINGS_MAX_CONCRETE_CHAR_ARRAY_H diff --git a/src/solvers/strings/string_constraint_generator.h b/src/solvers/strings/string_constraint_generator.h index ae976e77ad3..db5c9eccd34 100644 --- a/src/solvers/strings/string_constraint_generator.h +++ b/src/solvers/strings/string_constraint_generator.h @@ -48,7 +48,10 @@ class string_constraint_generatort final // string constraints for different string functions and add them // to the axiom list. - explicit string_constraint_generatort(const namespacet &ns); + string_constraint_generatort( + const namespacet &ns, + optionalt maximum_string_length, + bool use_fixed_size_arrays_for_bounded_strings); std::pair add_axioms_for_function_application(const function_application_exprt &expr); diff --git a/src/solvers/strings/string_constraint_generator_main.cpp b/src/solvers/strings/string_constraint_generator_main.cpp index e83b0d9460b..2e3673b7d43 100644 --- a/src/solvers/strings/string_constraint_generator_main.cpp +++ b/src/solvers/strings/string_constraint_generator_main.cpp @@ -31,8 +31,15 @@ Author: Romain Brenguier, romain.brenguier@diffblue.com #include #include -string_constraint_generatort::string_constraint_generatort(const namespacet &ns) - : array_pool(fresh_symbol), ns(ns) +string_constraint_generatort::string_constraint_generatort( + const namespacet &ns, + optionalt maximum_string_length, + bool use_fixed_size_arrays_for_bounded_strings) + : array_pool( + fresh_symbol, + maximum_string_length, + use_fixed_size_arrays_for_bounded_strings), + ns(ns) { } diff --git a/src/solvers/strings/string_refinement.cpp b/src/solvers/strings/string_refinement.cpp index 32960538db9..1948ed327c1 100644 --- a/src/solvers/strings/string_refinement.cpp +++ b/src/solvers/strings/string_refinement.cpp @@ -157,7 +157,10 @@ string_refinementt::string_refinementt(const infot &info, bool) : supert(info), config_(info), loop_bound_(info.refinement_bound), - generator(*info.ns) + generator( + *info.ns, + info.maximum_intermediate_string_length, + info.use_fixed_size_arrays_for_bounded_strings) { } @@ -704,7 +707,6 @@ decision_proceduret::resultt string_refinementt::dec_solve() log.debug() << "dec_solve: add constraints" << messaget::eom; merge(constraints, dependencies.add_constraints(generator)); - #ifdef DEBUG output_equations(log.debug(), equations); #endif @@ -768,12 +770,21 @@ decision_proceduret::resultt string_refinementt::dec_solve() add_lemma(substitute_array_access(lemma, generator.fresh_symbol, true)); } - // All generated strings should have non-negative length + // All generated strings should have non-negative length, and perhaps a + // maximum length for(const auto &pair : generator.array_pool.created_strings()) { exprt length = generator.array_pool.get_or_create_length(pair.first); add_lemma( binary_relation_exprt{length, ID_ge, from_integer(0, length.type())}); + if(config_.maximum_intermediate_string_length) + { + add_lemma(binary_relation_exprt{ + length, + ID_le, + from_integer( + *config_.maximum_intermediate_string_length, length.type())}); + } } // Initial try without index set @@ -1235,12 +1246,16 @@ static void substitute_array_access_in_place( { if(const auto index_expr = expr_try_dynamic_cast(*it)) { - optionalt result = - substitute_array_access(*index_expr, symbol_generator, left_propagate); + if(!can_cast_expr( + to_array_type(index_expr->array().type()).size())) + { + optionalt result = substitute_array_access( + *index_expr, symbol_generator, left_propagate); - // Only perform a write when we have something changed. - if(result) - it.mutate() = *result; + // Only perform a write when we have something changed. + if(result) + it.mutate() = *result; + } } } } @@ -1785,8 +1800,8 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) string_refinement_invariantt("array-lists must have at least two " "operands")); const typet &char_type = expr.operands()[1].type(); - array_typet arr_type(char_type, infinity_exprt(char_type)); - exprt ret_expr = array_of_exprt(from_integer(0, char_type), arr_type); + exprt ret_expr = + array_of_exprt(from_integer(0, char_type), to_array_type(expr.type())); for(size_t i = 0; i < expr.operands().size(); i += 2) { @@ -1804,6 +1819,140 @@ exprt substitute_array_lists(exprt expr, size_t string_max_length) return expr; } +/// Given an index set, return a sorted vector of the concrete indices +/// defined in the current model. +/// \param index_set index set +/// \return concrete indices +std::vector string_refinementt::get_model_defined_indices( + const std::set &index_set) const +{ + std::vector defined_indices; + for(const auto &index_expr : index_set) + { + if(auto index = numeric_cast(simplify_expr(get(index_expr), ns))) + { + defined_indices.push_back(*index); + } + } + std::sort(defined_indices.begin(), defined_indices.end()); + defined_indices.erase( + std::unique(defined_indices.begin(), defined_indices.end()), + defined_indices.end()); + return defined_indices; +} + +/// Replace the given array with one where any index not defined in the given +/// defined indices inherits its value from its right-hand defined neighbouring +// index. +/// For example, if the input array represents the string "abcde" and the +/// defined indices are 1, 3 and 4, the result is "bbdde". +/// \param array array to restrict +/// \param defined_indices sorted index-set to restrict by +/// \return restricted array +static std::pair restrict_sparse_array_to_indices( + interval_sparse_arrayt array, + const std::vector &defined_indices) +{ + if(defined_indices.empty()) + { + if(array.begin() == array.end()) + return {std::move(array), false}; + else + { + return { + interval_sparse_arrayt{array.at(std::numeric_limits::max())}, + true}; + } + } + + // In the resulting array, we want all undefined indices (those not appearing + // in defined_indices) to inherit their value from their right-hand defined + // neighbour. This means all intervals should be extended to their left + // to cover any intermediate undefined region. + + bool any_changes = false; + + for(auto intervals_it = array.begin(); intervals_it != array.end(); + /* no increment */) + { + auto find_defined_index = std::lower_bound( + defined_indices.begin(), defined_indices.end(), intervals_it->first); + if( + find_defined_index == defined_indices.end() || + *find_defined_index != intervals_it->first) + { + any_changes = true; + + // If there is no previous defined index, or the previous defined index + // already has an interval associated (necessarily the previous interval), + // just drop this interval and therefore effectively extend the one to + // our right to cover the whole intervening space: + bool should_drop_interval = + find_defined_index == defined_indices.begin() || + (intervals_it != array.begin() && + std::prev(intervals_it)->first == *std::prev(find_defined_index)); + + if(should_drop_interval) + { + intervals_it = array.erase(intervals_it); + } + else + { + // Extend the interval to our right (i.e. shrink this one) so that + // this one extends to exactly the previous defined index. Remember + // these intervals are of the form "VALUE until-including INDEX". + exprt existing_value = intervals_it->second; + intervals_it = array.erase(intervals_it); + array.insert({*std::prev(find_defined_index), existing_value}); + } + } + else + { + // This interval already extends to a defined index; leave it alone. + intervals_it++; + } + } + + return {std::move(array), any_changes}; +} + +/// Replace the given array with one where any index not defined in the given +/// index set inherits its value from its right-hand defined neighbouring index. +/// For example, if the input array represents the string "abcde" and the +/// defined indices are 1, 3 and 4, the result is "bbdde". +/// \param array array to restrict +/// \param index_set index-set to restrict by (if it contains symbolic +/// expressions they are resolved against the current model) +/// \return restricted array +interval_sparse_arrayt string_refinementt::restrict_sparse_array_to_index_set( + interval_sparse_arrayt array, + const std::set &index_set) const +{ + std::ostringstream old_value; + if(log.get_message_handler().get_verbosity() >= 10) + { + array.print(old_value); + } + + std::vector defined_indices = + get_model_defined_indices(index_set); + + bool any_changes; + std::tie(array, any_changes) = + restrict_sparse_array_to_indices(std::move(array), defined_indices); + + if(any_changes && log.get_message_handler().get_verbosity() >= 10) + { + log.debug() << "Removed undefined indices from a string.\n" + << "Old string: " << old_value.str() << "\n" + << "New string: "; + array.print(log.debug()); + log.debug() << messaget::eom; + } + + return array; +} + /// Evaluates the given expression in the valuation found by /// string_refinementt::dec_solve. /// @@ -1845,9 +1994,36 @@ exprt string_refinementt::get(const exprt &expr) const const exprt unknown = from_integer(CHARACTER_FOR_UNKNOWN, index_expr->type()); - if( - const auto sparse_array = interval_sparse_arrayt::of_expr(array, unknown)) + if(auto sparse_array = interval_sparse_arrayt::of_expr(array, unknown)) { + if(can_cast_expr( + to_array_type(current.get().type()).size())) + { + auto find_it = index_sets.cumulative.find(current); + if(find_it != index_sets.cumulative.end()) + { + // The underlying solver assigns fixed-sized array cells arbitrary + // values at cells that are otherwise unconstrained. (For example, if + // our current constraints specified char x[5] had x[3] == 'a' and + // isdigit(x[4]) it might return "@:^a0", and the leading arbitrary + // choices might violate universal axioms (perhaps for-all i we must + // have isalphanumeric(x[i]))). + // + // Therefore, mask out cells that don't appear in (this model of) + // the array's index set, instead using the nearest constrained cell + // on the right-hand side. Because index sets are initialised to + // contain the upper-bound of each universal constraint, and all + // universal constraints created by this solver have the property that + // a valid solution to a universal constraint at one offset is also + // valid at another (e.g. we can have for-all i, x[i] == y[i+offset], + // but not for-all i, x[i] = i), this will ensure that if a universal + // constraint holds for all values in the index set, it also holds for + // all values in that universal constraint's bounds. + sparse_array = + restrict_sparse_array_to_index_set(*sparse_array, find_it->second); + } + } + if(const auto index_value = numeric_cast(index)) return sparse_array->at(*index_value); return sparse_array->to_if_expression(index); @@ -2034,3 +2210,57 @@ static bool is_valid_string_constraint( return true; } + +static bool check_string_limit( + const optionst &options, + const char *limit_name, + std::size_t field_sensitivity_limit, + messaget &log) +{ + if( + options.is_set(limit_name) && + options.get_unsigned_int_option(limit_name) <= field_sensitivity_limit) + { + log.error() << "Invalid option combination: " + "use-fixed-size-arrays-for-bounded-strings is enabled, but " + << limit_name + << " is lower than symex's array-cell-sensitivity limit (" + << field_sensitivity_limit << "). Either raise " << limit_name + << ", or use a lower max-field-sensitivity-array-size, " + << "or use --no-array-field-sensitivity." << messaget::eom; + return false; + } + + return true; +} + +bool string_solver_options_valid(const optionst &options, messaget &log) +{ + if(options.get_bool_option("use-fixed-size-arrays-for-bounded-strings")) + { + // Make sure none of the fixed-size arrays created would be subject to + // field-sensitivity-- the string solver can't currently generate new + // references to arrays represented as individual symbols this way. + + if(!options.get_bool_option("no-array-field-sensitivity")) + { + std::size_t field_sensitivity_limit = + options.is_set("max-field-sensitivity-array-size") + ? options.get_unsigned_int_option("max-field-sensitivity-array-size") + : DEFAULT_MAX_FIELD_SENSITIVITY_ARRAY_SIZE; + + if(!check_string_limit( + options, "max-nondet-string-length", field_sensitivity_limit, log)) + return false; + + if(!check_string_limit( + options, + "max-intermediate-string-length", + field_sensitivity_limit, + log)) + return false; + } + } + + return true; +} diff --git a/src/solvers/strings/string_refinement.h b/src/solvers/strings/string_refinement.h index 98b4d9c785d..b5fcea7414b 100644 --- a/src/solvers/strings/string_refinement.h +++ b/src/solvers/strings/string_refinement.h @@ -22,10 +22,12 @@ Author: Alberto Griggio, alberto.griggio@gmail.com #include #include +#include #include #include #include +#include "max_concrete_char_array.h" #include "string_constraint.h" #include "string_constraint_generator.h" #include "string_dependencies.h" @@ -38,7 +40,9 @@ Author: Alberto Griggio, alberto.griggio@gmail.com "(string-printable)" \ "(string-input-value):" \ "(string-non-empty)" \ - "(max-nondet-string-length):" + "(max-nondet-string-length):" \ + "(max-intermediate-string-length):" \ + "(use-fixed-size-arrays-for-bounded-strings)" #define HELP_STRING_REFINEMENT \ " --no-refine-strings turn off string refinement\n" \ @@ -50,7 +54,24 @@ Author: Alberto Griggio, alberto.griggio@gmail.com " --max-nondet-string-length n bound the length of nondet (e.g. input) strings.\n" /* NOLINT(*) */ \ " Default is " + std::to_string(MAX_CONCRETE_STRING_SIZE - 1) + "; note that\n" /* NOLINT(*) */ \ " setting the value higher than this does not work\n" /* NOLINT(*) */ \ - " with --trace or --validate-trace.\n" /* NOLINT(*) */ + " with --trace or --validate-trace.\n" /* NOLINT(*) */ \ + " --max-intermediate-string-length n bound the length of intermediate\n" /* NOLINT(*) */ \ + " strings introduced by string operations such\n" /* NOLINT(*) */ \ + " as concatenation, substring operations, etc.\n" /* NOLINT(*) */ \ + " If this is less than or equal to " + /* NOLINT(*) */ \ + std::to_string(max_concrete_char_array_length) + /* NOLINT(*) */ \ + " then such\n" /* NOLINT(*) */ \ + " strings can be represented by fixed-sized\n" /* NOLINT(*) */ \ + " arrays to the backend solver, which can\n" /* NOLINT(*) */ \ + " affect solver performance.\n" /* NOLINT(*) */ \ + " --use-fixed-size-arrays-for-bounded-strings if set, strings with bounded\n" /* NOLINT(*) */ \ + " length, either by max-intermediate- or\n" /* NOLINT(*) */ \ + " max-nondet-string-length, and which are short\n" /* NOLINT(*) */ \ + " (no longer than " + /* NOLINT(*) */ \ + std::to_string(max_concrete_char_array_length) + /* NOLINT(*) */ \ + " characters) will be stored\n" /* NOLINT(*) */ \ + " using a fixed-size array. This can have\n" /* NOLINT(*) */ \ + " implications for solver performance.\n" /* NOLINT(*) */ // The integration of the string solver into CBMC is incomplete. Therefore, // it is not turned on by default and not all options are available. @@ -72,6 +93,15 @@ class string_refinementt final : public bv_refinementt { std::size_t refinement_bound = 0; bool use_counter_example = true; + /// If set, all intermediate strings created by the solver (e.g. the + /// result of substring, concatenation and so on) are bounded by the + /// given maximum length. This can mean strings are represented by + /// constant-sized arrays, which can be much cheaper to solve against. + optionalt maximum_intermediate_string_length; + /// If set, and maximum_intermediate_string_length is also set to a + /// suitably small value (currently hard-coded to 256 chars or less), + /// fixed-size arrays will be used to store these now-bounded-size strings. + bool use_fixed_size_arrays_for_bounded_strings = false; }; public: @@ -120,6 +150,13 @@ class string_refinementt final : public bv_refinementt string_dependenciest dependencies; void add_lemma(const exprt &lemma, bool simplify_lemma = true); + + interval_sparse_arrayt restrict_sparse_array_to_index_set( + interval_sparse_arrayt array, + const std::set &index_set) const; + + std::vector + get_model_defined_indices(const std::set &index_set) const; }; exprt substitute_array_lists(exprt expr, std::size_t string_max_length); @@ -136,4 +173,6 @@ exprt substitute_array_access( symbol_generatort &symbol_generator, const bool left_propagate); +bool string_solver_options_valid(const optionst &, messaget &log); + #endif diff --git a/src/solvers/strings/string_refinement_util.cpp b/src/solvers/strings/string_refinement_util.cpp index dd0cd19cfd0..ef0f34cbebe 100644 --- a/src/solvers/strings/string_refinement_util.cpp +++ b/src/solvers/strings/string_refinement_util.cpp @@ -192,3 +192,22 @@ array_exprt interval_sparse_arrayt::concretize( size, it == entries.end() ? default_value : it->second); return array; } + +void interval_sparse_arrayt::print(std::ostream &log) const +{ + if(entries.empty()) + log << format(default_value) << " everywhere"; + else + { + bool first = true; + for(const auto &entry : entries) + { + if(!first) + log << ", then "; + else + first = false; + log << format(entry.second) << " up to index " << entry.first; + } + log << "; otherwise " << format(default_value); + } +} diff --git a/src/solvers/strings/string_refinement_util.h b/src/solvers/strings/string_refinement_util.h index dee45c60a75..96c50ba47d1 100644 --- a/src/solvers/strings/string_refinement_util.h +++ b/src/solvers/strings/string_refinement_util.h @@ -133,6 +133,28 @@ class interval_sparse_arrayt final : public sparse_arrayt /// Complexity is logarithmic in the number of entries. exprt at(std::size_t index) const; + /// Iterators over interval boundaries + typedef std::map::const_iterator const_iterator; // NOLINT + const_iterator begin() const + { + return entries.begin(); + } + const_iterator end() const + { + return entries.end(); + } + const_iterator erase(const_iterator pos) + { + return entries.erase(pos); + } + std::pair + insert(const std::pair &value) + { + return entries.insert(value); + } + + void print(std::ostream &log) const; + /// Array containing the same value at each index. explicit interval_sparse_arrayt(exprt default_value) : sparse_arrayt(default_value) diff --git a/src/util/object_factory_parameters.cpp b/src/util/object_factory_parameters.cpp index 867bb7e4efb..18d8742cb24 100644 --- a/src/util/object_factory_parameters.cpp +++ b/src/util/object_factory_parameters.cpp @@ -47,6 +47,10 @@ void object_factory_parameterst::set(const optionst &options) min_nondet_string_length = options.get_unsigned_int_option("min-nondet-string-length"); } + if(options.is_set("use-fixed-size-arrays-for-bounded-strings")) + { + use_fixed_size_arrays_for_bounded_strings = true; + } } /// Parse the object factory parameters from a given command line diff --git a/src/util/object_factory_parameters.h b/src/util/object_factory_parameters.h index 6f07c898a7a..4d4404e1ac3 100644 --- a/src/util/object_factory_parameters.h +++ b/src/util/object_factory_parameters.h @@ -46,6 +46,12 @@ struct object_factory_parameterst /// Minimum value for the non-deterministically-chosen length of a string. size_t min_nondet_string_length = 0; + /// If true, and nondet strings have a reasonably small bounded size + // (currently limited to 256 chars) we'll use a fixed-size array to store + // them. Otherwise the array will be unbounded (but the actual string + // length will still be bounded as normal). + bool use_fixed_size_arrays_for_bounded_strings = false; + /// Maximum depth of pointer chains (that contain recursion) in the nondet /// generated input objects. /// diff --git a/unit/solvers/strings/array_pool/array_pool.cpp b/unit/solvers/strings/array_pool/array_pool.cpp index 3b938682c63..d296d1d7436 100644 --- a/unit/solvers/strings/array_pool/array_pool.cpp +++ b/unit/solvers/strings/array_pool/array_pool.cpp @@ -21,7 +21,7 @@ SCENARIO("array_pool", "[core][solvers][strings][string_constraint_generator]") GIVEN("An array pool") { symbol_generatort symbol_generator; - array_poolt pool(symbol_generator); + array_poolt pool(symbol_generator, {}, false); WHEN("Looking for a pointer symbol") { diff --git a/unit/solvers/strings/string_format_builtin_function/length_for_format_specifier.cpp b/unit/solvers/strings/string_format_builtin_function/length_for_format_specifier.cpp index 661c18be626..74b92def349 100644 --- a/unit/solvers/strings/string_format_builtin_function/length_for_format_specifier.cpp +++ b/unit/solvers/strings/string_format_builtin_function/length_for_format_specifier.cpp @@ -50,7 +50,7 @@ SCENARIO( const symbol_tablet symbol_table; const namespacet ns{symbol_table}; symbol_generatort fresh_symbol; - array_poolt array_pool{fresh_symbol}; + array_poolt array_pool{fresh_symbol, {}, false}; WHEN("format specifier = %s") {