From bab134108845f57c3ee74314df7f4e209bd030c3 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Thu, 16 Nov 2023 09:19:44 +0000 Subject: [PATCH 1/4] Move is_compile_time_constantt to top of file This is useful in various methods and should be available to all of them. --- src/ansi-c/c_typecheck_expr.cpp | 176 ++++++++++++++++---------------- 1 file changed, 88 insertions(+), 88 deletions(-) diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index df210afc6cf..9f8e7bf338d 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -44,6 +44,94 @@ Author: Daniel Kroening, kroening@kroening.com #include +/// Architecturally similar to \ref can_forward_propagatet, but specialized for +/// what is a constexpr, i.e., an expression that can be fully evaluated at +/// compile time. +class is_compile_time_constantt +{ +public: + explicit is_compile_time_constantt(const namespacet &ns) : ns(ns) + { + } + + /// returns true iff the expression can be considered constant + bool operator()(const exprt &e) const + { + return is_constant(e); + } + +protected: + const namespacet &ns; + + /// This function determines what expressions are to be propagated as + /// "constants" + bool is_constant(const exprt &e) const + { + if(e.id() == ID_infinity) + return true; + + if(e.is_constant()) + return true; + + if(e.id() == ID_address_of) + { + return is_constant_address_of(to_address_of_expr(e).object()); + } + else if( + e.id() == ID_typecast || e.id() == ID_array_of || e.id() == ID_plus || + e.id() == ID_mult || e.id() == ID_array || e.id() == ID_with || + e.id() == ID_struct || e.id() == ID_union || e.id() == ID_empty_union || + e.id() == ID_equal || e.id() == ID_notequal || e.id() == ID_lt || + e.id() == ID_le || e.id() == ID_gt || e.id() == ID_ge || + e.id() == ID_if || e.id() == ID_not || e.id() == ID_and || + e.id() == ID_or || e.id() == ID_bitnot || e.id() == ID_bitand || + e.id() == ID_bitor || e.id() == ID_bitxor || e.id() == ID_vector) + { + return std::all_of( + e.operands().begin(), e.operands().end(), [this](const exprt &op) { + return is_constant(op); + }); + } + + return false; + } + + /// this function determines which reference-typed expressions are constant + bool is_constant_address_of(const exprt &e) const + { + if(e.id() == ID_symbol) + { + return e.type().id() == ID_code || + ns.lookup(to_symbol_expr(e).get_identifier()).is_static_lifetime; + } + else if(e.id() == ID_array && e.get_bool(ID_C_string_constant)) + return true; + else if(e.id() == ID_label) + return true; + else if(e.id() == ID_index) + { + const index_exprt &index_expr = to_index_expr(e); + + return is_constant_address_of(index_expr.array()) && + is_constant(index_expr.index()); + } + else if(e.id() == ID_member) + { + return is_constant_address_of(to_member_expr(e).compound()); + } + else if(e.id() == ID_dereference) + { + const dereference_exprt &deref = to_dereference_expr(e); + + return is_constant(deref.pointer()); + } + else if(e.id() == ID_string_constant) + return true; + + return false; + } +}; + void c_typecheck_baset::typecheck_expr(exprt &expr) { if(expr.id()==ID_already_typechecked) @@ -4561,94 +4649,6 @@ void c_typecheck_baset::typecheck_side_effect_assignment( throw 0; } -/// Architecturally similar to \ref can_forward_propagatet, but specialized for -/// what is a constexpr, i.e., an expression that can be fully evaluated at -/// compile time. -class is_compile_time_constantt -{ -public: - explicit is_compile_time_constantt(const namespacet &ns) : ns(ns) - { - } - - /// returns true iff the expression can be considered constant - bool operator()(const exprt &e) const - { - return is_constant(e); - } - -protected: - const namespacet &ns; - - /// This function determines what expressions are to be propagated as - /// "constants" - bool is_constant(const exprt &e) const - { - if(e.id() == ID_infinity) - return true; - - if(e.is_constant()) - return true; - - if(e.id() == ID_address_of) - { - return is_constant_address_of(to_address_of_expr(e).object()); - } - else if( - e.id() == ID_typecast || e.id() == ID_array_of || e.id() == ID_plus || - e.id() == ID_mult || e.id() == ID_array || e.id() == ID_with || - e.id() == ID_struct || e.id() == ID_union || e.id() == ID_empty_union || - e.id() == ID_equal || e.id() == ID_notequal || e.id() == ID_lt || - e.id() == ID_le || e.id() == ID_gt || e.id() == ID_ge || - e.id() == ID_if || e.id() == ID_not || e.id() == ID_and || - e.id() == ID_or || e.id() == ID_bitnot || e.id() == ID_bitand || - e.id() == ID_bitor || e.id() == ID_bitxor || e.id() == ID_vector) - { - return std::all_of( - e.operands().begin(), e.operands().end(), [this](const exprt &op) { - return is_constant(op); - }); - } - - return false; - } - - /// this function determines which reference-typed expressions are constant - bool is_constant_address_of(const exprt &e) const - { - if(e.id() == ID_symbol) - { - return e.type().id() == ID_code || - ns.lookup(to_symbol_expr(e).get_identifier()).is_static_lifetime; - } - else if(e.id() == ID_array && e.get_bool(ID_C_string_constant)) - return true; - else if(e.id() == ID_label) - return true; - else if(e.id() == ID_index) - { - const index_exprt &index_expr = to_index_expr(e); - - return is_constant_address_of(index_expr.array()) && - is_constant(index_expr.index()); - } - else if(e.id() == ID_member) - { - return is_constant_address_of(to_member_expr(e).compound()); - } - else if(e.id() == ID_dereference) - { - const dereference_exprt &deref = to_dereference_expr(e); - - return is_constant(deref.pointer()); - } - else if(e.id() == ID_string_constant) - return true; - - return false; - } -}; - void c_typecheck_baset::make_constant(exprt &expr) { // Floating-point expressions may require a rounding mode. From 7d0523f499f65619a7b8fb8ee7104001078e6c99 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Fri, 8 Jan 2021 10:56:39 +0000 Subject: [PATCH 2/4] Mimic GCC/Clang simplification behaviour when type checking ?: Neither GCC nor Clang simplify expressions involving symbols even when the result would always be 0, which our simplifier figure out. Fixes: #7927 --- regression/ansi-c/sizeof6/main.c | 23 +++++++++++++++++++++++ regression/ansi-c/sizeof6/test.desc | 8 ++++++++ src/ansi-c/c_typecheck_expr.cpp | 22 +++++++++++++++------- 3 files changed, 46 insertions(+), 7 deletions(-) create mode 100644 regression/ansi-c/sizeof6/main.c create mode 100644 regression/ansi-c/sizeof6/test.desc diff --git a/regression/ansi-c/sizeof6/main.c b/regression/ansi-c/sizeof6/main.c new file mode 100644 index 00000000000..071b1008b0b --- /dev/null +++ b/regression/ansi-c/sizeof6/main.c @@ -0,0 +1,23 @@ +int main() +{ + long long i; +#ifndef _MSC_VER + _Static_assert(sizeof(int) == sizeof(*(1 ? ((void *)(0ll)) : (int *)1)), ""); + // We are able to simplify all of the expressions involving i below to 0, but + // GCC and Clang don't do so. Hence, the static asserts pass for those + // compilers. + _Static_assert( + sizeof(int) != sizeof(*(1 ? ((void *)(i * 0)) : (int *)1)), ""); + _Static_assert( + sizeof(int) != sizeof(*(1 ? ((void *)(i - i)) : (int *)1)), ""); + _Static_assert( + sizeof(int) != sizeof(*(1 ? ((void *)(i ? 0ll : 0ll)) : (int *)1)), ""); + _Static_assert( + sizeof(int) != sizeof(*(1 ? ((void *)(0 ? i : 0ll)) : (int *)1)), ""); +#else + static_assert(sizeof(int) == sizeof(*(1 ? ((void *)(0)) : (int *)1)), ""); + // Visual Studio rejects this as "illegal indirection" + // static_assert( + // sizeof(int) == sizeof(*(1 ? ((void *)(i * 0)) : (int *)1)), ""); +#endif +} diff --git a/regression/ansi-c/sizeof6/test.desc b/regression/ansi-c/sizeof6/test.desc new file mode 100644 index 00000000000..466da18b2b5 --- /dev/null +++ b/regression/ansi-c/sizeof6/test.desc @@ -0,0 +1,8 @@ +CORE +main.c + +^EXIT=0$ +^SIGNAL=0$ +-- +^warning: ignoring +^CONVERSION ERROR$ diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index 9f8e7bf338d..1ad4ef2edb7 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -1719,24 +1719,32 @@ void c_typecheck_baset::typecheck_expr_trinary(if_exprt &expr) operands[1].type().id()!=ID_pointer) implicit_typecast(operands[1], operands[2].type()); + auto compile_time_null_pointer = [](const exprt &e, const namespacet &ns) { + if(!is_compile_time_constantt(ns)(e)) + return false; + auto s = simplify_expr(e, ns); + CHECK_RETURN(is_compile_time_constantt(ns)(s)); + if(!s.is_constant()) + return false; + return is_null_pointer(to_constant_expr(s)); + }; + if(operands[1].type().id()==ID_pointer && operands[2].type().id()==ID_pointer && operands[1].type()!=operands[2].type()) { - exprt tmp1=simplify_expr(operands[1], *this); - exprt tmp2=simplify_expr(operands[2], *this); - - // is one of them void * AND null? Convert that to the other. - // (at least that's how GCC behaves) + // Is one of them void * AND null? Convert that to the other. + // (At least that's how GCC, Clang, and Visual Studio behave. Presence of + // symbols blocks them from simplifying the expression to NULL.) if( to_pointer_type(operands[1].type()).base_type().id() == ID_empty && - tmp1.is_constant() && is_null_pointer(to_constant_expr(tmp1))) + compile_time_null_pointer(operands[1], *this)) { implicit_typecast(operands[1], operands[2].type()); } else if( to_pointer_type(operands[2].type()).base_type().id() == ID_empty && - tmp2.is_constant() && is_null_pointer(to_constant_expr(tmp2))) + compile_time_null_pointer(operands[2], *this)) { implicit_typecast(operands[2], operands[1].type()); } From 843088749353f2a7b947242d9fe7f97799244507 Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 13 Dec 2023 13:57:39 +0000 Subject: [PATCH 3/4] Expand documentation of is_compile_time_constantt C11 Section 6.6 "Constant expressions" defines what are required to be constant expressions, and permits implementations to pick additional constant expressions. --- src/ansi-c/c_typecheck_expr.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index 1ad4ef2edb7..09b124f92dc 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -45,8 +45,9 @@ Author: Daniel Kroening, kroening@kroening.com #include /// Architecturally similar to \ref can_forward_propagatet, but specialized for -/// what is a constexpr, i.e., an expression that can be fully evaluated at -/// compile time. +/// what is an expression that can be fully evaluated at compile time within +/// the limits of what the C standard permits. See 6.6 Constant expressions in +/// C11. class is_compile_time_constantt { public: @@ -67,16 +68,21 @@ class is_compile_time_constantt /// "constants" bool is_constant(const exprt &e) const { + // non-standard numeric constant if(e.id() == ID_infinity) return true; + // numeric, character, or pointer-typed constant if(e.is_constant()) return true; + // possibly an address constant if(e.id() == ID_address_of) { return is_constant_address_of(to_address_of_expr(e).object()); } + // we choose to accept the following expressions (over constant operands) as + // constant expressions else if( e.id() == ID_typecast || e.id() == ID_array_of || e.id() == ID_plus || e.id() == ID_mult || e.id() == ID_array || e.id() == ID_with || From 2d5adef42fdf53ab05cba1aaa5da0527facc7bec Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Wed, 13 Dec 2023 15:01:07 +0000 Subject: [PATCH 4/4] __builtin_constant_p: properly test, fix behaviour We previously only compile-time tested what required a run-time check of assertions, so move this to the "cbmc" regression test suite. Also, extend it by behaviour that distinguishes it from actual C-standard specified constant expressions. Finally, fix the behaviour for string literals. --- .../ansi-c/gcc_builtin_constant_p1/main.c | 28 ----- .../cbmc/gcc_builtin_constant_p1/main.c | 47 ++++++++ .../gcc_builtin_constant_p1/test.desc | 3 +- src/ansi-c/c_typecheck_expr.cpp | 106 +++++++++++++----- 4 files changed, 130 insertions(+), 54 deletions(-) delete mode 100644 regression/ansi-c/gcc_builtin_constant_p1/main.c create mode 100644 regression/cbmc/gcc_builtin_constant_p1/main.c rename regression/{ansi-c => cbmc}/gcc_builtin_constant_p1/test.desc (63%) diff --git a/regression/ansi-c/gcc_builtin_constant_p1/main.c b/regression/ansi-c/gcc_builtin_constant_p1/main.c deleted file mode 100644 index 8ce2e0f5522..00000000000 --- a/regression/ansi-c/gcc_builtin_constant_p1/main.c +++ /dev/null @@ -1,28 +0,0 @@ -#include - -enum { E1=1 } var; - -struct whatnot -{ -} whatnot_var; - -int main() -{ - // this is gcc only - - #ifdef __GNUC__ - assert(__builtin_constant_p("some string")); - assert(__builtin_constant_p(1.0f)); - assert(__builtin_constant_p(E1)); - assert(!__builtin_constant_p(var)); - assert(!__builtin_constant_p(main)); - assert(!__builtin_constant_p(whatnot_var)); - assert(!__builtin_constant_p(&var)); - assert(__builtin_constant_p(__builtin_constant_p(var))); - - // side-effects are _not_ evaluated - int i=0; - assert(!__builtin_constant_p(i++)); - assert(i==0); - #endif -} diff --git a/regression/cbmc/gcc_builtin_constant_p1/main.c b/regression/cbmc/gcc_builtin_constant_p1/main.c new file mode 100644 index 00000000000..53621d6f122 --- /dev/null +++ b/regression/cbmc/gcc_builtin_constant_p1/main.c @@ -0,0 +1,47 @@ +#include + +#ifdef __GNUC__ +enum +{ + E1 = 1 +} var; + +struct whatnot +{ +} whatnot_var; +#endif + +int main() +{ + // this is gcc only + +#ifdef __GNUC__ + assert(__builtin_constant_p("some string")); + assert(__builtin_constant_p(1.0f)); + assert(__builtin_constant_p(E1)); + assert(!__builtin_constant_p(var)); + assert(!__builtin_constant_p(main)); + assert(!__builtin_constant_p(whatnot_var)); + assert(!__builtin_constant_p(&var)); + assert(__builtin_constant_p(__builtin_constant_p(var))); + + // The following are not constant expressions in the sense of the C standard + // and GCC wouldn't deem them constant expressions either, but they are + // subject to GCC's constant folding. See also regression test ansi-c/sizeof6. + // Clang's behaviour, however, is somewhat different. See + // https://github.com/llvm/llvm-project/issues/55946 for further examples of + // where they differ. + int j; +# ifndef __clang__ + assert(__builtin_constant_p(j * 0)); + assert(__builtin_constant_p(j - j)); + assert(__builtin_constant_p(j ? 0ll : 0ll)); +# endif + assert(__builtin_constant_p(0 ? j : 0ll)); + + // side-effects are _not_ evaluated + int i = 0; + assert(!__builtin_constant_p(i++)); + assert(i == 0); +#endif +} diff --git a/regression/ansi-c/gcc_builtin_constant_p1/test.desc b/regression/cbmc/gcc_builtin_constant_p1/test.desc similarity index 63% rename from regression/ansi-c/gcc_builtin_constant_p1/test.desc rename to regression/cbmc/gcc_builtin_constant_p1/test.desc index 307a7ac801e..659a36aac28 100644 --- a/regression/ansi-c/gcc_builtin_constant_p1/test.desc +++ b/regression/cbmc/gcc_builtin_constant_p1/test.desc @@ -1,6 +1,7 @@ -CORE gcc-only broken-test-c++-front-end +CORE main.c +^VERIFICATION SUCCESSFUL$ ^EXIT=0$ ^SIGNAL=0$ -- diff --git a/src/ansi-c/c_typecheck_expr.cpp b/src/ansi-c/c_typecheck_expr.cpp index 09b124f92dc..59edf0f932a 100644 --- a/src/ansi-c/c_typecheck_expr.cpp +++ b/src/ansi-c/c_typecheck_expr.cpp @@ -66,14 +66,14 @@ class is_compile_time_constantt /// This function determines what expressions are to be propagated as /// "constants" - bool is_constant(const exprt &e) const + virtual bool is_constant(const exprt &e) const { // non-standard numeric constant if(e.id() == ID_infinity) return true; // numeric, character, or pointer-typed constant - if(e.is_constant()) + if(e.is_constant() || e.id() == ID_string_constant) return true; // possibly an address constant @@ -138,6 +138,54 @@ class is_compile_time_constantt } }; +/// Clang appears to have a somewhat different idea of what is/isn't to be +/// considered a constant at compile time. +class clang_is_constant_foldedt : public is_compile_time_constantt +{ +public: + explicit clang_is_constant_foldedt(const namespacet &ns) + : is_compile_time_constantt(ns) + { + } + +protected: + /// This function determines what expressions are constant folded by clang + bool is_constant(const exprt &e) const override + { + // we need to adhere to short-circuit semantics for the following + if(e.id() == ID_if) + { + const if_exprt &if_expr = to_if_expr(e); + if(!is_constant(if_expr.cond())) + return false; + exprt const_cond = simplify_expr(if_expr.cond(), ns); + CHECK_RETURN(const_cond.is_constant()); + if(const_cond.is_true()) + return is_constant(if_expr.true_case()); + else + return is_constant(if_expr.false_case()); + } + else if(e.id() == ID_and || e.id() == ID_or) + { + for(const auto &op : e.operands()) + { + if(!is_constant(op)) + return false; + exprt const_cond = simplify_expr(op, ns); + CHECK_RETURN(const_cond.is_constant()); + // stop when we hit false (for an and) or true (for an or) + if(const_cond == make_boolean_expr(e.id() == ID_or)) + break; + } + return true; + } + else if(e.id() == ID_address_of) + return false; + else + return is_compile_time_constantt::is_constant(e); + } +}; + void c_typecheck_baset::typecheck_expr(exprt &expr) { if(expr.id()==ID_already_typechecked) @@ -3684,8 +3732,11 @@ exprt c_typecheck_baset::do_special_functions( } else if(identifier=="__builtin_constant_p") { - // this is a gcc extension to tell whether the argument - // is known to be a compile-time constant + // This is a gcc/clang extension to tell whether the argument + // is known to be a compile-time constant. The behavior of these two + // compiler families, however, is quite different, which we need to take + // care of in the below config-dependent branches. + if(expr.arguments().size()!=1) { error().source_location = f_op.source_location(); @@ -3693,30 +3744,35 @@ exprt c_typecheck_baset::do_special_functions( throw 0; } - // do not typecheck the argument - it is never evaluated, and thus side - // effects must not show up either - - // try to produce constant - exprt tmp1=expr.arguments().front(); - simplify(tmp1, *this); - - bool is_constant=false; - - // Need to do some special treatment for string literals, - // which are (void *)&("lit"[0]) - if( - tmp1.id() == ID_typecast && - to_typecast_expr(tmp1).op().id() == ID_address_of && - to_address_of_expr(to_typecast_expr(tmp1).op()).object().id() == - ID_index && - to_index_expr(to_address_of_expr(to_typecast_expr(tmp1).op()).object()) - .array() - .id() == ID_string_constant) + bool is_constant = false; + if(config.ansi_c.mode == configt::ansi_ct::flavourt::CLANG) { - is_constant=true; + is_constant = clang_is_constant_foldedt(*this)(expr.arguments().front()); } else - is_constant=tmp1.is_constant(); + { + // try to produce constant + exprt tmp1 = expr.arguments().front(); + simplify(tmp1, *this); + + // Need to do some special treatment for string literals, + // which are (void *)&("lit"[0]) + if( + tmp1.id() == ID_typecast && + to_typecast_expr(tmp1).op().id() == ID_address_of && + to_address_of_expr(to_typecast_expr(tmp1).op()).object().id() == + ID_index && + to_index_expr(to_address_of_expr(to_typecast_expr(tmp1).op()).object()) + .array() + .id() == ID_string_constant) + { + is_constant = true; + } + else if(tmp1.id() == ID_string_constant) + is_constant = true; + else + is_constant = tmp1.is_constant(); + } exprt tmp2=from_integer(is_constant, expr.type()); tmp2.add_source_location()=source_location;