Skip to content

Commit bc76446

Browse files
tautschnigDaniel Kroening
authored and
Daniel Kroening
committed
Rearchitect var args support (making it actually work)
Previously all the logic to access values of variable argument lists was put into our va_arg() implementation. This required guessing what the parameters could have been, and did not take into account the initial value passed to va_start. Now va_start triggers constructing an array of pointers to parameters, the first of which is the original argument to va_start. The va_list is then just a pointer to the first element of the array. va_arg just increments the pointer. goto_program conversion places a side_effect_exprt where va_start was, and goto-symex populates the array with the actual values applicable for a particular invocation of the function.
1 parent d6a35bc commit bc76446

File tree

12 files changed

+130
-105
lines changed

12 files changed

+130
-105
lines changed

Diff for: regression/cbmc/Variadic1/test.desc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
FUTURE
1+
CORE
22
main.c
33

44
^EXIT=0$

Diff for: regression/cbmc/va_list3/test.desc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE broken-smt-backend
22
main.c
33

44
^EXIT=0$

Diff for: regression/cbmc/va_list3/windows-preprocessed.desc

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
KNOWNBUG
1+
CORE broken-smt-backend
22
windows-preprocessed.i
33
--winx64
44
^EXIT=0$

Diff for: src/ansi-c/expr2c.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -3617,8 +3617,8 @@ std::string expr2ct::convert_with_precedence(
36173617
return convert_prob_uniform(src, precedence=16);
36183618
else if(statement==ID_statement_expression)
36193619
return convert_statement_expression(src, precedence=15);
3620-
else if(statement==ID_gcc_builtin_va_arg_next)
3621-
return convert_function(src, "gcc_builtin_va_arg_next");
3620+
else if(statement == ID_va_start)
3621+
return convert_function(src, CPROVER_PREFIX "va_start");
36223622
else
36233623
return convert_norep(src, precedence);
36243624
}

Diff for: src/goto-instrument/goto_program2code.cpp

+10-8
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,10 @@ void goto_program2codet::scan_for_varargs()
104104
const exprt &l = target->get_assign().lhs();
105105
const exprt &r = target->get_assign().rhs();
106106

107-
// find va_arg_next
108-
if(r.id()==ID_side_effect &&
109-
to_side_effect_expr(r).get_statement()==ID_gcc_builtin_va_arg_next)
107+
// find va_start
108+
if(
109+
r.id() == ID_side_effect &&
110+
to_side_effect_expr(r).get_statement() == ID_va_start)
110111
{
111112
assert(r.has_operands());
112113
va_list_expr.insert(r.op0());
@@ -308,16 +309,17 @@ goto_programt::const_targett goto_program2codet::convert_assign_varargs(
308309

309310
dest.add(std::move(f));
310311
}
311-
else if(r.id()==ID_address_of)
312+
else if(
313+
r.id() == ID_side_effect &&
314+
to_side_effect_expr(r).get_statement() == ID_va_start)
312315
{
313316
code_function_callt f(
314-
symbol_exprt("va_start", code_typet({}, empty_typet())),
315-
{this_va_list_expr, to_address_of_expr(r).object()});
317+
symbol_exprt(ID_va_start, code_typet({}, empty_typet())),
318+
{this_va_list_expr, to_address_of_expr(skip_typecast(r.op0())).object()});
316319

317320
dest.add(std::move(f));
318321
}
319-
else if(r.id()==ID_side_effect &&
320-
to_side_effect_expr(r).get_statement()==ID_gcc_builtin_va_arg_next)
322+
else if(r.id() == ID_plus)
321323
{
322324
code_function_callt f(
323325
symbol_exprt("va_arg", code_typet({}, empty_typet())),

Diff for: src/goto-programs/builtin_functions.cpp

+18-19
Original file line numberDiff line numberDiff line change
@@ -1080,10 +1080,10 @@ void goto_convertt::do_function_call_symbol(
10801080
else if(identifier==ID_gcc_builtin_va_arg)
10811081
{
10821082
// This does two things.
1083-
// 1) Move list pointer to next argument.
1084-
// Done by gcc_builtin_va_arg_next.
1085-
// 2) Return value of argument.
1083+
// 1) Return value of argument.
10861084
// This is just dereferencing.
1085+
// 2) Move list pointer to next argument.
1086+
// This is just an increment.
10871087

10881088
if(arguments.size()!=1)
10891089
{
@@ -1095,25 +1095,21 @@ void goto_convertt::do_function_call_symbol(
10951095

10961096
exprt list_arg=make_va_list(arguments[0]);
10971097

1098-
{
1099-
side_effect_exprt rhs(
1100-
ID_gcc_builtin_va_arg_next,
1101-
list_arg.type(),
1102-
function.source_location());
1103-
rhs.copy_to_operands(list_arg);
1104-
rhs.set(ID_C_va_arg_type, to_code_type(function.type()).return_type());
1105-
dest.add(goto_programt::make_assignment(
1106-
list_arg, rhs, function.source_location()));
1107-
}
1108-
11091098
if(lhs.is_not_nil())
11101099
{
11111100
typet t=pointer_type(lhs.type());
1112-
dereference_exprt rhs(typecast_exprt(list_arg, t), lhs.type());
1101+
dereference_exprt rhs{typecast_exprt(dereference_exprt{list_arg}, t)};
11131102
rhs.add_source_location()=function.source_location();
11141103
dest.add(
11151104
goto_programt::make_assignment(lhs, rhs, function.source_location()));
11161105
}
1106+
1107+
code_assignt assign{
1108+
list_arg, plus_exprt{list_arg, from_integer(1, pointer_diff_type())}};
1109+
assign.rhs().set(
1110+
ID_C_va_arg_type, to_code_type(function.type()).return_type());
1111+
dest.add(goto_programt::make_assignment(
1112+
std::move(assign), function.source_location()));
11171113
}
11181114
else if(identifier=="__builtin_va_copy")
11191115
{
@@ -1138,7 +1134,7 @@ void goto_convertt::do_function_call_symbol(
11381134
dest.add(goto_programt::make_assignment(
11391135
dest_expr, src_expr, function.source_location()));
11401136
}
1141-
else if(identifier=="__builtin_va_start")
1137+
else if(identifier == "__builtin_va_start" || identifier == "__va_start")
11421138
{
11431139
// Set the list argument to be the address of the
11441140
// parameter argument.
@@ -1151,8 +1147,6 @@ void goto_convertt::do_function_call_symbol(
11511147
}
11521148

11531149
exprt dest_expr=make_va_list(arguments[0]);
1154-
const typecast_exprt src_expr(
1155-
address_of_exprt(arguments[1]), dest_expr.type());
11561150

11571151
if(!is_lvalue(dest_expr))
11581152
{
@@ -1161,8 +1155,13 @@ void goto_convertt::do_function_call_symbol(
11611155
throw 0;
11621156
}
11631157

1158+
side_effect_exprt rhs{
1159+
ID_va_start, dest_expr.type(), function.source_location()};
1160+
rhs.add_to_operands(
1161+
typecast_exprt{address_of_exprt{arguments[1]}, dest_expr.type()});
1162+
11641163
dest.add(goto_programt::make_assignment(
1165-
dest_expr, src_expr, function.source_location()));
1164+
std::move(dest_expr), std::move(rhs), function.source_location()));
11661165
}
11671166
else if(identifier=="__builtin_va_end")
11681167
{

Diff for: src/goto-symex/frame.h

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ struct framet
2626
irep_idt function_identifier;
2727
std::map<goto_programt::const_targett, goto_state_listt> goto_state_map;
2828
symex_targett::sourcet calling_location;
29+
std::vector<irep_idt> parameter_names;
2930

3031
goto_programt::const_targett end_of_function;
3132
exprt return_value = nil_exprt();

Diff for: src/goto-symex/goto_symex.h

+3-4
Original file line numberDiff line numberDiff line change
@@ -610,10 +610,9 @@ class goto_symext
610610
/// \return The resulting expression
611611
static exprt add_to_lhs(const exprt &lhs, const exprt &what);
612612

613-
virtual void symex_gcc_builtin_va_arg_next(
614-
statet &state,
615-
const exprt &lhs,
616-
const side_effect_exprt &code);
613+
virtual void
614+
symex_va_start(statet &, const exprt &lhs, const side_effect_exprt &);
615+
617616
/// Symbolically execute an assignment instruction that has an `allocate` on
618617
/// the right hand side
619618
/// \param state: Symbolic execution state for current instruction

Diff for: src/goto-symex/symex_assign.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,8 @@ void goto_symext::symex_assign(statet &state, const code_assignt &code)
5252
PRECONDITION(lhs.is_nil());
5353
symex_printf(state, side_effect_expr);
5454
}
55-
else if(statement==ID_gcc_builtin_va_arg_next)
56-
symex_gcc_builtin_va_arg_next(state, lhs, side_effect_expr);
55+
else if(statement == ID_va_start)
56+
symex_va_start(state, lhs, side_effect_expr);
5757
else
5858
UNREACHABLE;
5959
}

Diff for: src/goto-symex/symex_builtin_functions.cpp

+75-43
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ Author: Daniel Kroening, [email protected]
1414
#include <util/arith_tools.h>
1515
#include <util/c_types.h>
1616
#include <util/expr_initializer.h>
17+
#include <util/expr_util.h>
18+
#include <util/fresh_symbol.h>
1719
#include <util/invariant_utils.h>
1820
#include <util/optional.h>
1921
#include <util/pointer_offset_size.h>
@@ -206,24 +208,44 @@ void goto_symext::symex_allocate(
206208
code_assignt(lhs, typecast_exprt::conditional_cast(rhs, lhs.type())));
207209
}
208210

209-
irep_idt get_symbol(const exprt &src)
211+
/// Construct an entry for the var args array. Visual Studio complicates this as
212+
/// we need to put immediate values or pointers in there, depending on the size
213+
/// of the parameter.
214+
static exprt va_list_entry(
215+
const irep_idt &parameter,
216+
const pointer_typet &lhs_type,
217+
const namespacet &ns)
210218
{
211-
if(src.id()==ID_typecast)
212-
return get_symbol(to_typecast_expr(src).op());
213-
else if(src.id()==ID_address_of)
219+
static const pointer_typet void_ptr_type = pointer_type(empty_typet{});
220+
221+
symbol_exprt parameter_expr = ns.lookup(parameter).symbol_expr();
222+
223+
// Visual Studio has va_list == char* and stores parameters of size no
224+
// greater than the size of a pointer as immediate values
225+
if(lhs_type.subtype().id() != ID_pointer)
214226
{
215-
exprt op=to_address_of_expr(src).object();
216-
if(op.id()==ID_symbol &&
217-
op.get_bool(ID_C_SSA_symbol))
218-
return to_ssa_expr(op).get_object_name();
219-
else
220-
return irep_idt();
227+
auto parameter_size = size_of_expr(parameter_expr.type(), ns);
228+
CHECK_RETURN(parameter_size.has_value());
229+
230+
binary_predicate_exprt fits_slot{
231+
*parameter_size,
232+
ID_le,
233+
from_integer(lhs_type.get_width(), parameter_size->type())};
234+
235+
return if_exprt{
236+
fits_slot,
237+
typecast_exprt::conditional_cast(parameter_expr, void_ptr_type),
238+
typecast_exprt::conditional_cast(
239+
address_of_exprt{parameter_expr}, void_ptr_type)};
221240
}
222241
else
223-
return irep_idt();
242+
{
243+
return typecast_exprt::conditional_cast(
244+
address_of_exprt{std::move(parameter_expr)}, void_ptr_type);
245+
}
224246
}
225247

226-
void goto_symext::symex_gcc_builtin_va_arg_next(
248+
void goto_symext::symex_va_start(
227249
statet &state,
228250
const exprt &lhs,
229251
const side_effect_exprt &code)
@@ -233,42 +255,52 @@ void goto_symext::symex_gcc_builtin_va_arg_next(
233255
if(lhs.is_nil())
234256
return; // ignore
235257

236-
// to allow constant propagation
237-
exprt tmp = state.rename(code.op0(), ns).get();
238-
do_simplify(tmp);
239-
irep_idt id=get_symbol(tmp);
240-
241-
const auto zero = zero_initializer(lhs.type(), code.source_location(), ns);
242-
CHECK_RETURN(zero.has_value());
243-
exprt rhs(*zero);
258+
// create an array holding pointers to the parameters, starting after the
259+
// parameter that the operand points to
260+
const exprt &op = skip_typecast(code.op0());
261+
// this must be the address of a symbol
262+
const irep_idt start_parameter =
263+
to_ssa_expr(to_address_of_expr(op).object()).get_object_name();
244264

245-
if(!id.empty())
265+
exprt::operandst va_arg_operands;
266+
bool parameter_id_found = false;
267+
for(auto &parameter : state.call_stack().top().parameter_names)
246268
{
247-
// strip last name off id to get function name
248-
std::size_t pos=id2string(id).rfind("::");
249-
if(pos!=std::string::npos)
269+
if(!parameter_id_found)
250270
{
251-
irep_idt function_identifier=std::string(id2string(id), 0, pos);
252-
std::string base=id2string(function_identifier)+"::va_arg";
253-
254-
if(has_prefix(id2string(id), base))
255-
id=base+std::to_string(
256-
safe_string2unsigned(
257-
std::string(id2string(id), base.size(), std::string::npos))+1);
258-
else
259-
id=base+"0";
260-
261-
const symbolt *symbol;
262-
if(!ns.lookup(id, symbol))
263-
{
264-
const symbol_exprt symbol_expr(symbol->name, symbol->type);
265-
rhs = typecast_exprt::conditional_cast(
266-
address_of_exprt(symbol_expr), lhs.type());
267-
}
271+
parameter_id_found = parameter == start_parameter;
272+
continue;
268273
}
269-
}
270274

271-
symex_assign(state, code_assignt(lhs, rhs));
275+
va_arg_operands.push_back(
276+
va_list_entry(parameter, to_pointer_type(lhs.type()), ns));
277+
}
278+
const std::size_t va_arg_size = va_arg_operands.size();
279+
exprt array =
280+
array_exprt{std::move(va_arg_operands),
281+
array_typet{pointer_type(empty_typet{}),
282+
from_integer(va_arg_size, size_type())}};
283+
284+
symbolt &va_array = get_fresh_aux_symbol(
285+
array.type(),
286+
id2string(state.source.function_id),
287+
"va_args",
288+
state.source.pc->source_location,
289+
ns.lookup(state.source.function_id).mode,
290+
state.symbol_table);
291+
va_array.value = array;
292+
293+
clean_expr(array, state, false);
294+
array = state.rename(std::move(array), ns).get();
295+
do_simplify(array);
296+
symex_assign(state, code_assignt{va_array.symbol_expr(), std::move(array)});
297+
298+
exprt rhs = address_of_exprt{
299+
index_exprt{va_array.symbol_expr(), from_integer(0, index_type())}};
300+
rhs = state.rename(std::move(rhs), ns).get();
301+
symex_assign(
302+
state,
303+
code_assignt{lhs, typecast_exprt::conditional_cast(rhs, lhs.type())});
272304
}
273305

274306
irep_idt get_string_argument_rec(const exprt &src)

Diff for: src/goto-symex/symex_function_call.cpp

+15-23
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ Author: Daniel Kroening, [email protected]
1515
#include <util/byte_operators.h>
1616
#include <util/c_types.h>
1717
#include <util/exception_utils.h>
18+
#include <util/fresh_symbol.h>
1819
#include <util/invariant.h>
1920
#include <util/prefix.h>
2021
#include <util/range.h>
@@ -47,6 +48,7 @@ void goto_symext::parameter_assignments(
4748
{
4849
INVARIANT(
4950
!identifier.empty(), "function parameter must have an identifier");
51+
state.call_stack().top().parameter_names.push_back(identifier);
5052

5153
const symbolt &symbol=ns.lookup(identifier);
5254
symbol_exprt lhs=symbol.symbol_expr();
@@ -147,30 +149,20 @@ void goto_symext::parameter_assignments(
147149
if(function_type.has_ellipsis())
148150
{
149151
// These are va_arg arguments; their types may differ from call to call
150-
std::size_t va_count=0;
151-
const symbolt *va_sym=nullptr;
152-
while(!ns.lookup(
153-
id2string(function_identifier)+"::va_arg"+std::to_string(va_count),
154-
va_sym))
155-
++va_count;
156-
157-
for( ; it1!=arguments.end(); it1++, va_count++)
152+
for(; it1 != arguments.end(); it1++)
158153
{
159-
irep_idt id=
160-
id2string(function_identifier)+"::va_arg"+std::to_string(va_count);
161-
162-
// add to symbol table
163-
symbolt symbol;
164-
symbol.name=id;
165-
symbol.base_name="va_arg"+std::to_string(va_count);
166-
symbol.mode=ID_C;
167-
symbol.type=it1->type();
168-
169-
state.symbol_table.insert(std::move(symbol));
170-
171-
symbol_exprt lhs=symbol_exprt(id, it1->type());
172-
173-
symex_assign(state, code_assignt(lhs, *it1));
154+
symbolt &va_arg = get_fresh_aux_symbol(
155+
it1->type(),
156+
id2string(function_identifier),
157+
"va_arg",
158+
state.source.pc->source_location,
159+
ns.lookup(function_identifier).mode,
160+
state.symbol_table);
161+
va_arg.is_parameter = true;
162+
163+
state.call_stack().top().parameter_names.push_back(va_arg.name);
164+
165+
symex_assign(state, code_assignt{va_arg.symbol_expr(), *it1});
174166
}
175167
}
176168
else if(it1!=arguments.end())

Diff for: src/util/irep_ids.def

+1-1
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ IREP_ID_ONE(alignof)
303303
IREP_ID_ONE(clang_builtin_convertvector)
304304
IREP_ID_ONE(gcc_builtin_va_arg)
305305
IREP_ID_ONE(gcc_builtin_types_compatible_p)
306-
IREP_ID_ONE(gcc_builtin_va_arg_next)
306+
IREP_ID_ONE(va_start)
307307
IREP_ID_ONE(gcc_float16)
308308
IREP_ID_ONE(gcc_float32)
309309
IREP_ID_ONE(gcc_float32x)

0 commit comments

Comments
 (0)