-
Notifications
You must be signed in to change notification settings - Fork 273
/
Copy pathremove_java_new.cpp
461 lines (397 loc) · 15.9 KB
/
remove_java_new.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
/*******************************************************************\
Module: Remove Java New Operators
Author: Peter Schrammel
\*******************************************************************/
/// \file
/// Remove Java New Operators
#include "remove_java_new.h"
#include <goto-programs/class_identifier.h>
#include <goto-programs/goto_convert.h>
#include "java_utils.h"
#include <util/arith_tools.h>
#include <util/c_types.h>
#include <util/expr_cast.h>
#include <util/expr_initializer.h>
#include <util/message.h>
#include <util/pointer_offset_size.h>
class remove_java_newt : public messaget
{
public:
remove_java_newt(
symbol_table_baset &symbol_table,
message_handlert &_message_handler)
: messaget(_message_handler), symbol_table(symbol_table), ns(symbol_table)
{
}
// Lower java_new for a single function
bool lower_java_new(const irep_idt &function_identifier, goto_programt &);
// Lower java_new for a single instruction
goto_programt::targett lower_java_new(
const irep_idt &function_identifier,
goto_programt &,
goto_programt::targett);
protected:
symbol_table_baset &symbol_table;
namespacet ns;
goto_programt::targett lower_java_new(
const irep_idt &function_identifier,
const exprt &lhs,
const side_effect_exprt &rhs,
goto_programt &,
goto_programt::targett);
goto_programt::targett lower_java_new_array(
const irep_idt &function_identifier,
const exprt &lhs,
const side_effect_exprt &rhs,
goto_programt &,
goto_programt::targett);
};
/// Replaces the instruction `lhs = new java_type` by
/// two instructions:
/// lhs = ALLOCATE(java_type)
/// *lhs = { zero-initialized java_type }
/// \param function_identifier: Name of the function containing \p target.
/// \param lhs: the lhs
/// \param rhs: the rhs
/// \param dest: the goto program to modify
/// \param target: the goto instruction to replace
/// \return the iterator advanced to the last of the inserted instructions
/// Note: we have to take a copy of `lhs` and `rhs` since they would suffer
/// destruction when replacing the instruction.
goto_programt::targett remove_java_newt::lower_java_new(
const irep_idt &function_identifier,
const exprt &lhs,
const side_effect_exprt &rhs,
goto_programt &dest,
goto_programt::targett target)
{
PRECONDITION(!lhs.is_nil());
PRECONDITION(rhs.operands().empty());
PRECONDITION(rhs.type().id() == ID_pointer);
source_locationt location = rhs.source_location();
typet object_type = rhs.type().subtype();
// build size expression
const auto object_size = size_of_expr(object_type, ns);
CHECK_RETURN(object_size.has_value());
// we produce a malloc side-effect, which stays
side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
malloc_expr.copy_to_operands(*object_size);
// could use true and get rid of the code below
malloc_expr.copy_to_operands(false_exprt());
auto malloc_expr_casted =
typecast_exprt::conditional_cast(malloc_expr, lhs.type());
*target = goto_programt::make_assignment(
code_assignt(lhs, malloc_expr_casted), location);
// zero-initialize the object
auto lhs_casted = typecast_exprt::conditional_cast(lhs, rhs.type());
dereference_exprt deref(lhs_casted, object_type);
auto zero_object = zero_initializer(object_type, location, ns);
CHECK_RETURN(zero_object.has_value());
set_class_identifier(
to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
return dest.insert_after(
target,
goto_programt::make_assignment(
code_assignt(deref, *zero_object), location));
}
/// Replaces the instruction `lhs = new java_array_type` by
/// the following code:
/// lhs = ALLOCATE(java_type)
/// loops to initialize the elements (including multi-dimensional arrays)
/// \param function_identifier: Name of the function containing \p target.
/// \param lhs: the lhs
/// \param rhs: the rhs
/// \param dest: the goto program to modify
/// \param target: the goto instruction to replace
/// \return the iterator advanced to the last of the inserted instructions
/// Note: we have to take a copy of `lhs` and `rhs` since they would suffer
/// destruction when replacing the instruction.
goto_programt::targett remove_java_newt::lower_java_new_array(
const irep_idt &function_identifier,
const exprt &lhs,
const side_effect_exprt &rhs,
goto_programt &dest,
goto_programt::targett target)
{
// lower_java_new_array without lhs not implemented
PRECONDITION(!lhs.is_nil());
PRECONDITION(rhs.operands().size() >= 1); // one per dimension
PRECONDITION(rhs.type().id() == ID_pointer);
source_locationt location = rhs.source_location();
typet object_type = rhs.type().subtype();
PRECONDITION(ns.follow(object_type).id() == ID_struct);
// build size expression
const auto object_size = size_of_expr(object_type, ns);
CHECK_RETURN(object_size.has_value());
// we produce a malloc side-effect, which stays
side_effect_exprt malloc_expr(ID_allocate, rhs.type(), location);
malloc_expr.copy_to_operands(*object_size);
// code use true and get rid of the code below
malloc_expr.copy_to_operands(false_exprt());
auto lhs_casted = typecast_exprt::conditional_cast(lhs, malloc_expr.type());
*target = goto_programt::make_assignment(
code_assignt(lhs_casted, malloc_expr), location);
goto_programt::targett next = std::next(target);
const struct_typet &struct_type = to_struct_type(ns.follow(object_type));
// Ideally we would have a check for `is_valid_java_array(struct_type)` but
// `is_valid_java_array is part of the java_bytecode module and we cannot
// introduce such dependencies. We do this simple check instead:
PRECONDITION(struct_type.components().size() == 3);
// Init base class:
dereference_exprt deref(lhs_casted, object_type);
auto zero_object = zero_initializer(object_type, location, ns);
CHECK_RETURN(zero_object.has_value());
set_class_identifier(
to_struct_expr(*zero_object), ns, to_struct_tag_type(object_type));
dest.insert_before(
next,
goto_programt::make_assignment(
code_assignt(deref, *zero_object), location));
// if it's an array, we need to set the length field
member_exprt length(
deref,
struct_type.components()[1].get_name(),
struct_type.components()[1].type());
dest.insert_before(
next,
goto_programt::make_assignment(code_assignt(length, rhs.op0()), location));
// we also need to allocate space for the data
member_exprt data(
deref,
struct_type.components()[2].get_name(),
struct_type.components()[2].type());
// Allocate a (struct realtype**) instead of a (void**) if possible.
const irept &given_element_type = object_type.find(ID_element_type);
typet allocate_data_type;
if(given_element_type.is_not_nil())
{
allocate_data_type =
pointer_type(static_cast<const typet &>(given_element_type));
}
else
allocate_data_type = data.type();
side_effect_exprt data_java_new_expr(
ID_java_new_array_data, allocate_data_type, location);
// The instruction may specify a (hopefully small) upper bound on the
// array size, in which case we allocate a fixed-length array that may
// be larger than the `length` member rather than use a true variable-
// length array, which produces a more complex formula in the current
// backend.
const irept size_bound = rhs.find(ID_length_upper_bound);
if(size_bound.is_nil())
data_java_new_expr.set(ID_size, rhs.op0());
else
data_java_new_expr.set(ID_size, size_bound);
// Must directly assign the new array to a temporary
// because goto-symex will notice `x=side_effect_exprt` but not
// `x=typecast_exprt(side_effect_exprt(...))`
symbol_exprt new_array_data_symbol = fresh_java_symbol(
data_java_new_expr.type(),
"tmp_new_data_array",
location,
function_identifier,
symbol_table)
.symbol_expr();
code_declt array_decl(new_array_data_symbol);
array_decl.add_source_location() = location;
dest.insert_before(next, goto_programt::make_decl(array_decl, location));
dest.insert_before(
next,
goto_programt::make_assignment(
code_assignt(new_array_data_symbol, data_java_new_expr), location));
exprt cast_java_new = new_array_data_symbol;
if(cast_java_new.type() != data.type())
cast_java_new = typecast_exprt(cast_java_new, data.type());
dest.insert_before(
next,
goto_programt::make_assignment(
code_assignt(data, cast_java_new), location));
// zero-initialize the data
if(!rhs.get_bool(ID_skip_initialize))
{
const auto zero_element =
zero_initializer(data.type().subtype(), location, ns);
CHECK_RETURN(zero_element.has_value());
codet array_set(ID_array_set);
array_set.copy_to_operands(new_array_data_symbol, *zero_element);
dest.insert_before(next, goto_programt::make_other(array_set, location));
}
// multi-dimensional?
if(rhs.operands().size() >= 2)
{
// produce
// for(int i=0; i<size; i++) tmp[i]=java_new(dim-1);
// This will be converted recursively.
goto_programt tmp;
symbol_exprt tmp_i =
fresh_java_symbol(
length.type(), "tmp_index", location, function_identifier, symbol_table)
.symbol_expr();
code_declt decl(tmp_i);
decl.add_source_location() = location;
tmp.insert_before(
tmp.instructions.begin(), goto_programt::make_decl(decl, location));
side_effect_exprt sub_java_new = rhs;
sub_java_new.operands().erase(sub_java_new.operands().begin());
// we already know that rhs has pointer type
typet sub_type =
static_cast<const typet &>(rhs.type().subtype().find(ID_element_type));
CHECK_RETURN(sub_type.id() == ID_pointer);
sub_java_new.type() = sub_type;
side_effect_exprt inc(ID_assign, typet(), location);
inc.operands().resize(2);
inc.op0() = tmp_i;
inc.op1() = plus_exprt(tmp_i, from_integer(1, tmp_i.type()));
dereference_exprt deref_expr(
plus_exprt(data, tmp_i), data.type().subtype());
code_blockt for_body;
symbol_exprt init_sym =
fresh_java_symbol(
sub_type, "subarray_init", location, function_identifier, symbol_table)
.symbol_expr();
code_declt init_decl(init_sym);
init_decl.add_source_location() = location;
for_body.add(std::move(init_decl));
code_assignt init_subarray(init_sym, sub_java_new);
for_body.add(std::move(init_subarray));
code_assignt assign_subarray(
deref_expr, typecast_exprt(init_sym, deref_expr.type()));
for_body.add(std::move(assign_subarray));
const code_fort for_loop(
code_assignt(tmp_i, from_integer(0, tmp_i.type())),
binary_relation_exprt(tmp_i, ID_lt, rhs.op0()),
std::move(inc),
std::move(for_body));
goto_convert(for_loop, symbol_table, tmp, get_message_handler(), ID_java);
// lower new side effects recursively
lower_java_new(function_identifier, tmp);
dest.destructive_insert(next, tmp);
}
return std::prev(next);
}
/// Replace every java_new or java_new_array by a malloc side-effect
/// and zero initialization.
/// \param function_identifier: Name of the function containing \p target.
/// \param goto_program: program to process
/// \param target: instruction to check for java_new expressions
/// \return true if a replacement has been made
goto_programt::targett remove_java_newt::lower_java_new(
const irep_idt &function_identifier,
goto_programt &goto_program,
goto_programt::targett target)
{
if(!target->is_assign())
return target;
// we make a copy of lhs/rhs, as we intend to destroy the assignment
// inside lower_java_new and lower_java_new_array
const auto &code_assign = target->get_assign();
auto rhs = code_assign.rhs();
auto lhs = code_assign.lhs();
if(rhs.id() == ID_typecast) // follow one typecast
rhs = to_typecast_expr(rhs).op();
if(rhs.id() == ID_side_effect)
{
const auto &side_effect_expr = to_side_effect_expr(rhs);
const auto &statement = side_effect_expr.get_statement();
if(statement == ID_java_new)
{
return lower_java_new(
function_identifier, lhs, side_effect_expr, goto_program, target);
}
else if(statement == ID_java_new_array)
{
return lower_java_new_array(
function_identifier, lhs, side_effect_expr, goto_program, target);
}
}
return target;
}
/// Replace every java_new or java_new_array by a malloc side-effect
/// and zero initialization.
/// Extra auxiliary variables may be introduced into symbol_table.
/// \param function_identifier: Name of the function \p goto_program.
/// \param goto_program: The function body to work on.
/// \return true if one or more java_new expressions have been replaced
bool remove_java_newt::lower_java_new(
const irep_idt &function_identifier,
goto_programt &goto_program)
{
bool changed = false;
for(goto_programt::instructionst::iterator target =
goto_program.instructions.begin();
target != goto_program.instructions.end();
++target)
{
goto_programt::targett new_target =
lower_java_new(function_identifier, goto_program, target);
changed = changed || new_target == target;
target = new_target;
}
if(!changed)
return false;
goto_program.update();
return true;
}
/// Replace every java_new or java_new_array by a malloc side-effect
/// and zero initialization.
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param function_identifier: Name of the function containing \p target.
/// \param target: The instruction to work on.
/// \param goto_program: The function body containing the instruction.
/// \param symbol_table: The symbol table to add symbols to.
/// \param message_handler: a message handler
void remove_java_new(
const irep_idt &function_identifier,
goto_programt::targett target,
goto_programt &goto_program,
symbol_table_baset &symbol_table,
message_handlert &message_handler)
{
remove_java_newt rem(symbol_table, message_handler);
rem.lower_java_new(function_identifier, goto_program, target);
}
/// Replace every java_new or java_new_array by a malloc side-effect
/// and zero initialization.
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param function_identifier: Name of the function \p function.
/// \param function: The function to work on.
/// \param symbol_table: The symbol table to add symbols to.
/// \param message_handler: a message handler
void remove_java_new(
const irep_idt &function_identifier,
goto_functionst::goto_functiont &function,
symbol_table_baset &symbol_table,
message_handlert &message_handler)
{
remove_java_newt rem(symbol_table, message_handler);
rem.lower_java_new(function_identifier, function.body);
}
/// Replace every java_new or java_new_array by a malloc side-effect
/// and zero initialization.
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param goto_functions: The functions to work on.
/// \param symbol_table: The symbol table to add symbols to.
/// \param message_handler: a message handler
void remove_java_new(
goto_functionst &goto_functions,
symbol_table_baset &symbol_table,
message_handlert &message_handler)
{
remove_java_newt rem(symbol_table, message_handler);
bool changed = false;
for(auto &f : goto_functions.function_map)
changed = rem.lower_java_new(f.first, f.second.body) || changed;
if(changed)
goto_functions.compute_location_numbers();
}
/// Replace every java_new or java_new_array by a malloc side-effect
/// and zero initialization.
/// \remarks Extra auxiliary variables may be introduced into symbol_table.
/// \param goto_model: The functions to work on and the symbol table to add
/// symbols to.
/// \param message_handler: a message handler
void remove_java_new(goto_modelt &goto_model, message_handlert &message_handler)
{
remove_java_new(
goto_model.goto_functions, goto_model.symbol_table, message_handler);
}