-
Notifications
You must be signed in to change notification settings - Fork 273
/
Copy pathjava_bytecode_language.cpp
1273 lines (1148 loc) · 44.2 KB
/
java_bytecode_language.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*******************************************************************\
Module:
Author: Daniel Kroening, [email protected]
\*******************************************************************/
#include "java_bytecode_language.h"
#include <string>
#include <util/cmdline.h>
#include <util/config.h>
#include <util/expr_iterator.h>
#include <util/invariant.h>
#include <util/journalling_symbol_table.h>
#include <util/options.h>
#include <util/prefix.h>
#include <util/string2int.h>
#include <util/suffix.h>
#include <util/symbol_table.h>
#include <util/symbol_table_builder.h>
#include <json/json_parser.h>
#include <goto-programs/class_hierarchy.h>
#include "java_bytecode_concurrency_instrumentation.h"
#include "java_bytecode_convert_class.h"
#include "java_bytecode_convert_method.h"
#include "java_bytecode_internal_additions.h"
#include "java_bytecode_instrument.h"
#include "java_bytecode_typecheck.h"
#include "java_entry_point.h"
#include "java_bytecode_parser.h"
#include "java_class_loader.h"
#include "java_string_literals.h"
#include "java_static_initializers.h"
#include "java_utils.h"
#include "ci_lazy_methods.h"
#include "expr2java.h"
#include "load_method_by_regex.h"
/// Translate the Java-bytecode-specific command line switches into options.
/// Boolean switches are always recorded (as true or false), whereas switches
/// that carry a value or a list of values are only recorded when they were
/// given on the command line.
/// \param cmd: Command line
/// \param [out] options: The options object that will be updated.
void parse_java_language_options(const cmdlinet &cmd, optionst &options)
{
  // Record a boolean switch under the same name as its command line flag.
  const auto copy_flag = [&cmd, &options](const char *name) {
    options.set_option(name, cmd.isset(name));
  };
  // Record a single-valued switch, but only if it was given.
  const auto copy_value_if_set = [&cmd, &options](const char *name) {
    if(cmd.isset(name))
      options.set_option(name, cmd.get_value(name));
  };
  // Record a multi-valued switch, but only if it was given.
  const auto copy_values_if_set = [&cmd, &options](const char *name) {
    if(cmd.isset(name))
      options.set_option(name, cmd.get_values(name));
  };

  copy_flag("java-assume-inputs-non-null");
  copy_flag("throw-runtime-exceptions");
  // Inverted switch: the check is on unless the "disable" flag is present.
  options.set_option(
    "uncaught-exception-check",
    !cmd.isset("disable-uncaught-exception-check"));
  copy_flag("throw-assertion-error");
  copy_flag("java-threading");

  copy_value_if_set("java-max-vla-length");

  copy_flag("symex-driven-lazy-loading");

  copy_values_if_set("java-load-class");
  copy_values_if_set("java-no-load-class");
  copy_values_if_set("lazy-methods-extra-entry-point");

  copy_value_if_set("java-cp-include-files");
}
/// Consume options that are java bytecode specific.
void java_bytecode_languaget::set_language_options(const optionst &options)
{
object_factory_parameters.set(options);
assume_inputs_non_null =
options.get_bool_option("java-assume-inputs-non-null");
string_refinement_enabled = options.get_bool_option("refine-strings");
throw_runtime_exceptions =
options.get_bool_option("throw-runtime-exceptions");
assert_uncaught_exceptions =
options.get_bool_option("uncaught-exception-check");
throw_assertion_error = options.get_bool_option("throw-assertion-error");
threading_support = options.get_bool_option("java-threading");
max_user_array_length =
options.get_unsigned_int_option("java-max-vla-length");
if(options.get_bool_option("symex-driven-lazy-loading"))
lazy_methods_mode=LAZY_METHODS_MODE_EXTERNAL_DRIVER;
else if(options.get_bool_option("lazy-methods"))
lazy_methods_mode=LAZY_METHODS_MODE_CONTEXT_INSENSITIVE;
else
lazy_methods_mode=LAZY_METHODS_MODE_EAGER;
if(throw_runtime_exceptions)
{
java_load_classes.insert(
java_load_classes.end(),
exception_needed_classes.begin(),
exception_needed_classes.end());
}
if(options.is_set("java-load-class"))
{
const auto &load_values = options.get_list_option("java-load-class");
java_load_classes.insert(
java_load_classes.end(), load_values.begin(), load_values.end());
}
if(options.is_set("java-no-load-class"))
{
const auto &no_load_values = options.get_list_option("java-no-load-class");
no_load_classes = {no_load_values.begin(), no_load_values.end()};
}
const std::list<std::string> &extra_entry_points =
options.get_list_option("lazy-methods-extra-entry-point");
std::transform(
extra_entry_points.begin(),
extra_entry_points.end(),
std::back_inserter(extra_methods),
build_load_method_by_regex);
const auto &new_points = build_extra_entry_points(options);
extra_methods.insert(
extra_methods.end(), new_points.begin(), new_points.end());
java_cp_include_files = options.get_option("java-cp-include-files");
if(!java_cp_include_files.empty())
{
// load file list from JSON file
if(java_cp_include_files[0]=='@')
{
jsont json_cp_config;
if(parse_json(
java_cp_include_files.substr(1),
get_message_handler(),
json_cp_config))
throw "cannot read JSON input configuration for JAR loading";
if(!json_cp_config.is_object())
throw "the JSON file has a wrong format";
jsont include_files=json_cp_config["jar"];
if(!include_files.is_array())
throw "the JSON file has a wrong format";
// add jars from JSON config file to classpath
for(const jsont &file_entry : to_json_array(include_files))
{
DATA_INVARIANT(
file_entry.is_string() && has_suffix(file_entry.value, ".jar"),
"classpath entry must be jar filename, but '" + file_entry.value +
"' found");
config.java.classpath.push_back(file_entry.value);
}
}
}
else
java_cp_include_files=".*";
nondet_static = options.get_bool_option("nondet-static");
language_options_initialized=true;
}
/// \return the set of file extensions reported by this front-end, namely
///   "class" and "jar"
std::set<std::string> java_bytecode_languaget::extensions() const
{
  std::set<std::string> supported;
  supported.insert("class");
  supported.insert("jar");
  return supported;
}
/// Intentionally a no-op: the set passed in is left unmodified. The only
/// statement that would have populated it is commented out below.
void java_bytecode_languaget::modules_provided(std::set<std::string> &)
{
// Disabled; kept for reference:
// modules.insert(translation_unit(parse_path));
}
/// Preprocessing hook. This front-end performs no preprocessing: all three
/// arguments are ignored and nothing is written to the output stream.
/// \return always true
/// NOTE(review): other functions in this file return false on success, so
/// true presumably signals "failed / not supported" here -- confirm against
/// the base class contract.
bool java_bytecode_languaget::preprocess(
std::istream &,
const std::string &,
std::ostream &)
{
// there is no preprocessing!
return true;
}
/// Configure the class loader and determine the main class (i.e.\ the class to
/// start the class loading analysis from, see \ref java_class_loadert) from
/// the file type of `path`, which must name either a .class or a .jar file.
/// For a .class file, the class name derived from the file name is the main
/// class. For a .jar file, the main class is looked for in this order:
///   1) the argument of the --main-class command-line option,
///   2) the class implied by the argument of the --function option,
///   3) the manifest file of the JAR.
/// If none of these yields a main class, all classes in the JAR are loaded.
/// \param path: name of the .class or .jar file
/// \return false on every path that returns
bool java_bytecode_languaget::parse(
  std::istream &,
  const std::string &path)
{
  PRECONDITION(language_options_initialized);

  // Rebuild the loader's classpath from the global configuration.
  java_class_loader.clear_classpath();
  for(const auto &classpath_entry : config.java.classpath)
    java_class_loader.add_classpath_entry(classpath_entry);

  java_class_loader.set_message_handler(get_message_handler());
  java_class_loader.set_java_cp_include_files(java_cp_include_files);
  java_class_loader.add_load_classes(java_load_classes);

  if(string_refinement_enabled)
  {
    string_preprocess.initialize_known_type_table();

    auto string_base_classes_of = [this](const irep_idt &id) {
      return string_preprocess.get_string_type_base_classes(id);
    };
    java_class_loader.set_extra_class_refs_function(string_base_classes_of);
  }

  // Dispatch on the file extension.
  if(has_suffix(path, ".class"))
  {
    // A class file always overrides main_class.
    main_class = java_class_loadert::file_to_class_name(path);
  }
  else if(has_suffix(path, ".jar"))
  {
    // build an object to potentially limit which classes are loaded
    java_class_loader_limitt class_loader_limit(
      get_message_handler(),
      java_cp_include_files);

    if(!config.java.main_class.empty())
    {
      // 1) explicitly configured main class
      main_class = config.java.main_class;
    }
    else
    {
      const std::string &entry_method = config.main;
      if(entry_method.empty())
      {
        // 3) fall back to the Main-Class line of the manifest, if any
        auto manifest = java_class_loader.jar_pool(path).get_manifest();
        std::string manifest_main_class = manifest["Main-Class"];
        if(!manifest_main_class.empty())
          main_class = manifest_main_class;
      }
      else
      {
        // 2) everything before the last '.' of the entry method
        const auto class_name_length = entry_method.find_last_of('.');
        main_class = entry_method.substr(0, class_name_length);
      }
    }

    if(main_class.empty())
    {
      // Still no main class: load the whole JAR.
      status() << "JAR file without entry point: loading class files" << eom;
      const auto jar_classes = java_class_loader.load_entire_jar(path);
      for(const auto &jar_class : jar_classes)
        main_jar_classes.push_back(jar_class);
    }
    else
    {
      java_class_loader.add_classpath_entry(path);
    }
  }
  else
  {
    UNREACHABLE;
  }

  if(!main_class.empty())
  {
    status() << "Java main class: " << main_class << eom;
    java_class_loader(main_class);
  }

  return false;
}
/// Infer fields that must exist on opaque types from field accesses against
/// them. Note that we don't yet try to infer inheritance between opaque types
/// here, so we may introduce the same field onto a type and its ancestor
/// without realising that is in fact the same field, inherited from that
/// ancestor. This can lead to incorrect results when opaque types are cast
/// to other opaque types and their fields do not alias as intended.
/// We set opaque fields as final to avoid assuming they can be overridden.
///
/// For every `getfield` / `putfield` instruction, the referenced class and
/// then its first base class (repeatedly) are searched for the field. If a
/// stub class is reached before the field is found, the field is added to
/// that stub class.
/// \param parse_tree: class parse tree
/// \param symbol_table: global symbol table
static void infer_opaque_type_fields(
  const java_bytecode_parse_treet &parse_tree,
  symbol_tablet &symbol_table)
{
  namespacet ns(symbol_table);
  for(const auto &method : parse_tree.parsed_class.methods)
  {
    for(const java_bytecode_parse_treet::instructiont &instruction :
        method.instructions)
    {
      const bool is_field_access = instruction.statement == "getfield" ||
                                   instruction.statement == "putfield";
      if(!is_field_access)
        continue;

      // Check before reading args[0], as the other instruction scans in this
      // file do for their respective opcodes.
      INVARIANT(
        !instruction.args.empty(),
        "get/putfield should have at least one argument");

      const fieldref_exprt &fieldref =
        expr_dynamic_cast<fieldref_exprt>(instruction.args[0]);
      irep_idt class_symbol_id = fieldref.class_name();
      const symbolt *class_symbol = symbol_table.lookup(class_symbol_id);
      INVARIANT(
        class_symbol != nullptr,
        "all types containing fields should have been loaded");

      const java_class_typet *class_type =
        &to_java_class_type(class_symbol->type);
      const irep_idt &component_name = fieldref.component_name();

      // Walk up the chain of first base classes until the field is found or
      // a stub class is reached.
      while(!class_type->has_component(component_name))
      {
        if(class_type->get_is_stub())
        {
          // Accessing a field of an incomplete (opaque) type.
          symbolt &writable_class_symbol =
            symbol_table.get_writeable_ref(class_symbol_id);
          auto &components =
            to_java_class_type(writable_class_symbol.type).components();
          components.emplace_back(component_name, fieldref.type());
          components.back().set_base_name(component_name);
          components.back().set_pretty_name(component_name);
          components.back().set_is_final(true);
          break;
        }

        // Not present here: check the superclass. The message deliberately
        // names the class the instruction referred to (class_symbol), not
        // the ancestor currently being inspected.
        INVARIANT(
          !class_type->bases().empty(),
          "class '" + id2string(class_symbol->name)
            + "' (which was missing a field '" + id2string(component_name)
            + "' referenced from method '" + id2string(method.name)
            + "') should have an opaque superclass");
        const auto &superclass_type = class_type->bases().front().type();
        class_symbol_id = superclass_type.get_identifier();
        class_type = &to_java_class_type(ns.follow(superclass_type));
      }
    }
  }
}
/// Return the constant global java.lang.Class symbol for a given class id,
/// adding it to the symbol table first if it is not there yet. The symbol's
/// identifier is the class id followed by JAVA_CLASS_MODEL_SUFFIX.
/// \param class_id: class identifier
/// \param symbol_table: global symbol table; a symbol may be added
/// \return java.lang.Class typed symbol expression
static symbol_exprt get_or_create_class_literal_symbol(
  const irep_idt &class_id, symbol_tablet &symbol_table)
{
  struct_tag_typet java_lang_Class("java::java.lang.Class");
  symbol_exprt symbol_expr(
    id2string(class_id) + JAVA_CLASS_MODEL_SUFFIX,
    java_lang_Class);

  // Already known: nothing to create.
  if(symbol_table.has_symbol(symbol_expr.get_identifier()))
    return symbol_expr;

  symbolt new_class_symbol;
  new_class_symbol.name = symbol_expr.get_identifier();
  new_class_symbol.type = symbol_expr.type();

  // The base name is the identifier with its "java::" prefix removed.
  const std::string java_prefix = "java::";
  const std::string full_name = id2string(new_class_symbol.name);
  INVARIANT(
    has_prefix(full_name, java_prefix),
    "class identifier should have 'java::' prefix");
  new_class_symbol.base_name = full_name.substr(java_prefix.size());

  new_class_symbol.mode = ID_java;
  new_class_symbol.is_lvalue = true;
  new_class_symbol.is_state_var = true;
  new_class_symbol.is_static_lifetime = true;
  new_class_symbol.type.set(ID_C_no_nondet_initialization, true);
  symbol_table.add(new_class_symbol);

  return symbol_expr;
}
/// Compute the value produced by a Java load-constant (ldc) instruction.
/// Possible cases:
/// 1) A Class constant yields the address of the corresponding
///    java.lang.Class global.
/// 2) A String literal yields the address of the corresponding string
///    literal global.
/// 3) Any other operand must be a constant (e.g. an int or a float) and is
///    returned unchanged.
/// \param ldc_arg0: raw operand to the ldc opcode
/// \param symbol_table: global symbol table. If the argument `ldc_arg0` is a
///   String or Class constant then a new constant global may be added.
/// \param string_refinement_enabled: true if --refine-strings is enabled, which
///   influences how String literals are structured.
/// \return ldc result
static exprt get_ldc_result(
  const exprt &ldc_arg0,
  symbol_tablet &symbol_table,
  bool string_refinement_enabled)
{
  const irep_idt &operand_kind = ldc_arg0.id();

  if(operand_kind == ID_type)
  {
    // Class literal: the class is named by the identifier of the type.
    const irep_idt &class_id = ldc_arg0.type().get(ID_identifier);
    const auto class_literal =
      get_or_create_class_literal_symbol(class_id, symbol_table);
    return address_of_exprt(class_literal);
  }

  if(operand_kind == ID_java_string_literal)
  {
    const auto string_literal = get_or_create_string_literal_symbol(
      ldc_arg0, symbol_table, string_refinement_enabled);
    return address_of_exprt(string_literal);
  }

  INVARIANT(
    operand_kind == ID_constant,
    "ldc argument should be constant, string literal or class literal");
  return ldc_arg0;
}
/// Creates global variables for the constants mentioned by the methods of a
/// class. These are either string literals, or class literals (the
/// java.lang.Class instance returned by
/// `(some_reference_typed_expression).class`). The first argument of every
/// ldc-family instruction in the parse tree is rewritten to the result of
/// get_ldc_result, i.e. to reference these globals directly.
/// \param parse_tree: parse tree to search for constant global references
/// \param symbol_table: global symbol table, to which constant globals will be
///   added.
/// \param string_refinement_enabled: true if `--refine-strings` is active,
///   which changes how string literals are structured.
static void generate_constant_global_variables(
  java_bytecode_parse_treet &parse_tree,
  symbol_tablet &symbol_table,
  bool string_refinement_enabled)
{
  for(auto &method : parse_tree.parsed_class.methods)
  {
    for(java_bytecode_parse_treet::instructiont &instruction :
        method.instructions)
    {
      // ldc* instructions are Java bytecode "load constant" ops, which can
      // retrieve a numeric constant, String literal, or Class literal.
      const bool is_load_constant = instruction.statement == "ldc" ||
                                    instruction.statement == "ldc2" ||
                                    instruction.statement == "ldc_w" ||
                                    instruction.statement == "ldc2_w";
      if(!is_load_constant)
        continue;

      INVARIANT(
        !instruction.args.empty(),
        "ldc instructions should have an argument");

      auto &ldc_operand = instruction.args[0];
      ldc_operand =
        get_ldc_result(ldc_operand, symbol_table, string_refinement_enabled);
    }
  }
}
/// Add a stub global symbol to the symbol table. Pointer-typed symbols get a
/// null initial value unless `force_nondet_init` is set; all other symbols get
/// a nil value.
/// \param symbol_table: table to add to
/// \param symbol_id: new symbol fully-qualified identifier
/// \param symbol_basename: new symbol basename
/// \param symbol_type: new symbol type
/// \param class_id: class id that directly encloses this static field
/// \param force_nondet_init: if true, always leave the symbol's value nil so it
///   gets nondet initialized during __CPROVER_initialize. Otherwise, pointer-
///   typed globals are initialized null and we expect a synthetic clinit method
///   to be created later.
static void create_stub_global_symbol(
  symbol_table_baset &symbol_table,
  const irep_idt &symbol_id,
  const irep_idt &symbol_basename,
  const typet &symbol_type,
  const irep_idt &class_id,
  bool force_nondet_init)
{
  symbolt new_symbol;

  // Naming.
  new_symbol.name = symbol_id;
  new_symbol.base_name = symbol_basename;
  new_symbol.pretty_name = new_symbol.name;
  new_symbol.mode = ID_java;

  // Kind: a static, assignable state variable (not a type).
  new_symbol.is_type = false;
  new_symbol.is_static_lifetime = true;
  new_symbol.is_lvalue = true;
  new_symbol.is_state_var = true;

  // Type and the annotations attached to it.
  new_symbol.type = symbol_type;
  set_declaring_class(new_symbol, class_id);
  // Public access is a guess; it encourages merging like-typed static fields,
  // whereas a more restricted visibility would encourage separating them.
  // Neither is correct, as without the class file we can't know the truth.
  new_symbol.type.set(ID_C_access, ID_public);
  // We set the field as final to avoid assuming they can be overridden.
  new_symbol.type.set(ID_C_constant, true);

  // If pointer-typed, initialise to null and a static initialiser will be
  // created to initialise on first reference. Otherwise (primitive-typed, or
  // nondet initialisation was forced) request nondeterministic
  // initialisation by setting a nil value.
  const bool leave_value_nil =
    force_nondet_init || symbol_type.id() != ID_pointer;
  if(leave_value_nil)
    new_symbol.value.make_nil();
  else
    new_symbol.value = null_pointer_exprt(to_pointer_type(symbol_type));

  const bool add_failed = symbol_table.add(new_symbol);
  INVARIANT(
    !add_failed, "caller should have checked symbol not already in table");
}
/// Find any incomplete (stub) ancestor of a given class that can have a stub
/// static field attached to it. This specifically excludes java.lang.Object,
/// which we know cannot have static fields.
/// \param start_class_id: class to start searching from
/// \param symbol_table: global symbol table
/// \param class_hierarchy: global class hierarchy
/// \return first incomplete ancestor encountered, where start_class_id itself
///   counts as a candidate; an empty irep_idt if there is none.
static irep_idt get_any_incomplete_ancestor_for_stub_static_field(
  const irep_idt &start_class_id,
  const symbol_tablet &symbol_table,
  const class_hierarchyt &class_hierarchy)
{
  // Depth-first search using an explicit stack of classes still to inspect.
  std::vector<irep_idt> pending(1, start_class_id);

  while(!pending.empty())
  {
    const irep_idt candidate = pending.back();
    pending.pop_back();

    // A stub class qualifies, except java.lang.Object, which cannot have
    // static fields.
    const bool is_stub =
      to_java_class_type(symbol_table.lookup_ref(candidate).type)
        .get_is_stub();
    if(is_stub && candidate != "java::java.lang.Object")
      return candidate;

    // Otherwise continue the search with the candidate's parents.
    const class_hierarchyt::idst &parents =
      class_hierarchy.class_map.at(candidate).parents;
    for(const auto &parent : parents)
      pending.push_back(parent);
  }

  // No stub ancestor found.
  return irep_idt();
}
/// Search for getstatic and putstatic instructions in a class' bytecode and
/// create stub symbols for any static fields that cannot be resolved through
/// get_inherited_component. Each new symbol is created by
/// create_stub_global_symbol, which documents how its initial value is chosen.
/// \param parse_tree: class bytecode
/// \param symbol_table: symbol table; may gain new symbols
/// \param class_hierarchy: global class hierarchy
/// \param log: message handler used to log warnings when stub static fields are
///   found belonging to non-stub classes.
static void create_stub_global_symbols(
  const java_bytecode_parse_treet &parse_tree,
  symbol_table_baset &symbol_table,
  const class_hierarchyt &class_hierarchy,
  messaget &log)
{
  for(const auto &method : parse_tree.parsed_class.methods)
  {
    for(const java_bytecode_parse_treet::instructiont &instruction :
        method.instructions)
    {
      const bool is_static_access = instruction.statement == "getstatic" ||
                                    instruction.statement == "putstatic";
      if(!is_static_access)
        continue;

      INVARIANT(
        instruction.args.size() > 0,
        "get/putstatic should have at least one argument");
      const fieldref_exprt &field_ref =
        expr_dynamic_cast<fieldref_exprt>(instruction.args[0]);
      irep_idt component = field_ref.component_name();
      irep_idt class_id = field_ref.class_name();

      // The final 'true' parameter here includes interfaces, as they can
      // define static fields.
      const auto referred_component = get_inherited_component(
        class_id, component, symbol_table, class_hierarchy, true);
      if(referred_component)
        continue; // the field already resolves; nothing to create

      // Create a new stub global on an arbitrary incomplete ancestor of the
      // class that was referred to. This is just a guess, but we have no
      // better information to go on.
      irep_idt add_to_class_id =
        get_any_incomplete_ancestor_for_stub_static_field(
          class_id, symbol_table, class_hierarchy);

      // If there are no incomplete ancestors to ascribe the missing field
      // to, we must have an incomplete model of a class or simply a
      // version mismatch of some kind. Normally this would be an error, but
      // our models library currently triggers this error in some cases
      // (notably java.lang.System, which is missing System.in/out/err).
      // Therefore for this case we ascribe the missing field to the class
      // it was directly referenced from, and fall back to initialising the
      // field in __CPROVER_initialize, rather than try to create or augment
      // a clinit method for a non-stub class.
      const bool no_incomplete_ancestors = add_to_class_id.empty();
      if(no_incomplete_ancestors)
      {
        add_to_class_id = class_id;

        // TODO forbid this again once the models library has been checked
        // for missing static fields.
        log.warning() << "Stub static field " << component << " found for "
                      << "non-stub type " << class_id << ". In future this "
                      << "will be a fatal error." << messaget::eom;
      }

      const irep_idt identifier =
        id2string(add_to_class_id) + "." + id2string(component);

      create_stub_global_symbol(
        symbol_table,
        identifier,
        component,
        instruction.args[0].type(),
        add_to_class_id,
        no_incomplete_ancestors);
    }
  }
}
/// Populate the symbol table from the classes held by the class loader.
/// The steps run in this order:
///  1. add the Java internal additions to the (empty) symbol table;
///  2. convert java.lang.Object if it was loaded, then every loaded class;
///  3. build the class hierarchy and mark implicitly generic class types;
///  4. infer fields of opaque types, create String/Class constant globals and
///     stub globals (with their synthetic initializers);
///  5. create static initializer wrappers;
///  6. convert method bodies according to `lazy_methods_mode`;
///  7. instrument runtime exceptions, typecheck, and (if threading support is
///     enabled) instrument thread blocks and synchronized methods.
/// \param symbol_table: global symbol table; must be empty on entry
/// \return true if class conversion or context-insensitive lazy method
///   conversion reported failure; otherwise the result of
///   java_bytecode_typecheck
bool java_bytecode_languaget::typecheck(
symbol_tablet &symbol_table,
const std::string &)
{
PRECONDITION(language_options_initialized);
// There are various cases in the Java front-end where pre-existing symbols
// from a previous load are not handled. We just rule this case out for now;
// a user wishing to ensure a particular class is loaded should use
// --java-load-class (to force class-loading) or
// --lazy-methods-extra-entry-point (to ensure a method body is loaded)
// instead of creating two instances of the front-end.
INVARIANT(
symbol_table.begin() == symbol_table.end(),
"the Java front-end should only be used with an empty symbol table");
java_internal_additions(symbol_table);
if(string_refinement_enabled)
string_preprocess.initialize_conversion_table();
// Must load java.lang.Object first to avoid stubbing
// This ordering could alternatively be enforced by
// moving the code below to the class loader.
java_class_loadert::parse_tree_with_overridest_mapt::const_iterator it =
java_class_loader.get_class_with_overlays_map().find("java.lang.Object");
if(it != java_class_loader.get_class_with_overlays_map().end())
{
if(
java_bytecode_convert_class(
it->second,
symbol_table,
get_message_handler(),
max_user_array_length,
method_bytecode,
string_preprocess,
no_load_classes))
{
return true;
}
}
// first generate a new struct symbol for each class and a new function symbol
// for every method
// NOTE(review): this loop does not skip java.lang.Object, so when it was
// converted above it is handed to java_bytecode_convert_class a second time;
// presumably that function tolerates already-converted classes -- confirm.
for(const auto &class_trees : java_class_loader.get_class_with_overlays_map())
{
// Skip entries whose first parse tree has no class name.
if(class_trees.second.front().parsed_class.name.empty())
continue;
if(
java_bytecode_convert_class(
class_trees.second,
symbol_table,
get_message_handler(),
max_user_array_length,
method_bytecode,
string_preprocess,
no_load_classes))
{
return true;
}
}
// Now that all classes have been created in the symbol table we can populate
// the class hierarchy:
class_hierarchy(symbol_table);
// find and mark all implicitly generic class types
// this can only be done once all the class symbols have been created
for(const auto &c : java_class_loader.get_class_with_overlays_map())
{
if(c.second.front().parsed_class.name.empty())
continue;
try
{
mark_java_implicitly_generic_class_type(
c.second.front().parsed_class.name, symbol_table);
}
catch(missing_outer_class_symbol_exceptiont &)
{
// A missing outer class symbol is not fatal: warn and leave this class
// unmarked.
messaget::warning()
<< "Not marking class " << c.first
<< " implicitly generic due to missing outer class symbols"
<< messaget::eom;
}
}
// Infer fields on opaque types based on the method instructions just loaded.
// For example, if we don't have bytecode for field x of class A, but we can
// see an int-typed getfield instruction referring to it, add that field now.
for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
{
for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second)
infer_opaque_type_fields(parse_tree, symbol_table);
}
// Create global variables for constants (String and Class literals) up front.
// This means that when running with lazy loading, we will be aware of these
// literal globals' existence when __CPROVER_initialize is generated in
// `generate_support_functions`.
// Remember the table size so the number of added symbols can be reported.
const std::size_t before_constant_globals_size = symbol_table.symbols.size();
for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
{
for(java_bytecode_parse_treet &parse_tree : class_to_trees.second)
{
generate_constant_global_variables(
parse_tree, symbol_table, string_refinement_enabled);
}
}
status() << "Java: added "
<< (symbol_table.symbols.size() - before_constant_globals_size)
<< " String or Class constant symbols"
<< messaget::eom;
// For each reference to a stub global (that is, a global variable declared on
// a class we don't have bytecode for, and therefore don't know the static
// initialiser for), create a synthetic static initialiser (clinit method)
// to nondet initialise it.
// Note this must be done before making static initialiser wrappers below, as
// this makes a Classname.clinit method, then the next pass makes a wrapper
// that ensures it is only run once, and that static initialisation happens
// in class-graph topological order.
{
// The journal records which symbols the stub-global pass inserts, so that
// only those are handed to the initializer factory below.
journalling_symbol_tablet symbol_table_journal =
journalling_symbol_tablet::wrap(symbol_table);
for(auto &class_to_trees : java_class_loader.get_class_with_overlays_map())
{
for(const java_bytecode_parse_treet &parse_tree : class_to_trees.second)
{
create_stub_global_symbols(
parse_tree, symbol_table_journal, class_hierarchy, *this);
}
}
stub_global_initializer_factory.create_stub_global_initializer_symbols(
symbol_table, symbol_table_journal.get_inserted(), synthetic_methods);
}
// For each class that will require a static initializer wrapper, create a
// function named package.classname::clinit_wrapper, and a corresponding
// global tracking whether it has run or not:
create_static_initializer_wrappers(
symbol_table, synthetic_methods, threading_support);
// Now incrementally elaborate methods
// that are reachable from this entry point.
switch(lazy_methods_mode)
{
case LAZY_METHODS_MODE_CONTEXT_INSENSITIVE:
// ci = context-insensitive
if(do_ci_lazy_method_conversion(symbol_table))
return true;
break;
case LAZY_METHODS_MODE_EAGER:
{
symbol_table_buildert symbol_table_builder =
symbol_table_buildert::wrap(symbol_table);
journalling_symbol_tablet journalling_symbol_table =
journalling_symbol_tablet::wrap(symbol_table_builder);
// Convert all synthetic methods:
for(const auto &function_id_and_type : synthetic_methods)
{
convert_single_method(
function_id_and_type.first, journalling_symbol_table);
}
// Convert all methods for which we have bytecode now
for(const auto &method_sig : method_bytecode)
{
convert_single_method(method_sig.first, journalling_symbol_table);
}
// Now convert all newly added string methods
// NOTE(review): this call passes the underlying symbol_table rather than
// journalling_symbol_table, presumably so the set returned by
// get_inserted() is not modified while it is being iterated -- confirm.
for(const auto &fn_name : journalling_symbol_table.get_inserted())
{
if(string_preprocess.implements_function(fn_name))
convert_single_method(fn_name, symbol_table);
}
}
break;
default:
// Our caller is in charge of elaborating methods on demand.
break;
}
// now instrument runtime exceptions
java_bytecode_instrument(
symbol_table,
throw_runtime_exceptions,
get_message_handler());
// now typecheck all
bool res = java_bytecode_typecheck(
symbol_table, get_message_handler(), string_refinement_enabled);
// now instrument thread-blocks and synchronized methods.
// This runs regardless of the typecheck result, which is returned afterwards.
if(threading_support)
{
convert_threadblock(symbol_table);
convert_synchronized_methods(symbol_table, get_message_handler());
}
return res;
}
/// Look up the main function for `main_class`, load it into the symbol table
/// and generate the entry point that calls it.
/// \param symbol_table: global symbol table
/// \return if the main function lookup is not a success, whether it was an
///   error; otherwise the result of java_entry_point
bool java_bytecode_languaget::generate_support_functions(
  symbol_tablet &symbol_table)
{
  PRECONDITION(language_options_initialized);

  symbol_table_buildert symbol_table_builder =
    symbol_table_buildert::wrap(symbol_table);

  main_function_resultt main_lookup =
    get_main_symbol(symbol_table, main_class, get_message_handler());
  if(!main_lookup.is_success())
    return main_lookup.is_error();

  // Load the main function into the symbol table to get access to its
  // parameter names
  convert_lazy_method(main_lookup.main_function.name, symbol_table_builder);

  // Callback that builds the arguments for a call to the given function.
  const auto build_arguments =
    [this](const symbolt &function, symbol_table_baset &table) {
      return java_build_arguments(
        function,
        table,
        assume_inputs_non_null,
        object_factory_parameters,
        get_pointer_type_selector(),
        get_message_handler());
    };

  // Generate the test harness and a call to the entry point.
  return java_entry_point(
    symbol_table_builder,
    main_class,
    get_message_handler(),
    assume_inputs_non_null,
    assert_uncaught_exceptions,
    object_factory_parameters,
    get_pointer_type_selector(),
    string_refinement_enabled,
    build_arguments);
}
/// Runs a simple context-insensitive ('ci') analysis to decide which methods
/// may be reachable from the main entry point, and elaborates them.
/// Static methods count as reachable when a callsite is found in another
/// reachable site. Virtual methods count as reachable when there is a virtual
/// callsite targeting a compatible type *and* a constructor callsite showing
/// that an object of that type may be instantiated (or evidence that such an
/// object exists before the main function is entered, e.g. because it is
/// passed as a parameter).
/// \param symbol_table: global symbol table; lazily-converted methods that may
///   be reachable from the main entry point (usually provided with the
///   --function command-line option) are elaborated in it as a side effect
/// \return false on success
bool java_bytecode_languaget::do_ci_lazy_method_conversion(
  symbol_tablet &symbol_table)
{
  symbol_table_buildert builder = symbol_table_buildert::wrap(symbol_table);

  // Per-method conversion callback given to the gatherer: every request is
  // forwarded to convert_single_method, targeting the builder above.
  const method_convertert convert_one =
    [this, &builder](
      const irep_idt &method_id, ci_lazy_methods_neededt needed) {
      return convert_single_method(method_id, builder, std::move(needed));
    };

  ci_lazy_methodst gatherer(
    symbol_table,
    main_class,
    main_jar_classes,
    extra_methods,
    java_class_loader,
    java_load_classes,
    get_pointer_type_selector(),
    get_message_handler(),
    synthetic_methods);

  const bool failed = gatherer(symbol_table, method_bytecode, convert_one);
  return failed;
}
/// Access the pointer type selector held by this language instance.
/// It is a precondition that `pointer_type_selector` is not null.
/// \return reference to the object `pointer_type_selector` points to
const select_pointer_typet &
java_bytecode_languaget::get_pointer_type_selector() const
{
  PRECONDITION(pointer_type_selector.get()!=nullptr);

  const select_pointer_typet &selector = *pointer_type_selector;
  return selector;
}
/// Provide feedback to `language_filest` so that when asked for a lazy method,
/// it can delegate to this instance of java_bytecode_languaget.
/// \param [out] methods: set to which the complete list of lazy methods that
///   are available to convert (those which are valid parameters for
///   `convert_lazy_method`) is added: the function names reported by
///   `string_preprocess`, every key of `method_bytecode` and every key of
///   `synthetic_methods`. Entries already in the set are left in place.
void java_bytecode_languaget::methods_provided(
  std::unordered_set<irep_idt> &methods) const
{
  // Add all string solver methods to the set
  string_preprocess.get_all_function_names(methods);

  // Add all concrete methods (those for which bytecode is held) to the set
  for(const auto &kv : method_bytecode)
    methods.insert(kv.first);

  // Add all synthetic methods to the set
  for(const auto &kv : synthetic_methods)
    methods.insert(kv.first);
}
/// \brief Promote a lazy-converted method (one whose type is known but whose
/// body hasn't been converted) into a fully-elaborated one.
/// \remarks Amends the symbol table entry for function `function_id`, which
/// should be a method provided by this instance of `java_bytecode_languaget`
/// to have a value representing the method body identical to that produced
/// using eager method conversion.
/// \param function_id: method ID to convert
/// \param symtab: global symbol table
void java_bytecode_languaget::convert_lazy_method(
const irep_idt &function_id,
symbol_table_baset &symtab)
{
const symbolt &symbol = symtab.lookup_ref(function_id);
if(symbol.value.is_not_nil())
return;
journalling_symbol_tablet symbol_table=
journalling_symbol_tablet::wrap(symtab);
convert_single_method(function_id, symbol_table);
// Instrument runtime exceptions (unless symbol is a stub)
if(symbol.value.is_not_nil())
{
java_bytecode_instrument_symbol(
symbol_table,
symbol_table.get_writeable_ref(function_id),
throw_runtime_exceptions,
get_message_handler());
}
// now typecheck this function
java_bytecode_typecheck_updated_symbols(
symbol_table, get_message_handler(), string_refinement_enabled);
}
/// Notify ci_lazy_methods, if present, of any static function calls made by
/// the given function body.
/// \param function_body: function body code
/// \param needed_lazy_methods: optional ci_lazy_methods_neededt interface. If
/// not set, this is a no-op; otherwise, its add_needed_method function will
/// be called for each function call in `function_body`.
static void notify_static_method_calls(
const codet &function_body,
optionalt<ci_lazy_methods_neededt> needed_lazy_methods)