From 1059ee72be909e8bba12bb598e8ab8fab7ae3edd Mon Sep 17 00:00:00 2001
From: Daniel Kroening <dkr@amazon.com>
Date: Mon, 23 Dec 2024 15:57:33 +0000
Subject: [PATCH] uninitialized check

Uninitialized local or dynamically allocated variables have an indeterminate
initial value.

Reading an indeterminate value _may_ be undefined behavior, or may yield an
unspecific value.

This adds a check for uninitialized local variables.  The check is not added
as a standard check.
---
 src/ansi-c/goto-conversion/goto_check_c.cpp | 86 +++++++++++++++++++--
 src/ansi-c/goto-conversion/goto_check_c.h   |  7 ++
 2 files changed, 86 insertions(+), 7 deletions(-)

diff --git a/src/ansi-c/goto-conversion/goto_check_c.cpp b/src/ansi-c/goto-conversion/goto_check_c.cpp
index 6ff092e2555..bf813a88973 100644
--- a/src/ansi-c/goto-conversion/goto_check_c.cpp
+++ b/src/ansi-c/goto-conversion/goto_check_c.cpp
@@ -46,13 +46,15 @@ class goto_check_ct
 {
 public:
   goto_check_ct(
-    const namespacet &_ns,
+    symbol_table_baset &_symbol_table,
     const optionst &_options,
     message_handlert &_message_handler)
-    : ns(_ns), local_bitvector_analysis(nullptr), log(_message_handler)
+    : ns(_symbol_table), symbol_table(_symbol_table), local_bitvector_analysis(nullptr), log(_message_handler)
   {
     enable_bounds_check = _options.get_bool_option("bounds-check");
     enable_pointer_check = _options.get_bool_option("pointer-check");
+    enable_uninitialized_check =
+      _options.get_bool_option("uninitialized-check");
     enable_memory_leak_check = _options.get_bool_option("memory-leak-check");
     enable_memory_cleanup_check =
       _options.get_bool_option("memory-cleanup-check");
@@ -94,7 +96,8 @@ class goto_check_ct
   void collect_allocations(const goto_functionst &goto_functions);
 
 protected:
-  const namespacet &ns;
+  const namespacet ns;
+  symbol_table_baset &symbol_table;
   std::unique_ptr<local_bitvector_analysist> local_bitvector_analysis;
   goto_programt::const_targett current_target;
   messaget log;
@@ -189,6 +192,8 @@ class goto_check_ct
   void undefined_shift_check(const shift_exprt &, const guardt &);
   void pointer_rel_check(const binary_exprt &, const guardt &);
   void pointer_overflow_check(const exprt &, const guardt &);
+  void
+  uninitialized_check(const symbol_exprt &, const guardt &, bool is_assigned);
   void memory_leak_check(const irep_idt &function_id);
 
   /// Generates VCCs for the validity of the given dereferencing operation.
@@ -265,6 +270,7 @@ class goto_check_ct
 
   bool enable_bounds_check;
   bool enable_pointer_check;
+  bool enable_uninitialized_check;
   bool enable_memory_leak_check;
   bool enable_memory_cleanup_check;
   bool enable_div_by_zero_check;
@@ -286,6 +292,7 @@ class goto_check_ct
   std::map<irep_idt, bool *> name_to_flag{
     {"bounds-check", &enable_bounds_check},
     {"pointer-check", &enable_pointer_check},
+    {"uninitialized-check", &enable_uninitialized_check},
     {"memory-leak-check", &enable_memory_leak_check},
     {"memory-cleanup-check", &enable_memory_cleanup_check},
     {"div-by-zero-check", &enable_div_by_zero_check},
@@ -1341,6 +1348,66 @@ void goto_check_ct::nan_check(const exprt &expr, const guardt &guard)
     guard);
 }
 
+void goto_check_ct::uninitialized_check(
+  const symbol_exprt &expr,
+  const guardt &guard,
+  bool is_assigned)
+{
+  if(!enable_uninitialized_check)
+    return;
+
+  // Ignore C function symbols.
+  if(expr.type().id() == ID_code)
+    return;
+
+  // Don't check the LHS of an assignment -- these do not
+  // have to be initialized
+  if(is_assigned)
+    return;
+
+  // Look up the symbol
+  auto &symbol = ns.lookup(expr);
+
+  // Anything with static lifetime is initialized by the runtime,
+  // and is hence never uninitialized.
+  if(symbol.is_static_lifetime)
+    return;
+
+  // Is the address taken? If so, uninitialized accesses are merely
+  // intederminate, but not undefined.
+  if(local_bitvector_analysis->dirty(expr))
+    return;
+
+  // Ignore structs/union/arrays for now
+  if(
+    symbol.type.id() == ID_struct || symbol.type.id() == ID_union ||
+    symbol.type.id() == ID_array)
+  {
+    return;
+  }
+
+  // Look up the bits that track initialization
+  auto init_bit_id = id2string(symbol.name) + "$init";
+
+  if(!symbol_table.has_symbol(init_bit_id))
+  {
+    auxiliary_symbolt new_init_symbol{init_bit_id, bool_typet{}, symbol.mode};
+    new_init_symbol.is_static_lifetime = false;
+    symbol_table.add(new_init_symbol);
+  }
+
+  const symbolt &init_symbol = ns.lookup(init_bit_id);
+
+  add_guarded_property(
+    init_symbol.symbol_expr(),
+    "reading uninitialized local",
+    "uninitialized",
+    true, // not fatal
+    expr.find_source_location(),
+    expr,
+    guard);
+}
+
 void goto_check_ct::pointer_rel_check(
   const binary_exprt &expr,
   const guardt &guard)
@@ -2061,6 +2128,10 @@ void goto_check_ct::check_rec(
   {
     pointer_validity_check(to_dereference_expr(expr), expr, guard);
   }
+  else if(expr.id() == ID_symbol)
+  {
+    uninitialized_check(to_symbol_expr(expr), guard, is_assigned);
+  }
   else if(requires_pointer_primitive_check(expr))
   {
     pointer_primitive_check(expr, guard);
@@ -2469,6 +2540,7 @@ exprt goto_check_ct::is_in_bounds_of_some_explicit_allocation(
   return disjunction(alloc_disjuncts);
 }
 
+#if 0
 void goto_check_c(
   const irep_idt &function_identifier,
   goto_functionst::goto_functiont &goto_function,
@@ -2479,14 +2551,15 @@ void goto_check_c(
   goto_check_ct goto_check(ns, options, message_handler);
   goto_check.goto_check(function_identifier, goto_function);
 }
+#endif
 
 void goto_check_c(
-  const namespacet &ns,
   const optionst &options,
+  symbol_table_baset &symbol_table,
   goto_functionst &goto_functions,
   message_handlert &message_handler)
 {
-  goto_check_ct goto_check(ns, options, message_handler);
+  goto_check_ct goto_check(symbol_table, options, message_handler);
 
   goto_check.collect_allocations(goto_functions);
 
@@ -2501,8 +2574,7 @@ void goto_check_c(
   goto_modelt &goto_model,
   message_handlert &message_handler)
 {
-  const namespacet ns(goto_model.symbol_table);
-  goto_check_c(ns, options, goto_model.goto_functions, message_handler);
+  goto_check_c(options, goto_model.symbol_table, goto_model.goto_functions, message_handler);
 }
 
 void goto_check_ct::add_active_named_check_pragmas(
diff --git a/src/ansi-c/goto-conversion/goto_check_c.h b/src/ansi-c/goto-conversion/goto_check_c.h
index e560634cf16..206d6be3e9a 100644
--- a/src/ansi-c/goto-conversion/goto_check_c.h
+++ b/src/ansi-c/goto-conversion/goto_check_c.h
@@ -19,6 +19,7 @@ class namespacet;
 class optionst;
 class message_handlert;
 
+#if 0
 void goto_check_c(
   const namespacet &ns,
   const optionst &options,
@@ -31,6 +32,7 @@ void goto_check_c(
   const namespacet &ns,
   const optionst &options,
   message_handlert &message_handler);
+#endif
 
 void goto_check_c(
   const optionst &options,
@@ -39,6 +41,7 @@ void goto_check_c(
 
 #define OPT_GOTO_CHECK                                                         \
   "(bounds-check)(pointer-check)(memory-leak-check)(memory-cleanup-check)"     \
+  "(uninitialized-check)"                                                      \
   "(div-by-zero-check)(float-div-by-zero-check)"                               \
   "(enum-range-check)"                                                         \
   "(signed-overflow-check)(unsigned-overflow-check)"                           \
@@ -51,6 +54,7 @@ void goto_check_c(
   "(assert-to-assume)"                                                         \
   "(no-bounds-check)(no-pointer-check)(no-signed-overflow-check)"              \
   "(no-pointer-primitive-check)(no-undefined-shift-check)"                     \
+  "(no-uninitialized-check)"                                                   \
   "(no-div-by-zero-check)"
 
 // clang-format off
@@ -59,6 +63,8 @@ void goto_check_c(
   " {y--bounds-check} \t enable array bounds checks (default on)\n"            \
   " {y--no-bounds-check} \t disable array bounds checks\n"                     \
   " {y--pointer-check} \t enable pointer checks (default on)\n"                \
+  " {y--uninitialized-check} \t enable checks for uninitialized data (default off)\n" \
+  " {y--no-uninitialized-check} \t disable checks for uninitialized data\n"    \
   " {y--no-pointer-check} \t disable pointer checks\n"                         \
   " {y--memory-leak-check} \t enable memory leak checks\n"                     \
   " {y--memory-cleanup-check} \t enable memory cleanup checks\n"               \
@@ -126,6 +132,7 @@ void goto_check_c(
     options.set_option("error-label", cmdline.get_values("error-label"));      \
   PARSE_OPTION_OVERRIDE(cmdline, options, "bounds-check"); \
   PARSE_OPTION_OVERRIDE(cmdline, options, "pointer-check"); \
+  PARSE_OPTION_OVERRIDE(cmdline, options, "uninitialized-check"); \
   PARSE_OPTION_OVERRIDE(cmdline, options, "div-by-zero-check"); \
   PARSE_OPTION_OVERRIDE(cmdline, options, "float-div-by-zero-check"); \
   PARSE_OPTION_OVERRIDE(cmdline, options, "signed-overflow-check"); \