Commit 7c42700

Do not optimize atomic gets in GUFA (WebAssembly#7161)
Conservatively avoid introducing synchronization bugs by not optimizing atomic struct.gets at all in GUFA. It is possible that we could be more precise in the future. As a drive-by, also remove obsolete logic dealing with the types of null values: all null values now have bottom types, so the type mismatch that code checked for is impossible.
1 parent 74b2b06 commit 7c42700
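
The hazard being guarded against is the standard acquire/release publication pattern: an atomic read is a synchronization edge, not just a value, so folding the read to its known value also deletes the edge. A minimal C++ analogy of that hazard (std::atomic rather than wasm; illustrative only, not code from this commit):

#include <atomic>
#include <cstdio>
#include <thread>

// data is published by the release store to ready; the acquire load is
// what orders the reader's access to data after the writer's.
int data = 0;
std::atomic<bool> ready{false};

void writer() {
  data = 42;                                    // plain write, made visible by...
  ready.store(true, std::memory_order_release); // ...this release store
}

void reader() {
  if (ready.load(std::memory_order_acquire)) { // the synchronization edge
    std::printf("%d\n", data);                 // safe only because of it
  }
}

int main() {
  std::thread t1(writer), t2(reader);
  t1.join();
  t2.join();
}

Even if an analysis proved that only one value is ever stored to ready, rewriting the acquire load to that value would leave the read of data unordered relative to the writer, which is the class of bug the GUFA change below refuses to introduce.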

File tree

3 files changed: +85 -14 lines

src/ir/properties.h

Lines changed: 24 additions & 0 deletions
@@ -486,6 +486,30 @@ inline bool canEmitSelectWithArms(Expression* ifTrue, Expression* ifFalse) {
   return ifTrue->type.isSingle() && ifFalse->type.isSingle();
 }
 
+// If this instruction accesses memory or the heap, or otherwise participates in
+// shared memory synchronization, return the memory order corresponding to the
+// kind of synchronization it does. Return MemoryOrder::Unordered if there is no
+// synchronization. Does not look at children.
+inline MemoryOrder getMemoryOrder(Expression* curr) {
+  if (auto* get = curr->dynCast<StructGet>()) {
+    return get->order;
+  }
+  if (auto* set = curr->dynCast<StructSet>()) {
+    return set->order;
+  }
+  if (auto* load = curr->dynCast<Load>()) {
+    return load->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
+  }
+  if (auto* store = curr->dynCast<Store>()) {
+    return store->isAtomic ? MemoryOrder::SeqCst : MemoryOrder::Unordered;
+  }
+  if (curr->is<AtomicRMW>() || curr->is<AtomicWait>() ||
+      curr->is<AtomicNotify>() || curr->is<AtomicFence>()) {
+    return MemoryOrder::SeqCst;
+  }
+  return MemoryOrder::Unordered;
+}
+
 // A "generative" expression is one that can generate different results for the
 // same inputs, and that difference is *not* explained by other expressions that
 // interact with this one. This is an intrinsic/internal property of the
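
A sketch of the intended contract for callers (a hypothetical wrapper, not part of this commit): any result other than MemoryOrder::Unordered marks the expression as participating in synchronization, and value-based rewrites should back off. The GUFA.cpp change below uses this exact check inline.

#include "ir/properties.h"

namespace wasm {

// Hypothetical helper over the new API: true for atomic struct.get/set,
// atomic loads/stores, RMW operations, wait/notify, and fences.
static bool synchronizes(Expression* curr) {
  return Properties::getMemoryOrder(curr) != MemoryOrder::Unordered;
}

} // namespace wasm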

src/passes/GUFA.cpp

Lines changed: 8 additions & 14 deletions
@@ -150,20 +150,14 @@ struct GUFAOptimizer
       return;
     }
 
-    if (contents.isNull() && curr->type.isNullable()) {
-      // Null values are all identical, so just fix up the type here if we need
-      // to (the null's type might not fit in this expression, if it passed
-      // through casts).
-      if (!Type::isSubType(contents.getType(), curr->type)) {
-        contents = PossibleContents::literal(
-          Literal::makeNull(curr->type.getHeapType()));
-      }
-
-      // Note that if curr's type is *not* nullable, then the code will trap at
-      // runtime (the null must arrive through a cast that will trap). We handle
-      // that below, so we don't need to think about it here.
-
-      // TODO: would emitting a more specific null be useful when valid?
+    if (Properties::getMemoryOrder(curr) != MemoryOrder::Unordered) {
+      // This load might synchronize with some store, and if we replaced the
+      // load with a constant or with a load from a global, it would not
+      // synchronize with that store anymore. Since we know what value the store
+      // must write, and we know it is the same as every other store to the same
+      // location, it's possible that optimizing here would be allowable, but
+      // for now be conservative and do not optimize.
+      return;
     }
 
     auto* c = contents.makeExpression(wasm);
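
In the non-atomic case that still falls through, the known contents are materialized as a replacement expression. A hand-rolled equivalent of what that amounts to for the plain struct.get in the new test below (Builder usage is illustrative; the pass itself goes through contents.makeExpression):

#include "wasm-builder.h"

namespace wasm {

// Illustration only: the single possible value of field 0 in the test's
// struct is the default 0, so the optimizable struct.get collapses to
// (i32.const 0); the reference operand has no side effects and can be
// dropped entirely.
static Expression* makeReplacement(Module& wasm) {
  Builder builder(wasm);
  return builder.makeConst(Literal(int32_t(0)));
}

} // namespace wasm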

test/lit/passes/gufa-refs.wast

Lines changed: 53 additions & 0 deletions
@@ -6137,3 +6137,56 @@
   )
  )
 )
+
+;; Atomic accesses require special handling
+(module
+  ;; CHECK:      (type $A (shared (struct (field i32))))
+  (type $A (shared (struct (field i32))))
+
+  ;; CHECK:      (type $1 (func))
+
+  ;; CHECK:      (func $gets (type $1)
+  ;; CHECK-NEXT:  (local $0 (ref $A))
+  ;; CHECK-NEXT:  (local.set $0
+  ;; CHECK-NEXT:   (struct.new_default $A)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (i32.const 0)
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get acqrel $A 0
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT:  (drop
+  ;; CHECK-NEXT:   (struct.atomic.get $A 0
+  ;; CHECK-NEXT:    (local.get $0)
+  ;; CHECK-NEXT:   )
+  ;; CHECK-NEXT:  )
+  ;; CHECK-NEXT: )
+  (func $gets
+    (local (ref $A))
+    (local.set 0
+      (struct.new_default $A)
+    )
+    (drop
+      ;; This is optimizable. It reads from shared memory, but there is only one
+      ;; possible value that can be read.
+      (struct.get $A 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; We do not (yet) optimize atomic gets.
+      (struct.atomic.get acqrel $A 0
+        (local.get 0)
+      )
+    )
+    (drop
+      ;; We do not (yet) optimize atomic gets.
+      (struct.atomic.get $A 0
+        (local.get 0)
+      )
+    )
+  )
+)
