Skip to content

Commit 793e073

Browse files
authored
Update GlobalStructInference to handle atomics (WebAssembly#7168)
GlobalStructInference optimizes gets of immutable fields of structs that are only ever instantiated to initialize immutable globals. Due to all the immutability, it's not possible for the optimized reads to synchronize with any writes via the accessed memory, so we just need to be careful to replace removed seqcst gets with seqcst fences. As a drive-by, fix some stale comments in gsi.wast.
1 parent 7c42700 commit 793e073

File tree

2 files changed

+251
-12
lines changed

2 files changed

+251
-12
lines changed

src/passes/GlobalStructInference.cpp

Lines changed: 28 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -359,6 +359,11 @@ struct GlobalStructInference : public Pass {
359359
// refined, which could change the struct.get's type.
360360
refinalize = true;
361361
}
362+
// No need to worry about atomic gets here. We will still read from
363+
// the same memory location as before and preserve all side effects
364+
// (including synchronization) that were previously present. The
365+
// memory location is immutable anyway, so there cannot be any writes
366+
// to synchronize with in the first place.
362367
curr->ref = builder.makeSequence(
363368
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
364369
builder.makeGlobalGet(global, globalType));
@@ -457,10 +462,18 @@ struct GlobalStructInference : public Pass {
457462
// the early return above) so that only leaves 1 and 2.
458463
if (values.size() == 1) {
459464
// The case of 1 value is simple: trap if the ref is null, and
460-
// otherwise return the value.
461-
replaceCurrent(builder.makeSequence(
462-
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref)),
463-
getReadValue(values[0])));
465+
// otherwise return the value. We must also fence if the get was
466+
// seqcst. No additional work is necessary for an acquire get because
467+
// there cannot have been any writes to this immutable field that it
468+
// would synchronize with.
469+
Expression* replacement =
470+
builder.makeDrop(builder.makeRefAs(RefAsNonNull, curr->ref));
471+
if (curr->order == MemoryOrder::SeqCst) {
472+
replacement =
473+
builder.blockify(replacement, builder.makeAtomicFence());
474+
}
475+
replaceCurrent(
476+
builder.blockify(replacement, getReadValue(values[0])));
464477
return;
465478
}
466479
assert(values.size() == 2);
@@ -486,11 +499,19 @@ struct GlobalStructInference : public Pass {
486499
// of their execution matters (they may note globals for un-nesting).
487500
auto* left = getReadValue(values[0]);
488501
auto* right = getReadValue(values[1]);
489-
// Note that we must trap on null, so add a ref.as_non_null here.
502+
// Note that we must trap on null, so add a ref.as_non_null here. We
503+
// must also add a fence if this get is seqcst. As before, no extra work
504+
// is necessary for an acquire get because there cannot be a write it
505+
// synchronizes with.
506+
Expression* getGlobal =
507+
builder.makeGlobalGet(checkGlobal, wasm.getGlobal(checkGlobal)->type);
508+
if (curr->order == MemoryOrder::SeqCst) {
509+
getGlobal =
510+
builder.makeSequence(builder.makeAtomicFence(), getGlobal);
511+
}
490512
replaceCurrent(builder.makeSelect(
491513
builder.makeRefEq(builder.makeRefAs(RefAsNonNull, curr->ref),
492-
builder.makeGlobalGet(
493-
checkGlobal, wasm.getGlobal(checkGlobal)->type)),
514+
getGlobal),
494515
left,
495516
right));
496517
}

test/lit/passes/gsi.wast

Lines changed: 223 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -144,16 +144,16 @@
144144
;; CHECK-NEXT: )
145145
;; CHECK-NEXT: )
146146
(func $test1 (param $struct1 (ref null $struct1)) (param $struct2 (ref null $struct2))
147-
;; We can infer that this get must reference $global1 and make the reference
148-
;; point to that. Note that we do not infer the value of 42 here, but leave
149-
;; it for other passes to do.
147+
;; Even though the value here is not known at compile time - it reads an
148+
;; imported global - we can still infer that we are reading from $global1.
150149
(drop
151150
(struct.get $struct1 0
152151
(local.get $struct1)
153152
)
154153
)
155-
;; Even though the value here is not known at compile time - it reads an
156-
;; imported global - we can still infer that we are reading from $global2.
154+
;; We can infer that this get must reference $global2 and make the reference
155+
;; point to that. Note that we do not infer the value of 42 here, but leave
156+
;; it for other passes to do.
157157
(drop
158158
(struct.get $struct2 0
159159
(local.get $struct2)
@@ -1944,3 +1944,221 @@
19441944
)
19451945
)
19461946
)
1947+
1948+
;; Test atomic gets.
1949+
(module
1950+
(rec
1951+
;; CHECK: (rec
1952+
;; CHECK-NEXT: (type $one (shared (struct (field i32))))
1953+
(type $one (shared (struct (field i32))))
1954+
;; CHECK: (type $two (shared (struct (field i32))))
1955+
(type $two (shared (struct (field i32))))
1956+
;; CHECK: (type $two-same (shared (struct (field i32))))
1957+
(type $two-same (shared (struct (field i32))))
1958+
)
1959+
1960+
;; CHECK: (type $3 (func (param (ref $one))))
1961+
1962+
;; CHECK: (type $4 (func (param (ref $two))))
1963+
1964+
;; CHECK: (type $5 (func (param (ref $two-same))))
1965+
1966+
;; CHECK: (global $one (ref $one) (struct.new $one
1967+
;; CHECK-NEXT: (i32.const 42)
1968+
;; CHECK-NEXT: ))
1969+
(global $one (ref $one) (struct.new $one (i32.const 42)))
1970+
1971+
;; CHECK: (global $two-a (ref $two) (struct.new $two
1972+
;; CHECK-NEXT: (i32.const 42)
1973+
;; CHECK-NEXT: ))
1974+
(global $two-a (ref $two) (struct.new $two (i32.const 42)))
1975+
1976+
;; CHECK: (global $two-b (ref $two) (struct.new $two
1977+
;; CHECK-NEXT: (i32.const 1337)
1978+
;; CHECK-NEXT: ))
1979+
(global $two-b (ref $two) (struct.new $two (i32.const 1337)))
1980+
1981+
;; CHECK: (global $two-same-a (ref $two-same) (struct.new $two-same
1982+
;; CHECK-NEXT: (i32.const 42)
1983+
;; CHECK-NEXT: ))
1984+
(global $two-same-a (ref $two-same) (struct.new $two-same (i32.const 42)))
1985+
1986+
;; CHECK: (global $two-same-b (ref $two-same) (struct.new $two-same
1987+
;; CHECK-NEXT: (i32.const 42)
1988+
;; CHECK-NEXT: ))
1989+
(global $two-same-b (ref $two-same) (struct.new $two-same (i32.const 42)))
1990+
1991+
;; CHECK: (func $one (type $3) (param $0 (ref $one))
1992+
;; CHECK-NEXT: (drop
1993+
;; CHECK-NEXT: (struct.get $one 0
1994+
;; CHECK-NEXT: (block (result (ref $one))
1995+
;; CHECK-NEXT: (drop
1996+
;; CHECK-NEXT: (ref.as_non_null
1997+
;; CHECK-NEXT: (local.get $0)
1998+
;; CHECK-NEXT: )
1999+
;; CHECK-NEXT: )
2000+
;; CHECK-NEXT: (global.get $one)
2001+
;; CHECK-NEXT: )
2002+
;; CHECK-NEXT: )
2003+
;; CHECK-NEXT: )
2004+
;; CHECK-NEXT: (drop
2005+
;; CHECK-NEXT: (struct.atomic.get acqrel $one 0
2006+
;; CHECK-NEXT: (block (result (ref $one))
2007+
;; CHECK-NEXT: (drop
2008+
;; CHECK-NEXT: (ref.as_non_null
2009+
;; CHECK-NEXT: (local.get $0)
2010+
;; CHECK-NEXT: )
2011+
;; CHECK-NEXT: )
2012+
;; CHECK-NEXT: (global.get $one)
2013+
;; CHECK-NEXT: )
2014+
;; CHECK-NEXT: )
2015+
;; CHECK-NEXT: )
2016+
;; CHECK-NEXT: (drop
2017+
;; CHECK-NEXT: (struct.atomic.get $one 0
2018+
;; CHECK-NEXT: (block (result (ref $one))
2019+
;; CHECK-NEXT: (drop
2020+
;; CHECK-NEXT: (ref.as_non_null
2021+
;; CHECK-NEXT: (local.get $0)
2022+
;; CHECK-NEXT: )
2023+
;; CHECK-NEXT: )
2024+
;; CHECK-NEXT: (global.get $one)
2025+
;; CHECK-NEXT: )
2026+
;; CHECK-NEXT: )
2027+
;; CHECK-NEXT: )
2028+
;; CHECK-NEXT: )
2029+
(func $one (param (ref $one))
2030+
(drop
2031+
(struct.get $one 0
2032+
(local.get 0)
2033+
)
2034+
)
2035+
(drop
2036+
(struct.atomic.get acqrel $one 0
2037+
(local.get 0)
2038+
)
2039+
)
2040+
(drop
2041+
(struct.atomic.get $one 0
2042+
(local.get 0)
2043+
)
2044+
)
2045+
)
2046+
2047+
;; CHECK: (func $two (type $4) (param $0 (ref $two))
2048+
;; CHECK-NEXT: (drop
2049+
;; CHECK-NEXT: (select
2050+
;; CHECK-NEXT: (i32.const 42)
2051+
;; CHECK-NEXT: (i32.const 1337)
2052+
;; CHECK-NEXT: (ref.eq
2053+
;; CHECK-NEXT: (ref.as_non_null
2054+
;; CHECK-NEXT: (local.get $0)
2055+
;; CHECK-NEXT: )
2056+
;; CHECK-NEXT: (global.get $two-a)
2057+
;; CHECK-NEXT: )
2058+
;; CHECK-NEXT: )
2059+
;; CHECK-NEXT: )
2060+
;; CHECK-NEXT: (drop
2061+
;; CHECK-NEXT: (select
2062+
;; CHECK-NEXT: (i32.const 42)
2063+
;; CHECK-NEXT: (i32.const 1337)
2064+
;; CHECK-NEXT: (ref.eq
2065+
;; CHECK-NEXT: (ref.as_non_null
2066+
;; CHECK-NEXT: (local.get $0)
2067+
;; CHECK-NEXT: )
2068+
;; CHECK-NEXT: (global.get $two-a)
2069+
;; CHECK-NEXT: )
2070+
;; CHECK-NEXT: )
2071+
;; CHECK-NEXT: )
2072+
;; CHECK-NEXT: (drop
2073+
;; CHECK-NEXT: (select
2074+
;; CHECK-NEXT: (i32.const 42)
2075+
;; CHECK-NEXT: (i32.const 1337)
2076+
;; CHECK-NEXT: (ref.eq
2077+
;; CHECK-NEXT: (ref.as_non_null
2078+
;; CHECK-NEXT: (local.get $0)
2079+
;; CHECK-NEXT: )
2080+
;; CHECK-NEXT: (block (result (ref $two))
2081+
;; CHECK-NEXT: (atomic.fence)
2082+
;; CHECK-NEXT: (global.get $two-a)
2083+
;; CHECK-NEXT: )
2084+
;; CHECK-NEXT: )
2085+
;; CHECK-NEXT: )
2086+
;; CHECK-NEXT: )
2087+
;; CHECK-NEXT: )
2088+
(func $two (param (ref $two))
2089+
(drop
2090+
(struct.get $two 0
2091+
(local.get 0)
2092+
)
2093+
)
2094+
(drop
2095+
;; This is optimized normally because there cannot be any writes it
2096+
;; synchronizes with.
2097+
(struct.atomic.get acqrel $two 0
2098+
(local.get 0)
2099+
)
2100+
)
2101+
(drop
2102+
;; This requires a fence to maintain its effect on the global order of
2103+
;; seqcst operations.
2104+
(struct.atomic.get $two 0
2105+
(local.get 0)
2106+
)
2107+
)
2108+
)
2109+
2110+
;; CHECK: (func $two-same (type $5) (param $0 (ref $two-same))
2111+
;; CHECK-NEXT: (drop
2112+
;; CHECK-NEXT: (block (result i32)
2113+
;; CHECK-NEXT: (drop
2114+
;; CHECK-NEXT: (ref.as_non_null
2115+
;; CHECK-NEXT: (local.get $0)
2116+
;; CHECK-NEXT: )
2117+
;; CHECK-NEXT: )
2118+
;; CHECK-NEXT: (i32.const 42)
2119+
;; CHECK-NEXT: )
2120+
;; CHECK-NEXT: )
2121+
;; CHECK-NEXT: (drop
2122+
;; CHECK-NEXT: (block (result i32)
2123+
;; CHECK-NEXT: (drop
2124+
;; CHECK-NEXT: (ref.as_non_null
2125+
;; CHECK-NEXT: (local.get $0)
2126+
;; CHECK-NEXT: )
2127+
;; CHECK-NEXT: )
2128+
;; CHECK-NEXT: (i32.const 42)
2129+
;; CHECK-NEXT: )
2130+
;; CHECK-NEXT: )
2131+
;; CHECK-NEXT: (drop
2132+
;; CHECK-NEXT: (block (result i32)
2133+
;; CHECK-NEXT: (drop
2134+
;; CHECK-NEXT: (ref.as_non_null
2135+
;; CHECK-NEXT: (local.get $0)
2136+
;; CHECK-NEXT: )
2137+
;; CHECK-NEXT: )
2138+
;; CHECK-NEXT: (atomic.fence)
2139+
;; CHECK-NEXT: (i32.const 42)
2140+
;; CHECK-NEXT: )
2141+
;; CHECK-NEXT: )
2142+
;; CHECK-NEXT: )
2143+
(func $two-same (param (ref $two-same))
2144+
(drop
2145+
(struct.get $two-same 0
2146+
(local.get 0)
2147+
)
2148+
)
2149+
(drop
2150+
;; This is optimized normally because there cannot be any writes it
2151+
;; synchronizes with.
2152+
(struct.atomic.get acqrel $two-same 0
2153+
(local.get 0)
2154+
)
2155+
)
2156+
(drop
2157+
;; This requires a fence to maintain its effect on the global order of
2158+
;; seqcst operations.
2159+
(struct.atomic.get $two-same 0
2160+
(local.get 0)
2161+
)
2162+
)
2163+
)
2164+
)

0 commit comments

Comments
 (0)