Skip to content

Commit 38a712f

Browse files
authored
[Relaxed SIMD] Do not optimize relaxed SIMD (#7465)
Add an interpreter mode that returns "nonconstant" for relaxed SIMD instructions. This is what we need when optimizing: we do not know which machine the code will run on, and the results of relaxed SIMD instructions must match that machine's behavior.
1 parent 948d4a6 commit 38a712f

File tree

4 files changed

+129
-10
lines changed

4 files changed

+129
-10
lines changed

src/tools/execution-results.h

+4
Original file line numberDiff line numberDiff line change
@@ -271,6 +271,9 @@ struct ExecutionResults {
271271
LoggingExternalInterface interface(loggings, wasm);
272272
try {
273273
ModuleRunner instance(wasm, &interface);
274+
// This is not an optimization: we want to execute anything, even relaxed
275+
// SIMD instructions.
276+
instance.setRelaxedBehavior(ModuleRunner::RelaxedBehavior::Execute);
274277
instance.instantiate();
275278
interface.setModuleRunner(&instance);
276279
// execute all exported methods (that are therefore preserved through
@@ -431,6 +434,7 @@ struct ExecutionResults {
431434
LoggingExternalInterface interface(loggings, wasm);
432435
try {
433436
ModuleRunner instance(wasm, &interface);
437+
instance.setRelaxedBehavior(ModuleRunner::RelaxedBehavior::Execute);
434438
instance.instantiate();
435439
interface.setModuleRunner(&instance);
436440
return run(func, wasm, instance);

src/tools/wasm-shell.cpp

+3
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,9 @@ struct Shell {
139139
std::make_shared<ShellExternalInterface>(linkedInstances);
140140
auto instance =
141141
std::make_shared<ModuleRunner>(wasm, interface.get(), linkedInstances);
142+
// This is not an optimization: we want to execute anything, even relaxed
143+
// SIMD instructions.
144+
instance->setRelaxedBehavior(ModuleRunner::RelaxedBehavior::Execute);
142145
instance->instantiate();
143146
return {{std::move(interface), std::move(instance)}};
144147
} catch (...) {

src/wasm-interpreter.h

+86-10
Original file line numberDiff line numberDiff line change
@@ -219,13 +219,29 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
219219
// Indicates no limit of maxDepth or maxLoopIterations.
220220
static const Index NO_LIMIT = 0;
221221

222+
enum RelaxedBehavior {
223+
// Consider relaxed SIMD instructions non-constant. This is suitable for
224+
// optimizations, as we bake the results of optimizations into the output,
225+
// but relaxed operations must behave according to the host semantics, not
226+
// ours, so we do not want to optimize such expressions.
227+
NonConstant,
228+
// Execute relaxed SIMD instructions.
229+
Execute,
230+
};
231+
232+
protected:
233+
RelaxedBehavior relaxedBehavior = RelaxedBehavior::NonConstant;
234+
235+
public:
222236
ExpressionRunner(Module* module = nullptr,
223237
Index maxDepth = NO_LIMIT,
224238
Index maxLoopIterations = NO_LIMIT)
225239
: module(module), maxDepth(maxDepth), maxLoopIterations(maxLoopIterations) {
226240
}
227241
virtual ~ExpressionRunner() = default;
228242

243+
void setRelaxedBehavior(RelaxedBehavior value) { relaxedBehavior = value; }
244+
229245
Flow visit(Expression* curr) {
230246
depth++;
231247
if (maxDepth != NO_LIMIT && depth > maxDepth) {
@@ -584,11 +600,21 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
584600
return value.extAddPairwiseToSI32x4();
585601
case ExtAddPairwiseUVecI16x8ToI32x4:
586602
return value.extAddPairwiseToUI32x4();
587-
case TruncSatSVecF32x4ToVecI32x4:
588603
case RelaxedTruncSVecF32x4ToVecI32x4:
604+
// TODO: We could return nonconstant only if the actual values are in the
605+
// relaxed range.
606+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
607+
return NONCONSTANT_FLOW;
608+
}
609+
[[fallthrough]];
610+
case TruncSatSVecF32x4ToVecI32x4:
589611
return value.truncSatToSI32x4();
590-
case TruncSatUVecF32x4ToVecI32x4:
591612
case RelaxedTruncUVecF32x4ToVecI32x4:
613+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
614+
return NONCONSTANT_FLOW;
615+
}
616+
[[fallthrough]];
617+
case TruncSatUVecF32x4ToVecI32x4:
592618
return value.truncSatToUI32x4();
593619
case ConvertSVecI32x4ToVecF32x4:
594620
return value.convertSToF32x4();
@@ -622,11 +648,19 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
622648
return value.convertLowSToF64x2();
623649
case ConvertLowUVecI32x4ToVecF64x2:
624650
return value.convertLowUToF64x2();
625-
case TruncSatZeroSVecF64x2ToVecI32x4:
626651
case RelaxedTruncZeroSVecF64x2ToVecI32x4:
652+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
653+
return NONCONSTANT_FLOW;
654+
}
655+
[[fallthrough]];
656+
case TruncSatZeroSVecF64x2ToVecI32x4:
627657
return value.truncSatZeroSToI32x4();
628-
case TruncSatZeroUVecF64x2ToVecI32x4:
629658
case RelaxedTruncZeroUVecF64x2ToVecI32x4:
659+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
660+
return NONCONSTANT_FLOW;
661+
}
662+
[[fallthrough]];
663+
case TruncSatZeroUVecF64x2ToVecI32x4:
630664
return value.truncSatZeroUToI32x4();
631665
case DemoteZeroVecF64x2ToVecF32x4:
632666
return value.demoteZeroToF32x4();
@@ -989,8 +1023,12 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
9891023
return left.maxUI16x8(right);
9901024
case AvgrUVecI16x8:
9911025
return left.avgrUI16x8(right);
992-
case Q15MulrSatSVecI16x8:
9931026
case RelaxedQ15MulrSVecI16x8:
1027+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1028+
return NONCONSTANT_FLOW;
1029+
}
1030+
[[fallthrough]];
1031+
case Q15MulrSatSVecI16x8:
9941032
return left.q15MulrSatSI16x8(right);
9951033
case ExtMulLowSVecI16x8:
9961034
return left.extMulLowSI16x8(right);
@@ -1064,11 +1102,19 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
10641102
return left.mulF32x4(right);
10651103
case DivVecF32x4:
10661104
return left.divF32x4(right);
1067-
case MinVecF32x4:
10681105
case RelaxedMinVecF32x4:
1106+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1107+
return NONCONSTANT_FLOW;
1108+
}
1109+
[[fallthrough]];
1110+
case MinVecF32x4:
10691111
return left.minF32x4(right);
1070-
case MaxVecF32x4:
10711112
case RelaxedMaxVecF32x4:
1113+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1114+
return NONCONSTANT_FLOW;
1115+
}
1116+
[[fallthrough]];
1117+
case MaxVecF32x4:
10721118
return left.maxF32x4(right);
10731119
case PMinVecF32x4:
10741120
return left.pminF32x4(right);
@@ -1082,11 +1128,19 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
10821128
return left.mulF64x2(right);
10831129
case DivVecF64x2:
10841130
return left.divF64x2(right);
1085-
case MinVecF64x2:
10861131
case RelaxedMinVecF64x2:
1132+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1133+
return NONCONSTANT_FLOW;
1134+
}
1135+
[[fallthrough]];
1136+
case MinVecF64x2:
10871137
return left.minF64x2(right);
1088-
case MaxVecF64x2:
10891138
case RelaxedMaxVecF64x2:
1139+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1140+
return NONCONSTANT_FLOW;
1141+
}
1142+
[[fallthrough]];
1143+
case MaxVecF64x2:
10901144
return left.maxF64x2(right);
10911145
case PMinVecF64x2:
10921146
return left.pminF64x2(right);
@@ -1102,8 +1156,12 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
11021156
case NarrowUVecI32x4ToVecI16x8:
11031157
return left.narrowUToI16x8(right);
11041158

1105-
case SwizzleVecI8x16:
11061159
case RelaxedSwizzleVecI8x16:
1160+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1161+
return NONCONSTANT_FLOW;
1162+
}
1163+
[[fallthrough]];
1164+
case SwizzleVecI8x16:
11071165
return left.swizzleI8x16(right);
11081166

11091167
case DotI8x16I7x16SToVecI16x8:
@@ -1213,16 +1271,34 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
12131271
return c.bitselectV128(a, b);
12141272

12151273
case RelaxedMaddVecF16x8:
1274+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1275+
return NONCONSTANT_FLOW;
1276+
}
12161277
return a.relaxedMaddF16x8(b, c);
12171278
case RelaxedNmaddVecF16x8:
1279+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1280+
return NONCONSTANT_FLOW;
1281+
}
12181282
return a.relaxedNmaddF16x8(b, c);
12191283
case RelaxedMaddVecF32x4:
1284+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1285+
return NONCONSTANT_FLOW;
1286+
}
12201287
return a.relaxedMaddF32x4(b, c);
12211288
case RelaxedNmaddVecF32x4:
1289+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1290+
return NONCONSTANT_FLOW;
1291+
}
12221292
return a.relaxedNmaddF32x4(b, c);
12231293
case RelaxedMaddVecF64x2:
1294+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1295+
return NONCONSTANT_FLOW;
1296+
}
12241297
return a.relaxedMaddF64x2(b, c);
12251298
case RelaxedNmaddVecF64x2:
1299+
if (relaxedBehavior == RelaxedBehavior::NonConstant) {
1300+
return NONCONSTANT_FLOW;
1301+
}
12261302
return a.relaxedNmaddF64x2(b, c);
12271303
default:
12281304
// TODO: implement signselect and dot_add
+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited.
2+
3+
;; RUN: wasm-opt %s --precompute -all -S -o - | filecheck %s
4+
5+
(module
6+
;; CHECK: (type $0 (func (result v128)))
7+
8+
;; CHECK: (func $relaxed-max (type $0) (result v128)
9+
;; CHECK-NEXT: (f32x4.relaxed_max
10+
;; CHECK-NEXT: (v128.const i32x4 0x3f800000 0x40000000 0x40400000 0x40800000)
11+
;; CHECK-NEXT: (v128.const i32x4 0x40a00000 0x40c00000 0x40e00000 0x41000000)
12+
;; CHECK-NEXT: )
13+
;; CHECK-NEXT: )
14+
(func $relaxed-max (result v128)
15+
;; Though this is all constant and precomputable, we do not optimize
16+
;; relaxed SIMD operations.
17+
;; TODO: if we optimize some cases of relaxed operations (ones without
18+
;; nondeterminism), this test should use inputs that trigger nondeterminism.
19+
(f32x4.relaxed_max
20+
(v128.const f32x4 1 2 3 4)
21+
(v128.const f32x4 5 6 7 8)
22+
)
23+
)
24+
25+
;; CHECK: (func $normal-max (type $0) (result v128)
26+
;; CHECK-NEXT: (v128.const i32x4 0x41100000 0x40c00000 0x40e00000 0x41000000)
27+
;; CHECK-NEXT: )
28+
(func $normal-max (result v128)
29+
;; For comparison, we do optimize non-relaxed SIMD, even one with a
30+
;; corresponding relaxed variant.
31+
(f32x4.max
32+
(v128.const f32x4 5 6 7 8)
33+
(v128.const f32x4 9 3 1 0)
34+
)
35+
)
36+
)

0 commit comments

Comments
 (0)