Skip to content

Commit 3d1e89c

Browse files
committed
Add Word{S,U}<N>_{add,mul,neg,sub}AndCheck fns to Word-ops.h
It appears that GCC (and, to a lesser extent, Clang/LLVM) does not always successfully fuse adjacent `Word<N>_<op>` and `Word{S,U}<N>_<op>CheckP` primitives. The performance results reported at MLton#273 and MLton#292 suggest that this does not always have a significant impact, but a close look at the `md5` benchmark shows that the native codegen significantly outperforms the C codegen with gcc-9 due to redundant arithmetic computations (one for `Word{S,U}<N>_<op>CheckP` and another for `Word<N>_<op>`). These functions compute both the arithmetic result and a boolean indicating overflow (using `__builtin_<op>_overflow`). They will be used for explicit fusing of adjacent `Word<N>_<op>` and `Word{S,U}<N>_<op>CheckP` primitives in the C codegen for `-codegen-fuse-op-and-check true`.
1 parent 6b738b8 commit 3d1e89c

File tree

1 file changed

+25
-3
lines changed

1 file changed

+25
-3
lines changed

runtime/basis/Word/Word-ops.h

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ binaryOvflOp (U##size, name)
3535
binaryOvflChk (S##size, name) \
3636
binaryOvflChk (U##size, name)
3737

38+
/* binaryOvflOpAndChk(kind, name): define Word<kind>_<name>AndCheck, a fused
 * "operate and check" helper for a binary arithmetic operation.
 *
 *   w1, w2  operands
 *   rw      out: receives the result written by __builtin_<name>_overflow
 *   rb      out: receives that builtin's return value (the overflow flag)
 *
 * A single builtin call produces both outputs, so the arithmetic is performed
 * once rather than once for the value and again for the overflow check.
 */
#define binaryOvflOpAndChk(kind, name) \
39+
PRIVATE INLINE \
40+
void Word##kind##_##name##AndCheck (Word##kind w1, Word##kind w2, Word##kind *rw, Bool *rb) { \
41+
*rb = __builtin_##name##_overflow(w1, w2, rw); \
42+
}
43+
44+
/* bothBinaryOvflOpAndChk(size, name): instantiate binaryOvflOpAndChk twice for
 * the given size, once with kind S<size> and once with kind U<size>, yielding
 * WordS<size>_<name>AndCheck and WordU<size>_<name>AndCheck.
 */
#define bothBinaryOvflOpAndChk(size, name) \
45+
binaryOvflOpAndChk (S##size, name) \
46+
binaryOvflOpAndChk (U##size, name)
47+
3848
#define compare(kind, name, op) \
3949
PRIVATE INLINE \
4050
Bool Word##kind##_##name (Word##kind w1, Word##kind w2) { \
@@ -60,6 +70,12 @@ compare (U##size, name, op)
6070
return __builtin_sub_overflow(0, w, &res); \
6171
}
6272

73+
/* negOvflOpAndChk(kind): define Word<kind>_negAndCheck, a fused "negate and
 * check" helper.
 *
 *   w   operand
 *   rw  out: receives the result of 0 - w as written by __builtin_sub_overflow
 *   rb  out: receives that builtin's return value (the overflow flag)
 *
 * Negation is expressed as the subtraction 0 - w so that the same overflow
 * builtin used for sub can report overflow for neg.
 */
#define negOvflOpAndChk(kind) \
74+
PRIVATE INLINE \
75+
void Word##kind##_negAndCheck (Word##kind w, Word##kind *rw, Bool *rb) { \
76+
*rb = __builtin_sub_overflow(0, w, rw); \
77+
}
78+
6379
#define rol(size) \
6480
PRIVATE INLINE \
6581
Word##size Word##size##_rol (Word##size w1, Word32 w2) { \
@@ -78,7 +94,7 @@ compare (U##size, name, op)
7894
return (Word##kind)(w1 op w2); \
7995
}
8096

81-
#define unary(kind, name, op) \
97+
#define unary(kind, name, op) \
8298
PRIVATE INLINE \
8399
Word##kind Word##kind##_##name (Word##kind w) { \
84100
return (Word##kind)(op w); \
@@ -105,6 +121,7 @@ compare (U##size, name, op)
105121
#define all(size) \
106122
binaryOvflOp (size, add) \
107123
bothBinaryOvflChk (size, add) \
124+
bothBinaryOvflOpAndChk (size, add) \
108125
binary (size, andb, &) \
109126
compare (size, equal, ==) \
110127
bothCompare (size, ge, >=) \
@@ -114,9 +131,12 @@ shift (size, lshift, <<) \
114131
bothCompare (size, lt, <) \
115132
bothBinaryOvflOp (size, mul) \
116133
bothBinaryOvflChk (size, mul) \
134+
bothBinaryOvflOpAndChk (size, mul) \
117135
negOvflOp (size) \
118136
negOvflChk (S##size) \
119137
negOvflChk (U##size) \
138+
negOvflOpAndChk (S##size) \
139+
negOvflOpAndChk (U##size) \
120140
unary (size, notb, ~) \
121141
bothBinary (size, quot, /) \
122142
bothBinary (size, rem, %) \
@@ -134,6 +154,7 @@ shift (S##size, rshift, >>) \
134154
shift (U##size, rshift, >>) \
135155
binaryOvflOp (size, sub) \
136156
bothBinaryOvflChk (size, sub) \
157+
bothBinaryOvflOpAndChk (size, sub) \
137158
binary (size, xorb, ^)
138159

139160
all (8)
@@ -149,12 +170,13 @@ misaligned(64)
149170
#undef shift
150171
#undef ror
151172
#undef rol
152-
#undef negOvfl
173+
#undef negOvflOpAndChk
153174
#undef negOvflChk
154175
#undef negOvflOp
155176
#undef bothCompare
156177
#undef compare
157-
#undef bothBinaryOvfl
178+
#undef bothBinaryOvflOpAndChk
179+
#undef binaryOvflOpAndChk
158180
#undef bothBinaryOvflChk
159181
#undef binaryOvflChk
160182
#undef bothBinaryOvflOp

0 commit comments

Comments
 (0)