Skip to content

Commit 058994b

Browse files
Mike PallBuristan
Mike Pall
authored andcommitted
x86/x64: Don't fuse loads across IR_NEWREF.
Reported by Peter Cawley. (cherry picked from commit 6447236) Load fusing optimization doesn't take into account the presence of the `IR_NEWREF` which may cause rehashing and deallocation of the array part of the table. This may lead to the incorrect stores if the fusing optimization occurs across this IR, leading to inconsistent behaviour between the JIT and the VM. This patch adds the corresponding check by the refactoring of the `noconflict()` function -- it now accepts the mask of the `check` as the last argument. The first bit stands for the `IR_NEWREF` check, the second for the multiple reference of the given instruction. Unfortunately, this commit misses the check for the `table.clear()` introduced for the preprevious patch. Thus, the corresponding test fails again. This will be fixed in the next commit. Sergey Kaplun: * added the description and the test for the problem Part of tarantool/tarantool#10709 Reviewed-by: Sergey Bronnikov <[email protected]> Signed-off-by: Sergey Kaplun <[email protected]> (cherry picked from commit d09b7bd)
1 parent c2d9b9f commit 058994b

File tree

2 files changed

+61
-8
lines changed

2 files changed

+61
-8
lines changed

src/lj_asm_x86.h

+9-8
Original file line numberDiff line numberDiff line change
@@ -109,7 +109,7 @@ static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
109109
/* Check if there's no conflicting instruction between curins and ref.
110110
** Also avoid fusing loads if there are multiple references.
111111
*/
112-
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
112+
static int noconflict(ASMState *as, IRRef ref, IROp conflict, int check)
113113
{
114114
IRIns *ir = as->ir;
115115
IRRef i = as->curins;
@@ -118,7 +118,9 @@ static int noconflict(ASMState *as, IRRef ref, IROp conflict, int noload)
118118
while (--i > ref) {
119119
if (ir[i].o == conflict)
120120
return 0; /* Conflict found. */
121-
else if (!noload && (ir[i].op1 == ref || ir[i].op2 == ref))
121+
else if ((check & 1) && ir[i].o == IR_NEWREF)
122+
return 0;
123+
else if ((check & 2) && (ir[i].op1 == ref || ir[i].op2 == ref))
122124
return 0;
123125
}
124126
return 1; /* Ok, no conflict. */
@@ -134,7 +136,7 @@ static IRRef asm_fuseabase(ASMState *as, IRRef ref)
134136
lj_assertA(irb->op2 == IRFL_TAB_ARRAY, "expected FLOAD TAB_ARRAY");
135137
/* We can avoid the FLOAD of t->array for colocated arrays. */
136138
if (ira->o == IR_TNEW && ira->op1 <= LJ_MAX_COLOSIZE &&
137-
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 1)) {
139+
!neverfuse(as) && noconflict(as, irb->op1, IR_NEWREF, 0)) {
138140
as->mrm.ofs = (int32_t)sizeof(GCtab); /* Ofs to colocated array. */
139141
return irb->op1; /* Table obj. */
140142
}
@@ -448,7 +450,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
448450
RegSet xallow = (allow & RSET_GPR) ? allow : RSET_GPR;
449451
if (ir->o == IR_SLOAD) {
450452
if (!(ir->op2 & (IRSLOAD_PARENT|IRSLOAD_CONVERT)) &&
451-
noconflict(as, ref, IR_RETF, 0) &&
453+
noconflict(as, ref, IR_RETF, 2) &&
452454
!(LJ_GC64 && irt_isaddr(ir->t))) {
453455
as->mrm.base = (uint8_t)ra_alloc1(as, REF_BASE, xallow);
454456
as->mrm.ofs = 8*((int32_t)ir->op1-1-LJ_FR2) +
@@ -459,13 +461,12 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
459461
} else if (ir->o == IR_FLOAD) {
460462
/* Generic fusion is only ok for 32 bit operand (but see asm_comp). */
461463
if ((irt_isint(ir->t) || irt_isu32(ir->t) || irt_isaddr(ir->t)) &&
462-
noconflict(as, ref, IR_FSTORE, 0)) {
464+
noconflict(as, ref, IR_FSTORE, 2)) {
463465
asm_fusefref(as, ir, xallow);
464466
return RID_MRM;
465467
}
466468
} else if (ir->o == IR_ALOAD || ir->o == IR_HLOAD || ir->o == IR_ULOAD) {
467-
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 0) &&
468-
noconflict(as, ref, IR_CALLS, 1) && /* Don't cross table.clear. */
469+
if (noconflict(as, ref, ir->o + IRDELTA_L2S, 2+(ir->o != IR_ULOAD)) &&
469470
!(LJ_GC64 && irt_isaddr(ir->t))) {
470471
asm_fuseahuref(as, ir->op1, xallow);
471472
return RID_MRM;
@@ -475,7 +476,7 @@ static Reg asm_fuseload(ASMState *as, IRRef ref, RegSet allow)
475476
** Fusing unaligned memory operands is ok on x86 (except for SIMD types).
476477
*/
477478
if ((!irt_typerange(ir->t, IRT_I8, IRT_U16)) &&
478-
noconflict(as, ref, IR_XSTORE, 0)) {
479+
noconflict(as, ref, IR_XSTORE, 2)) {
479480
asm_fusexref(as, ir->op1, xallow);
480481
return RID_MRM;
481482
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
local tap = require('tap')
2+
-- Test file to demonstrate LuaJIT's incorrect fusion across
3+
-- `IR_NEWREF`.
4+
-- See also: https://github.com/LuaJIT/LuaJIT/issues/1117.
5+
local test = tap.test('lj-1117-fuse-across-newref'):skipcond({
6+
['Test requires JIT enabled'] = not jit.status(),
7+
})
8+
9+
local ffi = require('ffi')
10+
11+
test:plan(1)
12+
13+
-- Table with content.
14+
local tab = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 42}
15+
-- Use the alias to trick the code flow analysis.
16+
local tab_alias = tab
17+
local result_tab = {}
18+
19+
-- Need to start recording trace at the 16th iteration to avoid
20+
-- rehashing of the `t` and `result_tab` before the `if`
21+
-- condition below on the 32nd iteration. Also, the inner loop
22+
-- isn't recorded this way. After rehashing in the NEWREF, the
23+
-- fusion will use the wrong address, which leads to the dirty
24+
-- reads visible (always, not flaky) under Valgrind with the
25+
-- `--free-fill` option set.
26+
jit.opt.start('hotloop=16')
27+
28+
-- The amount of iterations required for the rehashing of the
29+
-- table.
30+
for i = 1, 33 do
31+
-- ALOAD to be fused.
32+
local value = tab[16]
33+
-- NEWREF instruction.
34+
tab_alias[{}] = 100
35+
-- Need this CONV cast to trigger load fusion. See `asm_comp()`
36+
-- for the details. Before the patch, this fusion takes the
37+
-- incorrect address of the already deallocated array part of
38+
-- the table, which leads to the incorrect result.
39+
result_tab[i] = ffi.cast('int64_t', value)
40+
if i == 32 then
41+
-- Clear the array part.
42+
for j = 1, 15 do
43+
tab[j] = nil
44+
end
45+
-- Next rehash of the `tab`/`tab_alias` will dealloc the array
46+
-- part.
47+
end
48+
end
49+
50+
test:samevalues(result_tab, 'no fusion across NEWREF')
51+
52+
test:done(true)

0 commit comments

Comments
 (0)