Skip to content

Commit 73674ed

Browse files
Mike Pall authored and Buristan committed
Cleanup CPU detection and tuning for old CPUs.
(cherry picked from commit 0eddcbe)

This patch does the following refactoring:

1) Drops optimizations for the Intel Atom CPU [1]: removes the `JIT_F_LEA_AGU` flag and related optimizations. The considerations for the use of LEA are complex and very CPU-specific, mostly dependent on the number of operands. In most cases it isn't worth it due to the extra register pressure and/or extra instructions. Be aware that this applies only to the original, now obsolete, Atom architecture. Today "Intel Atom" is just a trade name for reduced-performance implementations of the current Intel architecture.
2) Drops optimizations for the AMD K8 and K10 CPUs [2][3]: removes the `JIT_F_PREFER_IMUL` flag and related optimizations.
3) Refactors the JIT flags defined in <lj_jit.h>. All CPU-specific JIT flags are now defined as left shifts of `JIT_F_CPU` instead of hardcoded constants; the optimization flags are handled similarly.
4) Adds detection of ARMv8 CPUs.
5) Drops the check for SSE2, since the VM already presumes that the CPU supports it.
6) Adds checks for the `__ARM_ARCH` [4] macro in <lj_arch.h>.
7) Drops the outdated comment in the amalgamation file about memory requirements.

Sergey Kaplun:
* added the description for the patch

[1]: https://en.wikipedia.org/wiki/Intel_Atom
[2]: https://en.wikipedia.org/wiki/AMD_K8
[3]: https://en.wikipedia.org/wiki/AMD_K10
[4]: https://developer.arm.com/documentation/dui0774/l/Other-Compiler-specific-Features/Predefined-macros

Part of tarantool/tarantool#10709

Reviewed-by: Sergey Bronnikov <[email protected]>
Signed-off-by: Sergey Kaplun <[email protected]>
(cherry picked from commit 1d988a8)
1 parent 2c92ae0 commit 73674ed

9 files changed

+87
-138
lines changed

src/Makefile.original

-1
Original file line numberDiff line numberDiff line change
@@ -621,7 +621,6 @@ E= @echo
621621
default all: $(TARGET_T)
622622

623623
amalg:
624-
@grep "^[+|]" ljamalg.c
625624
$(MAKE) -f Makefile.original all "LJCORE_O=ljamalg.o"
626625

627626
clean:

src/lib_jit.c

+23-42
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,8 @@ LJLIB_CF(jit_status)
104104
jit_State *J = L2J(L);
105105
L->top = L->base;
106106
setboolV(L->top++, (J->flags & JIT_F_ON) ? 1 : 0);
107-
flagbits_to_strings(L, J->flags, JIT_F_CPU_FIRST, JIT_F_CPUSTRING);
108-
flagbits_to_strings(L, J->flags, JIT_F_OPT_FIRST, JIT_F_OPTSTRING);
107+
flagbits_to_strings(L, J->flags, JIT_F_CPU, JIT_F_CPUSTRING);
108+
flagbits_to_strings(L, J->flags, JIT_F_OPT, JIT_F_OPTSTRING);
109109
return (int)(L->top - L->base);
110110
#else
111111
setboolV(L->top++, 0);
@@ -467,7 +467,7 @@ static int jitopt_flag(jit_State *J, const char *str)
467467
str += str[2] == '-' ? 3 : 2;
468468
set = 0;
469469
}
470-
for (opt = JIT_F_OPT_FIRST; ; opt <<= 1) {
470+
for (opt = JIT_F_OPT; ; opt <<= 1) {
471471
size_t len = *(const uint8_t *)lst;
472472
if (len == 0)
473473
break;
@@ -636,80 +636,64 @@ JIT_PARAMDEF(JIT_PARAMINIT)
636636
#undef JIT_PARAMINIT
637637
0
638638
};
639-
#endif
640639

641640
#if LJ_TARGET_ARM && LJ_TARGET_LINUX
642641
#include <sys/utsname.h>
643642
#endif
644643

645-
/* Arch-dependent CPU detection. */
646-
static uint32_t jit_cpudetect(lua_State *L)
644+
/* Arch-dependent CPU feature detection. */
645+
static uint32_t jit_cpudetect(void)
647646
{
648647
uint32_t flags = 0;
649648
#if LJ_TARGET_X86ORX64
649+
650650
uint32_t vendor[4];
651651
uint32_t features[4];
652652
if (lj_vm_cpuid(0, vendor) && lj_vm_cpuid(1, features)) {
653-
#if !LJ_HASJIT
654-
#define JIT_F_SSE2 2
655-
#endif
656-
flags |= ((features[3] >> 26)&1) * JIT_F_SSE2;
657-
#if LJ_HASJIT
658653
flags |= ((features[2] >> 0)&1) * JIT_F_SSE3;
659654
flags |= ((features[2] >> 19)&1) * JIT_F_SSE4_1;
660-
if (vendor[2] == 0x6c65746e) { /* Intel. */
661-
if ((features[0] & 0x0fff0ff0) == 0x000106c0) /* Atom. */
662-
flags |= JIT_F_LEA_AGU;
663-
} else if (vendor[2] == 0x444d4163) { /* AMD. */
664-
uint32_t fam = (features[0] & 0x0ff00f00);
665-
if (fam >= 0x00000f00) /* K8, K10. */
666-
flags |= JIT_F_PREFER_IMUL;
667-
}
668655
if (vendor[0] >= 7) {
669656
uint32_t xfeatures[4];
670657
lj_vm_cpuid(7, xfeatures);
671658
flags |= ((xfeatures[1] >> 8)&1) * JIT_F_BMI2;
672659
}
673-
#endif
674660
}
675-
/* Check for required instruction set support on x86 (unnecessary on x64). */
676-
#if LJ_TARGET_X86
677-
if (!(flags & JIT_F_SSE2))
678-
luaL_error(L, "CPU with SSE2 required");
679-
#endif
661+
/* Don't bother checking for SSE2 -- the VM will crash before getting here. */
662+
680663
#elif LJ_TARGET_ARM
681-
#if LJ_HASJIT
664+
682665
int ver = LJ_ARCH_VERSION; /* Compile-time ARM CPU detection. */
683666
#if LJ_TARGET_LINUX
684667
if (ver < 70) { /* Runtime ARM CPU detection. */
685668
struct utsname ut;
686669
uname(&ut);
687670
if (strncmp(ut.machine, "armv", 4) == 0) {
688-
if (ut.machine[4] >= '7')
689-
ver = 70;
690-
else if (ut.machine[4] == '6')
691-
ver = 60;
671+
if (ut.machine[4] >= '8') ver = 80;
672+
else if (ut.machine[4] == '7') ver = 70;
673+
else if (ut.machine[4] == '6') ver = 60;
692674
}
693675
}
694676
#endif
695677
flags |= ver >= 70 ? JIT_F_ARMV7 :
696678
ver >= 61 ? JIT_F_ARMV6T2_ :
697679
ver >= 60 ? JIT_F_ARMV6_ : 0;
698680
flags |= LJ_ARCH_HASFPU == 0 ? 0 : ver >= 70 ? JIT_F_VFPV3 : JIT_F_VFPV2;
699-
#endif
681+
700682
#elif LJ_TARGET_ARM64
683+
701684
/* No optional CPU features to detect (for now). */
685+
702686
#elif LJ_TARGET_PPC
703-
#if LJ_HASJIT
687+
704688
#if LJ_ARCH_SQRT
705689
flags |= JIT_F_SQRT;
706690
#endif
707691
#if LJ_ARCH_ROUND
708692
flags |= JIT_F_ROUND;
709693
#endif
710-
#endif
694+
711695
#elif LJ_TARGET_MIPS
712-
#if LJ_HASJIT
696+
713697
/* Compile-time MIPS CPU detection. */
714698
#if LJ_ARCH_VERSION >= 20
715699
flags |= JIT_F_MIPSXXR2;
@@ -727,31 +711,28 @@ static uint32_t jit_cpudetect(lua_State *L)
727711
if (x) flags |= JIT_F_MIPSXXR2; /* Either 0x80000000 (R2) or 0 (R1). */
728712
}
729713
#endif
730-
#endif
714+
731715
#else
732716
#error "Missing CPU detection for this architecture"
733717
#endif
734-
UNUSED(L);
735718
return flags;
736719
}
737720

738721
/* Initialize JIT compiler. */
739722
static void jit_init(lua_State *L)
740723
{
741-
uint32_t flags = jit_cpudetect(L);
742-
#if LJ_HASJIT
743724
jit_State *J = L2J(L);
744-
J->flags = flags | JIT_F_ON | JIT_F_OPT_DEFAULT;
725+
J->flags = jit_cpudetect() | JIT_F_ON | JIT_F_OPT_DEFAULT;
745726
memcpy(J->param, jit_param_default, sizeof(J->param));
746727
lj_dispatch_update(G(L));
747-
#else
748-
UNUSED(flags);
749-
#endif
750728
}
729+
#endif
751730

752731
LUALIB_API int luaopen_jit(lua_State *L)
753732
{
733+
#if LJ_HASJIT
754734
jit_init(L);
735+
#endif
755736
lua_pushliteral(L, LJ_OS_NAME);
756737
lua_pushliteral(L, LJ_ARCH_NAME);
757738
lua_pushinteger(L, LUAJIT_VERSION_NUM);

src/lj_arch.h

+3-3
Original file line numberDiff line numberDiff line change
@@ -209,13 +209,13 @@
209209
#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR. */
210210
#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL
211211

212-
#if __ARM_ARCH_8__ || __ARM_ARCH_8A__
212+
#if __ARM_ARCH == 8 || __ARM_ARCH_8__ || __ARM_ARCH_8A__
213213
#define LJ_ARCH_VERSION 80
214-
#elif __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
214+
#elif __ARM_ARCH == 7 || __ARM_ARCH_7__ || __ARM_ARCH_7A__ || __ARM_ARCH_7R__ || __ARM_ARCH_7S__ || __ARM_ARCH_7VE__
215215
#define LJ_ARCH_VERSION 70
216216
#elif __ARM_ARCH_6T2__
217217
#define LJ_ARCH_VERSION 61
218-
#elif __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
218+
#elif __ARM_ARCH == 6 || __ARM_ARCH_6__ || __ARM_ARCH_6J__ || __ARM_ARCH_6K__ || __ARM_ARCH_6Z__ || __ARM_ARCH_6ZK__
219219
#define LJ_ARCH_VERSION 60
220220
#else
221221
#define LJ_ARCH_VERSION 50

src/lj_asm_x86.h

+9-24
Original file line numberDiff line numberDiff line change
@@ -1224,13 +1224,8 @@ static void asm_href(ASMState *as, IRIns *ir, IROp merge)
12241224
emit_rmro(as, XO_MOV, dest|REX_GC64, tab, offsetof(GCtab, node));
12251225
} else {
12261226
emit_rmro(as, XO_ARITH(XOg_ADD), dest|REX_GC64, tab, offsetof(GCtab,node));
1227-
if ((as->flags & JIT_F_PREFER_IMUL)) {
1228-
emit_i8(as, sizeof(Node));
1229-
emit_rr(as, XO_IMULi8, dest, dest);
1230-
} else {
1231-
emit_shifti(as, XOg_SHL, dest, 3);
1232-
emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
1233-
}
1227+
emit_shifti(as, XOg_SHL, dest, 3);
1228+
emit_rmrxo(as, XO_LEA, dest, dest, dest, XM_SCALE2, 0);
12341229
if (isk) {
12351230
emit_gri(as, XG_ARITHi(XOg_AND), dest, (int32_t)khash);
12361231
emit_rmro(as, XO_MOV, dest, tab, offsetof(GCtab, hmask));
@@ -1289,7 +1284,7 @@ static void asm_hrefk(ASMState *as, IRIns *ir)
12891284
lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
12901285
if (ra_hasreg(dest)) {
12911286
if (ofs != 0) {
1292-
if (dest == node && !(as->flags & JIT_F_LEA_AGU))
1287+
if (dest == node)
12931288
emit_gri(as, XG_ARITHi(XOg_ADD), dest|REX_GC64, ofs);
12941289
else
12951290
emit_rmro(as, XO_LEA, dest|REX_GC64, node, ofs);
@@ -2183,8 +2178,7 @@ static void asm_add(ASMState *as, IRIns *ir)
21832178
{
21842179
if (irt_isnum(ir->t))
21852180
asm_fparith(as, ir, XO_ADDSD);
2186-
else if ((as->flags & JIT_F_LEA_AGU) || as->flagmcp == as->mcp ||
2187-
irt_is64(ir->t) || !asm_lea(as, ir))
2181+
else if (as->flagmcp == as->mcp || irt_is64(ir->t) || !asm_lea(as, ir))
21882182
asm_intarith(as, ir, XOg_ADD);
21892183
}
21902184

@@ -2889,7 +2883,7 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
28892883
MCode *target, *q;
28902884
int32_t spadj = as->T->spadjust;
28912885
if (spadj == 0) {
2892-
p -= ((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0);
2886+
p -= LJ_64 ? 7 : 6;
28932887
} else {
28942888
MCode *p1;
28952889
/* Patch stack adjustment. */
@@ -2901,20 +2895,11 @@ static void asm_tail_fixup(ASMState *as, TraceNo lnk)
29012895
p1 = p-9;
29022896
*(int32_t *)p1 = spadj;
29032897
}
2904-
if ((as->flags & JIT_F_LEA_AGU)) {
2905-
#if LJ_64
2906-
p1[-4] = 0x48;
2907-
#endif
2908-
p1[-3] = (MCode)XI_LEA;
2909-
p1[-2] = MODRM(checki8(spadj) ? XM_OFS8 : XM_OFS32, RID_ESP, RID_ESP);
2910-
p1[-1] = MODRM(XM_SCALE1, RID_ESP, RID_ESP);
2911-
} else {
29122898
#if LJ_64
2913-
p1[-3] = 0x48;
2899+
p1[-3] = 0x48;
29142900
#endif
2915-
p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2916-
p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
2917-
}
2901+
p1[-2] = (MCode)(checki8(spadj) ? XI_ARITHi8 : XI_ARITHi);
2902+
p1[-1] = MODRM(XM_REG, XOg_ADD, RID_ESP);
29182903
}
29192904
/* Patch exit branch. */
29202905
target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
@@ -2945,7 +2930,7 @@ static void asm_tail_prep(ASMState *as)
29452930
as->invmcp = as->mcp = p;
29462931
} else {
29472932
/* Leave room for ESP adjustment: add esp, imm or lea esp, [esp+imm] */
2948-
as->mcp = p - (((as->flags & JIT_F_LEA_AGU) ? 7 : 6) + (LJ_64 ? 1 : 0));
2933+
as->mcp = p - (LJ_64 ? 7 : 6);
29492934
as->invmcp = NULL;
29502935
}
29512936
}

src/lj_dispatch.c

-7
Original file line numberDiff line numberDiff line change
@@ -258,15 +258,8 @@ int luaJIT_setmode(lua_State *L, int idx, int mode)
258258
} else {
259259
if (!(mode & LUAJIT_MODE_ON))
260260
G2J(g)->flags &= ~(uint32_t)JIT_F_ON;
261-
#if LJ_TARGET_X86ORX64
262-
else if ((G2J(g)->flags & JIT_F_SSE2))
263-
G2J(g)->flags |= (uint32_t)JIT_F_ON;
264-
else
265-
return 0; /* Don't turn on JIT compiler without SSE2 support. */
266-
#else
267261
else
268262
G2J(g)->flags |= (uint32_t)JIT_F_ON;
269-
#endif
270263
lj_dispatch_update(g);
271264
}
272265
break;

src/lj_emit_x86.h

+1-4
Original file line numberDiff line numberDiff line change
@@ -561,10 +561,7 @@ static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
561561
static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
562562
{
563563
if (ofs) {
564-
if ((as->flags & JIT_F_LEA_AGU))
565-
emit_rmro(as, XO_LEA, r|REX_GC64, r, ofs);
566-
else
567-
emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
564+
emit_gri(as, XG_ARITHi(XOg_ADD), r|REX_GC64, ofs);
568565
}
569566
}
570567

src/lj_errmsg.h

-4
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,7 @@ ERRDEF(STRGSRV, "invalid replacement value (a %s)")
101101
ERRDEF(BADMODN, "name conflict for module " LUA_QS)
102102
#if LJ_HASJIT
103103
ERRDEF(JITPROT, "runtime code generation failed, restricted kernel?")
104-
#if LJ_TARGET_X86ORX64
105-
ERRDEF(NOJIT, "JIT compiler disabled, CPU does not support SSE2")
106-
#else
107104
ERRDEF(NOJIT, "JIT compiler disabled")
108-
#endif
109105
#elif defined(LJ_ARCH_NOJIT)
110106
ERRDEF(NOJIT, "no JIT compiler for this architecture (yet)")
111107
#else

0 commit comments

Comments
 (0)