diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua new file mode 100644 index 0000000000..8a03b5bebb --- /dev/null +++ b/src/jit/dis_riscv.lua @@ -0,0 +1,930 @@ +------------------------------------------------------------------------------ +-- LuaJIT RISC-V disassembler module. +-- +-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +-- +-- Contributed by Milos Poletanovic from Syrmia.com. +-- Contributed by gns from PLCT Lab, ISCAS. +------------------------------------------------------------------------------ +-- This is a helper module used by the LuaJIT machine code dumper module. +-- +-- It disassembles most standard RISC-V instructions. +-- Mode is little-endian +------------------------------------------------------------------------------ + +local type = type +local byte, format = string.byte, string.format +local match, gmatch = string.match, string.gmatch +local concat = table.concat +local bit = require("bit") +local band, bor, tohex = bit.band, bit.bor, bit.tohex +local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift +local jit = require("jit") + +local jstat = { jit.status() } +local function is_opt_enabled(opt) + for _, v in ipairs(jstat) do + if v == opt then + return true + end + end + return false +end +local xthead = is_opt_enabled("XThead") + +------------------------------------------------------------------------------ +-- Opcode maps +------------------------------------------------------------------------------ + +--RVC32 extension + +local map_quad0 = { + shift = 13, mask = 7, + [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", + false, "c.fsdNMh", "c.swZMn", "c.fswNMn" +} + +local map_sub2quad1 = { + shift = 5, mask = 3, + [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" +} + +local map_sub1quad1 = { + shift = 10, mask = 3, + [0] = "c.srliM1", "c.sraiM1", "c.andiMx", map_sub2quad1 +} + +local map_quad1 = { + shift = 13, mask = 7, + [0] = { + shift = 7, mask = 31, + [0] = "c.nop", _ = "c.addiDx" + }, + [1] = "c.jalT", [2] = "c.liDx", + [3] = { + shift = 7, mask = 31, + [0] = "c.luiDK", [1] = "c.luiDK", [2] = "c.addi16spX", + _ = "c.luiDK" + }, + [4] = map_sub1quad1, [5] = "c.jT", [6] = "c.beqzMq", [7] = "c.bnezMq" +} + +local map_sub1quad2 = { + shift = 12, mask = 1, + [0] = { + shift = 2, mask = 31, + [0] = "c.jrD", _ = "c.mvDE" + }, + [1] = { + shift = 2, mask = 31, + [0] = { + shift = 7, mask = 31, + [0] = "c.ebreak", _ = "c.jalrD" + }, + _ = "c.addDE" + } +} + +local map_quad2 = { + shift = 13, mask = 7, + [0] = "c.slliD1", [1] = "c.fldspFQ",[2] = "c.lwspDY", [3] = "c.flwspFY", + [4] = map_sub1quad2, [5] = "c.fsdspVt", [6] = "c.swspEu", [7] = "c.fswspVu" +} + +local map_compr = { + [0] = map_quad0, map_quad1, map_quad2 +} + +--RV32M +local map_mext = { + shift = 12, mask = 7, + [0] = "mulDRr", "mulhDRr", "mulhsuDRr", "mulhuDRr", + "divDRr", "divuDRr", "remDRr", "remuDRr" +} + +--RV64M +local map_mext64 = { + shift = 12, mask = 7, + [0] = "mulwDRr", [4] = "divwDRr", [5] = "divuwDRr", [6] = "remwDRr", + [7] = "remuwDRr" +} + +--RV32F, RV64F, RV32D, RV64D +local map_fload = { + shift = 12, mask = 7, + [2] = "flwFL", [3] = "fldFL" +} + +local map_fstore = { + shift = 12, mask = 7, + [2] = "fswSg", [3] = "fsdSg" +} + +local map_fmadd = { + shift = 25, mask = 3, + [0] = "fmadd.sFGgH", "fmadd.dFGgH" +} + +local map_fmsub = { + shift = 25, mask = 3, + [0] = "fmsub.sFGgH", "fmsub.dFGgH" +} + +local map_fnmsub = { + shift = 25, mask = 3, + [0] = "fnmsub.sFGgH", 
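+  -- (Note: every map in this module dispatches the same way. The decoder
+  -- computes band(rshift(op, shift), mask), looks the result up as a key,
+  -- falls back to the "_" entry, and recurses into nested tables until it
+  -- reaches a string: a mnemonic followed by one-letter operand codes.
+  -- An illustrative trace for 0x4388, i.e. c.lw a0, 0(a5):
+  --   band(0x4388, 3) == 0             --> map_quad0
+  --   band(rshift(0x4388, 13), 7) == 2 --> "c.lwZMn"
+  --   "c.lw" plus operands Z (rd'), M (rs1'), n (scaled uimm).)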
"fnmsub.dFGgH" +} + +local map_fnmadd = { + shift = 25, mask = 3, + [0] = "fnmadd.sFGgH", "fnmadd.dFGgH" +} + +local map_fsgnjs = { + shift = 12, mask = 7, + [0] = "fsgnj.s|fmv.sFGg6", "fsgnjn.s|fneg.sFGg6", "fsgnjx.s|fabs.sFGg6" +} + +local map_fsgnjd = { + shift = 12, mask = 7, + [0] = "fsgnj.d|fmv.dFGg6", "fsgnjn.d|fneg.dFGg6", "fsgnjx.d|fabs.dFGg6" +} + +local map_fms = { + shift = 12, mask = 7, + [0] = "fmin.sFGg", "fmax.sFGg" +} + +local map_fmd = { + shift = 12, mask = 7, + [0] = "fmin.dFGg", "fmax.dFGg" +} + +local map_fcomps = { + shift = 12, mask = 7, + [0] = "fle.sDGg", "flt.sDGg", "feq.sDGg" +} + +local map_fcompd = { + shift = 12, mask = 7, + [0] = "fle.dDGg", "flt.dDGg", "feq.dDGg" +} + +local map_fcvtwls = { + shift = 20, mask = 31, + [0] = "fcvt.w.sDG", "fcvt.wu.sDG", "fcvt.l.sDG", "fcvt.lu.sDG" +} + +local map_fcvtwld = { + shift = 20, mask = 31, + [0] = "fcvt.w.dDG", "fcvt.wu.dDG", "fcvt.l.dDG", "fcvt.lu.dDG" +} + +local map_fcvts = { + shift = 20, mask = 31, + [0] = "fcvt.s.wFR", "fcvt.s.wuFR", "fcvt.s.lFR", "fcvt.s.luFR" +} + +local map_fcvtd = { + shift = 20, mask = 31, + [0] = "fcvt.d.wFR", "fcvt.d.wuFR", "fcvt.d.lFR", "fcvt.d.luFR" +} + +local map_fext = { + shift = 25, mask = 127, + [0] = "fadd.sFGg", [1] = "fadd.dFGg", [4] = "fsub.sFGg", [5] = "fsub.dFGg", + [8] = "fmul.sFGg", [9] = "fmul.dFGg", [12] = "fdiv.sFGg", [13] = "fdiv.dFGg", + [16] = map_fsgnjs, [17] = map_fsgnjd, [20] = map_fms, [21] = map_fmd, + [32] = "fcvt.s.dFG", [33] = "fcvt.d.sFG",[44] = "fsqrt.sFG", [45] = "fsqrt.dFG", + [80] = map_fcomps, [81] = map_fcompd, [96] = map_fcvtwls, [97] = map_fcvtwld, + [104] = map_fcvts, [105] = map_fcvtd, + [112] = { + shift = 12, mask = 7, + [0] = "fmv.x.wDG", "fclass.sDG" + }, + [113] = { + shift = 12, mask = 7, + [0] = "fmv.x.dDG", "fclass.dDG" + }, + [120] = "fmv.w.xFR", [121] = "fmv.d.xFR" +} + +--RV32A, RV64A +local map_aext = { + shift = 27, mask = 31, + [0] = { + shift = 12, mask = 7, + [2] = "amoadd.wDrO", [3] = "amoadd.dDrO" + }, + { + shift = 12, mask = 7, + [2] = "amoswap.wDrO", [3] = "amoswap.dDrO" + }, + { + shift = 12, mask = 7, + [2] = "lr.wDO", [3] = "lr.dDO" + }, + { + shift = 12, mask = 7, + [2] = "sc.wDrO", [3] = "sc.dDrO" + }, + { + shift = 12, mask = 7, + [2] = "amoxor.wDrO", [3] = "amoxor.dDrO" + }, + [8] = { + shift = 12, mask = 7, + [2] = "amoor.wDrO", [3] = "amoor.dDrO" + }, + [12] = { + shift = 12, mask = 7, + [2] = "amoand.wDrO", [3] = "amoand.dDrO" + }, + [16] = { + shift = 12, mask = 7, + [2] = "amomin.wDrO", [3] = "amomin.dDrO" + }, + [20] = { + shift = 12, mask = 7, + [2] = "amomax.wDrO", [3] = "amomax.dDrO" + }, + [24] = { + shift = 12, mask = 7, + [2] = "amominu.wDrO", [3] = "amominu.dDrO" + }, + [28] = { + shift = 12, mask = 7, + [2] = "amomaxu.wDrO", [3] = "amomaxu.dDrO" + }, +} + +-- RV32I, RV64I +local map_load = { + shift = 12, mask = 7, + [0] = "lbDL", "lhDL", "lwDL", "ldDL", + "lbuDL", "lhuDL", "lwuDL" +} + +local map_opimm = { + shift = 12, mask = 7, + [0] = { + shift = 7, mask = 0x1ffffff, + [0] = "nop", _ = "addi|li|mvDR0I2" + }, + { + shift = 25, mask = 127, + [48] = { + shift = 20, mask = 31, + [4] = "sext.bDR", [5] = "sext.hDR" + }, + _ = "slliDRi", + }, "sltiDRI", "sltiu|seqzDRI5", + "xori|notDRI4", + { + shift = 26, mask = 63, + [0] = "srliDRi", [16] = "sraiDRi", [24] = "roriDRi", + [26] = { + shift = 20, mask = 63, + [56] = "rev8DR" + } + }, + "oriDRI", "andiDRI" +} + +local map_branch = { + shift = 12, mask = 7, + [0] = "beq|beqzRr0B", "bne|bnezRr0B" , false, false, + "blt|bgtz|bltzR0r2B", "bge|blez|bgezR0r2B", 
"bltuRrB", "bgeuRrB" +} + +local map_store = { + shift = 12, mask = 7, + [0] = "sbSr", "shSr", "swSr", "sdSr" +} + +local map_op = { + shift = 25, mask = 127, + [0] = { + shift = 12, mask = 7, + [0] = "addDRr", "sllDRr", "slt|sgtz|sltzDR0r2", "sltu|snezDR0r", + "xorDRr", "srlDRr", "orDRr", "andDRr" + }, + [1] = map_mext, + [4] = { + + }, + [5] = { -- Zbb + shift = 12, mask = 7, + [4] = "minDRr", [5] = "minuDRr", [6] = "maxDRr", [7] = "maxuDRr" + }, + [7] = { -- Zicond + shift = 12, mask = 7, + [5] = "czero.eqzDRr", [7] = "czero.nezDRr" + }, + [16] = { -- Zba + shift = 12, mask = 7, + [2] = "sh1addDRr", [4] = "sh2addDRr", [6] = "sh3addDRr" + }, + [32] = { -- Zbb + shift = 12, mask = 7, + [0] = "sub|negDR0r", [4] = "xnorDRr", [5] = "sraDRr", [6] = "ornDRr", [7] = "andnDRr" + }, + [48] = { -- Zbb + shift = 12, mask = 7, + [1] = "rolDRr", [5] = "rorDRr" + } +} + +--- 64I +local map_opimm32 = { + shift = 12, mask = 7, + [0] = "addiw|sext.wDRI0", "slliwDRi", + [2] = { -- Zba + shift = 25, mask = 127, + [1] = "slli.uwDRi" + }, + [5] = { -- 64I + shift = 25, mask = 127, + [0] = "srliwDRi", [32] = "sraiwDRi", [48] = "roriwDRi" + }, + [48] = { -- Zbb + shift = 25, mask = 127, + [5] = "roriwDRi" + } +} + +local map_op32 = { + shift = 25, mask = 127, + [0] = { -- 64I + shift = 12, mask = 7, + [0] = "addwDRr", [1] = "sllwDRr", [5] = "srlwDRr" + }, + [1] = map_mext64, + [4] = { -- Zba & Zbb + shift = 12, mask = 7, + [0] = "add.uw|zext.w|DRr0", [4] = "zext.hDRr" + }, + [16] = { -- Zba + shift = 12, mask = 7, + [2] = "sh1add.uw", [4] = "sh2add.uw", [6] = "sh3add.uw" + }, + [32] = { -- 64I + shift = 12, mask = 7, + [0] = "subw|negwDR0r", [5] = "srawDRr" + }, + [48] = { -- Zbb + shift = 12, mask = 7, + [1] = "rolwDRr", [5] = "rorwDRr" + } +} + +local map_ecabre = { + shift = 12, mask = 7, + [0] = { + shift = 20, mask = 4095, + [0] = "ecall", "ebreak" + } +} + +local map_fence = { + shift = 12, mask = 1, + [0] = "fence", --"fence.i" ZIFENCEI EXTENSION +} + +local map_jalr = { + shift = 7, mask = 0x1ffffff, + _ = "jalr|jrDRI7", [256] = "ret" +} + +local map_xthead_custom0 = { + shift = 12, mask = 7, + [1] = { -- Arithmetic + shift = 27, mask = 31, + [0] = "th.addslDRrv", + [2] = { + shift = 26, mask = 63, + [4] = "th.srriDRi", + [5] = { + shift = 25, mask = 127, + [10] = "th.srriwDRi" + } + }, + [4] = { -- XTheadMac + shift = 25, mask = 3, + [0] = "th.mulaDRr", "th.mulsDRr", "th.mulawDRr", "th.mulswDRr" + }, + [5] = { -- XTheadMac + shift = 25, mask = 3, + [0] = "th.mulahDRr", "th.mulshDRr" + }, + [8] = { -- XTheadCondMov + shift = 25, mask = 3, + [0] = "th.mveqzDRr", "th.mvnezDRr" + }, + [16] = { -- XTheadBb + shift = 20, mask = 31, + [0] = { + shift = 25, mask = 3, + [0] = "th.tstnbzDRi", "th.revDR", "th.ff0DR", "th.ff1DR" + } + }, + [17] = { -- XTheadBb + shift = 26, mask = 1, + [0] = "th.tstDRi" + }, + [18] = { -- XTheadBb + shift = 20, mask = 31, + [0] = { + shift = 25, mask = 3, + [0] = "th.revwDR" + } + } + }, + [2] = "th.extDRji", [3] = "th.extuDRji", + { -- MemLoad + shift = 29, mask = 7, + [7] = { -- XTheadMemPair + shift = 25, mask = 3, + [0] = "th.lwdDrP", [2] = "th.lwudDrP", "th.lddDrP" + } + }, + { -- MemStore + shift = 29, mask = 7, + [7] = { -- XTheadMemPair + shift = 25, mask = 3, + [0] = "th.swdDrP", [3] = "th.sddDrP" + } + } +} + +local map_custom0 = xthead and map_xthead_custom0 or nil + +local map_pri = { + [3] = map_load, [7] = map_fload, [11] = map_custom0, [15] = map_fence, [19] = map_opimm, + [23] = "auipcDA", [27] = map_opimm32, + [35] = map_store, [39] = map_fstore, [47] = map_aext, 
[51] = map_op, + [55] = "luiDU", [59] = map_op32, [67] = map_fmadd, [71] = map_fmsub, + [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, + [103] = map_jalr, [111] = "jal|j|D0J", [115] = map_ecabre +} + +------------------------------------------------------------------------------ + +local map_gpr = { + [0] = "zero", "ra", "sp", "gp", "tp", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", +} + +local map_fgpr = { + [0] = "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", + "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", +} + +------------------------------------------------------------------------------ + +-- Output a nicely formatted line with an opcode and operands. +local function putop(ctx, text, operands) + local pos = ctx.pos + local extra = "" + if ctx.rel then + local sym = ctx.symtab[ctx.rel] + if sym then extra = "\t->"..sym end + end + if ctx.hexdump > 0 then + ctx.out:write((format("%08x %s %-7s %s%s\n", + ctx.addr+pos, tohex(ctx.op), text, concat(operands, ","), extra))) + else + ctx.out(format("%08x %-7s %s%s\n", + ctx.addr+pos, text, concat(operands, ", "), extra)) + end + local pos = ctx.pos + local first_byte = byte(ctx.code, ctx.pos+1) + --Examine if the next instruction is 16-bits or 32-bits + if(band(first_byte, 3) < 3) then + ctx.pos = pos + 2 + else + ctx.pos = pos + 4 + end +end + +-- Fallback for unknown opcodes. +local function unknown(ctx) + return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) +end + +local function get_le(ctx) + local pos = ctx.pos + --Examine if the next instruction is 16-bits or 32-bits + local first_byte = byte(ctx.code, pos+1) + if(band(first_byte, 3) < 3) then --checking first two bits of opcode + local b0, b1 = byte(ctx.code, pos+1, pos+2) + return bor(lshift(b1, 8), b0) + else + local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) + return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) + end +end + +local function parse_W(opcode) + local part1 = band(rshift(opcode, 7), 15) --9:6 + local part2 = band(rshift(opcode, 11), 3) --5:4 + local part3 = band(rshift(opcode, 5), 1)--3 + local part4 = band(rshift(opcode, 6), 1)--2 + return bor(lshift(0, 31), lshift(part1, 6) , lshift(part2, 4), + lshift(part3, 3), lshift(part4, 2)) +end + +local function parse_x(opcode) + local part1 = band(rshift(opcode, 12), 1) --5 + local part2 = band(rshift(opcode, 2), 31) --4:0 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x1ffffff, 6), lshift(part1, 5), part2) + else + return bor(lshift(0, 31), lshift(part1, 5), part2) + end +end + +local function parse_X(opcode) + local part1 = band(rshift(opcode, 12), 1) --12 + local part2 = band(rshift(opcode, 3), 3) --8:7 + local part3 = band(rshift(opcode, 5), 1) --6 + local part4 = band(rshift(opcode, 2), 1) --5 + local part5 = band(rshift(opcode, 6), 1) --4 + if(part1 == 1) then + return bor(lshift(1, 31), lshift(0x3fffff, 9), lshift(part2, 7), + lshift(part3, 6), lshift(part4, 5), lshift(part5, 4)) + else + return bor(lshift(0, 31), lshift(part2, 7), lshift(part3, 6), + lshift(part4, 5), lshift(part5, 4)) + end +end + +local function parse_S(opcode) + local part1 = band(rshift(opcode, 25), 127) --11:5 + local sign = band(rshift(part1, 6), 1) + local part2 = band(rshift(opcode, 7), 31) --4:0 + if (sign == 1) then + return 
+  else
+    return bor(lshift(0, 31), lshift(part1, 5), part2)
+  end
+end
+
+local function parse_B(opcode)
+  local sign = band(rshift(opcode, 31), 1) --12 (sign)
+  local part1 = band(rshift(opcode, 7), 1) --11
+  local part2 = band(rshift(opcode, 25), 63) --10:5
+  local part3 = band(rshift(opcode, 8), 15) --4:1
+  if (sign == 1) then
+    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
+               lshift(part2, 5), lshift(part3, 1), 0)
+  else
+    return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 5),
+               lshift(part3, 1), 0)
+  end
+end
+
+local function parse_q(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --8
+  local part2 = band(rshift(opcode, 5), 3) --7:6
+  local part3 = band(rshift(opcode, 2), 1) --5
+  local part4 = band(rshift(opcode, 10), 3) --4:3
+  local part5 = band(rshift(opcode, 3), 3) --2:1
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x7fffff, 8), lshift(part2, 6),
+               lshift(part3, 5), lshift(part4, 3), lshift(part5, 1))
+  else
+    return bor(lshift(0, 31), lshift(part2, 6), lshift(part3, 5),
+               lshift(part4, 3), lshift(part5, 1))
+  end
+end
+
+local function parse_J(opcode)
+  local part1 = band(rshift(opcode, 31), 1) --20
+  local part2 = band(rshift(opcode, 12), 255) --19:12
+  local part3 = band(rshift(opcode, 20), 1) --11
+  local part4 = band(rshift(opcode, 21), 1023) --10:1
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x7ff, 20), lshift(part2, 12),
+               lshift(part3, 11), lshift(part4, 1))
+  else
+    return bor(lshift(0, 31), lshift(0, 20), lshift(part2, 12),
+               lshift(part3, 11), lshift(part4, 1))
+  end
+end
+
+local function parse_T(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --11
+  local part2 = band(rshift(opcode, 8), 1) --10
+  local part3 = band(rshift(opcode, 9), 3) --9:8
+  local part4 = band(rshift(opcode, 6), 1) --7
+  local part5 = band(rshift(opcode, 7), 1) --6
+  local part6 = band(rshift(opcode, 2), 1) --5
+  local part7 = band(rshift(opcode, 11), 1) --4
+  local part8 = band(rshift(opcode, 3), 7) --3:1
+  if(part1 == 1) then
+    return bor(lshift(1, 31), lshift(0x7ffff, 12), lshift(part1, 11),
+               lshift(part2, 10), lshift(part3, 8), lshift(part4, 7),
+               lshift(part5, 6), lshift(part6, 5), lshift(part7, 4),
+               lshift(part8, 1))
+  else
+    return bor(lshift(0, 31), lshift(part1, 11), lshift(part2, 10),
+               lshift(part3, 8), lshift(part4, 7), lshift(part5, 6),
+               lshift(part6, 5), lshift(part7, 4), lshift(part8, 1))
+  end
+end
+
+local function parse_K(opcode)
+  local part1 = band(rshift(opcode, 12), 1) --5 (17)
+  local part2 = band(rshift(opcode, 2), 31) --4:0 (16:12)
+  if(part1 == 1) then
+    return bor(lshift(0, 31), lshift(0x7fff, 5), part2)
+  else
+    return bor(lshift(0, 31), lshift(part1, 5), part2)
+  end
+end
+
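+-- A worked example of the immediate reassembly above, for parse_B on
+-- "beq a0, a1, -8" (encoded 0xfeb50ce3); values per the 32-bit bit library:
+--   sign (bit 31)      = 1   -- imm[12]
+--   part1 (bit 7)      = 1   -- imm[11]
+--   part2 (bits 30:25) = 63  -- imm[10:5]
+--   part3 (bits 11:8)  = 12  -- imm[4:1]
+--   => bor(0x80000000, 0x7ffff000, 0x800, 0x7e0, 0x18) == -8
+-- (Sketch: bit.bor returns a signed 32-bit result, so the sign extension
+-- falls out for free.)
+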
+-- Disassemble a single instruction.
+local function disass_ins(ctx)
+  local op = ctx:get()
+  local operands = {}
+  local last = nil
+  ctx.op = op
+  ctx.rel = nil
+
+  local opat = 0
+  --for compressed instructions
+  if(band(op, 3) < 3) then
+    opat = ctx.map_compr[band(op, 3)]
+    while type(opat) ~= "string" do
+      if not opat then return unknown(ctx) end
+      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+    end
+  else
+    opat = ctx.map_pri[band(op, 127)]
+    while type(opat) ~= "string" do
+      if not opat then return unknown(ctx) end
+      opat = opat[band(rshift(op, opat.shift), opat.mask)] or opat._
+    end
+  end
+  local name, pat = match(opat, "^([a-z0-9_.]*)(.*)")
+  local altname, pat2 = match(pat, "|([a-z0-9_.|]*)(.*)")
+  if altname then
+    pat = pat2
+  end
+
+  --set when an alias for two pseudoinstructions has fired (both parameters x0/0)
+  local alias_done = false
+
+  for p in gmatch(pat, ".") do
+    local x = nil
+    if p == "D" then
+      x = map_gpr[band(rshift(op, 7), 31)]
+    elseif p == "F" then
+      x = map_fgpr[band(rshift(op, 7), 31)]
+    elseif p == "R" then
+      x = map_gpr[band(rshift(op, 15), 31)]
+    elseif p == "G" then
+      x = map_fgpr[band(rshift(op, 15), 31)]
+    elseif p == "r" then
+      x = map_gpr[band(rshift(op, 20), 31)]
+      if(name == "sb" or name == "sh" or name == "sw" or name == "sd") then
+        local temp = last --because of the different operand order of stores
+        operands[#operands] = x
+        x = temp
+      end
+    elseif p == "g" then
+      x = map_fgpr[band(rshift(op, 20), 31)]
+      if(name == "fsw" or name == "fsd") then
+        local temp = last
+        operands[#operands] = x
+        x = temp
+      end
+    elseif p == "Z" then
+      x = map_gpr[8 + band(rshift(op, 2), 7)]
+    elseif p == "N" then
+      x = map_fgpr[8 + band(rshift(op, 2), 7)]
+    elseif p == "M" then
+      x = map_gpr[8 + band(rshift(op, 7), 7)]
+    elseif p == "E" then
+      x = map_gpr[band(rshift(op, 2), 31)]
+    elseif p == "W" then
+      local uimm = parse_W(op)
+      x = format("%s,%d", "sp", uimm)
+    elseif p == "x" then
+      x = parse_x(op)
+    elseif p == "h" then
+      local part1 = band(rshift(op, 5), 3) --7:6
+      local part2 = band(rshift(op, 10), 7) --5:3
+      local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3))
+      operands[#operands] = format("%d(%s)", uimm, last)
+    elseif p == "X" then
+      local imm = parse_X(op)
+      x = format("%s,%d", "sp", imm)
+    elseif p == "O" then
+      x = format("(%s)", map_gpr[band(rshift(op, 15), 31)])
+    elseif p == "H" then
+      x = map_fgpr[band(rshift(op, 27), 31)]
+    elseif p == "L" then
+      local register = map_gpr[band(rshift(op, 15), 31)]
+      local disp = arshift(op, 20)
+      x = format("%d(%s)", disp, register)
+    elseif p == "P" then -- XTheadMemPair
+      local register = map_gpr[band(rshift(op, 15), 31)]
+      local disp = band(arshift(op, 25), 3)
+      local isword = bit.bxor(band(arshift(op, 26), 1), 1)
+      x = format("(%s), %d, %d", register, disp, isword == 1 and 3 or 4)
+    elseif p == "I" then
+      x = arshift(op, 20)
+      --different for jalr
+      if(name == "jalr") then
+        local reg = map_gpr[band(rshift(op, 15), 31)]
+        if(ctx.reltab[reg] == nil) then
+          operands[#operands] = format("%d(%s)", x, last)
+        else
+          local target = ctx.reltab[reg] + x
+          operands[#operands] = format("%d(%s) #0x%08x", x, last, target)
+          ctx.rel = target
+          ctx.reltab[reg] = nil --assume no reuse of the register
+        end
+        x = nil --don't add an additional operand
+      end
+    elseif p == "i" then
+      --both for RV32I and RV64I
+      local value = band(arshift(op, 20), 63)
+      x = string.format("%d", value)
+    elseif p == "j" then -- XThead imm1[31..26]
+      local value = band(rshift(op, 26), 63)
+      x =
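+      -- (Aside: pattern letters map encoding fields to operands and digits
+      -- patch pseudo-instruction aliases. E.g. map_opimm's "addi|li|mvDR0I2":
+      --   addi a0, zero, 10  -- "0": rs1 == zero --> li a0, 10
+      --   addi a0, a5, 0     -- "2": imm == 0    --> mv a0, a5
+      -- Sketch of the two alias hooks only; the loop below handles the rest.)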
string.format("%d", value) + elseif p == "v" then --XThead imm[2][26..25] + local value = band(rshift(op, 25), 3) + x = string.format("%d", value) + elseif p == "S" then + local register = map_gpr[band(rshift(op, 15), 31)] --register + local imm = parse_S(op) + x = format("%d(%s)", imm, register) + elseif p == "n" then + local part1 = band(rshift(op, 5), 1) --6 + local part2 = band(rshift(op, 10), 7) --5:3 + local part3 = band(rshift(op, 6), 1) --2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), + lshift(part3, 2)) + operands[#operands] = format("%d(%s)", uimm, last) + elseif p == "A" then + local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] + ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) + x = format("0x%x", value) + elseif p == "B" then + x = ctx.addr + ctx.pos + parse_B(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "U" then + local value = band(rshift(op, 12), 0xfffff) + x = string.format("0x%x", value) + elseif p == "Q" then + local part1 = band(rshift(op, 2), 7) --8:6 + local part2 = band(rshift(op, 12), 1) --5 + local part3 = band(rshift(op, 5), 3) --4:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 3)) + x = format("%d(%s)", uimm, "sp") + elseif p == "q" then + x = ctx.addr + ctx.pos + parse_q(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "J" then + x = ctx.addr + ctx.pos + parse_J(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "K" then + local value = parse_K(op) + x = string.format("0x%x", value) + elseif p == "Y" then + local part1 = band(rshift(op, 2), 3) --7:6 + local part2 = band(rshift(op, 12), 1) --5 + local part3 = band(rshift(op, 4), 7) --4:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 2)) + x = format("%d(%s)", uimm, "sp") + elseif p == "1" then + local part1 = band(rshift(op, 12), 1) --5 + local part2 = band(rshift(op, 2), 31) --4:0 + local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) + x = string.format("0x%x", uimm) + elseif p == "T" then + x = ctx.addr + ctx.pos + parse_T(op) + ctx.rel = x + x = format("0x%08x", x) + elseif p == "t" then + local part1 = band(rshift(op, 7), 7) --8:6 + local part2 = band(rshift(op, 10), 7) --5:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) + x = format("%d(%s)", uimm, "sp") + elseif p == "u" then + local part1 = band(rshift(op, 7), 3) --7:6 + local part2 = band(rshift(op, 9), 15) --5:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) + x = format("%d(%s)", uimm, "sp") + elseif p == "V" then + x = map_fgpr[band(rshift(op, 2), 31)] + elseif p == "0" then --PSEUDOINSTRUCTIONS + if (last == "zero" or last == 0) then + local n = #operands + operands[n] = nil + last = operands[n-1] + local a1, a2 = match(altname, "([^|]*)|(.*)") + if a1 then name, altname = a1, a2 + else name = altname end + alias_done = true + end + elseif (p == "4") then + if(last == -1) then + name = altname + operands[#operands] = nil + end + elseif (p == "5") then + if(last == 1) then + name = altname + operands[#operands] = nil + end + elseif (p == "6") then + if(last == operands[#operands - 1]) then + name = altname + operands[#operands] = nil + end + elseif (p == "7") then --jalr rs + local value = string.sub(operands[#operands], 1, 1) + local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) + if(value == "0" and + (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then + if(operands[#operands - 
1] == "zero") then + name = altname + end + operands[#operands] = nil + operands[#operands] = reg + end + elseif (p == "2" and alias_done == false) then + if (last == "zero" or last == 0) then + local a1, a2 = match(altname, "([^|]*)|(.*)") + name = a2 + operands[#operands] = nil + end + end + if x then operands[#operands+1] = x; last = x end + end + return putop(ctx, name, operands) +end + +------------------------------------------------------------------------------ + +-- Disassemble a block of code. +local function disass_block(ctx, ofs, len) + if not ofs then + ofs = 0 + end + local stop = len and ofs+len or #ctx.code + --instructions can be both 32 and 16 bits + stop = stop - stop % 2 + ctx.pos = ofs - ofs % 2 + ctx.rel = nil + while ctx.pos < stop do disass_ins(ctx) end +end + +-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). +local function create(code, addr, out) + local ctx = {} + ctx.code = code + ctx.addr = addr or 0 + ctx.out = out or io.write + ctx.symtab = {} + ctx.disass = disass_block + ctx.hexdump = 8 + ctx.get = get_le + ctx.map_pri = map_pri + ctx.map_compr = map_compr + ctx.reltab = {} + return ctx +end + +-- Simple API: disassemble code (a string) at address and output via out. +local function disass(code, addr, out) + create(code, addr, out):disass(addr) +end + +-- Return register name for RID. +local function regname(r) + if r < 32 then return map_gpr[r] end + return "f"..(r-32) +end + +-- Public module functions. +return { + create = create, + disass = disass, + regname = regname +} diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua new file mode 100644 index 0000000000..5767799039 --- /dev/null +++ b/src/jit/dis_riscv64.lua @@ -0,0 +1,16 @@ +---------------------------------------------------------------------------- +-- LuaJIT RISC-V 64 disassembler wrapper module. +-- +-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. +-- Released under the MIT license. See Copyright Notice in luajit.h +---------------------------------------------------------------------------- +-- This module just exports the default riscv little-endian functions from the +-- RISC-V disassembler module. All the interesting stuff is there. +------------------------------------------------------------------------------ + +local dis_riscv = require((string.match(..., ".*%.") or "").."dis_riscv") +return { + create = dis_riscv.create, + disass = dis_riscv.disass, + regname = dis_riscv.regname +} \ No newline at end of file diff --git a/src/lib_jit.c b/src/lib_jit.c index db341120af..fa45c6206d 100644 --- a/src/lib_jit.c +++ b/src/lib_jit.c @@ -642,6 +642,84 @@ JIT_PARAMDEF(JIT_PARAMINIT) #include #endif +#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX + +#if LJ_TARGET_LINUX +#include + +struct riscv_hwprobe hwprobe_requests[] = { + {RISCV_HWPROBE_KEY_IMA_EXT_0} +}; + +const uint64_t *hwprobe_ext = &hwprobe_requests[0].value; + +int hwprobe_ret = 0; +#endif + +static int riscv_compressed() +{ +#if defined(__riscv_c) || defined(__riscv_compressed) + /* Don't bother checking for RVC -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_IMA_C)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_zba() +{ +#if defined(__riscv_b) || defined(__riscv_zba) + /* Don't bother checking for Zba -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBA)) ? 
1 : 0; +#else + return 0; +#endif +} + +static int riscv_zbb() +{ +#if defined(__riscv_b) || defined(__riscv_zbb) + /* Don't bother checking for Zbb -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZBB)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_zicond() +{ +#if defined(__riscv_zicond) + /* Don't bother checking for Zicond -- would crash before getting here. */ + return 1; +#elif LJ_TARGET_LINUX + return (hwprobe_ret == 0 && ((*hwprobe_ext) & RISCV_HWPROBE_EXT_ZICOND)) ? 1 : 0; +#else + return 0; +#endif +} + +static int riscv_xthead() +{ +/* +** Hardcoded as there's no easy way of detection: +** - SIGILL have some trouble with libluajit as we speak +** - Checking mvendorid looks good, but might not be reliable. +*/ + return 0; +} + +static uint32_t riscv_probe(int (*func)(void), uint32_t flag) +{ + return func() ? flag : 0; +} +#endif + /* Arch-dependent CPU detection. */ static uint32_t jit_cpudetect(lua_State *L) { @@ -729,7 +807,26 @@ static uint32_t jit_cpudetect(lua_State *L) #endif #endif #elif LJ_TARGET_RISCV64 - /* No optional CPU features to detect (for now). */ +#if LJ_HASJIT + +#if LJ_TARGET_LINUX + /* HWPROBE-based detection of RVC, Zba, Zbb and Zicond. */ + hwprobe_ret = syscall(__NR_riscv_hwprobe, &hwprobe_requests, + sizeof(hwprobe_requests) / sizeof(struct riscv_hwprobe), 0, + NULL, 0); + + flags |= riscv_probe(riscv_compressed, JIT_F_RVC); + flags |= riscv_probe(riscv_zba, JIT_F_RVZba); + flags |= riscv_probe(riscv_zbb, JIT_F_RVZbb); + flags |= riscv_probe(riscv_zicond, JIT_F_RVZicond); + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); + +#endif + + /* Detect V/P? */ + /* V have no hardware available, P not ratified yet. */ +#endif + #else #error "Missing CPU detection for this architecture" #endif diff --git a/src/lj_arch.h b/src/lj_arch.h index e529ffacff..5d0047963e 100644 --- a/src/lj_arch.h +++ b/src/lj_arch.h @@ -439,7 +439,6 @@ #define LJ_TARGET_MASKSHIFT 1 #define LJ_TARGET_MASKROT 1 #define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -#define LJ_ARCH_NOJIT 1 /* NYI */ #define LJ_ARCH_NOMEMPROF 1 #define LJ_ARCH_NOSYSPROF 1 diff --git a/src/lj_asm.c b/src/lj_asm.c index 5137d05c7e..90b3685d86 100644 --- a/src/lj_asm.c +++ b/src/lj_asm.c @@ -185,6 +185,8 @@ IRFLDEF(FLOFS) #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS #include "lj_emit_mips.h" +#elif LJ_TARGET_RISCV64 +#include "lj_emit_riscv.h" #else #error "Missing instruction emitter for target CPU" #endif @@ -1641,6 +1643,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" +#elif LJ_TARGET_RISCV64 +#include "lj_asm_riscv64.h" #else #error "Missing assembler for target CPU" #endif diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h new file mode 100644 index 0000000000..5d4e311279 --- /dev/null +++ b/src/lj_asm_riscv64.h @@ -0,0 +1,1991 @@ +/* +** RISC-V IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by gns from PLCT Lab, ISCAS. +*/ + +/* -- Register allocator extensions --------------------------------------- */ + +/* Allocate a register with a hint. */ +static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!ra_hashint(r) && !iscrossref(as, ref)) + ra_sethint(IR(ref)->r, hint); /* Propagate register hint. 
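+      ** A hint is advisory: if the hinted register is free when the ref
+      ** is materialized, the value is allocated straight into it and a
+      ** later register-register move is avoided (sketch of intent; see
+      ** ra_allocref/ra_dest for the mechanics).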
*/ + r = ra_allocref(as, ref, allow); + } + ra_noweak(as, r); + return r; +} + +/* Allocate a register or RID_ZERO. */ +static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) +{ + Reg r = IR(ref)->r; + if (ra_noreg(r)) { + if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(as, ref) == 0) + return RID_ZERO; + r = ra_allocref(as, ref, allow); + } else { + ra_noweak(as, r); + } + return r; +} + +/* Allocate two source registers for three-operand instructions. */ +static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) +{ + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + Reg left = irl->r, right = irr->r; + if (ra_hasreg(left)) { + ra_noweak(as, left); + if (ra_noreg(right)) + right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); + else + ra_noweak(as, right); + } else if (ra_hasreg(right)) { + ra_noweak(as, right); + left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); + } else if (ra_hashint(right)) { + right = ra_alloc1z(as, ir->op2, allow); + left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); + } else { + left = ra_alloc1z(as, ir->op1, allow); + right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); + } + return left | (right << 8); +} + +/* -- Guard handling ------------------------------------------------------ */ + +/* Copied from MIPS, AUIPC+JALR is expensive to setup in-place */ +#define RISCV_SPAREJUMP 4 + +/* Setup spare long-range jump (trampoline?) slots per mcarea. */ + +static void asm_sparejump_setup(ASMState *as) +{ + MCode *mxp = as->mctop; + if ((char *)mxp == (char *)as->J->mcarea + as->J->szmcarea) { + for (int i = RISCV_SPAREJUMP*2; i--; ) + *--mxp = RISCVI_EBREAK; + as->mctop = mxp; + } +} + +static MCode *asm_sparejump_use(MCode *mcarea, MCode *target) +{ + MCode *mxp = (MCode *)((char *)mcarea + ((MCLink *)mcarea)->size); + int slot = RISCV_SPAREJUMP; + RISCVIns tslot = RISCVI_EBREAK, tauipc, tjalr; + while (slot--) { + mxp -= 2; + ptrdiff_t delta = (char *)target - (char *)mxp; + tauipc = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)), + tjalr = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + if (mxp[0] == tauipc && mxp[1] == tjalr) { + return mxp; + } else if (mxp[0] == tslot) { + mxp[0] = tauipc, mxp[1] = tjalr; + return mxp; + } + } + return NULL; +} + +/* Setup exit stub after the end of each trace. */ +static void asm_exitstub_setup(ASMState *as, ExitNo nexits) +{ + ExitNo i; + MCode *mxp = as->mctop; + if (mxp - (nexits + 4 + MCLIM_REDZONE) < as->mclim) + asm_mclimit(as); + for (i = nexits-1; (int32_t)i >= 0; i--) + *--mxp = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ((uintptr_t)(4*(-4-i))); + ptrdiff_t delta = (char *)lj_vm_exit_handler - (char *)(mxp-3); + /* 1: sw ra, 0(sp); auipc+jalr ->vm_exit_handler; lui x0, traceno; jal <1; jal <1; ... */ + *--mxp = RISCVI_LUI | RISCVF_IMMU(as->T->traceno); + *--mxp = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) + | RISCVF_IMMI(RISCVF_LO((uintptr_t)(void *)delta)); + *--mxp = RISCVI_AUIPC | RISCVF_D(RID_TMP) + | RISCVF_IMMU(RISCVF_HI((uintptr_t)(void *)delta)); + *--mxp = RISCVI_SD | RISCVF_S2(RID_RA) | RISCVF_S1(RID_SP); + as->mctop = mxp; +} + +static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) +{ + /* Keep this in-sync with exitstub_trace_addr(). */ + return as->mctop + exitno + 4; +} + +/* Emit conditional branch to exit for guard. 
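+** The straight-line layout is an inverted compare that hops over a jump
+** to this snapshot's exit stub, e.g. for asm_guard(as, RISCVI_BEQ, a, b)
+** roughly:
+**   bne a, b, +8        # inverted guard: fast path falls through
+**   jal x0, <exit stub>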
*/ +static void asm_guard(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2) +{ + MCode *target = asm_exitstub_addr(as, as->snapno); + MCode *p = as->mcp; + if (LJ_UNLIKELY(p == as->invmcp)) { + as->loopinv = 1; + as->mcp = ++p; + *p = RISCVI_JAL | RISCVF_IMMJ((char *)target - (char *)p); + riscvi = riscvi^RISCVF_FUNCT3(1); /* Invert cond. */ + target = p - 1; /* Patch target later in asm_loop_fixup. */ + } + ptrdiff_t delta = (char *)target - (char *)(p - 1); + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); + *--p = (riscvi^RISCVF_FUNCT3(1)) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); + as->mcp = p; +} + +/* -- Operand fusion ------------------------------------------------------ */ + +/* Limit linear search to this distance. Avoids O(n^2) behavior. */ +#define CONFLICT_SEARCH_LIM 31 + +/* Check if there's no conflicting instruction between curins and ref. */ +static int noconflict(ASMState *as, IRRef ref, IROp conflict) +{ + IRIns *ir = as->ir; + IRRef i = as->curins; + if (i > ref + CONFLICT_SEARCH_LIM) + return 0; /* Give up, ref is too far away. */ + while (--i > ref) + if (ir[i].o == conflict) + return 0; /* Conflict found. */ + return 1; /* Ok, no conflict. */ +} + +/* Fuse the array base of colocated arrays. */ +static int32_t asm_fuseabase(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && + !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) + return (int32_t)sizeof(GCtab); + return 0; +} + +/* Fuse array/hash/upvalue reference into register+offset operand. */ +static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) +{ + IRIns *ir = IR(ref); + if (ra_noreg(ir->r)) { + if (ir->o == IR_AREF) { + if (mayfuse(as, ref)) { + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki12(ofs)) { + *ofsp = ofs; + return ra_alloc1(as, refa, allow); + } + } + } + } else if (ir->o == IR_HREFK) { + if (mayfuse(as, ref)) { + int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); + if (checki12(ofs)) { + *ofsp = ofs; + return ra_alloc1(as, ir->op1, allow); + } + } + } else if (ir->o == IR_UREFC) { + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv; + intptr_t ofs = ((intptr_t)((uintptr_t)(&uv->tv) - (uintptr_t)&J2GG(as->J)->g)); + if (checki12(ofs)) { + *ofsp = (int32_t)ofs; + return RID_GL; + } + } + } + } + *ofsp = 0; + return ra_alloc1(as, ref, allow); +} + +/* Fuse XLOAD/XSTORE reference into load/store operand. */ +static void asm_fusexref(ASMState *as, RISCVIns riscvi, Reg rd, IRRef ref, + RegSet allow, int32_t ofs) +{ + IRIns *ir = IR(ref); + Reg base; + if (ra_noreg(ir->r) && canfuse(as, ir)) { + intptr_t ofs2; + if (ir->o == IR_ADD) { + if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(as, ir->op2), + checki12(ofs2))) { + ref = ir->op1; + ofs = (int32_t)ofs2; + } + } else if (ir->o == IR_STRREF) { + ofs2 = 4096; + lj_assertA(ofs == 0, "bad usage"); + ofs = (int32_t)sizeof(GCstr); + if (irref_isk(ir->op2)) { + ofs2 = ofs + get_kval(as, ir->op2); + ref = ir->op1; + } else if (irref_isk(ir->op1)) { + ofs2 = ofs + get_kval(as, ir->op1); + ref = ir->op2; + } + if (!checki12(ofs2)) { + /* NYI: Fuse ADD with constant. 
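+      ** Instead, materialize the address and use a plain displacement;
+      ** the two emits below produce (in execution order) roughly:
+      **   add  tmp, left, right
+      **   l/s  rd, sizeof(GCstr)(tmp)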
*/ + Reg right, left = ra_alloc2(as, ir, allow); + right = (left >> 8); left &= 255; + emit_lso(as, riscvi, rd, RID_TMP, ofs); + emit_ds1s2(as, RISCVI_ADD, RID_TMP, left, right); + return; + } + ofs = ofs2; + } + } + base = ra_alloc1(as, ref, allow); + emit_lso(as, riscvi, rd, base, ofs); +} + +/* Fuse Integer multiply-accumulate. */ + +static int asm_fusemac(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_GPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_GPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_ds1s2(as, riscvi, dest, left, right); + if (dest != add) emit_mv(as, dest, add); + return 1; + } + return 0; +} + +/* Fuse FP multiply-add/sub. */ + +static int asm_fusemadd(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvir) +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; + if (lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); + Reg left = ra_alloc2(as, irm, + rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; + emit_ds1s2s3(as, riscvi, dest, left, right, add); + return 1; + } + return 0; +} +/* -- Calls --------------------------------------------------------------- */ + +/* Generate a call to a C function. */ +static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) +{ + uint32_t n, nargs = CCI_XNARGS(ci); + int32_t ofs = 0; + Reg gpr, fpr = REGARG_FIRSTFPR; + if ((void *)ci->func) + emit_call(as, (void *)ci->func, 1); + for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) + as->cost[gpr] = REGCOST(~0u, ASMREF_L); + gpr = REGARG_FIRSTGPR; + for (n = 0; n < nargs; n++) { /* Setup args. */ + IRRef ref = args[n]; + IRIns *ir = IR(ref); + if (ref) { + if (irt_isfp(ir->t)) { + if (fpr <= REGARG_LASTFPR) { + lj_assertA(rset_test(as->freeset, fpr), + "reg %d not free", fpr); /* Must have been evicted. */ + ra_leftov(as, fpr, ref); + fpr++; if(ci->flags & CCI_VARARG) gpr++; + } else if (!(ci->flags & CCI_VARARG) && gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; + } else { + Reg r = ra_alloc1(as, ref, RSET_FPR); + emit_spstore(as, ir, r, ofs); + ofs += 8; + } + } else { + if (gpr <= REGARG_LASTGPR) { + lj_assertA(rset_test(as->freeset, gpr), + "reg %d not free", gpr); /* Must have been evicted. */ + ra_leftov(as, gpr, ref); + gpr++; if(ci->flags & CCI_VARARG) fpr++; + } else { + Reg r = ra_alloc1z(as, ref, RSET_GPR); + emit_spstore(as, ir, r, ofs); + ofs += 8; + } + } + } + } +} + +/* Setup result reg/sp for call. Evict scratch regs. */ +static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); /* Evictions must be performed first. 
*/ + if (ra_used(ir)) { + lj_assertA(!irt_ispri(ir->t), "PRI dest"); + if (irt_isfp(ir->t)) { + if ((ci->flags & CCI_CASTU64)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, + dest, RID_RET); + } else { + ra_destreg(as, ir, RID_FPRET); + } + } else { + ra_destreg(as, ir, RID_RET); + } + } +} + +static void asm_callx(ASMState *as, IRIns *ir) +{ + IRRef args[CCI_NARGS_MAX*2]; + CCallInfo ci; + IRRef func; + IRIns *irf; + ci.flags = asm_callx_flags(as, ir); + asm_collectargs(as, ir, &ci, args); + asm_setupresult(as, ir, &ci); + func = ir->op2; irf = IR(func); + if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } + if (irref_isk(func)) { /* Call to constant address. */ + ci.func = (ASMFunction)(void *)get_kval(as, func); + } else { /* Need specific register for indirect calls. */ + Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); + MCode *p = as->mcp; + *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(r); + if (r == RID_CFUNCADDR) + *--p = RISCVI_ADDI | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); + else + *--p = RISCVI_MV | RISCVF_D(RID_CFUNCADDR) | RISCVF_S1(r); + as->mcp = p; + ci.func = (ASMFunction)(void *)0; + } + asm_gencall(as, &ci, args); +} + +static void asm_callround(ASMState *as, IRIns *ir, IRCallID id) +{ + /* The modified regs must match with the *.dasc implementation. */ + RegSet drop = RID2RSET(RID_X6)|RID2RSET(RID_X7)|RID2RSET(RID_F10)| + RID2RSET(RID_F14)|RID2RSET(RID_F1)|RID2RSET(RID_F3)| + RID2RSET(RID_F4); + if (ra_hasreg(ir->r)) rset_clear(drop, ir->r); + ra_evictset(as, drop); + ra_destreg(as, ir, RID_FPRET); + emit_call(as, (void *)lj_ir_callinfo[id].func, 0); + ra_leftov(as, REGARG_FIRSTFPR, ir->op1); +} + +/* -- Returns ------------------------------------------------------------- */ + +/* Return to lower frame. Guard that it goes to the right spot. */ +static void asm_retf(ASMState *as, IRIns *ir) +{ + Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); + void *pc = ir_kptr(IR(ir->op2)); + int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); + as->topslot -= (BCReg)delta; + if ((int32_t)as->topslot < 0) as->topslot = 0; + irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ + emit_setgl(as, base, jit_base); + emit_addptr(as, base, -8*delta); + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base))); + emit_lso(as, RISCVI_LD, RID_TMP, base, -8); +} + +/* -- Buffer operations --------------------------------------------------- */ + +#if LJ_HASBUFFER +static void asm_bufhdr_write(ASMState *as, Reg sb) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); + IRIns irgc; + irgc.ot = IRT(0, IRT_PGC); /* GC type. 
*/ + emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); + emit_ds1s2(as, RISCVI_OR, RID_TMP, RID_TMP, tmp); + emit_dsi(as, RISCVI_ANDI, tmp, tmp, SBUF_MASK_FLAG); + emit_getgl(as, RID_TMP, cur_L); + emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); +} +#endif + +/* -- Type conversions ---------------------------------------------------- */ + +static void asm_tointg(ASMState *as, IRIns *ir, Reg left) +{ + Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); + Reg dest = ra_dest(as, ir, RSET_GPR), cmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); + asm_guard(as, RISCVI_BEQ, cmp, RID_ZERO); + emit_ds1s2(as, RISCVI_FEQ_D, cmp, tmp, left); + emit_ds(as, RISCVI_FCVT_D_W, tmp, dest); + emit_ds(as, RISCVI_FCVT_W_D, dest, left); +} + +static void asm_tobit(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_FPR; + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, allow); + Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); + Reg tmp = ra_scratch(as, rset_clear(allow, right)); + emit_ds(as, RISCVI_FMV_X_W, dest, tmp); + emit_ds1s2(as, RISCVI_FADD_D, tmp, left, right); +} + +static void asm_conv(ASMState *as, IRIns *ir) +{ + IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); + int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64); + int stfp = (st == IRT_NUM || st == IRT_FLOAT); + IRRef lref = ir->op1; + lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); + /* Use GPR to pass floating-point arguments */ + if (irt_isfp(ir->t) && ir->r >= RID_X10 && ir->r <= RID_X17) { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg ftmp = ra_scratch(as, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_ds(as, st == IRT_NUM ? RISCVI_FMV_X_W : RISCVI_FMV_X_D, dest, ftmp); + emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, + ftmp, ra_alloc1(as, lref, RSET_FPR)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : + (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : + (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); + emit_ds(as, st64 ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dest, ftmp); + emit_ds(as, riscvi, ftmp, left); + } + } else if (irt_isfp(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + if (stfp) { /* FP to FP conversion. */ + emit_ds(as, st == IRT_NUM ? RISCVI_FCVT_S_D : RISCVI_FCVT_D_S, + dest, ra_alloc1(as, lref, RSET_FPR)); + } else { /* Integer to FP conversion. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = irt_isfloat(ir->t) ? + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_S_L : RISCVI_FCVT_S_LU) : + (st == IRT_INT ? RISCVI_FCVT_S_W : RISCVI_FCVT_S_WU)) : + (((IRT_IS64 >> st) & 1) ? + (st == IRT_I64 ? RISCVI_FCVT_D_L : RISCVI_FCVT_D_LU) : + (st == IRT_INT ? RISCVI_FCVT_D_W : RISCVI_FCVT_D_WU)); + emit_ds(as, riscvi, dest, left); + } + } else if (stfp) { /* FP to integer conversion. */ + if (irt_isguard(ir->t)) { + /* Checked conversions are only supported from number to int. */ + lj_assertA(irt_isint(ir->t) && st == IRT_NUM, + "bad type for checked CONV"); + asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); + } else { + Reg left = ra_alloc1(as, lref, RSET_FPR); + Reg dest = ra_dest(as, ir, RSET_GPR); + RISCVIns riscvi = irt_is64(ir->t) ? + (st == IRT_NUM ? + (irt_isi64(ir->t) ? RISCVI_FCVT_L_D : RISCVI_FCVT_LU_D) : + (irt_isi64(ir->t) ? 
RISCVI_FCVT_L_S : RISCVI_FCVT_LU_S)) : + (st == IRT_NUM ? + (irt_isint(ir->t) ? RISCVI_FCVT_W_D : RISCVI_FCVT_WU_D) : + (irt_isint(ir->t) ? RISCVI_FCVT_W_S : RISCVI_FCVT_WU_S)); + emit_ds(as, riscvi|RISCVF_RM(RISCVRM_RTZ), dest, left); + } + } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, lref, RSET_GPR); + RISCVIns riscvi = st == IRT_I8 ? RISCVI_SEXT_B : + st == IRT_U8 ? RISCVI_ZEXT_B : + st == IRT_I16 ? RISCVI_SEXT_H : RISCVI_ZEXT_H; + lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); + emit_ext(as, riscvi, dest, left); + } else { /* 32/64 bit integer conversions. */ + Reg dest = ra_dest(as, ir, RSET_GPR); + if (irt_is64(ir->t)) { + if (st64) { + /* 64/64 bit no-op (cast)*/ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } else { /* 32 to 64 bit sign extension. */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + if ((ir->op2 & IRCONV_SEXT)) { /* 32 to 64 bit sign extension. */ + emit_ext(as, RISCVI_SEXT_W, dest, left); + } else { /* 32 to 64 bit zero extension. */ + emit_ext(as, RISCVI_ZEXT_W, dest, left); + } + } + } else { + if (st64 && !(ir->op2)) { + /* This is either a 32 bit reg/reg mov which zeroes the hiword + ** or a load of the loword from a 64 bit address. + */ + Reg left = ra_alloc1(as, lref, RSET_GPR); + emit_ext(as, RISCVI_ZEXT_W, dest, left); + } else { /* 32/32 bit no-op (cast). */ + ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ + } + } + } +} + +static void asm_strto(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; + IRRef args[2]; + int32_t ofs = SPOFS_TMP; + RegSet drop = RSET_SCRATCH; + if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ + ra_evictset(as, drop); + if (ir->s) ofs = sps_scale(ir->s); + asm_guard(as, RISCVI_BEQ, RID_RET, RID_ZERO); /* Test return status. */ + args[0] = ir->op1; /* GCstr *str */ + args[1] = ASMREF_TMP1; /* TValue *n */ + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); + emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); +} + +/* -- Memory references --------------------------------------------------- */ + +/* Store tagged value for ref at base+ofs. */ +static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) +{ + RegSet allow = rset_exclude(RSET_GPR, base); + IRIns *ir = IR(ref); + lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t), + "store of IR type %d", irt_type(ir->t)); + if (irref_isk(ref)) { + TValue k; + lj_ir_kvalue(as->J->L, &k, ir); + emit_lso(as, RISCVI_SD, ra_allock(as, (int64_t)k.u64, allow), base, ofs); + } else { + Reg src = ra_alloc1(as, ref, allow); + rset_clear(allow, src); + Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + emit_lso(as, RISCVI_SD, RID_TMP, base, ofs); + if (irt_isinteger(ir->t)) { + if (as->flags & JIT_F_RVZba) { + emit_ds1s2(as, RISCVI_ADD_UW, RID_TMP, src, type); + } else { + emit_ds1s2(as, RISCVI_ADD, RID_TMP, RID_TMP, type); + emit_ext(as, RISCVI_ZEXT_W, RID_TMP, src); + } + } else { + emit_ds1s2(as, RISCVI_ADD, RID_TMP, src, type); + } + } +} + +/* Get pointer to TValue. */ +static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) +{ + IRIns *ir = IR(ref); + if (irt_isnum(ir->t)) { + if (irref_isk(ref)) { + /* Use the number constant itself as a TValue. 
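+      ** This works because a number IR constant is stored as its raw
+      ** 8-byte double in the IR constant area, which is bit-compatible
+      ** with a numeric TValue, so ir_knum(ir) can stand in directly.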
*/ + ra_allockreg(as, igcptr(ir_knum(ir)), dest); + } else { + /* Otherwise force a spill and use the spill slot. */ + emit_dsi(as, RISCVI_ADDI, dest, RID_SP, ra_spill(as, ir)); + } + } else { + /* Otherwise use g->tmptv to hold the TValue(s). */ + asm_tvstore64(as, dest, 0, ref); + ra_allockreg(as, igcptr(&J2G(as->J)->tmptv), dest); + } + +} + +static void asm_aref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg idx, base; + if (irref_isk(ir->op2)) { + IRRef tab = IR(ir->op1)->op1; + int32_t ofs = asm_fuseabase(as, tab); + IRRef refa = ofs ? tab : ir->op1; + ofs += 8*IR(ir->op2)->i; + if (checki12(ofs)) { + base = ra_alloc1(as, refa, RSET_GPR); + emit_dsi(as, RISCVI_ADDI, dest, base, ofs); + return; + } + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); + emit_sh3add(as, dest, base, idx, RID_TMP); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. +** The equivalent C code is: +** Node *n = hashkey(t, key); +** do { +** if (lj_obj_equal(&n->key, key)) return &n->val; +** } while ((n = nextnode(n))); +** return niltv(L); +*/ +static void asm_href(ASMState *as, IRIns *ir, IROp merge) +{ + RegSet allow = RSET_GPR; + int destused = ra_used(ir); + Reg dest = ra_dest(as, ir, allow); + Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); + Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; + Reg cmp64 = RID_NONE; + IRRef refkey = ir->op2; + IRIns *irkey = IR(refkey); + int isk = irref_isk(refkey); + IRType1 kt = irkey->t; + uint32_t khash; + MCLabel l_end, l_loop, l_next; + rset_clear(allow, tab); + tmp1 = ra_scratch(as, allow); + rset_clear(allow, tmp1); + tmp2 = ra_scratch(as, allow); + rset_clear(allow, tmp2); + + if (irt_isnum(kt)) { + key = ra_alloc1(as, refkey, RSET_FPR); + tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); + } else { + /* Allocate cmp64 register used for 64-bit comparisons */ + if (!isk && irt_isaddr(kt)) { + cmp64 = tmp2; + } else { + int64_t k; + if (isk && irt_isaddr(kt)) { + k = ((int64_t)irt_toitype(kt) << 47) | irkey[1].tv.u64; + } else { + lj_assertA(irt_ispri(kt) && !irt_isnil(kt), "bad HREF key type"); + k = ~((int64_t)~irt_toitype(kt) << 47); + } + cmp64 = ra_allock(as, k, allow); + rset_clear(allow, cmp64); + } + if (!irt_ispri(kt)) { + key = ra_alloc1(as, refkey, allow); + rset_clear(allow, key); + } + } + + /* Key not found in chain: jump to exit (if merged) or load niltv. */ + l_end = emit_label(as); + int is_lend_exit = 0; + as->invmcp = NULL; + if (merge == IR_NE) + asm_guard(as, RISCVI_BEQ, RID_ZERO, RID_ZERO); + else if (destused) + emit_loada(as, dest, niltvg(J2G(as->J))); + + /* Follow hash chain until the end. */ + l_loop = --as->mcp; + emit_mv(as, dest, tmp1); + emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, next)); + l_next = emit_label(as); + + /* Type and value comparison. */ + if (merge == IR_EQ) { /* Must match asm_guard(). 
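+  ** With a merged EQ guard the "not found" case must branch to the same
+  ** snapshot exit stub that asm_guard() would pick, so l_end is switched
+  ** from the local niltv tail to asm_exitstub_addr(as, as->snapno).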
*/ + l_end = asm_exitstub_addr(as, as->snapno); + is_lend_exit = 1; + } + if (irt_isnum(kt)) { + emit_branch(as, RISCVI_BNE, tmp1, RID_ZERO, l_end, is_lend_exit); + emit_ds1s2(as, RISCVI_FEQ_D, tmp1, tmpnum, key); + emit_branch(as, RISCVI_BEQ, tmp1, RID_ZERO, l_next, 0); + emit_dsi(as, RISCVI_SLTIU, tmp1, tmp1, ((int32_t)LJ_TISNUM)); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 47); + emit_ds(as, RISCVI_FMV_D_X, tmpnum, tmp1); + } else { + emit_branch(as, RISCVI_BEQ, tmp1, cmp64, l_end, is_lend_exit); + } + emit_lso(as, RISCVI_LD, tmp1, dest, (int32_t)offsetof(Node, key.u64)); + *l_loop = RISCVI_BNE | RISCVF_S1(tmp1) | RISCVF_S2(RID_ZERO) + | RISCVF_IMMB((char *)as->mcp-(char *)l_loop); + if (!isk && irt_isaddr(kt)) { + type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); + emit_ds1s2(as, RISCVI_ADD, tmp2, key, type); + rset_clear(allow, type); + } + + /* Load main position relative to tab->node into dest. */ + khash = isk ? ir_khash(as, irkey) : 1; + if (khash == 0) { + emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); + } else { + Reg tmphash = tmp1; + if (isk) + tmphash = ra_allock(as, khash, allow); + /* node = tab->node + (idx*32-idx*8) */ + emit_ds1s2(as, RISCVI_ADD, dest, dest, tmp1); + lj_assertA(sizeof(Node) == 24, "bad Node size"); + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp2, tmp1); + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 3); + emit_dsshamt(as, RISCVI_SLLIW, tmp2, tmp1, 5); + emit_ds1s2(as, RISCVI_AND, tmp1, tmp2, tmphash); // idx = hi & tab->hmask + emit_lso(as, RISCVI_LD, dest, tab, (int32_t)offsetof(GCtab, node)); + emit_lso(as, RISCVI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); + if (isk) { + /* Nothing to do. */ + } else if (irt_isstr(kt)) { + emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); + emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); + emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); + emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); + emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); + emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); + emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); + if (irt_isnum(kt)) { + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi + emit_ext(as, RISCVI_SEXT_W, tmp2, tmp1); // lo + emit_ds(as, RISCVI_FMV_X_D, tmp1, key); + } else { + checkmclim(as); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi + emit_ext(as, RISCVI_SEXT_W, tmp2, key); // lo + emit_ds1s2(as, RISCVI_ADD, tmp1, key, type); + } + } + } +} + +static void asm_hrefk(ASMState *as, IRIns *ir) +{ + IRIns *kslot = IR(ir->op2); + IRIns *irkey = IR(kslot->op1); + int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node)); + int32_t kofs = ofs + (int32_t)offsetof(Node, key); + int bigofs = !checki12(kofs); + Reg dest = (ra_used(ir) || bigofs) ? 
ra_dest(as, ir, RSET_GPR) : RID_NONE; + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; + int64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { + idx = dest; + rset_clear(allow, dest); + kofs = (int32_t)offsetof(Node, key); + } else if (ra_hasreg(dest)) { + emit_dsi(as, RISCVI_ADDI, dest, node, ofs); + } + if (irt_ispri(irkey->t)) { + lj_assertA(!irt_isnil(irkey->t), "bad HREFK key type"); + k = ~((int64_t)~irt_toitype(irkey->t) << 47); + } else if (irt_isnum(irkey->t)) { + k = (int64_t)ir_knum(irkey)->u64; + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } + asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); + emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); + if (bigofs) + emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); +} + +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); + if (irref_isk(ir->op1) && !guarded) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); + } else { + if (guarded) + asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); + if (ir->o == IR_UREFC) + emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); + else + emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); + if (guarded) + emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); + if (irref_isk(ir->op1)) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); + emit_loada(as, dest, o); + } else { + emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), + (int32_t)offsetof(GCfuncL, uvptr) + + (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } + } +} + +static void asm_fref(ASMState *as, IRIns *ir) +{ + UNUSED(as); UNUSED(ir); + lj_assertA(!ra_used(ir), "unfused FREF"); +} + +static void asm_strref(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg dest = ra_dest(as, ir, allow); + Reg base = ra_alloc1(as, ir->op1, allow); + IRIns *irr = IR(ir->op2); + int32_t ofs = sizeof(GCstr); + rset_clear(allow, base); + if (irref_isk(ir->op2) && checki12(ofs + irr->i)) { + emit_dsi(as, RISCVI_ADDI, dest, base, ofs + irr->i); + } else { + emit_dsi(as, RISCVI_ADDI, dest, dest, ofs); + emit_ds1s2(as, RISCVI_ADD, dest, base, ra_alloc1(as, ir->op2, allow)); + } +} + +/* -- Loads and stores ---------------------------------------------------- */ + +static RISCVIns asm_fxloadins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: return RISCVI_LB; + case IRT_U8: return RISCVI_LBU; + case IRT_I16: return RISCVI_LH; + case IRT_U16: return RISCVI_LHU; + case IRT_NUM: return RISCVI_FLD; + case IRT_FLOAT: return RISCVI_FLW; + default: return irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW; + } +} + +static RISCVIns asm_fxstoreins(IRIns *ir) +{ + switch (irt_type(ir->t)) { + case IRT_I8: case IRT_U8: return RISCVI_SB; + case IRT_I16: case IRT_U16: return RISCVI_SH; + case IRT_NUM: return RISCVI_FSD; + case IRT_FLOAT: return RISCVI_FSW; + default: return irt_is64(ir->t) ? 
RISCVI_SD : RISCVI_SW; + } +} + +static void asm_fload(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, dest = ra_dest(as, ir, allow); + rset_clear(allow, dest); + RISCVIns riscvi = asm_fxloadins(ir); + int32_t ofs; + if (ir->op1 == REF_NIL) { /* FLOAD from GG_State with offset. */ + idx = RID_GL; + ofs = (ir->op2 << 2) - GG_OFS(g); + } else { + idx = ra_alloc1(as, ir->op1, allow); + if (ir->op2 == IRFL_TAB_ARRAY) { + ofs = asm_fuseabase(as, ir->op1); + if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ + emit_dsi(as, RISCVI_ADDI, dest, idx, ofs); + return; + } + } + ofs = field_ofs[ir->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + } + rset_clear(allow, idx); + emit_lso(as, riscvi, dest, idx, ofs); +} + +static void asm_fstore(ASMState *as, IRIns *ir) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); + IRIns *irf = IR(ir->op1); + Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); + int32_t ofs = field_ofs[irf->op2]; + lj_assertA(!irt_isfp(ir->t), "bad FP FSTORE"); + emit_lso(as, asm_fxstoreins(ir), src, idx, ofs); + } +} + +static void asm_xload(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, (irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR); + lj_assertA(LJ_TARGET_UNALIGNED || !(ir->op2 & IRXLOAD_UNALIGNED), + "unaligned XLOAD"); + asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0); +} + +static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) +{ + if (ir->r != RID_SINK) { + Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); + asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, + rset_exclude(RSET_GPR, src), ofs); + } +} + +#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) + +static void asm_ahuvload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_TMP, idx; + RegSet allow = RSET_GPR; + int32_t ofs = 0; + IRType1 t = ir->t; + if (ra_used(ir)) { + lj_assertA((irt_isnum(ir->t)) || irt_isint(ir->t) || irt_isaddr(ir->t), + "bad load type %d", irt_type(ir->t)); + dest = ra_dest(as, ir, irt_isnum(t) ? 
RSET_FPR : allow); + rset_clear(allow, dest); + if (irt_isaddr(t)) { + emit_cleartp(as, dest, dest); + } else if (irt_isint(t)) + emit_ext(as, RISCVI_SEXT_W, dest, dest); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + if (ir->o == IR_VLOAD) ofs += 8 * ir->op2; + rset_clear(allow, idx); + if (irt_isnum(t)) { + asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_SLTIU, RID_TMP, type, (int32_t)LJ_TISNUM); + } else { + asm_guard(as, RISCVI_BNE, type, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + if (ra_hasreg(dest)) { + if (irt_isnum(t)) { + emit_lso(as, RISCVI_FLD, dest, idx, ofs); + dest = type; + } + } else { + dest = type; + } + emit_dsshamt(as, RISCVI_SRAI, type, dest, 47); + emit_lso(as, RISCVI_LD, dest, idx, ofs); +} + +static void asm_ahustore(ASMState *as, IRIns *ir) +{ + RegSet allow = RSET_GPR; + Reg idx, src = RID_NONE, type = RID_NONE; + int32_t ofs = 0; + if (ir->r == RID_SINK) + return; + if (irt_isnum(ir->t)) { + src = ra_alloc1(as, ir->op2, RSET_FPR); + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_lso(as, RISCVI_FSD, src, idx, ofs); + } else { + Reg tmp = RID_TMP; + if (irt_ispri(ir->t)) { + tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); + rset_clear(allow, tmp); + } else { + src = ra_alloc1(as, ir->op2, allow); + rset_clear(allow, src); + type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); + rset_clear(allow, type); + } + idx = asm_fuseahuref(as, ir->op1, &ofs, allow); + emit_lso(as, RISCVI_SD, tmp, idx, ofs); + if (ra_hasreg(src)) { + if (irt_isinteger(ir->t)) { + if (as->flags & JIT_F_RVZba) { + emit_ds1s2(as, RISCVI_ADD_UW, tmp, src, type); + } else { + emit_ds1s2(as, RISCVI_ADD, tmp, tmp, type); + emit_ext(as, RISCVI_ZEXT_W, tmp, src); + } + } else { + emit_ds1s2(as, RISCVI_ADD, tmp, src, type); + } + } + } +} + +static void asm_sload(ASMState *as, IRIns *ir) +{ + Reg dest = RID_NONE, type = RID_NONE, base; + RegSet allow = RSET_GPR; + IRType1 t = ir->t; + int32_t ofs = 8*((int32_t)ir->op1-2); + lj_assertA(checki12(ofs), "sload IR operand out of range"); + lj_assertA(!(ir->op2 & IRSLOAD_PARENT), + "bad parent SLOAD"); /* Handled by asm_head_side(). */ + lj_assertA(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK), + "inconsistent SLOAD variant"); + if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { + dest = ra_scratch(as, RSET_FPR); + asm_tointg(as, ir, dest); + t.irt = IRT_NUM; /* Continue with a regular number type check. */ + } else if (ra_used(ir)) { + Reg tmp = RID_NONE; + if ((ir->op2 & IRSLOAD_CONVERT)) + tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR); + lj_assertA((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t), + "bad SLOAD type %d", irt_type(t)); + dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); + rset_clear(allow, dest); + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); + if (irt_isaddr(t)) { /* Clear type from pointers. */ + emit_cleartp(as, dest, dest); + } else if (ir->op2 & IRSLOAD_CONVERT) { + if (irt_isint(t)) { + emit_ds(as, RISCVI_FCVT_W_D|RISCVF_RM(RISCVRM_RTZ), dest, tmp); + /* If value is already loaded for type check, move it to FPR. */ + if ((ir->op2 & IRSLOAD_TYPECHECK)) + emit_ds(as, RISCVI_FMV_D_X, tmp, dest); + else + dest = tmp; + t.irt = IRT_NUM; /* Check for original type. */ + } else { + emit_ds(as, RISCVI_FCVT_D_W, dest, tmp); + dest = tmp; + t.irt = IRT_INT; /* Check for original type. */ + } + } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { + /* Sign-extend integers. 
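The TValue was loaded with a full-width LD for the type check, so the low 32 bit payload must be re-extended to keep the RV64 invariant that 32 bit values live sign-extended in GPRs.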
*/ + emit_ext(as, RISCVI_SEXT_W, dest, dest); + } + goto dotypecheck; + } + base = ra_alloc1(as, REF_BASE, allow); + rset_clear(allow, base); +dotypecheck: + if ((ir->op2 & IRSLOAD_TYPECHECK)) { + type = dest < RID_MAX_GPR ? dest : RID_TMP; + if (irt_ispri(t)) { + asm_guard(as, RISCVI_BNE, type, + ra_allock(as, ~((int64_t)~irt_toitype(t) << 47) , allow)); + } else { + if (irt_isnum(t)) { + asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, LJ_TISNUM); + if (ra_hasreg(dest)) { + emit_lso(as, RISCVI_FLD, dest, base, ofs); + } + } else { + asm_guard(as, RISCVI_BNE, RID_TMP, + ra_allock(as, (int32_t)irt_toitype(t), allow)); + } + emit_dsshamt(as, RISCVI_SRAI, RID_TMP, type, 47); + } + emit_lso(as, RISCVI_LD, type, base, ofs); + } else if (ra_hasreg(dest)) { + emit_lso(as, irt_isnum(t) ? RISCVI_FLD : + irt_isint(t) ? RISCVI_LW : RISCVI_LD, + dest, base, ofs); + } +} + +/* -- Allocations --------------------------------------------------------- */ + +#if LJ_HASFFI +static void asm_cnew(ASMState *as, IRIns *ir) +{ + CTState *cts = ctype_ctsG(J2G(as->J)); + CTypeID id = (CTypeID)IR(ir->op1)->i; + CTSize sz; + CTInfo info = lj_ctype_info(cts, id, &sz); + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; + IRRef args[4]; + RegSet drop = RSET_SCRATCH; + lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL), + "bad CNEW/CNEWI operands"); + + as->gcsteps++; + if (ra_hasreg(ir->r)) + rset_clear(drop, ir->r); /* Dest reg handled below. */ + ra_evictset(as, drop); + if (ra_used(ir)) + ra_destreg(as, ir, RID_RET); /* GCcdata * */ + + /* Initialize immutable cdata object. */ + if (ir->o == IR_CNEWI) { + RegSet allow = (RSET_GPR & ~RSET_SCRATCH); + emit_lso(as, sz == 8 ? RISCVI_SD : RISCVI_SW, ra_alloc1(as, ir->op2, allow), + RID_RET, (sizeof(GCcdata))); + lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz); + } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. */ + ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ir->op1; /* CTypeID id */ + args[2] = ir->op2; /* CTSize sz */ + args[3] = ASMREF_TMP1; /* CTSize align */ + asm_gencall(as, ci, args); + emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); + return; + } + + /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. 
*/ + emit_setgl(as, RID_TMP, gc.cdatanum); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, 1); + emit_getgl(as, RID_TMP, gc.cdatanum); + emit_lso(as, RISCVI_SB, RID_RET+1, RID_RET, (offsetof(GCcdata, gct))); + emit_lso(as, RISCVI_SH, RID_TMP, RID_RET, (offsetof(GCcdata, ctypeid))); + emit_loadk12(as, RID_RET+1, ~LJ_TCDATA); + emit_loadk32(as, RID_TMP, id); + args[0] = ASMREF_L; /* lua_State *L */ + args[1] = ASMREF_TMP1; /* MSize size */ + asm_gencall(as, ci, args); + ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), + ra_releasetmp(as, ASMREF_TMP1)); +} +#endif + +/* -- Write barriers ------------------------------------------------------ */ + +static void asm_tbar(ASMState *as, IRIns *ir) +{ + Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); + Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); + Reg link = RID_TMP; + MCLabel l_end = emit_label(as); + emit_lso(as, RISCVI_SD, link, tab, (int32_t)offsetof(GCtab, gclist)); + emit_lso(as, RISCVI_SB, mark, tab, (int32_t)offsetof(GCtab, marked)); + emit_setgl(as, tab, gc.grayagain); // make tab gray again + emit_getgl(as, link, gc.grayagain); + emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, 0); // black: not jump + emit_ds1s2(as, RISCVI_XOR, mark, mark, RID_TMP); // mark=0: gray + emit_dsi(as, RISCVI_ANDI, RID_TMP, mark, LJ_GC_BLACK); + emit_lso(as, RISCVI_LBU, mark, tab, ((int32_t)offsetof(GCtab, marked))); +} + +static void asm_obar(ASMState *as, IRIns *ir) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; + IRRef args[2]; + MCLabel l_end; + Reg obj, val, tmp; + /* No need for other object barriers (yet). */ + lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type"); // Closed upvalue + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ir->op1; /* TValue *tv */ + asm_gencall(as, ci, args); + emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + obj = IR(ir->op1)->r; + tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); + emit_branch(as, RISCVI_BEQ, tmp, RID_ZERO, l_end, 0); + emit_branch(as, RISCVI_BEQ, RID_TMP, RID_ZERO, l_end, 0); // black: jump + emit_dsi(as, RISCVI_ANDI, tmp, tmp, LJ_GC_BLACK); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES); + val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); + emit_lso(as, RISCVI_LBU, tmp, obj, + ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))); + emit_lso(as, RISCVI_LBU, RID_TMP, val, ((int32_t)offsetof(GChead, marked))); +} + +/* -- Arithmetic and logic operations ------------------------------------- */ + +static void asm_fparith(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, riscvi, dest, left, right); +} + +static void asm_fpunary(ASMState *as, IRIns *ir, RISCVIns riscvi) +{ + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); + switch(riscvi) { + case RISCVI_FSQRT_S: case RISCVI_FSQRT_D: + emit_ds(as, riscvi, dest, left); + break; + case RISCVI_FMV_S: case RISCVI_FMV_D: + case RISCVI_FABS_S: case RISCVI_FABS_D: + case RISCVI_FNEG_S: case RISCVI_FNEG_D: + emit_ds1s2(as, riscvi, dest, left, left); + break; + default: + lj_assertA(0, "bad fp unary instruction"); + return; + } +} + +static void asm_fpmath(ASMState *as, IRIns *ir) +{ + IRFPMathOp fpm = (IRFPMathOp)ir->op2; + if (fpm <= IRFPM_TRUNC) + asm_callround(as, ir, IRCALL_lj_vm_floor + fpm); + else if (fpm == IRFPM_SQRT) + 
asm_fpunary(as, ir, RISCVI_FSQRT_D);
+  else
+    asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
+}
+
+static void asm_add(ASMState *as, IRIns *ir)
+{
+  IRType1 t = ir->t;
+  if (irt_isnum(t)) {
+    if (!asm_fusemadd(as, ir, RISCVI_FMADD_D, RISCVI_FMADD_D))
+      asm_fparith(as, ir, RISCVI_FADD_D);
+    return;
+  } else {
+    if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULA))
+      return;
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    if (irref_isk(ir->op2)) {
+      intptr_t k = get_kval(as, ir->op2);
+      if (checki12(k)) {
+        if (irt_is64(t)) {
+          emit_dsi(as, RISCVI_ADDI, dest, left, k);
+        } else {
+          emit_dsi(as, RISCVI_ADDIW, dest, left, k);
+        }
+        return;
+      }
+    }
+    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
+    emit_ds1s2(as, irt_is64(t) ? RISCVI_ADD : RISCVI_ADDW, dest,
+               left, right);
+  }
+}
+
+static void asm_sub(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    if (!asm_fusemadd(as, ir, RISCVI_FMSUB_D, RISCVI_FNMSUB_D))
+      asm_fparith(as, ir, RISCVI_FSUB_D);
+    return;
+  } else {
+    if ((as->flags & JIT_F_RVXThead) && asm_fusemac(as, ir, RISCVI_TH_MULS))
+      return;
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest,
+               left, right);
+  }
+}
+
+static void asm_mul(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fparith(as, ir, RISCVI_FMUL_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
+    right = (left >> 8); left &= 255;
+    emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_MUL : RISCVI_MULW, dest,
+               left, right);
+  }
+}
+
+static void asm_fpdiv(ASMState *as, IRIns *ir)
+{
+  asm_fparith(as, ir, RISCVI_FDIV_D);
+}
+
+static void asm_neg(ASMState *as, IRIns *ir)
+{
+  if (irt_isnum(ir->t)) {
+    asm_fpunary(as, ir, RISCVI_FNEG_D);
+  } else {
+    Reg dest = ra_dest(as, ir, RSET_GPR);
+    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
+    emit_ds1s2(as, irt_is64(ir->t) ? RISCVI_SUB : RISCVI_SUBW, dest,
+               RID_ZERO, left);
+  }
+}
+
+#define asm_abs(as, ir) asm_fpunary(as, ir, RISCVI_FABS_D)
+
+static void asm_arithov(ASMState *as, IRIns *ir)
+{
+  Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
+  lj_assertA(!irt_is64(ir->t), "bad usage");
+  if (irref_isk(ir->op2)) {
+    int k = IR(ir->op2)->i;
+    if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u);
+    if (checki12(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
+      left = ra_alloc1(as, ir->op1, RSET_GPR);
+      asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left);
+      emit_dsi(as, RISCVI_ADDIW, dest, left, k);  /* 32 bit add: wraps on overflow, which the guard detects. */
+      if (dest == left) emit_mv(as, RID_TMP, left);
+      return;
+    }
+  }
+  left = ra_alloc2(as, ir, RSET_GPR);
+  right = (left >> 8); left &= 255;
+  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
+                                                 right), dest));
+  asm_guard(as, RISCVI_BLT, RID_TMP, RID_ZERO);
+  emit_ds1s2(as, RISCVI_AND, RID_TMP, RID_TMP, tmp);
+  if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
+    emit_ds1s2(as, RISCVI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
+  } else {  /* ((dest^left) & (dest^~right)) < 0 */
+    emit_xnor(as, RID_TMP, dest, dest == right ? RID_TMP : right);
+  }
+  emit_ds1s2(as, RISCVI_XOR, tmp, dest, dest == left ? RID_TMP : left);
+  emit_ds1s2(as, ir->o == IR_ADDOV ? RISCVI_ADDW : RISCVI_SUBW, dest, left, right);
+  if (dest == left || dest == right)
+    emit_mv(as, RID_TMP, dest == left ? 
left : right); +} + +#define asm_addov(as, ir) asm_arithov(as, ir) +#define asm_subov(as, ir) asm_arithov(as, ir) + +static void asm_mulov(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + asm_guard(as, RISCVI_BNE, dest, RID_TMP); + emit_ext(as, RISCVI_SEXT_W, dest, RID_TMP); // dest: [31:0]+signextend + emit_ds1s2(as, RISCVI_MUL, RID_TMP, left, right); // RID_TMP: [63:0] +} + +static void asm_bnot(ASMState *as, IRIns *ir) +{ + Reg left, right, dest = ra_dest(as, ir, RSET_GPR); + IRIns *irl = IR(ir->op1); + if (as->flags & JIT_F_RVZbb && mayfuse(as, ir->op1) && irl->o == IR_BXOR) { + left = ra_alloc2(as, irl, RSET_GPR); + right = (left >> 8); left &= 255; + emit_ds1s2(as, RISCVI_XNOR, dest, left, right); + } else { + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_ds(as, RISCVI_NOT, dest, left); + } +} + +static void asm_bswap(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); + if (as->flags & JIT_F_RVZbb) { + if (!irt_is64(ir->t)) + emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); + emit_ds(as, RISCVI_REV8, dest, left); + } else if (as->flags & JIT_F_RVXThead) { + emit_ds(as, irt_is64(ir->t) ? RISCVI_TH_REV : RISCVI_TH_REVW, + dest, left); + } else if (irt_is64(ir->t)) { + Reg tmp1, tmp2, tmp3, tmp4; + tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); + tmp2 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp2); + tmp3 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp3); + tmp4 = ra_scratch(as, allow); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp4); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp3); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); + emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 40); + emit_dsshamt(as, RISCVI_SLLI, dest, left, 56); + emit_ds1s2(as, RISCVI_OR, tmp3, tmp1, tmp3); + emit_ds1s2(as, RISCVI_AND, tmp4, left, RID_TMP); + emit_dsshamt(as, RISCVI_SLLI, tmp3, tmp3, 32); + emit_dsshamt(as, RISCVI_SLLI, tmp1, tmp1, 24); + emit_dsshamt(as, RISCVI_SRLIW, tmp3, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp2, tmp3, tmp2); + emit_ds1s2(as, RISCVI_AND, tmp1, left, tmp1); + emit_ds1s2(as, RISCVI_OR, tmp3, tmp4, tmp3); + emit_dsshamt(as, RISCVI_SLLI, tmp4, tmp4, 24); + emit_dsshamt(as, RISCVI_SRLIW, tmp4, tmp4, 24); + emit_ds1s2(as, RISCVI_AND, tmp3, tmp3, tmp1); + emit_dsshamt(as, RISCVI_SRLI, tmp4, left, 8); + emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp2, tmp2, tmp3); + emit_du(as, RISCVI_LUI, tmp1, RISCVF_HI(0xff0000u)); + emit_ds1s2(as, RISCVI_AND, tmp2, tmp2, RID_TMP); + emit_dsshamt(as, RISCVI_SRLI, tmp3, left, 56); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); + emit_du(as, RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); + emit_dsshamt(as, RISCVI_SRLI, tmp2, left, 40); + } else { + Reg tmp1, tmp2; + tmp1 = ra_scratch(as, allow), allow = rset_exclude(allow, tmp1); + tmp2 = ra_scratch(as, allow); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp2); + emit_ds1s2(as, RISCVI_OR, dest, dest, tmp1); + emit_dsshamt(as, RISCVI_SLLI, tmp2, RID_TMP, 8); + emit_dsshamt(as, RISCVI_SLLIW, dest, left, 24); + emit_ds1s2(as, RISCVI_OR, tmp1, tmp1, tmp2); + emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); + emit_ds1s2(as, RISCVI_AND, tmp1, tmp1, RID_TMP); + emit_dsshamt(as, RISCVI_SRLIW, tmp2, left, 24); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, RISCVF_LO(0xff00)); + emit_du(as, 
RISCVI_LUI, RID_TMP, RISCVF_HI(0xff00u)); + emit_dsshamt(as, RISCVI_SRLI, tmp1, left, 8); + } +} + +static void asm_bitop(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik, RISCVIns riscvin) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left, right; + IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if (checki12(k)) { + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + emit_dsi(as, riscvik, dest, left, k); + return; + } + } else if (as->flags & JIT_F_RVZbb) { + if (mayfuse(as, ir->op1) && irl->o == IR_BNOT) { + left = ra_alloc1(as, irl->op1, RSET_GPR); + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, riscvin, dest, right, left); + return; + } else if (mayfuse(as, ir->op2) && irr->o == IR_BNOT) { + left = ra_alloc1(as, ir->op1, RSET_GPR); + right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, riscvin, dest, left, right); + return; + } + } + left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + emit_ds1s2(as, riscvi, dest, left, right); +} + +#define asm_band(as, ir) asm_bitop(as, ir, RISCVI_AND, RISCVI_ANDI, RISCVI_ANDN) +#define asm_bor(as, ir) asm_bitop(as, ir, RISCVI_OR, RISCVI_ORI, RISCVI_ORN) +#define asm_bxor(as, ir) asm_bitop(as, ir, RISCVI_XOR, RISCVI_XORI, RISCVI_XNOR) + +static void asm_bitshift(ASMState *as, IRIns *ir, RISCVIns riscvi, RISCVIns riscvik) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); + uint32_t shmsk = irt_is64(ir->t) ? 63 : 31; + if (irref_isk(ir->op2)) { /* Constant shifts. */ + uint32_t shift = (uint32_t)(IR(ir->op2)->i & shmsk); + switch (riscvik) { + case RISCVI_SRAI: case RISCVI_SRLI: case RISCVI_SLLI: + case RISCVI_SRAIW: case RISCVI_SLLIW: case RISCVI_SRLIW: + emit_dsshamt(as, riscvik, dest, left, shift); + break; + case RISCVI_ADDI: shift = (-shift) & shmsk; + case RISCVI_RORI: + emit_roti(as, RISCVI_RORI, dest, left, RID_TMP, shift); + break; + case RISCVI_ADDIW: shift = (-shift) & shmsk; + case RISCVI_RORIW: + emit_roti(as, RISCVI_RORIW, dest, left, RID_TMP, shift); + break; + default: + lj_assertA(0, "bad shift instruction"); + return; + } + } else { + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + switch (riscvi) { + case RISCVI_SRA: case RISCVI_SRL: case RISCVI_SLL: + case RISCVI_SRAW: case RISCVI_SRLW: case RISCVI_SLLW: + emit_ds1s2(as, riscvi, dest, left, right); + break; + case RISCVI_ROR: case RISCVI_ROL: + case RISCVI_RORW: case RISCVI_ROLW: + emit_rot(as, riscvi, dest, left, right, RID_TMP); + break; + default: + lj_assertA(0, "bad shift instruction"); + return; + } + } +} + +#define asm_bshl(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_SLL, RISCVI_SLLI) : \ + asm_bitshift(as, ir, RISCVI_SLLW, RISCVI_SLLIW)) +#define asm_bshr(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_SRL, RISCVI_SRLI) : \ + asm_bitshift(as, ir, RISCVI_SRLW, RISCVI_SRLIW)) +#define asm_bsar(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_SRA, RISCVI_SRAI) : \ + asm_bitshift(as, ir, RISCVI_SRAW, RISCVI_SRAIW)) +#define asm_brol(as, ir) (irt_is64(ir->t) ? \ + asm_bitshift(as, ir, RISCVI_ROL, RISCVI_ADDI) : \ + asm_bitshift(as, ir, RISCVI_ROLW, RISCVI_ADDIW)) + // ROLI -> ADDI, ROLIW -> ADDIW; Hacky but works. +#define asm_bror(as, ir) (irt_is64(ir->t) ? 
\ + asm_bitshift(as, ir, RISCVI_ROR, RISCVI_RORI) : \ + asm_bitshift(as, ir, RISCVI_RORW, RISCVI_RORIW)) + +static void asm_min_max(ASMState *as, IRIns *ir, int ismax) +{ + if (irt_isnum(ir->t)) { + Reg dest = ra_dest(as, ir, RSET_FPR); + MCLabel l_ret_left, l_end; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + l_end = emit_label(as); + + if (dest != left) + emit_ds1s2(as, RISCVI_FMV_D, dest, left, left); + l_ret_left = emit_label(as); + + emit_jmp(as, l_end); + if (dest != right) + emit_ds1s2(as, RISCVI_FMV_D, dest, right, right); + + emit_branch(as, RISCVI_BNE, RID_TMP, RID_ZERO, l_ret_left, 0); + emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, ismax ? right : left, + ismax ? left : right); + } else { + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); + Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, ismax ? RISCVI_MAX : RISCVI_MIN, dest, left, right); + } else { + if (as->flags & JIT_F_RVXThead) { + if (left == right) { + if (dest != left) emit_mv(as, dest, left); + } else { + if (dest == left) { + emit_ds1s2(as, RISCVI_TH_MVNEZ, dest, right, RID_TMP); + } else { + emit_ds1s2(as, RISCVI_TH_MVEQZ, dest, left, RID_TMP); + if (dest != right) emit_mv(as, dest, right); + } + } + } else if (as->flags & JIT_F_RVZicond) { + emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); + if (dest != right) { + emit_ds1s2(as, RISCVI_CZERO_EQZ, RID_TMP, right, RID_TMP); + emit_ds1s2(as, RISCVI_CZERO_NEZ, dest, left, RID_TMP); + } else { + emit_ds1s2(as, RISCVI_CZERO_NEZ, RID_TMP, left, RID_TMP); + emit_ds1s2(as, RISCVI_CZERO_EQZ, dest, right, RID_TMP); + } + } else { + if (dest != right) { + emit_ds1s2(as, RISCVI_XOR, dest, right, dest); + emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); + emit_ds1s2(as, RISCVI_XOR, dest, right, left); + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); + } else { + emit_ds1s2(as, RISCVI_XOR, dest, left, dest); + emit_ds1s2(as, RISCVI_AND, dest, dest, RID_TMP); + emit_ds1s2(as, RISCVI_XOR, dest, left, right); + emit_ds1s2(as, RISCVI_SUB, RID_TMP, RID_ZERO, RID_TMP); + } + } + emit_ds1s2(as, RISCVI_SLT, RID_TMP, + ismax ? left : right, ismax ? right : left); + } + } +} + +#define asm_min(as, ir) asm_min_max(as, ir, 0) +#define asm_max(as, ir) asm_min_max(as, ir, 1) + +/* -- Comparisons --------------------------------------------------------- */ + +/* FP comparisons. */ +static void asm_fpcomp(ASMState *as, IRIns *ir) +{ + IROp op = ir->o; + Reg right, left = ra_alloc2(as, ir, RSET_FPR); + right = (left >> 8); left &= 255; + asm_guard(as, (op < IR_EQ ? (op&4) : (op&1)) + ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); + switch (op) { + case IR_LT: case IR_UGE: + emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, left, right); + break; + case IR_LE: case IR_UGT: case IR_ABC: + emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, left, right); + break; + case IR_GT: case IR_ULE: + emit_ds1s2(as, RISCVI_FLT_D, RID_TMP, right, left); + break; + case IR_GE: case IR_ULT: + emit_ds1s2(as, RISCVI_FLE_D, RID_TMP, right, left); + break; + case IR_EQ: case IR_NE: + emit_ds1s2(as, RISCVI_FEQ_D, RID_TMP, left, right); + break; + default: + break; + } +} + +/* Integer comparisons. */ +static void asm_intcomp(ASMState *as, IRIns *ir) +{ + /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. 
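Bit 0 negates the comparison, bit 1 swaps the operands and bit 2 selects the unsigned variants; the guards below exploit this encoding directly.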
*/ + /* 00 01 10 11 100 101 110 111 */ + IROp op = ir->o; + Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); + if (op == IR_ABC) op = IR_UGT; + if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(as, ir->op2) == 0) { + switch (op) { + case IR_LT: asm_guard(as, RISCVI_BGE, left, RID_ZERO); break; + case IR_GE: asm_guard(as, RISCVI_BLT, left, RID_ZERO); break; + case IR_LE: asm_guard(as, RISCVI_BLT, RID_ZERO, left); break; + case IR_GT: asm_guard(as, RISCVI_BGE, RID_ZERO, left); break; + default: break; + } + return; + } + if (irref_isk(ir->op2)) { + intptr_t k = get_kval(as, ir->op2); + if ((op&2)) k++; + if (checki12(k)) { + asm_guard(as, (op&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); + emit_dsi(as, (op&4) ? RISCVI_SLTIU : RISCVI_SLTI, RID_TMP, left, k); + return; + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); + asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), + (op&2) ? right : left, (op&2) ? left : right); +} + +static void asm_comp(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) + asm_fpcomp(as, ir); + else + asm_intcomp(as, ir); +} + +static void asm_equal(ASMState *as, IRIns *ir) +{ + if (irt_isnum(ir->t)) { + asm_fpcomp(as, ir); + } else { + Reg right, left = ra_alloc2(as, ir, RSET_GPR); + right = (left >> 8); left &= 255; + asm_guard(as, (ir->o & 1) ? RISCVI_BEQ : RISCVI_BNE, left, right); + } +} + +/* -- Split register ops -------------------------------------------------- */ + +/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ +static void asm_hiop(ASMState *as, IRIns *ir) +{ + /* HIOP is marked as a store because it needs its own DCE logic. */ + int uselo = ra_used(ir-1), usehi = ra_used(ir); /* Loword/hiword used? */ + if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1; + if (!usehi) return; /* Skip unused hiword op for all remaining ops. */ + switch ((ir-1)->o) { + case IR_CALLN: + case IR_CALLL: + case IR_CALLS: + case IR_CALLXS: + if (!uselo) + ra_allocref(as, ir->op1, RID2RSET(RID_RETLO)); /* Mark lo op as used. */ + break; + default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break; + } +} + +/* -- Profiling ----------------------------------------------------------- */ + +static void asm_prof(ASMState *as, IRIns *ir) +{ + UNUSED(ir); + asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); + emit_dsi(as, RISCVI_ANDI, RID_TMP, RID_TMP, HOOK_PROFILE); + emit_lsglptr(as, RISCVI_LBU, RID_TMP, + (int32_t)offsetof(global_State, hookmask)); +} + +/* -- Stack handling ------------------------------------------------------ */ + +/* Check Lua stack size for overflow. Use exit handler as fallback. */ +static void asm_stack_check(ASMState *as, BCReg topslot, + IRIns *irp, RegSet allow, ExitNo exitno) +{ + /* Try to get an unused temp register, otherwise spill/restore RID_RET*. */ + Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; + ExitNo oldsnap = as->snapno; + rset_clear(allow, pbase); + as->snapno = exitno; + asm_guard(as, RISCVI_BNE, RID_TMP, RID_ZERO); + as->snapno = oldsnap; + if (allow) { + tmp = rset_pickbot(allow); + ra_modified(as, tmp); + } else { // allow == RSET_EMPTY + tmp = RID_RET; + emit_lso(as, RISCVI_LD, tmp, RID_SP, 0); /* Restore tmp1 register. 
*/ + } + emit_dsi(as, RISCVI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot)); + emit_ds1s2(as, RISCVI_SUB, RID_TMP, tmp, pbase); + emit_lso(as, RISCVI_LD, tmp, tmp, offsetof(lua_State, maxstack)); + if (pbase == RID_TMP) + emit_getgl(as, RID_TMP, jit_base); + emit_getgl(as, tmp, cur_L); + if (allow == RSET_EMPTY) /* Spill temp register. */ + emit_lso(as, RISCVI_SD, tmp, RID_SP, 0); +} + +/* Restore Lua stack from on-trace state. */ +static void asm_stack_restore(ASMState *as, SnapShot *snap) +{ + SnapEntry *map = &as->T->snapmap[snap->mapofs]; +#ifdef LUA_USE_ASSERT + SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; +#endif + MSize n, nent = snap->nent; + /* Store the value of all modified slots to the Lua stack. */ + for (n = 0; n < nent; n++) { + SnapEntry sn = map[n]; + BCReg s = snap_slot(sn); + int32_t ofs = 8*((int32_t)s-1-LJ_FR2); + IRRef ref = snap_ref(sn); + IRIns *ir = IR(ref); + if ((sn & SNAP_NORESTORE)) + continue; + if (irt_isnum(ir->t)) { + Reg src = ra_alloc1(as, ref, RSET_FPR); + emit_lso(as, RISCVI_FSD, src, RID_BASE, ofs); + } else { + asm_tvstore64(as, RID_BASE, ofs, ref); + } + checkmclim(as); + } + lj_assertA(map + nent == flinks, "inconsistent frames in snapshot"); +} + +/* -- GC handling --------------------------------------------------------- */ + +/* Marker to prevent patching the GC check exit. */ +#define RISCV_NOPATCH_GC_CHECK \ + (RISCVI_OR|RISCVF_D(RID_TMP)|RISCVF_S1(RID_TMP)|RISCVF_S2(RID_TMP)) + +/* Check GC threshold and do one or more GC steps. */ +static void asm_gc_check(ASMState *as) +{ + const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; + IRRef args[2]; + MCLabel l_end; + Reg tmp; + ra_evictset(as, RSET_SCRATCH); + l_end = emit_label(as); + /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ + asm_guard(as, RISCVI_BNE, RID_RET, RID_ZERO); /* Assumes asm_snap_prep() already done. */ + *--as->mcp = RISCV_NOPATCH_GC_CHECK; + args[0] = ASMREF_TMP1; /* global_State *g */ + args[1] = ASMREF_TMP2; /* MSize steps */ + asm_gencall(as, ci, args); + emit_ds(as, RISCVI_MV, ra_releasetmp(as, ASMREF_TMP1), RID_GL); + tmp = ra_releasetmp(as, ASMREF_TMP2); + emit_loadi(as, tmp, as->gcsteps); + /* Jump around GC step if GC total < GC threshold. */ + emit_branch(as, RISCVI_BLTU, RID_TMP, tmp, l_end, 0); + emit_getgl(as, tmp, gc.threshold); + emit_getgl(as, RID_TMP, gc.total); + as->gcsteps = 0; + checkmclim(as); +} + +/* -- Loop handling ------------------------------------------------------- */ + +/* Fixup the loop branch. */ +static void asm_loop_fixup(ASMState *as) +{ + MCode *p = as->mctop; + MCode *target = as->mcp; + ptrdiff_t delta; + if (as->loopinv) { /* Inverted loop branch? */ + delta = (char *)target - (char *)(p - 2); + /* asm_guard* already inverted the branch, and patched the final b. */ + lj_assertA(checki21(delta), "branch target out of range"); + p[-2] = (p[-2]&0x00000fff) | RISCVF_IMMJ(delta); + } else { + /* J */ + delta = (char *)target - (char *)(p - 1); + p[-1] = RISCVI_JAL | RISCVF_IMMJ(delta); + } +} + +/* Fixup the tail of the loop. */ +static void asm_loop_tail_fixup(ASMState *as) +{ + UNUSED(as); /* Nothing to do(?) */ +} + +/* -- Head of trace ------------------------------------------------------- */ + +/* Coalesce BASE register for a root trace. 
*/ +static void asm_head_root_base(ASMState *as) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (r != RID_BASE) + emit_mv(as, r, RID_BASE); + } +} + +/* Coalesce BASE register for a side trace. */ +static Reg asm_head_side_base(ASMState *as, IRIns *irp) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; + if (ra_hasreg(r)) { + ra_free(as, r); + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { + return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { + emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ + return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } + } + return RID_NONE; +} + +/* -- Tail of trace ------------------------------------------------------- */ + +/* Fixup the tail code. */ +static void asm_tail_fixup(ASMState *as, TraceNo lnk) +{ + MCode *p = as->mctop; + MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp; + int32_t spadj = as->T->spadjust; + if (spadj == 0) { + p[-3] = RISCVI_NOP; + // as->mctop = p-2; + } else { + /* Patch stack adjustment. */ + p[-3] = RISCVI_ADDI | RISCVF_D(RID_SP) | RISCVF_S1(RID_SP) | RISCVF_IMMI(spadj); + } + /* Patch exit jump. */ + ptrdiff_t delta = (char *)target - (char *)(p - 2); + p[-2] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + p[-1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); +} + +/* Prepare tail of code. */ +static void asm_tail_prep(ASMState *as) +{ + MCode *p = as->mctop - 2; /* Leave room for exitstub. */ + if (as->loopref) { + as->invmcp = as->mcp = p; + } else { + as->mcp = p-1; /* Leave room for stack pointer adjustment. */ + as->invmcp = NULL; + } + p[0] = p[1] = RISCVI_NOP; /* Prevent load/store merging. */ +} + +/* -- Trace setup --------------------------------------------------------- */ + +/* Ensure there are enough stack slots for call arguments. */ +static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) +{ + IRRef args[CCI_NARGS_MAX*2]; + uint32_t i, nargs = CCI_XNARGS(ci); + int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR; + asm_collectargs(as, ir, ci, args); + for (i = 0; i < nargs; i++) { + if (args[i] && irt_isfp(IR(args[i])->t)) { + if (nfpr > 0) { + nfpr--; if(ci->flags & CCI_VARARG) ngpr--; + } else if (!(ci->flags & CCI_VARARG) && ngpr > 0) ngpr--; + else nslots += 2; + } else { + if (ngpr > 0) { + ngpr--; if(ci->flags & CCI_VARARG) nfpr--; + } else nslots += 2; + } + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; + return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); +} + +static void asm_setup_target(ASMState *as) +{ + asm_sparejump_setup(as); + asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); +} + +/* -- Trace patching ------------------------------------------------------ */ + +/* Patch exit jumps of existing machine code to a new target. 
*/ +void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) +{ + MCode *p = T->mcode; + MCode *pe = (MCode *)((char *)p + T->szmcode); + MCode *px = exitstub_trace_addr(T, exitno); + MCode *cstart = NULL; + MCode *mcarea = lj_mcode_patch(J, p, 0); + + for (; p < pe; p++) { + /* Look for exitstub branch, replace with branch to target. */ + ptrdiff_t odelta = (char *)px - (char *)(p+1), + ndelta = (char *)target - (char *)(p+1); + if ((((p[0] ^ RISCVF_IMMB(8)) & 0xfe000f80u) == 0 && + ((p[0] & 0x0000007fu) == 0x63u) && + ((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && + ((p[1] & 0x0000007fu) == 0x6fu) && p[-1] != RISCV_NOPATCH_GC_CHECK) || + (((p[1] ^ RISCVF_IMMJ(odelta)) & 0xfffff000u) == 0 && + ((p[1] & 0x0000007fu) == 0x6fu) && p[0] != RISCV_NOPATCH_GC_CHECK)) { + lj_assertJ(checki32(ndelta), "branch target out of range"); + /* Patch jump, if within range. */ + patchbranch: + if (checki21(ndelta)) { /* Patch jump */ + p[1] = RISCVI_JAL | RISCVF_IMMJ(ndelta); + if (!cstart) cstart = p + 1; + } else { /* Branch out of range. Use spare jump slot in mcarea. */ + MCode *mcjump = asm_sparejump_use(mcarea, target); + if (mcjump) { + lj_mcode_sync(mcjump, mcjump+2); + ndelta = (char *)mcjump - (char *)(p+1); + if (checki21(ndelta)) { + goto patchbranch; + } else { + lj_assertJ(0, "spare jump out of range: -Osizemcode too big"); + } + } + /* Ignore jump slot overflow. Child trace is simply not attached. */ + } + } else if (p+2 == pe) { + if (p[0] == RISCVI_NOP && p[1] == RISCVI_NOP) { + ptrdiff_t delta = (char *)target - (char *)p; + lj_assertJ(checki32(delta), "jump target out of range"); + p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + if (!cstart) cstart = p; + } + } + } + if (cstart) lj_mcode_sync(cstart, px+1); + lj_mcode_patch(J, mcarea, 1); +} diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h new file mode 100644 index 0000000000..1c549a1adc --- /dev/null +++ b/src/lj_emit_riscv.h @@ -0,0 +1,530 @@ +/* +** RISC-V instruction emitter. +** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h +** +** Contributed by gns from PLCT Lab, ISCAS. +*/ + +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); + if (ir->o == IR_KINT64) { + return (intptr_t)ir_kint64(ir)->u64; + } else if (ir->o == IR_KGC) { + return (intptr_t)ir_kgc(ir); + } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { + return (intptr_t)ir_kptr(ir); + } else { + lj_assertA(ir->o == IR_KINT || ir->o == IR_KNULL, + "bad 64 bit const IR op %d", ir->o); + return ir->i; /* Sign-extended. 
*/ + } +} + +#define get_kval(as, ref) get_k64val(as, ref) + +/* -- Emit basic instructions --------------------------------------------- */ + +static void emit_r(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2); +} + +#define emit_ds(as, riscvi, rd, rs1) emit_r(as, riscvi, rd, rs1, 0) +#define emit_ds2(as, riscvi, rd, rs2) emit_r(as, riscvi, rd, 0, rs2) +#define emit_ds1s2(as, riscvi, rd, rs1, rs2) emit_r(as, riscvi, rd, rs1, rs2) + +static void emit_r4(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg rs3) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_S3(rs3); +} + +#define emit_ds1s2s3(as, riscvi, rd, rs1, rs2, rs3) emit_r4(as, riscvi, rd, rs1, rs2, rs3) + +static void emit_i(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_S1(rs1) | RISCVF_IMMI((uint32_t)i & 0xfff); +} + +#define emit_di(as, riscvi, rd, i) emit_i(as, riscvi, rd, 0, i) +#define emit_dsi(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i) +#define emit_dsshamt(as, riscvi, rd, rs1, i) emit_i(as, riscvi, rd, rs1, i&0x3f) + +static void emit_s(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMS((uint32_t)i & 0xfff); +} + +#define emit_s1s2i(as, riscvi, rs1, rs2, i) emit_s(as, riscvi, rs1, rs2, i) + +/* +static void emit_b(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB((uint32_t)i & 0x1ffe); +} +*/ + +static void emit_u(ASMState *as, RISCVIns riscvi, Reg rd, uint32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMU(i & 0xfffff); +} + +#define emit_du(as, riscvi, rd, i) emit_u(as, riscvi, rd, i) + +/* +static void emit_j(ASMState *as, RISCVIns riscvi, Reg rd, int32_t i) +{ + *--as->mcp = riscvi | RISCVF_D(rd) | RISCVF_IMMJ((uint32_t)i & 0x1fffffe); +} +*/ + +static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); +static void ra_allockreg(ASMState *as, intptr_t k, Reg r); +static Reg ra_scratch(ASMState *as, RegSet allow); + +static void emit_lso(ASMState *as, RISCVIns riscvi, Reg data, Reg base, int32_t ofs) +{ + lj_assertA(checki12(ofs), "load/store offset %d out of range", ofs); + switch (riscvi) { + case RISCVI_LD: case RISCVI_LW: case RISCVI_LH: case RISCVI_LB: + case RISCVI_LWU: case RISCVI_LHU: case RISCVI_LBU: + case RISCVI_FLW: case RISCVI_FLD: + emit_dsi(as, riscvi, data, base, ofs); + break; + case RISCVI_SD: case RISCVI_SW: case RISCVI_SH: case RISCVI_SB: + case RISCVI_FSW: case RISCVI_FSD: + emit_s1s2i(as, riscvi, base, data, ofs); + break; + default: lj_assertA(0, "invalid lso"); break; + } +} + +static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, + int32_t shamt) +{ + if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { + if (!(as->flags & JIT_F_RVZbb)) switch (riscvi) { + case RISCVI_RORI: riscvi = RISCVI_TH_SRRI; break; + case RISCVI_RORIW: riscvi = RISCVI_TH_SRRIW; break; + default: lj_assertA(0, "invalid roti op"); break; + } + emit_dsshamt(as, riscvi, rd, rs1, shamt); + } else { + RISCVIns ai, bi; + int32_t shwid, shmsk; + switch (riscvi) { + case RISCVI_RORI: + ai = RISCVI_SRLI, bi = RISCVI_SLLI; + shwid = 64, shmsk = 63; + break; + case RISCVI_RORIW: + ai = RISCVI_SRLIW, bi = RISCVI_SLLIW; + shwid = 32, shmsk = 31; + break; + default: + lj_assertA(0, "invalid roti op"); + return; + } + emit_ds1s2(as, 
RISCVI_OR, rd, rd, tmp); + emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); + emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); + } +} + +static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, riscvi, rd, rs1, rs2); + } else { + RISCVIns sai, sbi; + switch (riscvi) { + case RISCVI_ROL: + sai = RISCVI_SLL, sbi = RISCVI_SRL; + break; + case RISCVI_ROR: + sai = RISCVI_SRL, sbi = RISCVI_SLL; + break; + case RISCVI_ROLW: + sai = RISCVI_SLLW, sbi = RISCVI_SRLW; + break; + case RISCVI_RORW: + sai = RISCVI_SRLW, sbi = RISCVI_SLLW; + break; + default: + lj_assertA(0, "invalid rot op"); + return; + } + if (rd == rs2) { + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); + emit_ds2(as, RISCVI_NEG, tmp, rs2); + } else { + emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); + emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds2(as, RISCVI_NEG, tmp, rs2); + } + } +} + +static void emit_ext(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1) +{ + if ((riscvi != RISCVI_ZEXT_W && as->flags & JIT_F_RVZbb) || + (riscvi == RISCVI_ZEXT_W && as->flags & JIT_F_RVZba)) { + emit_ds(as, riscvi, rd, rs1); + } else if (as->flags & JIT_F_RVXThead) { + uint32_t hi, sext; + switch (riscvi) { + case RISCVI_ZEXT_B: + case RISCVI_SEXT_W: + emit_ds(as, riscvi, rd, rs1); + return; + case RISCVI_ZEXT_H: + hi = 15, sext = 0; + break; + case RISCVI_ZEXT_W: + hi = 31, sext = 0; + break; + case RISCVI_SEXT_B: + hi = 7, sext = 1; + break; + case RISCVI_SEXT_H: + hi = 15, sext = 1; + break; + default: + lj_assertA(0, "invalid ext op"); + return; + } + emit_dsi(as, sext ? RISCVI_TH_EXT : RISCVI_TH_EXTU, + rd, rs1, hi << 6); + } else { + RISCVIns sli, sri; + int32_t shamt; + switch (riscvi) { + case RISCVI_ZEXT_B: + case RISCVI_SEXT_W: + emit_ds(as, riscvi, rd, rs1); + return; + case RISCVI_ZEXT_H: + sli = RISCVI_SLLI, sri = RISCVI_SRLI; + shamt = 48; + break; + case RISCVI_ZEXT_W: + sli = RISCVI_SLLI, sri = RISCVI_SRLI; + shamt = 32; + break; + case RISCVI_SEXT_B: + sli = RISCVI_SLLI, sri = RISCVI_SRAI; + shamt = 56; + break; + case RISCVI_SEXT_H: + sli = RISCVI_SLLI, sri = RISCVI_SRAI; + shamt = 48; + break; + default: + lj_assertA(0, "invalid ext op"); + return; + } + emit_dsshamt(as, sri, rd, rd, shamt); + emit_dsshamt(as, sli, rd, rs1, shamt); + } +} + +static void emit_cleartp(ASMState *as, Reg rd, Reg rs1) +{ + if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_EXTU, rd, rs1, 46u << 6); + } else { + emit_dsshamt(as, RISCVI_SRLI, rd, rd, 17); + emit_dsshamt(as, RISCVI_SLLI, rd, rs1, 17); + } +} + +/* +static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_ANDN, rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_AND, rd, rs1, tmp); + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} +*/ + +/* +static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_ORN, rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_OR, rd, rs1, tmp); + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} +*/ + +static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, RISCVI_XNOR, rd, rs1, rs2); + } else { + emit_ds(as, RISCVI_NOT, rd, rd); + emit_ds1s2(as, RISCVI_XOR, rd, rs1, rs2); + } +} + +static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) +{ + if (as->flags & 
JIT_F_RVZba) { + switch (shamt) { + case 1: emit_ds1s2(as, RISCVI_SH1ADD, rd, rs2, rs1); break; + case 2: emit_ds1s2(as, RISCVI_SH2ADD, rd, rs2, rs1); break; + case 3: emit_ds1s2(as, RISCVI_SH3ADD, rd, rs2, rs1); break; + default: return; + } + } else if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); + } else { + emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); + emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); + } +} + +#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) +#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) +#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) + +static void emit_loadk12(ASMState *as, Reg rd, int32_t i) +{ + emit_di(as, RISCVI_ADDI, rd, i); +} + +static void emit_loadk32(ASMState *as, Reg rd, int32_t i) +{ + if (checki12((int64_t)i)) { + emit_loadk12(as, rd, i); + } else { + if(LJ_UNLIKELY(RISCVF_HI((uint32_t)i) == 0x80000u && i > 0)) + emit_dsi(as, RISCVI_XORI, rd, rd, RISCVF_LO(i)); + else + emit_dsi(as, RISCVI_ADDI, rd, rd, RISCVF_LO(i)); + emit_du(as, RISCVI_LUI, rd, RISCVF_HI((uint32_t)i)); + } +} + +/* -- Emit loads/stores --------------------------------------------------- */ + +/* Prefer rematerialization of BASE/L from global_State over spills. */ +#define emit_canremat(ref) ((ref) <= REF_BASE) + + +/* Load a 32 bit constant into a GPR. */ +#define emit_loadi(as, r, i) emit_loadk32(as, r, i); + +/* Load a 64 bit constant into a GPR. */ +static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) +{ + int64_t u64_delta = (int64_t)((intptr_t)u64 - (intptr_t)(as->mcp - 2)); + if (checki32((int64_t)u64)) { + emit_loadk32(as, r, (int32_t)u64); + } else if (checki32auipc(u64_delta)) { + emit_dsi(as, RISCVI_ADDI, r, r, RISCVF_LO(u64_delta)); + emit_du(as, RISCVI_AUIPC, r, RISCVF_HI(u64_delta)); + } else { + uint32_t lo32 = u64 & 0xfffffffful; + if (checku11(lo32)) { + if (lo32 > 0) emit_dsi(as, RISCVI_ADDI, r, r, lo32); + emit_dsshamt(as, RISCVI_SLLI, r, r, 32); + } else { + RISCVIns li_insn[7] = {0}; + int shamt = 0, step = 0; + for(int bit = 0; bit < 32; bit++) { + if (lo32 & (1u << bit)) { + if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); + int inc = bit+10 > 31 ? 31-bit : 10; + bit += inc, shamt = inc+1; + uint32_t msk = ((1ul << (bit+1))-1)^((1ul << (((bit-inc) >= 0) ? (bit-inc) : 0))-1); + uint16_t payload = (lo32 & msk) >> (((bit-inc) >= 0) ? (bit-inc) : 0); + li_insn[step++] = RISCVI_ADDI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(payload); + } else shamt++; + } + if (shamt) li_insn[step++] = RISCVI_SLLI | RISCVF_D(r) | RISCVF_S1(r) | RISCVF_IMMI(shamt); + + if (step < 6) { + for(int i = 0; i < step; i++) + *--as->mcp = li_insn[i]; + } else { + emit_dsi(as, RISCVI_ADDI, r, r, u64 & 0x3ff); + emit_dsshamt(as, RISCVI_SLLI, r, r, 10); + emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 10) & 0x7ff); + emit_dsshamt(as, RISCVI_SLLI, r, r, 11); + emit_dsi(as, RISCVI_ADDI, r, r, (u64 >> 21) & 0x7ff); + emit_dsshamt(as, RISCVI_SLLI, r, r, 11); + } + } + + uint32_t hi32 = u64 >> 32; + if (hi32 & 0xfff) emit_loadk32(as, r, hi32); + else emit_du(as, RISCVI_LUI, r, hi32 >> 12); + } +} + +#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) + +/* Get/set from constant pointer. */ +static void emit_lsptr(ASMState *as, RISCVIns riscvi, Reg r, void *p, RegSet allow) +{ + emit_lso(as, riscvi, r, ra_allock(as, igcptr(p), allow), 0); +} + +/* Load 64 bit IR constant into register. 
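An FPR destination takes a detour through RID_TMP and fmv.d.x, since a 64 bit immediate cannot be materialized directly into an FPR.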
*/ +static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) +{ + const uint64_t *k = &ir_k64(ir)->u64; + Reg r64 = r; + if (rset_test(RSET_FPR, r)) { + r64 = RID_TMP; + emit_ds(as, RISCVI_FMV_D_X, r, r64); + } + emit_loadu64(as, r64, *k); +} + +/* Get/set global_State fields. */ +static void emit_lsglptr(ASMState *as, RISCVIns riscvi, Reg r, int32_t ofs) +{ + emit_lso(as, riscvi, r, RID_GL, ofs); +} + +#define emit_getgl(as, r, field) \ + emit_lsglptr(as, RISCVI_LD, (r), (int32_t)offsetof(global_State, field)) +#define emit_setgl(as, r, field) \ + emit_lsglptr(as, RISCVI_SD, (r), (int32_t)offsetof(global_State, field)) + +/* Trace number is determined from per-trace exit stubs. */ +#define emit_setvmstate(as, i) UNUSED(i) + +/* -- Emit control-flow instructions -------------------------------------- */ + +/* Label for internal jumps. */ +typedef MCode *MCLabel; + +/* Return label pointing to current PC. */ +#define emit_label(as) ((as)->mcp) + +static void emit_branch(ASMState *as, RISCVIns riscvi, Reg rs1, Reg rs2, MCode *target, int jump) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (char *)target - (char *)(p - 1); + // lj_assertA(((delta + 0x10000) >> 13) == 0, "branch target out of range"); /* B */ + lj_assertA(((delta + 0x100000) >> 21) == 0, "branch target out of range"); /* ^B+J */ + if (checki13(delta) && !jump) { + *--p = riscvi | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(delta); + *--p = RISCVI_NOP; + } else { + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); /* Poorman's trampoline */ + *--p = (riscvi^0x00001000) | RISCVF_S1(rs1) | RISCVF_S2(rs2) | RISCVF_IMMB(8); + } + as->mcp = p; +} + +static void emit_jmp(ASMState *as, MCode *target) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (char *)target - (char *)(p - 2); + // lj_assertA(((delta + 0x100000) >> 21) == 0, "jump target out of range"); /* J */ + lj_assertA(checki32(delta), "jump target out of range"); /* AUIPC+JALR */ + if (checki21(delta)) { + *--p = RISCVI_NOP; + *--p = RISCVI_JAL | RISCVF_IMMJ(delta); + } else { + *--p = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + } + as->mcp = p; +} + +#define emit_mv(as, dst, src) \ + emit_ds(as, RISCVI_MV, (dst), (src)) + +static void emit_call(ASMState *as, void *target, int needcfa) +{ + MCode *p = as->mcp; + ptrdiff_t delta = (char *)target - (char *)(p - 2); + if (checki21(delta)) { + *--p = RISCVI_NOP; + *--p = RISCVI_JAL | RISCVF_D(RID_RA) | RISCVF_IMMJ(delta); + } else if (checki32(delta)) { + *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); + *--p = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + needcfa = 1; + } else { + *--p = RISCVI_JALR | RISCVF_D(RID_RA) | RISCVF_S1(RID_CFUNCADDR) | RISCVF_IMMI(0); + needcfa = 2; + } + as->mcp = p; + if (needcfa > 1) + ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); +} + +/* -- Emit generic operations --------------------------------------------- */ + +/* Generic move between two regs. */ +static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) +{ + if (src < RID_MAX_GPR && dst < RID_MAX_GPR) + emit_mv(as, dst, src); + else if (src < RID_MAX_GPR) + emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_D_X : RISCVI_FMV_W_X, dst, src); + else if (dst < RID_MAX_GPR) + emit_ds(as, irt_isnum(ir->t) ? RISCVI_FMV_X_D : RISCVI_FMV_X_W, dst, src); + else + emit_ds1s2(as, irt_isnum(ir->t) ? 
RISCVI_FMV_D : RISCVI_FMV_S, dst, src, src);
+}
+
+/* Emit an arithmetic operation with a constant operand. */
+static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src,
+                     Reg tmp, intptr_t k)
+{
+  if (checki12(k)) emit_dsi(as, riscvi, dest, src, k);
+  else {
+    switch (riscvi) {
+      case RISCVI_ADDI: riscvi = RISCVI_ADD; break;
+      case RISCVI_XORI: riscvi = RISCVI_XOR; break;
+      case RISCVI_ORI: riscvi = RISCVI_OR; break;
+      case RISCVI_ANDI: riscvi = RISCVI_AND; break;
+      default: lj_assertA(0, "NYI arithmetic RISCVIns"); return;
+    }
+    emit_ds1s2(as, riscvi, dest, src, tmp);
+    emit_loadu64(as, tmp, (uintptr_t)k);
+  }
+}
+
+/* Generic load of register with base and (small) offset address. */
+static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)
+    emit_lso(as, irt_is64(ir->t) ? RISCVI_LD : RISCVI_LW, r, base, ofs);
+  else
+    emit_lso(as, irt_isnum(ir->t) ? RISCVI_FLD : RISCVI_FLW, r, base, ofs);
+}
+
+/* Generic store of register with base and (small) offset address. */
+static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
+{
+  if (r < RID_MAX_GPR)
+    emit_lso(as, irt_is64(ir->t) ? RISCVI_SD : RISCVI_SW, r, base, ofs);
+  else
+    emit_lso(as, irt_isnum(ir->t) ? RISCVI_FSD : RISCVI_FSW, r, base, ofs);
+}
+
+/* Add offset to pointer. */
+static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
+{
+  if (ofs)
+    emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs);
+}
+
+
+#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c
index 9947eacc91..a035d19ca8 100644
--- a/src/lj_gdbjit.c
+++ b/src/lj_gdbjit.c
@@ -306,6 +306,9 @@ enum {
 #elif LJ_TARGET_MIPS
   DW_REG_SP = 29,
   DW_REG_RA = 31,
+#elif LJ_TARGET_RISCV64
+  DW_REG_SP = 2,
+  DW_REG_RA = 1,
 #else
 #error "Unsupported target architecture"
 #endif
@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = {
   .machine = 20,
 #elif LJ_TARGET_MIPS
   .machine = 8,
+#elif LJ_TARGET_RISCV64
+  .machine = 243,
 #else
 #error "Unsupported target architecture"
 #endif
@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx)
     for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); }
     for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); }
   }
+#elif LJ_TARGET_RISCV64
+  {
+    int i;
+    for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); }
+    DB(DW_CFA_offset|9); DUV(17);
+    DB(DW_CFA_offset|8); DUV(18);
+    for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); }
+    DB(DW_CFA_offset|32|9); DUV(29);
+    DB(DW_CFA_offset|32|8); DUV(30);
+  }
 #else
 #error "Unsupported target architecture"
 #endif
diff --git a/src/lj_jit.h b/src/lj_jit.h
index 361570a072..984d1304f8 100644
--- a/src/lj_jit.h
+++ b/src/lj_jit.h
@@ -63,6 +63,46 @@
 #define JIT_F_CPUSTRING "\010MIPS64R2"
 #endif
 #endif
+#elif LJ_TARGET_RISCV64
+
+#define JIT_F_CPU 0x00000010
+#define JIT_F_CPU_FIRST JIT_F_CPU
+#define JIT_F_RVC (JIT_F_CPU << 0)
+#define JIT_F_RVZba (JIT_F_CPU << 1)
+#define JIT_F_RVZbb (JIT_F_CPU << 2)
+#define JIT_F_RVZicond (JIT_F_CPU << 3)
+#define JIT_F_RVXThead (JIT_F_CPU << 4)
+
+#define JIT_F_CPUSTRING "\003RVC\003Zba\003Zbb\006Zicond\006XThead"
+
+#if LJ_TARGET_LINUX
+#include <sys/syscall.h>
+
+#ifndef __NR_riscv_hwprobe
+#ifndef __NR_arch_specific_syscall
+#define __NR_arch_specific_syscall 244
+#endif
+#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14)
+#endif
+
+struct riscv_hwprobe {
+  int64_t key;
+  uint64_t value;
+};
+
+#define RISCV_HWPROBE_KEY_MVENDORID 0
+#define RISCV_HWPROBE_KEY_MARCHID 1
+#define 
RISCV_HWPROBE_KEY_MIMPID 2
+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
+#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
+
+#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_EXT_ZBA (1 << 3)
+#define RISCV_HWPROBE_EXT_ZBB (1 << 4)
+#define RISCV_HWPROBE_EXT_ZICOND (1ULL << 35)
+
+#endif
+
 #else
 #define JIT_F_CPU_FIRST 0
 #define JIT_F_CPUSTRING ""
diff --git a/src/lj_mcode.c b/src/lj_mcode.c
index 33ca2396d2..d2828bf75a 100644
--- a/src/lj_mcode.c
+++ b/src/lj_mcode.c
@@ -32,6 +32,12 @@
 void sys_icache_invalidate(void *start, size_t len);
 #endif
 
+#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <sys/cachectl.h>
+#endif
+
 /* Synchronize data/instruction cache. */
 void lj_mcode_sync(void *start, void *end)
 {
@@ -44,6 +50,17 @@ void lj_mcode_sync(void *start, void *end)
   sys_icache_invalidate(start, (char *)end-(char *)start);
 #elif LJ_TARGET_PPC
   lj_vm_cachesync(start, end);
+#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX
+#if (defined(__GNUC__) || defined(__clang__))
+  __asm__ volatile("fence rw, rw");
+#else
+  lj_vm_fence_rw_rw();
+#endif
+#ifdef __GLIBC__
+  __riscv_flush_icache(start, end, 0);
+#else
+  syscall(__NR_riscv_flush_icache, start, end, 0UL);
+#endif
 #elif defined(__GNUC__) || defined(__clang__)
   __clear_cache(start, end);
 #else
diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc
index 408d97bb59..0e991f68f7 100644
--- a/src/vm_riscv64.dasc
+++ b/src/vm_riscv64.dasc
@@ -1847,7 +1847,14 @@ static void build_subroutines(BuildCtx *ctx)
   |  bxgeu RA, RB, ->fff_resn
   |  checknum CARG2, >7
   |6:
-  |  fpins FARG1, FARG1, FARG2
+  |.if ismax
+  |  flt.d TMP0, FARG2, FARG1
+  |.else // min
+  |  flt.d TMP0, FARG1, FARG2
+  |.endif
+  |  bnez TMP0, >8 // skip swap
+  |  fmv.d FARG1, FARG2
+  |8:
   |  addi RA, RA, 8
   |  j <5
   |7:  // Convert integer to number and continue above.
@@ -4124,7 +4131,7 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
   case BC_ITERN:
     |  // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8)
     |.if JIT
-    |  hotloop
+    |  // NYI: compile table traversal.
     |.endif
     |->vm_IITERN:
     |  add RA, BASE, RA
@@ -4210,26 +4217,8 @@ static void build_ins(BuildCtx *ctx, BCOp op, int defop)
     |  li TMP1, BC_ITERC
     |  sb TMP3, -4+OFS_OP(PC)
     |  add PC, TMP0, TMP2
-    |.if JIT
-    |  lb TMP0, OFS_OP(PC)
-    |  li TMP3, BC_ITERN
-    |  lhu TMP2, OFS_RD(PC)
-    |  bne TMP0, TMP3, >6
-    |.endif
     |  sb TMP1, OFS_OP(PC)
     |  j <1
-    |.if JIT
-    |6:  // Unpatch JLOOP.
-    |  ld TMP0, GL_J(trace)(GL)  // Assumes J.trace in-reach relative to GL.
-    |  slliw TMP2, TMP2, 3
-    |  add TMP0, TMP0, TMP2
-    |  ld TRACE:TMP2, 0(TMP0)
-    |  lw TMP0, TRACE:TMP2->startins
-    |  andi TMP0, TMP0, -256
-    |  or TMP0, TMP0, TMP1
-    |  sw TMP0, 0(PC)
-    |  j <1
-    |.endif
     break;
   case BC_VARG: