Skip to content

Commit

Permalink
JBR-3715: partial cherry-pick of PR "Windows AArch64 Support"
Browse files Browse the repository at this point in the history
implemented correct r18 usage
fixed JBR-3715
patch from V.Kempik

(cherry picked from commit a08c2a7)
  • Loading branch information
artemm-bochkarev authored and vprovodin committed Sep 20, 2021
1 parent ca49a70 commit e9e9816
Show file tree
Hide file tree
Showing 16 changed files with 1,388 additions and 1,250 deletions.
10 changes: 10 additions & 0 deletions make/hotspot/gensrc/GensrcAdlc.gmk
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,16 @@ ifeq ($(call check-jvm-feature, compiler2), true)
ADLCFLAGS += -DAIX=1
else ifeq ($(OPENJDK_TARGET_OS), macosx)
ADLCFLAGS += -D_ALLBSD_SOURCE=1 -D_GNU_SOURCE=1
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
ADLCFLAGS += -DR18_RESERVED=1
endif
else ifeq ($(OPENJDK_TARGET_OS), windows)
ifeq ($(call isTargetCpuBits, 64), true)
ADLCFLAGS += -D_WIN64=1
endif
ifeq ($(HOTSPOT_TARGET_CPU_ARCH), aarch64)
ADLCFLAGS += -DR18_RESERVED=1
endif
endif

ifneq ($(OPENJDK_TARGET_OS), windows)
Expand Down
6 changes: 6 additions & 0 deletions src/hotspot/cpu/aarch64/aarch64-asmtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ class Register(Operand):

def generate(self):
# Draw a random register number in the inclusive range 0..30.
self.number = random.randint(0, 30)
# Register 18 is never produced: a draw of 18 is replaced by 17.
# NOTE(review): presumably because r18 is reserved as the platform register
# on some targets -- confirm against the accompanying HotSpot change.
if self.number == 18:
self.number = 17
return self

def astr(self, prefix):
Expand All @@ -37,6 +39,8 @@ class GeneralRegisterOrZr(Register):

def generate(self):
# Draw a random register number in the inclusive range 0..31.
self.number = random.randint(0, 31)
# Register 18 is never produced: a draw of 18 is replaced by 16.
# NOTE(review): presumably because r18 is reserved as the platform register
# on some targets -- confirm against the accompanying HotSpot change.
if self.number == 18:
self.number = 16
return self

def astr(self, prefix = ""):
Expand All @@ -54,6 +58,8 @@ def __str__(self):
class GeneralRegisterOrSp(Register):
def generate(self):
# Draw a random register number in the inclusive range 0..31.
self.number = random.randint(0, 31)
# Register 18 is never produced: a draw of 18 is replaced by 15.
# NOTE(review): presumably because r18 is reserved as the platform register
# on some targets -- confirm against the accompanying HotSpot change.
if self.number == 18:
self.number = 15
return self

def astr(self, prefix = ""):
Expand Down
24 changes: 19 additions & 5 deletions src/hotspot/cpu/aarch64/aarch64.ad
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,8 @@ reg_def R16 ( SOC, SOC, Op_RegI, 16, r16->as_VMReg() );
reg_def R16_H ( SOC, SOC, Op_RegI, 16, r16->as_VMReg()->next());
reg_def R17 ( SOC, SOC, Op_RegI, 17, r17->as_VMReg() );
reg_def R17_H ( SOC, SOC, Op_RegI, 17, r17->as_VMReg()->next());
reg_def R18 ( SOC, SOC, Op_RegI, 18, r18->as_VMReg() );
reg_def R18_H ( SOC, SOC, Op_RegI, 18, r18->as_VMReg()->next());
reg_def R18 ( SOC, SOC, Op_RegI, 18, r18_reserved->as_VMReg() );
reg_def R18_H ( SOC, SOC, Op_RegI, 18, r18_reserved->as_VMReg()->next());
reg_def R19 ( SOC, SOE, Op_RegI, 19, r19->as_VMReg() );
reg_def R19_H ( SOC, SOE, Op_RegI, 19, r19->as_VMReg()->next());
reg_def R20 ( SOC, SOE, Op_RegI, 20, r20->as_VMReg() ); // caller esp
Expand Down Expand Up @@ -352,7 +352,6 @@ alloc_class chunk0(
R15, R15_H,
R16, R16_H,
R17, R17_H,
R18, R18_H,

// arg registers
R0, R0_H,
Expand All @@ -375,7 +374,7 @@ alloc_class chunk0(
R26, R26_H,

// non-allocatable registers

R18, R18_H, // platform
R27, R27_H, // heapbase
R28, R28_H, // thread
R29, R29_H, // fp
Expand Down Expand Up @@ -533,7 +532,10 @@ reg_class no_special_reg32_no_fp(
R15,
R16,
R17,
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
R18,
#endif
R19,
R20,
R21,
Expand Down Expand Up @@ -566,7 +568,10 @@ reg_class no_special_reg32_with_fp(
R15,
R16,
R17,
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
R18,
#endif
R19,
R20,
R21,
Expand Down Expand Up @@ -602,7 +607,10 @@ reg_class no_special_reg_no_fp(
R15, R15_H,
R16, R16_H,
R17, R17_H,
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
R18, R18_H,
#endif
R19, R19_H,
R20, R20_H,
R21, R21_H,
Expand Down Expand Up @@ -635,7 +643,10 @@ reg_class no_special_reg_with_fp(
R15, R15_H,
R16, R16_H,
R17, R17_H,
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
R18, R18_H,
#endif
R19, R19_H,
R20, R20_H,
R21, R21_H,
Expand Down Expand Up @@ -775,7 +786,10 @@ reg_class no_special_ptr_reg(
R15, R15_H,
R16, R16_H,
R17, R17_H,
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
R18, R18_H,
#endif
R19, R19_H,
R20, R20_H,
R21, R21_H,
Expand Down Expand Up @@ -5419,7 +5433,7 @@ pipeline %{
attributes %{
// ARM instructions are of fixed length
fixed_size_instructions; // Fixed size instructions TODO does
max_instructions_per_bundle = 2; // A53 = 2, A57 = 4
max_instructions_per_bundle = 4; // A53 = 2, A57 = 4
// ARM instructions come in 32-bit word units
instruction_unit_size = 4; // An instruction is 4 bytes long
instruction_fetch_unit_size = 64; // The processor fetches one line
Expand Down
2,362 changes: 1,181 additions & 1,181 deletions src/hotspot/cpu/aarch64/assembler_aarch64.cpp

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions src/hotspot/cpu/aarch64/c1_Defs_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ enum {
pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission
pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission

pd_nof_caller_save_cpu_regs_frame_map = 19 - 2, // number of registers killed by calls
pd_nof_caller_save_cpu_regs_frame_map = 19 - 2 /* rscratch1 and rscratch2 */ R18_RESERVED_ONLY(- 1), // number of registers killed by calls
pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls

pd_first_callee_saved_reg = 19 - 2,
pd_last_callee_saved_reg = 26 - 2,
pd_first_callee_saved_reg = 19 - 2 /* rscratch1 and rscratch2 */ R18_RESERVED_ONLY(- 1),
pd_last_callee_saved_reg = 26 - 2 /* rscratch1 and rscratch2 */ R18_RESERVED_ONLY(- 1),

pd_last_allocatable_cpu_reg = 16,
pd_last_allocatable_cpu_reg = 16 R18_RESERVED_ONLY(- 1),

pd_nof_cpu_regs_reg_alloc
= pd_last_allocatable_cpu_reg + 1, // number of registers that are visible to register allocator
Expand All @@ -60,9 +60,9 @@ enum {
pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan
pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these
pd_first_cpu_reg = 0,
pd_last_cpu_reg = 16,
pd_last_cpu_reg = 16 R18_RESERVED_ONLY(- 1),
pd_first_byte_reg = 0,
pd_last_byte_reg = 16,
pd_last_byte_reg = 16 R18_RESERVED_ONLY(- 1),
pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,
pd_last_fpu_reg = pd_first_fpu_reg + 31,

Expand Down
1 change: 1 addition & 0 deletions src/hotspot/cpu/aarch64/c1_FpuStackSim_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,4 @@
//--------------------------------------------------------

// No FPU stack on AARCH64
#include "precompiled.hpp"
15 changes: 13 additions & 2 deletions src/hotspot/cpu/aarch64/c1_FrameMap_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,10 @@ void FrameMap::initialize() {
map_register(i, r15); r15_opr = LIR_OprFact::single_cpu(i); i++;
map_register(i, r16); r16_opr = LIR_OprFact::single_cpu(i); i++;
map_register(i, r17); r17_opr = LIR_OprFact::single_cpu(i); i++;
map_register(i, r18); r18_opr = LIR_OprFact::single_cpu(i); i++;
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
map_register(i, r18_reserved); r18_opr = LIR_OprFact::single_cpu(i); i++;
#endif
map_register(i, r19); r19_opr = LIR_OprFact::single_cpu(i); i++;
map_register(i, r20); r20_opr = LIR_OprFact::single_cpu(i); i++;
map_register(i, r21); r21_opr = LIR_OprFact::single_cpu(i); i++;
Expand All @@ -199,6 +202,11 @@ void FrameMap::initialize() {
map_register(i, r8); r8_opr = LIR_OprFact::single_cpu(i); i++; // rscratch1
map_register(i, r9); r9_opr = LIR_OprFact::single_cpu(i); i++; // rscratch2

#ifdef R18_RESERVED
// See comment in register_aarch64.hpp
map_register(i, r18_reserved); r18_opr = LIR_OprFact::single_cpu(i); i++;
#endif

rscratch1_opr = r8_opr;
rscratch2_opr = r9_opr;
rscratch1_long_opr = LIR_OprFact::double_cpu(r8_opr->cpu_regnr(), r8_opr->cpu_regnr());
Expand Down Expand Up @@ -227,7 +235,10 @@ void FrameMap::initialize() {
_caller_save_cpu_regs[13] = r15_opr;
_caller_save_cpu_regs[14] = r16_opr;
_caller_save_cpu_regs[15] = r17_opr;
#ifndef R18_RESERVED
// See comment in register_aarch64.hpp
_caller_save_cpu_regs[16] = r18_opr;
#endif

for (int i = 0; i < 8; i++) {
_caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
Expand All @@ -253,7 +264,7 @@ void FrameMap::initialize() {
r15_oop_opr = as_oop_opr(r15);
r16_oop_opr = as_oop_opr(r16);
r17_oop_opr = as_oop_opr(r17);
r18_oop_opr = as_oop_opr(r18);
r18_oop_opr = as_oop_opr(r18_reserved);
r19_oop_opr = as_oop_opr(r19);
r20_oop_opr = as_oop_opr(r20);
r21_oop_opr = as_oop_opr(r21);
Expand Down
9 changes: 9 additions & 0 deletions src/hotspot/cpu/aarch64/globalDefinitions_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,13 @@ const bool CCallingConventionRequiresIntsAsLongs = false;

#define THREAD_LOCAL_POLL

// Build-time switch for r18 handling.
//
// When __APPLE__ or _WIN64 is defined:
//   - R18_RESERVED is defined (usable with #ifdef / #ifndef),
//   - R18_RESERVED_ONLY(code) expands to `code`,
//   - NOT_R18_RESERVED(code) expands to nothing.
// On every other build R18_RESERVED stays undefined and the two helper
// macros behave the other way round.
#if defined(__APPLE__) || defined(_WIN64)
#define R18_RESERVED
#define R18_RESERVED_ONLY(code) code
#define NOT_R18_RESERVED(code)
#else
#define R18_RESERVED_ONLY(code)
#define NOT_R18_RESERVED(code) code
#endif

#endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
7 changes: 6 additions & 1 deletion src/hotspot/cpu/aarch64/immediate_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
*/

#include <stdlib.h>
#include "precompiled.hpp"
#include "utilities/globalDefinitions.hpp"
#include "immediate_aarch64.hpp"

// there are at most 2^13 possible logical immediate encodings
Expand Down Expand Up @@ -243,7 +245,10 @@ int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,

// constructor to initialise the lookup tables

static void initLITables() __attribute__ ((constructor));
static void initLITables();
// Use an empty struct with a constructor, as MSVC doesn't support `__attribute__ ((constructor))`
// See https://stackoverflow.com/questions/1113409/attribute-constructor-equivalent-in-vc
static struct initLITables_t { initLITables_t(void) { initLITables(); } } _initLITables;
static void initLITables()
{
li_table_entry_count = 0;
Expand Down
12 changes: 10 additions & 2 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2544,9 +2544,17 @@ void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
}
}

// Builds the register set r0..r17 with rscratch1 and rscratch2 removed.
// When R18_RESERVED is not defined, r18 is added to the set as well.
RegSet MacroAssembler::call_clobbered_registers() {
  RegSet clobbered = RegSet::range(r0, r17);
  clobbered = clobbered - RegSet::of(rscratch1, rscratch2);
#ifndef R18_RESERVED
  // r18 is not reserved in this configuration, so it belongs to the set too.
  clobbered += r18_reserved;
#endif
  return clobbered;
}

void MacroAssembler::push_call_clobbered_registers() {
int step = 4 * wordSize;
push(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
push(call_clobbered_registers() - RegSet::of(rscratch1, rscratch2), sp);
sub(sp, sp, step);
mov(rscratch1, -step);
// Push v0-v7, v16-v31.
Expand All @@ -2566,7 +2574,7 @@ void MacroAssembler::pop_call_clobbered_registers() {
as_FloatRegister(i+3), T1D, Address(post(sp, 4 * wordSize)));
}

pop(RegSet::range(r0, r18) - RegSet::of(rscratch1, rscratch2), sp);
pop(call_clobbered_registers() - RegSet::of(rscratch1, rscratch2), sp);
}

void MacroAssembler::push_CPU_state(bool save_vectors) {
Expand Down
2 changes: 2 additions & 0 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -460,6 +460,8 @@ class MacroAssembler: public Assembler {
void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

static RegSet call_clobbered_registers();

// Push and pop everything that might be clobbered by a native
// runtime call except rscratch1 and rscratch2. (They are always
// scratch, so we don't have to protect them.) Only save the lower
Expand Down
18 changes: 13 additions & 5 deletions src/hotspot/cpu/aarch64/macroAssembler_aarch64_trig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,7 @@ void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2
RECOMP_FOR1_CHECK;
Register tmp2 = r1, n = r2, jv = r4, tmp5 = r5, jx = r6,
tmp3 = r7, iqBase = r10, ih = r11, tmp4 = r12, tmp1 = r13,
jz = r14, j = r15, twoOverPiBase = r16, i = r17, qBase = r18;
jz = r14, j = r15, twoOverPiBase = r16, i = r17, qBase = r19;
// jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4
// jx = nx - 1
lea(twoOverPiBase, ExternalAddress(two_over_pi));
Expand Down Expand Up @@ -1421,6 +1421,12 @@ void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE;
Register X = r0, absX = r1, n = r2, ix = r3;
FloatRegister y0 = v4, y1 = v5;

enter();
// r19 is used in TemplateInterpreterGenerator::generate_math_entry
RegSet saved_regs = RegSet::of(r19);
push (saved_regs, sp);

block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); {
fmovd(X, v0);
mov(rscratch2, 0x3e400000);
Expand All @@ -1438,14 +1444,14 @@ void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
// Set last bit unconditionally to make it NaN
orr(r10, r10, 1);
fmovd(v0, r10);
ret(lr);
b(DONE);
}
block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {<fast return>}"); {
bind(TINY_X);
if (isCos) {
fmovd(v0, 1.0);
}
ret(lr);
b(DONE);
}
bind(ARG_REDUCTION); /* argument reduction needed */
block_comment("n = __ieee754_rem_pio2(x,y);"); {
Expand All @@ -1465,7 +1471,7 @@ void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
tbz(n, 1, DONE);
}
fnegd(v0, v0);
ret(lr);
b(DONE);
bind(RETURN_SIN);
generate_kernel_sin(y0, true, dsin_coef);
if (isCos) {
Expand All @@ -1474,7 +1480,7 @@ void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
tbz(n, 1, DONE);
}
fnegd(v0, v0);
ret(lr);
b(DONE);
}
bind(EARLY_CASE);
eor(y1, T8B, y1, y1);
Expand All @@ -1484,5 +1490,7 @@ void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
generate_kernel_sin(v0, false, dsin_coef);
}
bind(DONE);
pop(saved_regs, sp);
leave();
ret(lr);
}
2 changes: 1 addition & 1 deletion src/hotspot/cpu/aarch64/register_aarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ const char* RegisterImpl::name() const {
"c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7",
"rscratch1", "rscratch2",
"r10", "r11", "r12", "r13", "r14", "r15", "r16",
"r17", "r18", "r19",
"r17", "r18_tls", "r19",
"resp", "rdispatch", "rbcp", "r23", "rlocals", "rmonitors", "rcpool", "rheapbase",
"rthread", "rfp", "lr", "sp"
};
Expand Down
Loading

0 comments on commit e9e9816

Please sign in to comment.