
Commit b60de93

cpu/drcbex64.cpp: Better instruction scheduling in read/write memory ops.
Parent commit: 8340d63

1 file changed, 140 insertions(+), 101 deletions(-)

src/devices/cpu/drcbex64.cpp
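For context before the diff: in the specific-access fast path of all four handlers, the generated code turns the masked address into an index into a per-space dispatch table, and most of this commit reorders the surrounding instructions so that each load (dispatch table entry, vtable pointer) is separated from its first use, and nothing is written to a parameter register that overlaps CL until the variable shifts are done. A stand-alone C++ sketch of the index computation; the struct, field names, and example values are illustrative rather than copied from drcbex64.cpp:

```cpp
#include <cstdint>

// Hypothetical stand-alone model of the dispatch-table indexing done in the
// generated code: mask the address to the space's width, drop the bits
// covered by one native access, then shift off the low bits below the
// dispatch granularity. Field names mirror the accessors used in the diff,
// but this struct itself is illustrative.
struct SpecificAccessInfo
{
	uint32_t address_width;     // significant address bits for the space
	uint32_t native_mask_bits;  // low address bits covered by one native access
	uint32_t low_bits;          // address bits below the dispatch granularity
};

template <typename T>
constexpr T make_bitmask(unsigned n)
{
	return (n < (8 * sizeof(T))) ? ((T(1) << n) - 1) : ~T(0);
}

constexpr uint32_t dispatch_index(SpecificAccessInfo const &info, uint32_t address)
{
	uint32_t const addr_mask =
			make_bitmask<uint32_t>(info.address_width) & ~make_bitmask<uint32_t>(info.native_mask_bits);
	return (address & addr_mask) >> info.low_bits;  // index of the handler entry
}

int main()
{
	// e.g. a 24-bit space with 16-bit native width dispatched at 10-bit granularity
	constexpr SpecificAccessInfo info{ 24, 1, 10 };
	static_assert(dispatch_index(info, 0x123456) == 0x48d, "unexpected dispatch index");
}
```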
@@ -2600,7 +2600,7 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst)
         else
             a.shl(Gpq(REG_PARAM3), cl); // shift mem_mask by masked bit address
 
-        // need to do this after finished with CL as REG_PARAM1 is A on Windows
+        // need to do this after finished with CL as REG_PARAM1 is C on Windows
         a.mov(Gpq(REG_PARAM1), rax);
         if (accessors.specific.read.is_virtual)
             a.mov(rax, ptr(rax, accessors.specific.read.displacement)); // load vtable pointer
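The corrected comment is about the x86-64 calling conventions: the first integer argument register is RCX on Windows (not RAX), so REG_PARAM1 overlaps the CL shift count and can only be loaded once the variable shifts are finished; on System V it is REG_PARAM4 that lands in RCX instead. A compilable sketch of such a mapping using the conventional hardware register IDs — the actual REG_PARAM definitions in drcbex64.cpp may be spelled differently:

```cpp
#include <cstdint>

// x86-64 general-purpose register IDs in the usual hardware encoding.
enum : uint32_t { kRax = 0, kRcx = 1, kRdx = 2, kRsi = 6, kRdi = 7, kR8 = 8, kR9 = 9 };

#ifdef _WIN64
// Windows x64 ABI: integer arguments in RCX, RDX, R8, R9.
constexpr uint32_t REG_PARAM1 = kRcx;   // also the CL shift-count register
constexpr uint32_t REG_PARAM2 = kRdx;
constexpr uint32_t REG_PARAM3 = kR8;
constexpr uint32_t REG_PARAM4 = kR9;
#else
// System V AMD64 ABI: integer arguments in RDI, RSI, RDX, RCX.
constexpr uint32_t REG_PARAM1 = kRdi;
constexpr uint32_t REG_PARAM2 = kRsi;
constexpr uint32_t REG_PARAM3 = kRdx;
constexpr uint32_t REG_PARAM4 = kRcx;   // the CL alias moves to parameter 4 here
#endif

int main()
{
	// Either way, one argument register overlaps CL, so any shift by a variable
	// count has to be emitted before that parameter register is loaded.
	static_assert((REG_PARAM1 == kRcx) || (REG_PARAM4 == kRcx), "CL always collides with a parameter");
}
```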
@@ -2749,27 +2749,27 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
             a.mov(r10d, Gpd(REG_PARAM2)); // copy masked address
             a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits); // shift off low bits
         }
-        a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
         a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3)); // load dispatch table entry
+        a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
         if (accessors.specific.low_bits)
             a.mov(Gpd(REG_PARAM2), r10d); // restore masked address
         if (need_save)
             a.mov(Gpd(int_register_map[0]), ecx); // save masked bit address
         else
             a.mov(dstreg.r32(), ecx); // save masked bit address
+        if (accessors.specific.read.is_virtual)
+            a.mov(r10, ptr(rax, accessors.specific.read.displacement)); // load vtable pointer
+        if (accessors.specific.read.displacement)
+            a.add(rax, accessors.specific.read.displacement); // apply this pointer offset
         if (accessors.specific.native_bytes <= 4)
             a.shl(Gpd(REG_PARAM3), cl); // shift mem_mask by masked bit address
         else
             a.shl(Gpq(REG_PARAM3), cl); // shift mem_mask by masked bit address
 
-        // need to do this after finished with CL as REG_PARAM1 is A on Windows
+        // need to do this after finished with CL as REG_PARAM1 is C on Windows
         a.mov(Gpq(REG_PARAM1), rax);
         if (accessors.specific.read.is_virtual)
-            a.mov(rax, ptr(rax, accessors.specific.read.displacement)); // load vtable pointer
-        if (accessors.specific.read.displacement)
-            a.add(Gpq(REG_PARAM1), accessors.specific.read.displacement); // apply this pointer offset
-        if (accessors.specific.read.is_virtual)
-            a.call(ptr(rax, accessors.specific.read.function)); // call virtual member function
+            a.call(ptr(r10, accessors.specific.read.function)); // call virtual member function
         else
             smart_call_r64(a, (x86code *)accessors.specific.read.function, rax); // call non-virtual member function
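The op_readm rescheduling puts independent work between each load and its first use: the bit-address mask now executes while the dispatch-table entry load is in flight, and the vtable pointer is fetched into R10 early so the indirect call no longer depends on the RAX-to-REG_PARAM1 move emitted just before it. The loads and call themselves are ordinary manual virtual dispatch; a stand-alone illustration follows, relying on the Itanium-style vtable layout (vptr at offset 0, function addresses in the slots) that the backend also assumes — this is not portable C++ and is shown only to explain the emitted sequence:

```cpp
#include <cassert>
#include <cstdint>

// Sketch of the manual virtual dispatch the generated code performs: load the
// vtable pointer from the object, offset to the right slot, and call through
// it with the (possibly adjusted) object pointer as the first argument.
struct handler
{
	virtual uint32_t read(uint32_t address) { return address ^ 0xFFFFu; }
};

int main()
{
	handler obj;

	// Equivalent of the emitted sequence:
	//   mov r10, [rax + displacement]   ; load vtable pointer (displacement = 0 here)
	//   add rax, displacement           ; apply this pointer offset (no-op here)
	//   mov REG_PARAM1, rax             ; this pointer as first argument
	//   call [r10 + function]           ; call virtual member function (slot 0)
	using read_fn = uint32_t (*)(handler *, uint32_t);
	auto const vtable = *reinterpret_cast<std::uintptr_t const *>(&obj);  // read the vptr
	auto const fn = *reinterpret_cast<read_fn const *>(vtable);           // read vtable slot 0

	assert(fn(&obj, 0x1234) == (0x1234u ^ 0xFFFFu));
}
```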

@@ -2856,70 +2856,91 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst)
     // set up a call to the write handler
     auto const &accessors = m_memory_accessors[spacesizep.space()];
     bool const have_specific = (uintptr_t(nullptr) != accessors.specific.write.function) || accessors.specific.write.is_virtual;
+    auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
     mov_reg_param(a, Gpd(REG_PARAM2), addrp);
     if (spacesizep.size() != SIZE_QWORD)
         mov_reg_param(a, Gpd(REG_PARAM3), srcp);
     else
         mov_reg_param(a, Gpq(REG_PARAM3), srcp);
-    if (have_specific && ((1 << spacesizep.size()) <= accessors.specific.native_bytes))
+    if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
     {
-        // need to do this early - shift count must be CL, and RCX is a function parameter
-        if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
+        // set default mem_mask
+        if (accessors.specific.native_bytes <= 4)
+            a.mov(Gpd(REG_PARAM4), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
+        else
+            a.mov(Gpq(REG_PARAM4), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
+
+        a.and_(Gpd(REG_PARAM2), imm(addr_mask)); // apply address mask
+        mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch)); // load dispatch table pointer
+        if (accessors.specific.low_bits)
         {
-            a.mov(ecx, Gpd(REG_PARAM2));
-            if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD))
-                a.mov(eax, imm(make_bitmask<uint32_t>(8 << spacesizep.size())));
-            else
-                a.mov(rax, imm(make_bitmask<uint64_t>(8 << spacesizep.size())));
-            int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
-            if (shift < 0)
-                a.shl(ecx, imm(-shift));
-            else if (shift > 0)
-                a.shr(ecx, imm(shift));
-            if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
-            {
-                a.sub(ecx, imm((accessors.specific.native_bytes << 3) - (8 << spacesizep.size())));
-                a.neg(ecx);
-            }
-            a.and_(cl, imm((accessors.specific.native_bytes - 1) << 3));
-            if (accessors.specific.native_bytes <= 4)
-            {
-                a.shl(eax, cl);
-                a.shl(Gpd(REG_PARAM3), cl);
-                a.mov(Gpd(REG_PARAM4), eax);
-            }
-            else
-            {
-                a.shl(rax, cl);
-                a.shl(Gpq(REG_PARAM3), cl);
-                a.mov(Gpq(REG_PARAM4), rax);
-            }
+            a.mov(r10d, Gpd(REG_PARAM2)); // save masked address
+            a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits); // shift off low bits
         }
+        a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM2), 3)); // load dispatch table entry
+        if (accessors.specific.low_bits)
+            a.mov(Gpd(REG_PARAM2), r10d); // restore masked address
+
+        if (accessors.specific.write.is_virtual)
+            a.mov(rax, ptr(Gpq(REG_PARAM1), accessors.specific.write.displacement)); // load vtable pointer
+        if (accessors.specific.write.displacement)
+            a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement); // apply this pointer offset
+        if (accessors.specific.write.is_virtual)
+            a.call(ptr(rax, accessors.specific.write.function)); // call virtual member function
         else
+            smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
+    }
+    else if (have_specific && ((1 << spacesizep.size()) < accessors.specific.native_bytes))
+    {
+        a.mov(ecx, Gpd(REG_PARAM2)); // copy address
+        a.and_(Gpd(REG_PARAM2), imm(addr_mask)); // apply address mask
+
+        int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
+        if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
+            a.not_(ecx); // swizzle address for big Endian spaces
+        mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch)); // load dispatch table pointer
+        if (shift < 0)
+            a.shl(ecx, imm(-shift)); // convert address to bits (left shift)
+        else if (shift > 0)
+            a.shr(ecx, imm(shift)); // convert address to bits (right shift)
+        if (accessors.specific.low_bits)
         {
-            if (accessors.specific.native_bytes <= 4)
-                a.mov(Gpd(REG_PARAM4), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
-            else
-                a.mov(Gpq(REG_PARAM4), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
+            a.mov(r10d, Gpd(REG_PARAM2)); // copy masked address
+            a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits); // shift off low bits
         }
-
-        a.and_(Gpd(REG_PARAM2), make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits));
-        mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));
-        a.mov(Gpd(REG_PARAM1), Gpd(REG_PARAM2));
+        a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3)); // load dispatch table entry
+        a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
+        if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD))
+            a.mov(r11d, imm(make_bitmask<uint32_t>(8 << spacesizep.size()))); // set default mem_mask
+        else
+            a.mov(r11, imm(make_bitmask<uint64_t>(8 << spacesizep.size()))); // set default mem_mask
         if (accessors.specific.low_bits)
-            a.shr(Gpd(REG_PARAM1), accessors.specific.low_bits);
-        a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM1), 3));
-        if (accessors.specific.write.displacement)
-            a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);
+            a.mov(Gpd(REG_PARAM2), r10d); // restore masked address
         if (accessors.specific.write.is_virtual)
+            a.mov(r10, ptr(rax, accessors.specific.write.displacement)); // load vtable pointer
+        if (accessors.specific.write.displacement)
+            a.add(rax, accessors.specific.write.displacement); // apply this pointer offset
+        if (accessors.specific.native_bytes <= 4)
         {
-            a.mov(rax, ptr(Gpq(REG_PARAM1)));
-            a.call(ptr(rax, accessors.specific.write.function));
+            a.shl(r11d, cl); // shift mem_mask by masked bit address
+            a.shl(Gpd(REG_PARAM3), cl); // shift data by masked bit address
         }
         else
         {
-            smart_call_r64(a, (x86code *)accessors.specific.write.function, rax);
+            a.shl(r11, cl); // shift mem_mask by masked bit address
+            a.shl(Gpq(REG_PARAM3), cl); // shift data by masked bit address
         }
+
+        // need to do this after finished with CL as REG_PARAM1 is C on Windows and REG_PARAM4 is C on SysV
+        a.mov(Gpq(REG_PARAM1), rax);
+        if (accessors.specific.native_bytes <= 4)
+            a.mov(Gpd(REG_PARAM4), r11d); // copy mem_mask to parameter 4 (ECX on SysV)
+        else
+            a.mov(Gpq(REG_PARAM4), r11); // copy mem_mask to parameter 4 (RCX on SysV)
+        if (accessors.specific.write.is_virtual)
+            a.call(ptr(r10, accessors.specific.write.function)); // call virtual member function
+        else
+            smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
     }
     else if (spacesizep.size() == SIZE_BYTE)
     {
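Besides the scheduling, the rewritten sub-native write path changes how the bit offset is computed for big-endian spaces: instead of subtracting and negating after the address has been converted to bits, it complements the address up front and lets the final mask discard the unwanted bits. A small self-contained check that the two forms agree for accesses aligned to their own size, assuming a byte-addressable space (addr_shift of 0):

```cpp
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main()
{
	// Compare the old form (sub/neg after converting to bits, then mask with
	// (native_bytes - 1) << 3) with the new form (NOT the address, convert to
	// bits, then mask with (native_bytes - access_bytes) << 3).
	for (uint32_t native : { 2u, 4u, 8u })
	{
		for (uint32_t bytes = 1; bytes < native; bytes <<= 1)
		{
			for (uint32_t addr = 0; addr < 4096; addr += bytes)  // aligned accesses only
			{
				uint32_t old_form = addr << 3;                   // convert address to bits
				old_form -= (native << 3) - (bytes << 3);
				old_form = uint32_t(0) - old_form;               // neg
				old_form &= (native - 1) << 3;                   // mask bit address

				uint32_t new_form = ~addr;                       // swizzle address
				new_form <<= 3;                                  // convert address to bits
				new_form &= (native - bytes) << 3;               // mask bit address

				assert(old_form == new_form);
			}
		}
	}
}
```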
@@ -2965,70 +2986,88 @@ void drcbe_x64::op_writem(Assembler &a, const instruction &inst)
     // set up a call to the write handler
     auto const &accessors = m_memory_accessors[spacesizep.space()];
     bool const have_specific = (uintptr_t(nullptr) != accessors.specific.write.function) || accessors.specific.write.is_virtual;
+    auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
     mov_reg_param(a, Gpd(REG_PARAM2), addrp);
     if (spacesizep.size() != SIZE_QWORD)
         mov_reg_param(a, Gpd(REG_PARAM3), srcp);
     else
         mov_reg_param(a, Gpq(REG_PARAM3), srcp);
-    if (have_specific && ((1 << spacesizep.size()) <= accessors.specific.native_bytes))
+    if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
     {
-        // need to do this early - shift count must be CL, and RCX is a function parameter
-        if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
-        {
-            if (spacesizep.size() != SIZE_QWORD)
-                mov_reg_param(a, eax, maskp);
-            else
-                mov_reg_param(a, rax, maskp);
-            a.mov(ecx, Gpd(REG_PARAM2));
-            int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
-            if (shift < 0)
-                a.shl(ecx, imm(-shift));
-            else if (shift > 0)
-                a.shr(ecx, imm(shift));
-            if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
-            {
-                a.sub(ecx, imm((accessors.specific.native_bytes << 3) - (8 << spacesizep.size())));
-                a.neg(ecx);
-            }
-            a.and_(cl, imm((accessors.specific.native_bytes - 1) << 3));
-            if (accessors.specific.native_bytes <= 4)
-            {
-                a.shl(eax, cl);
-                a.shl(Gpd(REG_PARAM3), cl);
-                a.mov(Gpd(REG_PARAM4), eax);
-            }
-            else
-            {
-                a.shl(rax, cl);
-                a.shl(Gpq(REG_PARAM3), cl);
-                a.mov(Gpq(REG_PARAM4), rax);
-            }
-        }
+        if (spacesizep.size() != SIZE_QWORD)
+            mov_reg_param(a, Gpd(REG_PARAM4), maskp); // get mem_mask
         else
+            mov_reg_param(a, Gpq(REG_PARAM4), maskp); // get mem_mask
+        a.and_(Gpd(REG_PARAM2), imm(addr_mask)); // apply address mask
+
+        mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch)); // load dispatch table pointer
+        if (accessors.specific.low_bits)
         {
-            if (accessors.specific.native_bytes <= 4)
-                mov_reg_param(a, Gpd(REG_PARAM4), maskp);
-            else
-                mov_reg_param(a, Gpq(REG_PARAM4), maskp);
+            a.mov(r10d, Gpd(REG_PARAM2)); // save masked address
+            a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits); // shift off low bits
         }
-
-        a.and_(Gpd(REG_PARAM2), make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits));
-        mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));
-        a.mov(Gpd(REG_PARAM1), Gpd(REG_PARAM2));
+        a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM2), 3)); // load dispatch table entry
         if (accessors.specific.low_bits)
-            a.shr(Gpd(REG_PARAM1), accessors.specific.low_bits);
-        a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM1), 3));
+            a.mov(Gpd(REG_PARAM2), r10d); // restore masked address
+
+        if (accessors.specific.write.is_virtual)
+            a.mov(rax, ptr(Gpq(REG_PARAM1), accessors.specific.write.displacement)); // load vtable pointer
         if (accessors.specific.write.displacement)
-            a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);
+            a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement); // apply this pointer offset
         if (accessors.specific.write.is_virtual)
+            a.call(ptr(rax, accessors.specific.write.function)); // call virtual member function
+        else
+            smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
+    }
+    else if (have_specific && ((1 << spacesizep.size()) < accessors.specific.native_bytes))
+    {
+        a.mov(ecx, Gpd(REG_PARAM2)); // copy address
+        if (spacesizep.size() != SIZE_QWORD)
+            mov_reg_param(a, r11d, maskp); // get mem_mask
+        else
+            mov_reg_param(a, r11, maskp); // get mem_mask
+        a.and_(Gpd(REG_PARAM2), imm(addr_mask)); // apply address mask
+
+        int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
+        if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
+            a.not_(ecx); // swizzle address for big Endian spaces
+        mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch)); // load dispatch table pointer
+        if (shift < 0)
+            a.shl(ecx, imm(-shift)); // convert address to bits (left shift)
+        else if (shift > 0)
+            a.shr(ecx, imm(shift)); // convert address to bits (right shift)
+        if (accessors.specific.low_bits)
         {
-            a.mov(rax, ptr(Gpq(REG_PARAM1)));
-            a.call(ptr(rax, accessors.specific.write.function));
+            a.mov(r10d, Gpd(REG_PARAM2)); // copy masked address
+            a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits); // shift off low bits
+        }
+        a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
+        a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3)); // load dispatch table entry
+        if (accessors.specific.low_bits)
+            a.mov(Gpd(REG_PARAM2), r10d); // restore masked address
+        if (accessors.specific.native_bytes <= 4)
+        {
+            a.shl(r11d, cl); // shift mem_mask by masked bit address
+            a.shl(Gpd(REG_PARAM3), cl); // shift data by masked bit address
+            a.mov(Gpd(REG_PARAM4), r11d); // copy mem_mask to parameter 4 (ECX on SysV)
         }
         else
         {
-            smart_call_r64(a, (x86code *)accessors.specific.write.function, rax);
+            a.shl(r11, cl); // shift mem_mask by masked bit address
+            a.shl(Gpq(REG_PARAM3), cl); // shift data by masked bit address
+            a.mov(Gpq(REG_PARAM4), r11); // copy mem_mask to parameter 4 (RCX on SysV)
         }
+
+        // need to do this after finished with CL as REG_PARAM1 is C on Windows
+        a.mov(Gpq(REG_PARAM1), rax);
+        if (accessors.specific.write.is_virtual)
+            a.mov(rax, ptr(rax, accessors.specific.write.displacement)); // load vtable pointer
+        if (accessors.specific.write.displacement)
+            a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement); // apply this pointer offset
+        if (accessors.specific.write.is_virtual)
+            a.call(ptr(rax, accessors.specific.write.function)); // call virtual member function
+        else
+            smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
     }
     else if (spacesizep.size() == SIZE_BYTE)
     {
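op_writem mirrors op_write, except that mem_mask comes from maskp rather than being synthesised, and it is staged in R11 so it can be shifted by CL before being copied into REG_PARAM4 (which is RCX itself on SysV). A stand-alone model of the values the sub-native write path hands to the native-width handler, assuming a little-endian, byte-addressable space; the names are illustrative:

```cpp
#include <cassert>
#include <cstdint>

// What the sub-native write path computes before calling the handler:
// the dispatch index, plus data and mem_mask shifted into the right lane
// of the native word.
struct NarrowWriteArgs
{
	uint32_t dispatch_index;  // (address & addr_mask) >> low_bits
	uint64_t data;            // source data shifted to its lane
	uint64_t mem_mask;        // caller-supplied mask shifted to its lane
};

NarrowWriteArgs prepare_narrow_write(
		uint32_t address, uint64_t data, uint64_t mem_mask,
		uint32_t native_bytes, uint32_t access_bytes,
		uint32_t addr_mask, uint32_t low_bits)
{
	uint32_t bits = address << 3;                    // convert address to bits
	bits &= (native_bytes - access_bytes) << 3;      // mask bit address
	return {
		(address & addr_mask) >> low_bits,           // dispatch table index
		data << bits,                                // shift data by masked bit address
		mem_mask << bits                             // shift mem_mask by masked bit address
	};
}

int main()
{
	// A 16-bit masked write of 0xBEEF to byte offset 2 of a 32-bit-wide space
	// (full address mask, no low dispatch bits, for simplicity).
	NarrowWriteArgs const w = prepare_narrow_write(0x1002, 0xBEEF, 0xFFFF, 4, 2, 0xFFFFFFFC, 0);
	assert(w.dispatch_index == 0x1000);
	assert(w.data == 0xBEEF0000u);
	assert(w.mem_mask == 0xFFFF0000u);
}
```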
