@@ -2600,7 +2600,7 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst)
 		else
 			a.shl(Gpq(REG_PARAM3), cl);                                     // shift mem_mask by masked bit address
 
-		// need to do this after finished with CL as REG_PARAM1 is A on Windows
+		// need to do this after finished with CL as REG_PARAM1 is C on Windows
 		a.mov(Gpq(REG_PARAM1), rax);
 		if (accessors.specific.read.is_virtual)
 			a.mov(rax, ptr(rax, accessors.specific.read.displacement));    // load vtable pointer
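
The comment fix above is about parameter registers, not RAX: the Windows x64 convention passes the first four integer arguments in RCX, RDX, R8 and R9, while the System V ABI uses RDI, RSI, RDX, RCX, R8 and R9. Variable shift counts for SHL/SHR always live in CL, the low byte of RCX, so the register that maps to REG_PARAM1 on Windows (and to REG_PARAM4 on SysV) must not be loaded until every CL-based shift has been emitted. A minimal sketch of that mapping, using illustrative constants rather than MAME's actual definitions:

    // Illustrative only -- not MAME's REG_PARAM definitions.
    // x86-64 register IDs: RAX=0, RCX=1, RDX=2, RSI=6, RDI=7, R8=8, R9=9.
    #ifdef _WIN32
    constexpr int REG_PARAM1 = 1;   // RCX -- aliases CL, hence "REG_PARAM1 is C on Windows"
    constexpr int REG_PARAM2 = 2;   // RDX
    constexpr int REG_PARAM3 = 8;   // R8
    constexpr int REG_PARAM4 = 9;   // R9
    #else
    constexpr int REG_PARAM1 = 7;   // RDI
    constexpr int REG_PARAM2 = 6;   // RSI
    constexpr int REG_PARAM3 = 2;   // RDX
    constexpr int REG_PARAM4 = 1;   // RCX -- aliases CL on SysV, see the op_write hunk below
    #endif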
@@ -2749,27 +2749,27 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
 			a.mov(r10d, Gpd(REG_PARAM2));                                   // copy masked address
 			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);            // shift off low bits
 		}
-		a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
 		a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3));                           // load dispatch table entry
+		a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
 		if (accessors.specific.low_bits)
 			a.mov(Gpd(REG_PARAM2), r10d);                                   // restore masked address
 		if (need_save)
 			a.mov(Gpd(int_register_map[0]), ecx);                           // save masked bit address
 		else
 			a.mov(dstreg.r32(), ecx);                                       // save masked bit address
+		if (accessors.specific.read.is_virtual)
+			a.mov(r10, ptr(rax, accessors.specific.read.displacement));     // load vtable pointer
+		if (accessors.specific.read.displacement)
+			a.add(rax, accessors.specific.read.displacement);               // apply this pointer offset
 		if (accessors.specific.native_bytes <= 4)
 			a.shl(Gpd(REG_PARAM3), cl);                                     // shift mem_mask by masked bit address
 		else
 			a.shl(Gpq(REG_PARAM3), cl);                                     // shift mem_mask by masked bit address
 
-		// need to do this after finished with CL as REG_PARAM1 is A on Windows
+		// need to do this after finished with CL as REG_PARAM1 is C on Windows
 		a.mov(Gpq(REG_PARAM1), rax);
 		if (accessors.specific.read.is_virtual)
-			a.mov(rax, ptr(rax, accessors.specific.read.displacement));     // load vtable pointer
-		if (accessors.specific.read.displacement)
-			a.add(Gpq(REG_PARAM1), accessors.specific.read.displacement);   // apply this pointer offset
-		if (accessors.specific.read.is_virtual)
-			a.call(ptr(rax, accessors.specific.read.function));             // call virtual member function
+			a.call(ptr(r10, accessors.specific.read.function));             // call virtual member function
 		else
 			smart_call_r64(a, (x86code *)accessors.specific.read.function, rax); // call non-virtual member function
 
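
In the op_readm hunk, the vtable pointer for a virtual handler is now loaded into R10 and the this-pointer displacement is applied to RAX before the CL-based shifts, so after the shifts RAX only needs to be copied into REG_PARAM1 and the call can go straight through R10. A rough C++ model of the call sequence the emitted instructions perform -- the member_binding struct and call_read helper here are illustrative stand-ins, not MAME's actual delegate types:

    #include <cstddef>
    #include <cstdint>

    // Illustrative stand-in for a resolved member-function binding.
    struct member_binding
    {
        uintptr_t function;      // code address, or byte offset into the vtable if is_virtual
        ptrdiff_t displacement;  // this-pointer adjustment
        bool      is_virtual;
    };

    using read_fn = uint64_t (*)(void *obj, uint32_t addr, uint64_t mem_mask);

    uint64_t call_read(void *entry, member_binding const &r, uint32_t addr, uint64_t mem_mask)
    {
        void *obj = static_cast<char *>(entry) + r.displacement;     // apply this pointer offset
        read_fn fn;
        if (r.is_virtual)
        {
            // load the vtable pointer, then fetch the slot at byte offset 'function'
            unsigned char const *vtable = *reinterpret_cast<unsigned char *const *>(obj);
            fn = *reinterpret_cast<read_fn const *>(vtable + r.function);
        }
        else
        {
            fn = reinterpret_cast<read_fn>(r.function);               // direct code address
        }
        return fn(obj, addr, mem_mask);                               // REG_PARAM1..3 in the emitted code
    }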
@@ -2856,70 +2856,91 @@ void drcbe_x64::op_write(Assembler &a, const instruction &inst)
 	// set up a call to the write handler
 	auto const &accessors = m_memory_accessors[spacesizep.space()];
 	bool const have_specific = (uintptr_t(nullptr) != accessors.specific.write.function) || accessors.specific.write.is_virtual;
+	auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
 	mov_reg_param(a, Gpd(REG_PARAM2), addrp);
 	if (spacesizep.size() != SIZE_QWORD)
 		mov_reg_param(a, Gpd(REG_PARAM3), srcp);
 	else
 		mov_reg_param(a, Gpq(REG_PARAM3), srcp);
-	if (have_specific && ((1 << spacesizep.size()) <= accessors.specific.native_bytes))
+	if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
 	{
-		// need to do this early - shift count must be CL, and RCX is a function parameter
-		if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
+		// set default mem_mask
+		if (accessors.specific.native_bytes <= 4)
+			a.mov(Gpd(REG_PARAM4), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
+		else
+			a.mov(Gpq(REG_PARAM4), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
+
+		a.and_(Gpd(REG_PARAM2), imm(addr_mask));                            // apply address mask
+		mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));  // load dispatch table pointer
+		if (accessors.specific.low_bits)
 		{
-			a.mov(ecx, Gpd(REG_PARAM2));
-			if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD))
-				a.mov(eax, imm(make_bitmask<uint32_t>(8 << spacesizep.size())));
-			else
-				a.mov(rax, imm(make_bitmask<uint64_t>(8 << spacesizep.size())));
-			int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
-			if (shift < 0)
-				a.shl(ecx, imm(-shift));
-			else if (shift > 0)
-				a.shr(ecx, imm(shift));
-			if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
-			{
-				a.sub(ecx, imm((accessors.specific.native_bytes << 3) - (8 << spacesizep.size())));
-				a.neg(ecx);
-			}
-			a.and_(cl, imm((accessors.specific.native_bytes - 1) << 3));
-			if (accessors.specific.native_bytes <= 4)
-			{
-				a.shl(eax, cl);
-				a.shl(Gpd(REG_PARAM3), cl);
-				a.mov(Gpd(REG_PARAM4), eax);
-			}
-			else
-			{
-				a.shl(rax, cl);
-				a.shl(Gpq(REG_PARAM3), cl);
-				a.mov(Gpq(REG_PARAM4), rax);
-			}
+			a.mov(r10d, Gpd(REG_PARAM2));                                   // save masked address
+			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);            // shift off low bits
 		}
+		a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM2), 3));               // load dispatch table entry
+		if (accessors.specific.low_bits)
+			a.mov(Gpd(REG_PARAM2), r10d);                                   // restore masked address
+
+		if (accessors.specific.write.is_virtual)
+			a.mov(rax, ptr(Gpq(REG_PARAM1), accessors.specific.write.displacement)); // load vtable pointer
+		if (accessors.specific.write.displacement)
+			a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);  // apply this pointer offset
+		if (accessors.specific.write.is_virtual)
+			a.call(ptr(rax, accessors.specific.write.function));            // call virtual member function
 		else
+			smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
+	}
+	else if (have_specific && ((1 << spacesizep.size()) < accessors.specific.native_bytes))
+	{
+		a.mov(ecx, Gpd(REG_PARAM2));                                        // copy address
+		a.and_(Gpd(REG_PARAM2), imm(addr_mask));                            // apply address mask
+
+		int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
+		if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
+			a.not_(ecx);                                                    // swizzle address for big Endian spaces
+		mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));  // load dispatch table pointer
+		if (shift < 0)
+			a.shl(ecx, imm(-shift));                                        // convert address to bits (left shift)
+		else if (shift > 0)
+			a.shr(ecx, imm(shift));                                         // convert address to bits (right shift)
+		if (accessors.specific.low_bits)
 		{
-			if (accessors.specific.native_bytes <= 4)
-				a.mov(Gpd(REG_PARAM4), make_bitmask<uint32_t>(accessors.specific.native_bytes << 3));
-			else
-				a.mov(Gpq(REG_PARAM4), make_bitmask<uint64_t>(accessors.specific.native_bytes << 3));
+			a.mov(r10d, Gpd(REG_PARAM2));                                   // copy masked address
+			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);            // shift off low bits
 		}
-
-		a.and_(Gpd(REG_PARAM2), make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits));
-		mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));
-		a.mov(Gpd(REG_PARAM1), Gpd(REG_PARAM2));
+		a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3));                           // load dispatch table entry
+		a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
+		if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD))
+			a.mov(r11d, imm(make_bitmask<uint32_t>(8 << spacesizep.size()))); // set default mem_mask
+		else
+			a.mov(r11, imm(make_bitmask<uint64_t>(8 << spacesizep.size()))); // set default mem_mask
 		if (accessors.specific.low_bits)
-			a.shr(Gpd(REG_PARAM1), accessors.specific.low_bits);
-		a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM1), 3));
-		if (accessors.specific.write.displacement)
-			a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);
+			a.mov(Gpd(REG_PARAM2), r10d);                                   // restore masked address
 		if (accessors.specific.write.is_virtual)
+			a.mov(r10, ptr(rax, accessors.specific.write.displacement));    // load vtable pointer
+		if (accessors.specific.write.displacement)
+			a.add(rax, accessors.specific.write.displacement);              // apply this pointer offset
+		if (accessors.specific.native_bytes <= 4)
 		{
-			a.mov(rax, ptr(Gpq(REG_PARAM1)));
-			a.call(ptr(rax, accessors.specific.write.function));
+			a.shl(r11d, cl);                                                // shift mem_mask by masked bit address
+			a.shl(Gpd(REG_PARAM3), cl);                                     // shift data by masked bit address
 		}
 		else
 		{
-			smart_call_r64(a, (x86code *)accessors.specific.write.function, rax);
+			a.shl(r11, cl);                                                 // shift mem_mask by masked bit address
+			a.shl(Gpq(REG_PARAM3), cl);                                     // shift data by masked bit address
 		}
+
+		// need to do this after finished with CL as REG_PARAM1 is C on Windows and REG_PARAM4 is C on SysV
+		a.mov(Gpq(REG_PARAM1), rax);
+		if (accessors.specific.native_bytes <= 4)
+			a.mov(Gpd(REG_PARAM4), r11d);                                   // copy mem_mask to parameter 4 (ECX on SysV)
+		else
+			a.mov(Gpq(REG_PARAM4), r11);                                    // copy mem_mask to parameter 4 (RCX on SysV)
+		if (accessors.specific.write.is_virtual)
+			a.call(ptr(r10, accessors.specific.write.function));            // call virtual member function
+		else
+			smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
 	}
 	else if (spacesizep.size() == SIZE_BYTE)
 	{
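
For accesses narrower than the bus width, the rewritten op_write path computes the bit offset of the access within the native word inline (a NOT for big-endian swizzling, a shift to convert the address to bits, then a mask), and shifts both the data and a default mem_mask into place before the handler call. A standalone sketch of that computation, with illustrative parameter names in place of the registers the emitted code uses (ECX for the bit address, R11 for the mask):

    #include <cstdint>
    #include <cstdio>

    // Bit offset of a (1 << size)-byte access within the wider native bus word,
    // mirroring the not_/shl/shr/and_ sequence the backend emits.
    uint32_t bit_address(uint32_t addr, int addr_shift, bool big_endian,
                         uint32_t native_bytes, uint32_t size)
    {
        uint32_t bits = addr;
        if (big_endian)
            bits = ~bits;                        // swizzle address for big Endian spaces
        int const shift = addr_shift - 3;
        if (shift < 0)
            bits <<= -shift;                     // convert address to bits (left shift)
        else if (shift > 0)
            bits >>= shift;                      // convert address to bits (right shift)
        return bits & ((native_bytes - (1u << size)) << 3);   // mask bit address
    }

    int main()
    {
        // Example: byte write (size 0) into a 32-bit little-endian space with addr_shift 0.
        uint32_t const size = 0, native_bytes = 4;
        uint32_t const bitaddr = bit_address(0x1002, 0, false, native_bytes, size);
        uint64_t const mem_mask = ((1ull << (8 << size)) - 1) << bitaddr;   // default mem_mask, shifted into place
        std::printf("bit address %u, mem_mask %#llx\n", bitaddr, (unsigned long long)mem_mask);
    }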
@@ -2965,70 +2986,88 @@ void drcbe_x64::op_writem(Assembler &a, const instruction &inst)
 	// set up a call to the write handler
 	auto const &accessors = m_memory_accessors[spacesizep.space()];
 	bool const have_specific = (uintptr_t(nullptr) != accessors.specific.write.function) || accessors.specific.write.is_virtual;
+	auto const addr_mask = make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits);
 	mov_reg_param(a, Gpd(REG_PARAM2), addrp);
 	if (spacesizep.size() != SIZE_QWORD)
 		mov_reg_param(a, Gpd(REG_PARAM3), srcp);
 	else
 		mov_reg_param(a, Gpq(REG_PARAM3), srcp);
-	if (have_specific && ((1 << spacesizep.size()) <= accessors.specific.native_bytes))
+	if (have_specific && ((1 << spacesizep.size()) == accessors.specific.native_bytes))
 	{
-		// need to do this early - shift count must be CL, and RCX is a function parameter
-		if ((1 << spacesizep.size()) < accessors.specific.native_bytes)
-		{
-			if (spacesizep.size() != SIZE_QWORD)
-				mov_reg_param(a, eax, maskp);
-			else
-				mov_reg_param(a, rax, maskp);
-			a.mov(ecx, Gpd(REG_PARAM2));
-			int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
-			if (shift < 0)
-				a.shl(ecx, imm(-shift));
-			else if (shift > 0)
-				a.shr(ecx, imm(shift));
-			if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
-			{
-				a.sub(ecx, imm((accessors.specific.native_bytes << 3) - (8 << spacesizep.size())));
-				a.neg(ecx);
-			}
-			a.and_(cl, imm((accessors.specific.native_bytes - 1) << 3));
-			if (accessors.specific.native_bytes <= 4)
-			{
-				a.shl(eax, cl);
-				a.shl(Gpd(REG_PARAM3), cl);
-				a.mov(Gpd(REG_PARAM4), eax);
-			}
-			else
-			{
-				a.shl(rax, cl);
-				a.shl(Gpq(REG_PARAM3), cl);
-				a.mov(Gpq(REG_PARAM4), rax);
-			}
-		}
+		if (spacesizep.size() != SIZE_QWORD)
+			mov_reg_param(a, Gpd(REG_PARAM4), maskp);                       // get mem_mask
 		else
+			mov_reg_param(a, Gpq(REG_PARAM4), maskp);                       // get mem_mask
+		a.and_(Gpd(REG_PARAM2), imm(addr_mask));                            // apply address mask
+
+		mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));  // load dispatch table pointer
+		if (accessors.specific.low_bits)
 		{
-			if (accessors.specific.native_bytes <= 4)
-				mov_reg_param(a, Gpd(REG_PARAM4), maskp);
-			else
-				mov_reg_param(a, Gpq(REG_PARAM4), maskp);
+			a.mov(r10d, Gpd(REG_PARAM2));                                   // save masked address
+			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);            // shift off low bits
 		}
-
-		a.and_(Gpd(REG_PARAM2), make_bitmask<uint32_t>(accessors.specific.address_width) & ~make_bitmask<uint32_t>(accessors.specific.native_mask_bits));
-		mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));
-		a.mov(Gpd(REG_PARAM1), Gpd(REG_PARAM2));
+		a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM2), 3));               // load dispatch table entry
 		if (accessors.specific.low_bits)
-			a.shr(Gpd(REG_PARAM1), accessors.specific.low_bits);
-		a.mov(Gpq(REG_PARAM1), ptr(rax, Gpq(REG_PARAM1), 3));
+			a.mov(Gpd(REG_PARAM2), r10d);                                   // restore masked address
+
+		if (accessors.specific.write.is_virtual)
+			a.mov(rax, ptr(Gpq(REG_PARAM1), accessors.specific.write.displacement)); // load vtable pointer
 		if (accessors.specific.write.displacement)
-			a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);
+			a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);  // apply this pointer offset
 		if (accessors.specific.write.is_virtual)
+			a.call(ptr(rax, accessors.specific.write.function));            // call virtual member function
+		else
+			smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
+	}
+	else if (have_specific && ((1 << spacesizep.size()) < accessors.specific.native_bytes))
+	{
+		a.mov(ecx, Gpd(REG_PARAM2));                                        // copy address
+		if (spacesizep.size() != SIZE_QWORD)
+			mov_reg_param(a, r11d, maskp);                                  // get mem_mask
+		else
+			mov_reg_param(a, r11, maskp);                                   // get mem_mask
+		a.and_(Gpd(REG_PARAM2), imm(addr_mask));                            // apply address mask
+
+		int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
+		if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
+			a.not_(ecx);                                                    // swizzle address for big Endian spaces
+		mov_r64_imm(a, rax, uintptr_t(accessors.specific.write.dispatch));  // load dispatch table pointer
+		if (shift < 0)
+			a.shl(ecx, imm(-shift));                                        // convert address to bits (left shift)
+		else if (shift > 0)
+			a.shr(ecx, imm(shift));                                         // convert address to bits (right shift)
+		if (accessors.specific.low_bits)
 		{
-			a.mov(rax, ptr(Gpq(REG_PARAM1)));
-			a.call(ptr(rax, accessors.specific.write.function));
+			a.mov(r10d, Gpd(REG_PARAM2));                                   // copy masked address
+			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);            // shift off low bits
+		}
+		a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
+		a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3));                           // load dispatch table entry
+		if (accessors.specific.low_bits)
+			a.mov(Gpd(REG_PARAM2), r10d);                                   // restore masked address
+		if (accessors.specific.native_bytes <= 4)
+		{
+			a.shl(r11d, cl);                                                // shift mem_mask by masked bit address
+			a.shl(Gpd(REG_PARAM3), cl);                                     // shift data by masked bit address
+			a.mov(Gpd(REG_PARAM4), r11d);                                   // copy mem_mask to parameter 4 (ECX on SysV)
 		}
 		else
 		{
-			smart_call_r64(a, (x86code *)accessors.specific.write.function, rax);
+			a.shl(r11, cl);                                                 // shift mem_mask by masked bit address
+			a.shl(Gpq(REG_PARAM3), cl);                                     // shift data by masked bit address
+			a.mov(Gpq(REG_PARAM4), r11);                                    // copy mem_mask to parameter 4 (RCX on SysV)
 		}
+
+		// need to do this after finished with CL as REG_PARAM1 is C on Windows
+		a.mov(Gpq(REG_PARAM1), rax);
+		if (accessors.specific.write.is_virtual)
+			a.mov(rax, ptr(rax, accessors.specific.write.displacement));    // load vtable pointer
+		if (accessors.specific.write.displacement)
+			a.add(Gpq(REG_PARAM1), accessors.specific.write.displacement);  // apply this pointer offset
+		if (accessors.specific.write.is_virtual)
+			a.call(ptr(rax, accessors.specific.write.function));            // call virtual member function
+		else
+			smart_call_r64(a, (x86code *)accessors.specific.write.function, rax); // call non-virtual member function
 	}
 	else if (spacesizep.size() == SIZE_BYTE)
 	{