@@ -2560,6 +2560,71 @@ void drcbe_x64::op_read(Assembler &a, const instruction &inst)
 		else
 			smart_call_r64(a, (x86code *)accessors.specific.read.function, rax);            // call non-virtual member function
 	}
+	else if (have_specific && ((1 << spacesizep.size()) < accessors.specific.native_bytes))
+	{
+		// if the destination register is a non-volatile register, it can hold the shift count across the call; otherwise borrow I0
+		bool need_save = (dstreg != ebx) && (dstreg != r12d) && (dstreg != r13d) && (dstreg != r14d) && (dstreg != r15d);
+
+		if (need_save)
+			a.mov(ptr(rsp, 32), Gpq(int_register_map[0]));                                  // save I0 register
+		if ((accessors.specific.native_bytes <= 4) || (spacesizep.size() != SIZE_QWORD))
+			a.mov(Gpd(REG_PARAM3), imm(make_bitmask<uint32_t>(8 << spacesizep.size())));    // set default mem_mask
+		else
+			a.mov(Gpq(REG_PARAM3), imm(make_bitmask<uint64_t>(8 << spacesizep.size())));    // set default mem_mask
+		a.mov(ecx, Gpd(REG_PARAM2));                                                        // copy address
+		a.and_(Gpd(REG_PARAM2), imm(addr_mask));                                            // apply address mask
+
+		int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
+		if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
+			a.not_(ecx);                                                                    // swizzle address for big endian spaces
+		mov_r64_imm(a, rax, uintptr_t(accessors.specific.read.dispatch));                   // load dispatch table pointer
+		if (shift < 0)
+			a.shl(ecx, imm(-shift));                                                        // convert address to bits (left shift)
+		else if (shift > 0)
+			a.shr(ecx, imm(shift));                                                         // convert address to bits (right shift)
+		if (accessors.specific.low_bits)
+		{
+			a.mov(r10d, Gpd(REG_PARAM2));                                                   // copy masked address
+			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);                            // shift off low bits
+		}
+		a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
+		a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3));                                           // load dispatch table entry
+		if (accessors.specific.low_bits)
+			a.mov(Gpd(REG_PARAM2), r10d);                                                   // restore masked address
+		if (need_save)
+			a.mov(Gpd(int_register_map[0]), ecx);                                           // save masked bit address
+		else
+			a.mov(dstreg.r32(), ecx);                                                       // save masked bit address
+		if (accessors.specific.native_bytes <= 4)
+			a.shl(Gpd(REG_PARAM3), cl);                                                     // shift mem_mask by masked bit address
+		else
+			a.shl(Gpq(REG_PARAM3), cl);                                                     // shift mem_mask by masked bit address
+
+		// need to do this after we're finished with CL, as REG_PARAM1 is C on Windows
+		a.mov(Gpq(REG_PARAM1), rax);
+		if (accessors.specific.read.is_virtual)
+			a.mov(rax, ptr(rax, accessors.specific.read.displacement));                     // load vtable pointer
+		if (accessors.specific.read.displacement)
+			a.add(Gpq(REG_PARAM1), accessors.specific.read.displacement);                   // apply this pointer offset
+		if (accessors.specific.read.is_virtual)
+			a.call(ptr(rax, accessors.specific.read.function));                             // call virtual member function
+		else
+			smart_call_r64(a, (x86code *)accessors.specific.read.function, rax);            // call non-virtual member function
+
+		if (need_save)
+		{
+			a.mov(ecx, Gpd(int_register_map[0]));                                           // restore masked bit address
+			a.mov(Gpq(int_register_map[0]), ptr(rsp, 32));                                  // restore I0 register
+		}
+		else
+		{
+			a.mov(ecx, dstreg.r32());                                                       // restore masked bit address
+		}
+		if (accessors.specific.native_bytes <= 4)
+			a.shr(eax, cl);                                                                 // shift result by masked bit address
+		else
+			a.shr(rax, cl);                                                                 // shift result by masked bit address
+	}
 	else if (spacesizep.size() == SIZE_BYTE)
 	{
 		mov_r64_imm(a, Gpq(REG_PARAM1), accessors.resolved.read_byte.obj);
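For reads narrower than the space's native width, the new path emits a trampoline: convert the (optionally endian-swizzled) address into a bit offset within the native word, shift the default mem_mask into position, call the handler from the dispatch table, then shift the result back down. Below is a minimal C++ sketch of the computation the emitted x86-64 performs, for a 16-bit read from a byte-addressable space with 32-bit native handlers; `narrow_read_sketch` and the `native_reader` callback are hypothetical names standing in for the dispatch-table call, not part of the MAME API.

```cpp
#include <cstdint>

// Hypothetical stand-in for the dispatch table entry the generated code calls.
using native_reader = uint32_t (*)(uint32_t address, uint32_t mem_mask);

// Sketch: a 16-bit read from a space with 32-bit handlers (native_bytes == 4)
// and byte granularity (addr_shift() == 0), mirroring the emitted sequence.
uint16_t narrow_read_sketch(uint32_t address, uint32_t addr_mask, bool big_endian, native_reader read)
{
	uint32_t bits = address;                        // copy address (before masking, as in the emitted code)
	uint32_t const masked = address & addr_mask;    // apply address mask

	if (big_endian)
		bits = ~bits;                               // swizzle address for big endian spaces

	int const shift = 0 - 3;                        // addr_shift() - 3
	if (shift < 0)
		bits <<= -shift;                            // convert address to bits (left shift)
	else if (shift > 0)
		bits >>= shift;                             // convert address to bits (right shift)

	bits &= (4 - 2) << 3;                           // mask bit address: offset of the 16-bit unit in the native word

	uint32_t const mem_mask = 0xffffu << bits;      // shift default mem_mask into position
	return uint16_t(read(masked, mem_mask) >> bits); // shift result by masked bit address
}
```

The shuffling through CL in the generated code exists because x86's variable shifts take their count in CL, and on Windows REG_PARAM1 is RCX, so the handler's this pointer can only be loaded into REG_PARAM1 once the mask shift is done.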
@@ -2661,6 +2726,67 @@ void drcbe_x64::op_readm(Assembler &a, const instruction &inst)
 		else
 			smart_call_r64(a, (x86code *)accessors.specific.read.function, rax);            // call non-virtual member function
 	}
+	else if (have_specific && ((1 << spacesizep.size()) < accessors.specific.native_bytes))
+	{
+		// if the destination register is a non-volatile register, it can hold the shift count across the call; otherwise borrow I0
+		bool need_save = (dstreg != ebx) && (dstreg != r12d) && (dstreg != r13d) && (dstreg != r14d) && (dstreg != r15d);
+
+		if (need_save)
+			a.mov(ptr(rsp, 32), Gpq(int_register_map[0]));                                  // save I0 register
+		a.mov(ecx, Gpd(REG_PARAM2));                                                        // copy address
+		a.and_(Gpd(REG_PARAM2), imm(addr_mask));                                            // apply address mask
+
+		int const shift = m_space[spacesizep.space()]->addr_shift() - 3;
+		if (m_space[spacesizep.space()]->endianness() != ENDIANNESS_LITTLE)
+			a.not_(ecx);                                                                    // swizzle address for big endian spaces
+		mov_r64_imm(a, rax, uintptr_t(accessors.specific.read.dispatch));                   // load dispatch table pointer
+		if (shift < 0)
+			a.shl(ecx, imm(-shift));                                                        // convert address to bits (left shift)
+		else if (shift > 0)
+			a.shr(ecx, imm(shift));                                                         // convert address to bits (right shift)
+		if (accessors.specific.low_bits)
+		{
+			a.mov(r10d, Gpd(REG_PARAM2));                                                   // copy masked address
+			a.shr(Gpd(REG_PARAM2), accessors.specific.low_bits);                            // shift off low bits
+		}
+		a.and_(ecx, imm((accessors.specific.native_bytes - (1 << spacesizep.size())) << 3)); // mask bit address
+		a.mov(rax, ptr(rax, Gpq(REG_PARAM2), 3));                                           // load dispatch table entry
+		if (accessors.specific.low_bits)
+			a.mov(Gpd(REG_PARAM2), r10d);                                                   // restore masked address
+		if (need_save)
+			a.mov(Gpd(int_register_map[0]), ecx);                                           // save masked bit address
+		else
+			a.mov(dstreg.r32(), ecx);                                                       // save masked bit address
+		if (accessors.specific.native_bytes <= 4)
+			a.shl(Gpd(REG_PARAM3), cl);                                                     // shift mem_mask by masked bit address
+		else
+			a.shl(Gpq(REG_PARAM3), cl);                                                     // shift mem_mask by masked bit address
+
+		// need to do this after we're finished with CL, as REG_PARAM1 is C on Windows
+		a.mov(Gpq(REG_PARAM1), rax);
+		if (accessors.specific.read.is_virtual)
+			a.mov(rax, ptr(rax, accessors.specific.read.displacement));                     // load vtable pointer
+		if (accessors.specific.read.displacement)
+			a.add(Gpq(REG_PARAM1), accessors.specific.read.displacement);                   // apply this pointer offset
+		if (accessors.specific.read.is_virtual)
+			a.call(ptr(rax, accessors.specific.read.function));                             // call virtual member function
+		else
+			smart_call_r64(a, (x86code *)accessors.specific.read.function, rax);            // call non-virtual member function
+
+		if (need_save)
+		{
+			a.mov(ecx, Gpd(int_register_map[0]));                                           // restore masked bit address
+			a.mov(Gpq(int_register_map[0]), ptr(rsp, 32));                                  // restore I0 register
+		}
+		else
+		{
+			a.mov(ecx, dstreg.r32());                                                       // restore masked bit address
+		}
+		if (accessors.specific.native_bytes <= 4)
+			a.shr(eax, cl);                                                                 // shift result by masked bit address
+		else
+			a.shr(rax, cl);                                                                 // shift result by masked bit address
+	}
 	else if (spacesizep.size() == SIZE_BYTE)
 	{
 		mov_r64_imm(a, Gpq(REG_PARAM1), accessors.resolved.read_byte_masked.obj);
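The op_readm hunk differs from op_read only in that the caller supplies mem_mask for the narrow access (it arrives in REG_PARAM3 already), so there is no default-mask setup; the incoming mask and the result are shifted by the same bit offset. A sketch under the same assumptions and hypothetical names as above:

```cpp
// Masked variant (op_readm): the caller's mask is shifted into position
// instead of a freshly built default mem_mask.
using native_reader = uint32_t (*)(uint32_t address, uint32_t mem_mask);

uint16_t narrow_readm_sketch(uint32_t address, uint32_t addr_mask, uint16_t mask, bool big_endian, native_reader read)
{
	uint32_t bits = address;                          // copy address
	uint32_t const masked = address & addr_mask;      // apply address mask
	if (big_endian)
		bits = ~bits;                                 // swizzle address for big endian spaces
	bits = (bits << 3) & ((4 - 2) << 3);              // byte address to bit offset within the native word
	uint32_t const mem_mask = uint32_t(mask) << bits; // shift caller's mem_mask by masked bit address
	return uint16_t(read(masked, mem_mask) >> bits);  // shift result by masked bit address
}
```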