Skip to content

Commit b040025

Browse files
Coreforge authored and geerlingguy committed
better simd fixup (still not entirely working)
1 parent 43969b6 commit b040025

File tree

1 file changed

+109
-67
lines changed

1 file changed

+109
-67
lines changed

arch/arm64/kernel/compat_alignment.c

Lines changed: 109 additions & 67 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,8 @@
1313
#include <asm/traps.h>
1414

1515
#include <asm/fpsimd.h>
16+
#include <asm/neon.h>
17+
#include <asm/simd.h>
1618

1719
/*
1820
* 32-bit misaligned trap handler (c) 1998 San Mehat (CCC) -July 1998
@@ -413,10 +415,13 @@ struct fixupDescription{
413415
void* addr;
414416

415417
// datax_simd has to be located directly after datax in memory
416-
u64 data1;
418+
/*u64 data1;
417419
u64 data1_simd;
418420
u64 data2;
419-
u64 data2_simd;
421+
u64 data2_simd;*/
422+
423+
int reg1;
424+
int reg2;
420425

421426
int Rs; // used for atomics (which don't get handled atomically)
422427

@@ -481,14 +486,42 @@ static int alignment_get_arm64(struct pt_regs *regs, __le64 __user *ip, u32 *ins
481486
}*/
482487

483488
// saves the contents of the simd register reg to dst
484-
void read_simd_reg(int reg, __uint128_t* dst){
485-
struct user_fpsimd_state st;
486-
fpsimd_save_state(&st);
487-
*dst = st.vregs[reg];
489+
void read_simd_reg(int reg, u64 dst[2]){
490+
struct user_fpsimd_state st = {0};
491+
//fpsimd_save_state(&st);
492+
493+
if(!may_use_simd()){
494+
printk("may_use_simd returned false!\n");
495+
}
496+
kernel_neon_begin();
497+
if(current->thread.sve_state){
498+
printk("SVE state is not NULL!\n");
499+
}
500+
501+
dst[0] = *((u64*)(&current->thread.uw.fpsimd_state.vregs[reg]));
502+
dst[1] = *(((u64*)(&current->thread.uw.fpsimd_state.vregs[reg])) + 1);
503+
504+
kernel_neon_end();
488505
}
489506

490507
int do_ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
491508
int r;
509+
u64 data1[2];
510+
u64 data2[2];
511+
512+
// the reg indices have to always be valid, even if the reg isn't being used
513+
if(desc->simd){
514+
// At least currently, there aren't any simd instructions supported that use more than one data register
515+
//__uint128_t tmp;
516+
read_simd_reg(desc->reg1, data1);
517+
//data1[0] = tmp;
518+
//data1[1] = *(((u64*)&tmp) + 1);
519+
printk("SIMD: storing 0x%llx %llx (%d bits) at 0x%px", data1[1], data1[0], desc->width, desc->addr);
520+
} else {
521+
data1[0] = regs->regs[desc->reg1];
522+
data2[0] = regs->regs[desc->reg2];
523+
}
524+
492525
/*if(desc->width > 64){
493526
printk("Currently cannot process ls_fixup with a size of %d bits\n", desc->width);
494527
return 1;
@@ -500,8 +533,10 @@ int do_ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
500533
//printk("Storing %d bytes (pair: %d) to 0x%llx",bcount, desc->pair, desc->addr);
501534
int addrIt = 0;
502535
for(int i = 0; i < bcount; i++){
503-
if((r=put_user( (*(((uint8_t*)(&desc->data1)) + addrIt) & 0xff), (uint8_t __user *)addr)))
536+
if((r=put_user( (*(((uint8_t*)(data1)) + addrIt) & 0xff), (uint8_t __user *)addr))){
537+
printk("Failed to write data at 0x%px (base was 0x%px)\n", addr, desc->addr);
504538
return r;
539+
}
505540
//desc->data1 >>= 8;
506541
addrIt++;
507542
addr++;
@@ -510,16 +545,18 @@ int do_ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
510545
addrIt = 0;
511546
if(desc->pair){
512547
for(int i = 0; i < bcount; i++){
513-
if((r=put_user((*(((uint8_t*)(&desc->data2)) + addrIt) & 0xff) & 0xff, (uint8_t __user *)addr)))
548+
if((r=put_user((*(((uint8_t*)(data2)) + addrIt) & 0xff) & 0xff, (uint8_t __user *)addr))){
549+
printk("Failed to write data at 0x%px (base was 0x%px)\n", addr, desc->addr);
514550
return r;
551+
}
515552
//desc->data2 >>= 8;
516553
addrIt++;
517554
addr++;
518555
}
519556
}
520557
arm64_skip_faulting_instruction(regs, 4);
521558
} else {
522-
printk("Loading is currently not implemented (addr 0x%llx)\n", desc->addr);
559+
printk("Loading is currently not implemented (addr 0x%px)\n", desc->addr);
523560
return -1;
524561
}
525562
return 0;
@@ -555,7 +592,7 @@ int ls_cas_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc)
555592
}
556593

557594
desc->addr = (void*)regs->regs[Rn];
558-
desc->data1 = regs->regs[Rt];
595+
u64 data1 = regs->regs[Rt];
559596

560597
// nearly everything from here on could be moved into another function if needed
561598
u64 cmpmask = (1 << desc->width) - 1;
@@ -567,7 +604,7 @@ int ls_cas_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc)
567604
int r;
568605
uint8_t tmp;
569606

570-
printk("Atomic CAS not being done atomically at 0x%llx, size %d\n",desc->addr, desc->width);
607+
printk("Atomic CAS not being done atomically at 0x%px, size %d\n",desc->addr, desc->width);
571608

572609
for(int i = 0; i < bcount; i++){
573610
if((r=get_user(tmp, (uint8_t __user *)addr)))
@@ -582,9 +619,9 @@ int ls_cas_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc)
582619
addr = (u64)desc->addr;
583620

584621
for(int i = 0; i < bcount; i++){
585-
if((r=put_user(desc->data1 & 0xff, (uint8_t __user *)addr)))
622+
if((r=put_user(data1 & 0xff, (uint8_t __user *)addr)))
586623
return r;
587-
desc->data1 >>= 8;
624+
data1 >>= 8;
588625
addr++;
589626
}
590627

@@ -637,8 +674,10 @@ int ls_pair_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc
637674
default:
638675
return -1;
639676
}
640-
desc->data1 = regs->regs[Rt];
641-
desc->data2 = regs->regs[Rt2];
677+
//desc->data1 = regs->regs[Rt];
678+
//desc->data2 = regs->regs[Rt2];
679+
desc->reg1 = Rt;
680+
desc->reg2 = Rt2;
642681

643682
return do_ls_fixup(instr, regs, desc);
644683

@@ -648,22 +687,29 @@ int ls_reg_unsigned_imm(u32 instr, struct pt_regs *regs, struct fixupDescription
648687
uint8_t size = (instr >> 30) & 3;
649688
uint8_t simd = (instr >> 26) & 1;
650689
uint8_t opc = (instr >> 22) & 3;
690+
uint16_t imm12 = (instr >> 10) & 0xfff;
691+
uint8_t Rn = (instr >> 5) & 0x1f;
692+
uint8_t Rt = instr & 0x1f;
651693

652-
switch(size){
653-
case 0:
654-
desc->width = 8;
655-
break;
656-
case 1:
657-
desc->width = 16;
658-
break;
659-
case 2:
660-
desc->width = 32;
661-
break;
662-
case 3:
663-
desc->width = 64;
664-
break;
694+
uint8_t load = opc & 1;
695+
uint8_t extend_sign = ((opc & 2) >> 1 ) & !simd;
696+
printk("size: %d simd: %d opc: %d imm12: 0x%x Rn: %d Rt: %d\n", size, simd, opc, imm12, Rn, Rt);
697+
// when in simd mode, opc&2 is a third size bit. Otherwise, it's there for sign extension
698+
int width_shift = (size | (((opc & 2) & (simd << 1)) << 1));
699+
desc->width = 8 << width_shift;
700+
701+
if((size & 1) && simd && (opc & 2)){
702+
return 1;
665703
}
666-
return 1;
704+
705+
desc->reg1 = Rt;
706+
desc->simd = simd;
707+
desc->extendSign = extend_sign;
708+
u64 addr = regs->regs[Rn];
709+
desc->addr = addr + (imm12 << width_shift);
710+
printk("unsigned imm\n");
711+
712+
return do_ls_fixup(instr, regs, desc);
667713
}
668714

669715

@@ -697,50 +743,52 @@ int lsr_offset_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* d
697743
uint8_t Rn = (instr >> 5) & 0x1f;
698744
uint8_t Rt = instr & 0x1f;
699745
uint8_t S = (instr >> 12) & 1;
746+
int width_shift = (size | (((opc & 2) & (simd << 1)) << 1));
700747
// size==0 seems to be a bit special
701748
// opc&2 is sign, opc&1 is load (for most instructions anyways)
702749

703750
uint8_t load = opc & 1;
704-
uint8_t extend_sign = (opc & 2) >> 1;
751+
uint8_t extend_sign = ((opc & 2) >> 1 ) & !simd;
705752
desc->pair = 0;
706753

707754
desc->simd = simd;
708-
desc->width = 8 << size;
755+
desc->width = 8 << width_shift;
709756

710757
// the simd instructions make this a bit weird
711-
if(!simd){
712-
if(extend_sign){
713-
if(load){
714-
desc->extend_width = 32;
715-
} else {
716-
desc->extend_width = 64;
717-
}
718-
desc->load = 1;
758+
if(extend_sign){
759+
if(load){
760+
desc->extend_width = 32;
719761
} else {
720-
desc->load = load;
762+
desc->extend_width = 64;
721763
}
764+
desc->load = 1;
765+
} else {
766+
desc->load = load;
767+
}
722768

723-
desc->extendSign = extend_sign; // needed for load, which isn't implemented yet
724-
725-
726-
u64 addr = regs->regs[Rn];
769+
desc->extendSign = extend_sign; // needed for load, which isn't implemented yet
727770

771+
u64 offset = 0;
772+
u64 addr = 0;
773+
addr = regs->regs[Rn];
774+
if(simd){
775+
int shift = 0;
776+
if(S) shift = width_shift;
777+
offset = extend_reg(regs->regs[Rm], option, shift);
778+
} else {
728779
int shift = 0;
729780
if(S) shift = 2 << ((size & 1) & ((size >> 1) & 1));
730781

731-
u64 offset = extend_reg(regs->regs[Rm], option, S);
732-
733-
addr += offset;
782+
offset = extend_reg(regs->regs[Rm], option, shift);
783+
}
734784

735-
desc->data1 = regs->regs[Rt];
736-
desc->addr = (void*)addr;
785+
addr += offset;
737786

738-
return do_ls_fixup(instr, regs, desc);
787+
//desc->data1 = regs->regs[Rt];
788+
desc->reg1 = Rt;
789+
desc->addr = (void*)addr;
739790

740-
} else {
741-
printk("Load/Store register offset decode doesn't support simd yet\n");
742-
return 1;
743-
}
791+
return do_ls_fixup(instr, regs, desc);
744792
return 0;
745793
}
746794

@@ -767,14 +815,15 @@ int lsr_unscaled_immediate_fixup(u32 instr, struct pt_regs *regs, struct fixupDe
767815
if(load){
768816
return 1;
769817
}
818+
desc->reg1 = Rt;
770819
if(simd){
771820
desc->simd = 1;
772-
desc->width = 8 << (size | (opc << 1));
821+
desc->width = 8 << (size | ((opc & 2) << 1));
773822
// assuming store
774-
__uint128_t tmp;
823+
/*__uint128_t tmp;
775824
read_simd_reg(Rt, &tmp);
776825
desc->data1 = tmp;
777-
desc->data1_simd = *(((u64*)&tmp) + 1);
826+
desc->data1_simd = *(((u64*)&tmp) + 1);*/
778827
return do_ls_fixup(instr, regs, desc);
779828
}
780829
printk("SIMD: %d\n", simd);
@@ -809,10 +858,9 @@ int ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
809858
desc->pair = 0;
810859

811860
}
812-
if((op0 & 3) == 2 && (op2 == 2)){
813-
// Load/store pair offset
814-
//ldpstp_offset_fixup(instr, regs);
815-
//r = ls_reg_unsigned_imm(instr, regs, desc);
861+
if((op0 & 3) == 3 && ((op2 & 2) == 2)){
862+
// register unsigned immediate
863+
r = ls_reg_unsigned_imm(instr, regs, desc);
816864
}
817865
if((op0 & 3) == 3 && (op2 & 2) == 0 && (op3 & 0x20) == 0x20 && op4 == 2){
818866
// register offset load/store
@@ -821,12 +869,6 @@ int ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
821869
if((op0 & 3) == 3 && (op2 & 2) == 0 && (op3 & 0x20) == 0x0 && op4 == 0){
822870
// register load/store unscaled immediate
823871
r = lsr_unscaled_immediate_fixup(instr, regs, desc);
824-
printk("Likely SIMD stuff, which isn't being handled properly at all!\n");
825-
if(r){
826-
arm64_skip_faulting_instruction(regs, 4);
827-
// skip anyways
828-
}
829-
//r = 0;
830872
}
831873
if(r){
832874
printk("Load/Store: op0 0x%x op1 0x%x op2 0x%x op3 0x%x op4 0x%x\n", op0, op1, op2, op3, op4);

0 commit comments

Comments
 (0)