Skip to content

Commit 1c9fba3

Browse files
Coreforge authored and geerlingguy committed
SIMD stp
1 parent b040025 commit 1c9fba3

File tree

1 file changed

+83
-20
lines changed

1 file changed

+83
-20
lines changed

arch/arm64/kernel/compat_alignment.c

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -446,6 +446,15 @@ static int alignment_get_arm64(struct pt_regs *regs, __le64 __user *ip, u32 *ins
446446
return 0;
447447
}
448448

449+
/*
 * Sign-extend the low `bits` bits of `in` to a full 64-bit signed value.
 *
 * in:   value whose bit (bits - 1) is treated as the sign bit
 * bits: width of the encoded field, e.g. 7 for the imm7 of a load/store pair
 *
 * Returns `in` with every bit from (bits - 1) upwards set when the sign bit
 * is set. When the sign bit is clear, `in` is returned unchanged (bits above
 * the field are not masked off). A width outside 1..63 leaves nothing to
 * extend, so `in` is returned as-is.
 */
int64_t extend_sign(int64_t in, int bits){
	uint64_t value = (uint64_t)in;
	uint64_t sign_bit;

	// shifting by a negative count or by the full type width is undefined
	if(bits <= 0 || bits >= 64){
		return in;
	}

	// use a 64 bit constant: a plain `1 << n` is an int shift and breaks for n >= 31
	sign_bit = 1ULL << (bits - 1);
	if(value & sign_bit){
		// set the sign bit and everything above it
		return (int64_t)(value | ~(sign_bit - 1));
	}
	return in;
}
457+
449458
/*int ldpstp_offset_fixup(u32 instr, struct pt_regs *regs){
450459
uint8_t load = (instr >> 22) & 1;
451460
uint8_t simd = (instr >> 26) & 1;
@@ -513,10 +522,16 @@ int do_ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
513522
if(desc->simd){
514523
// At least currently, there aren't any simd instructions supported that use more than one data register
515524
//__uint128_t tmp;
525+
526+
// probably better for performance to read both registers with one function so kernel_neon_* doesn't have to be called more than once
516527
read_simd_reg(desc->reg1, data1);
528+
read_simd_reg(desc->reg2, data2);
517529
//data1[0] = tmp;
518530
//data1[1] = *(((u64*)&tmp) + 1);
519-
printk("SIMD: storing 0x%llx %llx (%d bits) at 0x%px", data1[1], data1[0], desc->width, desc->addr);
531+
///printk("SIMD: storing 0x%llx %llx (%d bits) at 0x%px", data1[1], data1[0], desc->width, desc->addr);
532+
if(desc->width < 128){
533+
return -1;
534+
}
520535
} else {
521536
data1[0] = regs->regs[desc->reg1];
522537
data2[0] = regs->regs[desc->reg2];
@@ -646,23 +661,29 @@ int ls_pair_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc
646661
uint8_t Rn = (instr >> 5) & 0x1f;
647662
uint8_t Rt = instr & 0x1f;
648663

649-
int16_t imm = 0xffff & imm7;
650-
664+
int64_t imm = extend_sign(imm7, 7);
665+
int immshift = 0;
651666
desc->load = load;
652667
desc->simd = simd;
653668

654669
// opc controls the width
655-
switch(opc){
656-
case 0:
657-
desc->width = 32;
658-
imm <<= 2;
659-
break;
660-
case 2:
661-
desc->width = 64;
662-
imm <<= 3;
663-
break;
664-
default:
665-
return -1;
670+
if(simd){
671+
desc->width = 32 << opc;
672+
immshift = 4 << opc;
673+
imm <<= immshift;
674+
} else {
675+
switch(opc){
676+
case 0:
677+
desc->width = 32;
678+
imm <<= 2;
679+
break;
680+
case 2:
681+
desc->width = 64;
682+
imm <<= 3;
683+
break;
684+
default:
685+
return -1;
686+
}
666687
}
667688

668689
// op2 controls the indexing
@@ -687,15 +708,25 @@ int ls_reg_unsigned_imm(u32 instr, struct pt_regs *regs, struct fixupDescription
687708
uint8_t size = (instr >> 30) & 3;
688709
uint8_t simd = (instr >> 26) & 1;
689710
uint8_t opc = (instr >> 22) & 3;
690-
uint16_t imm12 = (instr >> 10) & 0xfff;
711+
uint64_t imm12 = (instr >> 10) & 0xfff;
691712
uint8_t Rn = (instr >> 5) & 0x1f;
692713
uint8_t Rt = instr & 0x1f;
693714

694715
uint8_t load = opc & 1;
695-
uint8_t extend_sign = ((opc & 2) >> 1 ) & !simd;
696-
printk("size: %d simd: %d opc: %d imm12: 0x%x Rn: %d Rt: %d\n", size, simd, opc, imm12, Rn, Rt);
716+
uint8_t extend_sign = 0;// = ((opc & 2) >> 1 ) & !simd;
717+
int width_shift = 0;
718+
719+
if(simd){
720+
extend_sign = 0;
721+
width_shift = size | ((opc & 2) << 1);
722+
} else {
723+
extend_sign = ((opc & 2) >> 1 );
724+
width_shift = size;
725+
}
726+
727+
///printk("size: %d simd: %d opc: %d imm12: 0x%x Rn: %d Rt: %d\n", size, simd, opc, imm12, Rn, Rt);
697728
// when in simd mode, opc&2 is a third size bit. Otherwise, it's there for sign extension
698-
int width_shift = (size | (((opc & 2) & (simd << 1)) << 1));
729+
//width_shift = (size | (((opc & 2) & (simd << 1)) << 1));
699730
desc->width = 8 << width_shift;
700731

701732
if((size & 1) && simd && (opc & 2)){
@@ -707,7 +738,7 @@ int ls_reg_unsigned_imm(u32 instr, struct pt_regs *regs, struct fixupDescription
707738
desc->extendSign = extend_sign;
708739
u64 addr = regs->regs[Rn];
709740
desc->addr = addr + (imm12 << width_shift);
710-
printk("unsigned imm\n");
741+
///printk("unsigned imm\n");
711742

712743
return do_ls_fixup(instr, regs, desc);
713744
}
@@ -728,9 +759,14 @@ u64 extend_reg(u64 reg, int type, int shift){
728759
int32_t stmpw = reg;
729760
int64_t stmpdw = stmpw;
730761
tmp = (u64)stmpdw;
762+
} else {
763+
printk("Other branch I forgor about previously!\n");
764+
tmp = reg; // since the size stays the same, I don't think this makes a difference
731765
}
732766
}
733767

768+
///printk("extend_reg: reg 0x%lx out (before shift) 0x%lx signed: %x\n", reg, tmp, is_signed);
769+
734770
return tmp << shift;
735771
}
736772

@@ -826,7 +862,7 @@ int lsr_unscaled_immediate_fixup(u32 instr, struct pt_regs *regs, struct fixupDe
826862
desc->data1_simd = *(((u64*)&tmp) + 1);*/
827863
return do_ls_fixup(instr, regs, desc);
828864
}
829-
printk("SIMD: %d\n", simd);
865+
///printk("SIMD: %d\n", simd);
830866
return 1;
831867
}
832868

@@ -876,6 +912,31 @@ int ls_fixup(u32 instr, struct pt_regs *regs, struct fixupDescription* desc){
876912
return r;
877913
}
878914

915+
uint32_t* seenCMDs;
916+
size_t seenCMDCount = 0;
917+
size_t seenCMDSize = 0;
918+
919+
void instrDBG(u32 instr){
920+
for(size_t i = 0; i < seenCMDCount; i++){
921+
if(seenCMDs[i] == instr){
922+
return;
923+
}
924+
}
925+
if(seenCMDSize == 0){
926+
seenCMDs = krealloc(seenCMDs, 1, GFP_KERNEL);
927+
seenCMDSize = 1;
928+
}
929+
930+
if(seenCMDCount >= seenCMDSize){
931+
seenCMDs = krealloc(seenCMDs, seenCMDSize*2, GFP_KERNEL);
932+
seenCMDSize *= 2;
933+
}
934+
935+
seenCMDs[seenCMDCount] = instr;
936+
seenCMDCount++;
937+
printk("New instruction: %x", instr);
938+
}
939+
879940
int do_alignment_fixup(unsigned long addr, struct pt_regs *regs){
880941
unsigned long long instrptr;
881942
u32 instr = 0;
@@ -893,6 +954,8 @@ int do_alignment_fixup(unsigned long addr, struct pt_regs *regs){
893954
*
894955
*/
895956

957+
instrDBG(instr);
958+
896959
uint8_t op0;
897960
int r;
898961
struct fixupDescription desc = {0};

0 commit comments

Comments
 (0)