Skip to content

Commit de2acda

Browse files
authored
[AMDGPU][True16][MC] support more VOP3 inst in true16/fake16 format (#113603)
Support true16 and fake16 formats for more VOP3 instructions in MC. This patch updates the true16 and fake16 vop_profile for the following instructions and updates the asm/dasm tests: v_mad_u16, v_mad_i16, v_med3_f16, v_med3_i16, v_med3_u16, v_max3_f16, v_max3_i16, v_max3_u16, v_min3_f16, v_min3_i16, v_min3_u16, v_med3_num_f16.
1 parent 4c2a46f commit de2acda

15 files changed

+9364
-3381
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

+3
Original file line numberDiff line numberDiff line change
@@ -3659,7 +3659,10 @@ multiclass Int16Med3Pat<Instruction med3Inst,
36593659
defm : FPMed3Pat<f32, V_MED3_F32_e64>;
36603660

36613661
let SubtargetPredicate = HasMed3_16 in {
3662+
let True16Predicate = NotHasTrue16BitInsts in
36623663
defm : FPMed3Pat<f16, V_MED3_F16_e64>;
3664+
let True16Predicate = UseFakeTrue16Insts in
3665+
defm : FPMed3Pat<f16, V_MED3_F16_fake16_e64>;
36633666
}
36643667

36653668
class

llvm/lib/Target/AMDGPU/VOP3Instructions.td

+36-32
Original file line numberDiff line numberDiff line change
@@ -371,8 +371,8 @@ let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in {
371371
} // End SubtargetPredicate = isGFX9Only, FPDPRounding = 1
372372

373373
let SubtargetPredicate = isGFX9Plus in {
374-
defm V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
375-
defm V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>>;
374+
defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
375+
defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
376376
let OtherPredicates = [isNotGFX90APlus] in
377377
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
378378
} // End SubtargetPredicate = isGFX9Plus
@@ -437,16 +437,20 @@ defm: Ternary_i16_Pats<imad, V_MAD_U16_e64>;
437437

438438
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
439439

440+
multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
441+
Instruction inst> {
442+
def : GCNPat <
443+
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
444+
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
445+
>;
446+
}
440447

441-
class Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
442-
Instruction inst> : GCNPat <
443-
(op2 (op1 i16:$src0, i16:$src1), i16:$src2),
444-
(inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
445-
>;
446-
447-
let Predicates = [Has16BitInsts, isGFX10Plus] in {
448-
def: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
449-
} // End Predicates = [Has16BitInsts, isGFX10Plus]
448+
let True16Predicate = UseFakeTrue16Insts in {
449+
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_fake16_e64>;
450+
} // End True16Predicate = UseFakeTrue16Insts
451+
let OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts in {
452+
defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
453+
} // End OtherPredicates = [isGFX10Plus, Has16BitInsts], True16Predicate = NotHasTrue16BitInsts
450454

451455
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
452456
(ops node:$x, node:$y, node:$z),
@@ -616,17 +620,17 @@ let isCommutable = 1, isReMaterializable = 1 in {
616620
} // End isCommutable = 1, isReMaterializable = 1
617621
// TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this
618622
// to the new src0.
619-
defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmed3>;
620-
defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmed3>;
621-
defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumed3>;
623+
defm V_MED3_F16 : VOP3Inst_t16 <"v_med3_f16", VOP_F16_F16_F16_F16, AMDGPUfmed3>;
624+
defm V_MED3_I16 : VOP3Inst_t16 <"v_med3_i16", VOP_I16_I16_I16_I16, AMDGPUsmed3>;
625+
defm V_MED3_U16 : VOP3Inst_t16 <"v_med3_u16", VOP_I16_I16_I16_I16, AMDGPUumed3>;
622626

623-
defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmin3>;
624-
defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmin3>;
625-
defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumin3>;
627+
defm V_MIN3_F16 : VOP3Inst_t16 <"v_min3_f16", VOP_F16_F16_F16_F16, AMDGPUfmin3>;
628+
defm V_MIN3_I16 : VOP3Inst_t16 <"v_min3_i16", VOP_I16_I16_I16_I16, AMDGPUsmin3>;
629+
defm V_MIN3_U16 : VOP3Inst_t16 <"v_min3_u16", VOP_I16_I16_I16_I16, AMDGPUumin3>;
626630

627-
defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmax3>;
628-
defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUsmax3>;
629-
defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile<VOP_I16_I16_I16_I16, VOP3_OPSEL>, AMDGPUumax3>;
631+
defm V_MAX3_F16 : VOP3Inst_t16 <"v_max3_f16", VOP_F16_F16_F16_F16, AMDGPUfmax3>;
632+
defm V_MAX3_I16 : VOP3Inst_t16 <"v_max3_i16", VOP_I16_I16_I16_I16, AMDGPUsmax3>;
633+
defm V_MAX3_U16 : VOP3Inst_t16 <"v_max3_u16", VOP_I16_I16_I16_I16, AMDGPUumax3>;
630634

631635
let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
632636
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
@@ -1582,7 +1586,7 @@ defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
15821586
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
15831587
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
15841588
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
1585-
defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x232, "V_MED3_F16", "v_med3_num_f16">;
1589+
defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
15861590
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
15871591
defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
15881592
defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">;
@@ -1700,22 +1704,22 @@ defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>;
17001704
defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>;
17011705
defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11_gfx12<0x23d>;
17021706
defm V_XOR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x240>;
1703-
defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x241, "V_MAD_U16_gfx9", "v_mad_u16">;
1707+
defm V_MAD_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x241, "v_mad_u16", "V_MAD_U16_gfx9">;
17041708
defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
17051709
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
17061710
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
17071711
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
17081712
defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
1709-
defm V_MIN3_F16 : VOP3_Realtriple_gfx11<0x249>;
1710-
defm V_MIN3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24a>;
1711-
defm V_MIN3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24b>;
1712-
defm V_MAX3_F16 : VOP3_Realtriple_gfx11<0x24c>;
1713-
defm V_MAX3_I16 : VOP3_Realtriple_gfx11_gfx12<0x24d>;
1714-
defm V_MAX3_U16 : VOP3_Realtriple_gfx11_gfx12<0x24e>;
1715-
defm V_MED3_F16 : VOP3_Realtriple_gfx11<0x24f>;
1716-
defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12<0x250>;
1717-
defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12<0x251>;
1718-
defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x253, "V_MAD_I16_gfx9", "v_mad_i16">;
1713+
defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">;
1714+
defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">;
1715+
defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">;
1716+
defm V_MAX3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24c, "v_max3_f16">;
1717+
defm V_MAX3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x24d, "v_max3_i16">;
1718+
defm V_MAX3_U16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x24e, "v_max3_u16">;
1719+
defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x24f, "v_med3_f16">;
1720+
defm V_MED3_I16 : VOP3Only_Realtriple_t16_and_fake16_gfx11_gfx12<0x250, "v_med3_i16">;
1721+
defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x251, "v_med3_u16">;
1722+
defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x253, "v_mad_i16", "V_MAD_I16_gfx9">;
17191723
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
17201724
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
17211725
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;

llvm/lib/Target/AMDGPU/VOPInstructions.td

+14-2
Original file line numberDiff line numberDiff line change
@@ -1906,10 +1906,22 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName
19061906
string pseudo_mnemonic = "", bit isSingle = 0> :
19071907
VOP3_Realtriple_with_name<GFX11Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
19081908

1909+
// Instantiates VOP3_Realtriple_t16_gfx11 twice for the same opcode and
// assembly name: once for the opName#"_t16" pseudo and once for the
// opName#"_fake16" pseudo. VOP3_Realtriple_t16_gfx11 takes asmName before
// opName, so the arguments must be passed in that order.
multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME,
1910+
string pseudo_mnemonic = "", bit isSingle = 0> {
1911+
defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1912+
defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1913+
}
1914+
19091915
multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,
19101916
string opName = NAME, string pseudo_mnemonic = "">
19111917
: VOP3_Realtriple_t16_gfx11<op, asmName, opName, pseudo_mnemonic, 1>;
19121918

1919+
multiclass VOP3Only_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName,
1920+
string opName = NAME, string pseudo_mnemonic = ""> {
1921+
defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, 1>;
1922+
defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, 1>;
1923+
}
1924+
19131925
multiclass VOP3be_Real_gfx11<bits<10> op, string opName, string asmName,
19141926
bit isSingle = 0> :
19151927
VOP3be_Real<GFX11Gen, op, opName, asmName, isSingle>;
@@ -1943,8 +1955,8 @@ multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName
19431955

19441956
multiclass VOP3_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName, string opName = NAME,
19451957
string pseudo_mnemonic = "", bit isSingle = 0> {
1946-
defm opName#"_t16":VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1947-
defm opName#"_fake16":VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
1958+
defm _t16:VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
1959+
defm _fake16:VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
19481960
}
19491961

19501962
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,

0 commit comments

Comments
 (0)