@@ -167,8 +167,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
167
167
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
168
168
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
169
169
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
170
- defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile< VOP_F16_F16_F16> , DivergentBinFrag<fminimum>>;
171
- defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile< VOP_F16_F16_F16> , DivergentBinFrag<fmaximum>>;
170
+ defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
171
+ defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
172
172
173
173
let SchedRW = [WriteDoubleAdd] in {
174
174
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -368,8 +368,8 @@ let SubtargetPredicate = isGFX9Only, FPDPRounding = 1 in {
368
368
} // End SubtargetPredicate = isGFX9Only, FPDPRounding = 1
369
369
370
370
let SubtargetPredicate = isGFX9Plus in {
371
- defm V_MAD_U16_gfx9 : VOP3Inst <"v_mad_u16_gfx9", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> >;
372
- defm V_MAD_I16_gfx9 : VOP3Inst <"v_mad_i16_gfx9", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> >;
+ // The i16 MAD definitions are instantiated through VOP3Inst_t16 from the bare
+ // VOP_I16_I16_I16_I16 profile (no explicit VOP3_Profile/VOP3_OPSEL wrapper).
+ // NOTE(review): the selection patterns in this file reference the records
+ // V_MAD_U16_gfx9_e64, V_MAD_U16_gfx9_t16_e64 and V_MAD_U16_gfx9_fake16_e64,
+ // presumably all produced by this multiclass -- confirm against VOP3Inst_t16.
371
+ defm V_MAD_U16_gfx9 : VOP3Inst_t16 <"v_mad_u16_gfx9", VOP_I16_I16_I16_I16>;
372
+ defm V_MAD_I16_gfx9 : VOP3Inst_t16 <"v_mad_i16_gfx9", VOP_I16_I16_I16_I16>;
373
373
let OtherPredicates = [isNotGFX90APlus] in
374
374
def V_INTERP_P2_F16_gfx9 : VOP3Interp <"v_interp_p2_f16_gfx9", VOP3_INTERP16<[f16, f32, i32, f32]>>;
375
375
} // End SubtargetPredicate = isGFX9Plus
@@ -434,16 +434,24 @@ defm: Ternary_i16_Pats<imad, V_MAD_U16_e64>;
434
434
435
435
} // End Predicates = [Has16BitInsts, isGFX6GFX7GFX8GFX9]
436
436
437
+ // Emits a single GCNPat that selects `inst` for the i16 DAG
+ //   (op2 (op1 $src0, $src1), $src2).
+ // All three source-modifier operands are SRCMODS.NONE and the clamp operand
+ // is DSTCLAMP.NONE. Written as a multiclass so it is instantiated with
+ // `defm` under whatever Predicates are in effect at the use site.
+ multiclass Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
438
+ Instruction inst> {
439
+ def : GCNPat <
440
+ (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
441
+ (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
442
+ >;
443
+ }
437
444
438
- class Ternary_i16_Pats_gfx9<SDPatternOperator op1, SDPatternOperator op2,
439
- Instruction inst> : GCNPat <
440
- (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
441
- (inst SRCMODS.NONE, $src0, SRCMODS.NONE, $src1, SRCMODS.NONE, $src2, DSTCLAMP.NONE)
442
- >;
443
-
444
- let Predicates = [Has16BitInsts, isGFX10Plus] in {
445
- def: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
446
- } // End Predicates = [Has16BitInsts, isGFX10Plus]
445
+ // Select (add (mul a, b), c) on i16 to a V_MAD_U16_gfx9 record; which record
+ // is used depends on the active predicate set (real true16, fake true16, or
+ // GFX10+ without true16).
+ let Predicates = [UseRealTrue16Insts] in {
446
+ defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_t16_e64>;
447
+ // NOTE(review): the I16 counterpart below is left disabled. It would match
+ // the same (add (mul ...)) DAG as the U16 pattern above, which is presumably
+ // why it is off -- confirm and either delete it or document the reason.
+ //   defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_I16_gfx9_t16_e64>;
448
+ } // End Predicates = [UseRealTrue16Insts]
449
+ let Predicates = [UseFakeTrue16Insts] in {
450
+ defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_fake16_e64>;
451
+ } // End Predicates = [UseFakeTrue16Insts]
452
+ let Predicates = [Has16BitInsts, NotHasTrue16BitInsts, isGFX10Plus] in {
453
+ defm: Ternary_i16_Pats_gfx9<mul, add, V_MAD_U16_gfx9_e64>;
454
+ } // End Predicates = [Has16BitInsts, NotHasTrue16BitInsts, isGFX10Plus]
447
455
448
456
class ThreeOpFragSDAG<SDPatternOperator op1, SDPatternOperator op2> : PatFrag<
449
457
(ops node:$x, node:$y, node:$z),
@@ -613,17 +621,17 @@ let isCommutable = 1, isReMaterializable = 1 in {
613
621
} // End isCommutable = 1, isReMaterializable = 1
614
622
// TODO src0 contains the opsel bit for dst, so if we commute, need to mask and swap this
615
623
// to the new src0.
616
- defm V_MED3_F16 : VOP3Inst <"v_med3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfmed3>;
617
- defm V_MED3_I16 : VOP3Inst <"v_med3_i16", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> , AMDGPUsmed3>;
618
- defm V_MED3_U16 : VOP3Inst <"v_med3_u16", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> , AMDGPUumed3>;
624
+ defm V_MED3_F16 : VOP3Inst_t16 <"v_med3_f16", VOP_F16_F16_F16_F16, AMDGPUfmed3>;
625
+ defm V_MED3_I16 : VOP3Inst_t16 <"v_med3_i16", VOP_I16_I16_I16_I16, AMDGPUsmed3>;
626
+ defm V_MED3_U16 : VOP3Inst_t16 <"v_med3_u16", VOP_I16_I16_I16_I16, AMDGPUumed3>;
619
627
620
- defm V_MIN3_F16 : VOP3Inst <"v_min3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfmin3>;
621
- defm V_MIN3_I16 : VOP3Inst <"v_min3_i16", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> , AMDGPUsmin3>;
622
- defm V_MIN3_U16 : VOP3Inst <"v_min3_u16", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> , AMDGPUumin3>;
628
+ defm V_MIN3_F16 : VOP3Inst_t16 <"v_min3_f16", VOP_F16_F16_F16_F16, AMDGPUfmin3>;
629
+ defm V_MIN3_I16 : VOP3Inst_t16 <"v_min3_i16", VOP_I16_I16_I16_I16, AMDGPUsmin3>;
630
+ defm V_MIN3_U16 : VOP3Inst_t16 <"v_min3_u16", VOP_I16_I16_I16_I16, AMDGPUumin3>;
623
631
624
- defm V_MAX3_F16 : VOP3Inst <"v_max3_f16", VOP3_Profile< VOP_F16_F16_F16_F16, VOP3_OPSEL> , AMDGPUfmax3>;
625
- defm V_MAX3_I16 : VOP3Inst <"v_max3_i16", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> , AMDGPUsmax3>;
626
- defm V_MAX3_U16 : VOP3Inst <"v_max3_u16", VOP3_Profile< VOP_I16_I16_I16_I16, VOP3_OPSEL> , AMDGPUumax3>;
632
+ defm V_MAX3_F16 : VOP3Inst_t16 <"v_max3_f16", VOP_F16_F16_F16_F16, AMDGPUfmax3>;
633
+ defm V_MAX3_I16 : VOP3Inst_t16 <"v_max3_i16", VOP_I16_I16_I16_I16, AMDGPUsmax3>;
634
+ defm V_MAX3_U16 : VOP3Inst_t16 <"v_max3_u16", VOP_I16_I16_I16_I16, AMDGPUumax3>;
627
635
628
636
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
629
637
defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
@@ -1056,7 +1064,7 @@ defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
1056
1064
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
1057
1065
defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
1058
1066
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
1059
- defm V_MED3_NUM_F16 : VOP3_Realtriple_with_name_gfx12 <0x232, "V_MED3_F16", "v_med3_num_f16 ">;
1067
+ defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12 <0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
1060
1068
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
1061
1069
defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
1062
1070
defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">;
@@ -1081,8 +1089,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
1081
1089
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
1082
1090
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
1083
1091
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
1084
- defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x367>;
1085
- defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12 <0x368>;
1092
+ defm V_MINIMUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12 <0x367, "v_minimum_f16" >;
1093
+ defm V_MAXIMUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12 <0x368, "v_maximum_f16" >;
1086
1094
1087
1095
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
1088
1096
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
@@ -1172,22 +1180,22 @@ defm V_QSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23a>;
1172
1180
defm V_MQSAD_PK_U16_U8 : VOP3_Real_Base_gfx11_gfx12<0x23b>;
1173
1181
defm V_MQSAD_U32_U8 : VOP3_Real_Base_gfx11_gfx12<0x23d>;
1174
1182
defm V_XOR3_B32 : VOP3_Realtriple_gfx11_gfx12<0x240>;
1175
- defm V_MAD_U16 : VOP3_Realtriple_with_name_gfx11_gfx12 <0x241, "V_MAD_U16_gfx9 ", "v_mad_u16 ">;
1183
+ defm V_MAD_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x241, "v_mad_u16", "V_MAD_U16_gfx9">;
1176
1184
defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
1177
1185
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
1178
1186
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
1179
1187
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
1180
1188
defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
1181
- defm V_MIN3_F16 : VOP3_Realtriple_gfx11 <0x249>;
1182
- defm V_MIN3_I16 : VOP3_Realtriple_gfx11_gfx12 <0x24a>;
1183
- defm V_MIN3_U16 : VOP3_Realtriple_gfx11_gfx12 <0x24b>;
1184
- defm V_MAX3_F16 : VOP3_Realtriple_gfx11 <0x24c>;
1185
- defm V_MAX3_I16 : VOP3_Realtriple_gfx11_gfx12 <0x24d>;
1186
- defm V_MAX3_U16 : VOP3_Realtriple_gfx11_gfx12 <0x24e>;
1187
- defm V_MED3_F16 : VOP3_Realtriple_gfx11 <0x24f>;
1188
- defm V_MED3_I16 : VOP3_Realtriple_gfx11_gfx12 <0x250>;
1189
- defm V_MED3_U16 : VOP3_Realtriple_gfx11_gfx12 <0x251>;
1190
- defm V_MAD_I16 : VOP3_Realtriple_with_name_gfx11_gfx12 <0x253, "V_MAD_I16_gfx9 ", "v_mad_i16 ">;
1189
+ // 16-bit min3/max3/med3 real encodings. The F16 forms use the gfx11-only
+ // multiclass; every integer form uses the same gfx11_gfx12 multiclass.
+ // NOTE(review): MAX3_I16, MAX3_U16 and MED3_I16 previously named a
+ // VOP3Only_* multiclass while their siblings did not; normalised to the
+ // VOP3_* form -- confirm the VOP3Only_* choice was not deliberate.
+ defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11 <0x249, "v_min3_f16" >;
1190
+ defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x24a, "v_min3_i16" >;
1191
+ defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x24b, "v_min3_u16" >;
1192
+ defm V_MAX3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11 <0x24c, "v_max3_f16" >;
1193
+ defm V_MAX3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x24d, "v_max3_i16" >;
1194
+ defm V_MAX3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x24e, "v_max3_u16" >;
1195
+ defm V_MED3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11 <0x24f, "v_med3_f16" >;
1196
+ defm V_MED3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x250, "v_med3_i16" >;
1197
+ defm V_MED3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x251, "v_med3_u16" >;
1198
+ defm V_MAD_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12 <0x253, "v_mad_i16", "V_MAD_I16_gfx9">;
1191
1199
defm V_DIV_FIXUP_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x254, "V_DIV_FIXUP_F16_gfx9", "v_div_fixup_f16">;
1192
1200
defm V_ADD3_U32 : VOP3_Realtriple_gfx11_gfx12<0x255>;
1193
1201
defm V_LSHL_OR_B32 : VOP3_Realtriple_gfx11_gfx12<0x256>;
0 commit comments