Skip to content

Commit 08db696

Browse files
authored
[AMDGPU][True16][MC] V_MED3_I/U16_fake16 CodeGen pattern (#120600)
Patch #113603 replaced `V_MED3_I/U16` with `V_MED3_I/U16_fake16` for post-GFX11 targets, but it did not update the CodeGen pattern. This patch updates and corrects the CodeGen pattern.
1 parent 91e392b commit 08db696

File tree

3 files changed

+46
-22
lines changed

3 files changed

+46
-22
lines changed

llvm/lib/Target/AMDGPU/SIInstructions.td

+14-6
Original file line numberDiff line numberDiff line change
@@ -3645,13 +3645,15 @@ multiclass FPMed3Pat<ValueType vt,
36453645

36463646
multiclass Int16Med3Pat<Instruction med3Inst,
36473647
SDPatternOperator min,
3648-
SDPatternOperator max> {
3648+
SDPatternOperator max,
3649+
RegisterOperand outputSrcType> {
36493650
// This matches 16 permutations of
36503651
// max(min(x, y), min(max(x, y), z))
36513652
def : GCNPat <
36523653
(max (min i16:$src0, i16:$src1),
36533654
(min (max i16:$src0, i16:$src1), i16:$src2)),
3654-
(med3Inst SRCMODS.NONE, VSrc_b16:$src0, SRCMODS.NONE, VSrc_b16:$src1, SRCMODS.NONE, VSrc_b16:$src2, DSTCLAMP.NONE)
3655+
(med3Inst SRCMODS.NONE, outputSrcType:$src0, SRCMODS.NONE, outputSrcType:$src1,
3656+
SRCMODS.NONE, outputSrcType:$src2, DSTCLAMP.NONE)
36553657
>;
36563658

36573659
// This matches 16 permutations of
@@ -3716,10 +3718,16 @@ def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>
37163718
def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
37173719
}
37183720

3719-
let OtherPredicates = [isGFX9Plus] in {
3720-
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax>;
3721-
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax>;
3722-
} // End Predicates = [isGFX9Plus]
3721+
let SubtargetPredicate = isGFX9Plus in {
3722+
let True16Predicate = NotHasTrue16BitInsts in {
3723+
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
3724+
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
3725+
}
3726+
let True16Predicate = UseFakeTrue16Insts in {
3727+
defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
3728+
defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;
3729+
}
3730+
} // End SubtargetPredicate = [isGFX9Plus]
37233731

37243732
let OtherPredicates = [isGFX12Plus] in {
37253733
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-smed3.s16.mir

+16-8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ body: |
2424
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
2525
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
2626
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]]
27+
;
2728
; GFX9-LABEL: name: smed3_s16_vvv
2829
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
2930
; GFX9-NEXT: {{ $}}
@@ -32,14 +33,15 @@ body: |
3233
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3334
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
3435
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
36+
;
3537
; GFX11-LABEL: name: smed3_s16_vvv
3638
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
3739
; GFX11-NEXT: {{ $}}
3840
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3941
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4042
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41-
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
42-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]]
43+
; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
44+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]]
4345
%0:vgpr(s32) = COPY $vgpr0
4446
%1:vgpr(s32) = COPY $vgpr1
4547
%2:vgpr(s32) = COPY $vgpr2
@@ -75,6 +77,7 @@ body: |
7577
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
7678
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
7779
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_]]
80+
;
7881
; GFX9-LABEL: name: smed3_s16_vvv_multiuse0
7982
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
8083
; GFX9-NEXT: {{ $}}
@@ -84,15 +87,16 @@ body: |
8487
; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[COPY]], [[COPY1]], implicit $exec
8588
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
8689
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
90+
;
8791
; GFX11-LABEL: name: smed3_s16_vvv_multiuse0
8892
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
8993
; GFX11-NEXT: {{ $}}
9094
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
9195
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
9296
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
9397
; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
94-
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
95-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
98+
; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
99+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
96100
%0:vgpr(s32) = COPY $vgpr0
97101
%1:vgpr(s32) = COPY $vgpr1
98102
%2:vgpr(s32) = COPY $vgpr2
@@ -128,6 +132,7 @@ body: |
128132
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
129133
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
130134
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MIN_I16_e64_]]
135+
;
131136
; GFX9-LABEL: name: smed3_s16_vvv_multiuse1
132137
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
133138
; GFX9-NEXT: {{ $}}
@@ -137,15 +142,16 @@ body: |
137142
; GFX9-NEXT: [[V_MIN_I16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[COPY]], [[COPY1]], implicit $exec
138143
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
139144
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_e64_]]
145+
;
140146
; GFX11-LABEL: name: smed3_s16_vvv_multiuse1
141147
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
142148
; GFX11-NEXT: {{ $}}
143149
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
144150
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
145151
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
146152
; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
147-
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
148-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MIN_I16_fake16_e64_]]
153+
; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
154+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MIN_I16_fake16_e64_]]
149155
%0:vgpr(s32) = COPY $vgpr0
150156
%1:vgpr(s32) = COPY $vgpr1
151157
%2:vgpr(s32) = COPY $vgpr2
@@ -181,6 +187,7 @@ body: |
181187
; GFX8-NEXT: [[V_MAX_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
182188
; GFX8-NEXT: [[V_MIN_I16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_I16_e64 [[V_MAX_I16_e64_]], [[V_MAX_I16_e64_1]], implicit $exec
183189
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_I16_e64_1]], implicit [[V_MAX_I16_e64_1]]
190+
;
184191
; GFX9-LABEL: name: smed3_s16_vvv_multiuse2
185192
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
186193
; GFX9-NEXT: {{ $}}
@@ -191,6 +198,7 @@ body: |
191198
; GFX9-NEXT: [[V_MAX_I16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_e64 [[V_MIN_I16_e64_]], [[COPY2]], implicit $exec
192199
; GFX9-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
193200
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_e64_]]
201+
;
194202
; GFX11-LABEL: name: smed3_s16_vvv_multiuse2
195203
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
196204
; GFX11-NEXT: {{ $}}
@@ -199,8 +207,8 @@ body: |
199207
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
200208
; GFX11-NEXT: [[V_MIN_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_I16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
201209
; GFX11-NEXT: [[V_MAX_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_I16_fake16_e64 [[V_MIN_I16_fake16_e64_]], [[COPY2]], implicit $exec
202-
; GFX11-NEXT: [[V_MED3_I16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
203-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
210+
; GFX11-NEXT: [[V_MED3_I16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_I16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
211+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_I16_fake16_e64_]], implicit [[V_MAX_I16_fake16_e64_]]
204212
%0:vgpr(s32) = COPY $vgpr0
205213
%1:vgpr(s32) = COPY $vgpr1
206214
%2:vgpr(s32) = COPY $vgpr2

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-pattern-umed3.s16.mir

+16-8
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ body: |
2424
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
2525
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
2626
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]]
27+
;
2728
; GFX9-LABEL: name: umed3_s16_vvv
2829
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
2930
; GFX9-NEXT: {{ $}}
@@ -32,14 +33,15 @@ body: |
3233
; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
3334
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
3435
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
36+
;
3537
; GFX11-LABEL: name: umed3_s16_vvv
3638
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
3739
; GFX11-NEXT: {{ $}}
3840
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
3941
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
4042
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
41-
; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
42-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]]
43+
; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
44+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]]
4345
%0:vgpr(s32) = COPY $vgpr0
4446
%1:vgpr(s32) = COPY $vgpr1
4547
%2:vgpr(s32) = COPY $vgpr2
@@ -75,6 +77,7 @@ body: |
7577
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
7678
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
7779
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_]]
80+
;
7881
; GFX9-LABEL: name: umed3_s16_vvv_multiuse0
7982
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
8083
; GFX9-NEXT: {{ $}}
@@ -84,15 +87,16 @@ body: |
8487
; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[COPY]], [[COPY1]], implicit $exec
8588
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
8689
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
90+
;
8791
; GFX11-LABEL: name: umed3_s16_vvv_multiuse0
8892
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
8993
; GFX11-NEXT: {{ $}}
9094
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
9195
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
9296
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
9397
; GFX11-NEXT: [[V_MAX_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
94-
; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
95-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
98+
; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
99+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
96100
%0:vgpr(s32) = COPY $vgpr0
97101
%1:vgpr(s32) = COPY $vgpr1
98102
%2:vgpr(s32) = COPY $vgpr2
@@ -128,6 +132,7 @@ body: |
128132
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
129133
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
130134
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MIN_U16_e64_]]
135+
;
131136
; GFX9-LABEL: name: umed3_s16_vvv_multiuse1
132137
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
133138
; GFX9-NEXT: {{ $}}
@@ -137,15 +142,16 @@ body: |
137142
; GFX9-NEXT: [[V_MIN_U16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[COPY]], [[COPY1]], implicit $exec
138143
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
139144
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_e64_]]
145+
;
140146
; GFX11-LABEL: name: umed3_s16_vvv_multiuse1
141147
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
142148
; GFX11-NEXT: {{ $}}
143149
; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
144150
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
145151
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
146152
; GFX11-NEXT: [[V_MIN_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
147-
; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
148-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MIN_U16_fake16_e64_]]
153+
; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
154+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MIN_U16_fake16_e64_]]
149155
%0:vgpr(s32) = COPY $vgpr0
150156
%1:vgpr(s32) = COPY $vgpr1
151157
%2:vgpr(s32) = COPY $vgpr2
@@ -181,6 +187,7 @@ body: |
181187
; GFX8-NEXT: [[V_MAX_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
182188
; GFX8-NEXT: [[V_MIN_U16_e64_1:%[0-9]+]]:vgpr_32 = V_MIN_U16_e64 [[V_MAX_U16_e64_]], [[V_MAX_U16_e64_1]], implicit $exec
183189
; GFX8-NEXT: S_ENDPGM 0, implicit [[V_MIN_U16_e64_1]], implicit [[V_MAX_U16_e64_1]]
190+
;
184191
; GFX9-LABEL: name: umed3_s16_vvv_multiuse2
185192
; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
186193
; GFX9-NEXT: {{ $}}
@@ -191,6 +198,7 @@ body: |
191198
; GFX9-NEXT: [[V_MAX_U16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_e64 [[V_MIN_U16_e64_]], [[COPY2]], implicit $exec
192199
; GFX9-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
193200
; GFX9-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_e64_]]
201+
;
194202
; GFX11-LABEL: name: umed3_s16_vvv_multiuse2
195203
; GFX11: liveins: $vgpr0, $vgpr1, $vgpr2
196204
; GFX11-NEXT: {{ $}}
@@ -199,8 +207,8 @@ body: |
199207
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
200208
; GFX11-NEXT: [[V_MIN_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MIN_U16_fake16_e64 [[COPY]], [[COPY1]], implicit $exec
201209
; GFX11-NEXT: [[V_MAX_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MAX_U16_fake16_e64 [[V_MIN_U16_fake16_e64_]], [[COPY2]], implicit $exec
202-
; GFX11-NEXT: [[V_MED3_U16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
203-
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
210+
; GFX11-NEXT: [[V_MED3_U16_fake16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_U16_fake16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $exec
211+
; GFX11-NEXT: S_ENDPGM 0, implicit [[V_MED3_U16_fake16_e64_]], implicit [[V_MAX_U16_fake16_e64_]]
204212
%0:vgpr(s32) = COPY $vgpr0
205213
%1:vgpr(s32) = COPY $vgpr1
206214
%2:vgpr(s32) = COPY $vgpr2

0 commit comments

Comments
 (0)