-
Notifications
You must be signed in to change notification settings - Fork 12.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU][True16][CodeGen] true16 codegen pattern for fma #122950
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,8 @@ | |
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s | ||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s | ||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s | ||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s | ||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s | ||
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s | ||
|
||
define float @v_fma_f32(float %x, float %y, float %z) { | ||
; GFX6-LABEL: v_fma_f32: | ||
|
@@ -107,11 +108,18 @@ define half @v_fma_f16(half %x, half %y, half %z) { | |
; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-LABEL: v_fma_f16: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-NEXT: v_fma_f16 v0, v0, v1, v2 | ||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ||
; GFX11-TRUE16-LABEL: v_fma_f16: | ||
; GFX11-TRUE16: ; %bb.0: | ||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-TRUE16-NEXT: v_fmac_f16_e32 v2.l, v0.l, v1.l | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks like it should be optimized in the True16 case. I notice we have not optimized it downstream either. If it is not easy to fix, I'd be OK with landing this as long as the optimization is tracked for a later fix. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hi Joe. I ran a quick check on this, and it seems there is a problem in the two-address convert pass: it fails to map the dst register and thus fails to convert the 2-address form to the 3-address form. It seems to be related to the register class setting for vgpr_16. Since the gisel change is not upstreamed, it's better to fix this in the downstream branch. I'll file a case to track this, and we can just merge it as it is now in the upstream. |
||
; GFX11-TRUE16-NEXT: v_mov_b32_e32 v0, v2 | ||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-FAKE16-LABEL: v_fma_f16: | ||
; GFX11-FAKE16: ; %bb.0: | ||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, v2 | ||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] | ||
%fma = call half @llvm.fma.f16(half %x, half %y, half %z) | ||
ret half %fma | ||
} | ||
|
@@ -145,11 +153,17 @@ define half @v_fma_f16_fneg_lhs(half %x, half %y, half %z) { | |
; GFX10-NEXT: v_fma_f16 v0, -v0, v1, v2 | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-LABEL: v_fma_f16_fneg_lhs: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-NEXT: v_fma_f16 v0, -v0, v1, v2 | ||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ||
; GFX11-TRUE16-LABEL: v_fma_f16_fneg_lhs: | ||
; GFX11-TRUE16: ; %bb.0: | ||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, -v0.l, v1.l, v2.l | ||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-FAKE16-LABEL: v_fma_f16_fneg_lhs: | ||
; GFX11-FAKE16: ; %bb.0: | ||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-FAKE16-NEXT: v_fma_f16 v0, -v0, v1, v2 | ||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] | ||
%neg.x = fneg half %x | ||
%fma = call half @llvm.fma.f16(half %neg.x, half %y, half %z) | ||
ret half %fma | ||
|
@@ -184,11 +198,17 @@ define half @v_fma_f16_fneg_rhs(half %x, half %y, half %z) { | |
; GFX10-NEXT: v_fma_f16 v0, v0, -v1, v2 | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-LABEL: v_fma_f16_fneg_rhs: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-NEXT: v_fma_f16 v0, v0, -v1, v2 | ||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ||
; GFX11-TRUE16-LABEL: v_fma_f16_fneg_rhs: | ||
; GFX11-TRUE16: ; %bb.0: | ||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, -v1.l, v2.l | ||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-FAKE16-LABEL: v_fma_f16_fneg_rhs: | ||
; GFX11-FAKE16: ; %bb.0: | ||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, -v1, v2 | ||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] | ||
%neg.y = fneg half %y | ||
%fma = call half @llvm.fma.f16(half %x, half %neg.y, half %z) | ||
ret half %fma | ||
|
@@ -223,11 +243,17 @@ define half @v_fma_f16_fneg_add(half %x, half %y, half %z) { | |
; GFX10-NEXT: v_fma_f16 v0, v0, v1, -v2 | ||
; GFX10-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-LABEL: v_fma_f16_fneg_add: | ||
; GFX11: ; %bb.0: | ||
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-NEXT: v_fma_f16 v0, v0, v1, -v2 | ||
; GFX11-NEXT: s_setpc_b64 s[30:31] | ||
; GFX11-TRUE16-LABEL: v_fma_f16_fneg_add: | ||
; GFX11-TRUE16: ; %bb.0: | ||
; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-TRUE16-NEXT: v_fma_f16 v0.l, v0.l, v1.l, -v2.l | ||
; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] | ||
; | ||
; GFX11-FAKE16-LABEL: v_fma_f16_fneg_add: | ||
; GFX11-FAKE16: ; %bb.0: | ||
; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) | ||
; GFX11-FAKE16-NEXT: v_fma_f16 v0, v0, v1, -v2 | ||
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] | ||
%neg.z = fneg half %z | ||
%fma = call half @llvm.fma.f16(half %x, half %y, half %neg.z) | ||
ret half %fma | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done