Skip to content

Commit c70d421

Browse files
committed
Initial experiments
1 parent f325085 commit c70d421

File tree

4 files changed

+175
-0
lines changed

4 files changed

+175
-0
lines changed

Diff for: clang/lib/Basic/Targets/SystemZ.h

+9
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,20 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
9191
"-v128:64-a:8:16-n32:64");
9292
}
9393
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;
94+
95+
HasLegalHalfType = false; // Default=false
96+
HalfArgsAndReturns = false; // Default=false
97+
HasFloat16 = true; // Default=false
98+
9499
HasStrictFP = true;
95100
}
96101

97102
unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override;
98103

104+
// Always answers false for SystemZ.
// NOTE(review): presumably this TargetInfo hook selects whether clang emits
// the llvm.convert.{to,from}.fp16 intrinsics for half conversions; returning
// false would then mean plain fpext/fptrunc on the IR half type are used
// instead -- confirm against the base-class documentation in TargetInfo.h.
bool useFP16ConversionIntrinsics() const override {
105+
return false;
106+
}
107+
99108
void getTargetDefines(const LangOptions &Opts,
100109
MacroBuilder &Builder) const override;
101110

Diff for: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -704,6 +704,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
704704
setOperationAction(ISD::BITCAST, MVT::f32, Custom);
705705
}
706706

707+
// Expand FP16 <=> FP32 conversions to libcalls and handle FP16 loads and
708+
// stores in GPRs.
709+
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
710+
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
711+
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
712+
setTruncStoreAction(MVT::f32, MVT::f16, Expand);
713+
707714
// VASTART and VACOPY need to deal with the SystemZ-specific varargs
708715
// structure, but VAEND is a no-op.
709716
setOperationAction(ISD::VASTART, MVT::Other, Custom);

Diff for: llvm/lib/Target/SystemZ/SystemZISelLowering.h

+1
Original file line numberDiff line numberDiff line change
@@ -476,6 +476,7 @@ class SystemZTargetLowering : public TargetLowering {
476476
// LD, and having the full constant in memory enables reg/mem opcodes.
477477
return VT != MVT::f64;
478478
}
479+
// Unconditionally opts this target in to soft promotion of the half type.
// NOTE(review): presumably this makes every f16 operation get extended to
// f32, performed there, and rounded back to f16 individually (rather than
// keeping a whole chain in f32) -- confirm against the hook's documentation
// in TargetLowering.h.
bool softPromoteHalfType() const override { return true; }
479480
bool hasInlineStackProbe(const MachineFunction &MF) const override;
480481
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override;
481482
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override;

Diff for: llvm/test/CodeGen/SystemZ/fp-half.ll

+158
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
3+
;
4+
; Tests for FP16 (Half).
5+
6+
; A function where everything is done in Half.
7+
; Loads one half from each of %Op0 and %Op1, adds them as half, and stores the
; sum to %Dst. The expected assembly widens both operands with
; __gnu_h2f_ieee, adds in f32 (aebr), narrows the sum with __gnu_f2h_ieee and
; stores it with a halfword store (sth).
define void @fun0(ptr %Op0, ptr %Op1, ptr %Dst) {
8+
; CHECK-LABEL: fun0:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
11+
; CHECK-NEXT: .cfi_offset %r12, -64
12+
; CHECK-NEXT: .cfi_offset %r13, -56
13+
; CHECK-NEXT: .cfi_offset %r14, -48
14+
; CHECK-NEXT: .cfi_offset %r15, -40
15+
; CHECK-NEXT: aghi %r15, -168
16+
; CHECK-NEXT: .cfi_def_cfa_offset 328
17+
; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
18+
; CHECK-NEXT: .cfi_offset %f8, -168
19+
; CHECK-NEXT: llgh %r0, 0(%r3)
20+
; CHECK-NEXT: llgh %r13, 0(%r2)
21+
; CHECK-NEXT: lgr %r12, %r4
22+
; CHECK-NEXT: lgr %r2, %r0
23+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
24+
; CHECK-NEXT: ler %f8, %f0
25+
; CHECK-NEXT: lgr %r2, %r13
26+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
27+
; CHECK-NEXT: aebr %f0, %f8
28+
; CHECK-NEXT: brasl %r14, __gnu_f2h_ieee@PLT
29+
; CHECK-NEXT: sth %r2, 0(%r12)
30+
; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
31+
; CHECK-NEXT: lmg %r12, %r15, 264(%r15)
32+
; CHECK-NEXT: br %r14
33+
entry:
34+
%0 = load half, ptr %Op0, align 2
35+
%1 = load half, ptr %Op1, align 2
36+
%add = fadd half %0, %1
37+
store half %add, ptr %Dst, align 2
38+
ret void
39+
}
40+
41+
; A function where Half values are loaded and extended to float and then
42+
; operated on.
43+
; Loads one half from each of %Op0 and %Op1, explicitly extends both to float
; (fpext), adds them as float, truncates the sum back to half (fptrunc) and
; stores it to %Dst. The expected assembly performs each fpext as a
; __gnu_h2f_ieee call, the add as aebr, and the fptrunc as a __gnu_f2h_ieee
; call.
define void @fun1(ptr %Op0, ptr %Op1, ptr %Dst) {
44+
; CHECK-LABEL: fun1:
45+
; CHECK: # %bb.0: # %entry
46+
; CHECK-NEXT: stmg %r12, %r15, 96(%r15)
47+
; CHECK-NEXT: .cfi_offset %r12, -64
48+
; CHECK-NEXT: .cfi_offset %r13, -56
49+
; CHECK-NEXT: .cfi_offset %r14, -48
50+
; CHECK-NEXT: .cfi_offset %r15, -40
51+
; CHECK-NEXT: aghi %r15, -168
52+
; CHECK-NEXT: .cfi_def_cfa_offset 328
53+
; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
54+
; CHECK-NEXT: .cfi_offset %f8, -168
55+
; CHECK-NEXT: llgh %r2, 0(%r2)
56+
; CHECK-NEXT: lgr %r13, %r4
57+
; CHECK-NEXT: lgr %r12, %r3
58+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
59+
; CHECK-NEXT: llgh %r2, 0(%r12)
60+
; CHECK-NEXT: ler %f8, %f0
61+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
62+
; CHECK-NEXT: aebr %f0, %f8
63+
; CHECK-NEXT: brasl %r14, __gnu_f2h_ieee@PLT
64+
; CHECK-NEXT: sth %r2, 0(%r13)
65+
; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
66+
; CHECK-NEXT: lmg %r12, %r15, 264(%r15)
67+
; CHECK-NEXT: br %r14
68+
entry:
69+
%0 = load half, ptr %Op0, align 2
70+
%ext = fpext half %0 to float
71+
%1 = load half, ptr %Op1, align 2
72+
%ext1 = fpext half %1 to float
73+
%add = fadd float %ext, %ext1
74+
%res = fptrunc float %add to half
75+
store half %res, ptr %Dst, align 2
76+
ret void
77+
}
78+
79+
; Test case with a Half incoming argument.
80+
; Takes a half argument, divides it by the half constant 0xHC700 (-7.0),
; reinterprets the quotient as i16 and returns whether that integer is
; negative, i.e. whether the sign bit of the quotient is set. The expected
; assembly receives the argument in %r2, widens it with __gnu_h2f_ieee,
; divides in f32 (deb), narrows with __gnu_f2h_ieee and extracts the result
; bit with risbg.
define zeroext i1 @fun2(half noundef %f) {
81+
; CHECK-LABEL: fun2:
82+
; CHECK: # %bb.0: # %start
83+
; CHECK-NEXT: stmg %r14, %r15, 112(%r15)
84+
; CHECK-NEXT: .cfi_offset %r14, -48
85+
; CHECK-NEXT: .cfi_offset %r15, -40
86+
; CHECK-NEXT: aghi %r15, -160
87+
; CHECK-NEXT: .cfi_def_cfa_offset 320
88+
; CHECK-NEXT: llgfr %r2, %r2
89+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
90+
; CHECK-NEXT: larl %r1, .LCPI2_0
91+
; CHECK-NEXT: deb %f0, 0(%r1)
92+
; CHECK-NEXT: brasl %r14, __gnu_f2h_ieee@PLT
93+
; CHECK-NEXT: risbg %r2, %r2, 63, 191, 49
94+
; CHECK-NEXT: lmg %r14, %r15, 272(%r15)
95+
; CHECK-NEXT: br %r14
96+
start:
97+
%self = fdiv half %f, 0xHC700
98+
%_4 = bitcast half %self to i16
99+
%_0 = icmp slt i16 %_4, 0
100+
ret i1 %_0
101+
}
102+
103+
; Test a chain of Half operations which should have each operation surrounded
104+
; by conversions to/from fp32 to properly emulate Half operations.
105+
; Computes ((*%Op0 + *%Op1) + *%Op2) - *%Op3 entirely in half and stores the
; result to %Dst. The expected assembly rounds every intermediate result back
; to half: each aebr/sebr is followed by a __gnu_f2h_ieee call, and the value
; is widened again with __gnu_h2f_ieee before the next operation.
define void @fun3(ptr %Op0, ptr %Op1, ptr %Op2, ptr %Op3, ptr %Dst) {
106+
; CHECK-LABEL: fun3:
107+
; CHECK: # %bb.0: # %entry
108+
; CHECK-NEXT: stmg %r11, %r15, 88(%r15)
109+
; CHECK-NEXT: .cfi_offset %r11, -72
110+
; CHECK-NEXT: .cfi_offset %r12, -64
111+
; CHECK-NEXT: .cfi_offset %r13, -56
112+
; CHECK-NEXT: .cfi_offset %r14, -48
113+
; CHECK-NEXT: .cfi_offset %r15, -40
114+
; CHECK-NEXT: aghi %r15, -168
115+
; CHECK-NEXT: .cfi_def_cfa_offset 328
116+
; CHECK-NEXT: std %f8, 160(%r15) # 8-byte Folded Spill
117+
; CHECK-NEXT: .cfi_offset %f8, -168
118+
; CHECK-NEXT: llgh %r0, 0(%r3)
119+
; CHECK-NEXT: llgh %r13, 0(%r2)
120+
; CHECK-NEXT: lgr %r12, %r5
121+
; CHECK-NEXT: lgr %r11, %r4
122+
; CHECK-NEXT: lgr %r2, %r0
123+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
124+
; CHECK-NEXT: ler %f8, %f0
125+
; CHECK-NEXT: lgr %r2, %r13
126+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
127+
; CHECK-NEXT: aebr %f0, %f8
128+
; CHECK-NEXT: brasl %r14, __gnu_f2h_ieee@PLT
129+
; CHECK-NEXT: llgh %r13, 0(%r11)
130+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
131+
; CHECK-NEXT: ler %f8, %f0
132+
; CHECK-NEXT: lgr %r2, %r13
133+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
134+
; CHECK-NEXT: aebr %f0, %f8
135+
; CHECK-NEXT: brasl %r14, __gnu_f2h_ieee@PLT
136+
; CHECK-NEXT: llgh %r13, 0(%r12)
137+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
138+
; CHECK-NEXT: ler %f8, %f0
139+
; CHECK-NEXT: lgr %r2, %r13
140+
; CHECK-NEXT: brasl %r14, __gnu_h2f_ieee@PLT
141+
; CHECK-NEXT: sebr %f8, %f0
142+
; CHECK-NEXT: ler %f0, %f8
143+
; CHECK-NEXT: brasl %r14, __gnu_f2h_ieee@PLT
144+
; CHECK-NEXT: sth %r2, 0(%r6)
145+
; CHECK-NEXT: ld %f8, 160(%r15) # 8-byte Folded Reload
146+
; CHECK-NEXT: lmg %r11, %r15, 256(%r15)
147+
; CHECK-NEXT: br %r14
148+
entry:
149+
%0 = load half, ptr %Op0, align 2
150+
%1 = load half, ptr %Op1, align 2
151+
%add = fadd half %0, %1
152+
%2 = load half, ptr %Op2, align 2
153+
%add1 = fadd half %add, %2
154+
%3 = load half, ptr %Op3, align 2
155+
%sub = fsub half %add1, %3
156+
store half %sub, ptr %Dst, align 2
157+
ret void
158+
}

0 commit comments

Comments
 (0)