Skip to content

Commit 6d03f51

Browse files
authored
[SystemZ] Add support for 16-bit floating point. (#109164)
- _Float16 is now accepted by Clang. - The half IR type is fully handled by the backend. - These values are passed in FP registers and converted to/from float around each operation. - Compiler-rt conversion functions are now built for s390x, including the previously missing extendhfdf2, which was added. Fixes #50374
1 parent 6ccc928 commit 6d03f51

File tree

127 files changed

+5301
-296
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

127 files changed

+5301
-296
lines changed

clang/docs/LanguageExtensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -1000,6 +1000,7 @@ to ``float``; see below for more information on this emulation.
10001000
* SPIR (natively)
10011001
* X86 (if SSE2 is available; natively if AVX512-FP16 is also available)
10021002
* RISC-V (natively if Zfh or Zhinx is available)
1003+
* SystemZ (emulated)
10031004

10041005
* ``__bf16`` is supported on the following targets (currently never natively):
10051006

clang/include/clang/Basic/TargetInfo.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ class TargetInfo : public TransferrableTargetInfo,
235235
bool NoAsmVariants; // True if {|} are normal characters.
236236
bool HasLegalHalfType; // True if the backend supports operations on the half
237237
// LLVM IR type.
238-
bool HalfArgsAndReturns;
238+
bool HalfArgsAndReturns; // OpenCL 6.1.1.1, NEON (IEEE 754-2008 half) type.
239239
bool HasFloat128;
240240
bool HasFloat16;
241241
bool HasBFloat16;

clang/lib/Basic/Targets/SystemZ.h

+13
Original file line numberDiff line numberDiff line change
@@ -93,11 +93,24 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
9393
"-v128:64-a:8:16-n32:64");
9494
}
9595
MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 128;
96+
97+
// True if the backend supports operations on the half LLVM IR type.
98+
// By setting this to false, conversions will happen for _Float16 around
99+
// a statement by default, with operations done in float. However, if
100+
// -ffloat16-excess-precision=none is given, no conversions will be made
101+
// and instead the backend will promote each half operation to float
102+
// individually.
103+
HasLegalHalfType = false;
104+
// Support _Float16.
105+
HasFloat16 = true;
106+
96107
HasStrictFP = true;
97108
}
98109

99110
unsigned getMinGlobalAlign(uint64_t Size, bool HasNonWeakDef) const override;
100111

112+
bool useFP16ConversionIntrinsics() const override { return false; }
113+
101114
void getTargetDefines(const LangOptions &Opts,
102115
MacroBuilder &Builder) const override;
103116

clang/lib/CodeGen/Targets/SystemZ.cpp

+26-14
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ class SystemZABIInfo : public ABIInfo {
3131
bool isPromotableIntegerTypeForABI(QualType Ty) const;
3232
bool isCompoundType(QualType Ty) const;
3333
bool isVectorArgumentType(QualType Ty) const;
34-
bool isFPArgumentType(QualType Ty) const;
34+
llvm::Type *getFPArgumentType(QualType Ty, uint64_t Size) const;
3535
QualType GetSingleElementType(QualType Ty) const;
3636

3737
ABIArgInfo classifyReturnType(QualType RetTy) const;
@@ -107,7 +107,8 @@ class SystemZTargetCodeGenInfo : public TargetCodeGenInfo {
107107
return nullptr;
108108

109109
llvm::Type *Ty = V->getType();
110-
if (Ty->isFloatTy() || Ty->isDoubleTy() || Ty->isFP128Ty()) {
110+
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy() ||
111+
Ty->isFP128Ty()) {
111112
llvm::Module &M = CGM.getModule();
112113
auto &Ctx = M.getContext();
113114
llvm::Function *TDCFunc = llvm::Intrinsic::getOrInsertDeclaration(
@@ -179,20 +180,31 @@ bool SystemZABIInfo::isVectorArgumentType(QualType Ty) const {
179180
getContext().getTypeSize(Ty) <= 128);
180181
}
181182

182-
bool SystemZABIInfo::isFPArgumentType(QualType Ty) const {
183+
// The Size argument will, in the case of an overaligned single-element struct,
184+
// reflect the overalignment value. In such a case the argument will be
185+
// passed using the type matching Size.
186+
llvm::Type *SystemZABIInfo::getFPArgumentType(QualType Ty,
187+
uint64_t Size) const {
183188
if (IsSoftFloatABI)
184-
return false;
189+
return nullptr;
185190

186191
if (const BuiltinType *BT = Ty->getAs<BuiltinType>())
187192
switch (BT->getKind()) {
193+
case BuiltinType::Float16:
194+
if (Size == 16)
195+
return llvm::Type::getHalfTy(getVMContext());
196+
LLVM_FALLTHROUGH;
188197
case BuiltinType::Float:
198+
if (Size == 32)
199+
return llvm::Type::getFloatTy(getVMContext());
200+
LLVM_FALLTHROUGH;
189201
case BuiltinType::Double:
190-
return true;
202+
return llvm::Type::getDoubleTy(getVMContext());
191203
default:
192-
return false;
204+
return nullptr;
193205
}
194206

195-
return false;
207+
return nullptr;
196208
}
197209

198210
QualType SystemZABIInfo::GetSingleElementType(QualType Ty) const {
@@ -277,7 +289,8 @@ RValue SystemZABIInfo::EmitVAArg(CodeGenFunction &CGF, Address VAListAddr,
277289
} else {
278290
if (AI.getCoerceToType())
279291
ArgTy = AI.getCoerceToType();
280-
InFPRs = (!IsSoftFloatABI && (ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
292+
InFPRs = (!IsSoftFloatABI &&
293+
(ArgTy->isHalfTy() || ArgTy->isFloatTy() || ArgTy->isDoubleTy()));
281294
IsVector = ArgTy->isVectorTy();
282295
UnpaddedSize = TyInfo.Width;
283296
DirectAlign = TyInfo.Align;
@@ -447,12 +460,11 @@ ABIArgInfo SystemZABIInfo::classifyArgumentType(QualType Ty) const {
447460
return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(),
448461
/*ByVal=*/false);
449462

450-
// The structure is passed as an unextended integer, a float, or a double.
451-
if (isFPArgumentType(SingleElementTy)) {
452-
assert(Size == 32 || Size == 64);
453-
return ABIArgInfo::getDirect(
454-
Size == 32 ? llvm::Type::getFloatTy(getVMContext())
455-
: llvm::Type::getDoubleTy(getVMContext()));
463+
// The structure is passed as an unextended integer, a half, a float,
464+
// or a double.
465+
if (llvm::Type *FPArgTy = getFPArgumentType(SingleElementTy, Size)) {
466+
assert(Size == 16 || Size == 32 || Size == 64);
467+
return ABIArgInfo::getDirect(FPArgTy);
456468
} else {
457469
llvm::IntegerType *PassTy = llvm::IntegerType::get(getVMContext(), Size);
458470
return Size <= 32 ? ABIArgInfo::getNoExtend(PassTy)

clang/test/CodeGen/SystemZ/Float16.c

+85
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
// RUN: %clang_cc1 -triple s390x-linux-gnu \
2+
// RUN: -ffloat16-excess-precision=standard -emit-llvm -o - %s \
3+
// RUN: | FileCheck %s -check-prefix=STANDARD
4+
5+
// RUN: %clang_cc1 -triple s390x-linux-gnu \
6+
// RUN: -ffloat16-excess-precision=none -emit-llvm -o - %s \
7+
// RUN: | FileCheck %s -check-prefix=NONE
8+
9+
// RUN: %clang_cc1 -triple s390x-linux-gnu \
10+
// RUN: -ffloat16-excess-precision=fast -emit-llvm -o - %s \
11+
// RUN: | FileCheck %s -check-prefix=FAST
12+
13+
_Float16 f(_Float16 a, _Float16 b, _Float16 c, _Float16 d) {
14+
return a * b + c * d;
15+
}
16+
17+
// STANDARD-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
18+
// STANDARD-NEXT: entry:
19+
// STANDARD-NEXT: %a.addr = alloca half, align 2
20+
// STANDARD-NEXT: %b.addr = alloca half, align 2
21+
// STANDARD-NEXT: %c.addr = alloca half, align 2
22+
// STANDARD-NEXT: %d.addr = alloca half, align 2
23+
// STANDARD-NEXT: store half %a, ptr %a.addr, align 2
24+
// STANDARD-NEXT: store half %b, ptr %b.addr, align 2
25+
// STANDARD-NEXT: store half %c, ptr %c.addr, align 2
26+
// STANDARD-NEXT: store half %d, ptr %d.addr, align 2
27+
// STANDARD-NEXT: %0 = load half, ptr %a.addr, align 2
28+
// STANDARD-NEXT: %ext = fpext half %0 to float
29+
// STANDARD-NEXT: %1 = load half, ptr %b.addr, align 2
30+
// STANDARD-NEXT: %ext1 = fpext half %1 to float
31+
// STANDARD-NEXT: %mul = fmul float %ext, %ext1
32+
// STANDARD-NEXT: %2 = load half, ptr %c.addr, align 2
33+
// STANDARD-NEXT: %ext2 = fpext half %2 to float
34+
// STANDARD-NEXT: %3 = load half, ptr %d.addr, align 2
35+
// STANDARD-NEXT: %ext3 = fpext half %3 to float
36+
// STANDARD-NEXT: %mul4 = fmul float %ext2, %ext3
37+
// STANDARD-NEXT: %add = fadd float %mul, %mul4
38+
// STANDARD-NEXT: %unpromotion = fptrunc float %add to half
39+
// STANDARD-NEXT: ret half %unpromotion
40+
// STANDARD-NEXT: }
41+
42+
// NONE-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
43+
// NONE-NEXT: entry:
44+
// NONE-NEXT: %a.addr = alloca half, align 2
45+
// NONE-NEXT: %b.addr = alloca half, align 2
46+
// NONE-NEXT: %c.addr = alloca half, align 2
47+
// NONE-NEXT: %d.addr = alloca half, align 2
48+
// NONE-NEXT: store half %a, ptr %a.addr, align 2
49+
// NONE-NEXT: store half %b, ptr %b.addr, align 2
50+
// NONE-NEXT: store half %c, ptr %c.addr, align 2
51+
// NONE-NEXT: store half %d, ptr %d.addr, align 2
52+
// NONE-NEXT: %0 = load half, ptr %a.addr, align 2
53+
// NONE-NEXT: %1 = load half, ptr %b.addr, align 2
54+
// NONE-NEXT: %mul = fmul half %0, %1
55+
// NONE-NEXT: %2 = load half, ptr %c.addr, align 2
56+
// NONE-NEXT: %3 = load half, ptr %d.addr, align 2
57+
// NONE-NEXT: %mul1 = fmul half %2, %3
58+
// NONE-NEXT: %add = fadd half %mul, %mul1
59+
// NONE-NEXT: ret half %add
60+
// NONE-NEXT: }
61+
62+
// FAST-LABEL: define dso_local half @f(half noundef %a, half noundef %b, half noundef %c, half noundef %d) #0 {
63+
// FAST-NEXT: entry:
64+
// FAST-NEXT: %a.addr = alloca half, align 2
65+
// FAST-NEXT: %b.addr = alloca half, align 2
66+
// FAST-NEXT: %c.addr = alloca half, align 2
67+
// FAST-NEXT: %d.addr = alloca half, align 2
68+
// FAST-NEXT: store half %a, ptr %a.addr, align 2
69+
// FAST-NEXT: store half %b, ptr %b.addr, align 2
70+
// FAST-NEXT: store half %c, ptr %c.addr, align 2
71+
// FAST-NEXT: store half %d, ptr %d.addr, align 2
72+
// FAST-NEXT: %0 = load half, ptr %a.addr, align 2
73+
// FAST-NEXT: %ext = fpext half %0 to float
74+
// FAST-NEXT: %1 = load half, ptr %b.addr, align 2
75+
// FAST-NEXT: %ext1 = fpext half %1 to float
76+
// FAST-NEXT: %mul = fmul float %ext, %ext1
77+
// FAST-NEXT: %2 = load half, ptr %c.addr, align 2
78+
// FAST-NEXT: %ext2 = fpext half %2 to float
79+
// FAST-NEXT: %3 = load half, ptr %d.addr, align 2
80+
// FAST-NEXT: %ext3 = fpext half %3 to float
81+
// FAST-NEXT: %mul4 = fmul float %ext2, %ext3
82+
// FAST-NEXT: %add = fadd float %mul, %mul4
83+
// FAST-NEXT: %unpromotion = fptrunc float %add to half
84+
// FAST-NEXT: ret half %unpromotion
85+
// FAST-NEXT: }

clang/test/CodeGen/SystemZ/fp16.c

+39
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// RUN: %clang_cc1 -triple s390x-linux-gnu -emit-llvm -o - %s \
2+
// RUN: | FileCheck %s
3+
4+
void f(__fp16 *a, __fp16 *b, __fp16 *c, __fp16 *d, __fp16 *e) {
5+
*e = (*a) * (*b) + (*c) * (*d);
6+
}
7+
8+
// CHECK-LABEL: define dso_local void @f(ptr noundef %a, ptr noundef %b, ptr noundef %c, ptr noundef %d, ptr noundef %e) #0 {
9+
// CHECK-NEXT: entry:
10+
// CHECK-NEXT: %a.addr = alloca ptr, align 8
11+
// CHECK-NEXT: %b.addr = alloca ptr, align 8
12+
// CHECK-NEXT: %c.addr = alloca ptr, align 8
13+
// CHECK-NEXT: %d.addr = alloca ptr, align 8
14+
// CHECK-NEXT: %e.addr = alloca ptr, align 8
15+
// CHECK-NEXT: store ptr %a, ptr %a.addr, align 8
16+
// CHECK-NEXT: store ptr %b, ptr %b.addr, align 8
17+
// CHECK-NEXT: store ptr %c, ptr %c.addr, align 8
18+
// CHECK-NEXT: store ptr %d, ptr %d.addr, align 8
19+
// CHECK-NEXT: store ptr %e, ptr %e.addr, align 8
20+
// CHECK-NEXT: %0 = load ptr, ptr %a.addr, align 8
21+
// CHECK-NEXT: %1 = load half, ptr %0, align 2
22+
// CHECK-NEXT: %conv = fpext half %1 to float
23+
// CHECK-NEXT: %2 = load ptr, ptr %b.addr, align 8
24+
// CHECK-NEXT: %3 = load half, ptr %2, align 2
25+
// CHECK-NEXT: %conv1 = fpext half %3 to float
26+
// CHECK-NEXT: %mul = fmul float %conv, %conv1
27+
// CHECK-NEXT: %4 = load ptr, ptr %c.addr, align 8
28+
// CHECK-NEXT: %5 = load half, ptr %4, align 2
29+
// CHECK-NEXT: %conv2 = fpext half %5 to float
30+
// CHECK-NEXT: %6 = load ptr, ptr %d.addr, align 8
31+
// CHECK-NEXT: %7 = load half, ptr %6, align 2
32+
// CHECK-NEXT: %conv3 = fpext half %7 to float
33+
// CHECK-NEXT: %mul4 = fmul float %conv2, %conv3
34+
// CHECK-NEXT: %add = fadd float %mul, %mul4
35+
// CHECK-NEXT: %8 = fptrunc float %add to half
36+
// CHECK-NEXT: %9 = load ptr, ptr %e.addr, align 8
37+
// CHECK-NEXT: store half %8, ptr %9, align 2
38+
// CHECK-NEXT: ret void
39+
// CHECK-NEXT: }

clang/test/CodeGen/SystemZ/strictfp_builtins.c

+13-1
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,24 @@
44

55
#pragma float_control(except, on)
66

7+
// CHECK-LABEL: @test_isnan__Float16(
8+
// CHECK-NEXT: entry:
9+
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca half, align 2
10+
// CHECK-NEXT: store half [[F:%.*]], ptr [[F_ADDR]], align 2
11+
// CHECK-NEXT: [[TMP0:%.*]] = load half, ptr [[F_ADDR]], align 2
12+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f16(half [[TMP0]], i64 15) #[[ATTR2:[0-9]+]]
13+
// CHECK-NEXT: ret i32 [[TMP1]]
14+
//
15+
int test_isnan__Float16(_Float16 f) {
16+
return __builtin_isnan(f);
17+
}
18+
719
// CHECK-LABEL: @test_isnan_float(
820
// CHECK-NEXT: entry:
921
// CHECK-NEXT: [[F_ADDR:%.*]] = alloca float, align 4
1022
// CHECK-NEXT: store float [[F:%.*]], ptr [[F_ADDR]], align 4
1123
// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F_ADDR]], align 4
12-
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 15) #[[ATTR2:[0-9]+]]
24+
// CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.s390.tdc.f32(float [[TMP0]], i64 15) #[[ATTR2]]
1325
// CHECK-NEXT: ret i32 [[TMP1]]
1426
//
1527
int test_isnan_float(float f) {

clang/test/CodeGen/SystemZ/systemz-abi.c

+53
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ long long pass_longlong(long long arg) { return arg; }
5252
__int128 pass_int128(__int128 arg) { return arg; }
5353
// CHECK-LABEL: define{{.*}} void @pass_int128(ptr dead_on_unwind noalias writable sret(i128) align 8 %{{.*}}, ptr %0)
5454

55+
_Float16 pass__Float16(_Float16 arg) { return arg; }
56+
// CHECK-LABEL: define{{.*}} half @pass__Float16(half %{{.*}})
57+
5558
float pass_float(float arg) { return arg; }
5659
// CHECK-LABEL: define{{.*}} float @pass_float(float %{{.*}})
5760

@@ -79,6 +82,9 @@ _Complex long pass_complex_long(_Complex long arg) { return arg; }
7982
_Complex long long pass_complex_longlong(_Complex long long arg) { return arg; }
8083
// CHECK-LABEL: define{{.*}} void @pass_complex_longlong(ptr dead_on_unwind noalias writable sret({ i64, i64 }) align 8 %{{.*}}, ptr %{{.*}}arg)
8184

85+
_Complex _Float16 pass_complex__Float16(_Complex _Float16 arg) { return arg; }
86+
// CHECK-LABEL: define{{.*}} void @pass_complex__Float16(ptr dead_on_unwind noalias writable sret({ half, half }) align 2 %{{.*}}, ptr %{{.*}}arg)
87+
8288
_Complex float pass_complex_float(_Complex float arg) { return arg; }
8389
// CHECK-LABEL: define{{.*}} void @pass_complex_float(ptr dead_on_unwind noalias writable sret({ float, float }) align 4 %{{.*}}, ptr %{{.*}}arg)
8490

@@ -130,6 +136,11 @@ struct agg_16byte pass_agg_16byte(struct agg_16byte arg) { return arg; }
130136

131137
// Float-like aggregate types
132138

139+
struct agg__Float16 { _Float16 a; };
140+
struct agg__Float16 pass_agg__Float16(struct agg__Float16 arg) { return arg; }
141+
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, half %{{.*}})
142+
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, i16 noext %{{.*}})
143+
133144
struct agg_float { float a; };
134145
struct agg_float pass_agg_float(struct agg_float arg) { return arg; }
135146
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float(ptr dead_on_unwind noalias writable sret(%struct.agg_float) align 4 %{{.*}}, float %{{.*}})
@@ -144,6 +155,20 @@ struct agg_longdouble { long double a; };
144155
struct agg_longdouble pass_agg_longdouble(struct agg_longdouble arg) { return arg; }
145156
// CHECK-LABEL: define{{.*}} void @pass_agg_longdouble(ptr dead_on_unwind noalias writable sret(%struct.agg_longdouble) align 8 %{{.*}}, ptr %{{.*}})
146157

158+
struct agg__Float16_a4 { _Float16 a __attribute__((aligned (4))); };
159+
struct agg__Float16_a4 pass_agg__Float16_a4(struct agg__Float16_a4 arg) { return arg; }
160+
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a4(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a4) align 4 %{{.*}}, float %{{.*}})
161+
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a4(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a4) align 4 %{{.*}}, i32 noext %{{.*}})
162+
163+
struct agg__Float16_a8 { _Float16 a __attribute__((aligned (8))); };
164+
struct agg__Float16_a8 pass_agg__Float16_a8(struct agg__Float16_a8 arg) { return arg; }
165+
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, double %{{.*}})
166+
// SOFT-FLOAT-LABEL: define{{.*}} void @pass_agg__Float16_a8(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a8) align 8 %{{.*}}, i64 %{{.*}})
167+
168+
struct agg__Float16_a16 { _Float16 a __attribute__((aligned (16))); };
169+
struct agg__Float16_a16 pass_agg__Float16_a16(struct agg__Float16_a16 arg) { return arg; }
170+
// CHECK-LABEL: define{{.*}} void @pass_agg__Float16_a16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16_a16) align 16 %{{.*}}, ptr %{{.*}})
171+
147172
struct agg_float_a8 { float a __attribute__((aligned (8))); };
148173
struct agg_float_a8 pass_agg_float_a8(struct agg_float_a8 arg) { return arg; }
149174
// HARD-FLOAT-LABEL: define{{.*}} void @pass_agg_float_a8(ptr dead_on_unwind noalias writable sret(%struct.agg_float_a8) align 8 %{{.*}}, double %{{.*}})
@@ -171,6 +196,10 @@ struct agg_nofloat3 pass_agg_nofloat3(struct agg_nofloat3 arg) { return arg; }
171196

172197
// Union types likewise are *not* float-like aggregate types
173198

199+
union union__Float16 { _Float16 a; };
200+
union union__Float16 pass_union__Float16(union union__Float16 arg) { return arg; }
201+
// CHECK-LABEL: define{{.*}} void @pass_union__Float16(ptr dead_on_unwind noalias writable sret(%union.union__Float16) align 2 %{{.*}}, i16 noext %{{.*}})
202+
174203
union union_float { float a; };
175204
union union_float pass_union_float(union union_float arg) { return arg; }
176205
// CHECK-LABEL: define{{.*}} void @pass_union_float(ptr dead_on_unwind noalias writable sret(%union.union_float) align 4 %{{.*}}, i32 noext %{{.*}})
@@ -448,6 +477,30 @@ struct agg_8byte va_agg_8byte(__builtin_va_list l) { return __builtin_va_arg(l,
448477
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
449478
// CHECK: ret void
450479

480+
struct agg__Float16 va_agg__Float16(__builtin_va_list l) { return __builtin_va_arg(l, struct agg__Float16); }
481+
// CHECK-LABEL: define{{.*}} void @va_agg__Float16(ptr dead_on_unwind noalias writable sret(%struct.agg__Float16) align 2 %{{.*}}, ptr %{{.*}}
482+
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1
483+
// SOFT-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 0
484+
// CHECK: [[REG_COUNT:%[^ ]+]] = load i64, ptr [[REG_COUNT_PTR]]
485+
// HARD-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 4
486+
// SOFT-FLOAT: [[FITS_IN_REGS:%[^ ]+]] = icmp ult i64 [[REG_COUNT]], 5
487+
// CHECK: br i1 [[FITS_IN_REGS]],
488+
// CHECK: [[SCALED_REG_COUNT:%[^ ]+]] = mul i64 [[REG_COUNT]], 8
489+
// HARD-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 128
490+
// SOFT-FLOAT: [[REG_OFFSET:%[^ ]+]] = add i64 [[SCALED_REG_COUNT]], 22
491+
// CHECK: [[REG_SAVE_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 3
492+
// CHECK: [[REG_SAVE_AREA:%[^ ]+]] = load ptr, ptr [[REG_SAVE_AREA_PTR:[^ ]+]]
493+
// CHECK: [[RAW_REG_ADDR:%[^ ]+]] = getelementptr i8, ptr [[REG_SAVE_AREA]], i64 [[REG_OFFSET]]
494+
// CHECK: [[REG_COUNT1:%[^ ]+]] = add i64 [[REG_COUNT]], 1
495+
// CHECK: store i64 [[REG_COUNT1]], ptr [[REG_COUNT_PTR]]
496+
// CHECK: [[OVERFLOW_ARG_AREA_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 2
497+
// CHECK: [[OVERFLOW_ARG_AREA:%[^ ]+]] = load ptr, ptr [[OVERFLOW_ARG_AREA_PTR]]
498+
// CHECK: [[RAW_MEM_ADDR:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 6
499+
// CHECK: [[OVERFLOW_ARG_AREA2:%[^ ]+]] = getelementptr i8, ptr [[OVERFLOW_ARG_AREA]], i64 8
500+
// CHECK: store ptr [[OVERFLOW_ARG_AREA2]], ptr [[OVERFLOW_ARG_AREA_PTR]]
501+
// CHECK: [[VA_ARG_ADDR:%[^ ]+]] = phi ptr [ [[RAW_REG_ADDR]], %{{.*}} ], [ [[RAW_MEM_ADDR]], %{{.*}} ]
502+
// CHECK: ret void
503+
451504
struct agg_float va_agg_float(__builtin_va_list l) { return __builtin_va_arg(l, struct agg_float); }
452505
// CHECK-LABEL: define{{.*}} void @va_agg_float(ptr dead_on_unwind noalias writable sret(%struct.agg_float) align 4 %{{.*}}, ptr %{{.*}}
453506
// HARD-FLOAT: [[REG_COUNT_PTR:%[^ ]+]] = getelementptr inbounds nuw %struct.__va_list_tag, ptr %{{.*}}, i32 0, i32 1

clang/test/CodeGen/SystemZ/systemz-inline-asm.c

+8
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,14 @@ void test_M(void) {
106106
// CHECK: call void asm sideeffect "#FOO $0", "M"(i32 2147483647)
107107
}
108108

109+
_Float16 test_f16(_Float16 a) {
110+
_Float16 f;
111+
asm("ler %0, %1" : "=f" (f) : "f" (a));
112+
return f;
113+
// CHECK-LABEL: define{{.*}} half @test_f16(half noundef %a)
114+
// CHECK: call half asm "ler $0, $1", "=f,f"(half %a)
115+
}
116+
109117
float test_f32(float f, float g) {
110118
asm("aebr %0, %2" : "=f" (f) : "0" (f), "f" (g));
111119
return f;

0 commit comments

Comments
 (0)