Skip to content

Commit 9af8772

Browse files
committed
Add initial f16/f128 support
1 parent 0103c58 commit 9af8772

File tree

6 files changed

+175
-50
lines changed

6 files changed

+175
-50
lines changed

src/abi/pass_mode.rs

+2
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@ fn reg_to_abi_param(reg: Reg) -> AbiParam {
2222
(RegKind::Integer, 3..=4) => types::I32,
2323
(RegKind::Integer, 5..=8) => types::I64,
2424
(RegKind::Integer, 9..=16) => types::I128,
25+
(RegKind::Float, 2) => types::F16,
2526
(RegKind::Float, 4) => types::F32,
2627
(RegKind::Float, 8) => types::F64,
28+
(RegKind::Float, 16) => types::F128,
2729
(RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(),
2830
_ => unreachable!("{:?}", reg),
2931
};

src/cast.rs

+10-2
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,10 @@ pub(crate) fn clif_int_or_float_cast(
5858
"__float{sign}ti{flt}f",
5959
sign = if from_signed { "" } else { "un" },
6060
flt = match to_ty {
61+
types::F16 => "h",
6162
types::F32 => "s",
6263
types::F64 => "d",
64+
types::F128 => "t",
6365
_ => unreachable!("{:?}", to_ty),
6466
},
6567
);
@@ -90,8 +92,10 @@ pub(crate) fn clif_int_or_float_cast(
9092
"__fix{sign}{flt}fti",
9193
sign = if to_signed { "" } else { "uns" },
9294
flt = match from_ty {
95+
types::F16 => "h",
9396
types::F32 => "s",
9497
types::F64 => "d",
98+
types::F128 => "t",
9599
_ => unreachable!("{:?}", from_ty),
96100
},
97101
);
@@ -145,8 +149,12 @@ pub(crate) fn clif_int_or_float_cast(
145149
} else if from_ty.is_float() && to_ty.is_float() {
146150
// float -> float
147151
match (from_ty, to_ty) {
148-
(types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from),
149-
(types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from),
152+
(types::F16, types::F32 | types::F64 | types::F128)
153+
| (types::F32, types::F64 | types::F128)
154+
| (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from),
155+
(types::F128, types::F64 | types::F32 | types::F16)
156+
| (types::F64, types::F32 | types::F16)
157+
| (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from),
150158
_ => from,
151159
}
152160
} else {

src/common.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@ pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type {
3333
Integer::I128 => types::I128,
3434
},
3535
Primitive::Float(float) => match float {
36-
Float::F16 => unimplemented!("f16_f128"),
36+
Float::F16 => types::F16,
3737
Float::F32 => types::F32,
3838
Float::F64 => types::F64,
39-
Float::F128 => unimplemented!("f16_f128"),
39+
Float::F128 => types::F128,
4040
},
4141
// FIXME(erikdesjardins): handle non-default addrspace ptr sizes
4242
Primitive::Pointer(_) => pointer_ty(tcx),
@@ -64,10 +64,10 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ
6464
},
6565
ty::Char => types::I32,
6666
ty::Float(size) => match size {
67-
FloatTy::F16 => unimplemented!("f16_f128"),
67+
FloatTy::F16 => types::F16,
6868
FloatTy::F32 => types::F32,
6969
FloatTy::F64 => types::F64,
70-
FloatTy::F128 => unimplemented!("f16_f128"),
70+
FloatTy::F128 => types::F128,
7171
},
7272
ty::FnPtr(..) => pointer_ty(tcx),
7373
ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {

src/intrinsics/mod.rs

+115-36
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use rustc_span::{Symbol, sym};
2727

2828
pub(crate) use self::llvm::codegen_llvm_intrinsic_call;
2929
use crate::cast::clif_intcast;
30+
use crate::intrinsics;
3031
use crate::prelude::*;
3132

3233
fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! {
@@ -50,6 +51,12 @@ fn report_atomic_type_validation_error<'tcx>(
5051
fx.bcx.ins().trap(TrapCode::user(1 /* unreachable */).unwrap());
5152
}
5253

54+
pub(crate) fn long_double_is_f128(tcx: TyCtxt<'_>) -> bool {
55+
!tcx.sess.target.is_like_windows
56+
&& !tcx.sess.target.is_like_darwin
57+
&& matches!(&*tcx.sess.target.arch, "aarch64" | "riscv64" | "s390x")
58+
}
59+
5360
pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Type {
5461
let (element, count) = match layout.backend_repr {
5562
BackendRepr::SimdVector { element, count } => (element, count),
@@ -248,8 +255,10 @@ fn bool_to_zero_or_max_uint<'tcx>(
248255
let ty = fx.clif_type(ty).unwrap();
249256

250257
let int_ty = match ty {
258+
types::F16 => types::I16,
251259
types::F32 => types::I32,
252260
types::F64 => types::I64,
261+
types::F128 => types::I128,
253262
ty => ty,
254263
};
255264

@@ -308,45 +317,83 @@ fn codegen_float_intrinsic_call<'tcx>(
308317
ret: CPlace<'tcx>,
309318
) -> bool {
310319
let (name, arg_count, ty, clif_ty) = match intrinsic {
320+
sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16),
311321
sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32),
312322
sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64),
323+
sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128),
324+
sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16),
313325
sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32),
314326
sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64),
327+
sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128),
328+
sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16),
315329
sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32),
316330
sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64),
331+
sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128),
332+
sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins
317333
sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins
318334
sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins
335+
sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins
336+
sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16),
319337
sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32),
320338
sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64),
339+
sym::powf128 => ("powf128", 2, fx.tcx.types.f128, types::F128),
340+
sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16),
321341
sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
322342
sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
343+
sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128),
344+
sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16),
323345
sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
324346
sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
347+
sym::log2f128 => ("log2f128", 1, fx.tcx.types.f128, types::F128),
348+
sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16),
325349
sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
326350
sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
351+
sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128),
352+
sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16),
327353
sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
328354
sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
355+
sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128),
356+
sym::fmaf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16),
329357
sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
330358
sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
359+
sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128),
331360
// FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation
361+
sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16
332362
sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32
333363
sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64
364+
sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128
365+
sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16),
334366
sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
335367
sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
368+
sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128),
369+
sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16),
336370
sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
337371
sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
372+
sym::floorf128 => ("floorf128", 1, fx.tcx.types.f128, types::F128),
373+
sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16),
338374
sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
339375
sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
376+
sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f128, types::F128),
377+
sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16),
340378
sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
341379
sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
380+
sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128),
381+
sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16),
342382
sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
343383
sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64),
384+
sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128),
385+
sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16),
344386
sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
345387
sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
388+
sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128),
389+
sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16, types::F16),
346390
sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32),
347391
sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64),
392+
sym::sinf128 => ("sinf128", 1, fx.tcx.types.f128, types::F128),
393+
sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16),
348394
sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32),
349395
sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64),
396+
sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128),
350397
_ => return false,
351398
};
352399

@@ -380,33 +427,61 @@ fn codegen_float_intrinsic_call<'tcx>(
380427

381428
let layout = fx.layout_of(ty);
382429
let res = match intrinsic {
383-
sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => {
384-
CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout)
385-
}
386-
sym::copysignf32 | sym::copysignf64 => {
430+
sym::fmaf16
431+
| sym::fmaf32
432+
| sym::fmaf64
433+
| sym::fmaf128
434+
| sym::fmuladdf16
435+
| sym::fmuladdf32
436+
| sym::fmuladdf64
437+
| sym::fmuladdf128 => CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout),
438+
sym::copysignf16 | sym::copysignf32 | sym::copysignf64 | sym::copysignf128 => {
387439
CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout)
388440
}
389-
sym::fabsf32
441+
sym::fabsf16
442+
| sym::fabsf32
390443
| sym::fabsf64
444+
| sym::fabsf128
445+
| sym::floorf16
391446
| sym::floorf32
392447
| sym::floorf64
448+
| sym::floorf128
449+
| sym::ceilf16
393450
| sym::ceilf32
394451
| sym::ceilf64
452+
| sym::ceilf128
453+
| sym::truncf16
395454
| sym::truncf32
396455
| sym::truncf64
456+
| sym::truncf128
457+
| sym::round_ties_even_f16
397458
| sym::round_ties_even_f32
398459
| sym::round_ties_even_f64
460+
| sym::round_ties_even_f128
461+
| sym::sqrtf16
399462
| sym::sqrtf32
400-
| sym::sqrtf64 => {
463+
| sym::sqrtf64
464+
| sym::sqrtf128 => {
401465
let val = match intrinsic {
402-
sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
403-
sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
404-
sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
405-
sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
406-
sym::round_ties_even_f32 | sym::round_ties_even_f64 => {
407-
fx.bcx.ins().nearest(args[0])
466+
sym::fabsf16 | sym::fabsf32 | sym::fabsf64 | sym::fabsf128 => {
467+
fx.bcx.ins().fabs(args[0])
468+
}
469+
sym::floorf16 | sym::floorf32 | sym::floorf64 | sym::floorf128 => {
470+
fx.bcx.ins().floor(args[0])
471+
}
472+
sym::ceilf16 | sym::ceilf32 | sym::ceilf64 | sym::ceilf128 => {
473+
fx.bcx.ins().ceil(args[0])
474+
}
475+
sym::truncf16 | sym::truncf32 | sym::truncf64 | sym::truncf128 => {
476+
fx.bcx.ins().trunc(args[0])
477+
}
478+
sym::round_ties_even_f16
479+
| sym::round_ties_even_f32
480+
| sym::round_ties_even_f64
481+
| sym::round_ties_even_f128 => fx.bcx.ins().nearest(args[0]),
482+
sym::sqrtf16 | sym::sqrtf32 | sym::sqrtf64 | sym::sqrtf128 => {
483+
fx.bcx.ins().sqrt(args[0])
408484
}
409-
sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
410485
_ => unreachable!(),
411486
};
412487

@@ -415,12 +490,32 @@ fn codegen_float_intrinsic_call<'tcx>(
415490

416491
// These intrinsics aren't supported natively by Cranelift.
417492
// Lower them to a libcall.
418-
sym::powif32 | sym::powif64 => {
493+
sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => {
494+
let temp;
495+
let (clif_ty, args) = if intrinsic == sym::powif16 {
496+
temp = [fx.bcx.ins().fpromote(types::F32, args[0]), args[1]];
497+
(types::F32, temp.as_slice())
498+
} else {
499+
(clif_ty, args)
500+
};
419501
let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)];
420502
let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
503+
let ret_val = if intrinsic == sym::powif16 {
504+
fx.bcx.ins().fdemote(types::F16, ret_val)
505+
} else {
506+
ret_val
507+
};
421508
CValue::by_val(ret_val, fx.layout_of(ty))
422509
}
423510
_ => {
511+
let temp;
512+
let name = match name.strip_suffix("f128") {
513+
Some(base) if intrinsics::long_double_is_f128(fx.tcx) => {
514+
temp = format!("{base}l");
515+
&temp
516+
}
517+
_ => name,
518+
};
424519
let input_tys: Vec<_> = args.iter().map(|_| AbiParam::new(clif_ty)).collect();
425520
let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
426521
CValue::by_val(ret_val, fx.layout_of(ty))
@@ -1117,40 +1212,24 @@ fn codegen_regular_intrinsic_call<'tcx>(
11171212
ret.write_cvalue(fx, old);
11181213
}
11191214

1120-
sym::minnumf32 => {
1215+
sym::minnumf16 | sym::minnumf32 | sym::minnumf64 | sym::minnumf128 => {
11211216
intrinsic_args!(fx, args => (a, b); intrinsic);
1217+
let layout = a.layout();
11221218
let a = a.load_scalar(fx);
11231219
let b = b.load_scalar(fx);
11241220

11251221
let val = crate::num::codegen_float_min(fx, a, b);
1126-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
1127-
ret.write_cvalue(fx, val);
1128-
}
1129-
sym::minnumf64 => {
1130-
intrinsic_args!(fx, args => (a, b); intrinsic);
1131-
let a = a.load_scalar(fx);
1132-
let b = b.load_scalar(fx);
1133-
1134-
let val = crate::num::codegen_float_min(fx, a, b);
1135-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
1136-
ret.write_cvalue(fx, val);
1137-
}
1138-
sym::maxnumf32 => {
1139-
intrinsic_args!(fx, args => (a, b); intrinsic);
1140-
let a = a.load_scalar(fx);
1141-
let b = b.load_scalar(fx);
1142-
1143-
let val = crate::num::codegen_float_max(fx, a, b);
1144-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
1222+
let val = CValue::by_val(val, layout);
11451223
ret.write_cvalue(fx, val);
11461224
}
1147-
sym::maxnumf64 => {
1225+
sym::maxnumf16 | sym::maxnumf32 | sym::maxnumf64 | sym::maxnumf128 => {
11481226
intrinsic_args!(fx, args => (a, b); intrinsic);
1227+
let layout = a.layout();
11491228
let a = a.load_scalar(fx);
11501229
let b = b.load_scalar(fx);
11511230

11521231
let val = crate::num::codegen_float_max(fx, a, b);
1153-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
1232+
let val = CValue::by_val(val, layout);
11541233
ret.write_cvalue(fx, val);
11551234
}
11561235

src/num.rs

+25-5
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
//! Various operations on integer and floating-point numbers
22
3+
use crate::intrinsics;
34
use crate::prelude::*;
45

56
fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> IntCC {
@@ -357,18 +358,37 @@ pub(crate) fn codegen_float_binop<'tcx>(
357358
BinOp::Mul => b.fmul(lhs, rhs),
358359
BinOp::Div => b.fdiv(lhs, rhs),
359360
BinOp::Rem => {
360-
let (name, ty) = match in_lhs.layout().ty.kind() {
361-
ty::Float(FloatTy::F32) => ("fmodf", types::F32),
362-
ty::Float(FloatTy::F64) => ("fmod", types::F64),
361+
let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() {
362+
ty::Float(FloatTy::F16) => (
363+
"fmodf",
364+
types::F32,
365+
fx.bcx.ins().fpromote(types::F32, lhs),
366+
fx.bcx.ins().fpromote(types::F32, rhs),
367+
),
368+
ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs),
369+
ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs),
370+
ty::Float(FloatTy::F128) => (
371+
if intrinsics::long_double_is_f128(fx.tcx) { "fmodl" } else { "fmodf128" },
372+
types::F128,
373+
lhs,
374+
rhs,
375+
),
363376
_ => bug!(),
364377
};
365378

366-
fx.lib_call(
379+
let ret_val = fx.lib_call(
367380
name,
368381
vec![AbiParam::new(ty), AbiParam::new(ty)],
369382
vec![AbiParam::new(ty)],
370383
&[lhs, rhs],
371-
)[0]
384+
)[0];
385+
386+
let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
387+
fx.bcx.ins().fdemote(types::F16, ret_val)
388+
} else {
389+
ret_val
390+
};
391+
return CValue::by_val(ret_val, in_lhs.layout());
372392
}
373393
BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
374394
let fltcc = match bin_op {

0 commit comments

Comments
 (0)