Skip to content

Commit c242104

Browse files
committed
Add initial f16/f128 support
1 parent 0103c58 commit c242104

File tree

6 files changed

+154
-50
lines changed

6 files changed

+154
-50
lines changed

src/abi/pass_mode.rs

+2
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,10 @@ fn reg_to_abi_param(reg: Reg) -> AbiParam {
2222
(RegKind::Integer, 3..=4) => types::I32,
2323
(RegKind::Integer, 5..=8) => types::I64,
2424
(RegKind::Integer, 9..=16) => types::I128,
25+
(RegKind::Float, 2) => types::F16,
2526
(RegKind::Float, 4) => types::F32,
2627
(RegKind::Float, 8) => types::F64,
28+
(RegKind::Float, 16) => types::F128,
2729
(RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(),
2830
_ => unreachable!("{:?}", reg),
2931
};

src/cast.rs

+10-2
Original file line number | Diff line number | Diff line change
@@ -58,8 +58,10 @@ pub(crate) fn clif_int_or_float_cast(
5858
"__float{sign}ti{flt}f",
5959
sign = if from_signed { "" } else { "un" },
6060
flt = match to_ty {
61+
types::F16 => "h",
6162
types::F32 => "s",
6263
types::F64 => "d",
64+
types::F128 => "t",
6365
_ => unreachable!("{:?}", to_ty),
6466
},
6567
);
@@ -90,8 +92,10 @@ pub(crate) fn clif_int_or_float_cast(
9092
"__fix{sign}{flt}fti",
9193
sign = if to_signed { "" } else { "uns" },
9294
flt = match from_ty {
95+
types::F16 => "h",
9396
types::F32 => "s",
9497
types::F64 => "d",
98+
types::F128 => "t",
9599
_ => unreachable!("{:?}", to_ty),
96100
},
97101
);
@@ -145,8 +149,12 @@ pub(crate) fn clif_int_or_float_cast(
145149
} else if from_ty.is_float() && to_ty.is_float() {
146150
// float -> float
147151
match (from_ty, to_ty) {
148-
(types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from),
149-
(types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from),
152+
(types::F16, types::F32 | types::F64 | types::F128)
153+
| (types::F32, types::F64 | types::F128)
154+
| (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from),
155+
(types::F128, types::F64 | types::F32 | types::F16)
156+
| (types::F64, types::F32 | types::F16)
157+
| (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from),
150158
_ => from,
151159
}
152160
} else {

src/common.rs

+4-4
Original file line number | Diff line number | Diff line change
@@ -33,10 +33,10 @@ pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type {
3333
Integer::I128 => types::I128,
3434
},
3535
Primitive::Float(float) => match float {
36-
Float::F16 => unimplemented!("f16_f128"),
36+
Float::F16 => types::F16,
3737
Float::F32 => types::F32,
3838
Float::F64 => types::F64,
39-
Float::F128 => unimplemented!("f16_f128"),
39+
Float::F128 => types::F128,
4040
},
4141
// FIXME(erikdesjardins): handle non-default addrspace ptr sizes
4242
Primitive::Pointer(_) => pointer_ty(tcx),
@@ -64,10 +64,10 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ
6464
},
6565
ty::Char => types::I32,
6666
ty::Float(size) => match size {
67-
FloatTy::F16 => unimplemented!("f16_f128"),
67+
FloatTy::F16 => types::F16,
6868
FloatTy::F32 => types::F32,
6969
FloatTy::F64 => types::F64,
70-
FloatTy::F128 => unimplemented!("f16_f128"),
70+
FloatTy::F128 => types::F128,
7171
},
7272
ty::FnPtr(..) => pointer_ty(tcx),
7373
ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {

src/intrinsics/mod.rs

+100-36
Original file line number | Diff line number | Diff line change
@@ -248,8 +248,10 @@ fn bool_to_zero_or_max_uint<'tcx>(
248248
let ty = fx.clif_type(ty).unwrap();
249249

250250
let int_ty = match ty {
251+
types::F16 => types::I16,
251252
types::F32 => types::I32,
252253
types::F64 => types::I64,
254+
types::F128 => types::I128,
253255
ty => ty,
254256
};
255257

@@ -308,45 +310,83 @@ fn codegen_float_intrinsic_call<'tcx>(
308310
ret: CPlace<'tcx>,
309311
) -> bool {
310312
let (name, arg_count, ty, clif_ty) = match intrinsic {
313+
sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16),
311314
sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32),
312315
sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64),
316+
sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128),
317+
sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16),
313318
sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32),
314319
sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64),
320+
sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128),
321+
sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16),
315322
sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32),
316323
sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64),
324+
sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128),
325+
sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins
317326
sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins
318327
sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins
328+
sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins
329+
sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16),
319330
sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32),
320331
sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64),
332+
sym::powf128 => ("powf128", 2, fx.tcx.types.f128, types::F128),
333+
sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16),
321334
sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
322335
sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
336+
sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128),
337+
sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16),
323338
sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
324339
sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
340+
sym::log2f128 => ("log2f16", 1, fx.tcx.types.f128, types::F128),
341+
sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16),
325342
sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
326343
sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
344+
sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128),
345+
sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16),
327346
sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
328347
sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
348+
sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128),
349+
sym::fmaf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16),
329350
sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
330351
sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
352+
sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128),
331353
// FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation
354+
sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16
332355
sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32
333356
sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64
357+
sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128
358+
sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16),
334359
sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
335360
sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
361+
sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128),
362+
sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16),
336363
sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
337364
sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
365+
sym::floorf128 => ("floorf128", 1, fx.tcx.types.f16, types::F128),
366+
sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16),
338367
sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
339368
sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
369+
sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f16, types::F128),
370+
sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16),
340371
sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
341372
sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
373+
sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128),
374+
sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16),
342375
sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
343376
sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64),
377+
sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128),
378+
sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16),
344379
sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
345380
sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
381+
sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128),
382+
sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16, types::F16),
346383
sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32),
347384
sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64),
385+
sym::sinf128 => ("sinf128", 1, fx.tcx.types.f16, types::F128),
386+
sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16),
348387
sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32),
349388
sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64),
389+
sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128),
350390
_ => return false,
351391
};
352392

@@ -380,33 +420,61 @@ fn codegen_float_intrinsic_call<'tcx>(
380420

381421
let layout = fx.layout_of(ty);
382422
let res = match intrinsic {
383-
sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => {
384-
CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout)
385-
}
386-
sym::copysignf32 | sym::copysignf64 => {
423+
sym::fmaf16
424+
| sym::fmaf32
425+
| sym::fmaf64
426+
| sym::fmaf128
427+
| sym::fmuladdf16
428+
| sym::fmuladdf32
429+
| sym::fmuladdf64
430+
| sym::fmuladdf128 => CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout),
431+
sym::copysignf16 | sym::copysignf32 | sym::copysignf64 | sym::copysignf128 => {
387432
CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout)
388433
}
389-
sym::fabsf32
434+
sym::fabsf16
435+
| sym::fabsf32
390436
| sym::fabsf64
437+
| sym::fabsf128
438+
| sym::floorf16
391439
| sym::floorf32
392440
| sym::floorf64
441+
| sym::floorf128
442+
| sym::ceilf16
393443
| sym::ceilf32
394444
| sym::ceilf64
445+
| sym::ceilf128
446+
| sym::truncf16
395447
| sym::truncf32
396448
| sym::truncf64
449+
| sym::truncf128
450+
| sym::round_ties_even_f16
397451
| sym::round_ties_even_f32
398452
| sym::round_ties_even_f64
453+
| sym::round_ties_even_f128
454+
| sym::sqrtf16
399455
| sym::sqrtf32
400-
| sym::sqrtf64 => {
456+
| sym::sqrtf64
457+
| sym::sqrtf128 => {
401458
let val = match intrinsic {
402-
sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
403-
sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
404-
sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
405-
sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
406-
sym::round_ties_even_f32 | sym::round_ties_even_f64 => {
407-
fx.bcx.ins().nearest(args[0])
459+
sym::fabsf16 | sym::fabsf32 | sym::fabsf64 | sym::fabsf128 => {
460+
fx.bcx.ins().fabs(args[0])
461+
}
462+
sym::floorf16 | sym::floorf32 | sym::floorf64 | sym::floorf128 => {
463+
fx.bcx.ins().floor(args[0])
464+
}
465+
sym::ceilf16 | sym::ceilf32 | sym::ceilf64 | sym::ceilf128 => {
466+
fx.bcx.ins().ceil(args[0])
467+
}
468+
sym::truncf16 | sym::truncf32 | sym::truncf64 | sym::truncf128 => {
469+
fx.bcx.ins().trunc(args[0])
470+
}
471+
sym::round_ties_even_f16
472+
| sym::round_ties_even_f32
473+
| sym::round_ties_even_f64
474+
| sym::round_ties_even_f128 => fx.bcx.ins().nearest(args[0]),
475+
sym::sqrtf16 | sym::sqrtf32 | sym::sqrtf64 | sym::sqrtf128 => {
476+
fx.bcx.ins().sqrt(args[0])
408477
}
409-
sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
410478
_ => unreachable!(),
411479
};
412480

@@ -415,9 +483,21 @@ fn codegen_float_intrinsic_call<'tcx>(
415483

416484
// These intrinsics aren't supported natively by Cranelift.
417485
// Lower them to a libcall.
418-
sym::powif32 | sym::powif64 => {
486+
sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => {
487+
let temp;
488+
let (clif_ty, args) = if intrinsic == sym::powif16 {
489+
temp = [fx.bcx.ins().fpromote(types::F32, args[0]), args[1]];
490+
(types::F32, temp.as_slice())
491+
} else {
492+
(clif_ty, args)
493+
};
419494
let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)];
420495
let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
496+
let ret_val = if intrinsic == sym::powif16 {
497+
fx.bcx.ins().fdemote(types::F16, ret_val)
498+
} else {
499+
ret_val
500+
};
421501
CValue::by_val(ret_val, fx.layout_of(ty))
422502
}
423503
_ => {
@@ -1117,40 +1197,24 @@ fn codegen_regular_intrinsic_call<'tcx>(
11171197
ret.write_cvalue(fx, old);
11181198
}
11191199

1120-
sym::minnumf32 => {
1200+
sym::minnumf16 | sym::minnumf32 | sym::minnumf64 | sym::minnumf128 => {
11211201
intrinsic_args!(fx, args => (a, b); intrinsic);
1202+
let layout = a.layout();
11221203
let a = a.load_scalar(fx);
11231204
let b = b.load_scalar(fx);
11241205

11251206
let val = crate::num::codegen_float_min(fx, a, b);
1126-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
1127-
ret.write_cvalue(fx, val);
1128-
}
1129-
sym::minnumf64 => {
1130-
intrinsic_args!(fx, args => (a, b); intrinsic);
1131-
let a = a.load_scalar(fx);
1132-
let b = b.load_scalar(fx);
1133-
1134-
let val = crate::num::codegen_float_min(fx, a, b);
1135-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
1136-
ret.write_cvalue(fx, val);
1137-
}
1138-
sym::maxnumf32 => {
1139-
intrinsic_args!(fx, args => (a, b); intrinsic);
1140-
let a = a.load_scalar(fx);
1141-
let b = b.load_scalar(fx);
1142-
1143-
let val = crate::num::codegen_float_max(fx, a, b);
1144-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
1207+
let val = CValue::by_val(val, layout);
11451208
ret.write_cvalue(fx, val);
11461209
}
1147-
sym::maxnumf64 => {
1210+
sym::maxnumf16 | sym::maxnumf32 | sym::maxnumf64 | sym::maxnumf128 => {
11481211
intrinsic_args!(fx, args => (a, b); intrinsic);
1212+
let layout = a.layout();
11491213
let a = a.load_scalar(fx);
11501214
let b = b.load_scalar(fx);
11511215

11521216
let val = crate::num::codegen_float_max(fx, a, b);
1153-
let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
1217+
let val = CValue::by_val(val, layout);
11541218
ret.write_cvalue(fx, val);
11551219
}
11561220

src/num.rs

+19-5
Original file line number | Diff line number | Diff line change
@@ -357,18 +357,32 @@ pub(crate) fn codegen_float_binop<'tcx>(
357357
BinOp::Mul => b.fmul(lhs, rhs),
358358
BinOp::Div => b.fdiv(lhs, rhs),
359359
BinOp::Rem => {
360-
let (name, ty) = match in_lhs.layout().ty.kind() {
361-
ty::Float(FloatTy::F32) => ("fmodf", types::F32),
362-
ty::Float(FloatTy::F64) => ("fmod", types::F64),
360+
let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() {
361+
ty::Float(FloatTy::F16) => (
362+
"fmodf",
363+
types::F32,
364+
fx.bcx.ins().fpromote(types::F32, lhs),
365+
fx.bcx.ins().fpromote(types::F32, rhs),
366+
),
367+
ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs),
368+
ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs),
369+
ty::Float(FloatTy::F128) => ("fmodf128", types::F128, lhs, rhs),
363370
_ => bug!(),
364371
};
365372

366-
fx.lib_call(
373+
let ret_val = fx.lib_call(
367374
name,
368375
vec![AbiParam::new(ty), AbiParam::new(ty)],
369376
vec![AbiParam::new(ty)],
370377
&[lhs, rhs],
371-
)[0]
378+
)[0];
379+
380+
let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
381+
fx.bcx.ins().fdemote(types::F16, ret_val)
382+
} else {
383+
ret_val
384+
};
385+
return CValue::by_val(ret_val, in_lhs.layout());
372386
}
373387
BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
374388
let fltcc = match bin_op {

0 commit comments

Comments
 (0)