Add initial f16/f128 support

beetrees · beetrees · commit 9af8772ddd85 · 2025-04-22T01:02:18.000+01:00
diff --git a/src/abi/pass_mode.rs b/src/abi/pass_mode.rs
@@ -22,8 +22,10 @@ fn reg_to_abi_param(reg: Reg) -> AbiParam {
         (RegKind::Integer, 3..=4) => types::I32,
         (RegKind::Integer, 5..=8) => types::I64,
         (RegKind::Integer, 9..=16) => types::I128,
+        (RegKind::Float, 2) => types::F16,
         (RegKind::Float, 4) => types::F32,
         (RegKind::Float, 8) => types::F64,
+        (RegKind::Float, 16) => types::F128,
         (RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(),
         _ => unreachable!("{:?}", reg),
     };
diff --git a/src/cast.rs b/src/cast.rs
@@ -58,8 +58,10 @@ pub(crate) fn clif_int_or_float_cast(
                 "__float{sign}ti{flt}f",
                 sign = if from_signed { "" } else { "un" },
                 flt = match to_ty {
+                    types::F16 => "h",
                     types::F32 => "s",
                     types::F64 => "d",
+                    types::F128 => "t",
                     _ => unreachable!("{:?}", to_ty),
                 },
             );
@@ -90,8 +92,10 @@ pub(crate) fn clif_int_or_float_cast(
                 "__fix{sign}{flt}fti",
                 sign = if to_signed { "" } else { "uns" },
                 flt = match from_ty {
+                    types::F16 => "h",
                     types::F32 => "s",
                     types::F64 => "d",
+                    types::F128 => "t",
                     _ => unreachable!("{:?}", to_ty),
                 },
             );
@@ -145,8 +149,12 @@ pub(crate) fn clif_int_or_float_cast(
     } else if from_ty.is_float() && to_ty.is_float() {
         // float -> float
         match (from_ty, to_ty) {
-            (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from),
-            (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from),
+            (types::F16, types::F32 | types::F64 | types::F128)
+            | (types::F32, types::F64 | types::F128)
+            | (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from),
+            (types::F128, types::F64 | types::F32 | types::F16)
+            | (types::F64, types::F32 | types::F16)
+            | (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from),
             _ => from,
         }
     } else {
diff --git a/src/common.rs b/src/common.rs
@@ -33,10 +33,10 @@ pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type {
             Integer::I128 => types::I128,
         },
         Primitive::Float(float) => match float {
-            Float::F16 => unimplemented!("f16_f128"),
+            Float::F16 => types::F16,
             Float::F32 => types::F32,
             Float::F64 => types::F64,
-            Float::F128 => unimplemented!("f16_f128"),
+            Float::F128 => types::F128,
         },
         // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
         Primitive::Pointer(_) => pointer_ty(tcx),
@@ -64,10 +64,10 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<types::Typ
         },
         ty::Char => types::I32,
         ty::Float(size) => match size {
-            FloatTy::F16 => unimplemented!("f16_f128"),
+            FloatTy::F16 => types::F16,
             FloatTy::F32 => types::F32,
             FloatTy::F64 => types::F64,
-            FloatTy::F128 => unimplemented!("f16_f128"),
+            FloatTy::F128 => types::F128,
         },
         ty::FnPtr(..) => pointer_ty(tcx),
         ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
@@ -27,6 +27,7 @@ use rustc_span::{Symbol, sym};
 
 pub(crate) use self::llvm::codegen_llvm_intrinsic_call;
 use crate::cast::clif_intcast;
+use crate::intrinsics;
 use crate::prelude::*;
 
 fn bug_on_incorrect_arg_count(intrinsic: impl std::fmt::Display) -> ! {
@@ -50,6 +51,12 @@ fn report_atomic_type_validation_error<'tcx>(
     fx.bcx.ins().trap(TrapCode::user(1 /* unreachable */).unwrap());
 }
 
+pub(crate) fn long_double_is_f128(tcx: TyCtxt<'_>) -> bool {
+    !tcx.sess.target.is_like_windows
+        && !tcx.sess.target.is_like_darwin
+        && matches!(&*tcx.sess.target.arch, "aarch64" | "riscv64" | "s390x")
+}
+
 pub(crate) fn clif_vector_type<'tcx>(tcx: TyCtxt<'tcx>, layout: TyAndLayout<'tcx>) -> Type {
     let (element, count) = match layout.backend_repr {
         BackendRepr::SimdVector { element, count } => (element, count),
@@ -248,8 +255,10 @@ fn bool_to_zero_or_max_uint<'tcx>(
     let ty = fx.clif_type(ty).unwrap();
 
     let int_ty = match ty {
+        types::F16 => types::I16,
         types::F32 => types::I32,
         types::F64 => types::I64,
+        types::F128 => types::I128,
         ty => ty,
     };
 
@@ -308,45 +317,83 @@ fn codegen_float_intrinsic_call<'tcx>(
     ret: CPlace<'tcx>,
 ) -> bool {
     let (name, arg_count, ty, clif_ty) = match intrinsic {
+        sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16),
         sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32),
         sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64),
+        sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128),
+        sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16),
         sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32),
         sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64),
+        sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128),
+        sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16),
         sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32),
         sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64),
+        sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128),
+        sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins
         sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins
         sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins
+        sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins
+        sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16),
         sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32),
         sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64),
+        sym::powf128 => ("powf128", 2, fx.tcx.types.f128, types::F128),
+        sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16),
         sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
         sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
+        sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128),
+        sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16),
         sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
         sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
+        sym::log2f128 => ("log2f16", 1, fx.tcx.types.f128, types::F128),
+        sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16),
         sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
         sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
+        sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128),
+        sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16),
         sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
         sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
+        sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128),
+        sym::fmaf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16),
         sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
         sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
+        sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128),
         // FIXME: calling `fma` from libc without FMA target feature uses expensive sofware emulation
+        sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16
         sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32
         sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64
+        sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128
+        sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16),
         sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
         sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
+        sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128),
+        sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16),
         sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
         sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
+        sym::floorf128 => ("floorf128", 1, fx.tcx.types.f16, types::F128),
+        sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16),
         sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
         sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
+        sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f16, types::F128),
+        sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16),
         sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
         sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
+        sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128),
+        sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16),
         sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
         sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64),
+        sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128),
+        sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16),
         sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
         sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
+        sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128),
+        sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16, types::F16),
         sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32),
         sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64),
+        sym::sinf128 => ("sinf128", 1, fx.tcx.types.f16, types::F128),
+        sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16),
         sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32),
         sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64),
+        sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128),
         _ => return false,
     };
 
@@ -380,33 +427,61 @@ fn codegen_float_intrinsic_call<'tcx>(
 
     let layout = fx.layout_of(ty);
     let res = match intrinsic {
-        sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => {
-            CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout)
-        }
-        sym::copysignf32 | sym::copysignf64 => {
+        sym::fmaf16
+        | sym::fmaf32
+        | sym::fmaf64
+        | sym::fmaf128
+        | sym::fmuladdf16
+        | sym::fmuladdf32
+        | sym::fmuladdf64
+        | sym::fmuladdf128 => CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout),
+        sym::copysignf16 | sym::copysignf32 | sym::copysignf64 | sym::copysignf128 => {
             CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout)
         }
-        sym::fabsf32
+        sym::fabsf16
+        | sym::fabsf32
         | sym::fabsf64
+        | sym::fabsf128
+        | sym::floorf16
         | sym::floorf32
         | sym::floorf64
+        | sym::floorf128
+        | sym::ceilf16
         | sym::ceilf32
         | sym::ceilf64
+        | sym::ceilf128
+        | sym::truncf16
         | sym::truncf32
         | sym::truncf64
+        | sym::truncf128
+        | sym::round_ties_even_f16
         | sym::round_ties_even_f32
         | sym::round_ties_even_f64
+        | sym::round_ties_even_f128
+        | sym::sqrtf16
         | sym::sqrtf32
-        | sym::sqrtf64 => {
+        | sym::sqrtf64
+        | sym::sqrtf128 => {
             let val = match intrinsic {
-                sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
-                sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
-                sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
-                sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
-                sym::round_ties_even_f32 | sym::round_ties_even_f64 => {
-                    fx.bcx.ins().nearest(args[0])
+                sym::fabsf16 | sym::fabsf32 | sym::fabsf64 | sym::fabsf128 => {
+                    fx.bcx.ins().fabs(args[0])
+                }
+                sym::floorf16 | sym::floorf32 | sym::floorf64 | sym::floorf128 => {
+                    fx.bcx.ins().floor(args[0])
+                }
+                sym::ceilf16 | sym::ceilf32 | sym::ceilf64 | sym::ceilf128 => {
+                    fx.bcx.ins().ceil(args[0])
+                }
+                sym::truncf16 | sym::truncf32 | sym::truncf64 | sym::truncf128 => {
+                    fx.bcx.ins().trunc(args[0])
+                }
+                sym::round_ties_even_f16
+                | sym::round_ties_even_f32
+                | sym::round_ties_even_f64
+                | sym::round_ties_even_f128 => fx.bcx.ins().nearest(args[0]),
+                sym::sqrtf16 | sym::sqrtf32 | sym::sqrtf64 | sym::sqrtf128 => {
+                    fx.bcx.ins().sqrt(args[0])
                 }
-                sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
                 _ => unreachable!(),
             };
 
@@ -415,12 +490,32 @@ fn codegen_float_intrinsic_call<'tcx>(
 
         // These intrinsics aren't supported natively by Cranelift.
         // Lower them to a libcall.
-        sym::powif32 | sym::powif64 => {
+        sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => {
+            let temp;
+            let (clif_ty, args) = if intrinsic == sym::powif16 {
+                temp = [fx.bcx.ins().fpromote(types::F32, args[0]), args[1]];
+                (types::F32, temp.as_slice())
+            } else {
+                (clif_ty, args)
+            };
             let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)];
             let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
+            let ret_val = if intrinsic == sym::powif16 {
+                fx.bcx.ins().fdemote(types::F16, ret_val)
+            } else {
+                ret_val
+            };
             CValue::by_val(ret_val, fx.layout_of(ty))
         }
         _ => {
+            let temp;
+            let name = match name.strip_suffix("f128") {
+                Some(base) if intrinsics::long_double_is_f128(fx.tcx) => {
+                    temp = format!("{base}l");
+                    &temp
+                }
+                _ => name,
+            };
             let input_tys: Vec<_> = args.iter().map(|_| AbiParam::new(clif_ty)).collect();
             let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
             CValue::by_val(ret_val, fx.layout_of(ty))
@@ -1117,40 +1212,24 @@ fn codegen_regular_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, old);
         }
 
-        sym::minnumf32 => {
+        sym::minnumf16 | sym::minnumf32 | sym::minnumf64 | sym::minnumf128 => {
             intrinsic_args!(fx, args => (a, b); intrinsic);
+            let layout = a.layout();
             let a = a.load_scalar(fx);
             let b = b.load_scalar(fx);
 
             let val = crate::num::codegen_float_min(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
-            ret.write_cvalue(fx, val);
-        }
-        sym::minnumf64 => {
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-            let a = a.load_scalar(fx);
-            let b = b.load_scalar(fx);
-
-            let val = crate::num::codegen_float_min(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
-            ret.write_cvalue(fx, val);
-        }
-        sym::maxnumf32 => {
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-            let a = a.load_scalar(fx);
-            let b = b.load_scalar(fx);
-
-            let val = crate::num::codegen_float_max(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
+            let val = CValue::by_val(val, layout);
             ret.write_cvalue(fx, val);
         }
-        sym::maxnumf64 => {
+        sym::maxnumf16 | sym::maxnumf32 | sym::maxnumf64 | sym::maxnumf128 => {
             intrinsic_args!(fx, args => (a, b); intrinsic);
+            let layout = a.layout();
             let a = a.load_scalar(fx);
             let b = b.load_scalar(fx);
 
             let val = crate::num::codegen_float_max(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
+            let val = CValue::by_val(val, layout);
             ret.write_cvalue(fx, val);
         }
 
diff --git a/src/num.rs b/src/num.rs
@@ -1,5 +1,6 @@
 //! Various operations on integer and floating-point numbers
 
+use crate::intrinsics;
 use crate::prelude::*;
 
 fn bin_op_to_intcc(bin_op: BinOp, signed: bool) -> IntCC {
@@ -357,18 +358,37 @@ pub(crate) fn codegen_float_binop<'tcx>(
         BinOp::Mul => b.fmul(lhs, rhs),
         BinOp::Div => b.fdiv(lhs, rhs),
         BinOp::Rem => {
-            let (name, ty) = match in_lhs.layout().ty.kind() {
-                ty::Float(FloatTy::F32) => ("fmodf", types::F32),
-                ty::Float(FloatTy::F64) => ("fmod", types::F64),
+            let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() {
+                ty::Float(FloatTy::F16) => (
+                    "fmodf",
+                    types::F32,
+                    fx.bcx.ins().fpromote(types::F32, lhs),
+                    fx.bcx.ins().fpromote(types::F32, rhs),
+                ),
+                ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs),
+                ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs),
+                ty::Float(FloatTy::F128) => (
+                    if intrinsics::long_double_is_f128(fx.tcx) { "fmodl" } else { "fmodf128" },
+                    types::F128,
+                    lhs,
+                    rhs,
+                ),
                 _ => bug!(),
             };
 
-            fx.lib_call(
+            let ret_val = fx.lib_call(
                 name,
                 vec![AbiParam::new(ty), AbiParam::new(ty)],
                 vec![AbiParam::new(ty)],
                 &[lhs, rhs],
-            )[0]
+            )[0];
+
+            let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+                fx.bcx.ins().fdemote(types::F16, ret_val)
+            } else {
+                ret_val
+            };
+            return CValue::by_val(ret_val, in_lhs.layout());
         }
         BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
             let fltcc = match bin_op {
diff --git a/src/value_and_place.rs b/src/value_and_place.rs