diff --git a/src/abi/pass_mode.rs b/src/abi/pass_mode.rs
index 06d89bc9e..6d8614aca 100644
--- a/src/abi/pass_mode.rs
+++ b/src/abi/pass_mode.rs
@@ -22,8 +22,10 @@ fn reg_to_abi_param(reg: Reg) -> AbiParam {
         (RegKind::Integer, 3..=4) => types::I32,
         (RegKind::Integer, 5..=8) => types::I64,
         (RegKind::Integer, 9..=16) => types::I128,
+        (RegKind::Float, 2) => types::F16,
         (RegKind::Float, 4) => types::F32,
         (RegKind::Float, 8) => types::F64,
+        (RegKind::Float, 16) => types::F128,
         (RegKind::Vector, size) => types::I8.by(u32::try_from(size).unwrap()).unwrap(),
         _ => unreachable!("{:?}", reg),
     };
diff --git a/src/cast.rs b/src/cast.rs
index e23463242..642d74033 100644
--- a/src/cast.rs
+++ b/src/cast.rs
@@ -58,8 +58,10 @@ pub(crate) fn clif_int_or_float_cast(
                 "__float{sign}ti{flt}f",
                 sign = if from_signed { "" } else { "un" },
                 flt = match to_ty {
+                    types::F16 => "h",
                     types::F32 => "s",
                     types::F64 => "d",
+                    types::F128 => "t",
                     _ => unreachable!("{:?}", to_ty),
                 },
             );
@@ -90,8 +92,10 @@ pub(crate) fn clif_int_or_float_cast(
             "__fix{sign}{flt}fti",
             sign = if to_signed { "" } else { "uns" },
             flt = match from_ty {
+                types::F16 => "h",
                 types::F32 => "s",
                 types::F64 => "d",
+                types::F128 => "t",
                 _ => unreachable!("{:?}", to_ty),
             },
         );
@@ -145,8 +149,12 @@ pub(crate) fn clif_int_or_float_cast(
     } else if from_ty.is_float() && to_ty.is_float() {
         // float -> float
         match (from_ty, to_ty) {
-            (types::F32, types::F64) => fx.bcx.ins().fpromote(types::F64, from),
-            (types::F64, types::F32) => fx.bcx.ins().fdemote(types::F32, from),
+            (types::F16, types::F32 | types::F64 | types::F128)
+            | (types::F32, types::F64 | types::F128)
+            | (types::F64, types::F128) => fx.bcx.ins().fpromote(to_ty, from),
+            (types::F128, types::F64 | types::F32 | types::F16)
+            | (types::F64, types::F32 | types::F16)
+            | (types::F32, types::F16) => fx.bcx.ins().fdemote(to_ty, from),
             _ => from,
         }
     } else {
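Annotation (not part of the patch): with the two new match arms in src/cast.rs, the i128/u128 <-> float conversions now cover all four float widths via compiler-builtins, using "h"/"s"/"d"/"t" as the half/single/double/quad suffixes. A minimal plain-Rust sketch of the names those two format! calls produce; the helper functions below are hypothetical, only the format strings come from the code above.

```rust
// Hypothetical helpers mirroring the format! calls in src/cast.rs above.
fn int128_to_float_libcall(from_signed: bool, flt: &str) -> String {
    format!("__float{}ti{}f", if from_signed { "" } else { "un" }, flt)
}

fn float_to_int128_libcall(to_signed: bool, flt: &str) -> String {
    format!("__fix{}{}fti", if to_signed { "" } else { "uns" }, flt)
}

fn main() {
    assert_eq!(int128_to_float_libcall(true, "h"), "__floattihf"); // i128 -> f16
    assert_eq!(int128_to_float_libcall(false, "t"), "__floatuntitf"); // u128 -> f128
    assert_eq!(float_to_int128_libcall(true, "h"), "__fixhfti"); // f16 -> i128
    assert_eq!(float_to_int128_libcall(false, "t"), "__fixunstfti"); // f128 -> u128
}
```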
diff --git a/src/common.rs b/src/common.rs
index abe2972ba..2f11b2d2d 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -33,10 +33,10 @@ pub(crate) fn scalar_to_clif_type(tcx: TyCtxt<'_>, scalar: Scalar) -> Type {
             Integer::I128 => types::I128,
         },
         Primitive::Float(float) => match float {
-            Float::F16 => unimplemented!("f16_f128"),
+            Float::F16 => types::F16,
             Float::F32 => types::F32,
             Float::F64 => types::F64,
-            Float::F128 => unimplemented!("f16_f128"),
+            Float::F128 => types::F128,
         },
         // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
         Primitive::Pointer(_) => pointer_ty(tcx),
@@ -64,10 +64,10 @@ fn clif_type_from_ty<'tcx>(tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Option<Type> {
         },
         ty::Char => types::I32,
         ty::Float(size) => match size {
-            FloatTy::F16 => unimplemented!("f16_f128"),
+            FloatTy::F16 => types::F16,
             FloatTy::F32 => types::F32,
             FloatTy::F64 => types::F64,
-            FloatTy::F128 => unimplemented!("f16_f128"),
+            FloatTy::F128 => types::F128,
         },
         ty::FnPtr(..) => pointer_ty(tcx),
         ty::RawPtr(pointee_ty, _) | ty::Ref(_, pointee_ty, _) => {
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 0048a3e8d..d1c676c26 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -248,8 +248,10 @@ fn bool_to_zero_or_max_uint<'tcx>(
     let ty = fx.clif_type(ty).unwrap();
 
     let int_ty = match ty {
+        types::F16 => types::I16,
         types::F32 => types::I32,
         types::F64 => types::I64,
+        types::F128 => types::I128,
         ty => ty,
     };
 
@@ -308,45 +310,83 @@ fn codegen_float_intrinsic_call<'tcx>(
     ret: CPlace<'tcx>,
 ) -> bool {
     let (name, arg_count, ty, clif_ty) = match intrinsic {
+        sym::expf16 => ("expf16", 1, fx.tcx.types.f16, types::F16),
         sym::expf32 => ("expf", 1, fx.tcx.types.f32, types::F32),
         sym::expf64 => ("exp", 1, fx.tcx.types.f64, types::F64),
+        sym::expf128 => ("expf128", 1, fx.tcx.types.f128, types::F128),
+        sym::exp2f16 => ("exp2f16", 1, fx.tcx.types.f16, types::F16),
         sym::exp2f32 => ("exp2f", 1, fx.tcx.types.f32, types::F32),
         sym::exp2f64 => ("exp2", 1, fx.tcx.types.f64, types::F64),
+        sym::exp2f128 => ("exp2f128", 1, fx.tcx.types.f128, types::F128),
+        sym::sqrtf16 => ("sqrtf16", 1, fx.tcx.types.f16, types::F16),
         sym::sqrtf32 => ("sqrtf", 1, fx.tcx.types.f32, types::F32),
         sym::sqrtf64 => ("sqrt", 1, fx.tcx.types.f64, types::F64),
+        sym::sqrtf128 => ("sqrtf128", 1, fx.tcx.types.f128, types::F128),
+        sym::powif16 => ("__powisf2", 2, fx.tcx.types.f16, types::F16), // compiler-builtins
         sym::powif32 => ("__powisf2", 2, fx.tcx.types.f32, types::F32), // compiler-builtins
         sym::powif64 => ("__powidf2", 2, fx.tcx.types.f64, types::F64), // compiler-builtins
+        sym::powif128 => ("__powitf2", 2, fx.tcx.types.f128, types::F128), // compiler-builtins
+        sym::powf16 => ("powf16", 2, fx.tcx.types.f16, types::F16),
         sym::powf32 => ("powf", 2, fx.tcx.types.f32, types::F32),
         sym::powf64 => ("pow", 2, fx.tcx.types.f64, types::F64),
+        sym::powf128 => ("powf128", 2, fx.tcx.types.f128, types::F128),
+        sym::logf16 => ("logf16", 1, fx.tcx.types.f16, types::F16),
         sym::logf32 => ("logf", 1, fx.tcx.types.f32, types::F32),
         sym::logf64 => ("log", 1, fx.tcx.types.f64, types::F64),
+        sym::logf128 => ("logf128", 1, fx.tcx.types.f128, types::F128),
+        sym::log2f16 => ("log2f16", 1, fx.tcx.types.f16, types::F16),
         sym::log2f32 => ("log2f", 1, fx.tcx.types.f32, types::F32),
         sym::log2f64 => ("log2", 1, fx.tcx.types.f64, types::F64),
+        sym::log2f128 => ("log2f128", 1, fx.tcx.types.f128, types::F128),
+        sym::log10f16 => ("log10f16", 1, fx.tcx.types.f16, types::F16),
         sym::log10f32 => ("log10f", 1, fx.tcx.types.f32, types::F32),
         sym::log10f64 => ("log10", 1, fx.tcx.types.f64, types::F64),
+        sym::log10f128 => ("log10f128", 1, fx.tcx.types.f128, types::F128),
+        sym::fabsf16 => ("fabsf16", 1, fx.tcx.types.f16, types::F16),
         sym::fabsf32 => ("fabsf", 1, fx.tcx.types.f32, types::F32),
         sym::fabsf64 => ("fabs", 1, fx.tcx.types.f64, types::F64),
+        sym::fabsf128 => ("fabsf128", 1, fx.tcx.types.f128, types::F128),
+        sym::fmaf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16),
         sym::fmaf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32),
         sym::fmaf64 => ("fma", 3, fx.tcx.types.f64, types::F64),
+        sym::fmaf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // FIXME: calling `fma` from libc without FMA target feature uses expensive software emulation
+        sym::fmuladdf16 => ("fmaf16", 3, fx.tcx.types.f16, types::F16), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f16
         sym::fmuladdf32 => ("fmaf", 3, fx.tcx.types.f32, types::F32), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f32
         sym::fmuladdf64 => ("fma", 3, fx.tcx.types.f64, types::F64), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f64
+        sym::fmuladdf128 => ("fmaf128", 3, fx.tcx.types.f128, types::F128), // TODO: use cranelift intrinsic analogous to llvm.fmuladd.f128
+        sym::copysignf16 => ("copysignf16", 2, fx.tcx.types.f16, types::F16),
         sym::copysignf32 => ("copysignf", 2, fx.tcx.types.f32, types::F32),
         sym::copysignf64 => ("copysign", 2, fx.tcx.types.f64, types::F64),
+        sym::copysignf128 => ("copysignf128", 2, fx.tcx.types.f128, types::F128),
+        sym::floorf16 => ("floorf16", 1, fx.tcx.types.f16, types::F16),
         sym::floorf32 => ("floorf", 1, fx.tcx.types.f32, types::F32),
         sym::floorf64 => ("floor", 1, fx.tcx.types.f64, types::F64),
+        sym::floorf128 => ("floorf128", 1, fx.tcx.types.f128, types::F128),
+        sym::ceilf16 => ("ceilf16", 1, fx.tcx.types.f16, types::F16),
         sym::ceilf32 => ("ceilf", 1, fx.tcx.types.f32, types::F32),
         sym::ceilf64 => ("ceil", 1, fx.tcx.types.f64, types::F64),
+        sym::ceilf128 => ("ceilf128", 1, fx.tcx.types.f128, types::F128),
+        sym::truncf16 => ("truncf16", 1, fx.tcx.types.f16, types::F16),
         sym::truncf32 => ("truncf", 1, fx.tcx.types.f32, types::F32),
         sym::truncf64 => ("trunc", 1, fx.tcx.types.f64, types::F64),
+        sym::truncf128 => ("truncf128", 1, fx.tcx.types.f128, types::F128),
+        sym::round_ties_even_f16 => ("rintf16", 1, fx.tcx.types.f16, types::F16),
         sym::round_ties_even_f32 => ("rintf", 1, fx.tcx.types.f32, types::F32),
         sym::round_ties_even_f64 => ("rint", 1, fx.tcx.types.f64, types::F64),
+        sym::round_ties_even_f128 => ("rintf128", 1, fx.tcx.types.f128, types::F128),
+        sym::roundf16 => ("roundf16", 1, fx.tcx.types.f16, types::F16),
         sym::roundf32 => ("roundf", 1, fx.tcx.types.f32, types::F32),
         sym::roundf64 => ("round", 1, fx.tcx.types.f64, types::F64),
+        sym::roundf128 => ("roundf128", 1, fx.tcx.types.f128, types::F128),
+        sym::sinf16 => ("sinf16", 1, fx.tcx.types.f16, types::F16),
         sym::sinf32 => ("sinf", 1, fx.tcx.types.f32, types::F32),
         sym::sinf64 => ("sin", 1, fx.tcx.types.f64, types::F64),
+        sym::sinf128 => ("sinf128", 1, fx.tcx.types.f128, types::F128),
+        sym::cosf16 => ("cosf16", 1, fx.tcx.types.f16, types::F16),
         sym::cosf32 => ("cosf", 1, fx.tcx.types.f32, types::F32),
         sym::cosf64 => ("cos", 1, fx.tcx.types.f64, types::F64),
+        sym::cosf128 => ("cosf128", 1, fx.tcx.types.f128, types::F128),
         _ => return false,
     };
 
@@ -380,33 +420,61 @@ fn codegen_float_intrinsic_call<'tcx>(
 
     let layout = fx.layout_of(ty);
     let res = match intrinsic {
-        sym::fmaf32 | sym::fmaf64 | sym::fmuladdf32 | sym::fmuladdf64 => {
-            CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout)
-        }
-        sym::copysignf32 | sym::copysignf64 => {
+        sym::fmaf16
+        | sym::fmaf32
+        | sym::fmaf64
+        | sym::fmaf128
+        | sym::fmuladdf16
+        | sym::fmuladdf32
+        | sym::fmuladdf64
+        | sym::fmuladdf128 => CValue::by_val(fx.bcx.ins().fma(args[0], args[1], args[2]), layout),
+        sym::copysignf16 | sym::copysignf32 | sym::copysignf64 | sym::copysignf128 => {
             CValue::by_val(fx.bcx.ins().fcopysign(args[0], args[1]), layout)
         }
-        sym::fabsf32
+        sym::fabsf16
+        | sym::fabsf32
         | sym::fabsf64
+        | sym::fabsf128
+        | sym::floorf16
         | sym::floorf32
         | sym::floorf64
+        | sym::floorf128
+        | sym::ceilf16
         | sym::ceilf32
         | sym::ceilf64
+        | sym::ceilf128
+        | sym::truncf16
         | sym::truncf32
         | sym::truncf64
+        | sym::truncf128
+        | sym::round_ties_even_f16
         | sym::round_ties_even_f32
         | sym::round_ties_even_f64
+        | sym::round_ties_even_f128
+        | sym::sqrtf16
         | sym::sqrtf32
-        | sym::sqrtf64 => {
+        | sym::sqrtf64
+        | sym::sqrtf128 => {
             let val = match intrinsic {
-                sym::fabsf32 | sym::fabsf64 => fx.bcx.ins().fabs(args[0]),
-                sym::floorf32 | sym::floorf64 => fx.bcx.ins().floor(args[0]),
-                sym::ceilf32 | sym::ceilf64 => fx.bcx.ins().ceil(args[0]),
-                sym::truncf32 | sym::truncf64 => fx.bcx.ins().trunc(args[0]),
-                sym::round_ties_even_f32 | sym::round_ties_even_f64 => {
-                    fx.bcx.ins().nearest(args[0])
+                sym::fabsf16 | sym::fabsf32 | sym::fabsf64 | sym::fabsf128 => {
+                    fx.bcx.ins().fabs(args[0])
+                }
+                sym::floorf16 | sym::floorf32 | sym::floorf64 | sym::floorf128 => {
+                    fx.bcx.ins().floor(args[0])
+                }
+                sym::ceilf16 | sym::ceilf32 | sym::ceilf64 | sym::ceilf128 => {
+                    fx.bcx.ins().ceil(args[0])
+                }
+                sym::truncf16 | sym::truncf32 | sym::truncf64 | sym::truncf128 => {
+                    fx.bcx.ins().trunc(args[0])
+                }
+                sym::round_ties_even_f16
+                | sym::round_ties_even_f32
+                | sym::round_ties_even_f64
+                | sym::round_ties_even_f128 => fx.bcx.ins().nearest(args[0]),
+                sym::sqrtf16 | sym::sqrtf32 | sym::sqrtf64 | sym::sqrtf128 => {
+                    fx.bcx.ins().sqrt(args[0])
                 }
-                sym::sqrtf32 | sym::sqrtf64 => fx.bcx.ins().sqrt(args[0]),
                 _ => unreachable!(),
             };
@@ -415,9 +483,21 @@ fn codegen_float_intrinsic_call<'tcx>(
 
         // These intrinsics aren't supported natively by Cranelift.
         // Lower them to a libcall.
-        sym::powif32 | sym::powif64 => {
+        sym::powif16 | sym::powif32 | sym::powif64 | sym::powif128 => {
+            let temp;
+            let (clif_ty, args) = if intrinsic == sym::powif16 {
+                temp = [fx.bcx.ins().fpromote(types::F32, args[0]), args[1]];
+                (types::F32, temp.as_slice())
+            } else {
+                (clif_ty, args)
+            };
             let input_tys: Vec<_> = vec![AbiParam::new(clif_ty), AbiParam::new(types::I32)];
             let ret_val = fx.lib_call(name, input_tys, vec![AbiParam::new(clif_ty)], &args)[0];
+            let ret_val = if intrinsic == sym::powif16 {
+                fx.bcx.ins().fdemote(types::F16, ret_val)
+            } else {
+                ret_val
+            };
             CValue::by_val(ret_val, fx.layout_of(ty))
         }
         _ => {
@@ -1117,40 +1197,24 @@ fn codegen_regular_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, old);
         }
 
-        sym::minnumf32 => {
+        sym::minnumf16 | sym::minnumf32 | sym::minnumf64 | sym::minnumf128 => {
             intrinsic_args!(fx, args => (a, b); intrinsic);
+            let layout = a.layout();
             let a = a.load_scalar(fx);
             let b = b.load_scalar(fx);
 
             let val = crate::num::codegen_float_min(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
-            ret.write_cvalue(fx, val);
-        }
-        sym::minnumf64 => {
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-            let a = a.load_scalar(fx);
-            let b = b.load_scalar(fx);
-
-            let val = crate::num::codegen_float_min(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
-            ret.write_cvalue(fx, val);
-        }
-        sym::maxnumf32 => {
-            intrinsic_args!(fx, args => (a, b); intrinsic);
-            let a = a.load_scalar(fx);
-            let b = b.load_scalar(fx);
-
-            let val = crate::num::codegen_float_max(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
+            let val = CValue::by_val(val, layout);
             ret.write_cvalue(fx, val);
         }
-        sym::maxnumf64 => {
+        sym::maxnumf16 | sym::maxnumf32 | sym::maxnumf64 | sym::maxnumf128 => {
             intrinsic_args!(fx, args => (a, b); intrinsic);
+            let layout = a.layout();
             let a = a.load_scalar(fx);
             let b = b.load_scalar(fx);
 
             let val = crate::num::codegen_float_max(fx, a, b);
-            let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
+            let val = CValue::by_val(val, layout);
             ret.write_cvalue(fx, val);
         }
 
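Annotation (not part of the patch): the intrinsic table above follows one naming rule per width: f32 keeps the classic libm "f" suffix, f64 uses the bare name, and the new f16/f128 rows call width-suffixed symbols. powi is the exception and goes through compiler-builtins (__powisf2/__powidf2/__powitf2); since there is no f16 helper there, powif16 reuses __powisf2 by fpromote-ing the operand to f32 and fdemote-ing the result, as the sym::powif16 branch above does. A small hypothetical sketch of the suffix rule:

```rust
// Hypothetical helper (not in the patch) spelling out the suffix rule used by the table.
fn float_libcall_name(base: &str, bits: u32) -> String {
    match bits {
        16 => format!("{base}f16"),
        32 => format!("{base}f"),
        64 => base.to_string(),
        128 => format!("{base}f128"),
        _ => unreachable!("unsupported float width"),
    }
}

fn main() {
    assert_eq!(float_libcall_name("sin", 16), "sinf16");
    assert_eq!(float_libcall_name("log2", 128), "log2f128");
    assert_eq!(float_libcall_name("exp", 32), "expf");
    assert_eq!(float_libcall_name("sqrt", 64), "sqrt");
}
```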
diff --git a/src/num.rs b/src/num.rs
index 90627f806..ac7980b9e 100644
--- a/src/num.rs
+++ b/src/num.rs
@@ -357,18 +357,32 @@ pub(crate) fn codegen_float_binop<'tcx>(
         BinOp::Mul => b.fmul(lhs, rhs),
         BinOp::Div => b.fdiv(lhs, rhs),
         BinOp::Rem => {
-            let (name, ty) = match in_lhs.layout().ty.kind() {
-                ty::Float(FloatTy::F32) => ("fmodf", types::F32),
-                ty::Float(FloatTy::F64) => ("fmod", types::F64),
+            let (name, ty, lhs, rhs) = match in_lhs.layout().ty.kind() {
+                ty::Float(FloatTy::F16) => (
+                    "fmodf",
+                    types::F32,
+                    fx.bcx.ins().fpromote(types::F32, lhs),
+                    fx.bcx.ins().fpromote(types::F32, rhs),
+                ),
+                ty::Float(FloatTy::F32) => ("fmodf", types::F32, lhs, rhs),
+                ty::Float(FloatTy::F64) => ("fmod", types::F64, lhs, rhs),
+                ty::Float(FloatTy::F128) => ("fmodf128", types::F128, lhs, rhs),
                 _ => bug!(),
             };
 
-            fx.lib_call(
+            let ret_val = fx.lib_call(
                 name,
                 vec![AbiParam::new(ty), AbiParam::new(ty)],
                 vec![AbiParam::new(ty)],
                 &[lhs, rhs],
-            )[0]
+            )[0];
+
+            let ret_val = if *in_lhs.layout().ty.kind() == ty::Float(FloatTy::F16) {
+                fx.bcx.ins().fdemote(types::F16, ret_val)
+            } else {
+                ret_val
+            };
+            return CValue::by_val(ret_val, in_lhs.layout());
         }
         BinOp::Eq | BinOp::Lt | BinOp::Le | BinOp::Ne | BinOp::Ge | BinOp::Gt => {
             let fltcc = match bin_op {
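Annotation (not part of the patch): f16 gets no fmod libcall of its own here, so `%` on f16 takes the same detour as powif16 above: fpromote both operands to f32, call fmodf, then fdemote the result back to f16. A trivial plain-Rust illustration of the computation that detour performs, with values chosen to be exactly representable in f16:

```rust
fn main() {
    let lhs: f32 = 7.5; // an f16 operand after fpromote.f32
    let rhs: f32 = 2.0;
    let rem = lhs % rhs; // what the fmodf libcall computes
    assert_eq!(rem, 1.5); // fdemote.f16 of 1.5 is exact, since f16 represents 1.5
}
```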
diff --git a/src/value_and_place.rs b/src/value_and_place.rs
index 4874a4ad9..6f4691e0a 100644
--- a/src/value_and_place.rs
+++ b/src/value_and_place.rs
@@ -325,7 +325,7 @@ impl<'tcx> CValue<'tcx> {
         const_val: ty::ScalarInt,
     ) -> CValue<'tcx> {
         assert_eq!(const_val.size(), layout.size, "{:#?}: {:?}", const_val, layout);
-        use cranelift_codegen::ir::immediates::{Ieee32, Ieee64};
+        use cranelift_codegen::ir::immediates::{Ieee16, Ieee32, Ieee64, Ieee128};
 
         let clif_ty = fx.clif_type(layout.ty).unwrap();
 
@@ -346,12 +346,24 @@ impl<'tcx> CValue<'tcx> {
                 let raw_val = const_val.size().truncate(const_val.to_bits(layout.size));
                 fx.bcx.ins().iconst(clif_ty, raw_val as i64)
             }
+            ty::Float(FloatTy::F16) => {
+                fx.bcx.ins().f16const(Ieee16::with_bits(u16::try_from(const_val).unwrap()))
+            }
             ty::Float(FloatTy::F32) => {
                 fx.bcx.ins().f32const(Ieee32::with_bits(u32::try_from(const_val).unwrap()))
             }
             ty::Float(FloatTy::F64) => {
                 fx.bcx.ins().f64const(Ieee64::with_bits(u64::try_from(const_val).unwrap()))
             }
+            ty::Float(FloatTy::F128) => {
+                let value = fx
+                    .bcx
+                    .func
+                    .dfg
+                    .constants
+                    .insert(Ieee128::with_bits(u128::try_from(const_val).unwrap()).into());
+                fx.bcx.ins().f128const(value)
+            }
             _ => panic!(
                 "CValue::const_val for non bool/char/float/integer/pointer type {:?} is not allowed",
                 layout.ty
@@ -568,10 +580,14 @@ impl<'tcx> CPlace<'tcx> {
             (_, _) if src_ty == dst_ty => data,
 
             // This is a `write_cvalue_transmute`.
-            (types::I32, types::F32)
+            (types::I16, types::F16)
+            | (types::F16, types::I16)
+            | (types::I32, types::F32)
             | (types::F32, types::I32)
             | (types::I64, types::F64)
-            | (types::F64, types::I64) => codegen_bitcast(fx, dst_ty, data),
+            | (types::F64, types::I64)
+            | (types::I128, types::F128)
+            | (types::F128, types::I128) => codegen_bitcast(fx, dst_ty, data),
             _ if src_ty.is_vector() && dst_ty.is_vector() => codegen_bitcast(fx, dst_ty, data),
             _ if src_ty.is_vector() || dst_ty.is_vector() => {
                 // FIXME(bytecodealliance/wasmtime#6104) do something more efficient for transmutes between vectors and integers.
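Annotation (not part of the patch): an Ieee16 constant fits in an ordinary instruction immediate like Ieee32/Ieee64, but an Ieee128 value is 16 bytes, so the patch inserts it into the function's constant pool (fx.bcx.func.dfg.constants) and f128const takes the returned handle instead of the bits. A plain-Rust sketch of the sizes involved, with no Cranelift dependency; the 1.0 bit patterns below are standard IEEE 754 encodings, not values taken from the patch.

```rust
fn main() {
    // 1.0 in IEEE binary16: sign 0, biased exponent 15, zero mantissa -> 2 bytes.
    let one_f16_bits: u16 = 0x3C00;
    // 1.0 in IEEE binary128: sign 0, biased exponent 16383, zero mantissa -> 16 bytes.
    let one_f128_bits: u128 = 0x3FFF_0000_0000_0000_0000_0000_0000_0000;
    assert_eq!(one_f16_bits.to_le_bytes().len(), 2); // small enough for an immediate
    assert_eq!(one_f128_bits.to_le_bytes().len(), 16); // routed through the constant pool
}
```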