Skip to content

rustc: Use LLVM's new saturating float-to-int intrinsics #84339

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 34 additions & 63 deletions compiler/rustc_codegen_llvm/src/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::common::Funclet;
use crate::context::CodegenCx;
use crate::llvm::{self, BasicBlock, False};
use crate::llvm::{AtomicOrdering, AtomicRmwBinOp, SynchronizationScope};
use crate::llvm_util;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::value::Value;
Expand All @@ -16,7 +17,7 @@ use rustc_data_structures::small_c_str::SmallCStr;
use rustc_hir::def_id::DefId;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_span::{sym, Span};
use rustc_span::Span;
use rustc_target::abi::{self, Align, Size};
use rustc_target::spec::{HasTargetSpec, Target};
use std::borrow::Cow;
Expand Down Expand Up @@ -669,81 +670,47 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
}

fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
// WebAssembly has saturating floating point to integer casts if the
// `nontrapping-fptoint` target feature is activated. We'll use those if
// they are available.
if self.sess().target.arch == "wasm32"
&& self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
{
if llvm_util::get_version() >= (12, 0, 0) && !self.fptoint_sat_broken_in_llvm() {
let src_ty = self.cx.val_ty(val);
let float_width = self.cx.float_width(src_ty);
let int_width = self.cx.int_width(dest_ty);
let name = match (int_width, float_width) {
(32, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f32"),
(32, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f64"),
(64, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f32"),
(64, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f64"),
_ => None,
};
if let Some(name) = name {
let intrinsic = self.get_intrinsic(name);
return Some(self.call(intrinsic, &[val], None));
}
let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
let intrinsic = self.get_intrinsic(&name);
return Some(self.call(intrinsic, &[val], None));
}

None
}

fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
// WebAssembly has saturating floating point to integer casts if the
// `nontrapping-fptoint` target feature is activated. We'll use those if
// they are available.
if self.sess().target.arch == "wasm32"
&& self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
{
if llvm_util::get_version() >= (12, 0, 0) && !self.fptoint_sat_broken_in_llvm() {
let src_ty = self.cx.val_ty(val);
let float_width = self.cx.float_width(src_ty);
let int_width = self.cx.int_width(dest_ty);
let name = match (int_width, float_width) {
(32, 32) => Some("llvm.wasm.trunc.saturate.signed.i32.f32"),
(32, 64) => Some("llvm.wasm.trunc.saturate.signed.i32.f64"),
(64, 32) => Some("llvm.wasm.trunc.saturate.signed.i64.f32"),
(64, 64) => Some("llvm.wasm.trunc.saturate.signed.i64.f64"),
_ => None,
};
if let Some(name) = name {
let intrinsic = self.get_intrinsic(name);
return Some(self.call(intrinsic, &[val], None));
}
let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
let intrinsic = self.get_intrinsic(&name);
return Some(self.call(intrinsic, &[val], None));
}
None
}

fn fptosui_may_trap(&self, val: &'ll Value, dest_ty: &'ll Type) -> bool {
// Most of the time we'll be generating the `fptosi` or `fptoui`
// instruction for floating-point-to-integer conversions. These
// instructions by definition in LLVM do not trap. For the WebAssembly
// target, however, we'll lower in some cases to intrinsic calls instead
// which may trap. If we detect that this is a situation where we'll be
// using the intrinsics then we report that the call may trap, which
// callers might need to handle.
if !self.wasm_and_missing_nontrapping_fptoint() {
return false;
}
let src_ty = self.cx.val_ty(val);
let float_width = self.cx.float_width(src_ty);
let int_width = self.cx.int_width(dest_ty);
matches!((int_width, float_width), (32, 32) | (32, 64) | (64, 32) | (64, 64))
None
}

fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
// When we can, use the native wasm intrinsics which have tighter
// codegen. Note that this has a semantic difference in that the
// intrinsic can trap whereas `fptoui` never traps. That difference,
// however, is handled by `fptosui_may_trap` above.
// On WebAssembly the `fptoui` and `fptosi` instructions currently have
// poor codegen. The reason for this is that the corresponding wasm
// instructions, `i32.trunc_f32_s` for example, will trap when the float
// is out-of-bounds, infinity, or nan. This means that LLVM
// automatically inserts control flow around `fptoui` and `fptosi`
// because the LLVM instruction `fptoui` is defined as producing a
// poison value, not having UB on out-of-bounds values.
//
// Note that we skip the wasm intrinsics for vector types where `fptoui`
// must be used instead.
if self.wasm_and_missing_nontrapping_fptoint() {
// This method, however, is only used with non-saturating casts that
// have UB on out-of-bounds values. This means that it's ok if we use
// the raw wasm instruction since out-of-bounds values can do whatever
// we like. To ensure that LLVM picks the right instruction we choose
// the raw wasm intrinsic functions which avoid LLVM inserting all the
// other control flow automatically.
if self.sess().target.arch == "wasm32" {
let src_ty = self.cx.val_ty(val);
if self.cx.type_kind(src_ty) != TypeKind::Vector {
let float_width = self.cx.float_width(src_ty);
Expand All @@ -765,7 +732,8 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
}

fn fptosi(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
if self.wasm_and_missing_nontrapping_fptoint() {
// see `fptoui` above for why wasm is different here
if self.sess().target.arch == "wasm32" {
let src_ty = self.cx.val_ty(val);
if self.cx.type_kind(src_ty) != TypeKind::Vector {
let float_width = self.cx.float_width(src_ty);
Expand Down Expand Up @@ -1420,8 +1388,11 @@ impl Builder<'a, 'll, 'tcx> {
}
}

fn wasm_and_missing_nontrapping_fptoint(&self) -> bool {
self.sess().target.arch == "wasm32"
&& !self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
/// Returns `true` when the `llvm.fpto{s,u}i.sat.*` intrinsics should not be
/// used for the current target architecture because of the LLVM issue
/// referenced below.
///
/// The answer depends only on the session's target architecture string and
/// on the LLVM version reported by `llvm_util::get_version()`.
fn fptoint_sat_broken_in_llvm(&self) -> bool {
match self.tcx.sess.target.arch.as_str() {
// FIXME - https://bugs.llvm.org/show_bug.cgi?id=50083
// riscv64 is flagged as broken only while the LLVM version is below 13.0.0.
"riscv64" => llvm_util::get_version() < (13, 0, 0),
// No other architecture is flagged as affected.
_ => false,
}
}
}
30 changes: 22 additions & 8 deletions compiler/rustc_codegen_llvm/src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -503,14 +503,6 @@ impl CodegenCx<'b, 'tcx> {
let t_f32 = self.type_f32();
let t_f64 = self.type_f64();

ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f32", fn(t_f32) -> t_i32);
ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f64", fn(t_f64) -> t_i32);
ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f32", fn(t_f32) -> t_i64);
ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f64", fn(t_f64) -> t_i64);
ifn!("llvm.wasm.trunc.saturate.signed.i32.f32", fn(t_f32) -> t_i32);
ifn!("llvm.wasm.trunc.saturate.signed.i32.f64", fn(t_f64) -> t_i32);
ifn!("llvm.wasm.trunc.saturate.signed.i64.f32", fn(t_f32) -> t_i64);
ifn!("llvm.wasm.trunc.saturate.signed.i64.f64", fn(t_f64) -> t_i64);
ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
ifn!("llvm.wasm.trunc.unsigned.i64.f32", fn(t_f32) -> t_i64);
Expand All @@ -520,6 +512,28 @@ impl CodegenCx<'b, 'tcx> {
ifn!("llvm.wasm.trunc.signed.i64.f32", fn(t_f32) -> t_i64);
ifn!("llvm.wasm.trunc.signed.i64.f64", fn(t_f64) -> t_i64);

ifn!("llvm.fptosi.sat.i8.f32", fn(t_f32) -> t_i8);
ifn!("llvm.fptosi.sat.i16.f32", fn(t_f32) -> t_i16);
ifn!("llvm.fptosi.sat.i32.f32", fn(t_f32) -> t_i32);
ifn!("llvm.fptosi.sat.i64.f32", fn(t_f32) -> t_i64);
ifn!("llvm.fptosi.sat.i128.f32", fn(t_f32) -> t_i128);
ifn!("llvm.fptosi.sat.i8.f64", fn(t_f64) -> t_i8);
ifn!("llvm.fptosi.sat.i16.f64", fn(t_f64) -> t_i16);
ifn!("llvm.fptosi.sat.i32.f64", fn(t_f64) -> t_i32);
ifn!("llvm.fptosi.sat.i64.f64", fn(t_f64) -> t_i64);
ifn!("llvm.fptosi.sat.i128.f64", fn(t_f64) -> t_i128);

ifn!("llvm.fptoui.sat.i8.f32", fn(t_f32) -> t_i8);
ifn!("llvm.fptoui.sat.i16.f32", fn(t_f32) -> t_i16);
ifn!("llvm.fptoui.sat.i32.f32", fn(t_f32) -> t_i32);
ifn!("llvm.fptoui.sat.i64.f32", fn(t_f32) -> t_i64);
ifn!("llvm.fptoui.sat.i128.f32", fn(t_f32) -> t_i128);
ifn!("llvm.fptoui.sat.i8.f64", fn(t_f64) -> t_i8);
ifn!("llvm.fptoui.sat.i16.f64", fn(t_f64) -> t_i16);
ifn!("llvm.fptoui.sat.i32.f64", fn(t_f64) -> t_i32);
ifn!("llvm.fptoui.sat.i64.f64", fn(t_f64) -> t_i64);
ifn!("llvm.fptoui.sat.i128.f64", fn(t_f64) -> t_i128);

ifn!("llvm.trap", fn() -> void);
ifn!("llvm.debugtrap", fn() -> void);
ifn!("llvm.frameaddress", fn(t_i32) -> i8p);
Expand Down
Loading