Skip to content

Commit 7889f96

Browse files
authored
Rollup merge of #92425 - calebzulawski:simd-cast, r=workingjubilee
Improve SIMD casts
* Allows the `simd_cast` intrinsic to take `usize` and `isize`.
* Adds the `simd_as` intrinsic, which is the same as `simd_cast` except that float-to-int conversions saturate (matching the behavior of `as`).
cc `@workingjubilee`
2 parents f372476 + 49d36d7 commit 7889f96

File tree

8 files changed

+314
-177
lines changed

8 files changed

+314
-177
lines changed

compiler/rustc_codegen_llvm/src/builder.rs

+41-18
Original file line numberDiff line numberDiff line change
@@ -731,27 +731,11 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
731731
}
732732

733733
fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
734-
if !self.fptoint_sat_broken_in_llvm() {
735-
let src_ty = self.cx.val_ty(val);
736-
let float_width = self.cx.float_width(src_ty);
737-
let int_width = self.cx.int_width(dest_ty);
738-
let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
739-
return Some(self.call_intrinsic(&name, &[val]));
740-
}
741-
742-
None
734+
self.fptoint_sat(false, val, dest_ty)
743735
}
744736

745737
fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
746-
if !self.fptoint_sat_broken_in_llvm() {
747-
let src_ty = self.cx.val_ty(val);
748-
let float_width = self.cx.float_width(src_ty);
749-
let int_width = self.cx.int_width(dest_ty);
750-
let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
751-
return Some(self.call_intrinsic(&name, &[val]));
752-
}
753-
754-
None
738+
self.fptoint_sat(true, val, dest_ty)
755739
}
756740

757741
fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
@@ -1455,4 +1439,43 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
14551439
_ => false,
14561440
}
14571441
}
1442+
1443+
fn fptoint_sat(
1444+
&mut self,
1445+
signed: bool,
1446+
val: &'ll Value,
1447+
dest_ty: &'ll Type,
1448+
) -> Option<&'ll Value> {
1449+
if !self.fptoint_sat_broken_in_llvm() {
1450+
let src_ty = self.cx.val_ty(val);
1451+
let (float_ty, int_ty, vector_length) = if self.cx.type_kind(src_ty) == TypeKind::Vector
1452+
{
1453+
assert_eq!(self.cx.vector_length(src_ty), self.cx.vector_length(dest_ty));
1454+
(
1455+
self.cx.element_type(src_ty),
1456+
self.cx.element_type(dest_ty),
1457+
Some(self.cx.vector_length(src_ty)),
1458+
)
1459+
} else {
1460+
(src_ty, dest_ty, None)
1461+
};
1462+
let float_width = self.cx.float_width(float_ty);
1463+
let int_width = self.cx.int_width(int_ty);
1464+
1465+
let instr = if signed { "fptosi" } else { "fptoui" };
1466+
let name = if let Some(vector_length) = vector_length {
1467+
format!(
1468+
"llvm.{}.sat.v{}i{}.v{}f{}",
1469+
instr, vector_length, int_width, vector_length, float_width
1470+
)
1471+
} else {
1472+
format!("llvm.{}.sat.i{}.f{}", instr, int_width, float_width)
1473+
};
1474+
let f =
1475+
self.declare_cfn(&name, llvm::UnnamedAddr::No, self.type_func(&[src_ty], dest_ty));
1476+
Some(self.call(self.type_func(&[src_ty], dest_ty), f, &[val], None))
1477+
} else {
1478+
None
1479+
}
1480+
}
14581481
}

compiler/rustc_codegen_llvm/src/intrinsic.rs

+21-9
Original file line numberDiff line numberDiff line change
@@ -1688,7 +1688,7 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
16881688
bitwise_red!(simd_reduce_all: vector_reduce_and, true);
16891689
bitwise_red!(simd_reduce_any: vector_reduce_or, true);
16901690

1691-
if name == sym::simd_cast {
1691+
if name == sym::simd_cast || name == sym::simd_as {
16921692
require_simd!(ret_ty, "return");
16931693
let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx());
16941694
require!(
@@ -1714,14 +1714,26 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
17141714
let (in_style, in_width) = match in_elem.kind() {
17151715
// pointer-sized integers are normalized to the target's pointer
17161716
// width first, so `bit_width()` should be `Some` and this unwrap is safe.
1717-
ty::Int(i) => (Style::Int(true), i.bit_width().unwrap()),
1718-
ty::Uint(u) => (Style::Int(false), u.bit_width().unwrap()),
1717+
ty::Int(i) => (
1718+
Style::Int(true),
1719+
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
1720+
),
1721+
ty::Uint(u) => (
1722+
Style::Int(false),
1723+
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
1724+
),
17191725
ty::Float(f) => (Style::Float, f.bit_width()),
17201726
_ => (Style::Unsupported, 0),
17211727
};
17221728
let (out_style, out_width) = match out_elem.kind() {
1723-
ty::Int(i) => (Style::Int(true), i.bit_width().unwrap()),
1724-
ty::Uint(u) => (Style::Int(false), u.bit_width().unwrap()),
1729+
ty::Int(i) => (
1730+
Style::Int(true),
1731+
i.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
1732+
),
1733+
ty::Uint(u) => (
1734+
Style::Int(false),
1735+
u.normalize(bx.tcx().sess.target.pointer_width).bit_width().unwrap(),
1736+
),
17251737
ty::Float(f) => (Style::Float, f.bit_width()),
17261738
_ => (Style::Unsupported, 0),
17271739
};
@@ -1748,10 +1760,10 @@ unsupported {} from `{}` with element `{}` of size `{}` to `{}`"#,
17481760
});
17491761
}
17501762
(Style::Float, Style::Int(out_is_signed)) => {
1751-
return Ok(if out_is_signed {
1752-
bx.fptosi(args[0].immediate(), llret_ty)
1753-
} else {
1754-
bx.fptoui(args[0].immediate(), llret_ty)
1763+
return Ok(match (out_is_signed, name == sym::simd_as) {
1764+
(false, false) => bx.fptoui(args[0].immediate(), llret_ty),
1765+
(true, false) => bx.fptosi(args[0].immediate(), llret_ty),
1766+
(_, true) => bx.cast_float_to_int(out_is_signed, args[0].immediate(), llret_ty),
17551767
});
17561768
}
17571769
(Style::Float, Style::Float) => {

compiler/rustc_codegen_ssa/src/mir/rvalue.rs

+3-147
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@ use super::place::PlaceRef;
33
use super::{FunctionCx, LocalRef};
44

55
use crate::base;
6-
use crate::common::{self, IntPredicate, RealPredicate};
6+
use crate::common::{self, IntPredicate};
77
use crate::traits::*;
88
use crate::MemFlags;
99

10-
use rustc_apfloat::{ieee, Float, Round, Status};
1110
use rustc_middle::mir;
1211
use rustc_middle::ty::cast::{CastTy, IntTy};
1312
use rustc_middle::ty::layout::{HasTyCtxt, LayoutOf};
@@ -368,10 +367,10 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
368367
bx.inttoptr(usize_llval, ll_t_out)
369368
}
370369
(CastTy::Float, CastTy::Int(IntTy::I)) => {
371-
cast_float_to_int(&mut bx, true, llval, ll_t_in, ll_t_out)
370+
bx.cast_float_to_int(true, llval, ll_t_out)
372371
}
373372
(CastTy::Float, CastTy::Int(_)) => {
374-
cast_float_to_int(&mut bx, false, llval, ll_t_in, ll_t_out)
373+
bx.cast_float_to_int(false, llval, ll_t_out)
375374
}
376375
_ => bug!("unsupported cast: {:?} to {:?}", operand.layout.ty, cast.ty),
377376
};
@@ -768,146 +767,3 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
768767
// (*) this is only true if the type is suitable
769768
}
770769
}
771-
772-
fn cast_float_to_int<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
773-
bx: &mut Bx,
774-
signed: bool,
775-
x: Bx::Value,
776-
float_ty: Bx::Type,
777-
int_ty: Bx::Type,
778-
) -> Bx::Value {
779-
if let Some(false) = bx.cx().sess().opts.debugging_opts.saturating_float_casts {
780-
return if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
781-
}
782-
783-
let try_sat_result = if signed { bx.fptosi_sat(x, int_ty) } else { bx.fptoui_sat(x, int_ty) };
784-
if let Some(try_sat_result) = try_sat_result {
785-
return try_sat_result;
786-
}
787-
788-
let int_width = bx.cx().int_width(int_ty);
789-
let float_width = bx.cx().float_width(float_ty);
790-
// LLVM's fpto[su]i returns undef when the input x is infinite, NaN, or does not fit into the
791-
// destination integer type after rounding towards zero. This `undef` value can cause UB in
792-
// safe code (see issue #10184), so we implement a saturating conversion on top of it:
793-
// Semantically, the mathematical value of the input is rounded towards zero to the next
794-
// mathematical integer, and then the result is clamped into the range of the destination
795-
// integer type. Positive and negative infinity are mapped to the maximum and minimum value of
796-
// the destination integer type. NaN is mapped to 0.
797-
//
798-
// Define f_min and f_max as the smallest and largest (finite) floats that are exactly equal to
799-
// a value representable in int_ty.
800-
// They are exactly equal to int_ty::{MIN,MAX} if float_ty has enough significand bits.
801-
// Otherwise, int_ty::MAX must be rounded towards zero, as it is one less than a power of two.
802-
// int_ty::MIN, however, is either zero or a negative power of two and is thus exactly
803-
// representable. Note that this only works if float_ty's exponent range is sufficiently large.
804-
// f16 or 256 bit integers would break this property. Right now the smallest float type is f32
805-
// with exponents ranging up to 127, which is barely enough for i128::MIN = -2^127.
806-
// On the other hand, f_max works even if int_ty::MAX is greater than float_ty::MAX. Because
807-
// we're rounding towards zero, we just get float_ty::MAX (which is always an integer).
808-
// This already happens today with u128::MAX = 2^128 - 1 > f32::MAX.
809-
let int_max = |signed: bool, int_width: u64| -> u128 {
810-
let shift_amount = 128 - int_width;
811-
if signed { i128::MAX as u128 >> shift_amount } else { u128::MAX >> shift_amount }
812-
};
813-
let int_min = |signed: bool, int_width: u64| -> i128 {
814-
if signed { i128::MIN >> (128 - int_width) } else { 0 }
815-
};
816-
817-
let compute_clamp_bounds_single = |signed: bool, int_width: u64| -> (u128, u128) {
818-
let rounded_min = ieee::Single::from_i128_r(int_min(signed, int_width), Round::TowardZero);
819-
assert_eq!(rounded_min.status, Status::OK);
820-
let rounded_max = ieee::Single::from_u128_r(int_max(signed, int_width), Round::TowardZero);
821-
assert!(rounded_max.value.is_finite());
822-
(rounded_min.value.to_bits(), rounded_max.value.to_bits())
823-
};
824-
let compute_clamp_bounds_double = |signed: bool, int_width: u64| -> (u128, u128) {
825-
let rounded_min = ieee::Double::from_i128_r(int_min(signed, int_width), Round::TowardZero);
826-
assert_eq!(rounded_min.status, Status::OK);
827-
let rounded_max = ieee::Double::from_u128_r(int_max(signed, int_width), Round::TowardZero);
828-
assert!(rounded_max.value.is_finite());
829-
(rounded_min.value.to_bits(), rounded_max.value.to_bits())
830-
};
831-
832-
let mut float_bits_to_llval = |bits| {
833-
let bits_llval = match float_width {
834-
32 => bx.cx().const_u32(bits as u32),
835-
64 => bx.cx().const_u64(bits as u64),
836-
n => bug!("unsupported float width {}", n),
837-
};
838-
bx.bitcast(bits_llval, float_ty)
839-
};
840-
let (f_min, f_max) = match float_width {
841-
32 => compute_clamp_bounds_single(signed, int_width),
842-
64 => compute_clamp_bounds_double(signed, int_width),
843-
n => bug!("unsupported float width {}", n),
844-
};
845-
let f_min = float_bits_to_llval(f_min);
846-
let f_max = float_bits_to_llval(f_max);
847-
// To implement saturation, we perform the following steps:
848-
//
849-
// 1. Cast x to an integer with fpto[su]i. This may result in undef.
850-
// 2. Compare x to f_min and f_max, and use the comparison results to select:
851-
// a) int_ty::MIN if x < f_min or x is NaN
852-
// b) int_ty::MAX if x > f_max
853-
// c) the result of fpto[su]i otherwise
854-
// 3. If x is NaN, return 0, otherwise return the result of step 2.
855-
//
856-
// This avoids resulting undef because values in range [f_min, f_max] by definition fit into the
857-
// destination type. It creates an undef temporary, but *producing* undef is not UB. Our use of
858-
// undef does not introduce any non-determinism either.
859-
// More importantly, the above procedure correctly implements saturating conversion.
860-
// Proof (sketch):
861-
// If x is NaN, 0 is returned by definition.
862-
// Otherwise, x is finite or infinite and thus can be compared with f_min and f_max.
863-
// This yields three cases to consider:
864-
// (1) if x in [f_min, f_max], the result of fpto[su]i is returned, which agrees with
865-
// saturating conversion for inputs in that range.
866-
// (2) if x > f_max, then x is larger than int_ty::MAX. This holds even if f_max is rounded
867-
// (i.e., if f_max < int_ty::MAX) because in those cases, nextUp(f_max) is already larger
868-
// than int_ty::MAX. Because x is larger than int_ty::MAX, the return value of int_ty::MAX
869-
// is correct.
870-
// (3) if x < f_min, then x is smaller than int_ty::MIN. As shown earlier, f_min exactly equals
871-
// int_ty::MIN and therefore the return value of int_ty::MIN is correct.
872-
// QED.
873-
874-
let int_max = bx.cx().const_uint_big(int_ty, int_max(signed, int_width));
875-
let int_min = bx.cx().const_uint_big(int_ty, int_min(signed, int_width) as u128);
876-
let zero = bx.cx().const_uint(int_ty, 0);
877-
878-
// Step 1 ...
879-
let fptosui_result = if signed { bx.fptosi(x, int_ty) } else { bx.fptoui(x, int_ty) };
880-
let less_or_nan = bx.fcmp(RealPredicate::RealULT, x, f_min);
881-
let greater = bx.fcmp(RealPredicate::RealOGT, x, f_max);
882-
883-
// Step 2: We use two comparisons and two selects, with %s1 being the
884-
// result:
885-
// %less_or_nan = fcmp ult %x, %f_min
886-
// %greater = fcmp ogt %x, %f_max
887-
// %s0 = select %less_or_nan, int_ty::MIN, %fptosi_result
888-
// %s1 = select %greater, int_ty::MAX, %s0
889-
// Note that %less_or_nan uses an *unordered* comparison. This
890-
// comparison is true if the operands are not comparable (i.e., if x is
891-
// NaN). The unordered comparison ensures that s1 becomes int_ty::MIN if
892-
// x is NaN.
893-
//
894-
// Performance note: Unordered comparison can be lowered to a "flipped"
895-
// comparison and a negation, and the negation can be merged into the
896-
// select. Therefore, it is not necessarily any more expensive than an
897-
// ordered ("normal") comparison. Whether these optimizations will be
898-
// performed is ultimately up to the backend, but at least x86 does
899-
// perform them.
900-
let s0 = bx.select(less_or_nan, int_min, fptosui_result);
901-
let s1 = bx.select(greater, int_max, s0);
902-
903-
// Step 3: NaN replacement.
904-
// For unsigned types, the above step already yielded int_ty::MIN == 0 if x is NaN.
905-
// Therefore we only need to execute this step for signed integer types.
906-
if signed {
907-
// LLVM has no isNaN predicate, so we use (x == x) instead
908-
let cmp = bx.fcmp(RealPredicate::RealOEQ, x, x);
909-
bx.select(cmp, s1, zero)
910-
} else {
911-
s1
912-
}
913-
}

0 commit comments

Comments
 (0)