Add intrinsics for float arithmetic with fast flag enabled

bluss · bluss · commit 2dbac1fb8ed4 · 2016-03-18T17:31:41.000+01:00
`fast` a.k.a UnsafeAlgebra is the flag for enabling all "unsafe" (according to llvm) float optimizations. See LangRef for more information http://llvm.org/docs/LangRef.html#fast-math-flags Providing these operations with less precise associativity rules (for example) is useful to numerical applications. For example, the summation loop: let sum = 0.; for element in data { sum += *element; } Using the default floating point semantics, this loop expresses the floats must be added in a sequence, one after another. This constraint is usually completely unintended, and it means that no autovectorization is possible.
diff --git a/src/libcore/intrinsics.rs b/src/libcore/intrinsics.rs
@@ -539,6 +539,32 @@ extern "rust-intrinsic" {
     /// Returns the nearest integer to an `f64`. Rounds half-way cases away from zero.
     pub fn roundf64(x: f64) -> f64;
 
+    /// Float addition that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    #[cfg(not(stage0))]
+    pub fn fadd_fast<T>(a: T, b: T) -> T;
+
+    /// Float subtraction that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    #[cfg(not(stage0))]
+    pub fn fsub_fast<T>(a: T, b: T) -> T;
+
+    /// Float multiplication that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    #[cfg(not(stage0))]
+    pub fn fmul_fast<T>(a: T, b: T) -> T;
+
+    /// Float division that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    #[cfg(not(stage0))]
+    pub fn fdiv_fast<T>(a: T, b: T) -> T;
+
+    /// Float remainder that allows optimizations based on algebraic rules.
+    /// May assume inputs are finite.
+    #[cfg(not(stage0))]
+    pub fn frem_fast<T>(a: T, b: T) -> T;
+
+
     /// Returns the number of bits set in an integer type `T`
     pub fn ctpop<T>(x: T) -> T;
 
diff --git a/src/librustc_llvm/lib.rs b/src/librustc_llvm/lib.rs
@@ -1310,6 +1310,7 @@ extern {
                          -> ValueRef;
     pub fn LLVMBuildNot(B: BuilderRef, V: ValueRef, Name: *const c_char)
                         -> ValueRef;
+    pub fn LLVMRustSetHasUnsafeAlgebra(Instr: ValueRef);
 
     /* Memory */
     pub fn LLVMBuildAlloca(B: BuilderRef, Ty: TypeRef, Name: *const c_char)
diff --git a/src/librustc_trans/trans/build.rs b/src/librustc_trans/trans/build.rs
@@ -221,6 +221,18 @@ pub fn FAdd(cx: Block,
     B(cx).fadd(lhs, rhs)
 }
 
+pub fn FAddFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if cx.unreachable.get() {
+        return _Undef(lhs);
+    }
+    debug_loc.apply(cx.fcx);
+    B(cx).fadd_fast(lhs, rhs)
+}
+
 pub fn Sub(cx: Block,
            lhs: ValueRef,
            rhs: ValueRef,
@@ -269,6 +281,18 @@ pub fn FSub(cx: Block,
     B(cx).fsub(lhs, rhs)
 }
 
+pub fn FSubFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if cx.unreachable.get() {
+        return _Undef(lhs);
+    }
+    debug_loc.apply(cx.fcx);
+    B(cx).fsub_fast(lhs, rhs)
+}
+
 pub fn Mul(cx: Block,
            lhs: ValueRef,
            rhs: ValueRef,
@@ -317,6 +341,18 @@ pub fn FMul(cx: Block,
     B(cx).fmul(lhs, rhs)
 }
 
+pub fn FMulFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if cx.unreachable.get() {
+        return _Undef(lhs);
+    }
+    debug_loc.apply(cx.fcx);
+    B(cx).fmul_fast(lhs, rhs)
+}
+
 pub fn UDiv(cx: Block,
             lhs: ValueRef,
             rhs: ValueRef,
@@ -365,6 +401,18 @@ pub fn FDiv(cx: Block,
     B(cx).fdiv(lhs, rhs)
 }
 
+pub fn FDivFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if cx.unreachable.get() {
+        return _Undef(lhs);
+    }
+    debug_loc.apply(cx.fcx);
+    B(cx).fdiv_fast(lhs, rhs)
+}
+
 pub fn URem(cx: Block,
             lhs: ValueRef,
             rhs: ValueRef,
@@ -401,6 +449,18 @@ pub fn FRem(cx: Block,
     B(cx).frem(lhs, rhs)
 }
 
+pub fn FRemFast(cx: Block,
+                lhs: ValueRef,
+                rhs: ValueRef,
+                debug_loc: DebugLoc)
+            -> ValueRef {
+    if cx.unreachable.get() {
+        return _Undef(lhs);
+    }
+    debug_loc.apply(cx.fcx);
+    B(cx).frem_fast(lhs, rhs)
+}
+
 pub fn Shl(cx: Block,
            lhs: ValueRef,
            rhs: ValueRef,
diff --git a/src/librustc_trans/trans/builder.rs b/src/librustc_trans/trans/builder.rs
@@ -226,6 +226,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         }
     }
 
+    pub fn fadd_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("fadd");
+        unsafe {
+            let instr = llvm::LLVMBuildFAdd(self.llbuilder, lhs, rhs, noname());
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+
     pub fn sub(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
         self.count_insn("sub");
         unsafe {
@@ -254,6 +263,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         }
     }
 
+    pub fn fsub_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("sub");
+        unsafe {
+            let instr = llvm::LLVMBuildFSub(self.llbuilder, lhs, rhs, noname());
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+
     pub fn mul(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
         self.count_insn("mul");
         unsafe {
@@ -282,6 +300,16 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         }
     }
 
+    pub fn fmul_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("fmul");
+        unsafe {
+            let instr = llvm::LLVMBuildFMul(self.llbuilder, lhs, rhs, noname());
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+
+
     pub fn udiv(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
         self.count_insn("udiv");
         unsafe {
@@ -310,6 +338,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         }
     }
 
+    pub fn fdiv_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("fdiv");
+        unsafe {
+            let instr = llvm::LLVMBuildFDiv(self.llbuilder, lhs, rhs, noname());
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+
     pub fn urem(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
         self.count_insn("urem");
         unsafe {
@@ -331,6 +368,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
         }
     }
 
+    pub fn frem_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
+        self.count_insn("frem");
+        unsafe {
+            let instr = llvm::LLVMBuildFRem(self.llbuilder, lhs, rhs, noname());
+            llvm::LLVMRustSetHasUnsafeAlgebra(instr);
+            instr
+        }
+    }
+
     pub fn shl(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
         self.count_insn("shl");
         unsafe {
diff --git a/src/librustc_trans/trans/intrinsic.rs b/src/librustc_trans/trans/intrinsic.rs
@@ -658,6 +658,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
             }
 
         },
+        (_, "fadd_fast") | (_, "fsub_fast") | (_, "fmul_fast") | (_, "fdiv_fast") |
+        (_, "frem_fast") => {
+            let sty = &arg_tys[0].sty;
+            match float_type_width(sty) {
+                Some(_width) =>
+                    match &*name {
+                        "fadd_fast" => FAddFast(bcx, llargs[0], llargs[1], call_debug_location),
+                        "fsub_fast" => FSubFast(bcx, llargs[0], llargs[1], call_debug_location),
+                        "fmul_fast" => FMulFast(bcx, llargs[0], llargs[1], call_debug_location),
+                        "fdiv_fast" => FDivFast(bcx, llargs[0], llargs[1], call_debug_location),
+                        "frem_fast" => FRemFast(bcx, llargs[0], llargs[1], call_debug_location),
+                        _ => unreachable!(),
+                    },
+                None => {
+                    span_invalid_monomorphization_error(
+                        tcx.sess, span,
+                        &format!("invalid monomorphization of `{}` intrinsic: \
+                                  expected basic float type, found `{}`", name, sty));
+                        C_null(llret_ty)
+                }
+            }
+
+        },
 
 
         (_, "return_address") => {
@@ -1700,3 +1723,17 @@ fn int_type_width_signed<'tcx>(sty: &ty::TypeVariants<'tcx>, ccx: &CrateContext)
         _ => None,
     }
 }
+
+// Returns the width of a float TypeVariant
+// Returns None if the type is not a float
+fn float_type_width<'tcx>(sty: &ty::TypeVariants<'tcx>)
+        -> Option<u64> {
+    use rustc::middle::ty::TyFloat;
+    match *sty {
+        TyFloat(t) => Some(match t {
+            ast::FloatTy::F32 => 32,
+            ast::FloatTy::F64 => 64,
+        }),
+        _ => None,
+    }
+}
diff --git a/src/librustc_typeck/check/intrinsic.rs b/src/librustc_typeck/check/intrinsic.rs
@@ -280,6 +280,8 @@ pub fn check_intrinsic_type(ccx: &CrateCtxt, it: &hir::ForeignItem) {
 
             "overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
                 (1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
+            "fadd_fast" | "fsub_fast" | "fmul_fast" | "fdiv_fast" | "frem_fast" =>
+                (1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
 
             "return_address" => (0, vec![], tcx.mk_imm_ptr(tcx.types.u8)),
 
diff --git a/src/rustllvm/RustWrapper.cpp b/src/rustllvm/RustWrapper.cpp
@@ -164,6 +164,11 @@ extern "C" void LLVMRemoveFunctionAttrString(LLVMValueRef fn, unsigned index, co
                                           to_remove));
 }
 
+// enable fpmath flag UnsafeAlgebra
+extern "C" void LLVMRustSetHasUnsafeAlgebra(LLVMValueRef Instr) {
+    unwrap<Instruction>(Instr)->setHasUnsafeAlgebra(true);
+}
+
 extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B,
                                             LLVMValueRef source,
                                             const char* Name,
diff --git a/src/test/codegen/float_math.rs b/src/test/codegen/float_math.rs
@@ -0,0 +1,60 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -C no-prepopulate-passes
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+
+use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
+
+// CHECK-LABEL: @add
+#[no_mangle]
+pub fn add(x: f32, y: f32) -> f32 {
+// CHECK: fadd float
+// CHECK-NOT fast
+    x + y
+}
+
+// CHECK-LABEL: @addition
+#[no_mangle]
+pub fn addition(x: f32, y: f32) -> f32 {
+// CHECK: fadd fast float
+    unsafe {
+        fadd_fast(x, y)
+    }
+}
+
+// CHECK-LABEL: @subtraction
+#[no_mangle]
+pub fn subtraction(x: f32, y: f32) -> f32 {
+// CHECK: fsub fast float
+    unsafe {
+        fsub_fast(x, y)
+    }
+}
+
+// CHECK-LABEL: @multiplication
+#[no_mangle]
+pub fn multiplication(x: f32, y: f32) -> f32 {
+// CHECK: fmul fast float
+    unsafe {
+        fmul_fast(x, y)
+    }
+}
+
+// CHECK-LABEL: @division
+#[no_mangle]
+pub fn division(x: f32, y: f32) -> f32 {
+// CHECK: fdiv fast float
+    unsafe {
+        fdiv_fast(x, y)
+    }
+}
diff --git a/src/test/run-pass/float_math.rs b/src/test/run-pass/float_math.rs
@@ -0,0 +1,24 @@
+// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#![feature(core_intrinsics)]
+
+use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
+
+fn main() {
+    // make sure they all map to the correct operation
+    unsafe {
+        assert_eq!(fadd_fast(1., 2.), 1. + 2.);
+        assert_eq!(fsub_fast(1., 2.), 1. - 2.);
+        assert_eq!(fmul_fast(2., 3.), 2. * 3.);
+        assert_eq!(fdiv_fast(10., 5.), 10. / 5.);
+        assert_eq!(frem_fast(10., 5.), 10. % 5.);
+    }
+}