Skip to content

Commit 2dbac1f

Browse files
committed
Add intrinsics for float arithmetic with fast flag enabled
`fast` a.k.a. UnsafeAlgebra is the flag for enabling all "unsafe" (according to LLVM) float optimizations. See LangRef for more information: http://llvm.org/docs/LangRef.html#fast-math-flags Providing these operations with less precise associativity rules (for example) is useful to numerical applications. For example, the summation loop: let mut sum = 0.; for element in data { sum += *element; } Using the default floating point semantics, this loop expresses that the floats must be added in sequence, one after another. This constraint is usually completely unintended, and it means that no autovectorization is possible.
1 parent 235d774 commit 2dbac1f

File tree

9 files changed

+261
-0
lines changed

9 files changed

+261
-0
lines changed

src/libcore/intrinsics.rs

+26
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,32 @@ extern "rust-intrinsic" {
539539
/// Returns the nearest integer to an `f64`. Rounds half-way cases away from zero.
540540
pub fn roundf64(x: f64) -> f64;
541541

542+
/// Float addition that allows optimizations based on algebraic rules.
543+
/// May assume inputs are finite.
544+
#[cfg(not(stage0))]
545+
pub fn fadd_fast<T>(a: T, b: T) -> T;
546+
547+
/// Float subtraction that allows optimizations based on algebraic rules.
548+
/// May assume inputs are finite.
549+
#[cfg(not(stage0))]
550+
pub fn fsub_fast<T>(a: T, b: T) -> T;
551+
552+
/// Float multiplication that allows optimizations based on algebraic rules.
553+
/// May assume inputs are finite.
554+
#[cfg(not(stage0))]
555+
pub fn fmul_fast<T>(a: T, b: T) -> T;
556+
557+
/// Float division that allows optimizations based on algebraic rules.
558+
/// May assume inputs are finite.
559+
#[cfg(not(stage0))]
560+
pub fn fdiv_fast<T>(a: T, b: T) -> T;
561+
562+
/// Float remainder that allows optimizations based on algebraic rules.
563+
/// May assume inputs are finite.
564+
#[cfg(not(stage0))]
565+
pub fn frem_fast<T>(a: T, b: T) -> T;
566+
567+
542568
/// Returns the number of bits set in an integer type `T`
543569
pub fn ctpop<T>(x: T) -> T;
544570

src/librustc_llvm/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,7 @@ extern {
13101310
-> ValueRef;
13111311
pub fn LLVMBuildNot(B: BuilderRef, V: ValueRef, Name: *const c_char)
13121312
-> ValueRef;
1313+
pub fn LLVMRustSetHasUnsafeAlgebra(Instr: ValueRef);
13131314

13141315
/* Memory */
13151316
pub fn LLVMBuildAlloca(B: BuilderRef, Ty: TypeRef, Name: *const c_char)

src/librustc_trans/trans/build.rs

+60
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,18 @@ pub fn FAdd(cx: Block,
221221
B(cx).fadd(lhs, rhs)
222222
}
223223

224+
pub fn FAddFast(cx: Block,
225+
lhs: ValueRef,
226+
rhs: ValueRef,
227+
debug_loc: DebugLoc)
228+
-> ValueRef {
229+
if cx.unreachable.get() {
230+
return _Undef(lhs);
231+
}
232+
debug_loc.apply(cx.fcx);
233+
B(cx).fadd_fast(lhs, rhs)
234+
}
235+
224236
pub fn Sub(cx: Block,
225237
lhs: ValueRef,
226238
rhs: ValueRef,
@@ -269,6 +281,18 @@ pub fn FSub(cx: Block,
269281
B(cx).fsub(lhs, rhs)
270282
}
271283

284+
pub fn FSubFast(cx: Block,
285+
lhs: ValueRef,
286+
rhs: ValueRef,
287+
debug_loc: DebugLoc)
288+
-> ValueRef {
289+
if cx.unreachable.get() {
290+
return _Undef(lhs);
291+
}
292+
debug_loc.apply(cx.fcx);
293+
B(cx).fsub_fast(lhs, rhs)
294+
}
295+
272296
pub fn Mul(cx: Block,
273297
lhs: ValueRef,
274298
rhs: ValueRef,
@@ -317,6 +341,18 @@ pub fn FMul(cx: Block,
317341
B(cx).fmul(lhs, rhs)
318342
}
319343

344+
pub fn FMulFast(cx: Block,
345+
lhs: ValueRef,
346+
rhs: ValueRef,
347+
debug_loc: DebugLoc)
348+
-> ValueRef {
349+
if cx.unreachable.get() {
350+
return _Undef(lhs);
351+
}
352+
debug_loc.apply(cx.fcx);
353+
B(cx).fmul_fast(lhs, rhs)
354+
}
355+
320356
pub fn UDiv(cx: Block,
321357
lhs: ValueRef,
322358
rhs: ValueRef,
@@ -365,6 +401,18 @@ pub fn FDiv(cx: Block,
365401
B(cx).fdiv(lhs, rhs)
366402
}
367403

404+
pub fn FDivFast(cx: Block,
405+
lhs: ValueRef,
406+
rhs: ValueRef,
407+
debug_loc: DebugLoc)
408+
-> ValueRef {
409+
if cx.unreachable.get() {
410+
return _Undef(lhs);
411+
}
412+
debug_loc.apply(cx.fcx);
413+
B(cx).fdiv_fast(lhs, rhs)
414+
}
415+
368416
pub fn URem(cx: Block,
369417
lhs: ValueRef,
370418
rhs: ValueRef,
@@ -401,6 +449,18 @@ pub fn FRem(cx: Block,
401449
B(cx).frem(lhs, rhs)
402450
}
403451

452+
pub fn FRemFast(cx: Block,
453+
lhs: ValueRef,
454+
rhs: ValueRef,
455+
debug_loc: DebugLoc)
456+
-> ValueRef {
457+
if cx.unreachable.get() {
458+
return _Undef(lhs);
459+
}
460+
debug_loc.apply(cx.fcx);
461+
B(cx).frem_fast(lhs, rhs)
462+
}
463+
404464
pub fn Shl(cx: Block,
405465
lhs: ValueRef,
406466
rhs: ValueRef,

src/librustc_trans/trans/builder.rs

+46
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
226226
}
227227
}
228228

229+
pub fn fadd_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
230+
self.count_insn("fadd");
231+
unsafe {
232+
let instr = llvm::LLVMBuildFAdd(self.llbuilder, lhs, rhs, noname());
233+
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
234+
instr
235+
}
236+
}
237+
229238
pub fn sub(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
230239
self.count_insn("sub");
231240
unsafe {
@@ -254,6 +263,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
254263
}
255264
}
256265

266+
pub fn fsub_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
267+
self.count_insn("sub");
268+
unsafe {
269+
let instr = llvm::LLVMBuildFSub(self.llbuilder, lhs, rhs, noname());
270+
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
271+
instr
272+
}
273+
}
274+
257275
pub fn mul(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
258276
self.count_insn("mul");
259277
unsafe {
@@ -282,6 +300,16 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
282300
}
283301
}
284302

303+
pub fn fmul_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
304+
self.count_insn("fmul");
305+
unsafe {
306+
let instr = llvm::LLVMBuildFMul(self.llbuilder, lhs, rhs, noname());
307+
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
308+
instr
309+
}
310+
}
311+
312+
285313
pub fn udiv(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
286314
self.count_insn("udiv");
287315
unsafe {
@@ -310,6 +338,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
310338
}
311339
}
312340

341+
pub fn fdiv_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
342+
self.count_insn("fdiv");
343+
unsafe {
344+
let instr = llvm::LLVMBuildFDiv(self.llbuilder, lhs, rhs, noname());
345+
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
346+
instr
347+
}
348+
}
349+
313350
pub fn urem(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
314351
self.count_insn("urem");
315352
unsafe {
@@ -331,6 +368,15 @@ impl<'a, 'tcx> Builder<'a, 'tcx> {
331368
}
332369
}
333370

371+
pub fn frem_fast(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
372+
self.count_insn("frem");
373+
unsafe {
374+
let instr = llvm::LLVMBuildFRem(self.llbuilder, lhs, rhs, noname());
375+
llvm::LLVMRustSetHasUnsafeAlgebra(instr);
376+
instr
377+
}
378+
}
379+
334380
pub fn shl(&self, lhs: ValueRef, rhs: ValueRef) -> ValueRef {
335381
self.count_insn("shl");
336382
unsafe {

src/librustc_trans/trans/intrinsic.rs

+37
Original file line numberDiff line numberDiff line change
@@ -658,6 +658,29 @@ pub fn trans_intrinsic_call<'a, 'blk, 'tcx>(mut bcx: Block<'blk, 'tcx>,
658658
}
659659

660660
},
661+
(_, "fadd_fast") | (_, "fsub_fast") | (_, "fmul_fast") | (_, "fdiv_fast") |
662+
(_, "frem_fast") => {
663+
let sty = &arg_tys[0].sty;
664+
match float_type_width(sty) {
665+
Some(_width) =>
666+
match &*name {
667+
"fadd_fast" => FAddFast(bcx, llargs[0], llargs[1], call_debug_location),
668+
"fsub_fast" => FSubFast(bcx, llargs[0], llargs[1], call_debug_location),
669+
"fmul_fast" => FMulFast(bcx, llargs[0], llargs[1], call_debug_location),
670+
"fdiv_fast" => FDivFast(bcx, llargs[0], llargs[1], call_debug_location),
671+
"frem_fast" => FRemFast(bcx, llargs[0], llargs[1], call_debug_location),
672+
_ => unreachable!(),
673+
},
674+
None => {
675+
span_invalid_monomorphization_error(
676+
tcx.sess, span,
677+
&format!("invalid monomorphization of `{}` intrinsic: \
678+
expected basic float type, found `{}`", name, sty));
679+
C_null(llret_ty)
680+
}
681+
}
682+
683+
},
661684

662685

663686
(_, "return_address") => {
@@ -1700,3 +1723,17 @@ fn int_type_width_signed<'tcx>(sty: &ty::TypeVariants<'tcx>, ccx: &CrateContext)
17001723
_ => None,
17011724
}
17021725
}
1726+
1727+
// Returns the width of a float TypeVariant
1728+
// Returns None if the type is not a float
1729+
fn float_type_width<'tcx>(sty: &ty::TypeVariants<'tcx>)
1730+
-> Option<u64> {
1731+
use rustc::middle::ty::TyFloat;
1732+
match *sty {
1733+
TyFloat(t) => Some(match t {
1734+
ast::FloatTy::F32 => 32,
1735+
ast::FloatTy::F64 => 64,
1736+
}),
1737+
_ => None,
1738+
}
1739+
}

src/librustc_typeck/check/intrinsic.rs

+2
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,8 @@ pub fn check_intrinsic_type(ccx: &CrateCtxt, it: &hir::ForeignItem) {
280280

281281
"overflowing_add" | "overflowing_sub" | "overflowing_mul" =>
282282
(1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
283+
"fadd_fast" | "fsub_fast" | "fmul_fast" | "fdiv_fast" | "frem_fast" =>
284+
(1, vec![param(ccx, 0), param(ccx, 0)], param(ccx, 0)),
283285

284286
"return_address" => (0, vec![], tcx.mk_imm_ptr(tcx.types.u8)),
285287

src/rustllvm/RustWrapper.cpp

+5
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,11 @@ extern "C" void LLVMRemoveFunctionAttrString(LLVMValueRef fn, unsigned index, co
164164
to_remove));
165165
}
166166

167+
// enable fpmath flag UnsafeAlgebra
168+
extern "C" void LLVMRustSetHasUnsafeAlgebra(LLVMValueRef Instr) {
169+
unwrap<Instruction>(Instr)->setHasUnsafeAlgebra(true);
170+
}
171+
167172
extern "C" LLVMValueRef LLVMBuildAtomicLoad(LLVMBuilderRef B,
168173
LLVMValueRef source,
169174
const char* Name,

src/test/codegen/float_math.rs

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// compile-flags: -C no-prepopulate-passes
12+
13+
#![crate_type = "lib"]
14+
#![feature(core_intrinsics)]
15+
16+
use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
17+
18+
// Baseline: the ordinary `+` operator must lower to a plain float add that
// does not carry the fast-math flag.
// CHECK-LABEL: @add
#[no_mangle]
pub fn add(x: f32, y: f32) -> f32 {
// CHECK: fadd float
// CHECK-NOT: fast
    x + y
}
25+
26+
// CHECK-LABEL: @addition
27+
#[no_mangle]
28+
pub fn addition(x: f32, y: f32) -> f32 {
29+
// CHECK: fadd fast float
30+
unsafe {
31+
fadd_fast(x, y)
32+
}
33+
}
34+
35+
// CHECK-LABEL: @subtraction
36+
#[no_mangle]
37+
pub fn subtraction(x: f32, y: f32) -> f32 {
38+
// CHECK: fsub fast float
39+
unsafe {
40+
fsub_fast(x, y)
41+
}
42+
}
43+
44+
// CHECK-LABEL: @multiplication
45+
#[no_mangle]
46+
pub fn multiplication(x: f32, y: f32) -> f32 {
47+
// CHECK: fmul fast float
48+
unsafe {
49+
fmul_fast(x, y)
50+
}
51+
}
52+
53+
// CHECK-LABEL: @division
54+
#[no_mangle]
55+
pub fn division(x: f32, y: f32) -> f32 {
56+
// CHECK: fdiv fast float
57+
unsafe {
58+
fdiv_fast(x, y)
59+
}
60+
}

src/test/run-pass/float_math.rs

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
#![feature(core_intrinsics)]
12+
13+
use std::intrinsics::{fadd_fast, fsub_fast, fmul_fast, fdiv_fast, frem_fast};
14+
15+
fn main() {
16+
// make sure they all map to the correct operation
17+
unsafe {
18+
assert_eq!(fadd_fast(1., 2.), 1. + 2.);
19+
assert_eq!(fsub_fast(1., 2.), 1. - 2.);
20+
assert_eq!(fmul_fast(2., 3.), 2. * 3.);
21+
assert_eq!(fdiv_fast(10., 5.), 10. / 5.);
22+
assert_eq!(frem_fast(10., 5.), 10. % 5.);
23+
}
24+
}

0 commit comments

Comments
 (0)