Skip to content

Commit 37046c3

Browse files
authored
Rollup merge of rust-lang#121223 - RalfJung:simd-intrinsics, r=Amanieu
intrinsics::simd: add missing functions Turns out stdarch declares a bunch more SIMD intrinsics that are still missing from libcore. I hope I got the docs and in particular the safety requirements right for these "unordered" and "nanless" intrinsics. Many of these are unused even in stdarch, but they are implemented in the codegen backend, so we may as well list them here. r? `@Amanieu` Cc `@calebzulawski` `@workingjubilee`
2 parents 66ee1ac + f70538c commit 37046c3

File tree

7 files changed

+116
-11
lines changed

7 files changed

+116
-11
lines changed

compiler/rustc_codegen_gcc/src/builder.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1727,7 +1727,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
17271727
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
17281728
}
17291729

1730-
pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
1730+
pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
17311731
unimplemented!();
17321732
}
17331733

@@ -1747,7 +1747,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
17471747
unimplemented!();
17481748
}
17491749

1750-
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
1750+
pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
17511751
unimplemented!();
17521752
}
17531753

compiler/rustc_codegen_gcc/src/intrinsic/simd.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
989989

990990
arith_red!(
991991
simd_reduce_add_unordered: BinaryOp::Plus,
992-
vector_reduce_fadd_fast,
992+
vector_reduce_fadd_reassoc,
993993
false,
994994
add,
995995
0.0 // TODO: Use this argument.
996996
);
997997
arith_red!(
998998
simd_reduce_mul_unordered: BinaryOp::Mult,
999-
vector_reduce_fmul_fast,
999+
vector_reduce_fmul_reassoc,
10001000
false,
10011001
mul,
10021002
1.0

compiler/rustc_codegen_llvm/src/builder.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -1327,17 +1327,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
13271327
pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
13281328
unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }
13291329
}
1330-
pub fn vector_reduce_fadd_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
1330+
pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
13311331
unsafe {
13321332
let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
1333-
llvm::LLVMRustSetFastMath(instr);
1333+
llvm::LLVMRustSetAllowReassoc(instr);
13341334
instr
13351335
}
13361336
}
1337-
pub fn vector_reduce_fmul_fast(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
1337+
pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
13381338
unsafe {
13391339
let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
1340-
llvm::LLVMRustSetFastMath(instr);
1340+
llvm::LLVMRustSetAllowReassoc(instr);
13411341
instr
13421342
}
13431343
}

compiler/rustc_codegen_llvm/src/intrinsic.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
18801880
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
18811881
arith_red!(
18821882
simd_reduce_add_unordered: vector_reduce_add,
1883-
vector_reduce_fadd_fast,
1883+
vector_reduce_fadd_reassoc,
18841884
false,
18851885
add,
18861886
0.0
18871887
);
18881888
arith_red!(
18891889
simd_reduce_mul_unordered: vector_reduce_mul,
1890-
vector_reduce_fmul_fast,
1890+
vector_reduce_fmul_reassoc,
18911891
false,
18921892
mul,
18931893
1.0

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1618,6 +1618,7 @@ extern "C" {
16181618
) -> &'a Value;
16191619

16201620
pub fn LLVMRustSetFastMath(Instr: &Value);
1621+
pub fn LLVMRustSetAllowReassoc(Instr: &Value);
16211622

16221623
// Miscellaneous instructions
16231624
pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value;

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

+10-1
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ extern "C" LLVMAttributeRef LLVMRustCreateMemoryEffectsAttr(LLVMContextRef C,
418418
}
419419
}
420420

421-
// Enable a fast-math flag
421+
// Enable all fast-math flags
422422
//
423423
// https://llvm.org/docs/LangRef.html#fast-math-flags
424424
extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
@@ -427,6 +427,15 @@ extern "C" void LLVMRustSetFastMath(LLVMValueRef V) {
427427
}
428428
}
429429

430+
// Enable the reassoc fast-math flag
431+
//
432+
// https://llvm.org/docs/LangRef.html#fast-math-flags
433+
extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
434+
if (auto I = dyn_cast<Instruction>(unwrap<Value>(V))) {
435+
I->setHasAllowReassoc(true);
436+
}
437+
}
438+
430439
extern "C" LLVMValueRef
431440
LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source,
432441
const char *Name, LLVMAtomicOrdering Order) {

library/core/src/intrinsics/simd.rs

+95
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,24 @@
33
//! In this module, a "vector" is any `repr(simd)` type.
44
55
extern "platform-intrinsic" {
6+
/// Insert an element into a vector, returning the updated vector.
7+
///
8+
/// `T` must be a vector with element type `U`.
9+
///
10+
/// # Safety
11+
///
12+
/// `idx` must be in-bounds of the vector.
13+
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
14+
15+
/// Extract an element from a vector.
16+
///
17+
/// `T` must be a vector with element type `U`.
18+
///
19+
/// # Safety
20+
///
21+
/// `idx` must be in-bounds of the vector.
22+
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
23+
624
/// Add two simd vectors elementwise.
725
///
826
/// `T` must be a vector of integer or floating point primitive types.
@@ -315,6 +333,14 @@ extern "platform-intrinsic" {
315333
/// Starting with the value `y`, add the elements of `x` and accumulate.
316334
pub fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
317335

336+
/// Add elements within a vector in arbitrary order. May also be re-associated with
337+
/// unordered additions on the inputs/outputs.
338+
///
339+
/// `T` must be a vector of integer or floating-point primitive types.
340+
///
341+
/// `U` must be the element type of `T`.
342+
pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
343+
318344
/// Multiply elements within a vector from left to right.
319345
///
320346
/// `T` must be a vector of integer or floating-point primitive types.
@@ -324,6 +350,14 @@ extern "platform-intrinsic" {
324350
/// Starting with the value `y`, multiply the elements of `x` and accumulate.
325351
pub fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
326352

353+
/// Multiply elements within a vector in arbitrary order. May also be re-associated with
354+
/// unordered multiplications on the inputs/outputs.
355+
///
356+
/// `T` must be a vector of integer or floating-point primitive types.
357+
///
358+
/// `U` must be the element type of `T`.
359+
pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
360+
327361
/// Check if all mask values are true.
328362
///
329363
/// `T` must be a vector of integer primitive types.
@@ -349,6 +383,19 @@ extern "platform-intrinsic" {
349383
/// For floating-point values, uses IEEE-754 `maxNum`.
350384
pub fn simd_reduce_max<T, U>(x: T) -> U;
351385

386+
/// Return the maximum element of a vector.
387+
///
388+
/// `T` must be a vector of integer or floating-point primitive types.
389+
///
390+
/// `U` must be the element type of `T`.
391+
///
392+
/// For floating-point values, uses IEEE-754 `maxNum`.
393+
///
394+
/// # Safety
395+
///
396+
/// All input elements must be finite (i.e., not NAN and not +/- INF).
397+
pub fn simd_reduce_max_nanless<T, U>(x: T) -> U;
398+
352399
/// Return the minimum element of a vector.
353400
///
354401
/// `T` must be a vector of integer or floating-point primitive types.
@@ -358,6 +405,19 @@ extern "platform-intrinsic" {
358405
/// For floating-point values, uses IEEE-754 `minNum`.
359406
pub fn simd_reduce_min<T, U>(x: T) -> U;
360407

408+
/// Return the minimum element of a vector.
409+
///
410+
/// `T` must be a vector of integer or floating-point primitive types.
411+
///
412+
/// `U` must be the element type of `T`.
413+
///
414+
/// For floating-point values, uses IEEE-754 `minNum`.
415+
///
416+
/// # Safety
417+
///
418+
/// All input elements must be finite (i.e., not NAN and not +/- INF).
419+
pub fn simd_reduce_min_nanless<T, U>(x: T) -> U;
420+
361421
/// Logical "and" all elements together.
362422
///
363423
/// `T` must be a vector of integer or floating-point primitive types.
@@ -516,4 +576,39 @@ extern "platform-intrinsic" {
516576
///
517577
/// `T` must be a vector of floats.
518578
pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
579+
580+
/// Computes the sine of each element.
581+
///
582+
/// `T` must be a vector of floats.
583+
pub fn simd_fsin<T>(a: T) -> T;
584+
585+
/// Computes the cosine of each element.
586+
///
587+
/// `T` must be a vector of floats.
588+
pub fn simd_fcos<T>(a: T) -> T;
589+
590+
/// Computes the exponential function of each element.
591+
///
592+
/// `T` must be a vector of floats.
593+
pub fn simd_fexp<T>(a: T) -> T;
594+
595+
/// Computes 2 raised to the power of each element.
596+
///
597+
/// `T` must be a vector of floats.
598+
pub fn simd_fexp2<T>(a: T) -> T;
599+
600+
/// Computes the base 10 logarithm of each element.
601+
///
602+
/// `T` must be a vector of floats.
603+
pub fn simd_flog10<T>(a: T) -> T;
604+
605+
/// Computes the base 2 logarithm of each element.
606+
///
607+
/// `T` must be a vector of floats.
608+
pub fn simd_flog2<T>(a: T) -> T;
609+
610+
/// Computes the natural logarithm of each element.
611+
///
612+
/// `T` must be a vector of floats.
613+
pub fn simd_flog<T>(a: T) -> T;
519614
}

0 commit comments

Comments
 (0)