Skip to content

Commit 05dc7c9

Browse files
alexcrichton authored and Amanieu committed
Another round of wasm SIMD updates
This round is dependent on rust-lang/llvm-project#101 landing first in rust-lang/rust and won't pass CI until that does. That PR, however, will also break wasm CI because it's changing how the wasm target works. My goal here is to open this early to get it out there so that, when that PR lands in rust-lang/rust and CI breaks in stdarch, this can be merged to make CI green again. The changes here are mostly around the codegen for various intrinsics. Some wasm-specific intrinsics have been removed in favor of more general LLVM intrinsics, and other intrinsics have been removed in favor of pattern-matching codegen. The only new instruction supported as part of this change is `v128.any_true`. This leaves only one instruction unsupported in LLVM, which is `i64x2.abs`. I think the codegen for the instruction is correct in stdsimd, though, and LLVM just needs to update with a pattern-match to actually emit the opcode. That'll happen in a future LLVM update.
1 parent bc5c33c commit 05dc7c9

File tree

1 file changed

+53
-35
lines changed

1 file changed

+53
-35
lines changed

crates/core_arch/src/wasm32/simd128.rs

+53-35
Original file line numberDiff line numberDiff line change
@@ -212,14 +212,6 @@ extern "C" {
212212
fn llvm_i64x2_all_true(x: simd::i64x2) -> i32;
213213
#[link_name = "llvm.wasm.bitmask.v2i64"]
214214
fn llvm_bitmask_i64x2(a: simd::i64x2) -> i32;
215-
#[link_name = "llvm.wasm.extend.low.signed"]
216-
fn llvm_i64x2_extend_low_i32x4_s(a: simd::i32x4) -> simd::i64x2;
217-
#[link_name = "llvm.wasm.extend.high.signed"]
218-
fn llvm_i64x2_extend_high_i32x4_s(a: simd::i32x4) -> simd::i64x2;
219-
#[link_name = "llvm.wasm.extend.low.unsigned"]
220-
fn llvm_i64x2_extend_low_i32x4_u(a: simd::i32x4) -> simd::i64x2;
221-
#[link_name = "llvm.wasm.extend.high.unsigned"]
222-
fn llvm_i64x2_extend_high_i32x4_u(a: simd::i32x4) -> simd::i64x2;
223215
#[link_name = "llvm.wasm.extmul.low.signed.v2i64"]
224216
fn llvm_i64x2_extmul_low_i32x4_s(a: simd::i32x4, b: simd::i32x4) -> simd::i64x2;
225217
#[link_name = "llvm.wasm.extmul.high.signed.v2i64"]
@@ -229,13 +221,13 @@ extern "C" {
229221
#[link_name = "llvm.wasm.extmul.high.unsigned.v2i64"]
230222
fn llvm_i64x2_extmul_high_i32x4_u(a: simd::i32x4, b: simd::i32x4) -> simd::i64x2;
231223

232-
#[link_name = "llvm.wasm.ceil.v4f32"]
224+
#[link_name = "llvm.ceil.v4f32"]
233225
fn llvm_f32x4_ceil(x: simd::f32x4) -> simd::f32x4;
234-
#[link_name = "llvm.wasm.floor.v4f32"]
226+
#[link_name = "llvm.floor.v4f32"]
235227
fn llvm_f32x4_floor(x: simd::f32x4) -> simd::f32x4;
236-
#[link_name = "llvm.wasm.trunc.v4f32"]
228+
#[link_name = "llvm.trunc.v4f32"]
237229
fn llvm_f32x4_trunc(x: simd::f32x4) -> simd::f32x4;
238-
#[link_name = "llvm.wasm.nearest.v4f32"]
230+
#[link_name = "llvm.nearbyint.v4f32"]
239231
fn llvm_f32x4_nearest(x: simd::f32x4) -> simd::f32x4;
240232
#[link_name = "llvm.fabs.v4f32"]
241233
fn llvm_f32x4_abs(x: simd::f32x4) -> simd::f32x4;
@@ -250,13 +242,13 @@ extern "C" {
250242
#[link_name = "llvm.wasm.pmax.v4f32"]
251243
fn llvm_f32x4_pmax(x: simd::f32x4, y: simd::f32x4) -> simd::f32x4;
252244

253-
#[link_name = "llvm.wasm.ceil.v2f64"]
245+
#[link_name = "llvm.ceil.v2f64"]
254246
fn llvm_f64x2_ceil(x: simd::f64x2) -> simd::f64x2;
255-
#[link_name = "llvm.wasm.floor.v2f64"]
247+
#[link_name = "llvm.floor.v2f64"]
256248
fn llvm_f64x2_floor(x: simd::f64x2) -> simd::f64x2;
257-
#[link_name = "llvm.wasm.trunc.v2f64"]
249+
#[link_name = "llvm.trunc.v2f64"]
258250
fn llvm_f64x2_trunc(x: simd::f64x2) -> simd::f64x2;
259-
#[link_name = "llvm.wasm.nearest.v2f64"]
251+
#[link_name = "llvm.nearbyint.v2f64"]
260252
fn llvm_f64x2_nearest(x: simd::f64x2) -> simd::f64x2;
261253
#[link_name = "llvm.fabs.v2f64"]
262254
fn llvm_f64x2_abs(x: simd::f64x2) -> simd::f64x2;
@@ -271,18 +263,14 @@ extern "C" {
271263
#[link_name = "llvm.wasm.pmax.v2f64"]
272264
fn llvm_f64x2_pmax(x: simd::f64x2, y: simd::f64x2) -> simd::f64x2;
273265

274-
#[link_name = "llvm.wasm.trunc.saturate.signed.v4i32.v4f32"]
266+
#[link_name = "llvm.fptosi.sat.v4i32.v4f32"]
275267
fn llvm_i32x4_trunc_sat_f32x4_s(x: simd::f32x4) -> simd::i32x4;
276-
#[link_name = "llvm.wasm.trunc.saturate.unsigned.v4i32.v4f32"]
268+
#[link_name = "llvm.fptoui.sat.v4i32.v4f32"]
277269
fn llvm_i32x4_trunc_sat_f32x4_u(x: simd::f32x4) -> simd::i32x4;
278-
#[link_name = "llvm.wasm.convert.low.signed"]
279-
fn llvm_f64x2_convert_low_i32x4_s(x: simd::i32x4) -> simd::f64x2;
280-
#[link_name = "llvm.wasm.convert.low.unsigned"]
281-
fn llvm_f64x2_convert_low_i32x4_u(x: simd::i32x4) -> simd::f64x2;
282-
#[link_name = "llvm.wasm.trunc.sat.zero.signed"]
283-
fn llvm_i32x4_trunc_sat_f64x2_s_zero(x: simd::f64x2) -> simd::i32x4;
284-
#[link_name = "llvm.wasm.trunc.sat.zero.unsigned"]
285-
fn llvm_i32x4_trunc_sat_f64x2_u_zero(x: simd::f64x2) -> simd::i32x4;
270+
#[link_name = "llvm.fptosi.sat.v2i32.v2f64"]
271+
fn llvm_i32x2_trunc_sat_f64x2_s(x: simd::f64x2) -> simd::i32x2;
272+
#[link_name = "llvm.fptoui.sat.v2i32.v2f64"]
273+
fn llvm_i32x2_trunc_sat_f64x2_u(x: simd::f64x2) -> simd::i32x2;
286274
#[link_name = "llvm.wasm.demote.zero"]
287275
fn llvm_f32x4_demote_f64x2_zero(x: simd::f64x2) -> simd::f32x4;
288276
#[link_name = "llvm.wasm.promote.low"]
@@ -1836,7 +1824,7 @@ pub unsafe fn v128_bitselect(v1: v128, v2: v128, c: v128) -> v128 {
18361824

18371825
/// Returns true if any lane is nonzero or false if all lanes are zero.
18381826
#[inline]
1839-
// #[cfg_attr(test, assert_instr(v128.any_true))] // FIXME llvm
1827+
#[cfg_attr(test, assert_instr(v128.any_true))]
18401828
#[target_feature(enable = "simd128")]
18411829
pub unsafe fn v128_any_true(a: v128) -> bool {
18421830
llvm_any_true_i8x16(a.as_i8x16()) != 0
@@ -2688,7 +2676,9 @@ pub unsafe fn i64x2_bitmask(a: v128) -> i32 {
26882676
// #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_s))] // FIXME wasmtime
26892677
#[target_feature(enable = "simd128")]
26902678
pub unsafe fn i64x2_extend_low_i32x4(a: v128) -> v128 {
2691-
transmute(llvm_i64x2_extend_low_i32x4_s(a.as_i32x4()))
2679+
transmute(simd_cast::<_, simd::i64x2>(
2680+
simd_shuffle2::<_, simd::i32x2>(a.as_i32x4(), a.as_i32x4(), [0, 1]),
2681+
))
26922682
}
26932683

26942684
/// Converts high half of the smaller lane vector to a larger lane
@@ -2697,7 +2687,9 @@ pub unsafe fn i64x2_extend_low_i32x4(a: v128) -> v128 {
26972687
// #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_s))] // FIXME wasmtime
26982688
#[target_feature(enable = "simd128")]
26992689
pub unsafe fn i64x2_extend_high_i32x4(a: v128) -> v128 {
2700-
transmute(llvm_i64x2_extend_high_i32x4_s(a.as_i32x4()))
2690+
transmute(simd_cast::<_, simd::i64x2>(
2691+
simd_shuffle2::<_, simd::i32x2>(a.as_i32x4(), a.as_i32x4(), [2, 3]),
2692+
))
27012693
}
27022694

27032695
/// Converts low half of the smaller lane vector to a larger lane
@@ -2706,7 +2698,9 @@ pub unsafe fn i64x2_extend_high_i32x4(a: v128) -> v128 {
27062698
// #[cfg_attr(test, assert_instr(i64x2.extend_low_i32x4_u))] // FIXME wasmtime
27072699
#[target_feature(enable = "simd128")]
27082700
pub unsafe fn i64x2_extend_low_u32x4(a: v128) -> v128 {
2709-
transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4()))
2701+
transmute(simd_cast::<_, simd::i64x2>(
2702+
simd_shuffle2::<_, simd::u32x2>(a.as_u32x4(), a.as_u32x4(), [0, 1]),
2703+
))
27102704
}
27112705

27122706
/// Converts high half of the smaller lane vector to a larger lane
@@ -2715,7 +2709,9 @@ pub unsafe fn i64x2_extend_low_u32x4(a: v128) -> v128 {
27152709
// #[cfg_attr(test, assert_instr(i64x2.extend_high_i32x4_u))] // FIXME wasmtime
27162710
#[target_feature(enable = "simd128")]
27172711
pub unsafe fn i64x2_extend_high_u32x4(a: v128) -> v128 {
2718-
transmute(llvm_i64x2_extend_low_i32x4_u(a.as_i32x4()))
2712+
transmute(simd_cast::<_, simd::i64x2>(
2713+
simd_shuffle2::<_, simd::u32x2>(a.as_u32x4(), a.as_u32x4(), [2, 3]),
2714+
))
27192715
}
27202716

27212717
/// Shifts each lane to the left by the specified number of bits.
@@ -3137,7 +3133,11 @@ pub unsafe fn f32x4_convert_u32x4(a: v128) -> v128 {
31373133
// #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_s_zero))] // FIXME wasmtime
31383134
#[target_feature(enable = "simd128")]
31393135
pub unsafe fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
3140-
transmute(llvm_i32x4_trunc_sat_f64x2_s_zero(a.as_f64x2()))
3136+
transmute(simd_shuffle4::<simd::i32x2, simd::i32x4>(
3137+
llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
3138+
simd::i32x2::splat(0),
3139+
[0, 1, 2, 3],
3140+
))
31413141
}
31423142

31433143
/// Saturating conversion of the two double-precision floating point lanes to
@@ -3152,23 +3152,41 @@ pub unsafe fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
31523152
// #[cfg_attr(test, assert_instr(i32x4.trunc_sat_f64x2_u_zero))] // FIXME wasmtime
31533153
#[target_feature(enable = "simd128")]
31543154
pub unsafe fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
3155-
transmute(llvm_i32x4_trunc_sat_f64x2_u_zero(a.as_f64x2()))
3155+
transmute(simd_shuffle4::<simd::i32x2, simd::i32x4>(
3156+
llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),
3157+
simd::i32x2::splat(0),
3158+
[0, 1, 2, 3],
3159+
))
31563160
}
31573161

31583162
/// Lane-wise conversion from integer to floating point.
31593163
#[inline]
31603164
#[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_s))]
31613165
#[target_feature(enable = "simd128")]
31623166
pub unsafe fn f64x2_convert_low_i32x4(a: v128) -> v128 {
3163-
transmute(llvm_f64x2_convert_low_i32x4_s(a.as_i32x4()))
3167+
transmute(simd_cast::<_, simd::f64x2>(simd_shuffle2::<
3168+
simd::i32x4,
3169+
simd::i32x2,
3170+
>(
3171+
a.as_i32x4(),
3172+
a.as_i32x4(),
3173+
[0, 1],
3174+
)))
31643175
}
31653176

31663177
/// Lane-wise conversion from integer to floating point.
31673178
#[inline]
31683179
// #[cfg_attr(test, assert_instr(f64x2.convert_low_i32x4_u))] // FIXME wasmtime
31693180
#[target_feature(enable = "simd128")]
31703181
pub unsafe fn f64x2_convert_low_u32x4(a: v128) -> v128 {
3171-
transmute(llvm_f64x2_convert_low_i32x4_u(a.as_i32x4()))
3182+
transmute(simd_cast::<_, simd::f64x2>(simd_shuffle2::<
3183+
simd::u32x4,
3184+
simd::u32x2,
3185+
>(
3186+
a.as_u32x4(),
3187+
a.as_u32x4(),
3188+
[0, 1],
3189+
)))
31723190
}
31733191

31743192
/// Conversion of the two double-precision floating point lanes to two lower

0 commit comments

Comments
 (0)