Skip to content

Commit df30a0c

Browse files
committed
Fixed some more intrinsics
Added some tests, Fixed incorrect target-features, and verification code for target-features. Removed all MMX support from verification.
1 parent 8ac563f commit df30a0c

File tree

13 files changed

+333
-368
lines changed

13 files changed

+333
-368
lines changed

crates/core_arch/src/x86/avx.rs

+73-19
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
5252
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
5353
#[inline]
5454
#[target_feature(enable = "avx")]
55-
// FIXME: Should be 'vandpd' instruction.
5655
// See https://github.com/rust-lang/stdarch/issues/71
57-
#[cfg_attr(test, assert_instr(vandps))]
56+
#[cfg_attr(test, assert_instr(vandp))]
5857
#[stable(feature = "simd_x86", since = "1.27.0")]
5958
pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d {
6059
let a: u64x4 = transmute(a);
@@ -82,9 +81,8 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
8281
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
8382
#[inline]
8483
#[target_feature(enable = "avx")]
85-
// FIXME: should be `vorpd` instruction.
8684
// See <https://github.com/rust-lang/stdarch/issues/71>.
87-
#[cfg_attr(test, assert_instr(vorps))]
85+
#[cfg_attr(test, assert_instr(vorp))]
8886
#[stable(feature = "simd_x86", since = "1.27.0")]
8987
pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d {
9088
let a: u64x4 = transmute(a);
@@ -162,8 +160,7 @@ pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256
162160
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
163161
#[inline]
164162
#[target_feature(enable = "avx")]
165-
// FIXME: should be `vandnpd` instruction.
166-
#[cfg_attr(test, assert_instr(vandnps))]
163+
#[cfg_attr(test, assert_instr(vandnp))]
167164
#[stable(feature = "simd_x86", since = "1.27.0")]
168165
pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d {
169166
let a: u64x4 = transmute(a);
@@ -615,8 +612,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
615612
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
616613
#[inline]
617614
#[target_feature(enable = "avx")]
618-
// FIXME Should be 'vxorpd' instruction.
619-
#[cfg_attr(test, assert_instr(vxorps))]
615+
#[cfg_attr(test, assert_instr(vxorp))]
620616
#[stable(feature = "simd_x86", since = "1.27.0")]
621617
pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d {
622618
let a: u64x4 = transmute(a);
@@ -995,6 +991,29 @@ pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
995991
transmute(dst)
996992
}
997993

994+
/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
995+
///
996+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
997+
#[inline]
998+
#[target_feature(enable = "avx")]
999+
// This intrinsic has no corresponding instruction.
1000+
#[rustc_legacy_const_generics(1)]
1001+
#[stable(feature = "simd_x86", since = "1.27.0")]
1002+
pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
1003+
static_assert_uimm_bits!(INDEX, 3);
1004+
simd_extract!(a.as_i32x8(), INDEX as u32)
1005+
}
1006+
1007+
/// Returns the first element of the input vector of `[8 x i32]`.
1008+
///
1009+
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
1010+
#[inline]
1011+
#[target_feature(enable = "avx")]
1012+
#[stable(feature = "simd_x86", since = "1.27.0")]
1013+
pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
1014+
simd_extract!(a.as_i32x8(), 0)
1015+
}
1016+
9981017
/// Zeroes the contents of all XMM or YMM registers.
9991018
///
10001019
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
@@ -1378,7 +1397,7 @@ pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m25
13781397
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
13791398
#[inline]
13801399
#[target_feature(enable = "avx")]
1381-
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1400+
#[cfg_attr(test, assert_instr(vmovap))]
13821401
#[stable(feature = "simd_x86", since = "1.27.0")]
13831402
#[allow(clippy::cast_ptr_alignment)]
13841403
pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
@@ -1393,7 +1412,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
13931412
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
13941413
#[inline]
13951414
#[target_feature(enable = "avx")]
1396-
#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected
1415+
#[cfg_attr(test, assert_instr(vmovap))]
13971416
#[stable(feature = "simd_x86", since = "1.27.0")]
13981417
#[allow(clippy::cast_ptr_alignment)]
13991418
pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) {
@@ -1437,7 +1456,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
14371456
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
14381457
#[inline]
14391458
#[target_feature(enable = "avx")]
1440-
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1459+
#[cfg_attr(test, assert_instr(vmovup))]
14411460
#[stable(feature = "simd_x86", since = "1.27.0")]
14421461
pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
14431462
let mut dst = _mm256_undefined_pd();
@@ -1456,7 +1475,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
14561475
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
14571476
#[inline]
14581477
#[target_feature(enable = "avx")]
1459-
#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected
1478+
#[cfg_attr(test, assert_instr(vmovup))]
14601479
#[stable(feature = "simd_x86", since = "1.27.0")]
14611480
pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) {
14621481
mem_addr.cast::<__m256d>().write_unaligned(a);
@@ -2145,7 +2164,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
21452164
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
21462165
#[inline]
21472166
#[target_feature(enable = "avx")]
2148-
#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected
2167+
#[cfg_attr(test, assert_instr(vxorp))]
21492168
#[stable(feature = "simd_x86", since = "1.27.0")]
21502169
pub unsafe fn _mm256_setzero_pd() -> __m256d {
21512170
_mm256_set1_pd(0.0)
@@ -2676,8 +2695,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
26762695
// instructions, thus it has zero latency.
26772696
#[stable(feature = "simd_x86", since = "1.27.0")]
26782697
pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2679-
// FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2680-
simd_shuffle!(a, a, [0, 1, 2, 3, 0, 0, 0, 0])
2698+
simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4])
26812699
}
26822700

26832701
/// Casts vector of type __m128d to type __m256d;
@@ -2690,8 +2708,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
26902708
// instructions, thus it has zero latency.
26912709
#[stable(feature = "simd_x86", since = "1.27.0")]
26922710
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2693-
// FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2694-
simd_shuffle!(a, a, [0, 1, 0, 0])
2711+
simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2])
26952712
}
26962713

26972714
/// Casts vector of type __m128i to type __m256i;
@@ -2705,8 +2722,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
27052722
#[stable(feature = "simd_x86", since = "1.27.0")]
27062723
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
27072724
let a = a.as_i64x2();
2708-
// FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2709-
let dst: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 0]);
2725+
let undefined = _mm_undefined_si128().as_i64x2();
2726+
let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
27102727
transmute(dst)
27112728
}
27122729

@@ -3719,6 +3736,22 @@ mod tests {
37193736
assert_eq_m128i(r, e);
37203737
}
37213738

3739+
#[simd_test(enable = "avx")]
3740+
unsafe fn test_mm256_extract_epi32() {
3741+
let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
3742+
let r1 = _mm256_extract_epi32::<0>(a);
3743+
let r2 = _mm256_extract_epi32::<3>(a);
3744+
assert_eq!(r1, -1);
3745+
assert_eq!(r2, 3);
3746+
}
3747+
3748+
#[simd_test(enable = "avx")]
3749+
unsafe fn test_mm256_cvtsi256_si32() {
3750+
let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
3751+
let r = _mm256_cvtsi256_si32(a);
3752+
assert_eq!(r, 1);
3753+
}
3754+
37223755
#[simd_test(enable = "avx")]
37233756
#[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri
37243757
unsafe fn test_mm256_zeroall() {
@@ -4698,6 +4731,27 @@ mod tests {
46984731
assert_eq_m128i(r, _mm_setr_epi64x(1, 2));
46994732
}
47004733

4734+
#[simd_test(enable = "avx")]
4735+
unsafe fn test_mm256_castps128_ps256() {
4736+
let a = _mm_setr_ps(1., 2., 3., 4.);
4737+
let r = _mm256_castps128_ps256(a);
4738+
assert_eq_m128(_mm256_castps256_ps128(r), a);
4739+
}
4740+
4741+
#[simd_test(enable = "avx")]
4742+
unsafe fn test_mm256_castpd128_pd256() {
4743+
let a = _mm_setr_pd(1., 2.);
4744+
let r = _mm256_castpd128_pd256(a);
4745+
assert_eq_m128d(_mm256_castpd256_pd128(r), a);
4746+
}
4747+
4748+
#[simd_test(enable = "avx")]
4749+
unsafe fn test_mm256_castsi128_si256() {
4750+
let a = _mm_setr_epi32(1, 2, 3, 4);
4751+
let r = _mm256_castsi128_si256(a);
4752+
assert_eq_m128i(_mm256_castsi256_si128(r), a);
4753+
}
4754+
47014755
#[simd_test(enable = "avx")]
47024756
unsafe fn test_mm256_zextps128_ps256() {
47034757
let a = _mm_setr_ps(1., 2., 3., 4.);

crates/core_arch/src/x86/avx2.rs

-39
Original file line numberDiff line numberDiff line change
@@ -3610,29 +3610,6 @@ pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
36103610
simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32
36113611
}
36123612

3613-
/// Extracts a 32-bit integer from `a`, selected with `INDEX`.
3614-
///
3615-
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
3616-
#[inline]
3617-
#[target_feature(enable = "avx2")]
3618-
// This intrinsic has no corresponding instruction.
3619-
#[rustc_legacy_const_generics(1)]
3620-
#[stable(feature = "simd_x86", since = "1.27.0")]
3621-
pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 {
3622-
static_assert_uimm_bits!(INDEX, 3);
3623-
simd_extract!(a.as_i32x8(), INDEX as u32)
3624-
}
3625-
3626-
/// Returns the first element of the input vector of `[8 x i32]`.
3627-
///
3628-
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
3629-
#[inline]
3630-
#[target_feature(enable = "avx2")]
3631-
#[stable(feature = "simd_x86", since = "1.27.0")]
3632-
pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 {
3633-
simd_extract!(a.as_i32x8(), 0)
3634-
}
3635-
36363613
#[allow(improper_ctypes)]
36373614
extern "C" {
36383615
#[link_name = "llvm.x86.avx2.phadd.w"]
@@ -5749,20 +5726,4 @@ mod tests {
57495726
assert_eq!(r1, 0xFFFF);
57505727
assert_eq!(r2, 3);
57515728
}
5752-
5753-
#[simd_test(enable = "avx2")]
5754-
unsafe fn test_mm256_extract_epi32() {
5755-
let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7);
5756-
let r1 = _mm256_extract_epi32::<0>(a);
5757-
let r2 = _mm256_extract_epi32::<3>(a);
5758-
assert_eq!(r1, -1);
5759-
assert_eq!(r2, 3);
5760-
}
5761-
5762-
#[simd_test(enable = "avx2")]
5763-
unsafe fn test_mm256_cvtsi256_si32() {
5764-
let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
5765-
let r = _mm256_cvtsi256_si32(a);
5766-
assert_eq!(r, 1);
5767-
}
57685729
}

0 commit comments

Comments
 (0)