@@ -52,9 +52,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
52
52
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
53
53
#[ inline]
54
54
#[ target_feature( enable = "avx" ) ]
55
- // FIXME: Should be 'vandpd' instruction.
56
55
// See https://github.com/rust-lang/stdarch/issues/71
57
- #[ cfg_attr( test, assert_instr( vandps ) ) ]
56
+ #[ cfg_attr( test, assert_instr( vandp ) ) ]
58
57
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
59
58
pub unsafe fn _mm256_and_pd ( a : __m256d , b : __m256d ) -> __m256d {
60
59
let a: u64x4 = transmute ( a) ;
@@ -82,9 +81,8 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
82
81
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
83
82
#[ inline]
84
83
#[ target_feature( enable = "avx" ) ]
85
- // FIXME: should be `vorpd` instruction.
86
84
// See <https://github.com/rust-lang/stdarch/issues/71>.
87
- #[ cfg_attr( test, assert_instr( vorps ) ) ]
85
+ #[ cfg_attr( test, assert_instr( vorp ) ) ]
88
86
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
89
87
pub unsafe fn _mm256_or_pd ( a : __m256d , b : __m256d ) -> __m256d {
90
88
let a: u64x4 = transmute ( a) ;
@@ -162,8 +160,7 @@ pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256
162
160
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
163
161
#[ inline]
164
162
#[ target_feature( enable = "avx" ) ]
165
- // FIXME: should be `vandnpd` instruction.
166
- #[ cfg_attr( test, assert_instr( vandnps) ) ]
163
+ #[ cfg_attr( test, assert_instr( vandnp) ) ]
167
164
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
168
165
pub unsafe fn _mm256_andnot_pd ( a : __m256d , b : __m256d ) -> __m256d {
169
166
let a: u64x4 = transmute ( a) ;
@@ -615,8 +612,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
615
612
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
616
613
#[ inline]
617
614
#[ target_feature( enable = "avx" ) ]
618
- // FIXME Should be 'vxorpd' instruction.
619
- #[ cfg_attr( test, assert_instr( vxorps) ) ]
615
+ #[ cfg_attr( test, assert_instr( vxorp) ) ]
620
616
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
621
617
pub unsafe fn _mm256_xor_pd ( a : __m256d , b : __m256d ) -> __m256d {
622
618
let a: u64x4 = transmute ( a) ;
@@ -995,6 +991,29 @@ pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
995
991
transmute ( dst)
996
992
}
997
993
994
+ /// Extracts a 32-bit integer from `a`, selected with `INDEX`.
995
+ ///
996
+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
997
+ #[ inline]
998
+ #[ target_feature( enable = "avx" ) ]
999
+ // This intrinsic has no corresponding instruction.
1000
+ #[ rustc_legacy_const_generics( 1 ) ]
1001
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1002
+ pub unsafe fn _mm256_extract_epi32 < const INDEX : i32 > ( a : __m256i ) -> i32 {
1003
+ static_assert_uimm_bits ! ( INDEX , 3 ) ;
1004
+ simd_extract ! ( a. as_i32x8( ) , INDEX as u32 )
1005
+ }
1006
+
1007
+ /// Returns the first element of the input vector of `[8 x i32]`.
1008
+ ///
1009
+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
1010
+ #[ inline]
1011
+ #[ target_feature( enable = "avx" ) ]
1012
+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1013
+ pub unsafe fn _mm256_cvtsi256_si32 ( a : __m256i ) -> i32 {
1014
+ simd_extract ! ( a. as_i32x8( ) , 0 )
1015
+ }
1016
+
998
1017
/// Zeroes the contents of all XMM or YMM registers.
999
1018
///
1000
1019
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
@@ -1378,7 +1397,7 @@ pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m25
1378
1397
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
1379
1398
#[ inline]
1380
1399
#[ target_feature( enable = "avx" ) ]
1381
- #[ cfg_attr( test, assert_instr( vmovaps ) ) ] // FIXME vmovapd expected
1400
+ #[ cfg_attr( test, assert_instr( vmovap ) ) ]
1382
1401
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1383
1402
#[ allow( clippy:: cast_ptr_alignment) ]
1384
1403
pub unsafe fn _mm256_load_pd ( mem_addr : * const f64 ) -> __m256d {
@@ -1393,7 +1412,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
1393
1412
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
1394
1413
#[ inline]
1395
1414
#[ target_feature( enable = "avx" ) ]
1396
- #[ cfg_attr( test, assert_instr( vmovaps ) ) ] // FIXME vmovapd expected
1415
+ #[ cfg_attr( test, assert_instr( vmovap ) ) ]
1397
1416
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1398
1417
#[ allow( clippy:: cast_ptr_alignment) ]
1399
1418
pub unsafe fn _mm256_store_pd ( mem_addr : * mut f64 , a : __m256d ) {
@@ -1437,7 +1456,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
1437
1456
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
1438
1457
#[ inline]
1439
1458
#[ target_feature( enable = "avx" ) ]
1440
- #[ cfg_attr( test, assert_instr( vmovups ) ) ] // FIXME vmovupd expected
1459
+ #[ cfg_attr( test, assert_instr( vmovup ) ) ]
1441
1460
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1442
1461
pub unsafe fn _mm256_loadu_pd ( mem_addr : * const f64 ) -> __m256d {
1443
1462
let mut dst = _mm256_undefined_pd ( ) ;
@@ -1456,7 +1475,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
1456
1475
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
1457
1476
#[ inline]
1458
1477
#[ target_feature( enable = "avx" ) ]
1459
- #[ cfg_attr( test, assert_instr( vmovups ) ) ] // FIXME vmovupd expected
1478
+ #[ cfg_attr( test, assert_instr( vmovup ) ) ]
1460
1479
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1461
1480
pub unsafe fn _mm256_storeu_pd ( mem_addr : * mut f64 , a : __m256d ) {
1462
1481
mem_addr. cast :: < __m256d > ( ) . write_unaligned ( a) ;
@@ -2145,7 +2164,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
2145
2164
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
2146
2165
#[ inline]
2147
2166
#[ target_feature( enable = "avx" ) ]
2148
- #[ cfg_attr( test, assert_instr( vxorps ) ) ] // FIXME vxorpd expected
2167
+ #[ cfg_attr( test, assert_instr( vxorp ) ) ]
2149
2168
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2150
2169
pub unsafe fn _mm256_setzero_pd ( ) -> __m256d {
2151
2170
_mm256_set1_pd ( 0.0 )
@@ -2676,8 +2695,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
2676
2695
// instructions, thus it has zero latency.
2677
2696
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2678
2697
pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
    // Indices 0..=3 pick the four lanes of `a`; index 4 (i.e. 4 - len(a))
    // picks lane 0 of the second operand, `_mm_undefined_ps()`, so the
    // upper 128 bits of the result are left undefined instead of being
    // forced to duplicate `a`'s lanes.
    simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4])
}
2682
2700
2683
2701
/// Casts vector of type __m128d to type __m256d;
@@ -2690,8 +2708,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
2690
2708
// instructions, thus it has zero latency.
2691
2709
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2692
2710
pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
    // Indices 0 and 1 pick both lanes of `a`; index 2 (i.e. 2 - len(a))
    // picks lane 0 of the undefined second operand, so the upper 128 bits
    // of the result stay undefined.
    simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2])
}
2696
2713
2697
2714
/// Casts vector of type __m128i to type __m256i;
@@ -2705,8 +2722,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
2705
2722
#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
2706
2723
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
    // View both operands as two i64 lanes so the shuffle works on a
    // concrete element type.
    let a = a.as_i64x2();
    let undefined = _mm_undefined_si128().as_i64x2();
    // Indices 0 and 1 pick both lanes of `a`; index 2 (i.e. 2 - len(a))
    // picks lane 0 of the undefined operand, leaving the upper 128 bits
    // of the result undefined.
    let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
    transmute(dst)
}
2712
2729
@@ -3719,6 +3736,22 @@ mod tests {
3719
3736
assert_eq_m128i ( r, e) ;
3720
3737
}
3721
3738
3739
+ #[ simd_test( enable = "avx" ) ]
3740
+ unsafe fn test_mm256_extract_epi32 ( ) {
3741
+ let a = _mm256_setr_epi32 ( -1 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
3742
+ let r1 = _mm256_extract_epi32 :: < 0 > ( a) ;
3743
+ let r2 = _mm256_extract_epi32 :: < 3 > ( a) ;
3744
+ assert_eq ! ( r1, -1 ) ;
3745
+ assert_eq ! ( r2, 3 ) ;
3746
+ }
3747
+
3748
+ #[ simd_test( enable = "avx" ) ]
3749
+ unsafe fn test_mm256_cvtsi256_si32 ( ) {
3750
+ let a = _mm256_setr_epi32 ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
3751
+ let r = _mm256_cvtsi256_si32 ( a) ;
3752
+ assert_eq ! ( r, 1 ) ;
3753
+ }
3754
+
3722
3755
#[ simd_test( enable = "avx" ) ]
3723
3756
#[ cfg_attr( miri, ignore) ] // Register-level operation not supported by Miri
3724
3757
unsafe fn test_mm256_zeroall ( ) {
@@ -4698,6 +4731,27 @@ mod tests {
4698
4731
assert_eq_m128i ( r, _mm_setr_epi64x ( 1 , 2 ) ) ;
4699
4732
}
4700
4733
4734
+ #[ simd_test( enable = "avx" ) ]
4735
+ unsafe fn test_mm256_castps128_ps256 ( ) {
4736
+ let a = _mm_setr_ps ( 1. , 2. , 3. , 4. ) ;
4737
+ let r = _mm256_castps128_ps256 ( a) ;
4738
+ assert_eq_m128 ( _mm256_castps256_ps128 ( r) , a) ;
4739
+ }
4740
+
4741
+ #[ simd_test( enable = "avx" ) ]
4742
+ unsafe fn test_mm256_castpd128_pd256 ( ) {
4743
+ let a = _mm_setr_pd ( 1. , 2. ) ;
4744
+ let r = _mm256_castpd128_pd256 ( a) ;
4745
+ assert_eq_m128d ( _mm256_castpd256_pd128 ( r) , a) ;
4746
+ }
4747
+
4748
+ #[ simd_test( enable = "avx" ) ]
4749
+ unsafe fn test_mm256_castsi128_si256 ( ) {
4750
+ let a = _mm_setr_epi32 ( 1 , 2 , 3 , 4 ) ;
4751
+ let r = _mm256_castsi128_si256 ( a) ;
4752
+ assert_eq_m128i ( _mm256_castsi256_si128 ( r) , a) ;
4753
+ }
4754
+
4701
4755
#[ simd_test( enable = "avx" ) ]
4702
4756
unsafe fn test_mm256_zextps128_ps256 ( ) {
4703
4757
let a = _mm_setr_ps ( 1. , 2. , 3. , 4. ) ;
0 commit comments