@@ -52,9 +52,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 {
5252/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd)
5353#[ inline]
5454#[ target_feature( enable = "avx" ) ]
55- // FIXME: Should be 'vandpd' instruction.
5655// See https://github.com/rust-lang/stdarch/issues/71
57- #[ cfg_attr( test, assert_instr( vandps ) ) ]
56+ #[ cfg_attr( test, assert_instr( vandp ) ) ]
5857#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
5958pub unsafe fn _mm256_and_pd ( a : __m256d , b : __m256d ) -> __m256d {
6059 let a: u64x4 = transmute ( a) ;
@@ -82,9 +81,8 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 {
8281/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd)
8382#[ inline]
8483#[ target_feature( enable = "avx" ) ]
85- // FIXME: should be `vorpd` instruction.
8684// See <https://github.com/rust-lang/stdarch/issues/71>.
87- #[ cfg_attr( test, assert_instr( vorps ) ) ]
85+ #[ cfg_attr( test, assert_instr( vorp ) ) ]
8886#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
8987pub unsafe fn _mm256_or_pd ( a : __m256d , b : __m256d ) -> __m256d {
9088 let a: u64x4 = transmute ( a) ;
@@ -162,8 +160,7 @@ pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256
162160/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd)
163161#[ inline]
164162#[ target_feature( enable = "avx" ) ]
165- // FIXME: should be `vandnpd` instruction.
166- #[ cfg_attr( test, assert_instr( vandnps) ) ]
163+ #[ cfg_attr( test, assert_instr( vandnp) ) ]
167164#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
168165pub unsafe fn _mm256_andnot_pd ( a : __m256d , b : __m256d ) -> __m256d {
169166 let a: u64x4 = transmute ( a) ;
@@ -615,8 +612,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 {
615612/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd)
616613#[ inline]
617614#[ target_feature( enable = "avx" ) ]
618- // FIXME Should be 'vxorpd' instruction.
619- #[ cfg_attr( test, assert_instr( vxorps) ) ]
615+ #[ cfg_attr( test, assert_instr( vxorp) ) ]
620616#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
621617pub unsafe fn _mm256_xor_pd ( a : __m256d , b : __m256d ) -> __m256d {
622618 let a: u64x4 = transmute ( a) ;
@@ -995,6 +991,29 @@ pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
995991 transmute ( dst)
996992}
997993
994+ /// Extracts a 32-bit integer from `a`, selected with `INDEX`.
995+ ///
996+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32)
997+ #[ inline]
998+ #[ target_feature( enable = "avx" ) ]
999+ // This intrinsic has no corresponding instruction.
1000+ #[ rustc_legacy_const_generics( 1 ) ]
1001+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1002+ pub unsafe fn _mm256_extract_epi32 < const INDEX : i32 > ( a : __m256i ) -> i32 {
1003+ static_assert_uimm_bits ! ( INDEX , 3 ) ;
1004+ simd_extract ! ( a. as_i32x8( ) , INDEX as u32 )
1005+ }
1006+
1007+ /// Returns the first element of the input vector of `[8 x i32]`.
1008+ ///
1009+ /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32)
1010+ #[ inline]
1011+ #[ target_feature( enable = "avx" ) ]
1012+ #[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
1013+ pub unsafe fn _mm256_cvtsi256_si32 ( a : __m256i ) -> i32 {
1014+ simd_extract ! ( a. as_i32x8( ) , 0 )
1015+ }
1016+
9981017/// Zeroes the contents of all XMM or YMM registers.
9991018///
10001019/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall)
@@ -1378,7 +1397,7 @@ pub unsafe fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m25
13781397/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd)
13791398#[ inline]
13801399#[ target_feature( enable = "avx" ) ]
1381- #[ cfg_attr( test, assert_instr( vmovaps ) ) ] // FIXME vmovapd expected
1400+ #[ cfg_attr( test, assert_instr( vmovap ) ) ]
13821401#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
13831402#[ allow( clippy:: cast_ptr_alignment) ]
13841403pub unsafe fn _mm256_load_pd ( mem_addr : * const f64 ) -> __m256d {
@@ -1393,7 +1412,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d {
13931412/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd)
13941413#[ inline]
13951414#[ target_feature( enable = "avx" ) ]
1396- #[ cfg_attr( test, assert_instr( vmovaps ) ) ] // FIXME vmovapd expected
1415+ #[ cfg_attr( test, assert_instr( vmovap ) ) ]
13971416#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
13981417#[ allow( clippy:: cast_ptr_alignment) ]
13991418pub unsafe fn _mm256_store_pd ( mem_addr : * mut f64 , a : __m256d ) {
@@ -1437,7 +1456,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) {
14371456/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd)
14381457#[ inline]
14391458#[ target_feature( enable = "avx" ) ]
1440- #[ cfg_attr( test, assert_instr( vmovups ) ) ] // FIXME vmovupd expected
1459+ #[ cfg_attr( test, assert_instr( vmovup ) ) ]
14411460#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
14421461pub unsafe fn _mm256_loadu_pd ( mem_addr : * const f64 ) -> __m256d {
14431462 let mut dst = _mm256_undefined_pd ( ) ;
@@ -1456,7 +1475,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d {
14561475/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd)
14571476#[ inline]
14581477#[ target_feature( enable = "avx" ) ]
1459- #[ cfg_attr( test, assert_instr( vmovups ) ) ] // FIXME vmovupd expected
1478+ #[ cfg_attr( test, assert_instr( vmovup ) ) ]
14601479#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
14611480pub unsafe fn _mm256_storeu_pd ( mem_addr : * mut f64 , a : __m256d ) {
14621481 mem_addr. cast :: < __m256d > ( ) . write_unaligned ( a) ;
@@ -2145,7 +2164,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
21452164/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd)
21462165#[ inline]
21472166#[ target_feature( enable = "avx" ) ]
2148- #[ cfg_attr( test, assert_instr( vxorps ) ) ] // FIXME vxorpd expected
2167+ #[ cfg_attr( test, assert_instr( vxorp ) ) ]
21492168#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
21502169pub unsafe fn _mm256_setzero_pd ( ) -> __m256d {
21512170 _mm256_set1_pd ( 0.0 )
@@ -2676,8 +2695,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i {
26762695// instructions, thus it has zero latency.
26772696#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26782697pub unsafe fn _mm256_castps128_ps256 ( a : __m128 ) -> __m256 {
2679- // FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1])
2680- simd_shuffle ! ( a, a, [ 0 , 1 , 2 , 3 , 0 , 0 , 0 , 0 ] )
2698+ simd_shuffle ! ( a, _mm_undefined_ps( ) , [ 0 , 1 , 2 , 3 , 4 , 4 , 4 , 4 ] )
26812699}
26822700
26832701/// Casts vector of type __m128d to type __m256d;
@@ -2690,8 +2708,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 {
26902708// instructions, thus it has zero latency.
26912709#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
26922710pub unsafe fn _mm256_castpd128_pd256 ( a : __m128d ) -> __m256d {
2693- // FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2694- simd_shuffle ! ( a, a, [ 0 , 1 , 0 , 0 ] )
2711+ simd_shuffle ! ( a, _mm_undefined_pd( ) , [ 0 , 1 , 2 , 2 ] )
26952712}
26962713
26972714/// Casts vector of type __m128i to type __m256i;
@@ -2705,8 +2722,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
27052722#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
27062723pub unsafe fn _mm256_castsi128_si256 ( a : __m128i ) -> __m256i {
27072724 let a = a. as_i64x2 ( ) ;
2708- // FIXME simd_shuffle!(a, a, [0, 1, -1, -1])
2709- let dst: i64x4 = simd_shuffle ! ( a, a , [ 0 , 1 , 0 , 0 ] ) ;
2725+ let undefined = _mm_undefined_si128 ( ) . as_i64x2 ( ) ;
2726+ let dst: i64x4 = simd_shuffle ! ( a, undefined , [ 0 , 1 , 2 , 2 ] ) ;
27102727 transmute ( dst)
27112728}
27122729
@@ -3719,6 +3736,22 @@ mod tests {
37193736 assert_eq_m128i ( r, e) ;
37203737 }
37213738
3739+ #[ simd_test( enable = "avx" ) ]
3740+ unsafe fn test_mm256_extract_epi32 ( ) {
3741+ let a = _mm256_setr_epi32 ( -1 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ) ;
3742+ let r1 = _mm256_extract_epi32 :: < 0 > ( a) ;
3743+ let r2 = _mm256_extract_epi32 :: < 3 > ( a) ;
3744+ assert_eq ! ( r1, -1 ) ;
3745+ assert_eq ! ( r2, 3 ) ;
3746+ }
3747+
3748+ #[ simd_test( enable = "avx" ) ]
3749+ unsafe fn test_mm256_cvtsi256_si32 ( ) {
3750+ let a = _mm256_setr_epi32 ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
3751+ let r = _mm256_cvtsi256_si32 ( a) ;
3752+ assert_eq ! ( r, 1 ) ;
3753+ }
3754+
37223755 #[ simd_test( enable = "avx" ) ]
37233756 #[ cfg_attr( miri, ignore) ] // Register-level operation not supported by Miri
37243757 unsafe fn test_mm256_zeroall ( ) {
@@ -4698,6 +4731,27 @@ mod tests {
46984731 assert_eq_m128i ( r, _mm_setr_epi64x ( 1 , 2 ) ) ;
46994732 }
47004733
4734+ #[ simd_test( enable = "avx" ) ]
4735+ unsafe fn test_mm256_castps128_ps256 ( ) {
4736+ let a = _mm_setr_ps ( 1. , 2. , 3. , 4. ) ;
4737+ let r = _mm256_castps128_ps256 ( a) ;
4738+ assert_eq_m128 ( _mm256_castps256_ps128 ( r) , a) ;
4739+ }
4740+
4741+ #[ simd_test( enable = "avx" ) ]
4742+ unsafe fn test_mm256_castpd128_pd256 ( ) {
4743+ let a = _mm_setr_pd ( 1. , 2. ) ;
4744+ let r = _mm256_castpd128_pd256 ( a) ;
4745+ assert_eq_m128d ( _mm256_castpd256_pd128 ( r) , a) ;
4746+ }
4747+
4748+ #[ simd_test( enable = "avx" ) ]
4749+ unsafe fn test_mm256_castsi128_si256 ( ) {
4750+ let a = _mm_setr_epi32 ( 1 , 2 , 3 , 4 ) ;
4751+ let r = _mm256_castsi128_si256 ( a) ;
4752+ assert_eq_m128i ( _mm256_castsi256_si128 ( r) , a) ;
4753+ }
4754+
47014755 #[ simd_test( enable = "avx" ) ]
47024756 unsafe fn test_mm256_zextps128_ps256 ( ) {
47034757 let a = _mm_setr_ps ( 1. , 2. , 3. , 4. ) ;
0 commit comments