Skip to content

Commit 11578e7

Browse files
sayantn authored and Amanieu committed
Use LLVM intrinsics for masked load/stores, expand-loads and fp-class
Also, remove some redundant sse target-features from avx intrinsics
1 parent 13c3af4 commit 11578e7

File tree

5 files changed

+481 -1242 lines changed

5 files changed

+481 -1242 lines changed

crates/core_arch/src/x86/avx.rs

+15 -15
Original file line number | Diff line number | Diff line change
@@ -737,7 +737,7 @@ pub const _CMP_TRUE_US: i32 = 0x1f;
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_pd)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqpd, IMM5 = 0))] // TODO Validate vcmppd
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -767,7 +767,7 @@ pub unsafe fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ps)
 #[inline]
-#[target_feature(enable = "avx,sse")]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqps, IMM5 = 0))] // TODO Validate vcmpps
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -799,7 +799,7 @@ pub unsafe fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256 {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_sd)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqsd, IMM5 = 0))] // TODO Validate vcmpsd
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -816,7 +816,7 @@ pub unsafe fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmp_ss)
 #[inline]
-#[target_feature(enable = "avx,sse")]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vcmpeqss, IMM5 = 0))] // TODO Validate vcmpss
 #[rustc_legacy_const_generics(2)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -1093,7 +1093,7 @@ pub unsafe fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256 {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_ps)
 #[inline]
-#[target_feature(enable = "avx,sse")]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vshufps, IMM8 = 9))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -1163,7 +1163,7 @@ pub unsafe fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_permute_pd)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 #[cfg_attr(test, assert_instr(vshufpd, IMM2 = 0x1))]
 #[rustc_legacy_const_generics(1)]
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -2733,7 +2733,7 @@ pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextps128_ps256)
 #[inline]
-#[target_feature(enable = "avx,sse")]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -2747,7 +2747,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextsi128_si256)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -2764,7 +2764,7 @@ pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zextpd128_pd256)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 // This intrinsic is only used for compilation and does not generate any
 // instructions, thus it has zero latency.
 #[stable(feature = "simd_x86", since = "1.27.0")]
@@ -2888,7 +2888,7 @@ pub unsafe fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i {
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128)
 #[inline]
-#[target_feature(enable = "avx,sse")]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256 {
@@ -2903,7 +2903,7 @@ pub unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m2
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128d)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d {
@@ -2917,7 +2917,7 @@ pub unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu2_m128i)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i {
@@ -2932,7 +2932,7 @@ pub unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128)
 #[inline]
-#[target_feature(enable = "avx,sse")]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256) {
@@ -2949,7 +2949,7 @@ pub unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256)
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128d)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d) {
@@ -2965,7 +2965,7 @@ pub unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256
 ///
 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu2_m128i)
 #[inline]
-#[target_feature(enable = "avx,sse2")]
+#[target_feature(enable = "avx")]
 // This intrinsic has no corresponding instruction.
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i) {

0 commit comments

Comments
 (0)