Skip to content

Commit ace6dd4

Browse files
committed
Implement missing intrinsics in SSE4a and TBM
Add `extracti`, `inserti` and `bextri` intrinsics. Refactor TBM into 2 modules
1 parent 5520a91 commit ace6dd4

File tree

4 files changed

+310
-258
lines changed

4 files changed

+310
-258
lines changed

crates/core_arch/src/x86/sse4a.rs

+61-3
Original file line numberDiff line numberDiff line change
@@ -9,17 +9,18 @@ use stdarch_test::assert_instr;
99
extern "C" { // foreign declarations; each `link_name` below names an `llvm.x86.sse4a.*` symbol
1010
#[link_name = "llvm.x86.sse4a.extrq"]
1111
fn extrq(x: i64x2, y: i8x16) -> i64x2; // called by `_mm_extract_si64`
12+
#[link_name = "llvm.x86.sse4a.extrqi"]
13+
fn extrqi(x: i64x2, len: u8, idx: u8) -> i64x2; // called by `_mm_extracti_si64` with `LEN`/`IDX` cast to `u8`
1214
#[link_name = "llvm.x86.sse4a.insertq"]
1315
fn insertq(x: i64x2, y: i64x2) -> i64x2; // called by `_mm_insert_si64`
16+
#[link_name = "llvm.x86.sse4a.insertqi"]
17+
fn insertqi(x: i64x2, y: i64x2, len: u8, idx: u8) -> i64x2; // called by `_mm_inserti_si64` with `LEN`/`IDX` cast to `u8`
1418
#[link_name = "llvm.x86.sse4a.movnt.sd"]
1519
fn movntsd(x: *mut f64, y: __m128d);
1620
#[link_name = "llvm.x86.sse4a.movnt.ss"]
1721
fn movntss(x: *mut f32, y: __m128);
1822
}
1923

20-
// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
21-
// FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ
22-
2324
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
2425
///
2526
/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
@@ -39,6 +40,27 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
3940
transmute(extrq(x.as_i64x2(), y.as_i8x16()))
4041
}
4142

43+
/// Extracts `len` bits, starting at bit index `idx`, from the lower 64 bits of the 128-bit integer
44+
/// vector operand `x`.
45+
///
46+
/// `idx` (const `IDX`) is the index of the LSB and `len` (const `LEN`) is the number of bits to
47+
/// extract. If both are zero, bits `[63:0]` of parameter `x` are extracted. It is a compile-time error
48+
/// for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero.
49+
///
50+
/// Returns a 128-bit integer vector whose lower 64 bits contain the extracted bits.
51+
#[inline]
52+
#[target_feature(enable = "sse4a")]
53+
#[cfg_attr(test, assert_instr(extrq, LEN = 5, IDX = 5))]
54+
#[rustc_legacy_const_generics(1, 2)]
55+
#[unstable(feature = "simd_x86_updates", issue = "126936")]
56+
pub unsafe fn _mm_extracti_si64<const LEN: i32, const IDX: i32>(x: __m128i) -> __m128i {
57+
// Compile-time checks: LLVM mentions that it is UB if these conditions are not satisfied
58+
static_assert_uimm_bits!(LEN, 6); // presumably limits LEN to an unsigned 6-bit immediate; confirm against the macro
59+
static_assert_uimm_bits!(IDX, 6); // same check for IDX
60+
static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); // (0, 0) is the documented full-width case
61+
transmute(extrqi(x.as_i64x2(), LEN as u8, IDX as u8)) // narrowing casts; presumably lossless given the checks above
62+
}
63+
4264
/// Inserts the `[length:0]` bits of `y` into `x` at `index`.
4365
///
4466
/// The bits of `y`:
@@ -56,6 +78,25 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
5678
transmute(insertq(x.as_i64x2(), y.as_i64x2()))
5779
}
5880

81+
/// Inserts the `len` least-significant bits from the lower 64 bits of the 128-bit integer vector operand `y` into
82+
/// the lower 64 bits of the 128-bit integer vector operand `x`, starting at bit index `idx`.
83+
///
84+
/// `idx` (const `IDX`) is the index of the LSB of the destination range and `len` (const `LEN`) is the number
85+
/// of bits to insert. If both are zero, bits `[63:0]` of parameter `x` are replaced with bits `[63:0]` of parameter `y`.
86+
/// It is a compile-time error for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero.
87+
#[inline]
88+
#[target_feature(enable = "sse4a")]
89+
#[cfg_attr(test, assert_instr(insertq, LEN = 5, IDX = 5))]
90+
#[rustc_legacy_const_generics(2, 3)]
91+
#[unstable(feature = "simd_x86_updates", issue = "126936")]
92+
pub unsafe fn _mm_inserti_si64<const LEN: i32, const IDX: i32>(x: __m128i, y: __m128i) -> __m128i {
93+
// Compile-time checks: LLVM mentions that it is UB if these conditions are not satisfied
94+
static_assert_uimm_bits!(LEN, 6); // presumably limits LEN to an unsigned 6-bit immediate; confirm against the macro
95+
static_assert_uimm_bits!(IDX, 6); // same check for IDX
96+
static_assert!((LEN == 0 && IDX == 0) || (LEN != 0 && LEN + IDX <= 64)); // (0, 0) is the documented full-width case
97+
transmute(insertqi(x.as_i64x2(), y.as_i64x2(), LEN as u8, IDX as u8)) // narrowing casts; presumably lossless given the checks above
98+
}
99+
59100
/// Non-temporal store of `a.0` into `p`.
60101
///
61102
/// Writes 64-bit data to a memory location without polluting the caches.
@@ -114,6 +155,14 @@ mod tests {
114155
assert_eq_m128i(r, e);
115156
}
116157

158+
#[simd_test(enable = "sse4a")]
159+
unsafe fn test_mm_extracti_si64() { // extracts one byte from a known pattern and checks the result
160+
let a = _mm_setr_epi64x(0x0123456789abcdef, 0);
161+
let r = _mm_extracti_si64::<8, 8>(a); // LEN = 8, IDX = 8
162+
let e = _mm_setr_epi64x(0xcd, 0); // bits [15:8] of 0x0123456789abcdef are 0xcd
163+
assert_eq_m128i(r, e);
164+
}
165+
117166
#[simd_test(enable = "sse4a")]
118167
unsafe fn test_mm_insert_si64() {
119168
let i = 0b0110_i64;
@@ -131,6 +180,15 @@ mod tests {
131180
assert_eq_m128i(r, expected);
132181
}
133182

183+
#[simd_test(enable = "sse4a")]
184+
unsafe fn test_mm_inserti_si64() { // overwrites one byte of `a` with the low byte of `b` and checks the result
185+
let a = _mm_setr_epi64x(0x0123456789abcdef, 0);
186+
let b = _mm_setr_epi64x(0x0011223344556677, 0);
187+
let r = _mm_inserti_si64::<8, 8>(a, b); // LEN = 8, IDX = 8
188+
let e = _mm_setr_epi64x(0x0123456789ab77ef, 0); // bits [15:8] of `a` (0xcd) replaced by the low 8 bits of `b` (0x77)
189+
assert_eq_m128i(r, e);
190+
}
191+
134192
#[repr(align(16))]
135193
struct MemoryF64 {
136194
data: [f64; 2],

0 commit comments

Comments
 (0)