@@ -9,17 +9,18 @@ use stdarch_test::assert_instr;
9
9
extern "C" {
10
10
#[ link_name = "llvm.x86.sse4a.extrq" ]
11
11
fn extrq ( x : i64x2 , y : i8x16 ) -> i64x2 ;
12
+ #[ link_name = "llvm.x86.sse4a.extrqi" ]
13
+ fn extrqi ( x : i64x2 , len : u8 , idx : u8 ) -> i64x2 ;
12
14
#[ link_name = "llvm.x86.sse4a.insertq" ]
13
15
fn insertq ( x : i64x2 , y : i64x2 ) -> i64x2 ;
16
+ #[ link_name = "llvm.x86.sse4a.insertqi" ]
17
+ fn insertqi ( x : i64x2 , y : i64x2 , len : u8 , idx : u8 ) -> i64x2 ;
14
18
#[ link_name = "llvm.x86.sse4a.movnt.sd" ]
15
19
fn movntsd ( x : * mut f64 , y : __m128d ) ;
16
20
#[ link_name = "llvm.x86.sse4a.movnt.ss" ]
17
21
fn movntss ( x : * mut f32 , y : __m128 ) ;
18
22
}
19
23
20
- // FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
21
- // FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ
22
-
23
24
/// Extracts the bit range specified by `y` from the lower 64 bits of `x`.
24
25
///
25
26
/// The `[13:8]` bits of `y` specify the index of the bit-range to extract. The
@@ -39,6 +40,27 @@ pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
39
40
transmute ( extrq ( x. as_i64x2 ( ) , y. as_i8x16 ( ) ) )
40
41
}
41
42
43
+ /// Extracts the specified bits from the lower 64 bits of the 128-bit integer vector operand at the
44
+ /// index `idx` and of the length `len`.
45
+ ///
46
+ /// `idx` specifies the index of the LSB. `len` specifies the number of bits to extract. If length
47
+ /// and index are both zero, bits `[63:0]` of parameter `x` are extracted. It is a compile-time error
48
+ /// for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero.
49
+ ///
50
+ /// Returns a 128-bit integer vector whose lower 64 bits contain the extracted bits.
51
+ #[ inline]
52
+ #[ target_feature( enable = "sse4a" ) ]
53
+ #[ cfg_attr( test, assert_instr( extrq, LEN = 5 , IDX = 5 ) ) ]
54
+ #[ rustc_legacy_const_generics( 1 , 2 ) ]
55
+ #[ unstable( feature = "simd_x86_updates" , issue = "126936" ) ]
56
+ pub unsafe fn _mm_extracti_si64 < const LEN : i32 , const IDX : i32 > ( x : __m128i ) -> __m128i {
57
+ // LLVM mentions that it is UB if these are not satisfied
58
+ static_assert_uimm_bits ! ( LEN , 6 ) ;
59
+ static_assert_uimm_bits ! ( IDX , 6 ) ;
60
+ static_assert ! ( ( LEN == 0 && IDX == 0 ) || ( LEN != 0 && LEN + IDX <= 64 ) ) ;
61
+ transmute ( extrqi ( x. as_i64x2 ( ) , LEN as u8 , IDX as u8 ) )
62
+ }
63
+
42
64
/// Inserts the `[length-1:0]` bits of `y` into `x` at `index`.
43
65
///
44
66
/// The bits of `y`:
@@ -56,6 +78,25 @@ pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
56
78
transmute ( insertq ( x. as_i64x2 ( ) , y. as_i64x2 ( ) ) )
57
79
}
58
80
81
+ /// Inserts the `len` least-significant bits from the lower 64 bits of the 128-bit integer vector operand `y` into
82
+ /// the lower 64 bits of the 128-bit integer vector operand `x` at the index `idx` and of the length `len`.
83
+ ///
84
+ /// `idx` specifies the index of the LSB. `len` specifies the number of bits to insert. If length and index
85
+ /// are both zero, bits `[63:0]` of parameter `x` are replaced with bits `[63:0]` of parameter `y`. It is a
86
+ /// compile-time error for `len + idx` to be greater than 64 or for `len` to be zero and `idx` to be non-zero.
87
+ #[ inline]
88
+ #[ target_feature( enable = "sse4a" ) ]
89
+ #[ cfg_attr( test, assert_instr( insertq, LEN = 5 , IDX = 5 ) ) ]
90
+ #[ rustc_legacy_const_generics( 2 , 3 ) ]
91
+ #[ unstable( feature = "simd_x86_updates" , issue = "126936" ) ]
92
+ pub unsafe fn _mm_inserti_si64 < const LEN : i32 , const IDX : i32 > ( x : __m128i , y : __m128i ) -> __m128i {
93
+ // LLVM mentions that it is UB if these are not satisfied
94
+ static_assert_uimm_bits ! ( LEN , 6 ) ;
95
+ static_assert_uimm_bits ! ( IDX , 6 ) ;
96
+ static_assert ! ( ( LEN == 0 && IDX == 0 ) || ( LEN != 0 && LEN + IDX <= 64 ) ) ;
97
+ transmute ( insertqi ( x. as_i64x2 ( ) , y. as_i64x2 ( ) , LEN as u8 , IDX as u8 ) )
98
+ }
99
+
59
100
/// Non-temporal store of `a.0` into `p`.
60
101
///
61
102
/// Writes 64-bit data to a memory location without polluting the caches.
@@ -114,6 +155,14 @@ mod tests {
114
155
assert_eq_m128i ( r, e) ;
115
156
}
116
157
158
+ #[ simd_test( enable = "sse4a" ) ]
159
+ unsafe fn test_mm_extracti_si64 ( ) {
160
+ let a = _mm_setr_epi64x ( 0x0123456789abcdef , 0 ) ;
161
+ let r = _mm_extracti_si64 :: < 8 , 8 > ( a) ;
162
+ let e = _mm_setr_epi64x ( 0xcd , 0 ) ;
163
+ assert_eq_m128i ( r, e) ;
164
+ }
165
+
117
166
#[ simd_test( enable = "sse4a" ) ]
118
167
unsafe fn test_mm_insert_si64 ( ) {
119
168
let i = 0b0110_i64 ;
@@ -131,6 +180,15 @@ mod tests {
131
180
assert_eq_m128i ( r, expected) ;
132
181
}
133
182
183
+ #[ simd_test( enable = "sse4a" ) ]
184
+ unsafe fn test_mm_inserti_si64 ( ) {
185
+ let a = _mm_setr_epi64x ( 0x0123456789abcdef , 0 ) ;
186
+ let b = _mm_setr_epi64x ( 0x0011223344556677 , 0 ) ;
187
+ let r = _mm_inserti_si64 :: < 8 , 8 > ( a, b) ;
188
+ let e = _mm_setr_epi64x ( 0x0123456789ab77ef , 0 ) ;
189
+ assert_eq_m128i ( r, e) ;
190
+ }
191
+
134
192
#[ repr( align( 16 ) ) ]
135
193
struct MemoryF64 {
136
194
data : [ f64 ; 2 ] ,
0 commit comments