27
27
#[ cfg( feature = "simd_opt" ) ]
28
28
use std:: mem:: transmute;
29
29
30
- #[ cfg( feature = "simd" ) ]
31
30
pub use simdty:: { u32x4, u64x4} ;
32
31
33
- #[ cfg( not( feature = "simd" ) ) ]
34
- #[ derive( Clone , Copy , Debug ) ]
35
- #[ repr( C ) ]
36
- pub struct u32x4 ( pub u32 , pub u32 , pub u32 , pub u32 ) ;
32
// Lane-wise vector arithmetic intrinsics supplied by the compiler when the
// "simd" feature is enabled.
// NOTE(review): `extern "platform-intrinsic"` requires a nightly compiler;
// these declarations only exist behind the "simd" feature gate.
#[cfg(feature = "simd")]
extern "platform-intrinsic" {
    // Lane-wise addition (the scalar fallback below uses wrapping semantics).
    fn simd_add<T>(x: T, y: T) -> T;
    // Lane-wise left shift.
    fn simd_shl<T>(x: T, y: T) -> T;
    // Lane-wise right shift.
    fn simd_shr<T>(x: T, y: T) -> T;
    // Lane-wise bitwise XOR.
    fn simd_xor<T>(x: T, y: T) -> T;
}
37
39
38
- #[ cfg( not( feature = "simd" ) ) ]
39
- #[ derive( Clone , Copy , Debug ) ]
40
- #[ repr( C ) ]
41
- pub struct u64x4 ( pub u64 , pub u64 , pub u64 , pub u64 ) ;
40
// Vector lane-shuffle intrinsics, used by the optimized rotate helpers
// further down; only declared when the "simd_opt" feature is enabled.
#[cfg(feature = "simd_opt")]
extern "platform-intrinsic" {
    // Builds an 8-lane vector by selecting lanes i0..i7 — presumably from
    // the concatenation of `v` and `w`, matching rustc's simd_shuffle
    // intrinsic family (callers here pass the same vector twice and
    // indices < 8, so only `v` is ever read) — TODO confirm.
    fn simd_shuffle8<T, Elem>(v: T, w: T,
                              i0: u32, i1: u32, i2: u32, i3: u32,
                              i4: u32, i5: u32, i6: u32, i7: u32) -> T;

    // 16-lane variant; only declared on x86/x86_64.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    fn simd_shuffle16<T, Elem>(v: T, w: T,
                               i0: u32, i1: u32, i2: u32, i3: u32,
                               i4: u32, i5: u32, i6: u32, i7: u32,
                               i8: u32, i9: u32, i10: u32, i11: u32,
                               i12: u32, i13: u32, i14: u32, i15: u32,
                               ) -> T;
}
45
54
46
use std::ops::{Add, BitXor, Shl, Shr};

/// Implements the `Add`, `BitXor`, `Shl`, and `Shr` operator traits for a
/// 4-lane vector tuple type (`$vec`). Each operator carries two bodies
/// selected at compile time: a `feature = "simd"` version that defers to
/// the platform intrinsics declared above, and a scalar fallback that
/// applies the operation to each of the four lanes (`.0`..`.3`) in turn.
macro_rules! impl_ops {
    ($vec:ident) => {
        impl Add for $vec {
            type Output = Self;

            // SIMD path: one vector add.
            #[cfg(feature = "simd")]
            #[inline(always)]
            fn add(self, rhs: Self) -> Self::Output {
                unsafe { simd_add(self, rhs) }
            }

            // Scalar fallback: lane-wise wrapping addition.
            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn add(self, rhs: Self) -> Self::Output {
                $vec(self.0.wrapping_add(rhs.0),
                     self.1.wrapping_add(rhs.1),
                     self.2.wrapping_add(rhs.2),
                     self.3.wrapping_add(rhs.3))
            }
        }

        impl BitXor for $vec {
            type Output = Self;

            #[cfg(feature = "simd")]
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                unsafe { simd_xor(self, rhs) }
            }

            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn bitxor(self, rhs: Self) -> Self::Output {
                $vec(self.0 ^ rhs.0,
                     self.1 ^ rhs.1,
                     self.2 ^ rhs.2,
                     self.3 ^ rhs.3)
            }
        }

        // Shift amounts are passed as a full vector (one amount per lane),
        // mirroring the intrinsics' lane-wise signatures.
        impl Shl<$vec> for $vec {
            type Output = Self;

            #[cfg(feature = "simd")]
            #[inline(always)]
            fn shl(self, rhs: Self) -> Self::Output {
                unsafe { simd_shl(self, rhs) }
            }

            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shl(self, rhs: Self) -> Self::Output {
                $vec(self.0 << rhs.0,
                     self.1 << rhs.1,
                     self.2 << rhs.2,
                     self.3 << rhs.3)
            }
        }

        impl Shr<$vec> for $vec {
            type Output = Self;

            #[cfg(feature = "simd")]
            #[inline(always)]
            fn shr(self, rhs: Self) -> Self::Output {
                unsafe { simd_shr(self, rhs) }
            }

            #[cfg(not(feature = "simd"))]
            #[inline(always)]
            fn shr(self, rhs: Self) -> Self::Output {
                $vec(self.0 >> rhs.0,
                     self.1 >> rhs.1,
                     self.2 >> rhs.2,
                     self.3 >> rhs.3)
            }
        }
    }
}
62
136
63
// Instantiate the operator impls for the two 4-lane vector types
// re-exported at the top of this module.
impl_ops!(u32x4);
impl_ops!(u64x4);
65
139
66
140
pub trait Vector4 < T > : Copy {
67
141
fn gather ( src : & [ T ] , i0 : usize , i1 : usize , i2 : usize , i3 : usize ) -> Self ;
@@ -118,26 +192,16 @@ macro_rules! impl_vector4_common {
118
192
self . 3 . to_le( ) )
119
193
}
120
194
121
- #[ cfg( feature = "simd" ) ]
122
195
#[ inline( always) ]
123
196
fn wrapping_add( self , rhs: Self ) -> Self { self + rhs }
124
197
125
- #[ cfg( not( feature = "simd" ) ) ]
126
- #[ inline( always) ]
127
- fn wrapping_add( self , rhs: Self ) -> Self {
128
- $vec( self . 0 . wrapping_add( rhs. 0 ) ,
129
- self . 1 . wrapping_add( rhs. 1 ) ,
130
- self . 2 . wrapping_add( rhs. 2 ) ,
131
- self . 3 . wrapping_add( rhs. 3 ) )
132
- }
133
-
134
198
#[ cfg( feature = "simd" ) ]
135
199
#[ inline( always) ]
136
200
fn rotate_right_any( self , n: u32 ) -> Self {
137
201
let r = n as $word;
138
202
let l = $bits - r;
139
203
140
- ( self >> $vec( r, r, r, r) ) | ( self << $vec( l, l, l, l) )
204
+ ( self >> $vec( r, r, r, r) ) ^ ( self << $vec( l, l, l, l) )
141
205
}
142
206
143
207
#[ cfg( not( feature = "simd" ) ) ]
@@ -174,10 +238,11 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
174
238
use simdty:: u16x8;
175
239
unsafe {
176
240
let tmp: u16x8 = transmute ( vec) ;
177
- transmute ( u16x8 ( tmp. 1 , tmp. 0 ,
178
- tmp. 3 , tmp. 2 ,
179
- tmp. 5 , tmp. 4 ,
180
- tmp. 7 , tmp. 6 ) )
241
+ transmute ( simd_shuffle8 :: < u16x8 , u16 > ( tmp, tmp,
242
+ 1 , 0 ,
243
+ 3 , 2 ,
244
+ 5 , 4 ,
245
+ 7 , 6 ) )
181
246
}
182
247
}
183
248
@@ -205,10 +270,11 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
205
270
use simdty:: u32x8;
206
271
unsafe {
207
272
let tmp: u32x8 = transmute ( vec) ;
208
- transmute ( u32x8 ( tmp. 1 , tmp. 0 ,
209
- tmp. 3 , tmp. 2 ,
210
- tmp. 5 , tmp. 4 ,
211
- tmp. 7 , tmp. 6 ) )
273
+ transmute ( simd_shuffle8 :: < u32x8 , u32 > ( tmp, tmp,
274
+ 1 , 0 ,
275
+ 3 , 2 ,
276
+ 5 , 4 ,
277
+ 7 , 6 ) )
212
278
}
213
279
}
214
280
@@ -219,10 +285,11 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
219
285
use simdty:: u16x16;
220
286
unsafe {
221
287
let tmp: u16x16 = transmute ( vec) ;
222
- transmute ( u16x16 ( tmp. 1 , tmp. 2 , tmp. 3 , tmp. 0 ,
223
- tmp. 5 , tmp. 6 , tmp. 7 , tmp. 4 ,
224
- tmp. 9 , tmp. 10 , tmp. 11 , tmp. 8 ,
225
- tmp. 13 , tmp. 14 , tmp. 15 , tmp. 12 ) )
288
+ transmute ( simd_shuffle16 :: < u16x16 , u16 > ( tmp, tmp,
289
+ 1 , 2 , 3 , 0 ,
290
+ 5 , 6 , 7 , 4 ,
291
+ 9 , 10 , 11 , 8 ,
292
+ 13 , 14 , 15 , 12 ) )
226
293
}
227
294
}
228
295
0 commit comments