 #[cfg(feature = "simd_opt")]
 use std::mem::transmute;

-#[cfg(feature = "simd")]
 pub use simdty::{u32x4, u64x4};

-#[cfg(not(feature = "simd"))]
-#[derive(Clone, Copy, Debug)]
-#[repr(C)]
-pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
+#[cfg(feature = "simd")]
+extern "platform-intrinsic" {
+    fn simd_add<T>(x: T, y: T) -> T;
+    fn simd_shl<T>(x: T, y: T) -> T;
+    fn simd_shr<T>(x: T, y: T) -> T;
+    fn simd_xor<T>(x: T, y: T) -> T;
+}

-#[cfg(not(feature = "simd"))]
-#[derive(Clone, Copy, Debug)]
-#[repr(C)]
-pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
+#[cfg(feature = "simd_opt")]
+extern "platform-intrinsic" {
+    fn simd_shuffle8<T, Elem>(v: T, w: T,
+                              i0: u32, i1: u32, i2: u32, i3: u32,
+                              i4: u32, i5: u32, i6: u32, i7: u32) -> T;

-#[cfg(not(feature = "simd"))]
-use std::ops::BitXor;
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    fn simd_shuffle16<T, Elem>(v: T, w: T,
+                               i0: u32, i1: u32, i2: u32, i3: u32,
+                               i4: u32, i5: u32, i6: u32, i7: u32,
+                               i8: u32, i9: u32, i10: u32, i11: u32,
+                               i12: u32, i13: u32, i14: u32, i15: u32,
+                               ) -> T;
+}

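Note: simd_shuffle8 and simd_shuffle16 build their result by picking lanes, by index, out of the concatenation of their two vector arguments. A rough scalar model of that semantics, for orientation only (the function name and the use of runtime indices are mine; the real intrinsics require compile-time-constant indices):

    // Conceptual model of simd_shuffle8: output lane i is lane idx[i]
    // of the 16-lane concatenation of v and w. Illustrative only.
    fn shuffle8_model(v: [u16; 8], w: [u16; 8], idx: [u32; 8]) -> [u16; 8] {
        let mut out = [0u16; 8];
        for (o, &i) in out.iter_mut().zip(idx.iter()) {
            *o = if i < 8 { v[i as usize] } else { w[i as usize - 8] };
        }
        out
    }

With both arguments equal, as in the rotate helpers further down, the index list (1, 0, 3, 2, 5, 4, 7, 6) simply swaps adjacent lane pairs.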
-macro_rules! impl_bitxor {
+use std::ops::{Add, BitXor, Shl, Shr};
+
+macro_rules! impl_ops {
     ($vec:ident) => {
-        #[cfg(not(feature = "simd"))]
+        impl Add for $vec {
+            type Output = Self;
+
+            #[cfg(feature = "simd")]
+            #[inline(always)]
+            fn add(self, rhs: Self) -> Self::Output {
+                unsafe { simd_add(self, rhs) }
+            }
+
+            #[cfg(not(feature = "simd"))]
+            #[inline(always)]
+            fn add(self, rhs: Self) -> Self::Output {
+                $vec(self.0.wrapping_add(rhs.0),
+                     self.1.wrapping_add(rhs.1),
+                     self.2.wrapping_add(rhs.2),
+                     self.3.wrapping_add(rhs.3))
+            }
+        }
+
         impl BitXor for $vec {
             type Output = Self;

+            #[cfg(feature = "simd")]
+            #[inline(always)]
+            fn bitxor(self, rhs: Self) -> Self::Output {
+                unsafe { simd_xor(self, rhs) }
+            }
+
+            #[cfg(not(feature = "simd"))]
             #[inline(always)]
             fn bitxor(self, rhs: Self) -> Self::Output {
                 $vec(self.0 ^ rhs.0,
@@ -57,11 +93,49 @@ macro_rules! impl_bitxor {
                      self.3 ^ rhs.3)
             }
         }
+
+        impl Shl<$vec> for $vec {
+            type Output = Self;
+
+            #[cfg(feature = "simd")]
+            #[inline(always)]
+            fn shl(self, rhs: Self) -> Self::Output {
+                unsafe { simd_shl(self, rhs) }
+            }
+
+            #[cfg(not(feature = "simd"))]
+            #[inline(always)]
+            fn shl(self, rhs: Self) -> Self::Output {
+                $vec(self.0 << rhs.0,
+                     self.1 << rhs.1,
+                     self.2 << rhs.2,
+                     self.3 << rhs.3)
+            }
+        }
+
+        impl Shr<$vec> for $vec {
+            type Output = Self;
+
+            #[cfg(feature = "simd")]
+            #[inline(always)]
+            fn shr(self, rhs: Self) -> Self::Output {
+                unsafe { simd_shr(self, rhs) }
+            }
+
+            #[cfg(not(feature = "simd"))]
+            #[inline(always)]
+            fn shr(self, rhs: Self) -> Self::Output {
+                $vec(self.0 >> rhs.0,
+                     self.1 >> rhs.1,
+                     self.2 >> rhs.2,
+                     self.3 >> rhs.3)
+            }
+        }
     }
 }

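A note on the scalar fallbacks in impl_ops! above: per-lane SIMD addition wraps modulo 2^n, while plain `+` on Rust integers panics on overflow in debug builds, which is why the non-simd path spells out wrapping_add. A quick scalar illustration:

    fn main() {
        let x = 0xffff_ffffu32;
        // `x + 1` would panic in a debug build; wrapping_add gives the
        // modular result that per-lane SIMD addition produces.
        assert_eq!(x.wrapping_add(1), 0);
    }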
-impl_bitxor!(u32x4);
-impl_bitxor!(u64x4);
+impl_ops!(u32x4);
+impl_ops!(u64x4);

 pub trait Vector4<T>: Copy {
     fn gather(src: &[T], i0: usize, i1: usize, i2: usize, i3: usize) -> Self;
@@ -118,26 +192,16 @@ macro_rules! impl_vector4_common {
                  self.3.to_le())
         }

-        #[cfg(feature = "simd")]
         #[inline(always)]
         fn wrapping_add(self, rhs: Self) -> Self { self + rhs }

-        #[cfg(not(feature = "simd"))]
-        #[inline(always)]
-        fn wrapping_add(self, rhs: Self) -> Self {
-            $vec(self.0.wrapping_add(rhs.0),
-                 self.1.wrapping_add(rhs.1),
-                 self.2.wrapping_add(rhs.2),
-                 self.3.wrapping_add(rhs.3))
-        }
-
         #[cfg(feature = "simd")]
         #[inline(always)]
         fn rotate_right_any(self, n: u32) -> Self {
             let r = n as $word;
             let l = $bits - r;

-            (self >> $vec(r, r, r, r)) | (self << $vec(l, l, l, l))
+            (self >> $vec(r, r, r, r)) ^ (self << $vec(l, l, l, l))
         }

         #[cfg(not(feature = "simd"))]
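The `|` to `^` change in rotate_right_any is behavior-preserving: for 0 < n < $bits, the right-shifted and left-shifted halves occupy disjoint bit positions, so XOR and OR agree. A scalar check (the helper name is mine; n = 0 is excluded, since the shift by $bits would be out of range):

    fn rotr32(x: u32, r: u32) -> u32 {
        // The two halves are disjoint, so ^ behaves exactly like | here.
        (x >> r) ^ (x << (32 - r))
    }

    fn main() {
        for r in 1..32 {
            assert_eq!(rotr32(0xdead_beef, r), 0xdead_beefu32.rotate_right(r));
        }
    }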
@@ -174,10 +238,11 @@ fn u32x4_rotate_right_16(vec: u32x4) -> u32x4 {
     use simdty::u16x8;
     unsafe {
         let tmp: u16x8 = transmute(vec);
-        transmute(u16x8(tmp.1, tmp.0,
-                        tmp.3, tmp.2,
-                        tmp.5, tmp.4,
-                        tmp.7, tmp.6))
+        transmute(simd_shuffle8::<u16x8, u16>(tmp, tmp,
+                                              1, 0,
+                                              3, 2,
+                                              5, 4,
+                                              7, 6))
     }
 }

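Why the (1, 0, 3, 2, ...) pattern works: rotating a u32 by 16 bits is exactly swapping its two u16 halves, so a lane-pair swap on the u16x8 view rotates every u32 lane at once. A scalar sketch (helper name mine):

    fn rotr16_via_halves(x: u32) -> u32 {
        let lo = x as u16;          // bits 0..16
        let hi = (x >> 16) as u16;  // bits 16..32
        ((lo as u32) << 16) | hi as u32
    }

    fn main() {
        let x = 0x1234_5678u32;
        assert_eq!(rotr16_via_halves(x), x.rotate_right(16));
    }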
@@ -205,10 +270,11 @@ fn u64x4_rotate_right_32(vec: u64x4) -> u64x4 {
     use simdty::u32x8;
     unsafe {
         let tmp: u32x8 = transmute(vec);
-        transmute(u32x8(tmp.1, tmp.0,
-                        tmp.3, tmp.2,
-                        tmp.5, tmp.4,
-                        tmp.7, tmp.6))
+        transmute(simd_shuffle8::<u32x8, u32>(tmp, tmp,
+                                              1, 0,
+                                              3, 2,
+                                              5, 4,
+                                              7, 6))
     }
 }

@@ -219,10 +285,11 @@ fn u64x4_rotate_right_16(vec: u64x4) -> u64x4 {
     use simdty::u16x16;
     unsafe {
         let tmp: u16x16 = transmute(vec);
-        transmute(u16x16(tmp.1, tmp.2, tmp.3, tmp.0,
-                         tmp.5, tmp.6, tmp.7, tmp.4,
-                         tmp.9, tmp.10, tmp.11, tmp.8,
-                         tmp.13, tmp.14, tmp.15, tmp.12))
+        transmute(simd_shuffle16::<u16x16, u16>(tmp, tmp,
+                                                1, 2, 3, 0,
+                                                5, 6, 7, 4,
+                                                9, 10, 11, 8,
+                                                13, 14, 15, 12))
     }
 }

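The last shuffle follows the same idea one size up: each u64 lane is four u16 pieces, and a 16-bit right rotation moves each piece down one slot with wraparound, hence the (1, 2, 3, 0) index group repeated per lane. A scalar sketch of that permutation, assuming the little-endian lane layout the transmutes rely on (helper name mine):

    fn rotr16_u64_via_words(x: u64) -> u64 {
        // Split into four u16 words, low word first.
        let w = [x as u16, (x >> 16) as u16, (x >> 32) as u16, (x >> 48) as u16];
        // out[j] = in[idx[j]] with idx = [1, 2, 3, 0].
        let r = [w[1], w[2], w[3], w[0]];
        (r[0] as u64) | (r[1] as u64) << 16 | (r[2] as u64) << 32 | (r[3] as u64) << 48
    }

    fn main() {
        let x = 0x1122_3344_5566_7788u64;
        assert_eq!(rotr16_u64_via_words(x), x.rotate_right(16));
    }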