File tree 2 files changed +26
-3
lines changed
2 files changed +26
-3
lines changed Original file line number Diff line number Diff line change @@ -85,7 +85,19 @@ impl SimdU8Value {
85
85
#[ inline]
86
86
#[ allow( clippy:: cast_ptr_alignment) ]
87
87
unsafe fn load_from ( ptr : * const u8 ) -> Self {
88
- Self :: from ( vld1q_u8 ( ptr) )
88
+ // WORKAROUND:
89
+ // The vld1q_u8 intrinsic is currently broken: it is treated as individual
90
+ // byte loads so the compiler sometimes decides it is better to load
91
+ // individual bytes to "optimize" a subsequent SIMD shuffle
92
+ //
93
+ // This code forces a full 128-bit load.
94
+ let mut dst = core:: mem:: MaybeUninit :: < uint8x16_t > :: uninit ( ) ;
95
+ core:: ptr:: copy_nonoverlapping (
96
+ ptr as * const u8 ,
97
+ dst. as_mut_ptr ( ) as * mut u8 ,
98
+ core:: mem:: size_of :: < uint8x16_t > ( ) ,
99
+ ) ;
100
+ Self :: from ( dst. assume_init ( ) )
89
101
}
90
102
91
103
#[ inline]
Original file line number Diff line number Diff line change @@ -192,9 +192,20 @@ macro_rules! algorithm_simd {
192
192
193
193
#[ cfg_attr( not( target_arch="aarch64" ) , target_feature( enable = $feat) ) ]
194
194
#[ inline]
195
+ #[ allow( unconditional_panic) ] // does not panic because len is checked
196
+ #[ allow( const_err) ] // the same, but for Rust 1.38.0
195
197
unsafe fn check_block( & mut self , input: SimdInput ) {
196
- for i in 0 ..input. vals. len( ) {
197
- self . check_bytes( input. vals[ i] ) ;
198
+ // necessary because a for loop is not unrolled on ARM64
199
+ if input. vals. len( ) == 2 {
200
+ self . check_bytes( input. vals[ 0 ] ) ;
201
+ self . check_bytes( input. vals[ 1 ] ) ;
202
+ } else if input. vals. len( ) == 4 {
203
+ self . check_bytes( input. vals[ 0 ] ) ;
204
+ self . check_bytes( input. vals[ 1 ] ) ;
205
+ self . check_bytes( input. vals[ 2 ] ) ;
206
+ self . check_bytes( input. vals[ 3 ] ) ;
207
+ } else {
208
+ panic!( "Unsupported number of chunks" ) ;
198
209
}
199
210
}
200
211
}
You can’t perform that action at this time.
0 commit comments