2
2
3
3
use super :: Utf8Error ;
4
4
use crate :: intrinsics:: { const_eval_select, unlikely} ;
5
+ use crate :: mem;
5
6
6
7
/// Returns the initial codepoint accumulator for the first byte.
7
8
/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
@@ -163,6 +164,13 @@ const ST_ERROR: u32 = 0 * BITS_PER_STATE as u32;
163
164
#[ allow( clippy:: all) ]
164
165
const ST_ACCEPT : u32 = 1 * BITS_PER_STATE as u32 ;
165
166
167
+ /// True on platforms that do not have an efficient 64-bit shift and should use the 32-bit shift fallback.
168
+ const USE_SHIFT32 : bool = cfg ! ( all(
169
+ any( target_pointer_width = "16" , target_pointer_width = "32" ) ,
170
+ // WASM32 supports 64-bit shift.
171
+ not( target_arch = "wasm32" ) ,
172
+ ) ) ;
173
+
166
174
// After storing STATE_CNT * BITS_PER_STATE = 54bits on 64-bit platform, or (STATE_CNT - 5)
167
175
// * BITS_PER_STATE = 24bits on 32-bit platform, we still have some high bits left.
168
176
// They will never be used via state transition.
@@ -218,13 +226,12 @@ static TRANS_TABLE: [u64; 256] = {
218
226
219
227
// On platforms without 64-bit shift, align states 5..10 to 32-bit boundary.
220
228
// See docs above for details.
221
- let need_align = cfg ! ( any( target_pointer_width = "16" , target_pointer_width = "32" ) ) ;
222
229
let mut bits = 0u64 ;
223
230
let mut j = 0 ;
224
231
while j < to. len ( ) {
225
232
let to_off =
226
- to[ j] * BITS_PER_STATE as u64 + if need_align && to[ j] >= 5 { 2 } else { 0 } ;
227
- let off = j as u32 * BITS_PER_STATE + if need_align && j >= 5 { 2 } else { 0 } ;
233
+ to[ j] * BITS_PER_STATE as u64 + if USE_SHIFT32 && to[ j] >= 5 { 2 } else { 0 } ;
234
+ let off = j as u32 * BITS_PER_STATE + if USE_SHIFT32 && j >= 5 { 2 } else { 0 } ;
228
235
bits |= to_off << off;
229
236
j += 1 ;
230
237
}
@@ -244,20 +251,17 @@ static TRANS_TABLE: [u64; 256] = {
244
251
table
245
252
} ;
246
253
247
- #[ cfg( not( any( target_pointer_width = "16" , target_pointer_width = "32" ) ) ) ]
248
254
#[ inline( always) ]
249
255
const fn next_state ( st : u32 , byte : u8 ) -> u32 {
250
- TRANS_TABLE [ byte as usize ] . wrapping_shr ( st as _ ) as _
251
- }
252
-
253
- #[ cfg( any( target_pointer_width = "16" , target_pointer_width = "32" ) ) ]
254
- #[ inline( always) ]
255
- const fn next_state ( st : u32 , byte : u8 ) -> u32 {
256
- // SAFETY: `u64` is more aligned than `u32`, and has the same repr as `[u32; 2]`.
257
- let [ lo, hi] = unsafe { crate :: mem:: transmute :: < u64 , [ u32 ; 2 ] > ( TRANS_TABLE [ byte as usize ] ) } ;
258
- #[ cfg( target_endian = "big" ) ]
259
- let ( lo, hi) = ( hi, lo) ;
260
- if st & 32 == 0 { lo } else { hi } . wrapping_shr ( st)
256
+ if USE_SHIFT32 {
257
+ // SAFETY: `u64` is more aligned than `u32`, and has the same repr as `[u32; 2]`.
258
+ let [ lo, hi] = unsafe { mem:: transmute :: < u64 , [ u32 ; 2 ] > ( TRANS_TABLE [ byte as usize ] ) } ;
259
+ #[ cfg( target_endian = "big" ) ]
260
+ let ( lo, hi) = ( hi, lo) ;
261
+ if st & 32 == 0 { lo } else { hi } . wrapping_shr ( st)
262
+ } else {
263
+ TRANS_TABLE [ byte as usize ] . wrapping_shr ( st as _ ) as _
264
+ }
261
265
}
262
266
263
267
/// Check if `byte` is a valid UTF-8 first byte, assuming it must be a valid first or
0 commit comments