@@ -41,84 +41,84 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     core::mem::transmute(x_read)
 }

-/// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
-/// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
-/// chunk size if a load happened.
-#[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
-unsafe fn load_chunk_aligned<T: Copy>(
-    src: *const usize,
-    dst: *mut usize,
-    load_sz: usize,
-    offset: usize,
-) -> usize {
-    let chunk_sz = core::mem::size_of::<T>();
-    if (load_sz & chunk_sz) != 0 {
-        *dst.wrapping_byte_add(offset).cast::<T>() = *src.wrapping_byte_add(offset).cast::<T>();
-        offset | chunk_sz
-    } else {
-        offset
+unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
+    let dest_end = dest.wrapping_add(n);
+    while dest < dest_end {
+        *dest = *src;
+        dest = dest.wrapping_add(1);
+        src = src.wrapping_add(1);
     }
 }

-/// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
-/// read with the out-of-bounds part filled with 0s.
-/// `load_sz` be strictly less than `WORD_SIZE`.
+/// Load `load_sz` many bytes from `src`, which must be usize-aligned.
+/// `load_sz` must be strictly less than `WORD_SIZE`.
+///
+/// The remaining bytes are filled non-deterministically.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_sz < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
+    debug_assert!(src.addr() % WORD_SIZE == 0);
+
+    let mut out: usize;
+    core::cfg_match! {
+        // We don't need an x86 path here as `feature = "mem-unaligned"` is always set there.
+        all(not(miri), any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec")) => {
+            core::arch::asm!(
+                "ldr {out}, [{src}]",
+                src = in(reg) src,
+                out = lateout(reg) out,
+                options(nostack, readonly, preserves_flags),
+            );
+        }
+        _ => {
+            out = 0;
+            copy_forward_bytes(&raw mut out as *mut u8, src as *mut u8, load_sz);
+        }
+
+    }

-    let mut i = 0;
-    let mut out = 0usize;
-    // We load in decreasing order, so the pointers remain sufficiently aligned for the next step.
-    i = load_chunk_aligned::<u32>(src, &raw mut out, load_sz, i);
-    i = load_chunk_aligned::<u16>(src, &raw mut out, load_sz, i);
-    i = load_chunk_aligned::<u8>(src, &raw mut out, load_sz, i);
-    debug_assert!(i == load_sz);
     out
 }

 /// Load `load_sz` many bytes from `src.wrapping_byte_add(WORD_SIZE - load_sz)`. `src` must be
-/// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
-/// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
-/// `load_sz` be strictly less than `WORD_SIZE`.
+/// `usize`-aligned. `load_sz` must be strictly less than `WORD_SIZE`.
+///
+/// The bytes are returned as the *last* bytes of the return value, i.e., this acts as if we had
+/// done a `usize` read from `src`, with the out-of-bounds part filled non-deterministically.
 #[cfg(not(feature = "mem-unaligned"))]
 #[inline(always)]
 unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
-    // We can read up to 7 bytes here, which is enough for WORD_SIZE of 8
-    // (since `load_sz < WORD_SIZE`).
-    const { assert!(WORD_SIZE <= 8) };
+    debug_assert!(src.addr() % WORD_SIZE == 0);
+
+    let mut out: usize;
+    core::cfg_match! {
+        // We don't need an x86 path here as `feature = "mem-unaligned"` is always set there.
+        all(not(miri), any(target_arch = "arm", target_arch = "aarch64", target_arch = "arm64ec")) => {
+            core::arch::asm!(
+                "ldr {out}, [{src}]",
+                src = in(reg) src,
+                out = lateout(reg) out,
+                options(nostack, readonly, preserves_flags),
+            );
+        }
+        _ => {
+            out = 0;
+            // Obtain pointers pointing to the beginning of the range we want to load.
+            let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
+            let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
+            copy_forward_bytes(out_shifted as *mut u8, src_shifted as *mut u8, load_sz);
+        }
+
+    }

-    let mut i = 0;
-    let mut out = 0usize;
-    // Obtain pointers pointing to the beginning of the range we want to load.
-    let src_shifted = src.wrapping_byte_add(WORD_SIZE - load_sz);
-    let out_shifted = (&raw mut out).wrapping_byte_add(WORD_SIZE - load_sz);
-    // We load in increasing order, so by the time we reach `u16` things are 2-aligned etc.
-    i = load_chunk_aligned::<u8>(src_shifted, out_shifted, load_sz, i);
-    i = load_chunk_aligned::<u16>(src_shifted, out_shifted, load_sz, i);
-    i = load_chunk_aligned::<u32>(src_shifted, out_shifted, load_sz, i);
-    debug_assert!(i == load_sz);
     out
 }

 #[inline(always)]
 pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize) {
-    #[inline(always)]
-    unsafe fn copy_forward_bytes(mut dest: *mut u8, mut src: *const u8, n: usize) {
-        let dest_end = dest.wrapping_add(n);
-        while dest < dest_end {
-            *dest = *src;
-            dest = dest.wrapping_add(1);
-            src = src.wrapping_add(1);
-        }
-    }
-
     #[inline(always)]
     unsafe fn copy_forward_aligned_words(dest: *mut u8, src: *const u8, n: usize) {
         let mut dest_usize = dest as *mut usize;
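
A note on the new semantics (not part of the patch): the asm path performs a full `usize` load, so the bytes beyond `load_sz` hold whatever happens to sit in memory rather than zeroes. That is fine at the hardware level, since an aligned word load cannot cross a page boundary, but callers may no longer rely on zero-filled padding. A minimal sketch of the resulting caller obligation, using a hypothetical portable stand-in `load_aligned_partial_model` for the patch's fallback path:

```rust
/// Hypothetical stand-in for the fallback path of `load_aligned_partial`:
/// copies `load_sz` bytes of `src` into a zeroed word. The real function may
/// leave the remaining bytes non-deterministic, so callers must mask them off.
unsafe fn load_aligned_partial_model(src: *const usize, load_sz: usize) -> usize {
    debug_assert!(load_sz < core::mem::size_of::<usize>());
    let mut out = 0usize;
    // Byte-wise copy into the low end of `out` (matches little-endian layout).
    core::ptr::copy_nonoverlapping(src as *const u8, &mut out as *mut usize as *mut u8, load_sz);
    out
}

fn main() {
    // Alignment is not enforced by this model, unlike the real function.
    let buf: [u8; core::mem::size_of::<usize>()] = core::array::from_fn(|i| i as u8 + 1);
    let load_sz = 3;
    let word = unsafe { load_aligned_partial_model(buf.as_ptr().cast(), load_sz) };
    // Only the low `load_sz` bytes are meaningful; mask before comparing.
    let mask = (1usize << (8 * load_sz)) - 1;
    if cfg!(target_endian = "little") {
        assert_eq!(word & mask, 0x03_02_01);
    }
}
```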
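The reason non-deterministic padding suffices: the word-wise misaligned copy loops in this file combine two aligned loads with shifts, so the out-of-bounds bytes are discarded before they can influence the result. A little-endian sketch of that combining step (the function name and the `u64` concreteness are illustrative, not the file's actual code):

```rust
/// Illustrative little-endian combine step, as used conceptually by word-wise
/// misaligned copy loops: the shifts keep only in-bounds bytes, so whatever the
/// partial loads put in the out-of-bounds positions never reaches the output.
fn combine_words(prev_word: u64, next_word: u64, offset: usize) -> u64 {
    assert!(0 < offset && offset < 8);
    let shift = 8 * offset as u32;
    // Low bytes come from the tail of `prev_word`, high bytes from the head of `next_word`.
    (prev_word >> shift) | (next_word << (64 - shift))
}

fn main() {
    // offset 3: take the top 5 bytes of `prev_word` and the bottom 3 of `next_word`.
    let combined = combine_words(0x8877665544332211, 0xffeeddccbbaa9988, 3);
    assert_eq!(combined, 0xaa99888877665544);
}
```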