@@ -2,6 +2,7 @@ use super::V2_COOKIE;
2
2
use super :: super :: { Counter , Histogram , RestatState } ;
3
3
use super :: super :: num:: ToPrimitive ;
4
4
use std:: io:: { self , Cursor , ErrorKind , Read } ;
5
+ use std:: marker:: PhantomData ;
5
6
use std;
6
7
use super :: byteorder:: { BigEndian , ReadBytesExt } ;
7
8
@@ -54,8 +55,6 @@ impl Deserializer {
54
55
/// bytes already in slice or `Vec` form.
55
56
pub fn deserialize < T : Counter , R : Read > ( & mut self , reader : & mut R )
56
57
-> Result < Histogram < T > , DeserializeError > {
57
- // TODO benchmark minimizing read calls by reading into a fixed-size header buffer
58
-
59
58
let cookie = reader. read_u32 :: < BigEndian > ( ) ?;
60
59
61
60
if cookie != V2_COOKIE {
@@ -87,31 +86,30 @@ impl Deserializer {
87
86
let mut payload_slice = & mut self . payload_buf [ 0 ..payload_len] ;
88
87
reader. read_exact ( & mut payload_slice) ?;
89
88
90
- let mut cursor = Cursor :: new ( & payload_slice) ;
91
- let mut dest_index: usize = 0 ;
89
+ let mut payload_index: usize = 0 ;
92
90
let mut restat_state = RestatState :: new ( ) ;
93
- while cursor . position ( ) < payload_slice . len ( ) as u64 {
94
- let num = zig_zag_decode ( varint_read ( & mut cursor ) ? ) ;
95
-
96
- if num < 0 {
97
- let zero_count = ( -num ) . to_usize ( )
98
- . ok_or ( DeserializeError :: UsizeTypeTooSmall ) ? ;
99
- // skip the zeros
100
- dest_index = dest_index . checked_add ( zero_count )
101
- . ok_or ( DeserializeError :: UsizeTypeTooSmall ) ? ;
102
- continue ;
103
- } else {
104
- let count : T = T :: from_i64 ( num )
105
- . ok_or ( DeserializeError :: UnsuitableCounterType ) ?;
106
-
107
- h . set_count_at_index ( dest_index , count )
108
- . map_err ( |_| DeserializeError :: EncodedArrayTooLong ) ? ;
109
-
110
- restat_state . on_nonzero_count ( dest_index , count ) ;
111
-
112
- dest_index = dest_index . checked_add ( 1 )
113
- . ok_or ( DeserializeError :: UsizeTypeTooSmall ) ? ;
114
- }
91
+ let mut decode_state = DecodeLoopState :: new ( ) ;
92
+
93
+ while payload_index < payload_len . saturating_sub ( 9 ) {
94
+ // Read with fast loop until we are within 9 of the end. Fast loop can't handle EOF,
95
+ // so bail to slow version for the last few bytes.
96
+
97
+ let ( zz_num , bytes_read ) = varint_read_slice (
98
+ & payload_slice [ payload_index.. ( payload_index + 9 ) ] ) ;
99
+ payload_index += bytes_read ;
100
+
101
+ let count_or_zeros = zig_zag_decode ( zz_num ) ;
102
+
103
+ decode_state . on_decoded_num ( count_or_zeros , & mut restat_state , & mut h ) ?;
104
+ }
105
+
106
+ // Now read the leftovers
107
+ let leftover_slice = & payload_slice [ payload_index.. ] ;
108
+ let mut cursor = Cursor :: new ( & leftover_slice ) ;
109
+ while cursor . position ( ) < leftover_slice . len ( ) as u64 {
110
+ let count_or_zeros = zig_zag_decode ( varint_read ( & mut cursor ) ? ) ;
111
+
112
+ decode_state . on_decoded_num ( count_or_zeros , & mut restat_state , & mut h ) ? ;
115
113
}
116
114
117
115
restat_state. update_histogram ( & mut h) ;
@@ -120,6 +118,62 @@ impl Deserializer {
120
118
}
121
119
}
122
120
121
+ // Only public for testing.
122
+ /// Read from a slice that must be 9 bytes long or longer. Returns the decoded number and how many
123
+ /// bytes were consumed.
124
+ #[ inline]
125
+ pub fn varint_read_slice ( slice : & [ u8 ] ) -> ( u64 , usize ) {
126
+ let mut b = slice[ 0 ] ;
127
+
128
+ // take low 7 bits
129
+ let mut value: u64 = low_7_bits ( b) ;
130
+ if !is_high_bit_set ( b) {
131
+ return ( value, 1 ) ;
132
+ }
133
+ // high bit set, keep reading
134
+ b = slice[ 1 ] ;
135
+ value |= low_7_bits ( b) << 7 ;
136
+ if !is_high_bit_set ( b) {
137
+ return ( value, 2 ) ;
138
+ }
139
+ b = slice[ 2 ] ;
140
+ value |= low_7_bits ( b) << 7 * 2 ;
141
+ if !is_high_bit_set ( b) {
142
+ return ( value, 3 ) ;
143
+ }
144
+ b = slice[ 3 ] ;
145
+ value |= low_7_bits ( b) << 7 * 3 ;
146
+ if !is_high_bit_set ( b) {
147
+ return ( value, 4 ) ;
148
+ }
149
+ b = slice[ 4 ] ;
150
+ value |= low_7_bits ( b) << 7 * 4 ;
151
+ if !is_high_bit_set ( b) {
152
+ return ( value, 5 ) ;
153
+ }
154
+ b = slice[ 5 ] ;
155
+ value |= low_7_bits ( b) << 7 * 5 ;
156
+ if !is_high_bit_set ( b) {
157
+ return ( value, 6 ) ;
158
+ }
159
+ b = slice[ 6 ] ;
160
+ value |= low_7_bits ( b) << 7 * 6 ;
161
+ if !is_high_bit_set ( b) {
162
+ return ( value, 7 ) ;
163
+ }
164
+ b = slice[ 7 ] ;
165
+ value |= low_7_bits ( b) << 7 * 7 ;
166
+ if !is_high_bit_set ( b) {
167
+ return ( value, 8 ) ;
168
+ }
169
+
170
+ b = slice[ 8 ] ;
171
+ // special case: use last byte as is
172
+ value |= ( b as u64 ) << 7 * 8 ;
173
+
174
+ ( value, 9 )
175
+ }
176
+
123
177
// Only public for testing.
124
178
/// Read a LEB128-64b9B from the buffer
125
179
pub fn varint_read < R : Read > ( reader : & mut R ) -> io:: Result < u64 > {
@@ -174,7 +228,6 @@ fn low_7_bits(b: u8) -> u64 {
174
228
175
229
/// Returns `true` when the most significant bit of `b` is set.
#[inline]
fn is_high_bit_set(b: u8) -> bool {
    // Shifting the top bit down to position 0 leaves exactly 1 when it was set, 0 otherwise.
    (b >> 7) == 1
}
180
233
@@ -183,3 +236,45 @@ fn is_high_bit_set(b: u8) -> bool {
183
236
pub fn zig_zag_decode(encoded: u64) -> i64 {
    // The upper 63 bits carry the magnitude; the lowest bit selects the sign.
    let magnitude = (encoded >> 1) as i64;
    // 0 when the low bit is clear, -1 (all ones) when it is set, so the XOR below either
    // leaves the magnitude untouched or flips every bit of it.
    let sign_mask = -((encoded & 1) as i64);

    magnitude ^ sign_mask
}
239
+
240
+ /// We need to perform the same logic in two different decode loops while carrying over a modicum
241
+ /// of state.
242
+ struct DecodeLoopState < T : Counter > {
243
+ dest_index : usize ,
244
+ phantom : PhantomData < T >
245
+ }
246
+
247
+ impl < T : Counter > DecodeLoopState < T > {
248
+
249
+ fn new ( ) -> DecodeLoopState < T > {
250
+ DecodeLoopState {
251
+ dest_index : 0 ,
252
+ phantom : PhantomData
253
+ }
254
+ }
255
+
256
+ #[ inline]
257
+ fn on_decoded_num ( & mut self , count_or_zeros : i64 , restat_state : & mut RestatState < T > ,
258
+ h : & mut Histogram < T > ) -> Result < ( ) , DeserializeError > {
259
+ if count_or_zeros < 0 {
260
+ let zero_count = ( -count_or_zeros) . to_usize ( )
261
+ . ok_or ( DeserializeError :: UsizeTypeTooSmall ) ?;
262
+ // skip the zeros
263
+ self . dest_index = self . dest_index . checked_add ( zero_count)
264
+ . ok_or ( DeserializeError :: UsizeTypeTooSmall ) ?;
265
+ } else {
266
+ let count: T = T :: from_i64 ( count_or_zeros)
267
+ . ok_or ( DeserializeError :: UnsuitableCounterType ) ?;
268
+
269
+ h. set_count_at_index ( self . dest_index , count)
270
+ . map_err ( |_| DeserializeError :: EncodedArrayTooLong ) ?;
271
+
272
+ restat_state. on_nonzero_count ( self . dest_index , count) ;
273
+
274
+ self . dest_index = self . dest_index . checked_add ( 1 )
275
+ . ok_or ( DeserializeError :: UsizeTypeTooSmall ) ?;
276
+ }
277
+
278
+ Ok ( ( ) )
279
+ }
280
+ }
0 commit comments