Skip to content

Commit 53c2f44

Browse files
Make leb128 coding a bit faster.
1 parent 2e33c89 commit 53c2f44

File tree

2 files changed

+144
-81
lines changed

2 files changed

+144
-81
lines changed

src/libserialize/leb128.rs

+102-62
Original file line numberDiff line numberDiff line change
@@ -9,64 +9,94 @@
99
// except according to those terms.
1010

1111
/// Stores `byte` at index `position` of `vec`.
///
/// If `position` equals the current length of `vec` the byte is appended;
/// otherwise the element already at `position` is overwritten.
///
/// # Panics
///
/// Panics if `position` is greater than `vec.len()`.
#[inline]
pub fn write_to_vec(vec: &mut Vec<u8>, position: usize, byte: u8) {
    if position != vec.len() {
        // Overwrite in place; indexing panics when `position` is past the end.
        vec[position] = byte;
    } else {
        vec.push(byte);
    }
}
1919

20-
#[inline]
21-
/// encodes an integer using unsigned leb128 encoding and stores
22-
/// the result using a callback function.
23-
///
24-
/// The callback `write` is called once for each position
25-
/// that is to be written to with the byte to be encoded
26-
/// at that position.
27-
pub fn write_unsigned_leb128_to<W>(mut value: u128, mut write: W) -> usize
28-
where W: FnMut(usize, u8)
29-
{
30-
let mut position = 0;
31-
loop {
32-
let mut byte = (value & 0x7F) as u8;
33-
value >>= 7;
34-
if value != 0 {
35-
byte |= 0x80;
36-
}
37-
38-
write(position, byte);
39-
position += 1;
20+
/// Maximum number of LEB128 bytes needed to encode a `usize` on 32-bit targets.
#[cfg(target_pointer_width = "32")]
const USIZE_LEB128_SIZE: usize = 5;
/// Maximum number of LEB128 bytes needed to encode a `usize` on 64-bit targets.
#[cfg(target_pointer_width = "64")]
const USIZE_LEB128_SIZE: usize = 10;

// Expands to the maximum number of bytes an unsigned LEB128 encoding of the
// named integer type can occupy, i.e. ceil(bit_width / 7), since each encoded
// byte carries 7 payload bits.
macro_rules! leb128_size {
    (u16) => { 3 };
    (u32) => { 5 };
    (u64) => { 10 };
    (u128) => { 19 };
    (usize) => { USIZE_LEB128_SIZE };
}
4032

41-
if value == 0 {
42-
break;
33+
// Generates `$fn_name`, an unsigned LEB128 encoder specialised for `$int_ty`.
macro_rules! impl_write_unsigned_leb128 {
    ($fn_name:ident, $int_ty:ident) => (
        /// Writes the unsigned LEB128 encoding of `value` into `out`, starting
        /// at `start_position`, and returns the number of bytes written.
        ///
        /// Bytes are stored through `write_to_vec`, so existing bytes are
        /// overwritten and the vector grows once its end is reached.
        #[inline]
        pub fn $fn_name(out: &mut Vec<u8>, start_position: usize, mut value: $int_ty) -> usize {
            let mut position = start_position;

            // The trip count is bounded by the widest possible encoding of
            // `$int_ty`; `value` is always zero by the last iteration.
            for _ in 0 .. leb128_size!($int_ty) {
                let low_bits = (value & 0x7F) as u8;
                value >>= 7;
                // The high bit of each byte flags that more bytes follow.
                let more = value != 0;

                write_to_vec(out, position, if more { low_bits | 0x80 } else { low_bits });
                position += 1;

                if !more {
                    break;
                }
            }

            position - start_position
        }
    )
}
4857

49-
pub fn write_unsigned_leb128(out: &mut Vec<u8>, start_position: usize, value: u128) -> usize {
50-
write_unsigned_leb128_to(value, |i, v| write_to_vec(out, start_position+i, v))
58+
impl_write_unsigned_leb128!(write_u16_leb128, u16);
59+
impl_write_unsigned_leb128!(write_u32_leb128, u32);
60+
impl_write_unsigned_leb128!(write_u64_leb128, u64);
61+
impl_write_unsigned_leb128!(write_u128_leb128, u128);
62+
impl_write_unsigned_leb128!(write_usize_leb128, usize);
63+
64+
65+
macro_rules! impl_read_unsigned_leb128 {
66+
($fn_name:ident, $int_ty:ident) => (
67+
#[inline]
68+
pub fn $fn_name(slice: &[u8]) -> ($int_ty, usize) {
69+
let mut result: $int_ty = 0;
70+
let mut shift = 0;
71+
let mut position = 0;
72+
73+
for _ in 0 .. leb128_size!($int_ty) {
74+
let byte = unsafe {
75+
*slice.get_unchecked(position)
76+
};
77+
position += 1;
78+
result |= ((byte & 0x7F) as $int_ty) << shift;
79+
if (byte & 0x80) == 0 {
80+
break;
81+
}
82+
shift += 7;
83+
}
84+
85+
// Do a single bounds check at the end instead of for every byte.
86+
assert!(position <= slice.len());
87+
88+
(result, position)
89+
}
90+
)
5191
}
5292

53-
#[inline]
54-
pub fn read_unsigned_leb128(data: &[u8], start_position: usize) -> (u128, usize) {
55-
let mut result = 0;
56-
let mut shift = 0;
57-
let mut position = start_position;
58-
loop {
59-
let byte = data[position];
60-
position += 1;
61-
result |= ((byte & 0x7F) as u128) << shift;
62-
if (byte & 0x80) == 0 {
63-
break;
64-
}
65-
shift += 7;
66-
}
93+
impl_read_unsigned_leb128!(read_u16_leb128, u16);
94+
impl_read_unsigned_leb128!(read_u32_leb128, u32);
95+
impl_read_unsigned_leb128!(read_u64_leb128, u64);
96+
impl_read_unsigned_leb128!(read_u128_leb128, u128);
97+
impl_read_unsigned_leb128!(read_usize_leb128, usize);
98+
6799

68-
(result, position - start_position)
69-
}
70100

71101
#[inline]
72102
/// encodes an integer using signed leb128 encoding and stores
@@ -130,26 +160,36 @@ pub fn read_signed_leb128(data: &[u8], start_position: usize) -> (i128, usize) {
130160
(result, position - start_position)
131161
}
132162

133-
#[test]
134-
fn test_unsigned_leb128() {
135-
let mut stream = Vec::with_capacity(10000);
136-
137-
for x in 0..62 {
138-
let pos = stream.len();
139-
let bytes_written = write_unsigned_leb128(&mut stream, pos, 3 << x);
140-
assert_eq!(stream.len(), pos + bytes_written);
141-
}
142-
143-
let mut position = 0;
144-
for x in 0..62 {
145-
let expected = 3 << x;
146-
let (actual, bytes_read) = read_unsigned_leb128(&stream, position);
147-
assert_eq!(expected, actual);
148-
position += bytes_read;
149-
}
150-
assert_eq!(stream.len(), position);
163+
// Generates a `#[test]` named `$test_name` that round-trips a series of
// values through `$write_fn_name` and `$read_fn_name` for `$int_ty`.
macro_rules! impl_test_unsigned_leb128 {
    ($test_name:ident, $write_fn_name:ident, $read_fn_name:ident, $int_ty:ident) => (
        #[test]
        fn $test_name() {
            // The i-th sample is `3 << i`, truncated to the type under test.
            let sample = |shift: u32| (3u64 << shift) as $int_ty;

            // Encode all samples back to back into one buffer.
            let mut stream = Vec::new();
            for shift in 0..62 {
                let start = stream.len();
                let written = $write_fn_name(&mut stream, start, sample(shift));
                assert_eq!(stream.len(), start + written);
            }

            // Decode them again in order, tracking how far we have read.
            let mut cursor = 0;
            for shift in 0..62 {
                let (decoded, consumed) = $read_fn_name(&stream[cursor ..]);
                assert_eq!(sample(shift), decoded);
                cursor += consumed;
            }

            // Every encoded byte must have been consumed.
            assert_eq!(stream.len(), cursor);
        }
    )
}
152186

187+
impl_test_unsigned_leb128!(test_u16_leb128, write_u16_leb128, read_u16_leb128, u16);
188+
impl_test_unsigned_leb128!(test_u32_leb128, write_u32_leb128, read_u32_leb128, u32);
189+
impl_test_unsigned_leb128!(test_u64_leb128, write_u64_leb128, read_u64_leb128, u64);
190+
impl_test_unsigned_leb128!(test_u128_leb128, write_u128_leb128, read_u128_leb128, u128);
191+
impl_test_unsigned_leb128!(test_usize_leb128, write_usize_leb128, read_usize_leb128, usize);
192+
153193
#[test]
154194
fn test_signed_leb128() {
155195
let values: Vec<_> = (-500..500).map(|i| i * 0x12345789ABCDEF).collect();

src/libserialize/opaque.rs

+42-19
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use leb128::{read_signed_leb128, read_unsigned_leb128, write_signed_leb128, write_unsigned_leb128};
11+
use leb128::{self, read_signed_leb128, write_signed_leb128};
1212
use std::borrow::Cow;
1313
use std::io::{self, Write};
1414
use serialize;
@@ -31,9 +31,9 @@ impl<'a> Encoder<'a> {
3131

3232

3333
macro_rules! write_uleb128 {
34-
($enc:expr, $value:expr) => {{
34+
($enc:expr, $value:expr, $fun:ident) => {{
3535
let pos = $enc.cursor.position() as usize;
36-
let bytes_written = write_unsigned_leb128($enc.cursor.get_mut(), pos, $value as u128);
36+
let bytes_written = leb128::$fun($enc.cursor.get_mut(), pos, $value);
3737
$enc.cursor.set_position((pos + bytes_written) as u64);
3838
Ok(())
3939
}}
@@ -51,61 +51,76 @@ macro_rules! write_sleb128 {
5151
impl<'a> serialize::Encoder for Encoder<'a> {
5252
type Error = io::Error;
5353

54+
#[inline]
5455
fn emit_nil(&mut self) -> EncodeResult {
5556
Ok(())
5657
}
5758

59+
#[inline]
5860
fn emit_usize(&mut self, v: usize) -> EncodeResult {
59-
write_uleb128!(self, v)
61+
write_uleb128!(self, v, write_usize_leb128)
6062
}
6163

64+
#[inline]
6265
fn emit_u128(&mut self, v: u128) -> EncodeResult {
63-
write_uleb128!(self, v)
66+
write_uleb128!(self, v, write_u128_leb128)
6467
}
6568

69+
#[inline]
6670
fn emit_u64(&mut self, v: u64) -> EncodeResult {
67-
write_uleb128!(self, v)
71+
write_uleb128!(self, v, write_u64_leb128)
6872
}
6973

74+
#[inline]
7075
fn emit_u32(&mut self, v: u32) -> EncodeResult {
71-
write_uleb128!(self, v)
76+
write_uleb128!(self, v, write_u32_leb128)
7277
}
7378

79+
#[inline]
7480
fn emit_u16(&mut self, v: u16) -> EncodeResult {
75-
write_uleb128!(self, v)
81+
write_uleb128!(self, v, write_u16_leb128)
7682
}
7783

84+
#[inline]
7885
fn emit_u8(&mut self, v: u8) -> EncodeResult {
79-
let _ = self.cursor.write_all(&[v]);
86+
let pos = self.cursor.position() as usize;
87+
leb128::write_to_vec(self.cursor.get_mut(), pos, v);
88+
self.cursor.set_position((pos + 1) as u64);
8089
Ok(())
8190
}
8291

92+
#[inline]
8393
fn emit_isize(&mut self, v: isize) -> EncodeResult {
8494
write_sleb128!(self, v)
8595
}
8696

97+
#[inline]
8798
fn emit_i128(&mut self, v: i128) -> EncodeResult {
8899
write_sleb128!(self, v)
89100
}
90101

102+
#[inline]
91103
fn emit_i64(&mut self, v: i64) -> EncodeResult {
92104
write_sleb128!(self, v)
93105
}
94106

107+
#[inline]
95108
fn emit_i32(&mut self, v: i32) -> EncodeResult {
96109
write_sleb128!(self, v)
97110
}
98111

112+
#[inline]
99113
fn emit_i16(&mut self, v: i16) -> EncodeResult {
100114
write_sleb128!(self, v)
101115
}
102116

117+
#[inline]
103118
fn emit_i8(&mut self, v: i8) -> EncodeResult {
104119
let as_u8: u8 = unsafe { ::std::mem::transmute(v) };
105-
let _ = self.cursor.write_all(&[as_u8]);
106-
Ok(())
120+
self.emit_u8(as_u8)
107121
}
108122

123+
#[inline]
109124
fn emit_bool(&mut self, v: bool) -> EncodeResult {
110125
self.emit_u8(if v {
111126
1
@@ -114,20 +129,24 @@ impl<'a> serialize::Encoder for Encoder<'a> {
114129
})
115130
}
116131

132+
#[inline]
117133
fn emit_f64(&mut self, v: f64) -> EncodeResult {
118134
let as_u64: u64 = unsafe { ::std::mem::transmute(v) };
119135
self.emit_u64(as_u64)
120136
}
121137

138+
#[inline]
122139
fn emit_f32(&mut self, v: f32) -> EncodeResult {
123140
let as_u32: u32 = unsafe { ::std::mem::transmute(v) };
124141
self.emit_u32(as_u32)
125142
}
126143

144+
#[inline]
127145
fn emit_char(&mut self, v: char) -> EncodeResult {
128146
self.emit_u32(v as u32)
129147
}
130148

149+
#[inline]
131150
fn emit_str(&mut self, v: &str) -> EncodeResult {
132151
self.emit_usize(v.len())?;
133152
let _ = self.cursor.write_all(v.as_bytes());
@@ -136,6 +155,7 @@ impl<'a> serialize::Encoder for Encoder<'a> {
136155
}
137156

138157
impl<'a> Encoder<'a> {
158+
#[inline]
139159
pub fn position(&self) -> usize {
140160
self.cursor.position() as usize
141161
}
@@ -158,24 +178,27 @@ impl<'a> Decoder<'a> {
158178
}
159179
}
160180

181+
#[inline]
161182
pub fn position(&self) -> usize {
162183
self.position
163184
}
164185

186+
#[inline]
165187
pub fn set_position(&mut self, pos: usize) {
166188
self.position = pos
167189
}
168190

191+
#[inline]
169192
pub fn advance(&mut self, bytes: usize) {
170193
self.position += bytes;
171194
}
172195
}
173196

174197
// Decodes one unsigned LEB128 value with `leb128::$fun` from the unread tail
// of `$dec.data`, advances `$dec.position` past the consumed bytes and
// evaluates to `Ok(value)`. `$t` names the decoded type at the call site; the
// expansion itself does not use it.
macro_rules! read_uleb128 {
    ($dec:expr, $t:ty, $fun:ident) => ({
        let remaining = &$dec.data[$dec.position ..];
        let (decoded, consumed) = leb128::$fun(remaining);
        $dec.position += consumed;
        Ok(decoded)
    })
}
181204

@@ -198,22 +221,22 @@ impl<'a> serialize::Decoder for Decoder<'a> {
198221

199222
#[inline]
200223
fn read_u128(&mut self) -> Result<u128, Self::Error> {
201-
read_uleb128!(self, u128)
224+
read_uleb128!(self, u128, read_u128_leb128)
202225
}
203226

204227
#[inline]
205228
fn read_u64(&mut self) -> Result<u64, Self::Error> {
206-
read_uleb128!(self, u64)
229+
read_uleb128!(self, u64, read_u64_leb128)
207230
}
208231

209232
#[inline]
210233
fn read_u32(&mut self) -> Result<u32, Self::Error> {
211-
read_uleb128!(self, u32)
234+
read_uleb128!(self, u32, read_u32_leb128)
212235
}
213236

214237
#[inline]
215238
fn read_u16(&mut self) -> Result<u16, Self::Error> {
216-
read_uleb128!(self, u16)
239+
read_uleb128!(self, u16, read_u16_leb128)
217240
}
218241

219242
#[inline]
@@ -225,7 +248,7 @@ impl<'a> serialize::Decoder for Decoder<'a> {
225248

226249
#[inline]
227250
fn read_usize(&mut self) -> Result<usize, Self::Error> {
228-
read_uleb128!(self, usize)
251+
read_uleb128!(self, usize, read_usize_leb128)
229252
}
230253

231254
#[inline]

0 commit comments

Comments
 (0)