Skip to content

Commit adbd254

Browse files
committed
Sped up base64 encoding 2x
1 parent cba058f commit adbd254

File tree

2 files changed

+90
-52
lines changed

2 files changed

+90
-52
lines changed

benches/base64.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,3 +27,22 @@ fn bench_from_base64(b: &mut Bencher) {
2727
b.bytes = sb.len() as u64;
2828
}
2929

30+
31+
#[bench]
32+
fn bench_to_base64_large(b: &mut Bencher) {
33+
let s: Vec<_> = (0..10000).map(|i| ((i as u32 * 12345) % 256) as u8).collect();
34+
b.iter(|| {
35+
s.to_base64(STANDARD);
36+
});
37+
b.bytes = s.len() as u64;
38+
}
39+
40+
#[bench]
41+
fn bench_from_base64_large(b: &mut Bencher) {
42+
let s: Vec<_> = (0..10000).map(|i| ((i as u32 * 12345) % 256) as u8).collect();
43+
let sb = s.to_base64(STANDARD);
44+
b.iter(|| {
45+
sb.from_base64().unwrap();
46+
});
47+
b.bytes = sb.len() as u64;
48+
}

src/base64.rs

Lines changed: 71 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -96,76 +96,95 @@ impl ToBase64 for [u8] {
9696
UrlSafe => URLSAFE_CHARS
9797
};
9898

99-
// In general, this Vec only needs (4/3) * self.len() memory, but
100-
// addition is faster than multiplication and division.
101-
let mut v = Vec::with_capacity(self.len() + self.len());
102-
let mut i = 0;
103-
let mut cur_length = 0;
10499
let len = self.len();
105-
let mod_len = len % 3;
106-
let cond_len = len - mod_len;
107100
let newline = match config.newline {
108101
Newline::LF => "\n",
109102
Newline::CRLF => "\r\n",
110103
};
111-
while i < cond_len {
112-
let (first, second, third) = (self[i], self[i + 1], self[i + 2]);
113-
if let Some(line_length) = config.line_length {
114-
if cur_length >= line_length {
115-
v.extend(newline.bytes());
116-
cur_length = 0;
117-
}
118-
}
119104

120-
let n = (first as u32) << 16 |
121-
(second as u32) << 8 |
122-
(third as u32);
105+
// Preallocate memory.
106+
let mut prealloc_len = (len + 2) / 3 * 4;
107+
if let Some(line_length) = config.line_length {
108+
let num_lines = (prealloc_len - 1) / line_length;
109+
prealloc_len += num_lines * newline.bytes().count();
110+
}
123111

124-
// This 24-bit number gets separated into four 6-bit numbers.
125-
v.push(bytes[((n >> 18) & 63) as usize]);
126-
v.push(bytes[((n >> 12) & 63) as usize]);
127-
v.push(bytes[((n >> 6 ) & 63) as usize]);
128-
v.push(bytes[(n & 63) as usize]);
112+
let mut out_bytes = vec![b'='; prealloc_len];
129113

130-
cur_length += 4;
131-
i += 3;
132-
}
114+
// Deal with padding bytes
115+
let mod_len = len % 3;
133116

134-
if mod_len != 0 {
135-
if let Some(line_length) = config.line_length {
136-
if cur_length >= line_length {
137-
v.extend(newline.bytes());
117+
// Use iterators to reduce branching
118+
{
119+
let mut cur_length = 0;
120+
121+
let mut s_in = self[..len - mod_len].iter().map(|&x| x as u32);
122+
let mut s_out = out_bytes.iter_mut();
123+
124+
// Convenient shorthand
125+
let enc = |val| bytes[val as usize];
126+
let mut write = |val| *s_out.next().unwrap() = val;
127+
128+
// Iterate through the input in 3-byte groups; each group yields 4 output characters
129+
while let (Some(first), Some(second), Some(third)) =
130+
(s_in.next(), s_in.next(), s_in.next()) {
131+
132+
// Line break if needed
133+
if let Some(line_length) = config.line_length {
134+
if cur_length >= line_length {
135+
for b in newline.bytes() { write(b) };
136+
cur_length = 0;
137+
}
138138
}
139+
140+
let n = first << 16 | second << 8 | third;
141+
142+
// This 24-bit number gets separated into four 6-bit numbers.
143+
write(enc((n >> 18) & 63));
144+
write(enc((n >> 12) & 63));
145+
write(enc((n >> 6 ) & 63));
146+
write(enc((n >> 0 ) & 63));
147+
148+
cur_length += 4;
139149
}
140-
}
141150

142-
// Heh, would be cool if we knew this was exhaustive
143-
// (the dream of bounded integer types)
144-
match mod_len {
145-
0 => (),
146-
1 => {
147-
let n = (self[i] as u32) << 16;
148-
v.push(bytes[((n >> 18) & 63) as usize]);
149-
v.push(bytes[((n >> 12) & 63) as usize]);
150-
if config.pad {
151-
v.push(b'=');
152-
v.push(b'=');
151+
// Line break only needed if padding is required
152+
if mod_len != 0 {
153+
if let Some(line_length) = config.line_length {
154+
if cur_length >= line_length {
155+
for b in newline.bytes() { write(b) };
156+
}
153157
}
154158
}
155-
2 => {
156-
let n = (self[i] as u32) << 16 |
157-
(self[i + 1] as u32) << 8;
158-
v.push(bytes[((n >> 18) & 63) as usize]);
159-
v.push(bytes[((n >> 12) & 63) as usize]);
160-
v.push(bytes[((n >> 6 ) & 63) as usize]);
161-
if config.pad {
162-
v.push(b'=');
159+
160+
// Heh, would be cool if we knew this was exhaustive
161+
// (the dream of bounded integer types)
162+
match mod_len {
163+
0 => (),
164+
1 => {
165+
let n = (self[len-1] as u32) << 16;
166+
write(enc((n >> 18) & 63));
167+
write(enc((n >> 12) & 63));
168+
}
169+
2 => {
170+
let n = (self[len-2] as u32) << 16 |
171+
(self[len-1] as u32) << 8;
172+
write(enc((n >> 18) & 63));
173+
write(enc((n >> 12) & 63));
174+
write(enc((n >> 6 ) & 63));
163175
}
176+
_ => panic!("Algebra is broken, please alert the math police")
177+
}
178+
}
179+
180+
// We get padding for "free", so only have to drop it if unwanted.
181+
if !config.pad {
182+
while let Some(&b'=') = out_bytes.last() {
183+
out_bytes.pop();
164184
}
165-
_ => panic!("Algebra is broken, please alert the math police")
166185
}
167186

168-
unsafe { String::from_utf8_unchecked(v) }
187+
unsafe { String::from_utf8_unchecked(out_bytes) }
169188
}
170189
}
171190

0 commit comments

Comments
 (0)