Skip to content

Commit b61e670

Browse files
nicotimschumi
authored and committed
LibCompress: Speed up CanonicalCode::read_symbol() slow path
Symbols that need <= 8 bits hit a fast path as of #18075, but the slow path has done a full binary search over all symbols ever since this code was added in #2963. (#3405 even added a FIXME for doing this, but #18075 removed it.)

Instead of doing a binary search over all codes for every single bit read, this implements the Moffat-Turpin approach described at https://www.hanshq.net/zip.html#huffdec, which only requires a table read per bit.

Benchmark: hyperfine 'Build/lagom/bin/unzip ~/Downloads/enwik8.zip'
1.008 s ± 0.016 s => 957.7 ms ± 3.9 ms, 5% faster.

Due to issue #25005, we can't peek the full 15 bits at once but have to read them one-by-one. This makes the code look a bit different than in the linked article.

I also tried not changing CanonicalCode::from_bytes() too much. It does 15 passes over all symbols. I think it could do it in a single pass instead. But that's for a future change.

No behavior change (other than slightly faster perf).
1 parent d933e07 commit b61e670

File tree

2 files changed

+27
-12
lines changed

2 files changed

+27
-12
lines changed

Userland/Libraries/LibCompress/Deflate.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -86,18 +86,26 @@ ErrorOr<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
8686
Array<PrefixCode, 1 << CanonicalCode::max_allowed_prefixed_code_length> prefix_codes;
8787
size_t number_of_prefix_codes = 0;
8888

89+
code.m_first_symbol_of_length_after.append(0);
90+
code.m_offset_to_first_symbol_index.append(0);
91+
8992
auto next_code = 0;
9093
for (size_t code_length = 1; code_length <= 15; ++code_length) {
9194
next_code <<= 1;
9295
auto start_bit = 1 << code_length;
9396

97+
auto first_code_at_length = next_code;
98+
auto first_symbol_index_at_length = code.m_symbol_values.size();
99+
94100
for (size_t symbol = 0; symbol < bytes.size(); ++symbol) {
95101
if (bytes[symbol] != code_length)
96102
continue;
97103

98104
if (next_code > start_bit)
99105
return Error::from_string_literal("Failed to decode code lengths");
100106

107+
code.m_symbol_values.append(symbol);
108+
101109
if (code_length <= CanonicalCode::max_allowed_prefixed_code_length) {
102110
if (number_of_prefix_codes >= prefix_codes.size())
103111
return Error::from_string_literal("Invalid canonical Huffman code");
@@ -108,9 +116,6 @@ ErrorOr<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
108116
prefix_code.code_length = code_length;
109117

110118
code.m_max_prefixed_code_length = code_length;
111-
} else {
112-
code.m_symbol_codes.append(start_bit | next_code);
113-
code.m_symbol_values.append(symbol);
114119
}
115120

116121
if (code.m_bit_codes.size() < symbol + 1) {
@@ -122,6 +127,15 @@ ErrorOr<CanonicalCode> CanonicalCode::from_bytes(ReadonlyBytes bytes)
122127

123128
next_code++;
124129
}
130+
131+
u32 sentinel = next_code;
132+
code.m_first_symbol_of_length_after.append(sentinel);
133+
VERIFY(code.m_first_symbol_of_length_after[code_length] == sentinel);
134+
135+
if (code.m_symbol_values.size() > first_symbol_index_at_length)
136+
code.m_offset_to_first_symbol_index.append(first_symbol_index_at_length - first_code_at_length);
137+
else
138+
code.m_offset_to_first_symbol_index.append(0); // Never evaluated.
125139
}
126140

127141
if (next_code != (1 << 15))
@@ -152,15 +166,14 @@ ErrorOr<u32> CanonicalCode::read_symbol(LittleEndianInputBitStream& stream) cons
152166
return symbol_value;
153167
}
154168

155-
auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length));
156-
code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length);
157-
code_bits |= 1 << m_max_prefixed_code_length;
158-
159-
for (size_t i = m_max_prefixed_code_length; i < 16; ++i) {
160-
size_t index;
161-
if (binary_search(m_symbol_codes.span(), code_bits, &index))
162-
return m_symbol_values[index];
169+
auto code_bits = TRY(stream.read_bits<u16>(m_max_prefixed_code_length + 1));
170+
code_bits = fast_reverse16(code_bits, m_max_prefixed_code_length + 1);
163171

172+
for (size_t i = m_max_prefixed_code_length + 1; i <= 15; ++i) {
173+
if (code_bits < m_first_symbol_of_length_after[i]) {
174+
auto symbol_index = (uint16_t)(m_offset_to_first_symbol_index[i] + code_bits);
175+
return m_symbol_values[symbol_index];
176+
}
164177
code_bits = code_bits << 1 | TRY(stream.read_bit());
165178
}
166179

Userland/Libraries/LibCompress/Deflate.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,11 @@ class CanonicalCode {
3939
};
4040

4141
// Decompression - indexed by code
42-
Vector<u16, 286> m_symbol_codes;
4342
Vector<u16, 286> m_symbol_values;
4443

44+
Vector<u32, 16> m_first_symbol_of_length_after;
45+
Vector<u16, 16> m_offset_to_first_symbol_index;
46+
4547
Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {};
4648
size_t m_max_prefixed_code_length { 0 };
4749

0 commit comments

Comments
 (0)