Skip to content

Commit 4dca957

Browse files
committed
Merge pull request #35 from python-hyper/huffman-table
Improve Huffman decoding speed using table-based implementation.
2 parents 96ff892 + 5214a0c commit 4dca957

File tree

6 files changed

+4789
-106
lines changed

6 files changed

+4789
-106
lines changed

HISTORY.rst

+7
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,13 @@
11
Release History
22
===============
33

4+
dev (XXXX)
5+
----------
6+
7+
**Bugfixes**
8+
9+
- Improve Huffman decoding speed by 4x using an approach borrowed from nghttp2.
10+
411
2.1.1 (2016-03-16)
512
------------------
613

hpack/hpack.py

+4-6
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,11 @@
1010
from .table import HeaderTable
1111
from .compat import to_byte, to_bytes
1212
from .exceptions import HPACKDecodingError
13-
from .huffman import HuffmanDecoder, HuffmanEncoder
13+
from .huffman import HuffmanEncoder
1414
from .huffman_constants import (
1515
REQUEST_CODES, REQUEST_CODES_LENGTH
1616
)
17+
from .huffman_table import decode_huffman
1718

1819
log = logging.getLogger(__name__)
1920

@@ -322,9 +323,6 @@ class Decoder(object):
322323

323324
def __init__(self):
324325
self.header_table = HeaderTable()
325-
self.huffman_coder = HuffmanDecoder(
326-
REQUEST_CODES, REQUEST_CODES_LENGTH
327-
)
328326

329327
@property
330328
def header_table_size(self):
@@ -461,7 +459,7 @@ def _decode_literal(self, data, should_index):
461459
name = data[consumed:consumed + length]
462460

463461
if to_byte(data[0]) & 0x80:
464-
name = self.huffman_coder.decode(name)
462+
name = decode_huffman(name)
465463
total_consumed = consumed + length + 1 # Since we moved forward 1.
466464

467465
data = data[consumed + length:]
@@ -471,7 +469,7 @@ def _decode_literal(self, data, should_index):
471469
value = data[consumed:consumed + length]
472470

473471
if to_byte(data[0]) & 0x80:
474-
value = self.huffman_coder.decode(value)
472+
value = decode_huffman(value)
475473

476474
# Updated the total consumed length.
477475
total_consumed += length + consumed

hpack/huffman.py

-72
Original file line number | Diff line number | Diff line change
@@ -7,78 +7,6 @@
77
Huffman-coded content where we already know the Huffman table.
88
"""
99
from .compat import to_byte, decode_hex
10-
from .exceptions import HPACKDecodingError
11-
12-
13-
def _pad_binary(bin_str, req_len=8):
14-
"""
15-
Given a binary string (returned by bin()), pad it to a full byte length.
16-
"""
17-
bin_str = bin_str[2:] # Strip the 0b prefix
18-
return max(0, req_len - len(bin_str)) * '0' + bin_str
19-
20-
21-
def _hex_to_bin_str(hex_string):
22-
"""
23-
Given a Python bytestring, returns a string representing those bytes in
24-
unicode form.
25-
"""
26-
unpadded_bin_string_list = (bin(to_byte(c)) for c in hex_string)
27-
padded_bin_string_list = map(_pad_binary, unpadded_bin_string_list)
28-
bitwise_message = "".join(padded_bin_string_list)
29-
return bitwise_message
30-
31-
32-
class HuffmanDecoder(object):
33-
"""
34-
Decodes a Huffman-coded bytestream according to the Huffman table laid out
35-
in the HPACK specification.
36-
"""
37-
class _Node(object):
38-
def __init__(self, data):
39-
self.data = data
40-
self.mapping = {}
41-
42-
def __init__(self, huffman_code_list, huffman_code_list_lengths):
43-
self.root = self._Node(None)
44-
for index, (huffman_code, code_length) in enumerate(zip(huffman_code_list, huffman_code_list_lengths)):
45-
self._insert(huffman_code, code_length, index)
46-
47-
def _insert(self, hex_number, hex_length, letter):
48-
"""
49-
Inserts a Huffman code point into the tree.
50-
"""
51-
hex_number = _pad_binary(bin(hex_number), hex_length)
52-
cur_node = self.root
53-
for digit in hex_number:
54-
if digit not in cur_node.mapping:
55-
cur_node.mapping[digit] = self._Node(None)
56-
cur_node = cur_node.mapping[digit]
57-
cur_node.data = letter
58-
59-
def decode(self, encoded_string):
60-
"""
61-
Decode the given Huffman coded string.
62-
"""
63-
number = _hex_to_bin_str(encoded_string)
64-
cur_node = self.root
65-
decoded_message = bytearray()
66-
67-
try:
68-
for digit in number:
69-
cur_node = cur_node.mapping[digit]
70-
if cur_node.data is not None:
71-
# If we get EOS, everything else is padding.
72-
if cur_node.data == 256:
73-
break
74-
75-
decoded_message.append(cur_node.data)
76-
cur_node = self.root
77-
except KeyError:
78-
# We have a Huffman-coded string that doesn't match our trie. This
79-
# is pretty bad: raise a useful exception.
80-
raise HPACKDecodingError("Invalid Huffman-coded string received.")
81-
return bytes(decoded_message)
8210

8311

8412
class HuffmanEncoder(object):

0 commit comments

Comments
 (0)