Skip to content

Commit 7efb98b

Browse files
cdeler and pgjones
authored and committed
Changed the ReceiveBuffer
after @tomchristie's proposal from #115 (comment)
1 parent a23ecc6 commit 7efb98b

File tree

4 files changed

+80
-79
lines changed

4 files changed

+80
-79
lines changed

h11/_connection.py

-1
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,6 @@ def next_event(self):
425425
event = self._extract_next_receive_event()
426426
if event not in [NEED_DATA, PAUSED]:
427427
self._process_event(self.their_role, event)
428-
self._receive_buffer.compress()
429428
if event is NEED_DATA:
430429
if len(self._receive_buffer) > self._max_incomplete_event_size:
431430
# 431 is "Request header fields too large" which is pretty

h11/_readers.py

+1-2
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
# Remember that this has to run in O(n) time -- so e.g. the bytearray cast is
3131
# critical.
3232
obs_fold_re = re.compile(br"[ \t]+")
33-
strict_line_delimiter_regex = re.compile(b"\r\n", re.MULTILINE)
3433

3534

3635
def _obsolete_line_fold(lines):
@@ -154,7 +153,7 @@ def __call__(self, buf):
154153
assert self._bytes_to_discard == 0
155154
if self._bytes_in_chunk == 0:
156155
# We need to refill our chunk count
157-
chunk_header = buf.maybe_extract_until_next(strict_line_delimiter_regex, 2)
156+
chunk_header = buf.maybe_extract_next_line()
158157
if chunk_header is None:
159158
return None
160159
matches = validate(

h11/_receivebuffer.py

+64-60
Original file line number | Diff line number | Diff line change
@@ -41,94 +41,98 @@
4141
# processed a whole event, which could in theory be slightly more efficient
4242
# than the internal bytearray support.)
4343

44-
blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
45-
46-
47-
def rstrip_line(line):
48-
return line.rstrip(b"\r")
44+
blank_line_regex = re.compile(b"\n\r?\n", re.MULTILINE)
4945

5046

5147
class ReceiveBuffer(object):
5248
def __init__(self):
5349
self._data = bytearray()
54-
# These are both absolute offsets into self._data:
55-
self._start = 0
56-
self._looked_at = 0
50+
self._next_line_search = 0
51+
self._multiple_lines_search = 0
5752

58-
self._looked_for_regex = blank_line_delimiter_regex
53+
def __iadd__(self, byteslike):
54+
self._data += byteslike
55+
return self
5956

6057
def __bool__(self):
6158
return bool(len(self))
6259

60+
def __len__(self):
61+
return len(self._data)
62+
6363
# for @property unprocessed_data
6464
def __bytes__(self):
65-
return bytes(self._data[self._start :])
65+
return bytes(self._data)
6666

6767
if sys.version_info[0] < 3: # version specific: Python 2
6868
__str__ = __bytes__
6969
__nonzero__ = __bool__
7070

71-
def __len__(self):
72-
return len(self._data) - self._start
73-
74-
def compress(self):
75-
# Heuristic: only compress if it lets us reduce size by a factor
76-
# of 2
77-
if self._start > len(self._data) // 2:
78-
del self._data[: self._start]
79-
self._looked_at -= self._start
80-
self._start -= self._start
81-
82-
def __iadd__(self, byteslike):
83-
self._data += byteslike
84-
return self
85-
8671
def maybe_extract_at_most(self, count):
87-
out = self._data[self._start : self._start + count]
72+
"""
73+
Extract a fixed number of bytes from the buffer.
74+
"""
75+
out = self._data[:count]
8876
if not out:
8977
return None
90-
self._start += len(out)
78+
79+
self._data[:count] = b""
80+
self._next_line_search = 0
81+
self._multiple_lines_search = 0
9182
return out
9283

93-
def maybe_extract_until_next(self, needle_regex, max_needle_length):
94-
# Returns extracted bytes on success (advancing offset), or None on
95-
# failure
96-
if self._looked_for_regex == needle_regex:
97-
looked_at = max(self._start, self._looked_at - max_needle_length)
98-
else:
99-
looked_at = self._start
100-
self._looked_for_regex = needle_regex
101-
102-
delimiter_match = next(
103-
self._looked_for_regex.finditer(self._data, looked_at), None
104-
)
105-
106-
if delimiter_match is None:
107-
self._looked_at = len(self._data)
84+
def maybe_extract_next_line(self):
85+
"""
86+
Extract the first line, if it is completed in the buffer.
87+
"""
88+
# Only search in buffer space that we've not already looked at.
89+
partial_buffer = self._data[self._next_line_search :]
90+
partial_idx = partial_buffer.find(b"\n")
91+
if partial_idx == -1:
92+
self._next_line_search = len(self._data)
10893
return None
10994

110-
_, end = delimiter_match.span(0)
111-
112-
out = self._data[self._start : end]
113-
114-
self._start = end
115-
95+
# Truncate the buffer and return it.
96+
idx = self._next_line_search + partial_idx + 1
97+
out = self._data[:idx]
98+
self._data[:idx] = b""
99+
self._next_line_search = 0
100+
self._multiple_lines_search = 0
116101
return out
117102

118-
# HTTP/1.1 has a number of constructs where you keep reading lines until
119-
# you see a blank one. This does that, and then returns the lines.
120103
def maybe_extract_lines(self):
121-
if self._data[self._start : self._start + 2] == b"\r\n":
122-
self._start += 2
104+
"""
105+
Extract everything up to the first blank line, and return a list of lines.
106+
"""
107+
# Handle the case where we have an immediate empty line.
108+
if self._data[:1] == b"\n":
109+
self._data[:1] = b""
110+
self._next_line_search = 0
111+
self._multiple_lines_search = 0
123112
return []
124-
elif self._data[self._start : self._start + 1] == b"\n":
125-
self._start += 1
113+
114+
if self._data[:2] == b"\r\n":
115+
self._data[:2] = b""
116+
self._next_line_search = 0
117+
self._multiple_lines_search = 0
126118
return []
127-
else:
128-
data = self.maybe_extract_until_next(blank_line_delimiter_regex, 3)
129-
if data is None:
130-
return None
131119

132-
lines = list(map(rstrip_line, data.rstrip(b"\r\n").split(b"\n")))
120+
# Only search in buffer space that we've not already looked at.
121+
partial_buffer = self._data[self._multiple_lines_search :]
122+
match = blank_line_regex.search(partial_buffer)
123+
if match is None:
124+
self._multiple_lines_search = max(0, len(self._data) - 2)
125+
return None
126+
127+
# Truncate the buffer and return it.
128+
idx = self._multiple_lines_search + match.span(0)[-1]
129+
out = self._data[:idx]
130+
lines = [line.rstrip(b"\r") for line in out.split(b"\n")]
131+
132+
self._data[:idx] = b""
133+
self._next_line_search = 0
134+
self._multiple_lines_search = 0
135+
136+
assert lines[-2] == lines[-1] == b""
133137

134-
return lines
138+
return lines[:-2]

h11/tests/test_receivebuffer.py

+15-16
Original file line number | Diff line number | Diff line change
@@ -16,15 +16,13 @@ def test_receivebuffer():
1616
assert len(b) == 3
1717
assert bytes(b) == b"123"
1818

19-
b.compress()
2019
assert bytes(b) == b"123"
2120

2221
assert b.maybe_extract_at_most(2) == b"12"
2322
assert b
2423
assert len(b) == 1
2524
assert bytes(b) == b"3"
2625

27-
b.compress()
2826
assert bytes(b) == b"3"
2927

3028
assert b.maybe_extract_at_most(10) == b"3"
@@ -37,32 +35,33 @@ def test_receivebuffer():
3735
# maybe_extract_until_next
3836
################################################################
3937

40-
b += b"12345a6789aa"
38+
b += b"12345\n6789\r\n"
4139

42-
assert b.maybe_extract_until_next(re.compile(b"a"), 1) == b"12345a"
43-
assert bytes(b) == b"6789aa"
40+
assert b.maybe_extract_next_line() == b"12345\n"
41+
assert bytes(b) == b"6789\r\n"
4442

45-
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
46-
assert bytes(b) == b"6789aa"
43+
assert b.maybe_extract_next_line() == b"6789\r\n"
44+
assert bytes(b) == b""
4745

48-
b += b"a12"
49-
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"6789aaa"
50-
assert bytes(b) == b"12"
46+
b += b"12\r"
47+
assert b.maybe_extract_next_line() is None
48+
assert bytes(b) == b"12\r"
5149

5250
# check repeated searches for the same needle, triggering the
5351
# pickup-where-we-left-off logic
54-
b += b"345"
55-
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
52+
b += b"345\n\r"
53+
assert b.maybe_extract_next_line() == b"12\r345\n"
54+
assert bytes(b) == b"\r"
5655

57-
b += b"6789aaa123"
58-
assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"123456789aaa"
59-
assert bytes(b) == b"123"
56+
b += b"6789aaa123\n"
57+
assert b.maybe_extract_next_line() == b"\r6789aaa123\n"
58+
assert bytes(b) == b""
6059

6160
################################################################
6261
# maybe_extract_lines
6362
################################################################
6463

65-
b += b"\r\na: b\r\nfoo:bar\r\n\r\ntrailing"
64+
b += b"123\r\na: b\r\nfoo:bar\r\n\r\ntrailing"
6665
lines = b.maybe_extract_lines()
6766
assert lines == [b"123", b"a: b", b"foo:bar"]
6867
assert bytes(b) == b"trailing"

0 commit comments

Comments
 (0)