Skip to content

Commit 3247e28

Browse files
cdelerpgjones
authored and committed
Changed the maybe_extract_lines logic according to PR review
1. It uses b"\n\r?\n" as the blank-line delimiter regex. 2. It splits lines using the b"\r?\n" regex, so that it is tolerant of mixed line endings. 3. For chunked encoding, it rewinds the buffer until b"\r\n". The changes are based on this comment: #115 (comment)
1 parent f051563 commit 3247e28

File tree

3 files changed

+17
-27
lines changed

3 files changed

+17
-27
lines changed

h11/_readers.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
# Remember that this has to run in O(n) time -- so e.g. the bytearray cast is
3232
# critical.
3333
obs_fold_re = re.compile(br"[ \t]+")
34+
strict_line_delimiter_regex = re.compile(b"\r\n", re.MULTILINE)
3435

3536

3637
def _obsolete_line_fold(lines):
@@ -154,7 +155,7 @@ def __call__(self, buf):
154155
assert self._bytes_to_discard == 0
155156
if self._bytes_in_chunk == 0:
156157
# We need to refill our chunk count
157-
chunk_header = buf.maybe_extract_until_next(line_delimiter_regex, 2)
158+
chunk_header = buf.maybe_extract_until_next(strict_line_delimiter_regex, 2)
158159
if chunk_header is None:
159160
return None
160161
matches = validate(

h11/_receivebuffer.py

+3-17
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
# processed a whole event, which could in theory be slightly more efficient
4242
# than the internal bytearray support.)
4343

44-
blank_line_delimiter_regex = re.compile(b"(\r\n\r\n|\n\n)", re.MULTILINE)
44+
blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
4545
line_delimiter_regex = re.compile(b"\r?\n", re.MULTILINE)
4646

4747

@@ -112,17 +112,6 @@ def maybe_extract_until_next(self, needle_regex, max_needle_length):
112112

113113
return out
114114

115-
def _get_fields_delimiter(self, data, lines_delimiter_regex):
116-
delimiter_match = next(lines_delimiter_regex.finditer(data), None)
117-
118-
if delimiter_match is not None:
119-
begin, end = delimiter_match.span(0)
120-
result = data[begin:end]
121-
else:
122-
result = b"\r\n"
123-
124-
return bytes(result)
125-
126115
# HTTP/1.1 has a number of constructs where you keep reading lines until
127116
# you see a blank one. This does that, and then returns the lines.
128117
def maybe_extract_lines(self):
@@ -133,13 +122,10 @@ def maybe_extract_lines(self):
133122
self._start += 1
134123
return []
135124
else:
136-
data = self.maybe_extract_until_next(blank_line_delimiter_regex, 4)
125+
data = self.maybe_extract_until_next(blank_line_delimiter_regex, 3)
137126
if data is None:
138127
return None
139128

140-
real_lines_delimiter = self._get_fields_delimiter(
141-
data, line_delimiter_regex
142-
)
143-
lines = data.rstrip(b"\r\n").split(real_lines_delimiter)
129+
lines = line_delimiter_regex.split(data.rstrip(b"\r\n"))
144130

145131
return lines

h11/tests/test_receivebuffer.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -89,32 +89,31 @@ def test_receivebuffer():
8989
(
9090
b"HTTP/1.1 200 OK\r\n",
9191
b"Content-type: text/plain\r\n",
92+
b"Connection: close\r\n",
9293
b"\r\n",
9394
b"Some body",
9495
),
9596
id="with_crlf_delimiter",
9697
),
97-
pytest.param(
98-
(b"HTTP/1.1 200 OK\n", b"Content-type: text/plain\n", b"\n", b"Some body"),
99-
id="with_lf_only_delimiter",
100-
),
10198
pytest.param(
10299
(
103-
b"HTTP/1.1 200 OK\r\n",
100+
b"HTTP/1.1 200 OK\n",
104101
b"Content-type: text/plain\n",
102+
b"Connection: close\n",
105103
b"\n",
106104
b"Some body",
107105
),
108-
id="with_double_lf_before_body",
106+
id="with_lf_only_delimiter",
109107
),
110108
pytest.param(
111109
(
112-
b"HTTP/1.1 200 OK\r\n",
110+
b"HTTP/1.1 200 OK\n",
113111
b"Content-type: text/plain\r\n",
112+
b"Connection: close\n",
114113
b"\n",
115114
b"Some body",
116115
),
117-
id="with_mixed_crlf",
116+
id="with_mixed_crlf_and_lf",
118117
),
119118
],
120119
)
@@ -126,5 +125,9 @@ def test_receivebuffer_for_invalid_delimiter(data):
126125

127126
lines = b.maybe_extract_lines()
128127

129-
assert lines == [b"HTTP/1.1 200 OK", b"Content-type: text/plain"]
128+
assert lines == [
129+
b"HTTP/1.1 200 OK",
130+
b"Content-type: text/plain",
131+
b"Connection: close",
132+
]
130133
assert bytes(b) == b"Some body"

0 commit comments

Comments
 (0)