Changed the maybe_extract_lines logic according to the PR review

cdeler · pgjones · commit 3247e285d530 · 2020-12-26T17:46:24.000Z
1. It uses b"\n\r?\n" as the blank-line delimiter regex. 2. It splits lines using the b"\r?\n" regex, so that it is tolerant of mixed line endings. 3. For chunked encoding, it rewinds the buffer until a strict b"\r\n". The changes are based on this comment: #115 (comment)
diff --git a/h11/_readers.py b/h11/_readers.py
@@ -31,6 +31,7 @@
 # Remember that this has to run in O(n) time -- so e.g. the bytearray cast is
 # critical.
 obs_fold_re = re.compile(br"[ \t]+")
+strict_line_delimiter_regex = re.compile(b"\r\n", re.MULTILINE)
 
 
 def _obsolete_line_fold(lines):
@@ -154,7 +155,7 @@ def __call__(self, buf):
         assert self._bytes_to_discard == 0
         if self._bytes_in_chunk == 0:
             # We need to refill our chunk count
-            chunk_header = buf.maybe_extract_until_next(line_delimiter_regex, 2)
+            chunk_header = buf.maybe_extract_until_next(strict_line_delimiter_regex, 2)
             if chunk_header is None:
                 return None
             matches = validate(
diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py
@@ -41,7 +41,7 @@
 # processed a whole event, which could in theory be slightly more efficient
 # than the internal bytearray support.)
 
-blank_line_delimiter_regex = re.compile(b"(\r\n\r\n|\n\n)", re.MULTILINE)
+blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
 line_delimiter_regex = re.compile(b"\r?\n", re.MULTILINE)
 
 
@@ -112,17 +112,6 @@ def maybe_extract_until_next(self, needle_regex, max_needle_length):
 
         return out
 
-    def _get_fields_delimiter(self, data, lines_delimiter_regex):
-        delimiter_match = next(lines_delimiter_regex.finditer(data), None)
-
-        if delimiter_match is not None:
-            begin, end = delimiter_match.span(0)
-            result = data[begin:end]
-        else:
-            result = b"\r\n"
-
-        return bytes(result)
-
     # HTTP/1.1 has a number of constructs where you keep reading lines until
     # you see a blank one. This does that, and then returns the lines.
     def maybe_extract_lines(self):
@@ -133,13 +122,10 @@ def maybe_extract_lines(self):
             self._start += 1
             return []
         else:
-            data = self.maybe_extract_until_next(blank_line_delimiter_regex, 4)
+            data = self.maybe_extract_until_next(blank_line_delimiter_regex, 3)
             if data is None:
                 return None
 
-            real_lines_delimiter = self._get_fields_delimiter(
-                data, line_delimiter_regex
-            )
-            lines = data.rstrip(b"\r\n").split(real_lines_delimiter)
+            lines = line_delimiter_regex.split(data.rstrip(b"\r\n"))
 
             return lines
diff --git a/h11/tests/test_receivebuffer.py b/h11/tests/test_receivebuffer.py
@@ -89,32 +89,31 @@ def test_receivebuffer():
             (
                 b"HTTP/1.1 200 OK\r\n",
                 b"Content-type: text/plain\r\n",
+                b"Connection: close\r\n",
                 b"\r\n",
                 b"Some body",
             ),
             id="with_crlf_delimiter",
         ),
-        pytest.param(
-            (b"HTTP/1.1 200 OK\n", b"Content-type: text/plain\n", b"\n", b"Some body"),
-            id="with_lf_only_delimiter",
-        ),
         pytest.param(
             (
-                b"HTTP/1.1 200 OK\r\n",
+                b"HTTP/1.1 200 OK\n",
                 b"Content-type: text/plain\n",
+                b"Connection: close\n",
                 b"\n",
                 b"Some body",
             ),
-            id="with_double_lf_before_body",
+            id="with_lf_only_delimiter",
         ),
         pytest.param(
             (
-                b"HTTP/1.1 200 OK\r\n",
+                b"HTTP/1.1 200 OK\n",
                 b"Content-type: text/plain\r\n",
+                b"Connection: close\n",
                 b"\n",
                 b"Some body",
             ),
-            id="with_mixed_crlf",
+            id="with_mixed_crlf_and_lf",
         ),
     ],
 )
@@ -126,5 +125,9 @@ def test_receivebuffer_for_invalid_delimiter(data):
 
     lines = b.maybe_extract_lines()
 
-    assert lines == [b"HTTP/1.1 200 OK", b"Content-type: text/plain"]
+    assert lines == [
+        b"HTTP/1.1 200 OK",
+        b"Content-type: text/plain",
+        b"Connection: close",
+    ]
     assert bytes(b) == b"Some body"