Fixed PR remark

cdeler · pgjones · commit 73706452866d · 2020-12-26T17:46:24.000Z
- reworked maybe_extract_until_next to take a precompiled regex and its maximum match length instead of a raw bytes pattern
diff --git a/h11/_readers.py b/h11/_readers.py
@@ -21,6 +21,7 @@
 from ._abnf import chunk_header, header_field, request_line, status_line
 from ._events import *
 from ._state import *
+from ._receivebuffer import line_delimiter_regex
 from ._util import LocalProtocolError, RemoteProtocolError, validate
 
 __all__ = ["READERS"]
@@ -153,7 +154,7 @@ def __call__(self, buf):
         assert self._bytes_to_discard == 0
         if self._bytes_in_chunk == 0:
             # We need to refill our chunk count
-            chunk_header = buf.maybe_extract_until_next(b"\r?\n")
+            chunk_header = buf.maybe_extract_until_next(line_delimiter_regex, 2)
             if chunk_header is None:
                 return None
             matches = validate(
diff --git a/h11/_receivebuffer.py b/h11/_receivebuffer.py
@@ -40,8 +40,7 @@
 # processed a whole event, which could in theory be slightly more efficient
 # than the internal bytearray support.)
 
-default_delimiter = b"\n\r?\n"
-delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
+body_and_headers_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE)
 line_delimiter_regex = re.compile(b"\r?\n", re.MULTILINE)
 
 
@@ -51,8 +50,7 @@ def __init__(self):
         # These are both absolute offsets into self._data:
         self._start = 0
         self._looked_at = 0
-        self._looked_for = default_delimiter
-        self._looked_for_regex = delimiter_regex
+        self._looked_for_regex = body_and_headers_delimiter_regex
 
     def __bool__(self):
         return bool(len(self))
@@ -87,19 +85,14 @@ def maybe_extract_at_most(self, count):
         self._start += len(out)
         return out
 
-    def maybe_extract_until_next(self, needle):
+    def maybe_extract_until_next(self, needle_regex, max_needle_length):
         # Returns extracted bytes on success (advancing offset), or None on
         # failure
-        if self._looked_for == needle:
-            looked_at = max(self._start, self._looked_at - len(needle) + 1)
+        if self._looked_for_regex == needle_regex:
+            looked_at = max(self._start, self._looked_at - max_needle_length)
         else:
             looked_at = self._start
-            self._looked_for = needle
-            # Check if default delimiter to avoid expensive re.compile
-            if needle == default_delimiter:
-                self._looked_for_regex = delimiter_regex
-            else:
-                self._looked_for_regex = re.compile(needle, re.MULTILINE)
+            self._looked_for_regex = needle_regex
 
         delimiter_match = next(
             self._looked_for_regex.finditer(self._data, looked_at), None
@@ -136,11 +129,11 @@ def maybe_extract_lines(self):
             self._start += len(start_chunk)
             return []
         else:
-            data = self.maybe_extract_until_next(default_delimiter)
+            data = self.maybe_extract_until_next(body_and_headers_delimiter_regex, 3)
             if data is None:
                 return None
 
-            delimiter = self._get_fields_delimiter(data, line_delimiter_regex)
-            lines = data.rstrip(b"\r\n").split(delimiter)
+            real_lines_delimiter = self._get_fields_delimiter(data, line_delimiter_regex)
+            lines = data.rstrip(b"\r\n").split(real_lines_delimiter)
 
             return lines
diff --git a/h11/tests/test_receivebuffer.py b/h11/tests/test_receivebuffer.py
@@ -1,3 +1,5 @@
+import re
+
 import pytest
 
 from .._receivebuffer import ReceiveBuffer
@@ -37,23 +39,23 @@ def test_receivebuffer():
 
     b += b"12345a6789aa"
 
-    assert b.maybe_extract_until_next(b"a") == b"12345a"
+    assert b.maybe_extract_until_next(re.compile(b"a"), 1) == b"12345a"
     assert bytes(b) == b"6789aa"
 
-    assert b.maybe_extract_until_next(b"aaa") is None
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
     assert bytes(b) == b"6789aa"
 
     b += b"a12"
-    assert b.maybe_extract_until_next(b"aaa") == b"6789aaa"
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"6789aaa"
     assert bytes(b) == b"12"
 
     # check repeated searches for the same needle, triggering the
     # pickup-where-we-left-off logic
     b += b"345"
-    assert b.maybe_extract_until_next(b"aaa") is None
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) is None
 
     b += b"6789aaa123"
-    assert b.maybe_extract_until_next(b"aaa") == b"123456789aaa"
+    assert b.maybe_extract_until_next(re.compile(b"aaa"), 3) == b"123456789aaa"
     assert bytes(b) == b"123"
 
     ################################################################