|
41 | 41 | # processed a whole event, which could in theory be slightly more efficient
|
42 | 42 | # than the internal bytearray support.)
|
43 | 43 |
|
# Matches the blank line ("\n\n" or "\n\r\n") that ends a group of lines.
blank_line_regex = re.compile(b"\n\r?\n", re.MULTILINE)


class ReceiveBuffer(object):
    """
    Growable byte buffer with helpers for pulling data off its front.

    Bytes are appended with ``+=``. The ``maybe_extract_*`` methods each
    either remove and return a prefix of the buffer, or return ``None`` and
    leave the buffer untouched when the requested unit is not available yet.

    Two offsets remember how far previous unsuccessful searches got, so that
    a later call does not rescan bytes that are already known not to contain
    a delimiter. Both offsets index into ``self._data`` and are reset to zero
    whenever a prefix is removed.
    """

    def __init__(self):
        self._data = bytearray()
        # Where the next search for a single b"\n" should resume.
        self._next_line_search = 0
        # Where the next search for a blank-line delimiter should resume.
        self._multiple_lines_search = 0

    def __iadd__(self, byteslike):
        """Append ``byteslike`` to the end of the buffer."""
        self._data += byteslike
        return self

    def __bool__(self):
        """Return True if the buffer holds at least one byte."""
        return bool(len(self))

    def __len__(self):
        """Return the number of bytes currently held."""
        return len(self._data)

    # for @property unprocessed_data
    def __bytes__(self):
        """Return an immutable copy of the buffered bytes."""
        return bytes(self._data)

    if sys.version_info[0] < 3:  # version specific: Python 2
        __str__ = __bytes__
        __nonzero__ = __bool__

    def _discard(self, count):
        """Delete the first ``count`` bytes and reset both search offsets."""
        del self._data[:count]
        # The saved offsets referred to positions in the old buffer layout.
        self._next_line_search = 0
        self._multiple_lines_search = 0

    def maybe_extract_at_most(self, count):
        """
        Extract up to ``count`` bytes from the front of the buffer.

        Returns the extracted bytes as a ``bytearray``, or ``None`` if the
        slice ``self._data[:count]`` is empty.
        """
        out = self._data[:count]
        if not out:
            return None

        # Remove exactly what was copied out (may be fewer than ``count``).
        self._discard(len(out))
        return out

    def maybe_extract_next_line(self):
        """
        Extract the first line, if it is completed in the buffer.

        Returns a ``bytearray`` that includes the terminating b"\\n", or
        ``None`` if the buffer contains no b"\\n" yet.
        """
        # Search in place, starting after the bytes already examined, rather
        # than copying the unsearched tail into a new bytearray.
        newline_idx = self._data.find(b"\n", self._next_line_search)
        if newline_idx == -1:
            self._next_line_search = len(self._data)
            return None

        end = newline_idx + 1
        out = self._data[:end]
        self._discard(end)
        return out

    def maybe_extract_lines(self):
        """
        Extract everything up to the first blank line, as a list of lines.

        Returns a list of ``bytearray`` lines with their line endings
        removed, an empty list if the buffer starts with an empty line, or
        ``None`` if no blank line is present yet.
        """
        # Handle the case where we have an immediate empty line.
        if self._data[:1] == b"\n":
            self._discard(1)
            return []

        if self._data[:2] == b"\r\n":
            self._discard(2)
            return []

        # Search in place from the saved offset; the pattern has no anchors,
        # so this finds the same match as searching a copied tail would.
        match = blank_line_regex.search(self._data, self._multiple_lines_search)
        if match is None:
            # Back up two bytes: the buffer may end with b"\n" or b"\n\r",
            # the start of a delimiter that the next append could complete.
            self._multiple_lines_search = max(0, len(self._data) - 2)
            return None

        end = match.end()
        out = self._data[:end]
        self._discard(end)

        lines = [line.rstrip(b"\r") for line in out.split(b"\n")]

        # The delimiter itself always yields two trailing empty entries.
        assert lines[-2] == lines[-1] == b""

        return lines[:-2]
0 commit comments