|
41 | 41 | # processed a whole event, which could in theory be slightly more efficient |
42 | 42 | # than the internal bytearray support.) |
43 | 43 |
|
44 | | -blank_line_delimiter_regex = re.compile(b"\n\r?\n", re.MULTILINE) |
45 | | - |
46 | | - |
47 | | -def rstrip_line(line): |
48 | | - return line.rstrip(b"\r") |
| 44 | +blank_line_regex = re.compile(b"\n\r?\n", re.MULTILINE) |
49 | 45 |
|
50 | 46 |
|
51 | 47 | class ReceiveBuffer(object): |
52 | 48 | def __init__(self): |
53 | 49 | self._data = bytearray() |
54 | | - # These are both absolute offsets into self._data: |
55 | | - self._start = 0 |
56 | | - self._looked_at = 0 |
| 50 | + self._next_line_search = 0 |
| 51 | + self._multiple_lines_search = 0 |
57 | 52 |
|
58 | | - self._looked_for_regex = blank_line_delimiter_regex |
| 53 | + def __iadd__(self, byteslike): |
| 54 | + self._data += byteslike |
| 55 | + return self |
59 | 56 |
|
60 | 57 | def __bool__(self): |
61 | 58 | return bool(len(self)) |
62 | 59 |
|
| 60 | + def __len__(self): |
| 61 | + return len(self._data) |
| 62 | + |
63 | 63 | # for @property unprocessed_data |
64 | 64 | def __bytes__(self): |
65 | | - return bytes(self._data[self._start :]) |
| 65 | + return bytes(self._data) |
66 | 66 |
|
67 | 67 | if sys.version_info[0] < 3: # version specific: Python 2 |
68 | 68 | __str__ = __bytes__ |
69 | 69 | __nonzero__ = __bool__ |
70 | 70 |
|
71 | | - def __len__(self): |
72 | | - return len(self._data) - self._start |
73 | | - |
74 | | - def compress(self): |
75 | | - # Heuristic: only compress if it lets us reduce size by a factor |
76 | | - # of 2 |
77 | | - if self._start > len(self._data) // 2: |
78 | | - del self._data[: self._start] |
79 | | - self._looked_at -= self._start |
80 | | - self._start -= self._start |
81 | | - |
82 | | - def __iadd__(self, byteslike): |
83 | | - self._data += byteslike |
84 | | - return self |
85 | | - |
86 | 71 | def maybe_extract_at_most(self, count): |
87 | | - out = self._data[self._start : self._start + count] |
| 72 | + """ |
| 73 | + Extract at most count bytes from the start of the buffer. |
| 74 | + """ |
| 75 | + out = self._data[:count] |
88 | 76 | if not out: |
89 | 77 | return None |
90 | | - self._start += len(out) |
| 78 | + |
| 79 | + self._data[:count] = b"" |
| 80 | + self._next_line_search = 0 |
| 81 | + self._multiple_lines_search = 0 |
91 | 82 | return out |
92 | 83 |
|
93 | | - def maybe_extract_until_next(self, needle_regex, max_needle_length): |
94 | | - # Returns extracted bytes on success (advancing offset), or None on |
95 | | - # failure |
96 | | - if self._looked_for_regex == needle_regex: |
97 | | - looked_at = max(self._start, self._looked_at - max_needle_length) |
98 | | - else: |
99 | | - looked_at = self._start |
100 | | - self._looked_for_regex = needle_regex |
101 | | - |
102 | | - delimiter_match = next( |
103 | | - self._looked_for_regex.finditer(self._data, looked_at), None |
104 | | - ) |
105 | | - |
106 | | - if delimiter_match is None: |
107 | | - self._looked_at = len(self._data) |
| 84 | + def maybe_extract_next_line(self): |
| 85 | + """ |
| 86 | + Extract the first line, if a complete line is in the buffer. |
| 87 | + """ |
| 88 | + # Only search in buffer space that we've not already looked at. |
| 89 | + partial_buffer = self._data[self._next_line_search :] |
| 90 | + partial_idx = partial_buffer.find(b"\n") |
| 91 | + if partial_idx == -1: |
| 92 | + self._next_line_search = len(self._data) |
108 | 93 | return None |
109 | 94 |
|
110 | | - _, end = delimiter_match.span(0) |
111 | | - |
112 | | - out = self._data[self._start : end] |
113 | | - |
114 | | - self._start = end |
115 | | - |
| 95 | + # Truncate the buffer and return it. |
| 96 | + idx = self._next_line_search + partial_idx + 1 |
| 97 | + out = self._data[:idx] |
| 98 | + self._data[:idx] = b"" |
| 99 | + self._next_line_search = 0 |
| 100 | + self._multiple_lines_search = 0 |
116 | 101 | return out |
117 | 102 |
|
118 | | - # HTTP/1.1 has a number of constructs where you keep reading lines until |
119 | | - # you see a blank one. This does that, and then returns the lines. |
120 | 103 | def maybe_extract_lines(self): |
121 | | - if self._data[self._start : self._start + 2] == b"\r\n": |
122 | | - self._start += 2 |
| 104 | + """ |
| 105 | + Extract everything up to the first blank line, and return a list of lines. |
| 106 | + """ |
| 107 | + # Handle the case where we have an immediate empty line. |
| 108 | + if self._data[:1] == b"\n": |
| 109 | + self._data[:1] = b"" |
| 110 | + self._next_line_search = 0 |
| 111 | + self._multiple_lines_search = 0 |
123 | 112 | return [] |
124 | | - elif self._data[self._start : self._start + 1] == b"\n": |
125 | | - self._start += 1 |
| 113 | + |
| 114 | + if self._data[:2] == b"\r\n": |
| 115 | + self._data[:2] = b"" |
| 116 | + self._next_line_search = 0 |
| 117 | + self._multiple_lines_search = 0 |
126 | 118 | return [] |
127 | | - else: |
128 | | - data = self.maybe_extract_until_next(blank_line_delimiter_regex, 3) |
129 | | - if data is None: |
130 | | - return None |
131 | 119 |
|
132 | | - lines = list(map(rstrip_line, data.rstrip(b"\r\n").split(b"\n"))) |
| 120 | + # Only search in buffer space that we've not already looked at. |
| 121 | + partial_buffer = self._data[self._multiple_lines_search :] |
| 122 | + match = blank_line_regex.search(partial_buffer) |
| 123 | + if match is None: |
| 124 | + self._multiple_lines_search = max(0, len(self._data) - 2) |
| 125 | + return None |
| 126 | + |
| 127 | + # Truncate the buffer and return it. |
| 128 | + idx = self._multiple_lines_search + match.span(0)[-1] |
| 129 | + out = self._data[:idx] |
| 130 | + lines = [line.rstrip(b"\r") for line in out.split(b"\n")] |
| 131 | + |
| 132 | + self._data[:idx] = b"" |
| 133 | + self._next_line_search = 0 |
| 134 | + self._multiple_lines_search = 0 |
| 135 | + |
| 136 | + assert lines[-2] == lines[-1] == b"" |
133 | 137 |
|
134 | | - return lines |
| 138 | + return lines[:-2] |
0 commit comments