|
| 1 | +From 76b1e32b1d730355bd976dbe18c9dd43fafb0ea0 Mon Sep 17 00:00:00 2001 |
| 2 | + |
| 3 | +Date: Thu, 30 Jan 2025 08:38:50 +0000 |
| 4 | +Subject: [PATCH] Return empty tuple to indicate the email parsing error. |
| 5 | + |
| 6 | +Shortened version of the original patch from the commit below, |
| 7 | +keeping only the changes to Lib/email/utils.py. |
| 8 | + |
| 9 | +From ee953f2b8fc12ee9b8209ab60a2f06c603e5a624 Mon Sep 17 00:00:00 2001 |
| 10 | +From: Petr Viktorin < [email protected]> |
| 11 | +Date: Fri, 6 Sep 2024 13:13:54 +0200 |
| 12 | +Subject: [PATCH] [3.9] [CVE-2023-27043] gh-102988: Reject malformed addresses |
| 13 | + in email.parseaddr() (GH-111116) (#123769) |
| 14 | + |
| 15 | +Detect email address parsing errors and return empty tuple to |
| 16 | +indicate the parsing error (old API). Add an optional 'strict' |
| 17 | +parameter to getaddresses() and parseaddr() functions. Patch by |
| 18 | +Thomas Dwyer. |
| 19 | + |
| 20 | +(cherry picked from commit 4a153a1d3b18803a684cd1bcc2cdf3ede3dbae19) |
| 21 | + |
| 22 | +Co-authored-by: Victor Stinner < [email protected]> |
| 23 | +Co-Authored-By: Thomas Dwyer < [email protected]> |
| 24 | + |
| 25 | + Doc/library/email.utils.rst | 19 +- |
| 26 | + Doc/whatsnew/3.9.rst | 10 + |
| 27 | + Lib/email/utils.py | 151 ++++++++++++- |
| 28 | + Lib/test/test_email/test_email.py | 204 +++++++++++++++++- |
| 29 | + ...-10-20-15-28-08.gh-issue-102988.dStNO7.rst | 8 + |
| 30 | + 5 files changed, 371 insertions(+), 21 deletions(-) |
| 31 | +--- |
| 32 | + Lib/email/utils.py | 151 ++++++++++++++++++++++++++++++++++++++++++--- |
| 33 | + 1 file changed, 142 insertions(+), 9 deletions(-) |
| 34 | + |
| 35 | +diff --git a/Lib/email/utils.py b/Lib/email/utils.py |
| 36 | +index aa949aa..af2fb14 100644 |
| 37 | +--- a/Lib/email/utils.py |
| 38 | ++++ b/Lib/email/utils.py |
| 39 | +@@ -48,6 +48,7 @@ TICK = "'" |
| 40 | + specialsre = re.compile(r'[][\\()<>@,:;".]') |
| 41 | + escapesre = re.compile(r'[\\"]') |
| 42 | + |
| 43 | ++ |
| 44 | + def _has_surrogates(s): |
| 45 | + """Return True if s may contain surrogate-escaped binary data.""" |
| 46 | + # This check is based on the fact that unless there are surrogates, utf8 |
| 47 | +@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'): |
| 48 | + return address |
| 49 | + |
| 50 | + |
| 51 | ++def _iter_escaped_chars(addr): |
| 52 | ++ pos = 0 |
| 53 | ++ escape = False |
| 54 | ++ for pos, ch in enumerate(addr): |
| 55 | ++ if escape: |
| 56 | ++ yield (pos, '\\' + ch) |
| 57 | ++ escape = False |
| 58 | ++ elif ch == '\\': |
| 59 | ++ escape = True |
| 60 | ++ else: |
| 61 | ++ yield (pos, ch) |
| 62 | ++ if escape: |
| 63 | ++ yield (pos, '\\') |
| 64 | ++ |
| 65 | ++ |
| 66 | ++def _strip_quoted_realnames(addr): |
| 67 | ++ """Strip real names between quotes.""" |
| 68 | ++ if '"' not in addr: |
| 69 | ++ # Fast path |
| 70 | ++ return addr |
| 71 | ++ |
| 72 | ++ start = 0 |
| 73 | ++ open_pos = None |
| 74 | ++ result = [] |
| 75 | ++ for pos, ch in _iter_escaped_chars(addr): |
| 76 | ++ if ch == '"': |
| 77 | ++ if open_pos is None: |
| 78 | ++ open_pos = pos |
| 79 | ++ else: |
| 80 | ++ if start != open_pos: |
| 81 | ++ result.append(addr[start:open_pos]) |
| 82 | ++ start = pos + 1 |
| 83 | ++ open_pos = None |
| 84 | ++ |
| 85 | ++ if start < len(addr): |
| 86 | ++ result.append(addr[start:]) |
| 87 | ++ |
| 88 | ++ return ''.join(result) |
| 89 | + |
| 90 | +-def getaddresses(fieldvalues): |
| 91 | +- """Return a list of (REALNAME, EMAIL) for each fieldvalue.""" |
| 92 | +- all = COMMASPACE.join(str(v) for v in fieldvalues) |
| 93 | +- a = _AddressList(all) |
| 94 | +- return a.addresslist |
| 95 | ++ |
| 96 | ++supports_strict_parsing = True |
| 97 | ++ |
| 98 | ++def getaddresses(fieldvalues, *, strict=True): |
| 99 | ++ """Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue. |
| 100 | ++ |
| 101 | ++ When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in |
| 102 | ++ its place. |
| 103 | ++ |
| 104 | ++ If strict is true, use a strict parser which rejects malformed inputs. |
| 105 | ++ """ |
| 106 | ++ |
| 107 | ++ # If strict is true, if the resulting list of parsed addresses is greater |
| 108 | ++ # than the number of fieldvalues in the input list, a parsing error has |
| 109 | ++ # occurred and consequently a list containing a single empty 2-tuple [('', |
| 110 | ++ # '')] is returned in its place. This is done to avoid invalid output. |
| 111 | ++ # |
| 112 | ++ # Malformed input: getaddresses(['alice@example.com <bob@example.com>']) |
| 113 | ++ # Invalid output: [('', 'alice@example.com'), ('', 'bob@example.com')] |
| 114 | ++ # Safe output: [('', '')] |
| 115 | ++ |
| 116 | ++ if not strict: |
| 117 | ++ all = COMMASPACE.join(str(v) for v in fieldvalues) |
| 118 | ++ a = _AddressList(all) |
| 119 | ++ return a.addresslist |
| 120 | ++ |
| 121 | ++ fieldvalues = [str(v) for v in fieldvalues] |
| 122 | ++ fieldvalues = _pre_parse_validation(fieldvalues) |
| 123 | ++ addr = COMMASPACE.join(fieldvalues) |
| 124 | ++ a = _AddressList(addr) |
| 125 | ++ result = _post_parse_validation(a.addresslist) |
| 126 | ++ |
| 127 | ++ # Treat output as invalid if the number of addresses is not equal to the |
| 128 | ++ # expected number of addresses. |
| 129 | ++ n = 0 |
| 130 | ++ for v in fieldvalues: |
| 131 | ++ # When a comma is used in the Real Name part it is not a delimiter. |
| 132 | ++ # So strip those out before counting the commas. |
| 133 | ++ v = _strip_quoted_realnames(v) |
| 134 | ++ # Expected number of addresses: 1 + number of commas |
| 135 | ++ n += 1 + v.count(',') |
| 136 | ++ if len(result) != n: |
| 137 | ++ return [('', '')] |
| 138 | ++ |
| 139 | ++ return result |
| 140 | ++ |
| 141 | ++ |
| 142 | ++def _check_parenthesis(addr): |
| 143 | ++ # Ignore parentheses in quoted real names. |
| 144 | ++ addr = _strip_quoted_realnames(addr) |
| 145 | ++ |
| 146 | ++ opens = 0 |
| 147 | ++ for pos, ch in _iter_escaped_chars(addr): |
| 148 | ++ if ch == '(': |
| 149 | ++ opens += 1 |
| 150 | ++ elif ch == ')': |
| 151 | ++ opens -= 1 |
| 152 | ++ if opens < 0: |
| 153 | ++ return False |
| 154 | ++ return (opens == 0) |
| 155 | ++ |
| 156 | ++ |
| 157 | ++def _pre_parse_validation(email_header_fields): |
| 158 | ++ accepted_values = [] |
| 159 | ++ for v in email_header_fields: |
| 160 | ++ if not _check_parenthesis(v): |
| 161 | ++ v = "('', '')" |
| 162 | ++ accepted_values.append(v) |
| 163 | ++ |
| 164 | ++ return accepted_values |
| 165 | ++ |
| 166 | ++ |
| 167 | ++def _post_parse_validation(parsed_email_header_tuples): |
| 168 | ++ accepted_values = [] |
| 169 | ++ # The parser would have parsed a correctly formatted domain-literal |
| 170 | ++ # The existence of an [ after parsing indicates a parsing failure |
| 171 | ++ for v in parsed_email_header_tuples: |
| 172 | ++ if '[' in v[1]: |
| 173 | ++ v = ('', '') |
| 174 | ++ accepted_values.append(v) |
| 175 | ++ |
| 176 | ++ return accepted_values |
| 177 | + |
| 178 | + |
| 179 | + def _format_timetuple_and_zone(timetuple, zone): |
| 180 | +@@ -205,16 +321,33 @@ def parsedate_to_datetime(data): |
| 181 | + tzinfo=datetime.timezone(datetime.timedelta(seconds=tz))) |
| 182 | + |
| 183 | + |
| 184 | +-def parseaddr(addr): |
| 185 | ++def parseaddr(addr, *, strict=True): |
| 186 | + """ |
| 187 | + Parse addr into its constituent realname and email address parts. |
| 188 | + |
| 189 | + Return a tuple of realname and email address, unless the parse fails, in |
| 190 | + which case return a 2-tuple of ('', ''). |
| 191 | ++ |
| 192 | ++ If strict is True, use a strict parser which rejects malformed inputs. |
| 193 | + """ |
| 194 | +- addrs = _AddressList(addr).addresslist |
| 195 | +- if not addrs: |
| 196 | +- return '', '' |
| 197 | ++ if not strict: |
| 198 | ++ addrs = _AddressList(addr).addresslist |
| 199 | ++ if not addrs: |
| 200 | ++ return ('', '') |
| 201 | ++ return addrs[0] |
| 202 | ++ |
| 203 | ++ if isinstance(addr, list): |
| 204 | ++ addr = addr[0] |
| 205 | ++ |
| 206 | ++ if not isinstance(addr, str): |
| 207 | ++ return ('', '') |
| 208 | ++ |
| 209 | ++ addr = _pre_parse_validation([addr])[0] |
| 210 | ++ addrs = _post_parse_validation(_AddressList(addr).addresslist) |
| 211 | ++ |
| 212 | ++ if not addrs or len(addrs) > 1: |
| 213 | ++ return ('', '') |
| 214 | ++ |
| 215 | + return addrs[0] |
| 216 | + |
| 217 | + |
| 218 | +-- |
| 219 | +2.40.4 |
| 220 | + |
0 commit comments