Skip to content

Commit 84504b0

Browse files
[mypyc] Use mypy.FORMAT_RE and ConversionSpecifier for % interpolation (#10877)
mypy.checkstrformat already provides a regex (FORMAT_RE) and a ConversionSpecifier class that the mypyc tokenizer can reuse, so this PR: * deletes the redundant regex from the mypyc tokenizer * uses ConversionSpecifier as the FormatOp
1 parent a54a177 commit 84504b0

File tree

4 files changed

+74
-57
lines changed

4 files changed

+74
-57
lines changed

mypy/checkstrformat.py

+29-20
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
import re
1414

1515
from typing import (
16-
cast, List, Tuple, Dict, Callable, Union, Optional, Pattern, Match, Set, Any
16+
cast, List, Tuple, Dict, Callable, Union, Optional, Pattern, Match, Set
1717
)
1818
from typing_extensions import Final, TYPE_CHECKING
1919

@@ -50,14 +50,14 @@ def compile_format_re() -> Pattern[str]:
5050
See https://docs.python.org/3/library/stdtypes.html#printf-style-string-formatting
5151
The regexp is intentionally a bit wider to report better errors.
5252
"""
53-
key_re = r'(\(([^()]*)\))?' # (optional) parenthesised sequence of characters.
54-
flags_re = r'([#0\-+ ]*)' # (optional) sequence of flags.
55-
width_re = r'(\*|[1-9][0-9]*)?' # (optional) minimum field width (* or numbers).
56-
precision_re = r'(?:\.(\*|[0-9]+)?)?' # (optional) . followed by * of numbers.
53+
key_re = r'(\((?P<key>[^)]*)\))?' # (optional) parenthesised sequence of characters.
54+
flags_re = r'(?P<flag>[#0\-+ ]*)' # (optional) sequence of flags.
55+
width_re = r'(?P<width>[1-9][0-9]*|\*)?' # (optional) minimum field width (* or numbers).
56+
precision_re = r'(?:\.(?P<precision>\*|[0-9]+)?)?' # (optional) . followed by * or numbers.
5757
length_mod_re = r'[hlL]?' # (optional) length modifier (unused).
58-
type_re = r'(.)?' # conversion type.
58+
type_re = r'(?P<type>.)?' # conversion type.
5959
format_re = '%' + key_re + flags_re + width_re + precision_re + length_mod_re + type_re
60-
return re.compile(format_re)
60+
return re.compile('({})'.format(format_re))
6161

6262

6363
def compile_new_format_re(custom_spec: bool) -> Pattern[str]:
@@ -114,16 +114,20 @@ def compile_new_format_re(custom_spec: bool) -> Pattern[str]:
114114

115115

116116
class ConversionSpecifier:
117-
def __init__(self, key: Optional[str],
118-
flags: str, width: str, precision: str, type: str,
117+
def __init__(self, type: str,
118+
key: Optional[str],
119+
flags: Optional[str],
120+
width: Optional[str],
121+
precision: Optional[str],
119122
format_spec: Optional[str] = None,
120123
conversion: Optional[str] = None,
121-
field: Optional[str] = None) -> None:
124+
field: Optional[str] = None,
125+
whole_seq: Optional[str] = None) -> None:
126+
self.type = type
122127
self.key = key
123128
self.flags = flags
124129
self.width = width
125130
self.precision = precision
126-
self.type = type
127131
# Used only for str.format() calls (it may be custom for types with __format__()).
128132
self.format_spec = format_spec
129133
self.non_standard_format_spec = False
@@ -132,24 +136,27 @@ def __init__(self, key: Optional[str],
132136
# Full formatted expression (i.e. key plus following attributes and/or indexes).
133137
# Used only for str.format() calls.
134138
self.field = field
139+
self.whole_seq = whole_seq
135140

136141
@classmethod
137-
def from_match(cls, match_obj: Match[str],
142+
def from_match(cls, match: Match[str],
138143
non_standard_spec: bool = False) -> 'ConversionSpecifier':
139144
"""Construct a specifier from the match object resulting from parsing a str.format() call."""
140-
match = cast(Any, match_obj) # TODO: remove this once typeshed is fixed.
141145
if non_standard_spec:
142-
spec = cls(match.group('key'),
143-
flags='', width='', precision='', type='',
146+
spec = cls(type='',
147+
key=match.group('key'),
148+
flags='', width='', precision='',
144149
format_spec=match.group('format_spec'),
145150
conversion=match.group('conversion'),
146151
field=match.group('field'))
147152
spec.non_standard_format_spec = True
148153
return spec
149154
# Replace unmatched optional groups with empty matches (for convenience).
150-
return cls(match.group('key'),
151-
flags=match.group('flags') or '', width=match.group('width') or '',
152-
precision=match.group('precision') or '', type=match.group('type') or '',
155+
return cls(type=match.group('type') or '',
156+
key=match.group('key'),
157+
flags=match.group('flags') or '',
158+
width=match.group('width') or '',
159+
precision=match.group('precision') or '',
153160
format_spec=match.group('format_spec'),
154161
conversion=match.group('conversion'),
155162
field=match.group('field'))
@@ -622,10 +629,12 @@ def check_str_interpolation(self,
622629

623630
def parse_conversion_specifiers(self, format: str) -> List[ConversionSpecifier]:
624631
specifiers: List[ConversionSpecifier] = []
625-
for parens_key, key, flags, width, precision, type in FORMAT_RE.findall(format):
632+
for whole_seq, parens_key, key, flags, width, precision, type \
633+
in FORMAT_RE.findall(format):
626634
if parens_key == '':
627635
key = None
628-
specifiers.append(ConversionSpecifier(key, flags, width, precision, type))
636+
specifiers.append(ConversionSpecifier(type, key, flags, width, precision,
637+
whole_seq=whole_seq))
629638
return specifiers
630639

631640
def analyze_conversion_specifiers(self, specifiers: List[ConversionSpecifier],

mypyc/irbuild/expression.py

+8-6
Original file line numberDiff line numberDiff line change
@@ -569,7 +569,8 @@ def transform_basic_comparison(builder: IRBuilder,
569569
def translate_str_format_percent_sign(builder: IRBuilder,
570570
format_expr: StrExpr,
571571
rhs: Expression) -> Value:
572-
literals, conversion_types = tokenizer_printf_style(format_expr.value)
572+
literals, conversion_specifiers = tokenizer_printf_style(format_expr.value)
573+
573574
variables = []
574575
if isinstance(rhs, TupleExpr):
575576
raw_variables = rhs.items
@@ -578,15 +579,16 @@ def translate_str_format_percent_sign(builder: IRBuilder,
578579
else:
579580
raw_variables = []
580581

581-
is_conversion_matched = (len(conversion_types) == len(raw_variables))
582+
is_conversion_matched = (len(conversion_specifiers) == len(raw_variables))
582583

583584
if is_conversion_matched:
584-
for typ, var in zip(conversion_types, raw_variables):
585+
for specifier, var in zip(conversion_specifiers, raw_variables):
585586
node_type = builder.node_type(var)
586-
if typ == '%d' and (is_int_rprimitive(node_type)
587-
or is_short_int_rprimitive(node_type)):
587+
format_type = specifier.whole_seq
588+
if format_type == '%d' and (is_int_rprimitive(node_type)
589+
or is_short_int_rprimitive(node_type)):
588590
var_str = builder.call_c(int_to_str_op, [builder.accept(var)], format_expr.line)
589-
elif typ == '%s':
591+
elif format_type == '%s':
590592
if is_str_rprimitive(node_type):
591593
var_str = builder.accept(var)
592594
else:

mypyc/irbuild/format_str_tokenizer.py

+13-22
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,37 @@
33
import re
44
from typing import List, Tuple
55

6+
from mypy.checkstrformat import (
7+
FORMAT_RE, ConversionSpecifier
8+
)
69
from mypyc.ir.ops import Value, Integer
710
from mypyc.ir.rtypes import c_pyssize_t_rprimitive
811
from mypyc.irbuild.builder import IRBuilder
912
from mypyc.primitives.str_ops import str_build_op
1013

11-
# printf-style String Formatting:
12-
# https://docs.python.org/3/library/stdtypes.html#old-string-formatting
13-
printf_style_pattern = re.compile(r"""
14-
(
15-
% # Start sign
16-
(?:\((?P<key>[^)]*)\))? # Optional: Mapping key
17-
(?P<flag>[-+#0 ]+)? # Optional: Conversion flags
18-
(?P<width>\d+|\*)? # Optional: Minimum field width
19-
(?:\.(?P<precision>\d+|\*))? # Optional: Precision
20-
[hlL]? # Optional: Length modifier, Ignored
21-
(?P<type>[diouxXeEfFgGcrsa]) # Conversion type
22-
| %%)
23-
""", re.VERBOSE)
2414

25-
26-
def tokenizer_printf_style(format_str: str) -> Tuple[List[str], List[str]]:
15+
def tokenizer_printf_style(format_str: str) -> Tuple[List[str], List[ConversionSpecifier]]:
2716
"""Tokenize a printf-style format string using regex.
2817
2918
Return:
3019
A list of string literals and a list of conversion specifiers
3120
"""
32-
literals = []
33-
format_ops = []
21+
literals: List[str] = []
22+
specifiers: List[ConversionSpecifier] = []
3423
last_end = 0
3524

36-
for m in re.finditer(printf_style_pattern, format_str):
25+
for m in re.finditer(FORMAT_RE, format_str):
26+
whole_seq, parens_key, key, flags, width, precision, conversion_type = m.groups()
27+
specifiers.append(ConversionSpecifier(conversion_type, key, flags, width, precision,
28+
whole_seq=whole_seq))
29+
3730
cur_start = m.start(1)
38-
format_tmp = m.group(1)
3931
literals.append(format_str[last_end:cur_start])
40-
format_ops.append(format_tmp)
41-
last_end = cur_start + len(format_tmp)
32+
last_end = cur_start + len(whole_seq)
4233

4334
literals.append(format_str[last_end:])
4435

45-
return literals, format_ops
36+
return literals, specifiers
4637

4738

4839
def join_formatted_strings(builder: IRBuilder, literals: List[str],

mypyc/test/test_stringformatting.py

+24-9
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,31 @@
11
import unittest
2+
from typing import List
23

34
from mypyc.irbuild.format_str_tokenizer import tokenizer_printf_style
45

56

67
class TestStringFormatting(unittest.TestCase):
8+
79
def test_tokenizer_printf_style(self) -> None:
8-
assert tokenizer_printf_style("I'm %s, id years old") == \
9-
(["I'm ", ', id years old'], ['%s'])
10-
assert tokenizer_printf_style("Test: %i%%, Test: %02d, Test: %.2f") == \
11-
(['Test: ', '', ', Test: ', ', Test: ', ''], ['%i', '%%', '%02d', '%.2f'])
12-
assert tokenizer_printf_style("ioasdfyuia%i%%%20s%d%sdafafadfa%s%d%x%E%.2f") == \
13-
(['ioasdfyuia', '', '', '', '', 'dafafadfa', '', '', '', '', ''],
14-
['%i', '%%', '%20s', '%d', '%s', '%s', '%d', '%x', '%E', '%.2f'])
15-
assert tokenizer_printf_style("Special: %#20.2f%d, test: ") == \
16-
(['Special: ', '', ', test: '], ['%#20.2f', '%d'])
10+
11+
def tokenizer_printf_style_helper(format_str: str,
12+
literals: List[str], conversion: List[str]) -> bool:
13+
l, specs = tokenizer_printf_style(format_str)
14+
return literals == l and conversion == [x.whole_seq for x in specs]
15+
16+
assert tokenizer_printf_style_helper(
17+
"I'm %s, id years old",
18+
["I'm ", ', id years old'],
19+
['%s'])
20+
assert tokenizer_printf_style_helper(
21+
"Test: %i%%, Test: %02d, Test: %.2f",
22+
['Test: ', '', ', Test: ', ', Test: ', ''],
23+
['%i', '%%', '%02d', '%.2f'])
24+
assert tokenizer_printf_style_helper(
25+
"ioasdfyuia%i%%%20s%d%sdafafadfa%s%d%x%E%.2f",
26+
['ioasdfyuia', '', '', '', '', 'dafafadfa', '', '', '', '', ''],
27+
['%i', '%%', '%20s', '%d', '%s', '%s', '%d', '%x', '%E', '%.2f'])
28+
assert tokenizer_printf_style_helper(
29+
"Special: %#20.2f%d, test: ",
30+
['Special: ', '', ', test: '],
31+
['%#20.2f', '%d'])

0 commit comments

Comments
 (0)