Skip to content

Commit 26da805

Browse files
authored
Merge pull request #1117 from prometheus/optimize-parser
Optimize parser
2 parents f915160 + 2a2ca52 commit 26da805

File tree

1 file changed

+35
-38
lines changed

1 file changed

+35
-38
lines changed

prometheus_client/parser.py

Lines changed: 35 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -62,44 +62,35 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
6262
# The label name is before the equal, or if there's no equal, that's the
6363
# metric name.
6464

65-
term, sub_labels = _next_term(sub_labels, openmetrics)
66-
if not term:
65+
name_term, value_term, sub_labels = _next_term(sub_labels, openmetrics)
66+
if not value_term:
6767
if openmetrics:
6868
raise ValueError("empty term in line: " + labels_string)
6969
continue
7070

71-
quoted_name = False
72-
operator_pos = _next_unquoted_char(term, '=')
73-
if operator_pos == -1:
74-
quoted_name = True
75-
label_name = "__name__"
76-
else:
77-
value_start = _next_unquoted_char(term, '=')
78-
label_name, quoted_name = _unquote_unescape(term[:value_start])
79-
term = term[value_start + 1:]
71+
label_name, quoted_name = _unquote_unescape(name_term)
8072

8173
if not quoted_name and not _is_valid_legacy_metric_name(label_name):
8274
raise ValueError("unquoted UTF-8 metric name")
8375

8476
# Check for missing quotes
85-
term = term.strip()
86-
if not term or term[0] != '"':
77+
if not value_term or value_term[0] != '"':
8778
raise ValueError
8879

8980
# The first quote is guaranteed to be after the equal.
90-
# Find the last unescaped quote.
81+
# Make sure that the next unescaped quote is the last character.
9182
i = 1
92-
while i < len(term):
93-
i = term.index('"', i)
94-
if not _is_character_escaped(term[:i], i):
83+
while i < len(value_term):
84+
i = value_term.index('"', i)
85+
if not _is_character_escaped(value_term[:i], i):
9586
break
9687
i += 1
97-
9888
# The label value is between the first and last quote
9989
quote_end = i + 1
100-
if quote_end != len(term):
90+
if quote_end != len(value_term):
10191
raise ValueError("unexpected text after quote: " + labels_string)
102-
label_value, _ = _unquote_unescape(term[:quote_end])
92+
93+
label_value, _ = _unquote_unescape(value_term)
10394
if label_name == '__name__':
10495
_validate_metric_name(label_name)
10596
else:
@@ -112,11 +103,10 @@ def parse_labels(labels_string: str, openmetrics: bool = False) -> Dict[str, str
112103
raise ValueError("Invalid labels: " + labels_string)
113104

114105

115-
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
116-
"""Extract the next comma-separated label term from the text.
117-
118-
Returns the stripped term and the stripped remainder of the string,
119-
including the comma.
106+
def _next_term(text: str, openmetrics: bool) -> Tuple[str, str, str]:
107+
"""Extract the next comma-separated label term from the text. The results
108+
are stripped terms for the label name, label value, and then the remainder
109+
of the string including the final , or }.
120110
121111
Raises ValueError if the term is empty and we're in openmetrics mode.
122112
"""
@@ -125,41 +115,48 @@ def _next_term(text: str, openmetrics: bool) -> Tuple[str, str]:
125115
if text[0] == ',':
126116
text = text[1:]
127117
if not text:
128-
return "", ""
118+
return "", "", ""
129119
if text[0] == ',':
130120
raise ValueError("multiple commas")
131-
splitpos = _next_unquoted_char(text, ',}')
121+
122+
splitpos = _next_unquoted_char(text, '=,}')
123+
if splitpos >= 0 and text[splitpos] == "=":
124+
labelname = text[:splitpos]
125+
text = text[splitpos + 1:]
126+
splitpos = _next_unquoted_char(text, ',}')
127+
else:
128+
labelname = "__name__"
129+
132130
if splitpos == -1:
133131
splitpos = len(text)
134132
term = text[:splitpos]
135133
if not term and openmetrics:
136134
raise ValueError("empty term:", term)
137135

138-
sublabels = text[splitpos:]
139-
return term.strip(), sublabels.strip()
136+
rest = text[splitpos:]
137+
return labelname, term.strip(), rest.strip()
140138

141139

142-
def _next_unquoted_char(text: str, chs: str, startidx: int = 0) -> int:
140+
def _next_unquoted_char(text: str, chs: Optional[str], startidx: int = 0) -> int:
143141
"""Return position of next unquoted character in tuple, or -1 if not found.
144142
145143
It is always assumed that the first character being checked is not already
146144
inside quotes.
147145
"""
148-
i = startidx
149146
in_quotes = False
150147
if chs is None:
151148
chs = string.whitespace
152-
while i < len(text):
153-
if text[i] == '"' and not _is_character_escaped(text, i):
149+
150+
for i, c in enumerate(text[startidx:]):
151+
if c == '"' and not _is_character_escaped(text, startidx + i):
154152
in_quotes = not in_quotes
155153
if not in_quotes:
156-
if text[i] in chs:
157-
return i
158-
i += 1
154+
if c in chs:
155+
return startidx + i
159156
return -1
160157

161158

162-
def _last_unquoted_char(text: str, chs: str) -> int:
159+
def _last_unquoted_char(text: str, chs: Optional[str]) -> int:
163160
"""Return position of last unquoted character in list, or -1 if not found."""
164161
i = len(text) - 1
165162
in_quotes = False
@@ -253,7 +250,7 @@ def _parse_sample(text):
253250
value, timestamp = _parse_value_and_timestamp(remaining_text)
254251
return Sample(name, {}, value, timestamp)
255252
name = text[:label_start].strip()
256-
label_end = _next_unquoted_char(text, '}')
253+
label_end = _next_unquoted_char(text[label_start:], '}') + label_start
257254
labels = parse_labels(text[label_start + 1:label_end], False)
258255
if not name:
259256
# Name might be in the labels

0 commit comments

Comments
 (0)