Skip to content

Commit f11d11c

Browse files
sd2k authored and fedetorres93 committed
perf: improve perf of encoding with new UTF-8 validation
This makes four changes:
1. The `EscapingScheme` and `ValidationScheme` enums are now `Copy`, since they are very small and cheap to copy. They are passed by value rather than by reference.
2. The `escape_name` function now returns a `Cow` rather than a `String`, to avoid allocations in many cases (for example, when the name needs no escaping).
3. `escape_name` also preallocates a buffer for the escaped name, rather than starting with an empty `String` and growing it, to amortize the allocations.
4. `is_ascii_alphabetic` and `is_ascii_digit` are now used to check for characters that are valid in metric and label names.
Based on profiles, I suspect that change 2 has the highest impact, but I haven't split the changes out to see how much of a difference each one makes.
1 parent ca92109 commit f11d11c

File tree

3 files changed

+76
-82
lines changed

3 files changed

+76
-82
lines changed

src/encoding.rs

+24-25
Original file line number | Diff line number | Diff line change
@@ -770,7 +770,7 @@ impl ExemplarValueEncoder<'_> {
770770

771771
/// Enum for determining how metric and label names will
772772
/// be validated.
773-
#[derive(Debug, PartialEq, Default, Clone)]
773+
#[derive(Debug, PartialEq, Default, Clone, Copy)]
774774
pub enum ValidationScheme {
775775
/// Setting that requires that metric and label names
776776
/// conform to the original OpenMetrics character requirements.
@@ -807,9 +807,9 @@ fn is_valid_legacy_prefix(prefix: Option<&Prefix>) -> bool {
807807
fn is_quoted_metric_name(
808808
name: &str,
809809
prefix: Option<&Prefix>,
810-
validation_scheme: &ValidationScheme,
810+
validation_scheme: ValidationScheme,
811811
) -> bool {
812-
*validation_scheme == ValidationScheme::UTF8Validation
812+
validation_scheme == ValidationScheme::UTF8Validation
813813
&& (!is_valid_legacy_metric_name(name) || !is_valid_legacy_prefix(prefix))
814814
}
815815

@@ -818,24 +818,20 @@ fn is_valid_legacy_label_name(label_name: &str) -> bool {
818818
return false;
819819
}
820820
for (i, b) in label_name.chars().enumerate() {
821-
if !((b >= 'a' && b <= 'z')
822-
|| (b >= 'A' && b <= 'Z')
823-
|| b == '_'
824-
|| (b >= '0' && b <= '9' && i > 0))
825-
{
821+
if !(b.is_ascii_alphabetic() || b == '_' || (b.is_ascii_digit() && i > 0)) {
826822
return false;
827823
}
828824
}
829825
true
830826
}
831827

832-
fn is_quoted_label_name(name: &str, validation_scheme: &ValidationScheme) -> bool {
833-
*validation_scheme == ValidationScheme::UTF8Validation && !is_valid_legacy_label_name(name)
828+
fn is_quoted_label_name(name: &str, validation_scheme: ValidationScheme) -> bool {
829+
validation_scheme == ValidationScheme::UTF8Validation && !is_valid_legacy_label_name(name)
834830
}
835831

836832
/// Enum for determining how metric and label names will
837833
/// be escaped.
838-
#[derive(Debug, Default, Clone)]
834+
#[derive(Debug, Default, Clone, Copy)]
839835
pub enum EscapingScheme {
840836
/// Replaces all legacy-invalid characters with underscores.
841837
#[default]
@@ -863,26 +859,30 @@ impl EscapingScheme {
863859
}
864860
}
865861

866-
fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
862+
fn escape_name(name: &str, scheme: EscapingScheme) -> Cow<'_, str> {
867863
if name.is_empty() {
868-
return name.to_string();
864+
return name.into();
869865
}
870-
let mut escaped = String::new();
871866
match scheme {
872-
EscapingScheme::NoEscaping => return name.to_string(),
867+
EscapingScheme::NoEscaping => name.into(),
868+
EscapingScheme::UnderscoreEscaping | EscapingScheme::ValueEncodingEscaping
869+
if is_valid_legacy_metric_name(name) =>
870+
{
871+
name.into()
872+
}
873873
EscapingScheme::UnderscoreEscaping => {
874-
if is_valid_legacy_metric_name(name) {
875-
return name.to_string();
876-
}
874+
let mut escaped = String::with_capacity(name.len());
877875
for (i, b) in name.chars().enumerate() {
878876
if is_valid_legacy_char(b, i) {
879877
escaped.push(b);
880878
} else {
881879
escaped.push('_');
882880
}
883881
}
882+
escaped.into()
884883
}
885884
EscapingScheme::DotsEscaping => {
885+
let mut escaped = String::with_capacity(name.len());
886886
for (i, b) in name.chars().enumerate() {
887887
if b == '_' {
888888
escaped.push_str("__");
@@ -894,11 +894,10 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
894894
escaped.push_str("__");
895895
}
896896
}
897+
escaped.into()
897898
}
898899
EscapingScheme::ValueEncodingEscaping => {
899-
if is_valid_legacy_metric_name(name) {
900-
return name.to_string();
901-
}
900+
let mut escaped = String::with_capacity(name.len());
902901
escaped.push_str("U__");
903902
for (i, b) in name.chars().enumerate() {
904903
if b == '_' {
@@ -909,9 +908,9 @@ fn escape_name(name: &str, scheme: &EscapingScheme) -> String {
909908
write!(escaped, "_{:x}_", b as i64).unwrap();
910909
}
911910
}
911+
escaped.into()
912912
}
913913
}
914-
escaped
915914
}
916915

917916
/// Returns the escaping scheme to use based on the given header.
@@ -1120,13 +1119,13 @@ mod tests {
11201119
];
11211120

11221121
for scenario in scenarios {
1123-
let result = escape_name(scenario.input, &EscapingScheme::UnderscoreEscaping);
1122+
let result = escape_name(scenario.input, EscapingScheme::UnderscoreEscaping);
11241123
assert_eq!(result, scenario.expected_underscores, "{}", scenario.name);
11251124

1126-
let result = escape_name(scenario.input, &EscapingScheme::DotsEscaping);
1125+
let result = escape_name(scenario.input, EscapingScheme::DotsEscaping);
11271126
assert_eq!(result, scenario.expected_dots, "{}", scenario.name);
11281127

1129-
let result = escape_name(scenario.input, &EscapingScheme::ValueEncodingEscaping);
1128+
let result = escape_name(scenario.input, EscapingScheme::ValueEncodingEscaping);
11301129
assert_eq!(result, scenario.expected_value, "{}", scenario.name);
11311130
}
11321131
}

src/encoding/text.rs

+50-55
Original file line number | Diff line number | Diff line change
@@ -148,8 +148,8 @@ where
148148
registry.encode(
149149
&mut DescriptorEncoder::new(
150150
writer,
151-
&registry.name_validation_scheme(),
152-
&registry.escaping_scheme(),
151+
registry.name_validation_scheme(),
152+
registry.escaping_scheme(),
153153
)
154154
.into(),
155155
)
@@ -196,8 +196,8 @@ pub(crate) struct DescriptorEncoder<'a> {
196196
writer: &'a mut dyn Write,
197197
prefix: Option<&'a Prefix>,
198198
labels: &'a [(Cow<'static, str>, Cow<'static, str>)],
199-
name_validation_scheme: &'a ValidationScheme,
200-
escaping_scheme: &'a EscapingScheme,
199+
name_validation_scheme: ValidationScheme,
200+
escaping_scheme: EscapingScheme,
201201
}
202202

203203
impl std::fmt::Debug for DescriptorEncoder<'_> {
@@ -207,11 +207,11 @@ impl std::fmt::Debug for DescriptorEncoder<'_> {
207207
}
208208

209209
impl DescriptorEncoder<'_> {
210-
pub(crate) fn new<'a>(
211-
writer: &'a mut dyn Write,
212-
name_validation_scheme: &'a ValidationScheme,
213-
escaping_scheme: &'a EscapingScheme,
214-
) -> DescriptorEncoder<'a> {
210+
pub(crate) fn new(
211+
writer: &'_ mut dyn Write,
212+
name_validation_scheme: ValidationScheme,
213+
escaping_scheme: EscapingScheme,
214+
) -> DescriptorEncoder<'_> {
215215
DescriptorEncoder {
216216
writer,
217217
prefix: Default::default(),
@@ -230,8 +230,8 @@ impl DescriptorEncoder<'_> {
230230
prefix,
231231
labels,
232232
writer: self.writer,
233-
name_validation_scheme: &self.name_validation_scheme,
234-
escaping_scheme: &self.escaping_scheme,
233+
name_validation_scheme: self.name_validation_scheme,
234+
escaping_scheme: self.escaping_scheme,
235235
}
236236
}
237237

@@ -246,14 +246,12 @@ impl DescriptorEncoder<'_> {
246246
let mut escaped_prefix: Option<&Prefix> = None;
247247
let escaped_prefix_value: Prefix;
248248
if let Some(prefix) = self.prefix {
249-
escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme));
249+
escaped_prefix_value =
250+
Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned());
250251
escaped_prefix = Some(&escaped_prefix_value);
251252
}
252-
let is_quoted_metric_name = is_quoted_metric_name(
253-
escaped_name.as_str(),
254-
escaped_prefix,
255-
self.name_validation_scheme,
256-
);
253+
let is_quoted_metric_name =
254+
is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme);
257255
self.writer.write_str("# HELP ")?;
258256
if is_quoted_metric_name {
259257
self.writer.write_str("\"")?;
@@ -262,7 +260,7 @@ impl DescriptorEncoder<'_> {
262260
self.writer.write_str(prefix.as_str())?;
263261
self.writer.write_str("_")?;
264262
}
265-
self.writer.write_str(escaped_name.as_str())?;
263+
self.writer.write_str(&escaped_name)?;
266264
if let Some(unit) = unit {
267265
self.writer.write_str("_")?;
268266
self.writer.write_str(unit.as_str())?;
@@ -282,7 +280,7 @@ impl DescriptorEncoder<'_> {
282280
self.writer.write_str(prefix.as_str())?;
283281
self.writer.write_str("_")?;
284282
}
285-
self.writer.write_str(escaped_name.as_str())?;
283+
self.writer.write_str(&escaped_name)?;
286284
if let Some(unit) = unit {
287285
self.writer.write_str("_")?;
288286
self.writer.write_str(unit.as_str())?;
@@ -303,7 +301,7 @@ impl DescriptorEncoder<'_> {
303301
self.writer.write_str(prefix.as_str())?;
304302
self.writer.write_str("_")?;
305303
}
306-
self.writer.write_str(escaped_name.as_str())?;
304+
self.writer.write_str(&escaped_name)?;
307305
self.writer.write_str("_")?;
308306
self.writer.write_str(unit.as_str())?;
309307
if is_quoted_metric_name {
@@ -343,8 +341,8 @@ pub(crate) struct MetricEncoder<'a> {
343341
unit: Option<&'a Unit>,
344342
const_labels: &'a [(Cow<'static, str>, Cow<'static, str>)],
345343
family_labels: Option<&'a dyn super::EncodeLabelSet>,
346-
name_validation_scheme: &'a ValidationScheme,
347-
escaping_scheme: &'a EscapingScheme,
344+
name_validation_scheme: ValidationScheme,
345+
escaping_scheme: EscapingScheme,
348346
}
349347

350348
impl std::fmt::Debug for MetricEncoder<'_> {
@@ -534,14 +532,12 @@ impl MetricEncoder<'_> {
534532
let mut escaped_prefix: Option<&Prefix> = None;
535533
let escaped_prefix_value: Prefix;
536534
if let Some(prefix) = self.prefix {
537-
escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme));
535+
escaped_prefix_value =
536+
Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned());
538537
escaped_prefix = Some(&escaped_prefix_value);
539538
}
540-
let is_quoted_metric_name = is_quoted_metric_name(
541-
escaped_name.as_str(),
542-
escaped_prefix,
543-
self.name_validation_scheme,
544-
);
539+
let is_quoted_metric_name =
540+
is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme);
545541
if is_quoted_metric_name {
546542
self.writer.write_str("{")?;
547543
self.writer.write_str("\"")?;
@@ -550,7 +546,7 @@ impl MetricEncoder<'_> {
550546
self.writer.write_str(prefix.as_str())?;
551547
self.writer.write_str("_")?;
552548
}
553-
self.writer.write_str(escaped_name.as_str())?;
549+
self.writer.write_str(&escaped_name)?;
554550
if let Some(unit) = self.unit {
555551
self.writer.write_str("_")?;
556552
self.writer.write_str(unit.as_str())?;
@@ -576,14 +572,12 @@ impl MetricEncoder<'_> {
576572
let mut escaped_prefix: Option<&Prefix> = None;
577573
let escaped_prefix_value: Prefix;
578574
if let Some(prefix) = self.prefix {
579-
escaped_prefix_value = Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme));
575+
escaped_prefix_value =
576+
Prefix::from(escape_name(prefix.as_str(), self.escaping_scheme).into_owned());
580577
escaped_prefix = Some(&escaped_prefix_value);
581578
}
582-
let is_quoted_metric_name = is_quoted_metric_name(
583-
escaped_name.as_str(),
584-
escaped_prefix,
585-
self.name_validation_scheme,
586-
);
579+
let is_quoted_metric_name =
580+
is_quoted_metric_name(&escaped_name, escaped_prefix, self.name_validation_scheme);
587581
if self.const_labels.is_empty()
588582
&& additional_labels.is_none()
589583
&& self.family_labels.is_none()
@@ -750,8 +744,8 @@ impl ExemplarValueEncoder<'_> {
750744
pub(crate) struct LabelSetEncoder<'a> {
751745
writer: &'a mut dyn Write,
752746
first: bool,
753-
name_validation_scheme: &'a ValidationScheme,
754-
escaping_scheme: &'a EscapingScheme,
747+
name_validation_scheme: ValidationScheme,
748+
escaping_scheme: EscapingScheme,
755749
}
756750

757751
impl std::fmt::Debug for LabelSetEncoder<'_> {
@@ -765,8 +759,8 @@ impl std::fmt::Debug for LabelSetEncoder<'_> {
765759
impl<'a> LabelSetEncoder<'a> {
766760
fn new(
767761
writer: &'a mut dyn Write,
768-
name_validation_scheme: &'a ValidationScheme,
769-
escaping_scheme: &'a EscapingScheme,
762+
name_validation_scheme: ValidationScheme,
763+
escaping_scheme: EscapingScheme,
770764
) -> Self {
771765
Self {
772766
writer,
@@ -791,8 +785,8 @@ impl<'a> LabelSetEncoder<'a> {
791785
pub(crate) struct LabelEncoder<'a> {
792786
writer: &'a mut dyn Write,
793787
first: bool,
794-
name_validation_scheme: &'a ValidationScheme,
795-
escaping_scheme: &'a EscapingScheme,
788+
name_validation_scheme: ValidationScheme,
789+
escaping_scheme: EscapingScheme,
796790
}
797791

798792
impl std::fmt::Debug for LabelEncoder<'_> {
@@ -818,8 +812,8 @@ impl LabelEncoder<'_> {
818812

819813
pub(crate) struct LabelKeyEncoder<'a> {
820814
writer: &'a mut dyn Write,
821-
name_validation_scheme: &'a ValidationScheme,
822-
escaping_scheme: &'a EscapingScheme,
815+
name_validation_scheme: ValidationScheme,
816+
escaping_scheme: EscapingScheme,
823817
}
824818

825819
impl std::fmt::Debug for LabelKeyEncoder<'_> {
@@ -840,12 +834,11 @@ impl<'a> LabelKeyEncoder<'a> {
840834
impl std::fmt::Write for LabelKeyEncoder<'_> {
841835
fn write_str(&mut self, s: &str) -> std::fmt::Result {
842836
let escaped_name = escape_name(s, self.escaping_scheme);
843-
let is_quoted_label_name =
844-
is_quoted_label_name(escaped_name.as_str(), self.name_validation_scheme);
837+
let is_quoted_label_name = is_quoted_label_name(&escaped_name, self.name_validation_scheme);
845838
if is_quoted_label_name {
846839
self.writer.write_str("\"")?;
847840
}
848-
self.writer.write_str(escaped_name.as_str())?;
841+
self.writer.write_str(&escaped_name)?;
849842
if is_quoted_label_name {
850843
self.writer.write_str("\"")?;
851844
}
@@ -1043,8 +1036,7 @@ mod tests {
10431036

10441037
encode(&mut encoded, &registry).unwrap();
10451038

1046-
let expected = "# HELP \"my.gauge\"\" My\ngau\nge\".\n"
1047-
.to_owned()
1039+
let expected = "# HELP \"my.gauge\"\" My\ngau\nge\".\n".to_owned()
10481040
+ "# TYPE \"my.gauge\"\" gauge\n"
10491041
+ "{\"my.gauge\"\"} inf\n"
10501042
+ "# EOF\n";
@@ -1646,8 +1638,8 @@ mod tests {
16461638
unit: None,
16471639
const_labels: &[],
16481640
family_labels: None,
1649-
name_validation_scheme: &UTF8Validation,
1650-
escaping_scheme: &NoEscaping,
1641+
name_validation_scheme: UTF8Validation,
1642+
escaping_scheme: NoEscaping,
16511643
};
16521644

16531645
encoder.encode_labels::<NoLabelSet>(None).unwrap();
@@ -1667,8 +1659,8 @@ mod tests {
16671659
unit: None,
16681660
const_labels: &const_labels,
16691661
family_labels: Some(&family_labels),
1670-
name_validation_scheme: &UTF8Validation,
1671-
escaping_scheme: &NoEscaping,
1662+
name_validation_scheme: UTF8Validation,
1663+
escaping_scheme: NoEscaping,
16721664
};
16731665

16741666
encoder.encode_labels(Some(&additional_labels)).unwrap();
@@ -1688,12 +1680,15 @@ mod tests {
16881680
unit: None,
16891681
const_labels: &const_labels,
16901682
family_labels: Some(&family_labels),
1691-
name_validation_scheme: &UTF8Validation,
1692-
escaping_scheme: &NoEscaping,
1683+
name_validation_scheme: UTF8Validation,
1684+
escaping_scheme: NoEscaping,
16931685
};
16941686

16951687
encoder.encode_labels(Some(&additional_labels)).unwrap();
1696-
assert_eq!(buffer, "{\"service.name\"=\"t1\",\"whatever\\whatever\"=\"t2\",\"t*3\"=\"t3\"}");
1688+
assert_eq!(
1689+
buffer,
1690+
"{\"service.name\"=\"t1\",\"whatever\\whatever\"=\"t2\",\"t*3\"=\"t3\"}"
1691+
);
16971692
}
16981693

16991694
fn parse_with_python_client(input: String) {

0 commit comments

Comments
 (0)