Skip to content

Commit 0c04638

Browse files
author
beorn7
committed
Revert "Ensure there's always a . or e in floats. (#162)"
This reverts commit 67670fe. The format change wreaks havoc with histograms if you have a mixed set of monitored targets, because the same bucket boundary is then exposed under two different `le` label values (e.g. `le="100"` from old targets and `le="100.0"` from new ones). `histogram_quantile` spits out weird values, which are difficult to diagnose. A developer team here at SoundCloud pulled in the current version of prometheus/common via a different indirect dependency than prometheus/client_golang. (client_golang alone would not pull it in yet if you are using Go modules.) Canary instances then had the newer prometheus/common, while the normal production instances did not. The calculated quantiles for the complete service jumped up dramatically, so the team rolled back and started to look for a performance regression. (Just looking at the canary alone still worked, but since nobody suspected this kind of monitoring failure case, the investigation went totally the wrong way.) Given the wide distribution of prometheus/client_golang and the way Go modules work, we will see many of those innocuous upgrades that suddenly change the `le` values on new deployments. Since this change is buried behind dependencies, users will run into this problem without suspecting it, even if we announce it very loudly and clearly in prometheus/common or even prometheus/client_golang. For now, we have to revert the change and then think about a way to mitigate it. I'm thinking of sanitizing `le` values in Prometheus 2.x. But let's think about it without haste. Signed-off-by: beorn7 <[email protected]>
1 parent 2998b13 commit 0c04638

File tree

2 files changed

+21
-67
lines changed

2 files changed

+21
-67
lines changed

expfmt/text_create.go

Lines changed: 3 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -436,11 +436,11 @@ func writeEscapedString(w enhancedWriter, v string, includeDoubleQuote bool) (in
436436
func writeFloat(w enhancedWriter, f float64) (int, error) {
437437
switch {
438438
case f == 1:
439-
return w.WriteString("1.0")
439+
return 1, w.WriteByte('1')
440440
case f == 0:
441-
return w.WriteString("0.0")
441+
return 1, w.WriteByte('0')
442442
case f == -1:
443-
return w.WriteString("-1.0")
443+
return w.WriteString("-1")
444444
case math.IsNaN(f):
445445
return w.WriteString("NaN")
446446
case math.IsInf(f, +1):
@@ -450,12 +450,6 @@ func writeFloat(w enhancedWriter, f float64) (int, error) {
450450
default:
451451
bp := numBufPool.Get().(*[]byte)
452452
*bp = strconv.AppendFloat((*bp)[:0], f, 'g', -1, 64)
453-
// Add a .0 if used fixed point and there is no decimal
454-
// point already. This is for future proofing with OpenMetrics,
455-
// where floats always contain either an exponent or decimal.
456-
if !bytes.ContainsAny(*bp, "e.") {
457-
*bp = append(*bp, '.', '0')
458-
}
459453
written, err := w.Write(*bp)
460454
numBufPool.Put(bp)
461455
return written, err

expfmt/text_create_test.go

Lines changed: 18 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -212,14 +212,14 @@ untyped_name{name_1="value 1"} -1.23e-45
212212
# TYPE summary_name summary
213213
summary_name{quantile="0.5"} -1.23
214214
summary_name{quantile="0.9"} 0.2342354
215-
summary_name{quantile="0.99"} 0.0
215+
summary_name{quantile="0.99"} 0
216216
summary_name_sum -3.4567
217-
summary_name_count 42.0
218-
summary_name{name_1="value 1",name_2="value 2",quantile="0.5"} 1.0
219-
summary_name{name_1="value 1",name_2="value 2",quantile="0.9"} 2.0
220-
summary_name{name_1="value 1",name_2="value 2",quantile="0.99"} 3.0
217+
summary_name_count 42
218+
summary_name{name_1="value 1",name_2="value 2",quantile="0.5"} 1
219+
summary_name{name_1="value 1",name_2="value 2",quantile="0.9"} 2
220+
summary_name{name_1="value 1",name_2="value 2",quantile="0.99"} 3
221221
summary_name_sum{name_1="value 1",name_2="value 2"} 2010.1971
222-
summary_name_count{name_1="value 1",name_2="value 2"} 4711.0
222+
summary_name_count{name_1="value 1",name_2="value 2"} 4711
223223
`,
224224
},
225225
// 4: Histogram
@@ -261,13 +261,13 @@ summary_name_count{name_1="value 1",name_2="value 2"} 4711.0
261261
},
262262
out: `# HELP request_duration_microseconds The response latency.
263263
# TYPE request_duration_microseconds histogram
264-
request_duration_microseconds_bucket{le="100.0"} 123.0
265-
request_duration_microseconds_bucket{le="120.0"} 412.0
266-
request_duration_microseconds_bucket{le="144.0"} 592.0
267-
request_duration_microseconds_bucket{le="172.8"} 1524.0
268-
request_duration_microseconds_bucket{le="+Inf"} 2693.0
264+
request_duration_microseconds_bucket{le="100"} 123
265+
request_duration_microseconds_bucket{le="120"} 412
266+
request_duration_microseconds_bucket{le="144"} 592
267+
request_duration_microseconds_bucket{le="172.8"} 1524
268+
request_duration_microseconds_bucket{le="+Inf"} 2693
269269
request_duration_microseconds_sum 1.7560473e+06
270-
request_duration_microseconds_count 2693.0
270+
request_duration_microseconds_count 2693
271271
`,
272272
},
273273
// 5: Histogram with missing +Inf bucket.
@@ -282,26 +282,6 @@ request_duration_microseconds_count 2693.0
282282
SampleCount: proto.Uint64(2693),
283283
SampleSum: proto.Float64(1756047.3),
284284
Bucket: []*dto.Bucket{
285-
&dto.Bucket{
286-
UpperBound: proto.Float64(0),
287-
CumulativeCount: proto.Uint64(123),
288-
},
289-
&dto.Bucket{
290-
UpperBound: proto.Float64(1e-5),
291-
CumulativeCount: proto.Uint64(123),
292-
},
293-
&dto.Bucket{
294-
UpperBound: proto.Float64(1e-4),
295-
CumulativeCount: proto.Uint64(123),
296-
},
297-
&dto.Bucket{
298-
UpperBound: proto.Float64(1e-1),
299-
CumulativeCount: proto.Uint64(123),
300-
},
301-
&dto.Bucket{
302-
UpperBound: proto.Float64(1),
303-
CumulativeCount: proto.Uint64(123),
304-
},
305285
&dto.Bucket{
306286
UpperBound: proto.Float64(100),
307287
CumulativeCount: proto.Uint64(123),
@@ -318,40 +298,20 @@ request_duration_microseconds_count 2693.0
318298
UpperBound: proto.Float64(172.8),
319299
CumulativeCount: proto.Uint64(1524),
320300
},
321-
&dto.Bucket{
322-
UpperBound: proto.Float64(1e5),
323-
CumulativeCount: proto.Uint64(1543),
324-
},
325-
&dto.Bucket{
326-
UpperBound: proto.Float64(1e6),
327-
CumulativeCount: proto.Uint64(1544),
328-
},
329-
&dto.Bucket{
330-
UpperBound: proto.Float64(1e23),
331-
CumulativeCount: proto.Uint64(1545),
332-
},
333301
},
334302
},
335303
},
336304
},
337305
},
338306
out: `# HELP request_duration_microseconds The response latency.
339307
# TYPE request_duration_microseconds histogram
340-
request_duration_microseconds_bucket{le="0.0"} 123.0
341-
request_duration_microseconds_bucket{le="1e-05"} 123.0
342-
request_duration_microseconds_bucket{le="0.0001"} 123.0
343-
request_duration_microseconds_bucket{le="0.1"} 123.0
344-
request_duration_microseconds_bucket{le="1.0"} 123.0
345-
request_duration_microseconds_bucket{le="100.0"} 123.0
346-
request_duration_microseconds_bucket{le="120.0"} 412.0
347-
request_duration_microseconds_bucket{le="144.0"} 592.0
348-
request_duration_microseconds_bucket{le="172.8"} 1524.0
349-
request_duration_microseconds_bucket{le="100000.0"} 1543.0
350-
request_duration_microseconds_bucket{le="1e+06"} 1544.0
351-
request_duration_microseconds_bucket{le="1e+23"} 1545.0
352-
request_duration_microseconds_bucket{le="+Inf"} 2693.0
308+
request_duration_microseconds_bucket{le="100"} 123
309+
request_duration_microseconds_bucket{le="120"} 412
310+
request_duration_microseconds_bucket{le="144"} 592
311+
request_duration_microseconds_bucket{le="172.8"} 1524
312+
request_duration_microseconds_bucket{le="+Inf"} 2693
353313
request_duration_microseconds_sum 1.7560473e+06
354-
request_duration_microseconds_count 2693.0
314+
request_duration_microseconds_count 2693
355315
`,
356316
},
357317
// 6: No metric type, should result in default type Counter.

0 commit comments

Comments
 (0)