Skip to content

Commit b9e8b85

Browse files
committed
Add some metrics to debug at scale
1 parent 0e37217 commit b9e8b85

File tree

1 file changed

+86
-6
lines changed

1 file changed

+86
-6
lines changed

pkg/metrics/client_go_adapter.go

Lines changed: 86 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,53 @@ package metrics
1818

1919
import (
2020
"context"
21+
"net/url"
22+
"time"
2123

2224
"github.com/prometheus/client_golang/prometheus"
2325
clientmetrics "k8s.io/client-go/tools/metrics"
2426
)
2527

26-
// this file contains setup logic to initialize the myriad of places
27-
// that client-go registers metrics. We copy the names and formats
28-
// from Kubernetes so that we match the core controllers.
29-
3028
var (
31-
// client metrics.
29+
// requestLatency is a Prometheus Histogram metric type partitioned by
30+
// "verb", and "host" labels. It is used for the rest client latency metrics.
31+
requestLatency = prometheus.NewHistogramVec(
32+
prometheus.HistogramOpts{
33+
Name: "rest_client_request_duration_seconds",
34+
Help: "Request latency in seconds. Broken down by verb, and host.",
35+
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
36+
},
37+
[]string{"verb", "host"},
38+
)
39+
40+
requestSize = prometheus.NewHistogramVec(
41+
prometheus.HistogramOpts{
42+
Name: "rest_client_request_size_bytes",
43+
Help: "Request size in bytes. Broken down by verb and host.",
44+
// 64 bytes to 16MB
45+
Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216},
46+
},
47+
[]string{"verb", "host"},
48+
)
49+
50+
responseSize = prometheus.NewHistogramVec(
51+
prometheus.HistogramOpts{
52+
Name: "rest_client_response_size_bytes",
53+
Help: "Response size in bytes. Broken down by verb and host.",
54+
// 64 bytes to 16MB
55+
Buckets: []float64{64, 256, 512, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216},
56+
},
57+
[]string{"verb", "host"},
58+
)
59+
60+
rateLimiterLatency = prometheus.NewHistogramVec(
61+
prometheus.HistogramOpts{
62+
Name: "rest_client_rate_limiter_duration_seconds",
63+
Help: "Client side rate limiter latency in seconds. Broken down by verb, and host.",
64+
Buckets: []float64{0.005, 0.025, 0.1, 0.25, 0.5, 1.0, 2.0, 4.0, 8.0, 15.0, 30.0, 60.0},
65+
},
66+
[]string{"verb", "host"},
67+
)
3268

3369
requestResult = prometheus.NewCounterVec(
3470
prometheus.CounterOpts{
@@ -37,6 +73,14 @@ var (
3773
},
3874
[]string{"code", "method", "host"},
3975
)
76+
77+
requestRetry = prometheus.NewCounterVec(
78+
prometheus.CounterOpts{
79+
Name: "rest_client_request_retries_total",
80+
Help: "Number of request retries, partitioned by status code, verb, and host.",
81+
},
82+
[]string{"code", "verb", "host"},
83+
)
4084
)
4185

4286
func init() {
@@ -46,11 +90,21 @@ func init() {
4690
// registerClientMetrics sets up the client latency metrics from client-go.
4791
func registerClientMetrics() {
4892
// register the metrics with our registry
93+
Registry.MustRegister(requestLatency)
94+
Registry.MustRegister(requestSize)
95+
Registry.MustRegister(responseSize)
96+
Registry.MustRegister(rateLimiterLatency)
4997
Registry.MustRegister(requestResult)
98+
Registry.MustRegister(requestRetry)
5099

51100
// register the metrics with client-go
52101
clientmetrics.Register(clientmetrics.RegisterOpts{
53-
RequestResult: &resultAdapter{metric: requestResult},
102+
RequestLatency: &LatencyAdapter{metric: requestLatency},
103+
RequestSize: &sizeAdapter{metric: requestSize},
104+
ResponseSize: &sizeAdapter{metric: responseSize},
105+
RateLimiterLatency: &LatencyAdapter{metric: rateLimiterLatency},
106+
RequestResult: &resultAdapter{metric: requestResult},
107+
RequestRetry: &retryAdapter{requestRetry},
54108
})
55109
}
56110

@@ -62,10 +116,36 @@ func registerClientMetrics() {
62116
// copied (more-or-less directly) from k8s.io/kubernetes setup code
63117
// (which isn't anywhere in an easily-importable place).
64118

119+
// LatencyAdapter implements LatencyMetric.
120+
type LatencyAdapter struct {
121+
metric *prometheus.HistogramVec
122+
}
123+
124+
// Observe increments the request latency metric for the given verb/URL.
125+
func (l *LatencyAdapter) Observe(_ context.Context, verb string, u url.URL, latency time.Duration) {
126+
l.metric.WithLabelValues(verb, u.String()).Observe(latency.Seconds())
127+
}
128+
129+
type sizeAdapter struct {
130+
metric *prometheus.HistogramVec
131+
}
132+
133+
func (s *sizeAdapter) Observe(ctx context.Context, verb string, host string, size float64) {
134+
s.metric.WithLabelValues(verb, host).Observe(size)
135+
}
136+
65137
type resultAdapter struct {
66138
metric *prometheus.CounterVec
67139
}
68140

69141
func (r *resultAdapter) Increment(_ context.Context, code, method, host string) {
70142
r.metric.WithLabelValues(code, method, host).Inc()
71143
}
144+
145+
type retryAdapter struct {
146+
metric *prometheus.CounterVec
147+
}
148+
149+
func (r *retryAdapter) IncrementRetry(_ context.Context, code, method, host string) {
150+
r.metric.WithLabelValues(code, method, host).Inc()
151+
}

0 commit comments

Comments
 (0)