Skip to content

Commit b5a9fc6

Browse files
author
Nikita Popov
committed
add feature to configure promhttp error handling
1 parent 6b359b1 commit b5a9fc6

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed

README.md

+9-7
Original file line numberDiff line numberDiff line change
@@ -78,22 +78,23 @@ If you are still using the legacy [Access scopes][access-scopes], the `https://w
7878

7979
| Flag | Required | Default | Description |
8080
| ----------------------------------- | -------- |---------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
81-
| `google.project-ids` | No | GCloud SDK auto-discovery | Repeatable flag of Google Project IDs |
82-
| `google.projects.filter` | No | | GCloud projects filter expression. See more [here](https://cloud.google.com/sdk/gcloud/reference/projects/list). |
81+
| `google.project-ids` | No | GCloud SDK auto-discovery | Repeatable flag of Google Project IDs |
82+
| `google.projects.filter` | No | | GCloud projects filter expression. See more [here](https://cloud.google.com/sdk/gcloud/reference/projects/list). |
8383
| `monitoring.metrics-ingest-delay` | No | | Offsets metric collection by a delay appropriate for each metric type, e.g. because bigquery metrics are slow to appear |
8484
| `monitoring.drop-delegated-projects` | No | No | Drop metrics from attached projects and fetch `project_id` only. |
85-
| `monitoring.metrics-prefixes` | Yes | | Repeatable flag of Google Stackdriver Monitoring Metric Type prefixes (see [example][metrics-prefix-example] and [available metrics][metrics-list]) |
85+
| `monitoring.metrics-prefixes` | Yes | | Repeatable flag of Google Stackdriver Monitoring Metric Type prefixes (see [example][metrics-prefix-example] and [available metrics][metrics-list]) |
8686
| `monitoring.metrics-interval` | No | `5m` | Metric's timestamp interval to request from the Google Stackdriver Monitoring Metrics API. Only the most recent data point is used |
8787
| `monitoring.metrics-offset` | No | `0s` | Offset (into the past) for the metric's timestamp interval to request from the Google Stackdriver Monitoring Metrics API, to handle latency in published metrics |
88-
| `monitoring.filters` | No | | Additional filters to be sent on the Monitoring API call. Add multiple filters by providing this parameter multiple times. See [monitoring.filters](#using-filters) for more info. |
88+
| `monitoring.filters` | No | | Additional filters to be sent on the Monitoring API call. Add multiple filters by providing this parameter multiple times. See [monitoring.filters](#using-filters) for more info. |
8989
| `monitoring.aggregate-deltas` | No | | If enabled will treat all DELTA metrics as an in-memory counter instead of a gauge. Be sure to read [what to know about aggregating DELTA metrics](#what-to-know-about-aggregating-delta-metrics) |
9090
| `monitoring.aggregate-deltas-ttl` | No | `30m` | How long should a delta metric continue to be exported and stored after GCP stops producing it. Read [slow moving metrics](#slow-moving-metrics) to understand the problem this attempts to solve |
9191
| `monitoring.descriptor-cache-ttl` | No | `0s` | How long the metric descriptors for a prefix should be cached |
92+
| `promhttp.error-handling` | No | `httpErrorOnError` | Defines how errors are handled by promhttp.Handler while serving metrics. The possible values `httpErrorOnError`, `continueOnError`, and `panicOnError` map to the [available options][promhttp-error-handling-opts] |
9293
| `stackdriver.max-retries` | No | `0` | Max number of retries that should be attempted on 503 errors from stackdriver. |
93-
| `stackdriver.http-timeout` | No | `10s` | How long should stackdriver_exporter wait for a result from the Stackdriver API. |
94+
| `stackdriver.http-timeout` | No | `10s` | How long should stackdriver_exporter wait for a result from the Stackdriver API. |
9495
| `stackdriver.max-backoff=` | No | | Max time between each request in an exp backoff scenario. |
95-
| `stackdriver.backoff-jitter` | No | `1s` | The amount of jitter to introduce in an exponential backoff scenario. |
96-
| `stackdriver.retry-statuses` | No | `503` | The HTTP statuses that should trigger a retry. |
96+
| `stackdriver.backoff-jitter` | No | `1s` | The amount of jitter to introduce in an exponential backoff scenario. |
97+
| `stackdriver.retry-statuses` | No | `503` | The HTTP statuses that should trigger a retry. |
9798
| `web.config.file` | No | | [EXPERIMENTAL] Path to configuration file that can enable TLS or authentication. |
9899
| `web.listen-address` | No | `:9255` | Address to listen on for web interface and telemetry. Repeatable for multiple addresses. |
99100
| `web.systemd-socket` | No | | Use systemd socket activation listeners instead of port listeners (Linux only). |
@@ -247,4 +248,5 @@ Apache License 2.0, see [LICENSE][license].
247248
[monitored-resources]: https://cloud.google.com/monitoring/api/resources
248249
[prometheus]: https://prometheus.io/
249250
[prometheus-boshrelease]: https://github.com/cloudfoundry-community/prometheus-boshrelease
251+
[promhttp-error-handling-opts]: https://github.com/prometheus/client_golang/blob/main/prometheus/promhttp/http.go#L323
250252
[stackdriver]: https://cloud.google.com/monitoring/

stackdriver_exporter.go

+18-1
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,10 @@ var (
137137
monitoringDescriptorCacheOnlyGoogle = kingpin.Flag(
138138
"monitoring.descriptor-cache-only-google", "Only cache descriptors for *.googleapis.com metrics",
139139
).Default("true").Bool()
140+
141+
promHttpErrorHandling = kingpin.Flag(
142+
"promhttp.error-handling", "Defines how errors are handled by promhttp.Handler while serving metrics",
143+
).Default("httpErrorOnError").Enum("httpErrorOnError", "continueOnError", "panicOnError")
140144
)
141145

142146
func init() {
@@ -277,7 +281,10 @@ func (h *handler) innerHandler(filters map[string]bool) http.Handler {
277281
registry,
278282
}
279283
}
280-
opts := promhttp.HandlerOpts{ErrorLog: slog.NewLogLogger(h.logger.Handler(), slog.LevelError)}
284+
opts := promhttp.HandlerOpts{
285+
ErrorLog: slog.NewLogLogger(h.logger.Handler(), slog.LevelError),
286+
ErrorHandling: getPromHttpErrorHandlingOpt(*promHttpErrorHandling),
287+
}
281288
// Delegate http serving to Prometheus client library, which will call collector.Collect.
282289
return promhttp.HandlerFor(gatherers, opts)
283290
}
@@ -464,3 +471,13 @@ func parseMetricExtraFilters() []collectors.MetricFilter {
464471
}
465472
return extraFilters
466473
}
474+
475+
func getPromHttpErrorHandlingOpt(flagOpt string) promhttp.HandlerErrorHandling {
476+
if flagOpt == "continueOnError" {
477+
return promhttp.ContinueOnError
478+
}
479+
if flagOpt == "panicOnError" {
480+
return promhttp.PanicOnError
481+
}
482+
return promhttp.HTTPErrorOnError
483+
}

0 commit comments

Comments
 (0)