Skip to content

Commit b9dd95f

Browse files
authored
Merge pull request prometheus#15428 from prometheus/beorn7/metrics
notifier: fix increment of metric prometheus_notifications_errors_total
2 parents 6f1f0aa + e01c5ce commit b9dd95f

File tree

3 files changed

+7
-6
lines changed

3 files changed

+7
-6
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## unreleased
44

5+
* [CHANGE] Notifier: Increment the prometheus_notifications_errors_total metric by the number of affected alerts rather than by one per batch of affected alerts. #15428
56
* [ENHANCEMENT] OTLP receiver: Convert also metric metadata. #15416
67

78
## 3.0.0 / 2024-11-14

documentation/prometheus-mixin/alerts.libsonnet

+2-2
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,8 @@
8484
severity: 'warning',
8585
},
8686
annotations: {
87-
summary: 'Prometheus has encountered more than 1% errors sending alerts to a specific Alertmanager.',
88-
description: '{{ printf "%%.1f" $value }}%% errors while sending alerts from Prometheus %(prometheusName)s to Alertmanager {{$labels.alertmanager}}.' % $._config,
87+
summary: 'More than 1% of alerts sent by Prometheus to a specific Alertmanager were affected by errors.',
88+
description: '{{ printf "%%.1f" $value }}%% of alerts sent by Prometheus %(prometheusName)s to Alertmanager {{$labels.alertmanager}} were affected by errors.' % $._config,
8989
},
9090
},
9191
{

notifier/notifier.go

+4-4
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ func newAlertMetrics(r prometheus.Registerer, queueCap int, queueLen, alertmanag
160160
Namespace: namespace,
161161
Subsystem: subsystem,
162162
Name: "errors_total",
163-
Help: "Total number of errors sending alert notifications.",
163+
Help: "Total number of sent alerts affected by errors.",
164164
},
165165
[]string{alertmanagerLabel},
166166
),
@@ -619,13 +619,13 @@ func (n *Manager) sendAll(alerts ...*Alert) bool {
619619

620620
go func(ctx context.Context, client *http.Client, url string, payload []byte, count int) {
621621
if err := n.sendOne(ctx, client, url, payload); err != nil {
622-
n.logger.Error("Error sending alert", "alertmanager", url, "count", count, "err", err)
623-
n.metrics.errors.WithLabelValues(url).Inc()
622+
n.logger.Error("Error sending alerts", "alertmanager", url, "count", count, "err", err)
623+
n.metrics.errors.WithLabelValues(url).Add(float64(count))
624624
} else {
625625
numSuccess.Inc()
626626
}
627627
n.metrics.latency.WithLabelValues(url).Observe(time.Since(begin).Seconds())
628-
n.metrics.sent.WithLabelValues(url).Add(float64(len(amAlerts)))
628+
n.metrics.sent.WithLabelValues(url).Add(float64(count))
629629

630630
wg.Done()
631631
}(ctx, ams.client, am.url().String(), payload, len(amAlerts))

0 commit comments

Comments (0)