@@ -104,6 +104,12 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
104
104
# HELP cortex_alertmanager_nflog_snapshot_size_bytes Size of the last notification log snapshot in bytes.
105
105
# TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge
106
106
cortex_alertmanager_nflog_snapshot_size_bytes 111
107
+ # HELP cortex_alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
108
+ # TYPE cortex_alertmanager_nflog_maintenance_total counter
109
+ cortex_alertmanager_nflog_maintenance_total 111
110
+ # HELP cortex_alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
111
+ # TYPE cortex_alertmanager_nflog_maintenance_errors_total counter
112
+ cortex_alertmanager_nflog_maintenance_errors_total 111
107
113
# HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds.
108
114
# TYPE cortex_alertmanager_notification_latency_seconds histogram
109
115
cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 14
@@ -277,6 +283,12 @@ func TestAlertmanagerMetricsStore(t *testing.T) {
277
283
# HELP cortex_alertmanager_silences_snapshot_size_bytes Size of the last silence snapshot in bytes.
278
284
# TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge
279
285
cortex_alertmanager_silences_snapshot_size_bytes 111
286
+ # HELP cortex_alertmanager_silences_maintenance_total How many maintenances were executed for silences.
287
+ # TYPE cortex_alertmanager_silences_maintenance_total counter
288
+ cortex_alertmanager_silences_maintenance_total 111
289
+ # HELP cortex_alertmanager_silences_maintenance_errors_total How many maintenances were executed for silences that failed.
290
+ # TYPE cortex_alertmanager_silences_maintenance_errors_total counter
291
+ cortex_alertmanager_silences_maintenance_errors_total 111
280
292
# HELP cortex_alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica.
281
293
# TYPE cortex_alertmanager_state_fetch_replica_state_failed_total counter
282
294
cortex_alertmanager_state_fetch_replica_state_failed_total 0
@@ -414,6 +426,13 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
414
426
# TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge
415
427
cortex_alertmanager_nflog_snapshot_size_bytes 111
416
428
429
+ # HELP cortex_alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
430
+ # TYPE cortex_alertmanager_nflog_maintenance_total counter
431
+ cortex_alertmanager_nflog_maintenance_total 111
432
+ # HELP cortex_alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
433
+ # TYPE cortex_alertmanager_nflog_maintenance_errors_total counter
434
+ cortex_alertmanager_nflog_maintenance_errors_total 111
435
+
417
436
# HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds.
418
437
# TYPE cortex_alertmanager_notification_latency_seconds histogram
419
438
cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 14
@@ -598,6 +617,14 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
598
617
# HELP cortex_alertmanager_silences_snapshot_size_bytes Size of the last silence snapshot in bytes.
599
618
# TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge
600
619
cortex_alertmanager_silences_snapshot_size_bytes 111
620
+
621
+ # HELP cortex_alertmanager_silences_maintenance_total How many maintenances were executed for silences.
622
+ # TYPE cortex_alertmanager_silences_maintenance_total counter
623
+ cortex_alertmanager_silences_maintenance_total 111
624
+ # HELP cortex_alertmanager_silences_maintenance_errors_total How many maintenances were executed for silences that failed.
625
+ # TYPE cortex_alertmanager_silences_maintenance_errors_total counter
626
+ cortex_alertmanager_silences_maintenance_errors_total 111
627
+
601
628
# HELP cortex_alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica.
602
629
# TYPE cortex_alertmanager_state_fetch_replica_state_failed_total counter
603
630
cortex_alertmanager_state_fetch_replica_state_failed_total 0
@@ -715,6 +742,13 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
715
742
# TYPE cortex_alertmanager_nflog_snapshot_size_bytes gauge
716
743
cortex_alertmanager_nflog_snapshot_size_bytes 11
717
744
745
+ # HELP cortex_alertmanager_nflog_maintenance_total How many maintenances were executed for the notification log.
746
+ # TYPE cortex_alertmanager_nflog_maintenance_total counter
747
+ cortex_alertmanager_nflog_maintenance_total 111
748
+ # HELP cortex_alertmanager_nflog_maintenance_errors_total How many maintenances were executed for the notification log that failed.
749
+ # TYPE cortex_alertmanager_nflog_maintenance_errors_total counter
750
+ cortex_alertmanager_nflog_maintenance_errors_total 111
751
+
718
752
# HELP cortex_alertmanager_notification_latency_seconds The latency of notifications in seconds.
719
753
# TYPE cortex_alertmanager_notification_latency_seconds histogram
720
754
cortex_alertmanager_notification_latency_seconds_bucket{le="1"} 14
@@ -863,6 +897,13 @@ func TestAlertmanagerMetricsRemoval(t *testing.T) {
863
897
# TYPE cortex_alertmanager_silences_snapshot_size_bytes gauge
864
898
cortex_alertmanager_silences_snapshot_size_bytes 11
865
899
900
+ # HELP cortex_alertmanager_silences_maintenance_total How many maintenances were executed for silences.
901
+ # TYPE cortex_alertmanager_silences_maintenance_total counter
902
+ cortex_alertmanager_silences_maintenance_total 111
903
+ # HELP cortex_alertmanager_silences_maintenance_errors_total How many maintenances were executed for silences that failed.
904
+ # TYPE cortex_alertmanager_silences_maintenance_errors_total counter
905
+ cortex_alertmanager_silences_maintenance_errors_total 111
906
+
866
907
# HELP cortex_alertmanager_state_fetch_replica_state_failed_total Number of times we have failed to read and merge the full state from another replica.
867
908
# TYPE cortex_alertmanager_state_fetch_replica_state_failed_total counter
868
909
cortex_alertmanager_state_fetch_replica_state_failed_total 0
@@ -913,6 +954,8 @@ func populateAlertmanager(base float64) *prometheus.Registry {
913
954
s .silencesActive .Set (base )
914
955
s .silencesExpired .Set (base * 2 )
915
956
s .silencesPending .Set (base * 3 )
957
+ s .silencesMaintenanceTotal .Add (base )
958
+ s .silencesMaintenanceErrorsTotal .Add (base )
916
959
917
960
n := newNflogMetrics (reg )
918
961
n .gcDuration .Observe (base )
@@ -922,6 +965,8 @@ func populateAlertmanager(base float64) *prometheus.Registry {
922
965
n .queryErrorsTotal .Add (base )
923
966
n .queryDuration .Observe (base )
924
967
n .propagatedMessagesTotal .Add (base )
968
+ n .maintenanceTotal .Add (base )
969
+ n .maintenanceErrorsTotal .Add (base )
925
970
926
971
nm := newNotifyMetrics (reg )
927
972
for i , integration := range integrations {
@@ -967,6 +1012,8 @@ type nflogMetrics struct {
967
1012
queryErrorsTotal prometheus.Counter
968
1013
queryDuration prometheus.Histogram
969
1014
propagatedMessagesTotal prometheus.Counter
1015
+ maintenanceTotal prometheus.Counter
1016
+ maintenanceErrorsTotal prometheus.Counter
970
1017
}
971
1018
972
1019
func newNflogMetrics (r prometheus.Registerer ) * nflogMetrics {
@@ -1002,22 +1049,32 @@ func newNflogMetrics(r prometheus.Registerer) *nflogMetrics {
1002
1049
Name : "alertmanager_nflog_gossip_messages_propagated_total" ,
1003
1050
Help : "Number of received gossip messages that have been further gossiped." ,
1004
1051
})
1052
+ m .maintenanceTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1053
+ Name : "alertmanager_nflog_maintenance_total" ,
1054
+ Help : "How many maintenances were executed for the notification log." ,
1055
+ })
1056
+ m .maintenanceErrorsTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1057
+ Name : "alertmanager_nflog_maintenance_errors_total" ,
1058
+ Help : "How many maintenances were executed for the notification log that failed." ,
1059
+ })
1005
1060
1006
1061
return m
1007
1062
}
1008
1063
1009
1064
// Copied from github.com/prometheus/alertmanager/silence/silence.go
1010
1065
type silenceMetrics struct {
1011
- gcDuration prometheus.Summary
1012
- snapshotDuration prometheus.Summary
1013
- snapshotSize prometheus.Gauge
1014
- queriesTotal prometheus.Counter
1015
- queryErrorsTotal prometheus.Counter
1016
- queryDuration prometheus.Histogram
1017
- silencesActive prometheus.Gauge
1018
- silencesPending prometheus.Gauge
1019
- silencesExpired prometheus.Gauge
1020
- propagatedMessagesTotal prometheus.Counter
1066
+ gcDuration prometheus.Summary
1067
+ snapshotDuration prometheus.Summary
1068
+ snapshotSize prometheus.Gauge
1069
+ queriesTotal prometheus.Counter
1070
+ queryErrorsTotal prometheus.Counter
1071
+ queryDuration prometheus.Histogram
1072
+ silencesActive prometheus.Gauge
1073
+ silencesPending prometheus.Gauge
1074
+ silencesExpired prometheus.Gauge
1075
+ propagatedMessagesTotal prometheus.Counter
1076
+ silencesMaintenanceTotal prometheus.Counter
1077
+ silencesMaintenanceErrorsTotal prometheus.Counter
1021
1078
}
1022
1079
1023
1080
func newSilenceMetrics (r prometheus.Registerer ) * silenceMetrics {
@@ -1068,6 +1125,14 @@ func newSilenceMetrics(r prometheus.Registerer) *silenceMetrics {
1068
1125
Help : "How many silences by state." ,
1069
1126
ConstLabels : prometheus.Labels {"state" : string (types .SilenceStateExpired )},
1070
1127
})
1128
+ m .silencesMaintenanceTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1129
+ Name : "alertmanager_silences_maintenance_total" ,
1130
+ Help : "How many maintenances were executed for silences." ,
1131
+ })
1132
+ m .silencesMaintenanceErrorsTotal = promauto .With (r ).NewCounter (prometheus.CounterOpts {
1133
+ Name : "alertmanager_silences_maintenance_errors_total" ,
1134
+ Help : "How many maintenances were executed for silences that failed." ,
1135
+ })
1071
1136
1072
1137
return m
1073
1138
}
0 commit comments