forked from banzaicloud/spot-termination-exporter
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathmetadata.go
147 lines (130 loc) · 5.54 KB
/
metadata.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
package main
import (
"encoding/json"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
"io/ioutil"
"net/http"
"time"
)
// terminationCollector is a Prometheus collector that reports spot
// termination and rebalance-recommendation state obtained from an instance
// metadata endpoint.
type terminationCollector struct {
	// metadataEndpoint is the URL prefix to which metadata paths such as
	// "instance-id" are appended verbatim.
	metadataEndpoint string

	// rebalanceIndicator describes the gauge reporting whether a rebalance
	// recommendation has been issued.
	rebalanceIndicator *prometheus.Desc
	// rebalanceScrapeSuccessful describes the gauge reporting whether the
	// rebalance events endpoint could be reached.
	rebalanceScrapeSuccessful *prometheus.Desc
	// scrapeSuccessful describes the gauge reporting whether the
	// instance-action endpoint could be reached.
	scrapeSuccessful *prometheus.Desc
	// terminationIndicator describes the gauge reporting whether an
	// instance action (termination) notice is present.
	terminationIndicator *prometheus.Desc
	// terminationTime describes the gauge reporting the seconds remaining
	// until the announced action time.
	terminationTime *prometheus.Desc
}
// instanceAction is the JSON document decoded from the "spot/instance-action"
// metadata path.
type instanceAction struct {
	// Action is the value of the "action" key; it is exported as the
	// instance_action label of the termination indicator metric.
	Action string `json:"action"`
	// Time is the value of the "time" key, i.e. the moment the action is
	// scheduled for.
	Time time.Time `json:"time"`
}
// instanceEvent is the JSON document decoded from the
// "events/recommendations/rebalance" metadata path.
type instanceEvent struct {
	// NoticeTime is the value of the "noticeTime" key; Collect only logs it.
	NoticeTime time.Time `json:"noticeTime"`
}
// NewTerminationCollector returns a collector that queries the metadata
// service rooted at me. The endpoint is used as a plain string prefix, so it
// must already end in a path separator.
func NewTerminationCollector(me string) *terminationCollector {
	collector := new(terminationCollector)
	collector.metadataEndpoint = me

	// Availability gauges, labelled by instance only.
	collector.scrapeSuccessful = prometheus.NewDesc(
		"aws_instance_metadata_service_available",
		"Metadata service available",
		[]string{"instance_id"},
		nil,
	)
	collector.rebalanceScrapeSuccessful = prometheus.NewDesc(
		"aws_instance_metadata_service_events_available",
		"Metadata service events endpoint available",
		[]string{"instance_id"},
		nil,
	)

	// Termination gauges.
	collector.terminationIndicator = prometheus.NewDesc(
		"aws_instance_termination_imminent",
		"Instance is about to be terminated",
		[]string{"instance_action", "instance_id", "instance_type"},
		nil,
	)
	collector.terminationTime = prometheus.NewDesc(
		"aws_instance_termination_in",
		"Instance will be terminated in",
		[]string{"instance_id", "instance_type"},
		nil,
	)

	// Rebalance recommendation gauge.
	collector.rebalanceIndicator = prometheus.NewDesc(
		"aws_instance_rebalance_recommended",
		"Instance rebalance is recommended",
		[]string{"instance_id", "instance_type"},
		nil,
	)

	return collector
}
// Describe sends the descriptor of every metric this collector can emit to
// ch, in a fixed order.
func (c *terminationCollector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		c.rebalanceIndicator,
		c.rebalanceScrapeSuccessful,
		c.scrapeSuccessful,
		c.terminationIndicator,
		c.terminationTime,
	}
	for _, d := range descs {
		ch <- d
	}
}
// Collect queries the metadata service and sends the resulting gauges to ch.
//
// The instance id and instance type are fetched first because every metric is
// labelled with them; if either cannot be obtained, nothing is emitted. The
// spot instance-action and rebalance-recommendation endpoints are then
// scraped independently, so a failure of one does not suppress the metrics of
// the other.
func (c *terminationCollector) Collect(ch chan<- prometheus.Metric) {
	log.Info("Fetching termination data from metadata-service")
	client := &http.Client{Timeout: time.Second}

	instanceID, ok := c.metadataValue(client, "instance-id")
	if !ok {
		return
	}
	instanceType, ok := c.metadataValue(client, "instance-type")
	if !ok {
		return
	}

	c.collectTermination(ch, client, instanceID, instanceType)
	c.collectRebalance(ch, client, instanceID, instanceType)
}

// metadataValue fetches the plain-text metadata entry name and returns its
// body. The boolean is false (and the failure is logged) when the request
// fails, the endpoint answers 404, or the body cannot be read.
func (c *terminationCollector) metadataValue(client *http.Client, name string) (string, bool) {
	resp, err := client.Get(c.metadataEndpoint + name)
	if err != nil {
		log.Errorf("couldn't parse %s from metadata: %s", name, err.Error())
		return "", false
	}
	// Close on every path, including the 404 one, so the connection is not leaked.
	defer resp.Body.Close()

	if resp.StatusCode == http.StatusNotFound {
		log.Errorf("couldn't parse %s from metadata: endpoint not found", name)
		return "", false
	}
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Errorf("couldn't read %s from metadata: %s", name, err.Error())
		return "", false
	}
	return string(body), true
}

// collectTermination scrapes "spot/instance-action" and emits the
// availability gauge, the termination indicator and, when the announced time
// lies in the future, the seconds remaining until it.
func (c *terminationCollector) collectTermination(ch chan<- prometheus.Metric, client *http.Client, instanceID, instanceType string) {
	resp, err := client.Get(c.metadataEndpoint + "spot/instance-action")
	if err != nil {
		log.Errorf("Failed to fetch data from metadata service: %s", err)
		ch <- prometheus.MustNewConstMetric(c.scrapeSuccessful, prometheus.GaugeValue, 0, instanceID)
		return
	}
	defer resp.Body.Close()
	ch <- prometheus.MustNewConstMetric(c.scrapeSuccessful, prometheus.GaugeValue, 1, instanceID)

	if resp.StatusCode == http.StatusNotFound {
		log.Debug("instance-action endpoint not found")
		ch <- prometheus.MustNewConstMetric(c.terminationIndicator, prometheus.GaugeValue, 0, "", instanceID, instanceType)
		return
	}

	var ia instanceAction
	body, err := ioutil.ReadAll(resp.Body)
	if err == nil {
		err = json.Unmarshal(body, &ia)
	}
	// value may be present but not be a time according to AWS docs,
	// so parse error is not fatal
	if err != nil {
		log.Errorf("Couldn't parse instance-action metadata: %s", err)
		// terminationIndicator has three labels; the action label is left
		// empty here. Passing only two values makes MustNewConstMetric panic.
		ch <- prometheus.MustNewConstMetric(c.terminationIndicator, prometheus.GaugeValue, 0, "", instanceID, instanceType)
		return
	}

	log.Infof("instance-action endpoint available, termination time: %v", ia.Time)
	ch <- prometheus.MustNewConstMetric(c.terminationIndicator, prometheus.GaugeValue, 1, ia.Action, instanceID, instanceType)
	// Only report a countdown while the announced time is still ahead.
	if delta := time.Until(ia.Time); delta > 0 {
		ch <- prometheus.MustNewConstMetric(c.terminationTime, prometheus.GaugeValue, delta.Seconds(), instanceID, instanceType)
	}
}

// collectRebalance scrapes "events/recommendations/rebalance" and emits the
// events-availability gauge and the rebalance indicator.
func (c *terminationCollector) collectRebalance(ch chan<- prometheus.Metric, client *http.Client, instanceID, instanceType string) {
	resp, err := client.Get(c.metadataEndpoint + "events/recommendations/rebalance")
	if err != nil {
		log.Errorf("Failed to fetch events data from metadata service: %s", err)
		ch <- prometheus.MustNewConstMetric(c.rebalanceScrapeSuccessful, prometheus.GaugeValue, 0, instanceID)
		return
	}
	defer resp.Body.Close()
	ch <- prometheus.MustNewConstMetric(c.rebalanceScrapeSuccessful, prometheus.GaugeValue, 1, instanceID)

	if resp.StatusCode == http.StatusNotFound {
		log.Debug("rebalance endpoint not found")
		ch <- prometheus.MustNewConstMetric(c.rebalanceIndicator, prometheus.GaugeValue, 0, instanceID, instanceType)
		return
	}

	var ie instanceEvent
	body, err := ioutil.ReadAll(resp.Body)
	if err == nil {
		err = json.Unmarshal(body, &ie)
	}
	if err != nil {
		log.Errorf("Couldn't parse rebalance recommendation event metadata: %s", err)
		ch <- prometheus.MustNewConstMetric(c.rebalanceIndicator, prometheus.GaugeValue, 0, instanceID, instanceType)
		return
	}

	log.Infof("rebalance recommendation event endpoint available, recommendation time: %v", ie.NoticeTime)
	ch <- prometheus.MustNewConstMetric(c.rebalanceIndicator, prometheus.GaugeValue, 1, instanceID, instanceType)
}