
Commit dea3b3b

feat: cache last scrape results
1 parent e5659ac · commit dea3b3b

File tree

collectors/monitoring_collector.go
collectors/monitoring_metrics.go

2 files changed: +110, −36 lines

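The commit double-buffers the scrape results: one map of TimeSeriesMetrics is served read-only to Prometheus while a background run fills a second map, and a mutex guards the state and the swap. Below is a minimal, self-contained Go sketch of that pattern; the names (resultCache, startIfIdle, and the string values) are illustrative only and not code from this commit.

package main

import (
	"fmt"
	"sync"
)

// resultCache mirrors the commit's CollectionCache: `served` is read by scrapes,
// `active` is filled by the collection currently running, `busy` mirrors collectionActive.
type resultCache struct {
	mu     sync.Mutex
	served map[string]string
	active map[string]string
	busy   bool
}

// startIfIdle reports whether a new collection may begin, and marks it started.
func (c *resultCache) startIfIdle() bool {
	c.mu.Lock()
	defer c.mu.Unlock()
	if c.busy {
		return false
	}
	c.busy = true
	return true
}

// put stores a value into the in-progress collection, like putMetric.
func (c *resultCache) put(k, v string) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.active[k] = v
}

// swap publishes the completed run and resets the working map, like markCollectionCompleted.
func (c *resultCache) swap() {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.served = c.active
	c.active = make(map[string]string)
	c.busy = false
}

func main() {
	c := &resultCache{served: map[string]string{}, active: map[string]string{}}
	fmt.Println(c.startIfIdle()) // true: no collection running yet
	c.put("metric_a", "1")
	fmt.Println(len(c.served)) // 0: nothing is served until a collection completes
	c.swap()
	fmt.Println(len(c.served)) // 1: the completed run is now served
}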

collectors/monitoring_collector.go (+99, −25)
@@ -29,6 +29,59 @@ type MonitoringCollector struct {
 	lastScrapeDurationSecondsMetric prometheus.Gauge
 	collectorFillMissingLabels      bool
 	monitoringDropDelegatedProjects bool
+
+	cache *CollectionCache
+}
+
+type CollectionCache struct {
+	// This map holds the read-only result of a collection run.
+	// It will be served from the Prometheus scrape endpoint until the next
+	// collection is complete.
+	cachedTimeSeries map[string]*TimeSeriesMetrics
+
+	// This map holds the (potentially incomplete) metrics that have been collected.
+	// Once completed it will replace `cachedTimeSeries` and start being served.
+	activeTimeSeries map[string]*TimeSeriesMetrics
+
+	// Indicates whether there is a collection currently running and populating
+	// `activeTimeSeries` at the moment.
+	collectionActive bool
+
+	// Guards `activeTimeSeries` and `collectionActive`.
+	mu sync.Mutex
+}
+
+// Update the cache state to indicate that a collection has started.
+func (c *CollectionCache) markCollectionStarted() {
+	log.Debugf("markCollectionStarted")
+	c.mu.Lock()
+	c.collectionActive = true
+	c.mu.Unlock()
+}
+
+// Update the cache state to indicate that a collection has completed.
+func (c *CollectionCache) markCollectionCompleted() {
+	log.Debugf("markCollectionCompleted")
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	collected := c.activeTimeSeries
+	c.cachedTimeSeries = collected
+	c.activeTimeSeries = make(map[string]*TimeSeriesMetrics)
+	c.collectionActive = false
+}
+
+// Check if there is a collection running in the background.
+func (c *CollectionCache) isCollectionActive() bool {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.collectionActive
+}
+
+// During a collection, this func should be used to save the collected data.
+func (c *CollectionCache) putMetric(metricType string, timeSeries *TimeSeriesMetrics) {
+	c.mu.Lock()
+	c.activeTimeSeries[metricType] = timeSeries
+	c.mu.Unlock()
 }
 
 func NewMonitoringCollector(
@@ -114,6 +167,11 @@ func NewMonitoringCollector(
 		lastScrapeDurationSecondsMetric: lastScrapeDurationSecondsMetric,
 		collectorFillMissingLabels:      collectorFillMissingLabels,
 		monitoringDropDelegatedProjects: monitoringDropDelegatedProjects,
+		cache: &CollectionCache{
+			cachedTimeSeries: make(map[string]*TimeSeriesMetrics),
+			activeTimeSeries: make(map[string]*TimeSeriesMetrics),
+			collectionActive: false,
+		},
 	}
 
 	return monitoringCollector, nil
@@ -129,32 +187,41 @@ func (c *MonitoringCollector) Describe(ch chan<- *prometheus.Desc) {
 }
 
 func (c *MonitoringCollector) Collect(ch chan<- prometheus.Metric) {
-	var begun = time.Now()
 
-	errorMetric := float64(0)
-	if err := c.reportMonitoringMetrics(ch); err != nil {
-		errorMetric = float64(1)
-		c.scrapeErrorsTotalMetric.Inc()
-		log.Errorf("Error while getting Google Stackdriver Monitoring metrics: %s", err)
+	for _, timeSeries := range c.cache.cachedTimeSeries {
+		timeSeries.Complete(ch)
 	}
-	c.scrapeErrorsTotalMetric.Collect(ch)
 
+	c.scrapeErrorsTotalMetric.Collect(ch)
 	c.apiCallsTotalMetric.Collect(ch)
-
-	c.scrapesTotalMetric.Inc()
 	c.scrapesTotalMetric.Collect(ch)
-
-	c.lastScrapeErrorMetric.Set(errorMetric)
 	c.lastScrapeErrorMetric.Collect(ch)
-
-	c.lastScrapeTimestampMetric.Set(float64(time.Now().Unix()))
 	c.lastScrapeTimestampMetric.Collect(ch)
-
-	c.lastScrapeDurationSecondsMetric.Set(time.Since(begun).Seconds())
 	c.lastScrapeDurationSecondsMetric.Collect(ch)
+
+	if !c.cache.isCollectionActive() {
+		go func() {
+			start := time.Now()
+			errorMetric := float64(0)
+
+			c.cache.markCollectionStarted()
+			if err := c.updateMetricsCache(); err != nil {
+				errorMetric = float64(1)
+				c.scrapeErrorsTotalMetric.Inc()
+				log.Errorf("Error while getting Google Stackdriver Monitoring metrics: %s", err)
+			}
+
+			c.scrapesTotalMetric.Inc()
+			c.lastScrapeErrorMetric.Set(errorMetric)
+			c.lastScrapeTimestampMetric.Set(float64(time.Now().Unix()))
+			c.lastScrapeDurationSecondsMetric.Set(time.Since(start).Seconds())
+
+			c.cache.markCollectionCompleted()
+		}()
+	}
 }
 
-func (c *MonitoringCollector) reportMonitoringMetrics(ch chan<- prometheus.Metric) error {
+func (c *MonitoringCollector) updateMetricsCache() error {
 	metricDescriptorsFunction := func(page *monitoring.ListMetricDescriptorsResponse) error {
 		var wg = &sync.WaitGroup{}
 
@@ -181,7 +248,7 @@ func (c *MonitoringCollector) reportMonitoringMetrics(ch chan<- prometheus.Metri
 
 		for _, metricDescriptor := range uniqueDescriptors {
 			wg.Add(1)
-			go func(metricDescriptor *monitoring.MetricDescriptor, ch chan<- prometheus.Metric) {
+			go func(metricDescriptor *monitoring.MetricDescriptor) {
 				defer wg.Done()
 				log.Debugf("Retrieving Google Stackdriver Monitoring metrics for descriptor `%s`...", metricDescriptor.Type)
 				filter := fmt.Sprintf("metric.type=\"%s\"", metricDescriptor.Type)
@@ -193,9 +260,13 @@ func (c *MonitoringCollector) reportMonitoringMetrics(ch chan<- prometheus.Metri
 				}
 				timeSeriesListCall := c.monitoringService.Projects.TimeSeries.List(utils.ProjectResource(c.projectID)).
 					Filter(filter).
+					PageSize(100000).
 					IntervalStartTime(startTime.Format(time.RFC3339Nano)).
 					IntervalEndTime(endTime.Format(time.RFC3339Nano))
 
+				pageNumber := 0
+
+				start := time.Now()
 				for {
 					c.apiCallsTotalMetric.Inc()
 					page, err := timeSeriesListCall.Do()
@@ -204,20 +275,26 @@ func (c *MonitoringCollector) reportMonitoringMetrics(ch chan<- prometheus.Metri
 						errChannel <- err
 						break
 					}
+
 					if page == nil {
 						break
 					}
-					if err := c.reportTimeSeriesMetrics(page, metricDescriptor, ch); err != nil {
+					if err := c.updateMetricsCacheForMetric(page, metricDescriptor); err != nil {
 						log.Errorf("Error reporting Time Series metrics for descriptor `%s`: %v", metricDescriptor.Type, err)
 						errChannel <- err
 						break
 					}
 					if page.NextPageToken == "" {
 						break
 					}
+					pageNumber++
 					timeSeriesListCall.PageToken(page.NextPageToken)
 				}
-			}(metricDescriptor, ch)
+
+				elapsed := time.Since(start)
+				log.Debugf("Took %s to retrieve %v pages for metric descriptor %s", elapsed, pageNumber+1, metricDescriptor.Type)
+
+			}(metricDescriptor)
 		}
 
 		wg.Wait()
@@ -257,18 +334,15 @@ func (c *MonitoringCollector) reportMonitoringMetrics(ch chan<- prometheus.Metri
 	return <-errChannel
 }
 
-func (c *MonitoringCollector) reportTimeSeriesMetrics(
+func (c *MonitoringCollector) updateMetricsCacheForMetric(
 	page *monitoring.ListTimeSeriesResponse,
-	metricDescriptor *monitoring.MetricDescriptor,
-	ch chan<- prometheus.Metric,
-) error {
+	metricDescriptor *monitoring.MetricDescriptor) error {
 	var metricValue float64
 	var metricValueType prometheus.ValueType
 	var newestTSPoint *monitoring.Point
 
 	timeSeriesMetrics := &TimeSeriesMetrics{
 		metricDescriptor: metricDescriptor,
-		ch:               ch,
 		fillMissingLabels: c.collectorFillMissingLabels,
 		constMetrics:      make(map[string][]ConstMetric),
 		histogramMetrics:  make(map[string][]HistogramMetric),
@@ -354,7 +428,7 @@ func (c *MonitoringCollector) reportTimeSeriesMetrics(
 
 		timeSeriesMetrics.CollectNewConstMetric(timeSeries, labelKeys, metricValueType, metricValue, labelValues)
 	}
-	timeSeriesMetrics.Complete()
+	c.cache.putMetric(metricDescriptor.Type, timeSeriesMetrics)
 	return nil
 }

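With this change, Collect no longer calls the Stackdriver API inline: it flushes whatever the last completed run cached and, if no collection is active, starts one in a goroutine. A consequence is that a scrape can be up to one collection behind, and the very first scrape after startup serves an empty cache. The following is a hedged, toy sketch of that scrape-side behaviour; the cachingCollector type and the demo_value metric are illustrative and not part of the exporter.

package main

import (
	"fmt"
	"sync"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

type cachingCollector struct {
	mu      sync.Mutex
	cached  []prometheus.Metric // results of the last completed refresh
	running bool                // mirrors isCollectionActive()
	desc    *prometheus.Desc
}

func (c *cachingCollector) Describe(ch chan<- *prometheus.Desc) { ch <- c.desc }

func (c *cachingCollector) Collect(ch chan<- prometheus.Metric) {
	c.mu.Lock()
	for _, m := range c.cached {
		ch <- m // serve the previous run; never blocks on the slow backend
	}
	start := !c.running
	if start {
		c.running = true
	}
	c.mu.Unlock()

	if start {
		go func() { // at most one refresh at a time
			time.Sleep(10 * time.Millisecond) // stands in for the Stackdriver calls
			m := prometheus.MustNewConstMetric(c.desc, prometheus.GaugeValue, 42)
			c.mu.Lock()
			c.cached = []prometheus.Metric{m}
			c.running = false
			c.mu.Unlock()
		}()
	}
}

func main() {
	reg := prometheus.NewRegistry()
	col := &cachingCollector{desc: prometheus.NewDesc("demo_value", "cached demo metric", nil, nil)}
	reg.MustRegister(col)

	mfs, _ := reg.Gather()
	fmt.Println("first scrape families:", len(mfs)) // 0: cache still empty
	time.Sleep(50 * time.Millisecond)
	mfs, _ = reg.Gather()
	fmt.Println("second scrape families:", len(mfs)) // 1: background refresh completed
}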
collectors/monitoring_metrics.go (+11, −11)
@@ -4,8 +4,9 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	"google.golang.org/api/monitoring/v3"
 
-	"github.com/frodenas/stackdriver_exporter/utils"
 	"sort"
+
+	"github.com/frodenas/stackdriver_exporter/utils"
 )
 
 func buildFQName(timeSeries *monitoring.TimeSeries) string {
@@ -18,7 +19,6 @@ func buildFQName(timeSeries *monitoring.TimeSeries) string {
 
 type TimeSeriesMetrics struct {
 	metricDescriptor *monitoring.MetricDescriptor
-	ch               chan<- prometheus.Metric
 
 	fillMissingLabels bool
 	constMetrics      map[string][]ConstMetric
@@ -74,7 +74,7 @@ func (t *TimeSeriesMetrics) CollectNewConstHistogram(timeSeries *monitoring.Time
 		t.histogramMetrics[fqName] = append(vs, v)
 		return
 	}
-	t.ch <- t.newConstHistogram(fqName, labelKeys, dist, buckets, labelValues)
+	// t.ch <- t.newConstHistogram(fqName, labelKeys, dist, buckets, labelValues)
 }
 
 func (t *TimeSeriesMetrics) newConstHistogram(fqName string, labelKeys []string, dist *monitoring.Distribution, buckets map[float64]uint64, labelValues []string) prometheus.Metric {
@@ -107,7 +107,7 @@ func (t *TimeSeriesMetrics) CollectNewConstMetric(timeSeries *monitoring.TimeSer
 		t.constMetrics[fqName] = append(vs, v)
 		return
 	}
-	t.ch <- t.newConstMetric(fqName, labelKeys, metricValueType, metricValue, labelValues)
+	// t.ch <- t.newConstMetric(fqName, labelKeys, metricValueType, metricValue, labelValues)
 }
 
 func (t *TimeSeriesMetrics) newConstMetric(fqName string, labelKeys []string, metricValueType prometheus.ValueType, metricValue float64, labelValues []string) prometheus.Metric {
@@ -131,12 +131,12 @@ func hashLabelKeys(labelKeys []string) uint64 {
 	return dh
 }
 
-func (t *TimeSeriesMetrics) Complete() {
-	t.completeConstMetrics()
-	t.completeHistogramMetrics()
+func (t *TimeSeriesMetrics) Complete(ch chan<- prometheus.Metric) {
+	t.completeConstMetrics(ch)
+	t.completeHistogramMetrics(ch)
 }
 
-func (t *TimeSeriesMetrics) completeConstMetrics() {
+func (t *TimeSeriesMetrics) completeConstMetrics(ch chan<- prometheus.Metric) {
 	for _, vs := range t.constMetrics {
 		if len(vs) > 1 {
 			var needFill bool
@@ -151,12 +151,12 @@ func (t *TimeSeriesMetrics) completeConstMetrics() {
 		}
 
 		for _, v := range vs {
-			t.ch <- t.newConstMetric(v.fqName, v.labelKeys, v.valueType, v.value, v.labelValues)
+			ch <- t.newConstMetric(v.fqName, v.labelKeys, v.valueType, v.value, v.labelValues)
 		}
 	}
 }
 
-func (t *TimeSeriesMetrics) completeHistogramMetrics() {
+func (t *TimeSeriesMetrics) completeHistogramMetrics(ch chan<- prometheus.Metric) {
 	for _, vs := range t.histogramMetrics {
 		if len(vs) > 1 {
 			var needFill bool
@@ -170,7 +170,7 @@ func (t *TimeSeriesMetrics) completeHistogramMetrics() {
 			}
 		}
 		for _, v := range vs {
-			t.ch <- t.newConstHistogram(v.fqName, v.labelKeys, v.dist, v.buckets, v.labelValues)
+			ch <- t.newConstHistogram(v.fqName, v.labelKeys, v.dist, v.buckets, v.labelValues)
 		}
 	}
 }
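In monitoring_metrics.go the TimeSeriesMetrics struct no longer stores the scrape channel; Complete now receives the channel of whichever scrape is currently being served, so values buffered by a background collection can be flushed into a later scrape. A small illustrative sketch of that inversion, using a hypothetical buffered type rather than the exporter's own code:

package main

import "fmt"

// buffered stands in for TimeSeriesMetrics: values are accumulated during a
// background collection and only emitted when a scrape asks for them.
type buffered struct {
	values []float64
}

// Complete mirrors TimeSeriesMetrics.Complete(ch): the caller supplies the sink.
func (b *buffered) Complete(ch chan<- float64) {
	for _, v := range b.values {
		ch <- v
	}
}

func main() {
	b := &buffered{values: []float64{1, 2, 3}}
	ch := make(chan float64, len(b.values)) // each scrape brings its own channel
	b.Complete(ch)
	close(ch)
	for v := range ch {
		fmt.Println(v)
	}
}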
