Skip to content

Commit dbdc01a

Browse files
committed
emit the metric on every heartbeat
1 parent 64f7fe1 commit dbdc01a

File tree

4 files changed

+152
-29
lines changed

4 files changed

+152
-29
lines changed

Diff for: ddtrace/tracer/telemetry.go

+43-27
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,7 @@ func startTelemetry(c *config) {
3333
// Do not do extra work populating config data if instrumentation telemetry is disabled.
3434
return
3535
}
36-
telemetry.GlobalClient.ApplyOps(
37-
telemetry.WithService(c.serviceName),
38-
telemetry.WithEnv(c.env),
39-
telemetry.WithHTTPClient(c.httpClient),
40-
// c.logToStdout is true if serverless is turned on
41-
// c.ciVisibilityAgentless is true if ci visibility mode is turned on and agentless writer is configured
42-
telemetry.WithURL(c.logToStdout || c.ciVisibilityAgentless, c.agentURL.String()),
43-
telemetry.WithVersion(c.version),
44-
)
36+
4537
telemetryConfigs := []telemetry.Configuration{
4638
{Name: "trace_debug_enabled", Value: c.debug},
4739
{Name: "agent_feature_drop_p0s", Value: c.agent.DropP0s},
@@ -73,6 +65,38 @@ func startTelemetry(c *config) {
7365
c.traceSampleRules.toTelemetry(),
7466
telemetry.Sanitize(telemetry.Configuration{Name: "span_sample_rules", Value: c.spanRules}),
7567
}
68+
69+
// Compute the value and tags of the orchestrion enablement gauge, and record orchestrion metadata as telemetry configuration.
70+
const orchestrionEnabledMetric = "orchestrion.enabled"
71+
var (
72+
orchestrionEnabledValue float64
73+
orchestrionEnabledTags []string
74+
)
75+
if c.orchestrionCfg.Enabled {
76+
orchestrionEnabledValue = 1
77+
orchestrionEnabledTags = make([]string, 0, len(c.orchestrionCfg.Metadata))
78+
for k, v := range c.orchestrionCfg.Metadata {
79+
telemetryConfigs = append(telemetryConfigs, telemetry.Configuration{Name: "orchestrion_" + k, Value: v})
80+
orchestrionEnabledTags = append(orchestrionEnabledTags, k+":"+v)
81+
}
82+
if testing.Testing() {
83+
// In tests, ensure tags are consistently ordered... Ordering is irrelevant outside of tests.
84+
slices.Sort(orchestrionEnabledTags)
85+
}
86+
}
87+
88+
// Apply the GlobalClient options...
89+
telemetry.GlobalClient.ApplyOps(
90+
telemetry.WithService(c.serviceName),
91+
telemetry.WithEnv(c.env),
92+
telemetry.WithHTTPClient(c.httpClient),
93+
// c.logToStdout is true if serverless is turned on
94+
// c.ciVisibilityAgentless is true if ci visibility mode is turned on and agentless writer is configured
95+
telemetry.WithURL(c.logToStdout || c.ciVisibilityAgentless, c.agentURL.String()),
96+
telemetry.WithVersion(c.version),
97+
telemetry.WithHeartbeatMetric(telemetry.NamespaceTracers, telemetry.MetricKindGauge, orchestrionEnabledMetric, func() float64 { return orchestrionEnabledValue }, orchestrionEnabledTags, false),
98+
)
99+
76100
var peerServiceMapping []string
77101
for key, value := range c.peerServiceMappings {
78102
peerServiceMapping = append(peerServiceMapping, fmt.Sprintf("%s:%s", key, value))
@@ -109,24 +133,16 @@ func startTelemetry(c *config) {
109133
telemetry.Configuration{Name: fmt.Sprintf("sr_%s_(%s)_(%s)", rule.ruleType.String(), service, name),
110134
Value: fmt.Sprintf("rate:%f_maxPerSecond:%f", rule.Rate, rule.MaxPerSecond)})
111135
}
112-
if c.orchestrionCfg.Enabled {
113-
tags := make([]string, 0, len(c.orchestrionCfg.Metadata))
114-
for k, v := range c.orchestrionCfg.Metadata {
115-
telemetryConfigs = append(telemetryConfigs, telemetry.Configuration{Name: "orchestrion_" + k, Value: v})
116-
tags = append(tags, k+":"+v)
117-
}
118-
if testing.Testing() {
119-
// In tests, ensure tags are consistently ordered...
120-
slices.Sort(tags)
121-
}
122-
telemetry.GlobalClient.Record(
123-
telemetry.NamespaceTracers,
124-
telemetry.MetricKindGauge,
125-
"orchestrion.enabled", 1,
126-
tags,
127-
false, // Go-specific
128-
)
129-
}
136+
137+
// Submit the initial metric tick
138+
telemetry.GlobalClient.Record(
139+
telemetry.NamespaceTracers,
140+
telemetry.MetricKindGauge,
141+
orchestrionEnabledMetric, orchestrionEnabledValue,
142+
orchestrionEnabledTags,
143+
false, // Go-specific
144+
)
145+
130146
telemetryConfigs = append(telemetryConfigs, additionalConfigs...)
131147
telemetry.GlobalClient.ProductChange(telemetry.NamespaceTracers, true, telemetryConfigs)
132148
}

Diff for: internal/telemetry/client.go

+38-2
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,19 @@ type client struct {
147147
// Globally registered application configuration sent in the app-started request, along with the locally-defined
148148
// configuration of the event.
149149
globalAppConfig []Configuration
150+
151+
// heartbeatMetrics is a set of metrics to be emitted each time a heartbeat is sent.
152+
heartbeatMetrics []heartbeatMetric
153+
}
154+
155+
// heartbeatMetric is a metric that is emitted each time a heartbeat is sent.
156+
type heartbeatMetric struct {
157+
namespace Namespace
158+
kind MetricKind
159+
name string
160+
value func() float64 // Called to determine the current value of the metric.
161+
tags []string
162+
common bool
150163
}
151164

152165
func log(msg string, args ...interface{}) {
@@ -338,14 +351,22 @@ func metricKey(name string, tags []string, kind MetricKind) string {
338351
return name + string(kind) + strings.Join(tags, "-")
339352
}
340353

341-
// Record sets the value for a gauge or distribution metric type
342-
// with the given name and tags. If the metric is not language-specific, common should be set to true
354+
// Record sets the value for a gauge or distribution metric type with the given
355+
// name and tags. If the metric is not language-specific, common should be set
356+
// to true.
343357
func (c *client) Record(namespace Namespace, kind MetricKind, name string, value float64, tags []string, common bool) {
344358
c.mu.Lock()
345359
defer c.mu.Unlock()
346360
if !c.started {
347361
return
348362
}
363+
c.record(namespace, kind, name, value, tags, common)
364+
}
365+
366+
// record sets the value for a gauge or distribution metric type with the given
367+
// name and tags. If the metric is not language-specific, common should be set
368+
// to true. Must be called with c.mu locked.
369+
func (c *client) record(namespace Namespace, kind MetricKind, name string, value float64, tags []string, common bool) {
349370
if _, ok := c.metrics[namespace]; !ok {
350371
c.metrics[namespace] = map[string]*metric{}
351372
}
@@ -606,7 +627,22 @@ func (c *client) backgroundHeartbeat() {
606627
if !c.started {
607628
return
608629
}
630+
631+
// Emit all the metrics that were registered for heartbeat.
632+
c.emitHeartbeatMetrics()
633+
634+
// Send the actual app heartbeat.
609635
c.scheduleSubmit(c.newRequest(RequestTypeAppHeartbeat))
610636
c.flush(false)
611637
c.heartbeatT.Reset(c.heartbeatInterval)
612638
}
639+
640+
// emitHeartbeatMetrics is invoked as part of each heartbeat tick, and is
641+
// responsible for emitting periodic metrics that are expected to be sent
642+
// throughout the lifetime of the service. These are typically gauge metrics.
643+
// Must be called with c.mu locked.
644+
func (c *client) emitHeartbeatMetrics() {
645+
for _, m := range c.heartbeatMetrics {
646+
c.record(m.namespace, m.kind, m.name, m.value(), m.tags, m.common)
647+
}
648+
}

Diff for: internal/telemetry/client_test.go

+60
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,67 @@ func TestClient(t *testing.T) {
5252
t.Fatal("Heartbeat took more than 30 seconds. Should have been ~1 second")
5353
case <-heartbeat:
5454
}
55+
}
5556

57+
func TestHeartbeatMetric(t *testing.T) {
58+
t.Setenv("DD_TELEMETRY_HEARTBEAT_INTERVAL", "1")
59+
heartbeat := make(chan struct{})
60+
metrics := make(chan string)
61+
62+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
63+
h := r.Header.Get("DD-Telemetry-Request-Type")
64+
if len(h) == 0 {
65+
t.Fatal("didn't get telemetry request type header")
66+
}
67+
switch RequestType(h) {
68+
case RequestTypeAppHeartbeat:
69+
select {
70+
case heartbeat <- struct{}{}:
71+
default:
72+
}
73+
case RequestTypeGenerateMetrics:
74+
var data struct {
75+
Payload *Metrics
76+
}
77+
if err := json.NewDecoder(r.Body).Decode(&data); err != nil {
78+
t.Fatal(err)
79+
}
80+
for _, s := range data.Payload.Series {
81+
select {
82+
case metrics <- s.Metric:
83+
default:
84+
}
85+
}
86+
}
87+
}))
88+
defer server.Close()
89+
90+
client := &client{
91+
URL: server.URL,
92+
}
93+
const metricName = "test.metric"
94+
client.ApplyOps(WithHeartbeatMetric(NamespaceGeneral, MetricKindGauge, metricName, func() float64 { return 1 }, nil, false))
95+
96+
client.mu.Lock()
97+
client.start(nil, NamespaceTracers, true)
98+
client.start(nil, NamespaceTracers, true) // test idempotence
99+
client.mu.Unlock()
100+
defer client.Stop()
101+
102+
timeout := time.After(30 * time.Second)
103+
waitingForHeartbeat := true
104+
waitingForMetric := true
105+
for waitingForHeartbeat || waitingForMetric {
106+
select {
107+
case <-timeout:
108+
t.Fatal("Heartbeat took more than 30 seconds. Should have been ~1 second")
109+
case <-heartbeat:
110+
waitingForHeartbeat = false
111+
case m := <-metrics:
112+
assert.Equal(t, metricName, m)
113+
waitingForMetric = false
114+
}
115+
}
56116
}
57117

58118
func TestMetrics(t *testing.T) {

Diff for: internal/telemetry/option.go

+11
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,17 @@ func WithVersion(version string) Option {
5656
}
5757
}
5858

59+
// WithHeartbeatMetric registers a metric data point to be emitted at each
60+
// heartbeat tick. This is useful to maintain gauge metrics at a specific level.
61+
func WithHeartbeatMetric(namespace Namespace, kind MetricKind, name string, value func() float64, tags []string, common bool) Option {
62+
return func(client *client) {
63+
client.heartbeatMetrics = append(
64+
client.heartbeatMetrics,
65+
heartbeatMetric{namespace, kind, name, value, tags, common},
66+
)
67+
}
68+
}
69+
5970
// WithHTTPClient specifies the http client for the telemetry client
6071
func WithHTTPClient(httpClient *http.Client) Option {
6172
return func(client *client) {

0 commit comments

Comments
 (0)