Skip to content

Commit 9611ef6

Browse files
committed
Fix docker health state
1 parent 4b788c6 commit 9611ef6

File tree

4 files changed

+83
-21
lines changed

4 files changed

+83
-21
lines changed

container/docker/handler.go

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -68,9 +68,8 @@ type dockerContainerHandler struct {
6868
creationTime time.Time
6969

7070
// Metadata associated with the container.
71-
envs map[string]string
72-
labels map[string]string
73-
healthStatus string
71+
envs map[string]string
72+
labels map[string]string
7473

7574
// Image name used for this container.
7675
image string
@@ -93,6 +92,9 @@ type dockerContainerHandler struct {
9392
reference info.ContainerReference
9493

9594
libcontainerHandler *containerlibcontainer.Handler
95+
96+
// the docker client is needed to inspect the container and get the health status
97+
client docker.APIClient
9698
}
9799

98100
var _ container.ContainerHandler = &dockerContainerHandler{}
@@ -201,10 +203,7 @@ func newDockerContainerHandler(
201203
labels: ctnr.Config.Labels,
202204
includedMetrics: metrics,
203205
zfsParent: zfsParent,
204-
}
205-
// Health status may be nil if no health check is configured
206-
if ctnr.State.Health != nil {
207-
handler.healthStatus = ctnr.State.Health.Status
206+
client: client,
208207
}
209208
// Timestamp returned by Docker is in time.RFC3339Nano format.
210209
handler.creationTime, err = time.Parse(time.RFC3339Nano, ctnr.Created)
@@ -331,7 +330,16 @@ func (h *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
331330
if err != nil {
332331
return stats, err
333332
}
334-
stats.Health.Status = h.healthStatus
333+
334+
// We assume that if Inspect fails then the container is not known to docker.
335+
ctnr, err := h.client.ContainerInspect(context.Background(), h.reference.Id)
336+
if err != nil {
337+
return nil, fmt.Errorf("failed to inspect container %q: %v", h.reference.Id, err)
338+
}
339+
340+
if ctnr.State.Health != nil {
341+
stats.Health.Status = ctnr.State.Health.Status
342+
}
335343

336344
// Get filesystem stats.
337345
err = FsStats(stats, h.machineInfoFactory, h.includedMetrics, h.storageDriver,

integration/tests/api/docker_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,3 +398,34 @@ func TestDockerFilesystemStats(t *testing.T) {
398398
t.Fail()
399399
}
400400
}
401+
402+
func TestDockerHealthState(t *testing.T) {
403+
fm := framework.New(t)
404+
defer fm.Cleanup()
405+
406+
containerID := fm.Docker().Run(framework.DockerRunArgs{
407+
Image: "registry.k8s.io/busybox:1.27",
408+
Args: []string{
409+
"--health-cmd", "exit 0",
410+
"--health-interval", "1s",
411+
},
412+
}, "sh", "-c", "sleep 10")
413+
414+
// Wait for the container to show up.
415+
waitForContainer(containerID, fm)
416+
417+
getHealth := func() string {
418+
containerInfo, err := fm.Cadvisor().Client().DockerContainer(containerID, &info.ContainerInfoRequest{NumStats: 1})
419+
require.NoError(t, err)
420+
require.Len(t, containerInfo.Stats, 1)
421+
return containerInfo.Stats[0].Health.Status
422+
}
423+
424+
// Initially the container is in starting state.
425+
require.Equal(t, "starting", getHealth())
426+
427+
// Eventually the container should be in healthy state.
428+
require.Eventually(t, func() bool {
429+
return getHealth() == "healthy"
430+
}, 10*time.Second, 100*time.Millisecond)
431+
}

metrics/prometheus.go

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -139,19 +139,7 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
139139
name: "container_health_state",
140140
help: "The result of the container's health check",
141141
valueType: prometheus.GaugeValue,
142-
getValues: func(s *info.ContainerStats) metricValues {
143-
return metricValues{{
144-
// inline if to check if s.health.status = healthy
145-
value: func(s *info.ContainerStats) float64 {
146-
if s.Health.Status == "healthy" {
147-
return 1
148-
} else {
149-
return 0
150-
}
151-
}(s),
152-
timestamp: s.Timestamp,
153-
}}
154-
},
142+
getValues: getContainerHealthState,
155143
},
156144
},
157145
includedMetrics: includedMetrics,
@@ -2109,3 +2097,18 @@ func getMinCoreScalingRatio(s *info.ContainerStats) metricValues {
21092097
}
21102098
return values
21112099
}
2100+
2101+
func getContainerHealthState(s *info.ContainerStats) metricValues {
2102+
value := float64(0)
2103+
switch s.Health.Status {
2104+
case "healthy":
2105+
value = 1
2106+
case "": // if container has no health check defined
2107+
value = -1
2108+
default: // starting or unhealthy
2109+
}
2110+
return metricValues{{
2111+
value: value,
2112+
timestamp: s.Timestamp,
2113+
}}
2114+
}

metrics/prometheus_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -336,3 +336,23 @@ func TestGetMinCoreScalingRatio(t *testing.T) {
336336
assert.Contains(t, values, 0.5)
337337
assert.Contains(t, values, 0.3)
338338
}
339+
340+
func TestGetContainerHealthState(t *testing.T) {
341+
testCases := []struct {
342+
name string
343+
containerStats *info.ContainerStats
344+
expectedValue float64
345+
}{
346+
{name: "healthy", expectedValue: 1.0, containerStats: &info.ContainerStats{Health: info.Health{Status: "healthy"}}},
347+
{name: "unhealthy", expectedValue: 0.0, containerStats: &info.ContainerStats{Health: info.Health{Status: "unhealthy"}}},
348+
{name: "starting", expectedValue: 0.0, containerStats: &info.ContainerStats{Health: info.Health{Status: "unknown"}}},
349+
{name: "empty", expectedValue: -1.0, containerStats: &info.ContainerStats{}},
350+
}
351+
for _, tc := range testCases {
352+
t.Run(tc.name, func(t *testing.T) {
353+
metricVals := getContainerHealthState(tc.containerStats)
354+
assert.Equal(t, 1, len(metricVals))
355+
assert.Equal(t, tc.expectedValue, metricVals[0].value)
356+
})
357+
}
358+
}

0 commit comments

Comments
 (0)