Skip to content

Commit 015619f

Browse files
committed
cgroupv2: Add CRI stat gathering support
1 parent 08809b0 commit 015619f

File tree

4 files changed

+101
-16
lines changed

4 files changed

+101
-16
lines changed

cmd/container-monitor/cmd/cmd.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ func (collector *Collector) Collect(ch chan<- prometheus.Metric) {
127127
wg.Add(1)
128128

129129
go func(c ctrstats.Container, results chan<- prometheus.Metric) {
130-
stats, err := ctrstats.GetContainerStats(context.Background(), c)
130+
stats, err := ctrstats.GetContainerStatsV1(context.Background(), c)
131131
if err != nil {
132132
monitorLog.WithFields(log.Fields{
133133
"container": c.ContainerID,

cmd/metrics-node-sampler/cmd/cmd_linux_amd64.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,8 @@ func (collector *Collector) Collect(ch chan<- prometheus.Metric) {
124124
wg.Add(1)
125125

126126
go func(c ctrstats.Container, results chan<- prometheus.Metric) {
127-
stats, err := ctrstats.GetContainerStats(context.Background(), c)
127+
// TODO: should we add cgroupv2 support here too?
128+
stats, err := ctrstats.GetContainerStatsV1(context.Background(), c)
128129
if err != nil {
129130
monitorLog.WithFields(logrus.Fields{
130131
"container": c.ContainerID,

pkg/ctrstats/ctrstats.go

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"fmt"
2121

2222
v1 "github.com/containerd/cgroups/stats/v1"
23+
v2 "github.com/containerd/cgroups/v2/stats"
2324
"github.com/containerd/containerd"
2425
"github.com/containerd/typeurl"
2526
"github.com/sirupsen/logrus"
@@ -75,7 +76,7 @@ func GetContainers(client *containerd.Client) ([]Container, error) {
7576
return cids, nil
7677
}
7778

78-
func GetContainerStats(ctx context.Context, c Container) (*v1.Metrics, error) {
79+
func GetContainerStatsV1(ctx context.Context, c Container) (*v1.Metrics, error) {
7980
task, err := c.Container.Task(ctx, nil)
8081
if err != nil {
8182
return nil, err
@@ -101,3 +102,30 @@ func GetContainerStats(ctx context.Context, c Container) (*v1.Metrics, error) {
101102

102103
return nil, fmt.Errorf("no stats obtained for container: %s", c.ContainerID)
103104
}
105+
106+
// GetContainerStatsV2 fetches the metrics of the task backing container c
// from containerd and returns them decoded as cgroups v2 stats (*v2.Metrics).
//
// Errors from looking up the task, reading its metrics, or unmarshalling the
// payload are returned to the caller unchanged. An error is also returned when
// the decoded payload is not a *v2.Metrics, or when the metrics response
// carries no Data at all.
//
// NOTE(review): presumably on a cgroup v1 host the payload decodes to the v1
// type and the type assertion below fails - confirm callers only use this on
// cgroup v2 hosts.
func GetContainerStatsV2(ctx context.Context, c Container) (*v2.Metrics, error) {
107+
task, err := c.Container.Task(ctx, nil)
108+
if err != nil {
109+
return nil, err
110+
}
111+
112+
r, err := task.Metrics(ctx)
113+
if err != nil {
114+
return nil, err
115+
}
116+
117+
// Data is a typed Any payload; decode it and make sure it is the v2 type.
if r.Data != nil {
118+
s, err := typeurl.UnmarshalAny(r.Data)
119+
if err != nil {
120+
return nil, err
121+
}
122+
123+
stats, ok := s.(*v2.Metrics)
124+
if !ok {
125+
return nil, fmt.Errorf("type assertion failure for task.Metrics' Data field")
126+
}
127+
return stats, nil
128+
}
129+
130+
// Reached only when the metrics response had a nil Data field.
return nil, fmt.Errorf("no stats obtained for container: %s", c.ContainerID)
131+
}

pkg/sampler/ctrstats_linux_amd64.go

Lines changed: 69 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"time"
2121

2222
v1 "github.com/containerd/containerd/metrics/types/v1"
23+
v2 "github.com/containerd/containerd/metrics/types/v2"
2324
"k8s.io/apimachinery/pkg/util/sets"
2425
"sigs.k8s.io/usage-metrics-collector/pkg/ctrstats"
2526
)
@@ -45,23 +46,42 @@ func (s *sampleCache) getContainerCPUAndMemoryCM() (cpuMetrics, memoryMetrics, e
4546
log.V(9).Info("found containers", "count", len(containers))
4647

4748
for _, c := range containers {
48-
// TODO: is this a reasonable key for the metric read time?
4949
readTime := s.metricsReader.readTimeFunc(c.PodID + "/" + c.ContainerID)
50-
stats, err := ctrstats.GetContainerStats(context.Background(), c)
50+
var statsV1 *v1.Metrics
51+
var statsV2 *v2.Metrics
52+
var err error
53+
if s.metricsReader.IsCgroupV2() {
54+
statsV2, err = ctrstats.GetContainerStatsV2(context.Background(), c)
55+
} else {
56+
statsV1, err = ctrstats.GetContainerStatsV1(context.Background(), c)
57+
}
58+
5159
if err != nil {
5260
log.V(10).WithValues(
5361
"container", c.ContainerID,
5462
).Info("failed to get container stats - likely an issue with non-running containers being tracked in containerd state", "err", err)
55-
} else if stats != nil {
56-
cpu, err := cmStatsToCPUResult(stats, readTime)
63+
} else if statsV1 != nil || statsV2 != nil {
64+
var cpu containerCPUMetrics
65+
if statsV1 != nil {
66+
cpu, err = cmStatsToCPUResultV1(statsV1, readTime)
67+
} else if statsV2 != nil {
68+
cpu, err = cmStatsToCPUResultV2(statsV2, readTime)
69+
}
5770
if err != nil {
5871
log.Error(err, "no cpu stats available for container",
5972
"namespace", c.NamespaceName,
6073
"pod", c.PodName,
6174
"container", c.ContainerName,
6275
)
6376
}
64-
mem, err := cmStatsToMemoryResult(stats, readTime)
77+
78+
var mem containerMemoryMetrics
79+
if statsV1 != nil {
80+
mem, err = cmStatsToMemoryResultV1(statsV1, readTime)
81+
82+
} else if statsV2 != nil {
83+
mem, err = cmStatsToMemoryResultV2(statsV2, readTime)
84+
}
6585
if err != nil {
6686
log.Error(err, "no memory stats available for container",
6787
"namespace", c.NamespaceName,
@@ -87,16 +107,15 @@ func (s *sampleCache) getContainerCPUAndMemoryCM() (cpuMetrics, memoryMetrics, e
87107
return cpuResult, memResult, nil
88108
}
89109

90-
// cmStatsToCPUResult converts cpu stats read from containerd into a compatible type.
91-
func cmStatsToCPUResult(stats *v1.Metrics, readTime time.Time) (containerCPUMetrics, error) {
110+
// cmStatsToCPUResultV1 converts cpu stats read from containerd into a compatible type.
111+
func cmStatsToCPUResultV1(stats *v1.Metrics, readTime time.Time) (containerCPUMetrics, error) {
92112
metrics := containerCPUMetrics{}
93113
if stats.CPU == nil {
94114
err := errors.New("no cpu stats available")
95115
return metrics, err
96116
}
97117

98118
metrics.usage.Time = readTime
99-
// TODO: I assume we want usage.total but kernel/user breakouts are also available
100119
metrics.usage.UsageNanoSec = stats.CPU.Usage.Total
101120

102121
metrics.throttling.Time = readTime
@@ -107,22 +126,38 @@ func cmStatsToCPUResult(stats *v1.Metrics, readTime time.Time) (containerCPUMetr
107126
return metrics, nil
108127
}
109128

110-
// cmStatsToMemoryResult converts memory stats read from containerd into a compatible type.
111-
func cmStatsToMemoryResult(stats *v1.Metrics, readTime time.Time) (containerMemoryMetrics, error) {
129+
// cmStatsToCPUResultV2 converts cgroup v2 cpu stats read from containerd into a compatible type.
// Both the usage and the throttling samples are stamped with readTime.
// It returns an error together with zero-valued metrics when stats.CPU is nil.
// stats itself is dereferenced without a nil check, so callers must pass a non-nil value.
130+
func cmStatsToCPUResultV2(stats *v2.Metrics, readTime time.Time) (containerCPUMetrics, error) {
131+
metrics := containerCPUMetrics{}
132+
if stats.CPU == nil {
133+
err := errors.New("no cpu stats available")
134+
return metrics, err
135+
}
136+
137+
metrics.usage.Time = readTime
138+
metrics.usage.UsageNanoSec = stats.CPU.UsageUsec * 1000 // convert usec to nanosec
139+
140+
metrics.throttling.Time = readTime
141+
metrics.throttling.ThrottledNanoSec = stats.CPU.ThrottledUsec * 1000 // convert usec to nanosec
142+
// Period counters are copied through as-is; no unit conversion is applied.
metrics.throttling.ThrottledPeriods = stats.CPU.NrThrottled
143+
metrics.throttling.TotalPeriods = stats.CPU.NrPeriods
144+
145+
return metrics, nil
146+
}
147+
148+
// cmStatsToMemoryResultV1 converts memory stats read from containerd into a compatible type.
149+
func cmStatsToMemoryResultV1(stats *v1.Metrics, readTime time.Time) (containerMemoryMetrics, error) {
112150
metrics := containerMemoryMetrics{}
113151
if stats.Memory == nil {
114152
err := errors.New("no memory stats available")
115153
return metrics, err
116154
}
117155

118156
metrics.Time = readTime
119-
// NOTE: RSSHuge, MappedFiles, pgfaults, (in)active anon, etc. also available
120-
// TODO: should this be Total{RSS,Cache} or is this fine?
121157
metrics.RSS = stats.Memory.RSS
122158
metrics.Cache = stats.Memory.Cache
123159

124160
if stats.MemoryOomControl != nil {
125-
// TODO: not sure if these are the right metrics?
126161
metrics.OOMKills = stats.MemoryOomControl.OomKill
127162
metrics.OOMs = stats.MemoryOomControl.UnderOom
128163
} else {
@@ -131,3 +166,24 @@ func cmStatsToMemoryResult(stats *v1.Metrics, readTime time.Time) (containerMemo
131166

132167
return metrics, nil
133168
}
169+
170+
// cmStatsToMemoryResultV2 converts cgroup v2 memory stats read from containerd into a compatible type.
// It returns an error together with zero-valued metrics when stats.Memory is nil.
// Only Time, Current, OOMKills and OOMs are populated here; every other field
// of the result is left at its zero value.
// stats itself is dereferenced without a nil check, so callers must pass a non-nil value.
171+
func cmStatsToMemoryResultV2(stats *v2.Metrics, readTime time.Time) (containerMemoryMetrics, error) {
172+
metrics := containerMemoryMetrics{}
173+
if stats.Memory == nil {
174+
err := errors.New("no memory stats available")
175+
return metrics, err
176+
}
177+
178+
metrics.Time = readTime
179+
metrics.Current = stats.Memory.Usage
180+
181+
// Missing memory events are not an error: the OOM counters stay zero and
// the absence is only logged at verbosity 10.
if stats.MemoryEvents != nil {
182+
metrics.OOMKills = stats.MemoryEvents.OomKill
183+
metrics.OOMs = stats.MemoryEvents.Oom
184+
} else {
185+
log.V(10).Info("no OOM stats available")
186+
}
187+
188+
return metrics, nil
189+
}

0 commit comments

Comments
 (0)