Add Pressure Stall Information Metrics #3649

Merged · 7 commits · Feb 20, 2025
1 change: 1 addition & 0 deletions cmd/cadvisor_test.go
@@ -112,6 +112,7 @@ func TestToIncludedMetrics(t *testing.T) {
container.ResctrlMetrics: struct{}{},
container.CPUSetMetrics: struct{}{},
container.OOMMetrics: struct{}{},
container.PressureMetrics: struct{}{},
},
container.AllMetrics,
{},
2 changes: 2 additions & 0 deletions container/factory.go
@@ -66,6 +66,7 @@ const (
ResctrlMetrics MetricKind = "resctrl"
CPUSetMetrics MetricKind = "cpuset"
OOMMetrics MetricKind = "oom_event"
PressureMetrics MetricKind = "pressure"
)

// AllMetrics represents all kinds of metrics that cAdvisor supports.
@@ -91,6 +92,7 @@ var AllMetrics = MetricSet{
ResctrlMetrics: struct{}{},
CPUSetMetrics: struct{}{},
OOMMetrics: struct{}{},
PressureMetrics: struct{}{},
}

// AllNetworkMetrics represents all network metrics that cAdvisor supports.
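For orientation, here is a minimal sketch of how the new PressureMetrics kind participates in metric filtering. The MetricSet literal and the Has check mirror usages that appear elsewhere in this diff (cmd/cadvisor_test.go above and metrics/prometheus.go below); the package main wrapper and printed message are illustrative only and not part of the change.

```go
package main

import (
	"fmt"

	"github.com/google/cadvisor/container"
)

func main() {
	// Opt in to pressure metrics alongside another kind, using the same
	// map-literal style as the test above.
	metrics := container.MetricSet{
		container.OOMMetrics:      struct{}{},
		container.PressureMetrics: struct{}{},
	}

	// The Prometheus collector further down gates the new PSI collectors
	// on exactly this kind of check.
	if metrics.Has(container.PressureMetrics) {
		fmt.Println("pressure (PSI) metrics enabled")
	}
}
```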
19 changes: 19 additions & 0 deletions container/libcontainer/handler.go
@@ -771,6 +771,7 @@ func setCPUStats(s *cgroups.Stats, ret *info.ContainerStats, withPerCPU bool) {
ret.Cpu.CFS.Periods = s.CpuStats.ThrottlingData.Periods
ret.Cpu.CFS.ThrottledPeriods = s.CpuStats.ThrottlingData.ThrottledPeriods
ret.Cpu.CFS.ThrottledTime = s.CpuStats.ThrottlingData.ThrottledTime
setPSIStats(s.CpuStats.PSI, &ret.Cpu.PSI)

if !withPerCPU {
return
@@ -792,13 +793,15 @@ func setDiskIoStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.DiskIo.IoWaitTime = diskStatsCopy(s.BlkioStats.IoWaitTimeRecursive)
ret.DiskIo.IoMerged = diskStatsCopy(s.BlkioStats.IoMergedRecursive)
ret.DiskIo.IoTime = diskStatsCopy(s.BlkioStats.IoTimeRecursive)
setPSIStats(s.BlkioStats.PSI, &ret.DiskIo.PSI)
}

func setMemoryStats(s *cgroups.Stats, ret *info.ContainerStats) {
ret.Memory.Usage = s.MemoryStats.Usage.Usage
ret.Memory.MaxUsage = s.MemoryStats.Usage.MaxUsage
ret.Memory.Failcnt = s.MemoryStats.Usage.Failcnt
ret.Memory.KernelUsage = s.MemoryStats.KernelUsage.Usage
setPSIStats(s.MemoryStats.PSI, &ret.Memory.PSI)

if cgroups.IsCgroup2UnifiedMode() {
ret.Memory.Cache = s.MemoryStats.Stats["file"]
@@ -884,6 +887,22 @@ func setHugepageStats(s *cgroups.Stats, ret *info.ContainerStats) {
}
}

func setPSIData(d *cgroups.PSIData, ret *info.PSIData) {
if d != nil {
ret.Total = d.Total
ret.Avg10 = d.Avg10
ret.Avg60 = d.Avg60
ret.Avg300 = d.Avg300
}
}

func setPSIStats(s *cgroups.PSIStats, ret *info.PSIStats) {
if s != nil {
setPSIData(&s.Full, &ret.Full)
setPSIData(&s.Some, &ret.Some)
}
}

// read from pids path not cpu
func setThreadsStats(s *cgroups.Stats, ret *info.ContainerStats) {
if s != nil {
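The setPSIStats/setPSIData helpers above copy runc's parsed PSI data into cAdvisor's value-typed structs, guarding against the nil pointer that runc reports on kernels or cgroup drivers without PSI support. As a rough illustration of where that data comes from, the sketch below parses the two lines of a cgroup v2 pressure file (cpu.pressure, memory.pressure, io.pressure) into cgroups.PSIData. The real parsing lives in runc's cgroups package, and the import path shown is an assumption based on how handler.go consumes cgroups.Stats; only the field names and layout are confirmed by this diff.

```go
package main

import (
	"fmt"

	// Assumed import path for the runc cgroups types used by handler.go.
	"github.com/opencontainers/runc/libcontainer/cgroups"
)

// parsePSILine converts one line of a cgroup v2 pressure file into the runc
// PSIData struct consumed by setPSIStats above. Illustration only.
func parsePSILine(line string) (kind string, d cgroups.PSIData, err error) {
	_, err = fmt.Sscanf(line, "%s avg10=%f avg60=%f avg300=%f total=%d",
		&kind, &d.Avg10, &d.Avg60, &d.Avg300, &d.Total)
	return kind, d, err
}

func main() {
	// Example contents of a cpu.pressure file; total is in microseconds.
	lines := []string{
		"some avg10=0.60 avg60=0.40 avg300=0.20 total=200",
		"full avg10=0.30 avg60=0.20 avg300=0.10 total=100",
	}
	stats := &cgroups.PSIStats{}
	for _, l := range lines {
		kind, data, err := parsePSILine(l)
		if err != nil {
			panic(err)
		}
		switch kind {
		case "some":
			stats.Some = data
		case "full":
			stats.Full = data
		}
	}
	fmt.Printf("%+v\n", stats)
}
```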
28 changes: 28 additions & 0 deletions container/libcontainer/handler_test.go
@@ -110,6 +110,20 @@ func TestSetCPUStats(t *testing.T) {
UsageInKernelmode: 734746 * nanosecondsInSeconds / clockTicks,
UsageInUsermode: 2767637 * nanosecondsInSeconds / clockTicks,
},
PSI: &cgroups.PSIStats{
Full: cgroups.PSIData{
Avg10: 0.3,
Avg60: 0.2,
Avg300: 0.1,
Total: 100,
},
Some: cgroups.PSIData{
Avg10: 0.6,
Avg60: 0.4,
Avg300: 0.2,
Total: 200,
},
},
},
}
var ret info.ContainerStats
@@ -123,6 +137,20 @@
System: s.CpuStats.CpuUsage.UsageInKernelmode,
Total: 33802947350272,
},
PSI: info.PSIStats{
Full: info.PSIData{
Avg10: 0.3,
Avg60: 0.2,
Avg300: 0.1,
Total: 100,
},
Some: info.PSIData{
Avg10: 0.6,
Avg60: 0.4,
Avg300: 0.2,
Total: 200,
},
},
},
}

26 changes: 25 additions & 1 deletion info/v1/container.go
@@ -261,6 +261,26 @@ func (ci *ContainerInfo) StatsEndTime() time.Time {
return ret
}

// PSI statistics for an individual resource.
type PSIStats struct {
// PSI data for all tasks in the cgroup.
Full PSIData `json:"full,omitempty"`
// PSI data for some tasks in the cgroup.
Some PSIData `json:"some,omitempty"`
}

type PSIData struct {
// Total time tasks in the cgroup have waited due to congestion.
// Unit: microseconds.
Total uint64 `json:"total"`
// The average share of time (in %) tasks have waited due to congestion over a 10 second window.
Avg10 float64 `json:"avg10"`
// The average share of time (in %) tasks have waited due to congestion over a 60 second window.
Avg60 float64 `json:"avg60"`
// The average share of time (in %) tasks have waited due to congestion over a 300 second window.
Avg300 float64 `json:"avg300"`
}

// This mirrors kernel internal structure.
type LoadStats struct {
// Number of sleeping tasks.
@@ -334,7 +354,8 @@ type CpuStats struct {
// from LoadStats.NrRunning.
LoadAverage int32 `json:"load_average"`
// from LoadStats.NrUninterruptible
LoadDAverage int32 `json:"load_d_average"`
LoadDAverage int32 `json:"load_d_average"`
PSI PSIStats `json:"psi"`
}

type PerDiskStats struct {
@@ -353,6 +374,7 @@ type DiskIoStats struct {
IoWaitTime []PerDiskStats `json:"io_wait_time,omitempty"`
IoMerged []PerDiskStats `json:"io_merged,omitempty"`
IoTime []PerDiskStats `json:"io_time,omitempty"`
PSI PSIStats `json:"psi"`
}

type HugetlbStats struct {
@@ -411,6 +433,8 @@ type MemoryStats struct {

ContainerData MemoryStatsMemoryData `json:"container_data,omitempty"`
HierarchicalData MemoryStatsMemoryData `json:"hierarchical_data,omitempty"`

PSI PSIStats `json:"psi"`
}

type CPUSetStats struct {
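To make the new v1 API surface concrete, here is a small sketch of how PSIStats serializes with the JSON tags added above. The values are arbitrary (they echo the fake data used later in metrics/prometheus_fake.go), and the output comment is condensed; the package main wrapper is illustrative only. Note that omitempty has no effect on the nested struct-typed fields, so full and some are always present once the parent stats object is emitted.

```go
package main

import (
	"encoding/json"
	"fmt"

	info "github.com/google/cadvisor/info/v1"
)

func main() {
	// Illustrative values only; Total is the cumulative stall time in
	// microseconds, the Avg* fields are percentages over the trailing window.
	psi := info.PSIStats{
		Full: info.PSIData{Avg10: 0.3, Avg60: 0.2, Avg300: 0.1, Total: 100},
		Some: info.PSIData{Avg10: 0.6, Avg60: 0.4, Avg300: 0.2, Total: 200},
	}
	out, _ := json.MarshalIndent(psi, "", "  ")
	fmt.Println(string(out))
	// Output (condensed):
	// {"full": {"total": 100, "avg10": 0.3, "avg60": 0.2, "avg300": 0.1},
	//  "some": {"total": 200, "avg10": 0.6, "avg60": 0.4, "avg300": 0.2}}
}
```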
55 changes: 54 additions & 1 deletion metrics/prometheus.go
@@ -33,9 +33,14 @@ import (
// asFloat64 converts a uint64 into a float64.
func asFloat64(v uint64) float64 { return float64(v) }

// asMicrosecondsToSeconds converts microseconds into a float64 representing seconds.
func asMicrosecondsToSeconds(v uint64) float64 {
return float64(v) / 1e6
}

// asNanosecondsToSeconds converts nanoseconds into a float64 representing seconds.
func asNanosecondsToSeconds(v uint64) float64 {
return float64(v) / float64(time.Second)
return float64(v) / 1e9
}

Review thread on asNanosecondsToSeconds:

Contributor: the old way reads as slightly easier to understand to me. What motivates this change?

Contributor: As mentioned in the review comments, it's confusing when you compare asMicrosecondsToSeconds(). The base unit is time.Nanosecond, so you would have to use time.Millisecond to convert microseconds to seconds. By simply using the float factor, it's consistent to read both functions.

// fsValues is a helper method for assembling per-filesystem stats.
@@ -1746,6 +1751,54 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
})
}

if includedMetrics.Has(container.PressureMetrics) {
Review thread on this block:

Contributor: I can almost see these as also being nested under the cpu/memory/disk metrics. I am not sure if there is precedent for this, but maybe add both a PressureMetrics check for included metrics, as well as a check on the respective other metric?

	if includedMetrics.Has(container.PressureMetrics) && includedMetrics.Has(container.CPUMetrics) {
		// report CPU pressure metrics

Contributor (author): @haircommander I couldn't find any precedent for this. I'll add a check for each PSI resource (cpu, memory, io). I can't find a strong argument for or against adding such a check.

Contributor: I don't see why having pressure metrics needs to depend on other metrics. Each metric dataset is independent of the others.

Contributor (author): I'd prefer not to nest them under other metrics (e.g. container_cpu_) as this adds confusion for end users accustomed to the reporting scheme of the node-exporter (e.g. node_pressure_cpu_).

Contributor (author): @SuperQ as PSI metrics are reported as part of the cpu/memory/io controllers, it might make sense not to report them when a user actively decides against getting metrics for one of these controllers. It's a bit strange in the case of io, as it's reported for block I/O, not only disk. As pointed out before, I'm undecided about this; it might also make sense to defer the decision until a real use case arises. That then raises the question of which choice provides more backwards compatibility.

Contributor: We can always emit differently in the future; I think this makes sense for now.

c.containerMetrics = append(c.containerMetrics, []containerMetric{
{
name: "container_pressure_cpu_stalled_seconds_total",
Review thread on this metric:

Contributor: I'm trying to reason about which metrics make sense to emit. It's interesting that we emit the totals but don't emit the 10/60/300 averages. I almost wonder if we should add the structure pieces first and discuss the actual metrics we emit afterwards.

@SuperQ (Contributor, Feb 18, 2025): No, we only need the totals, as the other metrics are derived values. With the totals the end user can derive arbitrary intervals, for example rate(container_pressure_cpu_stalled_seconds_total[60s]).

Contributor: Fair. I am not sure that Prometheus query would produce exactly the same values, but it's true that you could reconstruct those intervals from the total.

Contributor: Yeah, it's never going to be exactly the same because it depends on exactly the timestamps and values involved. But it will stay within the same tolerance over time.

help: "Total time duration no tasks in the container could make progress due to CPU congestion.",
valueType: prometheus.CounterValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: asMicrosecondsToSeconds(s.Cpu.PSI.Full.Total), timestamp: s.Timestamp}}
},
}, {
name: "container_pressure_cpu_waiting_seconds_total",
help: "Total time duration tasks in the container have waited due to CPU congestion.",
valueType: prometheus.CounterValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: asMicrosecondsToSeconds(s.Cpu.PSI.Some.Total), timestamp: s.Timestamp}}
},
}, {
name: "container_pressure_memory_stalled_seconds_total",
help: "Total time duration no tasks in the container could make progress due to memory congestion.",
valueType: prometheus.CounterValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: asMicrosecondsToSeconds(s.Memory.PSI.Full.Total), timestamp: s.Timestamp}}
},
}, {
name: "container_pressure_memory_waiting_seconds_total",
help: "Total time duration tasks in the container have waited due to memory congestion.",
valueType: prometheus.CounterValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: asMicrosecondsToSeconds(s.Memory.PSI.Some.Total), timestamp: s.Timestamp}}
},
}, {
name: "container_pressure_io_stalled_seconds_total",
help: "Total time duration no tasks in the container could make progress due to IO congestion.",
valueType: prometheus.CounterValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: asMicrosecondsToSeconds(s.DiskIo.PSI.Full.Total), timestamp: s.Timestamp}}
},
}, {
name: "container_pressure_io_waiting_seconds_total",
help: "Total time duration tasks in the container have waited due to IO congestion.",
valueType: prometheus.CounterValue,
getValues: func(s *info.ContainerStats) metricValues {
return metricValues{{value: asMicrosecondsToSeconds(s.DiskIo.PSI.Some.Total), timestamp: s.Timestamp}}
},
},
}...)
}

return c
}

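A note on units that ties the collector above to the test fixtures below: PSI totals arrive from the kernel in microseconds, so the new asMicrosecondsToSeconds helper divides by 1e6 before the value lands on the *_seconds_total counters. A standalone sketch (the helper body is copied here only for illustration; the surrounding main is not part of the change):

```go
package main

import "fmt"

// asMicrosecondsToSeconds mirrors the helper added in metrics/prometheus.go:
// dividing a microsecond total by 1e6 yields the seconds value exposed on the
// container_pressure_*_seconds_total counters.
func asMicrosecondsToSeconds(v uint64) float64 {
	return float64(v) / 1e6
}

func main() {
	// prometheus_fake.go sets Cpu.PSI.Full.Total to 100 (microseconds), which
	// is why testdata/prometheus_metrics expects
	// container_pressure_cpu_stalled_seconds_total ... 0.0001.
	fmt.Println(asMicrosecondsToSeconds(100)) // 0.0001
}
```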
42 changes: 42 additions & 0 deletions metrics/prometheus_fake.go
@@ -328,6 +328,20 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
},
LoadAverage: 2,
LoadDAverage: 2,
PSI: info.PSIStats{
Full: info.PSIData{
Avg10: 0.3,
Avg60: 0.2,
Avg300: 0.1,
Total: 100,
},
Some: info.PSIData{
Avg10: 0.6,
Avg60: 0.4,
Avg300: 0.2,
Total: 200,
},
},
},
Memory: info.MemoryStats{
Usage: 8,
@@ -358,6 +372,20 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
MappedFile: 16,
KernelUsage: 17,
Swap: 8192,
PSI: info.PSIStats{
Full: info.PSIData{
Avg10: 0.3,
Avg60: 0.2,
Avg300: 0.1,
Total: 1000,
},
Some: info.PSIData{
Avg10: 0.6,
Avg60: 0.4,
Avg300: 0.2,
Total: 2000,
},
},
},
Hugetlb: map[string]info.HugetlbStats{
"2Mi": {
@@ -550,6 +578,20 @@ func (p testSubcontainersInfoProvider) GetRequestedContainersInfo(string, v2.Req
"Write": 6,
},
}},
PSI: info.PSIStats{
Full: info.PSIData{
Avg10: 0.3,
Avg60: 0.2,
Avg300: 0.1,
Total: 1100,
},
Some: info.PSIData{
Avg10: 0.6,
Avg60: 0.4,
Avg300: 0.2,
Total: 2200,
},
},
},
Filesystem: []info.FsStats{
{
18 changes: 18 additions & 0 deletions metrics/testdata/prometheus_metrics
@@ -381,6 +381,24 @@ container_perf_uncore_events_total{container_env_foo_env="prod",container_label_
# TYPE container_perf_uncore_events_scaling_ratio gauge
container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="0",zone_name="hello"} 1 1395066363000
container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",container_label_foo_label="bar",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="1",zone_name="hello"} 1 1395066363000
# HELP container_pressure_cpu_stalled_seconds_total Total time duration no tasks in the container could make progress due to CPU congestion.
# TYPE container_pressure_cpu_stalled_seconds_total counter
container_pressure_cpu_stalled_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0001 1395066363000
# HELP container_pressure_cpu_waiting_seconds_total Total time duration tasks in the container have waited due to CPU congestion.
# TYPE container_pressure_cpu_waiting_seconds_total counter
container_pressure_cpu_waiting_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0002 1395066363000
# HELP container_pressure_io_stalled_seconds_total Total time duration no tasks in the container could make progress due to IO congestion.
# TYPE container_pressure_io_stalled_seconds_total counter
container_pressure_io_stalled_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0011 1395066363000
# HELP container_pressure_io_waiting_seconds_total Total time duration tasks in the container have waited due to IO congestion.
# TYPE container_pressure_io_waiting_seconds_total counter
container_pressure_io_waiting_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0022 1395066363000
# HELP container_pressure_memory_stalled_seconds_total Total time duration no tasks in the container could make progress due to memory congestion.
# TYPE container_pressure_memory_stalled_seconds_total counter
container_pressure_memory_stalled_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.001 1395066363000
# HELP container_pressure_memory_waiting_seconds_total Total time duration tasks in the container have waited due to memory congestion.
# TYPE container_pressure_memory_waiting_seconds_total counter
container_pressure_memory_waiting_seconds_total{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.002 1395066363000
# HELP container_processes Number of processes running inside the container.
# TYPE container_processes gauge
container_processes{container_env_foo_env="prod",container_label_foo_label="bar",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000
18 changes: 18 additions & 0 deletions metrics/testdata/prometheus_metrics_whitelist_filtered
@@ -381,6 +381,24 @@ container_perf_uncore_events_total{container_env_foo_env="prod",event="cas_count
# TYPE container_perf_uncore_events_scaling_ratio gauge
container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="0",zone_name="hello"} 1 1395066363000
container_perf_uncore_events_scaling_ratio{container_env_foo_env="prod",event="cas_count_read",id="testcontainer",image="test",name="testcontaineralias",pmu="uncore_imc_0",socket="1",zone_name="hello"} 1 1395066363000
# HELP container_pressure_cpu_stalled_seconds_total Total time duration no tasks in the container could make progress due to CPU congestion.
# TYPE container_pressure_cpu_stalled_seconds_total counter
container_pressure_cpu_stalled_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0001 1395066363000
# HELP container_pressure_cpu_waiting_seconds_total Total time duration tasks in the container have waited due to CPU congestion.
# TYPE container_pressure_cpu_waiting_seconds_total counter
container_pressure_cpu_waiting_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0002 1395066363000
# HELP container_pressure_io_stalled_seconds_total Total time duration no tasks in the container could make progress due to IO congestion.
# TYPE container_pressure_io_stalled_seconds_total counter
container_pressure_io_stalled_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0011 1395066363000
# HELP container_pressure_io_waiting_seconds_total Total time duration tasks in the container have waited due to IO congestion.
# TYPE container_pressure_io_waiting_seconds_total counter
container_pressure_io_waiting_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.0022 1395066363000
# HELP container_pressure_memory_stalled_seconds_total Total time duration no tasks in the container could make progress due to memory congestion.
# TYPE container_pressure_memory_stalled_seconds_total counter
container_pressure_memory_stalled_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.001 1395066363000
# HELP container_pressure_memory_waiting_seconds_total Total time duration tasks in the container have waited due to memory congestion.
# TYPE container_pressure_memory_waiting_seconds_total counter
container_pressure_memory_waiting_seconds_total{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 0.002 1395066363000
# HELP container_processes Number of processes running inside the container.
# TYPE container_processes gauge
container_processes{container_env_foo_env="prod",id="testcontainer",image="test",name="testcontaineralias",zone_name="hello"} 1 1395066363000