Skip to content

Commit 30af212

Browse files
authored
Fix truncated Go CPU profiles (#3344)
* Fix truncated CPU profiles * Write a test for minGroupSize. Not quite sure where the requirement for minGroupSize 2 comes from * Update maxRecursionDepth to 56 * Rename method, to something better maybe?
1 parent 5eda406 commit 30af212

15 files changed

+135
-59
lines changed

pkg/pprof/fix_go_heap_truncated_test.go

-42
This file was deleted.

pkg/pprof/fix_go_profile.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ func FixGoProfile(p *profilev1.Profile) *profilev1.Profile {
1919
// Now that the profile is normalized, we can try to repair
2020
// truncated stack traces, if any. Note that repaired stacks
2121
// are not deduplicated, so the caller need to normalize the
22-
if MayHaveGoHeapTruncatedStacktraces(p) {
23-
RepairGoHeapTruncatedStacktraces(p)
22+
if PotentialTruncatedGoStacktraces(p) {
23+
RepairGoTruncatedStacktraces(p)
2424
}
2525
return p
2626
}

pkg/pprof/fix_go_profile_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ import (
1010
)
1111

1212
func Test_FixGoProfile(t *testing.T) {
13-
p, err := OpenFile("testdata/goheapfix/heap_go_truncated_4.pb.gz")
13+
p, err := OpenFile("testdata/gotruncatefix/heap_go_truncated_4.pb.gz")
1414
require.NoError(t, err)
1515

1616
f := FixGoProfile(p.Profile)

pkg/pprof/fix_go_heap_truncated.go renamed to pkg/pprof/fix_go_truncated.go

+43-14
Original file line numberDiff line numberDiff line change
@@ -10,30 +10,49 @@ import (
1010
)
1111

1212
const (
13-
minGroupSize = 2
13+
minGroupSize = 2
14+
maxRecursiveDepth = 56 // Profiles with deeply recursive stack traces are ignored.
1415

1516
tokens = 8
1617
tokenLen = 16
17-
suffixLen = tokens + tokenLen
18+
suffixLen = tokens + tokenLen // stacktraces shorter than suffixLen are not considered as truncated or missing stacks copied from
1819

1920
tokenBytesLen = tokenLen * 8
2021
suffixBytesLen = suffixLen * 8
2122
)
2223

23-
// MayHaveGoHeapTruncatedStacktraces reports whether there are
24+
// PotentialTruncatedGoStacktraces reports whether there are
2425
// any chances that the profile may have truncated stack traces.
25-
func MayHaveGoHeapTruncatedStacktraces(p *profilev1.Profile) bool {
26-
if !hasGoHeapSampleTypes(p) {
26+
func PotentialTruncatedGoStacktraces(p *profilev1.Profile) bool {
27+
var minDepth int
28+
var maxDepth int
29+
30+
if hasGoHeapSampleTypes(p) {
31+
minDepth = 32
32+
33+
// Go heap profiles in Go 1.23+ have a depth limit of 128 frames. Let's not try to fix truncation if we see any longer stacks.
34+
maxDepth = 33
35+
} else if hasGoCPUSampleTypes(p) {
36+
minDepth = 64
37+
} else {
2738
return false
2839
}
29-
// Some truncated stacks have depth less than the depth limit (32).
30-
const minDepth = 28
40+
41+
// Some truncated heap stacks have depth less than the depth limit.
42+
// https://github.com/golang/go/blob/f7c330eac7777612574d8a1652fd415391f6095e/src/runtime/mprof.go#L446
43+
minDepth -= 4
44+
45+
deepEnough := false
3146
for _, s := range p.Sample {
3247
if len(s.LocationId) >= minDepth {
33-
return true
48+
deepEnough = true
49+
}
50+
// when it's too deep we no longer perform reassemlbing of truncated stacktraces
51+
if maxDepth != 0 && len(s.LocationId) >= maxDepth {
52+
return false
3453
}
3554
}
36-
return false
55+
return deepEnough
3756
}
3857

3958
func hasGoHeapSampleTypes(p *profilev1.Profile) bool {
@@ -50,7 +69,18 @@ func hasGoHeapSampleTypes(p *profilev1.Profile) bool {
5069
return false
5170
}
5271

53-
// RepairGoHeapTruncatedStacktraces repairs truncated stack traces
72+
func hasGoCPUSampleTypes(p *profilev1.Profile) bool {
73+
for _, st := range p.SampleType {
74+
switch p.StringTable[st.Type] {
75+
case
76+
"cpu":
77+
return true
78+
}
79+
}
80+
return false
81+
}
82+
83+
// RepairGoTruncatedStacktraces repairs truncated stack traces
5484
// in Go heap profiles.
5585
//
5686
// Go heap profile has a depth limit of 32 frames, which often
@@ -66,7 +96,7 @@ func hasGoHeapSampleTypes(p *profilev1.Profile) bool {
6696
// one by at least 16 frames in a row from the top, and has frames
6797
// above its root, stack S considered truncated, and the missing part
6898
// is copied from R.
69-
func RepairGoHeapTruncatedStacktraces(p *profilev1.Profile) {
99+
func RepairGoTruncatedStacktraces(p *profilev1.Profile) {
70100
// Group stack traces by bottom (closest to the root) locations.
71101
// Typically, there are very few groups (a hundred or two).
72102
samples, groups := split(p)
@@ -82,7 +112,7 @@ func RepairGoHeapTruncatedStacktraces(p *profilev1.Profile) {
82112
if i+1 < len(groups) {
83113
n = groups[i+1]
84114
}
85-
if s := n - g; s < minGroupSize {
115+
if s := n - g; s < (minGroupSize - 1) {
86116
continue
87117
}
88118
// We take suffix of the first sample in the group.
@@ -168,8 +198,7 @@ func RepairGoHeapTruncatedStacktraces(p *profilev1.Profile) {
168198
}
169199
c = n
170200
j++
171-
if j == tokenLen {
172-
// Profiles with deeply recursive stack traces are ignored.
201+
if j == maxRecursiveDepth {
173202
return
174203
}
175204
}

pkg/pprof/fix_go_truncated_test.go

+89
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
package pprof
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"testing"
7+
8+
"github.com/stretchr/testify/assert"
9+
"github.com/stretchr/testify/require"
10+
11+
"github.com/grafana/pyroscope/pkg/pprof/testhelper"
12+
)
13+
14+
func Benchmark_RepairGoTruncatedStacktraces(b *testing.B) {
15+
p, err := OpenFile("testdata/gotruncatefix/heap_go_truncated_3.pb.gz")
16+
require.NoError(b, err)
17+
b.ResetTimer()
18+
b.ReportAllocs()
19+
for i := 0; i < b.N; i++ {
20+
RepairGoTruncatedStacktraces(FixGoProfile(p.CloneVT()))
21+
}
22+
}
23+
24+
func generateStackTrace(n int) []string {
25+
res := make([]string, n)
26+
runes := []rune("abcdefghijklmnopqrstuvwxyz")
27+
28+
for idx := range res {
29+
dest := n - (idx + 1)
30+
if idx == 0 {
31+
res[dest] = "start"
32+
continue
33+
}
34+
res[dest] = fmt.Sprintf("%c%d", runes[(idx-1)/10], (idx-1)%10)
35+
}
36+
return res
37+
}
38+
39+
func Test_RepairGoTruncatedStacktraces(t *testing.T) {
40+
n := 128
41+
fullStack := generateStackTrace(n)
42+
b := testhelper.NewProfileBuilder(0).CPUProfile()
43+
b.ForStacktraceString(fullStack[n-24:]...).AddSamples(1)
44+
b.ForStacktraceString(fullStack[n-58 : n-9]...).AddSamples(2)
45+
b.ForStacktraceString(fullStack[n-57 : n-8]...).AddSamples(4)
46+
b.ForStacktraceString(fullStack[n-56 : n-7]...).AddSamples(8)
47+
b.ForStacktraceString(append([]string{"yy1"}, fullStack[n-22:]...)...).AddSamples(16)
48+
49+
RepairGoTruncatedStacktraces(b.Profile)
50+
51+
// ensure all stacktraces start with the same 8 location ids
52+
stacks := make([]uint64, 8)
53+
for idx, sample := range b.Profile.Sample {
54+
first8Stacks := sample.LocationId[len(sample.LocationId)-8:]
55+
if idx == 0 {
56+
copy(stacks, first8Stacks)
57+
continue
58+
}
59+
t.Log(stacks)
60+
assert.Equal(t, stacks, first8Stacks)
61+
}
62+
}
63+
64+
func Test_UpdateFixtures_RepairGoTruncatedStacktraces(t *testing.T) {
65+
if os.Getenv("UPDATE_FIXTURES") != "true" {
66+
t.Skip()
67+
}
68+
t.Helper()
69+
paths := []string{
70+
"testdata/gotruncatefix/heap_go_truncated_1.pb.gz", // Cortex.
71+
"testdata/gotruncatefix/heap_go_truncated_2.pb.gz", // Cortex.
72+
"testdata/gotruncatefix/heap_go_truncated_3.pb.gz", // Loki. Pathological.
73+
"testdata/gotruncatefix/heap_go_truncated_4.pb.gz", // Pyroscope.
74+
"testdata/gotruncatefix/cpu_go_truncated_1.pb.gz", // Cloudwatch Exporter
75+
}
76+
for _, path := range paths {
77+
func() {
78+
p, err := OpenFile(path)
79+
require.NoError(t, err, path)
80+
f, err := os.Create(path + ".fixed")
81+
require.NoError(t, err, path)
82+
defer f.Close()
83+
p.Profile = FixGoProfile(p.Profile)
84+
RepairGoTruncatedStacktraces(p.Profile)
85+
_, err = p.WriteTo(f)
86+
require.NoError(t, err, path)
87+
}()
88+
}
89+
}
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)