Skip to content

Commit 798e55b

Browse files
stats: Measure percentiles, max etc. (#240)
1 parent 935b6f0 commit 798e55b

File tree

3 files changed

+72
-7
lines changed

3 files changed

+72
-7
lines changed

cmd/scip/stats.go

+69-7
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@ import (
99
"path/filepath"
1010

1111
"github.com/hhatto/gocloc"
12+
"github.com/montanaflynn/stats"
1213
"github.com/urfave/cli/v2"
14+
"google.golang.org/protobuf/proto"
1315

1416
"github.com/sourcegraph/sourcegraph/lib/errors"
1517

@@ -59,35 +61,95 @@ func statsMain(flags statsFlags) error {
5961
return nil
6062
}
6163

64+
// Stats holds summary statistics for one series of measurements.
// All numeric fields are int32; NewStats fills them by converting float64
// results with int32(...), which drops any fractional part.
type Stats struct {
65+
// Percentiles groups the percentile values; the JSON keys of its members
// are the percentile ranks ("50", "90", "95", "99", "99.9").
// NOTE(review): unlike the sibling fields, Percentiles itself carries no
// json tag — confirm the resulting key name is the intended one.
Percentiles struct {
66+
Fifty int32 `json:"50"`
67+
Ninety int32 `json:"90"`
68+
NinetyFive int32 `json:"95"`
69+
NinetyNine int32 `json:"99"`
70+
NinetyNinePointNine int32 `json:"99.9"`
71+
}
72+
Mean int32 `json:"mean"`
73+
Stddev int32 `json:"stddev"`
74+
Max int32 `json:"max"`
75+
Sum int32 `json:"sum"`
76+
// Comment is free-form text about the series (e.g. its units). NewStats
// leaves it empty; callers assign it afterwards.
Comment string `json:"comment"`
77+
}
78+
79+
// NewStats computes the 50th, 90th, 95th, 99th and 99.9th percentiles, the
// mean, the standard deviation, the maximum and the sum of values and returns
// them as a Stats. Each float64 result is converted with int32(...), so any
// fractional part is dropped. The Comment field is left empty.
//
// NOTE(review): every error returned by the stats package is discarded here.
// Presumably an empty values slice makes these calls fail — confirm what the
// int32 fields end up holding in that case.
// NOTE(review): Sum is narrowed to int32 — confirm the total of values cannot
// exceed the int32 range for large inputs.
func NewStats(values []float64) Stats {
80+
s := Stats{}
81+
s.Percentiles.Fifty = percentile(values, 50)
82+
s.Percentiles.Ninety = percentile(values, 90)
83+
s.Percentiles.NinetyFive = percentile(values, 95)
84+
s.Percentiles.NinetyNine = percentile(values, 99)
85+
s.Percentiles.NinetyNinePointNine = percentile(values, 99.9)
86+
mean, _ := stats.Mean(values)
87+
s.Mean = int32(mean)
88+
stddev, _ := stats.StandardDeviation(values)
89+
s.Stddev = int32(stddev)
90+
max, _ := stats.Max(values)
91+
s.Max = int32(max)
92+
sum, _ := stats.Sum(values)
93+
s.Sum = int32(sum)
94+
return s
95+
}
96+
6297
// indexStatistics is the result type of countStatistics. As populated there:
//   - Documents is the number of documents in the index.
//   - DocumentSizes summarizes the per-document length of proto.Marshal output.
//   - LinesOfCode is the total from countLinesOfCode, or 0 if that call failed.
//   - Occurrences is the total number of occurrences across all documents and
//     OccurrenceCounts summarizes the per-document occurrence counts.
//   - Definitions is the total number of occurrences matching
//     scip.SymbolRole_Definition and DefinitionCounts summarizes the
//     per-document counts of those.
type indexStatistics struct {
63-
Documents int32 `json:"documents"`
64-
LinesOfCode int32 `json:"linesOfCode"`
65-
Occurrences int32 `json:"occurrences"`
66-
Definitions int32 `json:"definitions"`
98+
Documents int32 `json:"documents"`
99+
DocumentSizes Stats `json:"documentSizes"`
100+
LinesOfCode int32 `json:"linesOfCode"`
101+
Occurrences int32 `json:"occurrences"`
102+
OccurrenceCounts Stats `json:"occurrenceCounts"`
103+
Definitions int32 `json:"definitions"`
104+
DefinitionCounts Stats `json:"definitionCounts"`
67105
}
68106

69107
// countStatistics gathers summary statistics for index: the document count,
// lines of code, total occurrence and definition counts, and per-document
// distributions (via NewStats) of serialized size, occurrence count and
// definition count. customProjectRoot is forwarded to countLinesOfCode.
//
// With the added lines of this change, a countLinesOfCode failure is only
// logged and LinesOfCode stays 0, so the returned error is always nil.
func countStatistics(index *scip.Index, customProjectRoot string) (*indexStatistics, error) {
70108
loc, err := countLinesOfCode(index, customProjectRoot)
109+
var linesOfCode int32
71110
if err != nil {
72-
return nil, err
111+
// Keep this a non-fatal error so that we can measure other index stats
112+
// even if the project is not cloned locally (e.g. if it's a huge
113+
// project like Chromium or the Linux kernel).
114+
log.Printf("Couldn't count lines of code: %s", err)
115+
} else {
116+
linesOfCode = loc.Total.Code
73117
}
74118
stats := &indexStatistics{
75119
Documents: int32(len(index.Documents)),
76-
LinesOfCode: loc.Total.Code,
120+
LinesOfCode: linesOfCode,
77121
Occurrences: 0,
78122
Definitions: 0,
79123
}
124+
// One entry per document is appended to each of these slices below.
documentSizes := []float64{}
125+
occurrenceCounts := []float64{}
126+
definitionCounts := []float64{}
80127
for _, document := range index.Documents {
128+
// The document's size is the length of its protobuf encoding.
// NOTE(review): the proto.Marshal error is discarded — confirm a failed
// marshal being recorded as a shorter (possibly zero) size is acceptable.
bytes, _ := proto.Marshal(document)
129+
documentSizes = append(documentSizes, float64(len(bytes)))
130+
stats.Occurrences += int32(len(document.Occurrences))
131+
occurrenceCounts = append(occurrenceCounts, float64(len(document.Occurrences)))
132+
// Start this document's definition count at zero; the inner loop
// increments this last element for every matching occurrence.
definitionCounts = append(definitionCounts, 0)
81133
for _, occurrence := range document.Occurrences {
82-
stats.Occurrences += 1
83134
if scip.SymbolRole_Definition.Matches(occurrence) {
84135
stats.Definitions += 1
136+
definitionCounts[len(definitionCounts)-1] += 1
85137
}
86138
}
87139
}
140+
stats.DocumentSizes = NewStats(documentSizes)
141+
stats.DocumentSizes.Comment = "sizes are in bytes"
142+
stats.OccurrenceCounts = NewStats(occurrenceCounts)
143+
stats.DefinitionCounts = NewStats(definitionCounts)
144+
stats.DefinitionCounts.Comment = "counted using occurrences"
88145
return stats, nil
89146
}
90147

148+
// percentile returns the result of stats.Percentile(buf, percent) converted to
// int32, which drops any fractional part.
// NOTE(review): the error from stats.Percentile is discarded — confirm what
// res holds when the call fails (presumably e.g. for an empty buf) and
// whether converting that value to int32 is acceptable.
func percentile(buf []float64, percent float64) int32 {
149+
res, _ := stats.Percentile(buf, percent)
150+
return int32(res)
151+
}
152+
91153
func countLinesOfCode(index *scip.Index, customProjectRoot string) (*gocloc.Result, error) {
92154
var localSource string
93155
root, err := url.Parse(index.Metadata.ProjectRoot)

go.mod

+1
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ require (
99
github.com/hexops/gotextdiff v1.0.3
1010
github.com/hhatto/gocloc v0.4.2
1111
github.com/k0kubun/pp/v3 v3.1.0
12+
github.com/montanaflynn/stats v0.7.1
1213
github.com/pseudomuto/protoc-gen-doc v1.5.1
1314
github.com/smacker/go-tree-sitter v0.0.0-20220209044044-0d3022e933c3
1415
github.com/sourcegraph/sourcegraph/lib v0.0.0-20220511160847-5a43d3ea24eb

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -282,6 +282,8 @@ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lN
282282
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
283283
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
284284
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
285+
github.com/montanaflynn/stats v0.7.1 h1:etflOAAHORrCC44V+aR6Ftzort912ZU+YLiSTuV8eaE=
286+
github.com/montanaflynn/stats v0.7.1/go.mod h1:etXPPgVO6n31NxCd9KQUMvCM+ve0ruNzt6R8Bnaayow=
285287
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
286288
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
287289
github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ=

0 commit comments

Comments
 (0)