|
9 | 9 | "path/filepath"
|
10 | 10 |
|
11 | 11 | "github.com/hhatto/gocloc"
|
| 12 | + "github.com/montanaflynn/stats" |
12 | 13 | "github.com/urfave/cli/v2"
|
| 14 | + "google.golang.org/protobuf/proto" |
13 | 15 |
|
14 | 16 | "github.com/sourcegraph/sourcegraph/lib/errors"
|
15 | 17 |
|
@@ -59,35 +61,95 @@ func statsMain(flags statsFlags) error {
|
59 | 61 | return nil
|
60 | 62 | }
|
61 | 63 |
|
// Stats is a JSON-serializable summary of a series of samples: selected
// percentiles plus mean, standard deviation, maximum and sum. Every figure
// is stored as an int32, so fractional parts are not retained.
type Stats struct {
	// Percentiles holds the sample values at fixed percentile ranks. The
	// field carries no JSON tag, so it is encoded under the key
	// "Percentiles".
	Percentiles struct {
		Fifty               int32 `json:"50"`
		Ninety              int32 `json:"90"`
		NinetyFive          int32 `json:"95"`
		NinetyNine          int32 `json:"99"`
		NinetyNinePointNine int32 `json:"99.9"`
	}
	// Mean is the arithmetic mean of the samples.
	Mean int32 `json:"mean"`
	// Stddev is the standard deviation of the samples.
	Stddev int32 `json:"stddev"`
	// Max is the largest sample.
	Max int32 `json:"max"`
	// Sum is the total of all samples.
	Sum int32 `json:"sum"`
	// Comment is a free-form note about the series, for example the unit
	// the samples are measured in.
	Comment string `json:"comment"`
}
| 78 | + |
| 79 | +func NewStats(values []float64) Stats { |
| 80 | + s := Stats{} |
| 81 | + s.Percentiles.Fifty = percentile(values, 50) |
| 82 | + s.Percentiles.Ninety = percentile(values, 90) |
| 83 | + s.Percentiles.NinetyFive = percentile(values, 95) |
| 84 | + s.Percentiles.NinetyNine = percentile(values, 99) |
| 85 | + s.Percentiles.NinetyNinePointNine = percentile(values, 99.9) |
| 86 | + mean, _ := stats.Mean(values) |
| 87 | + s.Mean = int32(mean) |
| 88 | + stddev, _ := stats.StandardDeviation(values) |
| 89 | + s.Stddev = int32(stddev) |
| 90 | + max, _ := stats.Max(values) |
| 91 | + s.Max = int32(max) |
| 92 | + sum, _ := stats.Sum(values) |
| 93 | + s.Sum = int32(sum) |
| 94 | + return s |
| 95 | +} |
| 96 | + |
62 | 97 | type indexStatistics struct {
|
63 |
| - Documents int32 `json:"documents"` |
64 |
| - LinesOfCode int32 `json:"linesOfCode"` |
65 |
| - Occurrences int32 `json:"occurrences"` |
66 |
| - Definitions int32 `json:"definitions"` |
| 98 | + Documents int32 `json:"documents"` |
| 99 | + DocumentSizes Stats `json:"documentSizes"` |
| 100 | + LinesOfCode int32 `json:"linesOfCode"` |
| 101 | + Occurrences int32 `json:"occurrences"` |
| 102 | + OccurrenceCounts Stats `json:"occurrenceCounts"` |
| 103 | + Definitions int32 `json:"definitions"` |
| 104 | + DefinitionCounts Stats `json:"definitionCounts"` |
67 | 105 | }
|
68 | 106 |
|
69 | 107 | func countStatistics(index *scip.Index, customProjectRoot string) (*indexStatistics, error) {
|
70 | 108 | loc, err := countLinesOfCode(index, customProjectRoot)
|
| 109 | + var linesOfCode int32 |
71 | 110 | if err != nil {
|
72 |
| - return nil, err |
| 111 | + // Keep this a non-fatal error so that we can measure other index stats |
| 112 | + // even if the project is not cloned locally (e.g. if it's a huge |
| 113 | + // project like Chromium or the Linux kernel). |
| 114 | + log.Printf("Couldn't count lines of code: %s", err) |
| 115 | + } else { |
| 116 | + linesOfCode = loc.Total.Code |
73 | 117 | }
|
74 | 118 | stats := &indexStatistics{
|
75 | 119 | Documents: int32(len(index.Documents)),
|
76 |
| - LinesOfCode: loc.Total.Code, |
| 120 | + LinesOfCode: linesOfCode, |
77 | 121 | Occurrences: 0,
|
78 | 122 | Definitions: 0,
|
79 | 123 | }
|
| 124 | + documentSizes := []float64{} |
| 125 | + occurrenceCounts := []float64{} |
| 126 | + definitionCounts := []float64{} |
80 | 127 | for _, document := range index.Documents {
|
| 128 | + bytes, _ := proto.Marshal(document) |
| 129 | + documentSizes = append(documentSizes, float64(len(bytes))) |
| 130 | + stats.Occurrences += int32(len(document.Occurrences)) |
| 131 | + occurrenceCounts = append(occurrenceCounts, float64(len(document.Occurrences))) |
| 132 | + definitionCounts = append(definitionCounts, 0) |
81 | 133 | for _, occurrence := range document.Occurrences {
|
82 |
| - stats.Occurrences += 1 |
83 | 134 | if scip.SymbolRole_Definition.Matches(occurrence) {
|
84 | 135 | stats.Definitions += 1
|
| 136 | + definitionCounts[len(definitionCounts)-1] += 1 |
85 | 137 | }
|
86 | 138 | }
|
87 | 139 | }
|
| 140 | + stats.DocumentSizes = NewStats(documentSizes) |
| 141 | + stats.DocumentSizes.Comment = "sizes are in bytes" |
| 142 | + stats.OccurrenceCounts = NewStats(occurrenceCounts) |
| 143 | + stats.DefinitionCounts = NewStats(definitionCounts) |
| 144 | + stats.DefinitionCounts.Comment = "counted using occurrences" |
88 | 145 | return stats, nil
|
89 | 146 | }
|
90 | 147 |
|
| 148 | +func percentile(buf []float64, percent float64) int32 { |
| 149 | + res, _ := stats.Percentile(buf, percent) |
| 150 | + return int32(res) |
| 151 | +} |
| 152 | + |
91 | 153 | func countLinesOfCode(index *scip.Index, customProjectRoot string) (*gocloc.Result, error) {
|
92 | 154 | var localSource string
|
93 | 155 | root, err := url.Parse(index.Metadata.ProjectRoot)
|
|
0 commit comments