Skip to content

Commit 523396d

Browse files
authored
Refactoring (#291)
1 parent a4868ee commit 523396d

File tree

9 files changed

+173
-65
lines changed

9 files changed

+173
-65
lines changed

analyze/analyze.go

Lines changed: 39 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -259,19 +259,30 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
259259
bar.Describe("Check which workflows match regex: " + regex.String())
260260
_ = bar.Add(1)
261261

262-
workflowDir := filepath.Join(tempDir, ".github", "workflows")
263-
if err = os.MkdirAll(workflowDir, 0700); err != nil {
264-
return nil, fmt.Errorf("failed to create .github/workflows/ dir: %w", err)
265-
}
266-
267-
wg := sync.WaitGroup{}
268262
errChan := make(chan error, 1)
269263
maxGoroutines := 5
270264
if numberOfGoroutines != nil {
271265
maxGoroutines = *numberOfGoroutines
272266
}
273267
semaphore := semaphore.NewWeighted(int64(maxGoroutines))
274268
m := sync.Mutex{}
269+
type file struct {
270+
path string
271+
data []byte
272+
}
273+
filesChan := make(chan *file)
274+
files := make(map[string][]byte)
275+
276+
wgConsumer := sync.WaitGroup{}
277+
wgProducer := sync.WaitGroup{}
278+
279+
wgConsumer.Add(1)
280+
go func() {
281+
defer wgConsumer.Done()
282+
for v := range filesChan {
283+
files[v.path] = v.data
284+
}
285+
}()
275286
blobShas := make([]string, 0, len(workflows))
276287
for sha := range workflows {
277288
blobShas = append(blobShas, sha)
@@ -281,19 +292,19 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
281292
errChan <- fmt.Errorf("failed to acquire semaphore: %w", err)
282293
break
283294
}
284-
wg.Add(1)
295+
wgProducer.Add(1)
285296
go func(blobSha string) {
286-
defer wg.Done()
297+
defer wgProducer.Done()
287298
defer semaphore.Release(1)
288299
match, content, err := a.GitClient.BlobMatches(ctx, tempDir, blobSha, regex)
289300
if err != nil {
290301
errChan <- fmt.Errorf("failed to blob match %s: %w", blobSha, err)
291302
return
292303
}
293304
if match {
294-
err = os.WriteFile(filepath.Join(workflowDir, blobSha+".yaml"), content, 0644)
295-
if err != nil {
296-
errChan <- fmt.Errorf("failed to write file for blob %s: %w", blobSha, err)
305+
filesChan <- &file{
306+
path: ".github/workflows/" + blobSha + ".yaml",
307+
data: content,
297308
}
298309
} else {
299310
m.Lock()
@@ -302,8 +313,10 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
302313
}
303314
}(blobSha)
304315
}
305-
wg.Wait()
316+
wgProducer.Wait()
306317
close(errChan)
318+
close(filesChan)
319+
wgConsumer.Wait()
307320
for err := range errChan {
308321
return nil, err
309322
}
@@ -315,9 +328,9 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
315328
return nil, fmt.Errorf("failed to generate package insight: %w", err)
316329
}
317330

318-
inventoryScanner := scanner.InventoryScanner{
319-
Path: tempDir,
320-
Parsers: []scanner.Parser{
331+
inventoryScanner := scanner.InventoryScannerMem{
332+
Files: files,
333+
Parsers: []scanner.MemParser{
321334
scanner.NewGithubActionWorkflowParser(),
322335
},
323336
}
@@ -353,7 +366,16 @@ func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string,
353366
return nil, fmt.Errorf("failed to finalize analysis of package: %w", err)
354367
}
355368
} else {
356-
if err := a.Formatter.FormatWithPath(ctx, []*models.PackageInsights{scannedPackage}, workflows); err != nil {
369+
results := make(map[string][]*models.RepoInfo, len(workflows))
370+
for blobsha, branchinfos := range workflows {
371+
results[blobsha] = []*models.RepoInfo{{
372+
RepoName: repoName,
373+
Purl: pkg.Purl,
374+
BranchInfos: branchinfos,
375+
}}
376+
}
377+
378+
if err := a.Formatter.FormatWithPath(ctx, []*models.PackageInsights{scannedPackage}, results); err != nil {
357379
return nil, fmt.Errorf("failed to finalize analysis of package: %w", err)
358380
}
359381

@@ -458,7 +480,7 @@ func (a *Analyzer) AnalyzeLocalRepo(ctx context.Context, repoPath string) (*mode
458480

459481
type Formatter interface {
460482
Format(ctx context.Context, packages []*models.PackageInsights) error
461-
FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociation map[string][]models.BranchInfo) error
483+
FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociation map[string][]*models.RepoInfo) error
462484
}
463485

464486
func (a *Analyzer) finalizeAnalysis(ctx context.Context, scannedPackages []*models.PackageInsights) error {

cmd/analyzeRepoStaleBranches.go

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ import (
99
"github.com/spf13/viper"
1010
)
1111

12-
var threadsRepoStaleBranch int
1312
var expand bool
1413
var regex string
1514

@@ -38,7 +37,7 @@ Example Scanning a remote Github Repository: poutine analyze_repo_stale_branches
3837
return fmt.Errorf("error compiling regex: %w", err)
3938
}
4039

41-
_, err = analyzer.AnalyzeStaleBranches(ctx, repo, &threadsRepoStaleBranch, &expand, reg)
40+
_, err = analyzer.AnalyzeStaleBranches(ctx, repo, &threads, &expand, reg)
4241
if err != nil {
4342
return fmt.Errorf("failed to analyze repo %s: %w", repo, err)
4443
}
@@ -51,7 +50,7 @@ func init() {
5150
rootCmd.AddCommand(analyzeRepoStaleBranches)
5251

5352
analyzeRepoStaleBranches.Flags().StringVarP(&token, "token", "t", "", "SCM access token (env: GH_TOKEN)")
54-
analyzeRepoStaleBranches.Flags().IntVarP(&threadsRepoStaleBranch, "threads", "j", 5, "Parallelization factor for scanning stale branches")
53+
analyzeRepoStaleBranches.Flags().IntVarP(&threads, "threads", "j", 5, "Parallelization factor for scanning stale branches")
5554
analyzeRepoStaleBranches.Flags().BoolVarP(&expand, "expand", "e", false, "Expand the output to the classic representation from analyze_repo")
5655
analyzeRepoStaleBranches.Flags().StringVarP(&regex, "regex", "r", "pull_request_target", "Regex to check if the workflow is accessible in stale branches")
5756

formatters/json/json.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ func (f *Format) Format(ctx context.Context, packages []*models.PackageInsights)
6464
return nil
6565
}
6666

67-
func (f *Format) FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociations map[string][]models.BranchInfo) error {
67+
func (f *Format) FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociations map[string][]*models.RepoInfo) error {
6868
var result struct {
6969
Output string `json:"output"`
7070
Error string `json:"error"`

formatters/pretty/pretty.go

Lines changed: 41 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -47,49 +47,52 @@ func (f *Format) Format(ctx context.Context, packages []*models.PackageInsights)
4747
return nil
4848
}
4949

50-
func (f *Format) FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociations map[string][]models.BranchInfo) error {
50+
func (f *Format) FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociations map[string][]*models.RepoInfo) error {
5151
failures := map[string]int{}
5252
rules := map[string]results.Rule{}
5353

5454
for _, pkg := range packages {
55-
findings := map[string][]string{}
55+
findings := make(map[string]map[string]bool)
5656
for _, finding := range pkg.FindingsResults.Findings {
57-
failures[finding.RuleId]++
5857
filename := filepath.Base(finding.Meta.Path)
5958
filename = strings.TrimSuffix(filename, filepath.Ext(filename))
60-
findings[filename] = append(findings[filename], finding.RuleId)
59+
if _, ok := findings[filename]; !ok {
60+
findings[filename] = make(map[string]bool)
61+
}
62+
if _, ok := findings[filename][finding.RuleId]; !ok {
63+
failures[finding.RuleId]++
64+
}
65+
findings[filename][finding.RuleId] = true
6166
}
6267

6368
for _, rule := range pkg.FindingsResults.Rules {
6469
rules[rule.Id] = rule
6570
}
6671

67-
_ = f.printFindingsPerWorkflow(os.Stdout, findings, pkg.Purl, pathAssociations)
72+
_ = f.printFindingsPerWorkflow(os.Stdout, findings, pathAssociations)
6873
}
6974
printSummaryTable(os.Stdout, failures, rules)
7075

7176
return nil
7277
}
7378

74-
func (f *Format) printFindingsPerWorkflow(out io.Writer, results map[string][]string, purlStr string, pathAssociations map[string][]models.BranchInfo) error {
79+
func (f *Format) printFindingsPerWorkflow(out io.Writer, results map[string]map[string]bool, pathAssociations map[string][]*models.RepoInfo) error {
7580
// Skip rules with no findings.
7681
table := tablewriter.NewWriter(out)
7782
table.SetAutoMergeCells(true)
78-
table.SetHeader([]string{"Workflow sha", "Rule", "Branch", "URL"})
83+
table.SetHeader([]string{"Workflow sha", "Rule", "Location", "URL"})
7984

80-
purl, err := models.NewPurl(purlStr)
81-
if err != nil {
82-
return fmt.Errorf("error creating purl: %w", err)
83-
}
84-
for blobsha, branchInfos := range pathAssociations {
85+
for blobsha, repoInfos := range pathAssociations {
8586
findings := results[blobsha]
8687
if len(findings) == 0 {
8788
continue
8889
}
8990
largestElement := len(findings)
9091
sumPath := 0
91-
for _, branchInfo := range branchInfos {
92-
sumPath += len(branchInfo.FilePath)
92+
for _, repoInfo := range repoInfos {
93+
for _, branchInfo := range repoInfo.BranchInfos {
94+
sumPath += len(branchInfo.FilePath)
95+
}
9396
}
9497
blobshaTable := make([][]string, max(largestElement, sumPath))
9598
for i := range blobshaTable {
@@ -98,19 +101,35 @@ func (f *Format) printFindingsPerWorkflow(out io.Writer, results map[string][]st
98101

99102
blobshaTable[0][0] = blobsha
100103

101-
for i, finding := range findings {
104+
// Extract and sort the keys of the findings map
105+
sortedFindings := make([]string, 0, len(findings))
106+
for finding := range findings {
107+
sortedFindings = append(sortedFindings, finding)
108+
}
109+
sort.Strings(sortedFindings)
110+
111+
// Iterate over the sorted keys
112+
i := 0
113+
for _, finding := range sortedFindings {
102114
blobshaTable[i][1] = finding
115+
i++
103116
}
104117

105118
index := 0
106-
for _, branchInfo := range branchInfos {
107-
for j, path := range branchInfo.FilePath {
108-
if j == 0 {
109-
blobshaTable[index][2] = branchInfo.BranchName
119+
for _, repoInfo := range repoInfos {
120+
purl, err := models.NewPurl(repoInfo.Purl)
121+
if err != nil {
122+
return fmt.Errorf("failed to parse purl: %w", err)
123+
}
124+
for _, branchInfo := range repoInfo.BranchInfos {
125+
for j, path := range branchInfo.FilePath {
126+
if j == 0 {
127+
blobshaTable[index][2] = repoInfo.RepoName + "/" + branchInfo.BranchName
128+
}
129+
130+
blobshaTable[index][3] = purl.Link() + "/tree/" + branchInfo.BranchName + "/" + path
131+
index += 1
110132
}
111-
112-
blobshaTable[index][3] = purl.Link() + "/tree/" + branchInfo.BranchName + "/" + path
113-
index += 1
114133
}
115134
}
116135

formatters/sarif/sarif.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,6 @@ func (f *Format) Format(ctx context.Context, packages []*models.PackageInsights)
117117
return nil
118118
}
119119

120-
func (f *Format) FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociations map[string][]models.BranchInfo) error {
120+
// FormatWithPath is not supported by this formatter: it ignores all of its
// arguments and always returns a "not implemented" error.
func (f *Format) FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociations map[string][]*models.RepoInfo) error {
121121
return errors.New("not implemented")
122122
}

models/branch_info.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,9 @@ type BranchInfo struct {
44
BranchName string `json:"branch_name"`
55
FilePath []string `json:"file_path"`
66
}
7+
8+
// RepoInfo groups the branch information collected for a single repository,
// together with the identifiers needed to refer to that repository.
type RepoInfo struct {
9+
// Purl is the purl string identifying the repository.
Purl string `json:"purl"`
10+
// RepoName is the name of the repository.
RepoName string `json:"repo_name"`
11+
// BranchInfos holds one entry per branch, each carrying the branch name
// and its associated file paths.
BranchInfos []BranchInfo `json:"branch_infos"`
12+
}

scanner/inventory.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@ func NewInventory(opa *opa.Opa, pkgSupplyClient ReputationClient, provider strin
3030
}
3131
}
3232

33-
func (i *Inventory) ScanPackageScanner(ctx context.Context, pkgInsights models.PackageInsights, inventoryScanner *InventoryScanner) (*models.PackageInsights, error) {
33+
// InventoryScannerI is the scanner abstraction accepted by
// Inventory.ScanPackageScanner, which calls Run with a pointer to the package
// insights being scanned.
type InventoryScannerI interface {
34+
// Run performs the scan against pkgInsights and reports any failure as
// an error.
Run(pkgInsights *models.PackageInsights) error
35+
}
36+
37+
func (i *Inventory) ScanPackageScanner(ctx context.Context, pkgInsights models.PackageInsights, inventoryScanner InventoryScannerI) (*models.PackageInsights, error) {
3438
refPkgInsights := &pkgInsights
3539

3640
if err := inventoryScanner.Run(refPkgInsights); err != nil {

scanner/inventory_scanner_mem.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package scanner
2+
3+
import (
4+
"regexp"
5+
6+
"github.com/boostsecurityio/poutine/models"
7+
"github.com/rs/zerolog/log"
8+
)
9+
10+
// MemParser is a parser that operates on file contents already held in
// memory. InventoryScannerMem.Run uses it in two steps: it first tests a file
// path against MatchPattern and, on a match, hands the file to ParseFromMemory.
type MemParser interface {
11+
// MatchPattern returns the regular expression that a file path must
// match for this parser to be applied to the file.
MatchPattern() *regexp.Regexp
12+
// ParseFromMemory parses data (the content of the file at filePath)
// and is given pkgInsights to work with; it returns an error when
// parsing fails.
ParseFromMemory(data []byte, filePath string, pkgInsights *models.PackageInsights) error
13+
}
14+
15+
// InventoryScannerMem scans a set of files supplied in memory using a list of
// MemParser implementations.
type InventoryScannerMem struct {
16+
// Files maps a file path to that file's raw content.
Files map[string][]byte
17+
// Parsers are the parsers tried against every entry of Files.
Parsers []MemParser
18+
}
19+
20+
// Run applies every parser in s.Parsers to every file in s.Files whose path
// matches that parser's MatchPattern, passing pkgInsights to each
// ParseFromMemory call.
//
// Parse failures are logged and do not stop the scan, so Run always returns
// nil. Files are visited in Go's unspecified map iteration order.
func (s *InventoryScannerMem) Run(pkgInsights *models.PackageInsights) error {
21+
for path, data := range s.Files {
22+
for _, parser := range s.Parsers {
23+
// Skip parsers whose pattern does not match this file's path.
if !parser.MatchPattern().MatchString(path) {
24+
continue
25+
}
26+
// Best effort: a failing file is reported in the log and the scan
// carries on with the remaining parsers and files.
if err := parser.ParseFromMemory(data, path, pkgInsights); err != nil {
27+
log.Error().Str("file", path).Err(err).Msg("error parsing matched file")
28+
}
29+
}
30+
}
31+
return nil
32+
}

0 commit comments

Comments
 (0)