Skip to content

Commit 57eb93e

Browse files
TalgarrCopilotfproulx-boostsecurity
authored
Add stale branches scanning support (#285)
* Add stale branches scanning support Signed-off-by: Sébastien Graveline <[email protected]> Co-authored-by: Copilot <[email protected]> Co-authored-by: François Proulx <[email protected]>
1 parent de6e426 commit 57eb93e

File tree

10 files changed

+804
-14
lines changed

10 files changed

+804
-14
lines changed

analyze/analyze.go

Lines changed: 173 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,19 @@ import (
55
"context"
66
"fmt"
77
"os"
8+
"path/filepath"
9+
"regexp"
10+
"strings"
811
"sync"
912
"time"
1013

1114
"github.com/boostsecurityio/poutine/models"
15+
"github.com/boostsecurityio/poutine/results"
1216
"golang.org/x/sync/semaphore"
1317

1418
"github.com/boostsecurityio/poutine/opa"
1519
"github.com/boostsecurityio/poutine/providers/pkgsupply"
16-
"github.com/boostsecurityio/poutine/providers/scm/domain"
20+
scm_domain "github.com/boostsecurityio/poutine/providers/scm/domain"
1721
"github.com/boostsecurityio/poutine/scanner"
1822
"github.com/rs/zerolog/log"
1923
"github.com/schollz/progressbar/v3"
@@ -60,10 +64,13 @@ type ScmClient interface {
6064

6165
// GitClient is the set of git operations the Analyzer invokes through its
// GitClient field; the concrete implementation lives outside this file.
type GitClient interface {
6266
// Clone is given the target path, the remote URL, an access token and a ref.
Clone(ctx context.Context, clonePath string, url string, token string, ref string) error
67+
// FetchCone takes the same arguments as Clone plus a cone path.
// NOTE(review): presumably fetches ref with the checkout limited to cone —
// confirm against the implementation.
FetchCone(ctx context.Context, clonePath string, url string, token string, ref string, cone string) error
6368
// CommitSHA reports a commit SHA for the checkout at clonePath.
CommitSHA(clonePath string) (string, error)
6469
// LastCommitDate reports a commit timestamp for the checkout at clonePath.
LastCommitDate(ctx context.Context, clonePath string) (time.Time, error)
6570
GetRemoteOriginURL(ctx context.Context, repoPath string) (string, error)
6671
GetRepoHeadBranchName(ctx context.Context, repoPath string) (string, error)
72+
// GetUniqWorkflowsBranches returns a map whose keys AnalyzeStaleBranches
// treats as blob SHAs; each value lists the branches (name and file paths)
// associated with that blob.
GetUniqWorkflowsBranches(ctx context.Context, clonePath string) (map[string][]models.BranchInfo, error)
73+
// BlobMatches reports whether the blob matched regex and returns bytes that
// AnalyzeStaleBranches writes to disk as the workflow file content.
BlobMatches(ctx context.Context, clonePath string, blobsha string, regex *regexp.Regexp) (bool, []byte, error)
6774
}
6875

6976
func NewAnalyzer(scmClient ScmClient, gitClient GitClient, formatter Formatter, config *models.Config, opaClient *opa.Opa) *Analyzer {
@@ -208,6 +215,153 @@ func (a *Analyzer) AnalyzeOrg(ctx context.Context, org string, numberOfGoroutine
208215
return scannedPackages, nil
209216
}
210217

218+
func (a *Analyzer) AnalyzeStaleBranches(ctx context.Context, repoString string, numberOfGoroutines *int, expand *bool, regex *regexp.Regexp) (*models.PackageInsights, error) {
219+
org, repoName, err := a.ScmClient.ParseRepoAndOrg(repoString)
220+
if err != nil {
221+
return nil, fmt.Errorf("failed to parse repository: %w", err)
222+
}
223+
repo, err := a.ScmClient.GetRepo(ctx, org, repoName)
224+
if err != nil {
225+
return nil, fmt.Errorf("failed to get repo: %w", err)
226+
}
227+
provider := repo.GetProviderName()
228+
229+
providerVersion, err := a.ScmClient.GetProviderVersion(ctx)
230+
if err != nil {
231+
log.Debug().Err(err).Msgf("Failed to get provider version for %s", provider)
232+
}
233+
234+
log.Debug().Msgf("Provider: %s, Version: %s, BaseURL: %s", provider, providerVersion, a.ScmClient.GetProviderBaseURL())
235+
236+
pkgsupplyClient := pkgsupply.NewStaticClient()
237+
238+
inventory := scanner.NewInventory(a.Opa, pkgsupplyClient, provider, providerVersion)
239+
240+
log.Debug().Msgf("Starting repository analysis for: %s/%s on %s", org, repoName, provider)
241+
bar := a.progressBar(3, "Cloning repository")
242+
_ = bar.RenderBlank()
243+
244+
repoUrl := repo.BuildGitURL(a.ScmClient.GetProviderBaseURL())
245+
tempDir, err := a.fetchConeToTemp(ctx, repoUrl, a.ScmClient.GetToken(), "refs/heads/*:refs/remotes/origin/*", ".github/workflows")
246+
if err != nil {
247+
return nil, fmt.Errorf("failed to fetch cone: %w", err)
248+
}
249+
defer os.RemoveAll(tempDir)
250+
251+
bar.Describe("Listing unique workflows")
252+
_ = bar.Add(1)
253+
254+
workflows, err := a.GitClient.GetUniqWorkflowsBranches(ctx, tempDir)
255+
if err != nil {
256+
return nil, fmt.Errorf("failed to get unique workflow: %w", err)
257+
}
258+
259+
bar.Describe("Check which workflows match regex: " + regex.String())
260+
_ = bar.Add(1)
261+
262+
workflowDir := filepath.Join(tempDir, ".github", "workflows")
263+
if err = os.MkdirAll(workflowDir, 0700); err != nil {
264+
return nil, fmt.Errorf("failed to create .github/workflows/ dir: %w", err)
265+
}
266+
267+
wg := sync.WaitGroup{}
268+
errChan := make(chan error, 1)
269+
maxGoroutines := 5
270+
if numberOfGoroutines != nil {
271+
maxGoroutines = *numberOfGoroutines
272+
}
273+
semaphore := semaphore.NewWeighted(int64(maxGoroutines))
274+
m := sync.Mutex{}
275+
blobShas := make([]string, 0, len(workflows))
276+
for sha := range workflows {
277+
blobShas = append(blobShas, sha)
278+
}
279+
for _, blobSha := range blobShas {
280+
if err := semaphore.Acquire(ctx, 1); err != nil {
281+
errChan <- fmt.Errorf("failed to acquire semaphore: %w", err)
282+
break
283+
}
284+
wg.Add(1)
285+
go func(blobSha string) {
286+
defer wg.Done()
287+
defer semaphore.Release(1)
288+
match, content, err := a.GitClient.BlobMatches(ctx, tempDir, blobSha, regex)
289+
if err != nil {
290+
errChan <- fmt.Errorf("failed to blob match %s: %w", blobSha, err)
291+
return
292+
}
293+
if match {
294+
err = os.WriteFile(filepath.Join(workflowDir, blobSha+".yaml"), content, 0644)
295+
if err != nil {
296+
errChan <- fmt.Errorf("failed to write file for blob %s: %w", blobSha, err)
297+
}
298+
} else {
299+
m.Lock()
300+
delete(workflows, blobSha)
301+
m.Unlock()
302+
}
303+
}(blobSha)
304+
}
305+
wg.Wait()
306+
close(errChan)
307+
for err := range errChan {
308+
return nil, err
309+
}
310+
311+
bar.Describe("Scanning package")
312+
_ = bar.Add(1)
313+
pkg, err := a.generatePackageInsights(ctx, tempDir, repo, "HEAD")
314+
if err != nil {
315+
return nil, fmt.Errorf("failed to generate package insight: %w", err)
316+
}
317+
318+
inventoryScanner := scanner.InventoryScanner{
319+
Path: tempDir,
320+
Parsers: []scanner.Parser{
321+
scanner.NewGithubActionWorkflowParser(),
322+
},
323+
}
324+
325+
scannedPackage, err := inventory.ScanPackageScanner(ctx, *pkg, &inventoryScanner)
326+
if err != nil {
327+
return nil, fmt.Errorf("failed to scan package: %w", err)
328+
}
329+
330+
_ = bar.Finish()
331+
if *expand {
332+
expanded := []results.Finding{}
333+
for _, finding := range scannedPackage.FindingsResults.Findings {
334+
filename := filepath.Base(finding.Meta.Path)
335+
blobsha := strings.TrimSuffix(filename, filepath.Ext(filename))
336+
purl, err := models.NewPurl(finding.Purl)
337+
if err != nil {
338+
log.Warn().Err(err).Str("purl", finding.Purl).Msg("failed to evaluate PURL, skipping")
339+
continue
340+
}
341+
for _, branchInfo := range workflows[blobsha] {
342+
for _, path := range branchInfo.FilePath {
343+
finding.Meta.Path = path
344+
purl.Version = branchInfo.BranchName
345+
finding.Purl = purl.String()
346+
expanded = append(expanded, finding)
347+
}
348+
}
349+
}
350+
scannedPackage.FindingsResults.Findings = expanded
351+
352+
if err := a.Formatter.Format(ctx, []*models.PackageInsights{scannedPackage}); err != nil {
353+
return nil, fmt.Errorf("failed to finalize analysis of package: %w", err)
354+
}
355+
} else {
356+
if err := a.Formatter.FormatWithPath(ctx, []*models.PackageInsights{scannedPackage}, workflows); err != nil {
357+
return nil, fmt.Errorf("failed to finalize analysis of package: %w", err)
358+
}
359+
360+
}
361+
362+
return scannedPackage, nil
363+
}
364+
211365
func (a *Analyzer) AnalyzeRepo(ctx context.Context, repoString string, ref string) (*models.PackageInsights, error) {
212366
org, repoName, err := a.ScmClient.ParseRepoAndOrg(repoString)
213367
if err != nil {
@@ -304,6 +458,7 @@ func (a *Analyzer) AnalyzeLocalRepo(ctx context.Context, repoPath string) (*mode
304458

305459
// Formatter receives the scanned packages once an analysis is complete.
type Formatter interface {
306460
Format(ctx context.Context, packages []*models.PackageInsights) error
461+
// FormatWithPath is like Format but additionally receives pathAssociation;
// AnalyzeStaleBranches passes its blob-SHA -> branches map here.
FormatWithPath(ctx context.Context, packages []*models.PackageInsights, pathAssociation map[string][]models.BranchInfo) error
307462
}
308463

309464
func (a *Analyzer) finalizeAnalysis(ctx context.Context, scannedPackages []*models.PackageInsights) error {
@@ -316,14 +471,15 @@ func (a *Analyzer) finalizeAnalysis(ctx context.Context, scannedPackages []*mode
316471
}
317472

318473
func (a *Analyzer) generatePackageInsights(ctx context.Context, tempDir string, repo Repository, ref string) (*models.PackageInsights, error) {
474+
var err error
319475
commitDate, err := a.GitClient.LastCommitDate(ctx, tempDir)
320476
if err != nil {
321-
return nil, fmt.Errorf("failed to get last commit date: %w", err)
477+
log.Ctx(ctx).Warn().Err(err).Msg("failed to get last commit date")
322478
}
323479

324480
commitSha, err := a.GitClient.CommitSHA(tempDir)
325481
if err != nil {
326-
return nil, fmt.Errorf("failed to get commit SHA: %w", err)
482+
log.Ctx(ctx).Warn().Err(err).Msg("failed to get commit SHA")
327483
}
328484

329485
var (
@@ -376,6 +532,20 @@ func (a *Analyzer) generatePackageInsights(ctx context.Context, tempDir string,
376532
return pkg, nil
377533
}
378534

535+
func (a *Analyzer) fetchConeToTemp(ctx context.Context, gitURL, token, ref string, cone string) (string, error) {
536+
tempDir, err := os.MkdirTemp("", TEMP_DIR_PREFIX)
537+
if err != nil {
538+
return "", fmt.Errorf("failed to create temp directory: %w", err)
539+
}
540+
541+
err = a.GitClient.FetchCone(ctx, tempDir, gitURL, token, ref, cone)
542+
if err != nil {
543+
os.RemoveAll(tempDir) // Clean up if cloning fails
544+
return "", fmt.Errorf("failed to clone repo: %w", err)
545+
}
546+
return tempDir, nil
547+
}
548+
379549
func (a *Analyzer) cloneRepoToTemp(ctx context.Context, gitURL string, token string, ref string) (string, error) {
380550
tempDir, err := os.MkdirTemp("", TEMP_DIR_PREFIX)
381551
if err != nil {

cmd/analyzeRepoStaleBranches.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
package cmd
2+
3+
import (
4+
"errors"
5+
"fmt"
6+
"regexp"
7+
8+
"github.com/spf13/cobra"
9+
"github.com/spf13/viper"
10+
)
11+
12+
// threadsRepoStaleBranch is bound to the --threads/-j flag (default 5).
var threadsRepoStaleBranch int
13+
// expand is bound to the --expand/-e flag (default false).
var expand bool
14+
// regex is bound to the --regex/-r flag (default "pull_request_target"); it is
// compiled with regexp.Compile when the command runs.
var regex string
15+
16+
var analyzeRepoStaleBranches = &cobra.Command{
17+
Use: "analyze_repo_stale_branches",
18+
Short: "Analyzes a remote repository for pull_request_target vulnerabilities in stale branches",
19+
Long: `Analyzes a remote repository, looping through all remote branches to find unique GitHub Actions workflows with old pull_request_target vulnerabilities, even though the default branch does not have that vulnerability anymore.
20+
Example Scanning a remote Github Repository: poutine analyze_repo_stale_branches org/repo --token "$GH_TOKEN"`,
21+
Args: cobra.ExactArgs(1),
22+
RunE: func(cmd *cobra.Command, args []string) error {
23+
token = viper.GetString("token")
24+
ctx := cmd.Context()
25+
analyzer, err := GetAnalyzer(ctx, "analyze_repo_stale_branches")
26+
if err != nil {
27+
return fmt.Errorf("error getting analyzer analyze_repo_stale_branches: %w", err)
28+
}
29+
30+
if Format == "sarif" {
31+
return errors.New("sarif formatter not supported for analyze_repo_stale_branches")
32+
}
33+
34+
repo := args[0]
35+
36+
reg, err := regexp.Compile(regex)
37+
if err != nil {
38+
return fmt.Errorf("error compiling regex: %w", err)
39+
}
40+
41+
_, err = analyzer.AnalyzeStaleBranches(ctx, repo, &threadsRepoStaleBranch, &expand, reg)
42+
if err != nil {
43+
return fmt.Errorf("failed to analyze repo %s: %w", repo, err)
44+
}
45+
46+
return nil
47+
},
48+
}
49+
50+
func init() {
51+
rootCmd.AddCommand(analyzeRepoStaleBranches)
52+
53+
analyzeRepoStaleBranches.Flags().StringVarP(&token, "token", "t", "", "SCM access token (env: GH_TOKEN)")
54+
analyzeRepoStaleBranches.Flags().IntVarP(&threadsRepoStaleBranch, "threads", "j", 5, "Parallelization factor for scanning stale branches")
55+
analyzeRepoStaleBranches.Flags().BoolVarP(&expand, "expand", "e", false, "Expand the output to the classic representation from analyze_repo")
56+
analyzeRepoStaleBranches.Flags().StringVarP(&regex, "regex", "r", "pull_request_target", "Regex to check if the workflow is accessible in stale branches")
57+
58+
_ = viper.BindPFlag("token", analyzeRepoStaleBranches.Flags().Lookup("token"))
59+
_ = viper.BindEnv("token", "GH_TOKEN")
60+
}

0 commit comments

Comments
 (0)