Skip to content

Commit 7dcccc3

Browse files
bohdewxiaoguang
andauthored
improve performance of diffs (#32393)
This has two major changes that significantly reduce the amount of work done for large diffs: * Kill a running git process when reaching the maximum number of files in a diff, preventing it from processing the entire diff. * When loading a diff with the URL param `file-only=true`, skip loading stats. This speeds up loading both hidden files of a diff and sections of a diff when clicking the "Show More" button. A couple of minor things from profiling are also included: * Reuse existing repo in `PrepareViewPullInfo` if head and base are the same. The performance impact is going to depend heavily on the individual diff and the hardware it runs on, but when testing locally on a diff changing 100k+ lines over hundreds of files, I'm seeing a roughly 75% reduction in time to load the result of "Show More" --------- Co-authored-by: wxiaoguang <[email protected]>
1 parent ec2d159 commit 7dcccc3

File tree

5 files changed

+44
-42
lines changed

5 files changed

+44
-42
lines changed

routers/web/repo/commit.go

+1
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ func Diff(ctx *context.Context) {
328328
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
329329
MaxFiles: maxFiles,
330330
WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
331+
FileOnly: fileOnly,
331332
}, files...)
332333
if err != nil {
333334
ctx.NotFound("GetDiff", err)

routers/web/repo/compare.go

+3
Original file line numberDiff line numberDiff line change
@@ -611,6 +611,8 @@ func PrepareCompareDiff(
611611
maxLines, maxFiles = -1, -1
612612
}
613613

614+
fileOnly := ctx.FormBool("file-only")
615+
614616
diff, err := gitdiff.GetDiff(ctx, ci.HeadGitRepo,
615617
&gitdiff.DiffOptions{
616618
BeforeCommitID: beforeCommitID,
@@ -621,6 +623,7 @@ func PrepareCompareDiff(
621623
MaxFiles: maxFiles,
622624
WhitespaceBehavior: whitespaceBehavior,
623625
DirectComparison: ci.DirectComparison,
626+
FileOnly: fileOnly,
624627
}, ctx.FormStrings("files")...)
625628
if err != nil {
626629
ctx.ServerError("GetDiffRangeWithWhitespaceBehavior", err)

routers/web/repo/pull.go

+4-3
Original file line numberDiff line numberDiff line change
@@ -395,12 +395,12 @@ func PrepareViewPullInfo(ctx *context.Context, issue *issues_model.Issue) *git.C
395395
var headBranchSha string
396396
// HeadRepo may be missing
397397
if pull.HeadRepo != nil {
398-
headGitRepo, err := gitrepo.OpenRepository(ctx, pull.HeadRepo)
398+
headGitRepo, closer, err := gitrepo.RepositoryFromContextOrOpen(ctx, pull.HeadRepo)
399399
if err != nil {
400-
ctx.ServerError("OpenRepository", err)
400+
ctx.ServerError("RepositoryFromContextOrOpen", err)
401401
return nil
402402
}
403-
defer headGitRepo.Close()
403+
defer closer.Close()
404404

405405
if pull.Flow == issues_model.PullRequestFlowGithub {
406406
headBranchExist = headGitRepo.IsBranchExist(pull.HeadBranch)
@@ -747,6 +747,7 @@ func viewPullFiles(ctx *context.Context, specifiedStartCommit, specifiedEndCommi
747747
MaxLineCharacters: setting.Git.MaxGitDiffLineCharacters,
748748
MaxFiles: maxFiles,
749749
WhitespaceBehavior: gitdiff.GetWhitespaceFlag(ctx.Data["WhitespaceBehavior"].(string)),
750+
FileOnly: fileOnly,
750751
}
751752

752753
if !willShowSpecifiedCommit {

services/gitdiff/gitdiff.go

+27-35
Original file line numberDiff line numberDiff line change
@@ -380,18 +380,11 @@ func (diffFile *DiffFile) GetType() int {
380380
}
381381

382382
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
383-
func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
383+
func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommit, rightCommit *git.Commit) *DiffSection {
384384
if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
385385
return nil
386386
}
387-
leftCommit, err := gitRepo.GetCommit(leftCommitID)
388-
if err != nil {
389-
return nil
390-
}
391-
rightCommit, err := gitRepo.GetCommit(rightCommitID)
392-
if err != nil {
393-
return nil
394-
}
387+
395388
lastSection := diffFile.Sections[len(diffFile.Sections)-1]
396389
lastLine := lastSection.Lines[len(lastSection.Lines)-1]
397390
leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
@@ -536,11 +529,6 @@ parsingLoop:
536529
lastFile := createDiffFile(diff, line)
537530
diff.End = lastFile.Name
538531
diff.IsIncomplete = true
539-
_, err := io.Copy(io.Discard, reader)
540-
if err != nil {
541-
// By the definition of io.Copy this never returns io.EOF
542-
return diff, fmt.Errorf("error during io.Copy: %w", err)
543-
}
544532
break parsingLoop
545533
}
546534

@@ -1101,6 +1089,7 @@ type DiffOptions struct {
11011089
MaxFiles int
11021090
WhitespaceBehavior git.TrustedCmdArgs
11031091
DirectComparison bool
1092+
FileOnly bool
11041093
}
11051094

11061095
// GetDiff builds a Diff between two commits of a repository.
@@ -1109,12 +1098,16 @@ type DiffOptions struct {
11091098
func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
11101099
repoPath := gitRepo.Path
11111100

1101+
var beforeCommit *git.Commit
11121102
commit, err := gitRepo.GetCommit(opts.AfterCommitID)
11131103
if err != nil {
11141104
return nil, err
11151105
}
11161106

1117-
cmdDiff := git.NewCommand(gitRepo.Ctx)
1107+
cmdCtx, cmdCancel := context.WithCancel(ctx)
1108+
defer cmdCancel()
1109+
1110+
cmdDiff := git.NewCommand(cmdCtx)
11181111
objectFormat, err := gitRepo.GetObjectFormat()
11191112
if err != nil {
11201113
return nil, err
@@ -1136,6 +1129,12 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
11361129
AddArguments(opts.WhitespaceBehavior...).
11371130
AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
11381131
opts.BeforeCommitID = actualBeforeCommitID
1132+
1133+
var err error
1134+
beforeCommit, err = gitRepo.GetCommit(opts.BeforeCommitID)
1135+
if err != nil {
1136+
return nil, err
1137+
}
11391138
}
11401139

11411140
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
@@ -1163,14 +1162,16 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
11631162
Dir: repoPath,
11641163
Stdout: writer,
11651164
Stderr: stderr,
1166-
}); err != nil {
1165+
}); err != nil && err.Error() != "signal: killed" {
11671166
log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
11681167
}
11691168

11701169
_ = writer.Close()
11711170
}()
11721171

1173-
diff, err := ParsePatch(ctx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
1172+
diff, err := ParsePatch(cmdCtx, opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
1173+
// Ensure the git process is killed if it didn't exit already
1174+
cmdCancel()
11741175
if err != nil {
11751176
return nil, fmt.Errorf("unable to ParsePatch: %w", err)
11761177
}
@@ -1205,37 +1206,28 @@ func GetDiff(ctx context.Context, gitRepo *git.Repository, opts *DiffOptions, fi
12051206
}
12061207
diffFile.IsGenerated = isGenerated.Value()
12071208

1208-
tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
1209+
tailSection := diffFile.GetTailSection(gitRepo, beforeCommit, commit)
12091210
if tailSection != nil {
12101211
diffFile.Sections = append(diffFile.Sections, tailSection)
12111212
}
12121213
}
12131214

1214-
separator := "..."
1215-
if opts.DirectComparison {
1216-
separator = ".."
1215+
if opts.FileOnly {
1216+
return diff, nil
12171217
}
12181218

1219-
diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
1220-
if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == objectFormat.EmptyObjectID().String() {
1221-
diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
1222-
}
1223-
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
1224-
if err != nil && strings.Contains(err.Error(), "no merge base") {
1225-
// git >= 2.28 now returns an error if base and head have become unrelated.
1226-
// previously it would return the results of git diff --shortstat base head so let's try that...
1227-
diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
1228-
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
1229-
}
1219+
stats, err := GetPullDiffStats(gitRepo, opts)
12301220
if err != nil {
12311221
return nil, err
12321222
}
12331223

1224+
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion = stats.NumFiles, stats.TotalAddition, stats.TotalDeletion
1225+
12341226
return diff, nil
12351227
}
12361228

12371229
type PullDiffStats struct {
1238-
TotalAddition, TotalDeletion int
1230+
NumFiles, TotalAddition, TotalDeletion int
12391231
}
12401232

12411233
// GetPullDiffStats
@@ -1259,12 +1251,12 @@ func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStat
12591251
diffPaths = []string{objectFormat.EmptyTree().String(), opts.AfterCommitID}
12601252
}
12611253

1262-
_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
1254+
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
12631255
if err != nil && strings.Contains(err.Error(), "no merge base") {
12641256
// git >= 2.28 now returns an error if base and head have become unrelated.
12651257
// previously it would return the results of git diff --shortstat base head so let's try that...
12661258
diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
1267-
_, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
1259+
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
12681260
}
12691261
if err != nil {
12701262
return nil, err

services/repository/files/temp_repo.go

+9-4
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
358358
Stderr: stderr,
359359
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
360360
_ = stdoutWriter.Close()
361+
defer cancel()
361362
diff, finalErr = gitdiff.ParsePatch(t.ctx, setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, stdoutReader, "")
362363
if finalErr != nil {
363364
log.Error("ParsePatch: %v", finalErr)
@@ -371,10 +372,14 @@ func (t *TemporaryUploadRepository) DiffIndex() (*gitdiff.Diff, error) {
371372
log.Error("Unable to ParsePatch in temporary repo %s (%s). Error: %v", t.repo.FullName(), t.basePath, finalErr)
372373
return nil, finalErr
373374
}
374-
log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
375-
t.repo.FullName(), t.basePath, err, stderr)
376-
return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
377-
t.repo.FullName(), err, stderr)
375+
376+
// If the process exited early, don't error
377+
if err != context.Canceled {
378+
log.Error("Unable to run diff-index pipeline in temporary repo %s (%s). Error: %v\nStderr: %s",
379+
t.repo.FullName(), t.basePath, err, stderr)
380+
return nil, fmt.Errorf("Unable to run diff-index pipeline in temporary repo %s. Error: %w\nStderr: %s",
381+
t.repo.FullName(), err, stderr)
382+
}
378383
}
379384

380385
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(t.ctx, t.basePath, git.TrustedCmdArgs{"--cached"}, "HEAD")

0 commit comments

Comments
 (0)