Skip to content

Commit b88e5fc

Browse files
authored
Fix slow patch checking with commits that add or remove many files (#31548)
Running git update-index for every individual file is slow, so add and remove everything with a single git command. When such a big commit lands in the default branch, it could cause PR creation and patch checking for all open PRs to be slow, or time out entirely. For example, a commit that removes 1383 files was measured to take more than 60 seconds and timed out. With this change checking took about a second. This is related to #27967, though this will not help with commits that change many lines in few files.
1 parent 2c92c7c commit b88e5fc

File tree

2 files changed

+48
-16
lines changed

2 files changed

+48
-16
lines changed

modules/git/repo_index.go

+27-8
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,8 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error {
104104
buffer := new(bytes.Buffer)
105105
for _, file := range filenames {
106106
if file != "" {
107-
buffer.WriteString("0 ")
108-
buffer.WriteString(objectFormat.EmptyObjectID().String())
109-
buffer.WriteByte('\t')
110-
buffer.WriteString(file)
111-
buffer.WriteByte('\000')
107+
// using format: mode SP type SP sha1 TAB path
108+
buffer.WriteString("0 blob " + objectFormat.EmptyObjectID().String() + "\t" + file + "\000")
112109
}
113110
}
114111
return cmd.Run(&RunOpts{
@@ -119,11 +116,33 @@ func (repo *Repository) RemoveFilesFromIndex(filenames ...string) error {
119116
})
120117
}
121118

119+
type IndexObjectInfo struct {
120+
Mode string
121+
Object ObjectID
122+
Filename string
123+
}
124+
125+
// AddObjectsToIndex adds the provided object hashes to the index at the provided filenames
126+
func (repo *Repository) AddObjectsToIndex(objects ...IndexObjectInfo) error {
127+
cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "-z", "--index-info")
128+
stdout := new(bytes.Buffer)
129+
stderr := new(bytes.Buffer)
130+
buffer := new(bytes.Buffer)
131+
for _, object := range objects {
132+
// using format: mode SP type SP sha1 TAB path
133+
buffer.WriteString(object.Mode + " blob " + object.Object.String() + "\t" + object.Filename + "\000")
134+
}
135+
return cmd.Run(&RunOpts{
136+
Dir: repo.Path,
137+
Stdin: bytes.NewReader(buffer.Bytes()),
138+
Stdout: stdout,
139+
Stderr: stderr,
140+
})
141+
}
142+
122143
// AddObjectToIndex adds the provided object hash to the index at the provided filename
123144
func (repo *Repository) AddObjectToIndex(mode string, object ObjectID, filename string) error {
124-
cmd := NewCommand(repo.Ctx, "update-index", "--add", "--replace", "--cacheinfo").AddDynamicArguments(mode, object.String(), filename)
125-
_, _, err := cmd.RunStdString(&RunOpts{Dir: repo.Path})
126-
return err
145+
return repo.AddObjectsToIndex(IndexObjectInfo{Mode: mode, Object: object, Filename: filename})
127146
}
128147

129148
// WriteTree writes the current index as a tree to the object db and returns its hash

services/pull/patch.go

+21-8
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ func (e *errMergeConflict) Error() string {
128128
return fmt.Sprintf("conflict detected at: %s", e.filename)
129129
}
130130

131-
func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, gitRepo *git.Repository) error {
131+
func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, filesToRemove *[]string, filesToAdd *[]git.IndexObjectInfo) error {
132132
log.Trace("Attempt to merge:\n%v", file)
133133

134134
switch {
@@ -142,14 +142,13 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
142142
}
143143

144144
// Not a genuine conflict and we can simply remove the file from the index
145-
return gitRepo.RemoveFilesFromIndex(file.stage1.path)
145+
*filesToRemove = append(*filesToRemove, file.stage1.path)
146+
return nil
146147
case file.stage1 == nil && file.stage2 != nil && (file.stage3 == nil || file.stage2.SameAs(file.stage3)):
147148
// 2. Added in ours but not in theirs or identical in both
148149
//
149150
// Not a genuine conflict just add to the index
150-
if err := gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(file.stage2.sha), file.stage2.path); err != nil {
151-
return err
152-
}
151+
*filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(file.stage2.sha), Filename: file.stage2.path})
153152
return nil
154153
case file.stage1 == nil && file.stage2 != nil && file.stage3 != nil && file.stage2.sha == file.stage3.sha && file.stage2.mode != file.stage3.mode:
155154
// 3. Added in both with the same sha but the modes are different
@@ -160,7 +159,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
160159
// 4. Added in theirs but not ours:
161160
//
162161
// Not a genuine conflict just add to the index
163-
return gitRepo.AddObjectToIndex(file.stage3.mode, git.MustIDFromString(file.stage3.sha), file.stage3.path)
162+
*filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage3.mode, Object: git.MustIDFromString(file.stage3.sha), Filename: file.stage3.path})
163+
return nil
164164
case file.stage1 == nil:
165165
// 5. Created by new in both
166166
//
@@ -221,7 +221,8 @@ func attemptMerge(ctx context.Context, file *unmergedFile, tmpBasePath string, g
221221
return err
222222
}
223223
hash = strings.TrimSpace(hash)
224-
return gitRepo.AddObjectToIndex(file.stage2.mode, git.MustIDFromString(hash), file.stage2.path)
224+
*filesToAdd = append(*filesToAdd, git.IndexObjectInfo{Mode: file.stage2.mode, Object: git.MustIDFromString(hash), Filename: file.stage2.path})
225+
return nil
225226
default:
226227
if file.stage1 != nil {
227228
return &errMergeConflict{file.stage1.path}
@@ -245,6 +246,9 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
245246
return false, nil, fmt.Errorf("unable to run read-tree -m! Error: %w", err)
246247
}
247248

249+
var filesToRemove []string
250+
var filesToAdd []git.IndexObjectInfo
251+
248252
// Then we use git ls-files -u to list the unmerged files and collate the triples in unmergedfiles
249253
unmerged := make(chan *unmergedFile)
250254
go unmergedFiles(ctx, gitPath, unmerged)
@@ -270,7 +274,7 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
270274
}
271275

272276
// OK now we have the unmerged file triplet attempt to merge it
273-
if err := attemptMerge(ctx, file, gitPath, gitRepo); err != nil {
277+
if err := attemptMerge(ctx, file, gitPath, &filesToRemove, &filesToAdd); err != nil {
274278
if conflictErr, ok := err.(*errMergeConflict); ok {
275279
log.Trace("Conflict: %s in %s", conflictErr.filename, description)
276280
conflict = true
@@ -283,6 +287,15 @@ func AttemptThreeWayMerge(ctx context.Context, gitPath string, gitRepo *git.Repo
283287
return false, nil, err
284288
}
285289
}
290+
291+
// Add and remove files in one command, as this is slow with many files otherwise
292+
if err := gitRepo.RemoveFilesFromIndex(filesToRemove...); err != nil {
293+
return false, nil, err
294+
}
295+
if err := gitRepo.AddObjectsToIndex(filesToAdd...); err != nil {
296+
return false, nil, err
297+
}
298+
286299
return conflict, conflictedFiles, nil
287300
}
288301

0 commit comments

Comments
 (0)