Skip to content

Commit e0ad72e

Browse files
rremerwxiaoguang
andauthored
Fail mirroring more gracefully (#34002)
* reuse recoverable error checks across mirror_pull * add new cases for 'cannot lock ref/not our ref' (race condition in fetch) and 'Unable to create/lock" * move lfs sync right after commit graph write, and before other maintenance which may fail * try a prune for 'broken reference' as well as 'not our ref' * always sync LFS right after commit graph write, and before other maintenance which may fail This handles a few cases where our very large and very active repositories could serve mirrored git refs, but be missing lfs files: ## Case 1 (multiple variants): Race condition in git fetch There was already a check for 'unable to resolve reference' on a failed git fetch, after which a git prune and then subsequent fetch are performed. This is to work around a race condition where the git remote tells Gitea about a ref for some HEAD of a branch, then fails a few seconds later because the remote branch was deleted, or the ref was updated (force push). There are two more variants to the error message you can get, but for the same kind of race condition. These *may* be related to the git binary version Gitea has access to (in my case, it was 2.48.1). ## Case 2: githttp.go can serve updated git refs before it's synced lfs oids There is probably a more aggressive refactor we could do here to have the cat-file loop use FETCH_HEAD instead of relying on the commit graphs to be committed locally (and thus serveable to clients of Gitea), but a simple reduction in the occurrences of this for me was to move the lfs sync block immediately after the commit-graph write and before any other time-consuming (or potentially erroring/exiting) blocks. --------- Co-authored-by: wxiaoguang <[email protected]>
1 parent c7b85f7 commit e0ad72e

File tree

2 files changed

+57
-11
lines changed

2 files changed

+57
-11
lines changed

services/mirror/mirror_pull.go

+29-11
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,24 @@ func pruneBrokenReferences(ctx context.Context,
235235
return pruneErr
236236
}
237237

238+
// checkRecoverableSyncError takes an error message from a git fetch command and returns false if it should be a fatal/blocking error
239+
func checkRecoverableSyncError(stderrMessage string) bool {
240+
switch {
241+
case strings.Contains(stderrMessage, "unable to resolve reference") && strings.Contains(stderrMessage, "reference broken"):
242+
return true
243+
case strings.Contains(stderrMessage, "remote error") && strings.Contains(stderrMessage, "not our ref"):
244+
return true
245+
case strings.Contains(stderrMessage, "cannot lock ref") && strings.Contains(stderrMessage, "but expected"):
246+
return true
247+
case strings.Contains(stderrMessage, "cannot lock ref") && strings.Contains(stderrMessage, "unable to resolve reference"):
248+
return true
249+
case strings.Contains(stderrMessage, "Unable to create") && strings.Contains(stderrMessage, ".lock"):
250+
return true
251+
default:
252+
return false
253+
}
254+
}
255+
238256
// runSync returns true if sync finished without error.
239257
func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bool) {
240258
repoPath := m.Repo.RepoPath()
@@ -275,7 +293,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
275293
stdoutMessage := util.SanitizeCredentialURLs(stdout)
276294

277295
// Now check if the error is a resolve reference due to broken reference
278-
if strings.Contains(stderr, "unable to resolve reference") && strings.Contains(stderr, "reference broken") {
296+
if checkRecoverableSyncError(stderr) {
279297
log.Warn("SyncMirrors [repo: %-v]: failed to update mirror repository due to broken references:\nStdout: %s\nStderr: %s\nErr: %v\nAttempting Prune", m.Repo, stdoutMessage, stderrMessage, err)
280298
err = nil
281299

@@ -324,6 +342,15 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
324342
return nil, false
325343
}
326344

345+
if m.LFS && setting.LFS.StartServer {
346+
log.Trace("SyncMirrors [repo: %-v]: syncing LFS objects...", m.Repo)
347+
endpoint := lfs.DetermineEndpoint(remoteURL.String(), m.LFSEndpoint)
348+
lfsClient := lfs.NewClient(endpoint, nil)
349+
if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, m.Repo, gitRepo, lfsClient); err != nil {
350+
log.Error("SyncMirrors [repo: %-v]: failed to synchronize LFS objects for repository: %v", m.Repo, err)
351+
}
352+
}
353+
327354
log.Trace("SyncMirrors [repo: %-v]: syncing branches...", m.Repo)
328355
if _, err = repo_module.SyncRepoBranchesWithRepo(ctx, m.Repo, gitRepo, 0); err != nil {
329356
log.Error("SyncMirrors [repo: %-v]: failed to synchronize branches: %v", m.Repo, err)
@@ -333,15 +360,6 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
333360
if err = repo_module.SyncReleasesWithTags(ctx, m.Repo, gitRepo); err != nil {
334361
log.Error("SyncMirrors [repo: %-v]: failed to synchronize tags to releases: %v", m.Repo, err)
335362
}
336-
337-
if m.LFS && setting.LFS.StartServer {
338-
log.Trace("SyncMirrors [repo: %-v]: syncing LFS objects...", m.Repo)
339-
endpoint := lfs.DetermineEndpoint(remoteURL.String(), m.LFSEndpoint)
340-
lfsClient := lfs.NewClient(endpoint, nil)
341-
if err = repo_module.StoreMissingLfsObjectsInRepository(ctx, m.Repo, gitRepo, lfsClient); err != nil {
342-
log.Error("SyncMirrors [repo: %-v]: failed to synchronize LFS objects for repository: %v", m.Repo, err)
343-
}
344-
}
345363
gitRepo.Close()
346364

347365
log.Trace("SyncMirrors [repo: %-v]: updating size of repository", m.Repo)
@@ -368,7 +386,7 @@ func runSync(ctx context.Context, m *repo_model.Mirror) ([]*mirrorSyncResult, bo
368386
stdoutMessage := util.SanitizeCredentialURLs(stdout)
369387

370388
// Now check if the error is a resolve reference due to broken reference
371-
if strings.Contains(stderrMessage, "unable to resolve reference") && strings.Contains(stderrMessage, "reference broken") {
389+
if checkRecoverableSyncError(stderrMessage) {
372390
log.Warn("SyncMirrors [repo: %-v Wiki]: failed to update mirror wiki repository due to broken references:\nStdout: %s\nStderr: %s\nErr: %v\nAttempting Prune", m.Repo, stdoutMessage, stderrMessage, err)
373391
err = nil
374392

services/mirror/mirror_test.go renamed to services/mirror/mirror_pull_test.go

+28
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,31 @@ func Test_parseRemoteUpdateOutput(t *testing.T) {
6464
assert.EqualValues(t, "1c97ebc746", results[9].oldCommitID)
6565
assert.EqualValues(t, "976d27d52f", results[9].newCommitID)
6666
}
67+
68+
func Test_checkRecoverableSyncError(t *testing.T) {
69+
cases := []struct {
70+
recoverable bool
71+
message string
72+
}{
73+
// A race condition in http git-fetch where certain refs were listed on the remote and are no longer there, would exit status 128
74+
{true, "fatal: remote error: upload-pack: not our ref 988881adc9fc3655077dc2d4d757d480b5ea0e11"},
75+
// A race condition where a local gc/prune removes a named ref during a git-fetch would exit status 1
76+
{true, "cannot lock ref 'refs/pull/123456/merge': unable to resolve reference 'refs/pull/134153/merge'"},
77+
// A race condition in http git-fetch where named refs were listed on the remote and are no longer there
78+
{true, "error: cannot lock ref 'refs/remotes/origin/foo': unable to resolve reference 'refs/remotes/origin/foo': reference broken"},
79+
// A race condition in http git-fetch where named refs were force-pushed during the update, would exit status 128
80+
{true, "error: cannot lock ref 'refs/pull/123456/merge': is at 988881adc9fc3655077dc2d4d757d480b5ea0e11 but expected 7f894307ffc9553edbd0b671cab829786866f7b2"},
81+
// A race condition with other local git operations, such as git-maintenance, would exit status 128 (well, "Unable" the "U" is uppercase)
82+
{true, "fatal: Unable to create '/data/gitea-repositories/foo-org/bar-repo.git/./objects/info/commit-graphs/commit-graph-chain.lock': File exists."},
83+
// Missing or unauthorized credentials, would exit status 128
84+
{false, "fatal: Authentication failed for 'https://example.com/foo-does-not-exist/bar.git/'"},
85+
// A non-existent remote repository, would exit status 128
86+
{false, "fatal: Could not read from remote repository."},
87+
// A non-functioning proxy, would exit status 128
88+
{false, "fatal: unable to access 'https://example.com/foo-does-not-exist/bar.git/': Failed to connect to configured-https-proxy port 1080 after 0 ms: Couldn't connect to server"},
89+
}
90+
91+
for _, c := range cases {
92+
assert.Equal(t, c.recoverable, checkRecoverableSyncError(c.message), "test case: %s", c.message)
93+
}
94+
}

0 commit comments

Comments
 (0)