Skip to content

Commit f7e176d

Browse files
authored
heal: Avoid deadline error with very large objects (minio#140) (minio#20586)
Healing a large object in normal scan mode, where no part reads are involved, can still fail after 30 seconds if the object has too many parts, mainly when hard disks are being used. The reason is that a single general deadline covers the check of all parts; instead, we now apply a deadline per part.
1 parent 72a0d14 commit f7e176d

File tree

2 files changed

+41
-42
lines changed

2 files changed

+41
-42
lines changed

cmd/xl-storage-disk-id-check.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -509,9 +509,7 @@ func (p *xlStorageDiskIDCheck) CheckParts(ctx context.Context, volume string, pa
509509
}
510510
defer done(0, &err)
511511

512-
return xioutil.WithDeadline[*CheckPartsResp](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (res *CheckPartsResp, err error) {
513-
return p.storage.CheckParts(ctx, volume, path, fi)
514-
})
512+
return p.storage.CheckParts(ctx, volume, path, fi)
515513
}
516514

517515
func (p *xlStorageDiskIDCheck) DeleteBulk(ctx context.Context, volume string, paths ...string) (err error) {

cmd/xl-storage.go

Lines changed: 40 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -2367,6 +2367,41 @@ func (s *xlStorage) AppendFile(ctx context.Context, volume string, path string,
23672367
return nil
23682368
}
23692369

2370+
// checkPart is a light check of the existence and size of a part, without doing a bitrot operation
2371+
// For any unexpected error, return checkPartUnknown (zero)
2372+
func (s *xlStorage) checkPart(volumeDir, path, dataDir string, partNum int, expectedSize int64, skipAccessCheck bool) (resp int) {
2373+
partPath := pathJoin(path, dataDir, fmt.Sprintf("part.%d", partNum))
2374+
filePath := pathJoin(volumeDir, partPath)
2375+
st, err := Lstat(filePath)
2376+
if err != nil {
2377+
if osIsNotExist(err) {
2378+
if !skipAccessCheck {
2379+
// Stat a volume entry.
2380+
if verr := Access(volumeDir); verr != nil {
2381+
if osIsNotExist(verr) {
2382+
resp = checkPartVolumeNotFound
2383+
}
2384+
return
2385+
}
2386+
}
2387+
}
2388+
if osErrToFileErr(err) == errFileNotFound {
2389+
resp = checkPartFileNotFound
2390+
}
2391+
return
2392+
}
2393+
if st.Mode().IsDir() {
2394+
resp = checkPartFileNotFound
2395+
return
2396+
}
2397+
// Check if shard is truncated.
2398+
if st.Size() < expectedSize {
2399+
resp = checkPartFileCorrupt
2400+
return
2401+
}
2402+
return checkPartSuccess
2403+
}
2404+
23702405
// CheckParts check if path has necessary parts available.
23712406
func (s *xlStorage) CheckParts(ctx context.Context, volume string, path string, fi FileInfo) (*CheckPartsResp, error) {
23722407
volumeDir, err := s.getVolDir(volume)
@@ -2385,36 +2420,12 @@ func (s *xlStorage) CheckParts(ctx context.Context, volume string, path string,
23852420
}
23862421

23872422
for i, part := range fi.Parts {
2388-
partPath := pathJoin(path, fi.DataDir, fmt.Sprintf("part.%d", part.Number))
2389-
filePath := pathJoin(volumeDir, partPath)
2390-
st, err := Lstat(filePath)
2423+
resp.Results[i], err = xioutil.WithDeadline[int](ctx, globalDriveConfig.GetMaxTimeout(), func(ctx context.Context) (int, error) {
2424+
return s.checkPart(volumeDir, path, fi.DataDir, part.Number, fi.Erasure.ShardFileSize(part.Size), skipAccessChecks(volume)), nil
2425+
})
23912426
if err != nil {
2392-
if osIsNotExist(err) {
2393-
if !skipAccessChecks(volume) {
2394-
// Stat a volume entry.
2395-
if verr := Access(volumeDir); verr != nil {
2396-
if osIsNotExist(verr) {
2397-
resp.Results[i] = checkPartVolumeNotFound
2398-
}
2399-
continue
2400-
}
2401-
}
2402-
}
2403-
if osErrToFileErr(err) == errFileNotFound {
2404-
resp.Results[i] = checkPartFileNotFound
2405-
}
2406-
continue
2407-
}
2408-
if st.Mode().IsDir() {
2409-
resp.Results[i] = checkPartFileNotFound
2410-
continue
2411-
}
2412-
// Check if shard is truncated.
2413-
if st.Size() < fi.Erasure.ShardFileSize(part.Size) {
2414-
resp.Results[i] = checkPartFileCorrupt
2415-
continue
2427+
return nil, err
24162428
}
2417-
resp.Results[i] = checkPartSuccess
24182429
}
24192430

24202431
return &resp, nil
@@ -2546,17 +2557,7 @@ func (s *xlStorage) Delete(ctx context.Context, volume string, path string, dele
25462557
}
25472558

25482559
func skipAccessChecks(volume string) (ok bool) {
2549-
for _, prefix := range []string{
2550-
minioMetaTmpDeletedBucket,
2551-
minioMetaTmpBucket,
2552-
minioMetaMultipartBucket,
2553-
minioMetaBucket,
2554-
} {
2555-
if strings.HasPrefix(volume, prefix) {
2556-
return true
2557-
}
2558-
}
2559-
return ok
2560+
return strings.HasPrefix(volume, minioMetaBucket)
25602561
}
25612562

25622563
// RenameData - rename source path to destination path atomically, metadata and data directory.

0 commit comments

Comments
 (0)