Skip to content

Commit 7c09aaf

Browse files
committed
fix ENOMEM dedup errors
Fixes #1. I can now dedup my whole FS without any error. This was occurring because Linux only allows passing one page worth of descriptors, which caps out at 127 files; trying to dedup 128+ files together would error.
1 parent 1b813ea commit 7c09aaf

File tree

1 file changed

+42
-30
lines changed

1 file changed

+42
-30
lines changed

Diff for: main.go

+42-30
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ import (
3838
"golang.org/x/sys/unix"
3939
)
4040

41+
const pageSize = 4096 // FIXME: should vary this properly, it's fine to underestimate but not overestimate
42+
const maxInfos = (pageSize - unix.SizeofRawFileDedupeRange) / unix.SizeofRawFileDedupeRangeInfo
43+
4144
var printLock sync.Mutex
4245

4346
func print(s string) {
@@ -306,6 +309,7 @@ func dedup(backoff chan struct{}, length uint64, paths ...string) {
306309
var hasFailed atomic.Bool
307310

308311
dedups := make([]unix.FileDedupeRangeInfo, len(paths))
312+
// FIXME: we don't need that many paths open at once, we could be more economical about fds if we only opened the current batch's files.
309313
for i, p := range paths {
310314
// Use a goroutine instead of recursion and block the callback in case we have an enormous amount of duplicates.
311315
go func() {
@@ -369,46 +373,54 @@ func dedup(backoff chan struct{}, length uint64, paths ...string) {
369373
return
370374
}
371375

376+
var dedupped uint64
372377
source := valid[0].Dest_fd
373378
valid = valid[1:]
374-
var dedupped, offset uint64
375379
for {
376-
arg := &unix.FileDedupeRange{
377-
Src_length: length,
378-
Src_offset: offset,
379-
Info: valid,
380-
}
381-
err := unix.IoctlFileDedupeRange(int(source), arg)
382-
if err != nil {
383-
print(paths[0] + ": (FileDedupeRange): " + err.Error())
384-
totalDeddupingErrors.Add(uint64(len(valid)))
385-
return
386-
}
380+
current := valid[:min(len(valid), maxInfos)]
381+
var offset uint64
382+
for {
383+
arg := &unix.FileDedupeRange{
384+
Src_length: length - offset,
385+
Src_offset: offset,
386+
Info: current,
387+
}
388+
err := unix.IoctlFileDedupeRange(int(source), arg)
389+
if err != nil {
390+
print(paths[0] + ": (FileDedupeRange): " + err.Error())
391+
totalDeddupingErrors.Add(uint64(len(current)))
392+
return
393+
}
387394

388-
var best uint64
389-
nextValid := valid[:0]
390-
for i, v := range valid {
391-
bytesDedupped := v.Bytes_deduped
392-
dedupped += bytesDedupped
393-
if bytesDedupped < best {
394-
// this file is having issues, forget about it.
395-
continue
395+
var best uint64
396+
nextCurrent := current[:0]
397+
for i, v := range current {
398+
bytesDedupped := v.Bytes_deduped
399+
dedupped += bytesDedupped
400+
if bytesDedupped < best {
401+
// this file is having issues, forget about it.
402+
continue
403+
}
404+
if best < bytesDedupped {
405+
// previous files were doing poorly, forget about them.
406+
best = bytesDedupped
407+
nextCurrent = current[i:i]
408+
}
409+
v.Dest_offset += bytesDedupped
410+
nextCurrent = append(nextCurrent, v)
396411
}
397-
if best < bytesDedupped {
398-
// previous files were doing poorly, forget about them.
399-
best = bytesDedupped
400-
nextValid = valid[i:i]
412+
current = nextCurrent
413+
offset += best
414+
415+
if offset == length || best == 0 {
416+
break
401417
}
402-
v.Dest_offset += bytesDedupped
403-
nextValid = append(nextValid, v)
404418
}
405-
valid = nextValid
406-
offset += best
407-
length -= best
408419

409-
if length == 0 || best == 0 {
420+
if len(valid) <= maxInfos {
410421
break
411422
}
423+
valid = valid[maxInfos:]
412424
}
413425
totalDedupped.Add(dedupped)
414426
}

0 commit comments

Comments
 (0)