@@ -38,6 +38,9 @@ import (
38
38
"golang.org/x/sys/unix"
39
39
)
40
40
41
+ const pageSize = 4096 // FIXME: should be derived from the actual page size instead of being hard-coded; underestimating is fine, overestimating is not
42
+ const maxInfos = (pageSize - unix .SizeofRawFileDedupeRange ) / unix .SizeofRawFileDedupeRangeInfo
43
+
41
44
var printLock sync.Mutex
42
45
43
46
func print (s string ) {
@@ -306,6 +309,7 @@ func dedup(backoff chan struct{}, length uint64, paths ...string) {
306
309
var hasFailed atomic.Bool
307
310
308
311
dedups := make ([]unix.FileDedupeRangeInfo , len (paths ))
312
+ // FIXME: we don't need that many paths open at once; we could be more economical with fds if we only opened the files of the current batch.
309
313
for i , p := range paths {
310
314
// Use a goroutine instead of recursion and block the callback in case we have an enormous amount of duplicates.
311
315
go func () {
@@ -369,46 +373,54 @@ func dedup(backoff chan struct{}, length uint64, paths ...string) {
369
373
return
370
374
}
371
375
376
+ var dedupped uint64
372
377
source := valid [0 ].Dest_fd
373
378
valid = valid [1 :]
374
- var dedupped , offset uint64
375
379
for {
376
- arg := & unix.FileDedupeRange {
377
- Src_length : length ,
378
- Src_offset : offset ,
379
- Info : valid ,
380
- }
381
- err := unix .IoctlFileDedupeRange (int (source ), arg )
382
- if err != nil {
383
- print (paths [0 ] + ": (FileDedupeRange): " + err .Error ())
384
- totalDeddupingErrors .Add (uint64 (len (valid )))
385
- return
386
- }
380
+ current := valid [:min (len (valid ), maxInfos )]
381
+ var offset uint64
382
+ for {
383
+ arg := & unix.FileDedupeRange {
384
+ Src_length : length - offset ,
385
+ Src_offset : offset ,
386
+ Info : current ,
387
+ }
388
+ err := unix .IoctlFileDedupeRange (int (source ), arg )
389
+ if err != nil {
390
+ print (paths [0 ] + ": (FileDedupeRange): " + err .Error ())
391
+ totalDeddupingErrors .Add (uint64 (len (current )))
392
+ return
393
+ }
387
394
388
- var best uint64
389
- nextValid := valid [:0 ]
390
- for i , v := range valid {
391
- bytesDedupped := v .Bytes_deduped
392
- dedupped += bytesDedupped
393
- if bytesDedupped < best {
394
- // this file is having issues, forget about it.
395
- continue
395
+ var best uint64
396
+ nextCurrent := current [:0 ]
397
+ for i , v := range current {
398
+ bytesDedupped := v .Bytes_deduped
399
+ dedupped += bytesDedupped
400
+ if bytesDedupped < best {
401
+ // this file is having issues, forget about it.
402
+ continue
403
+ }
404
+ if best < bytesDedupped {
405
+ // previous files were doing poorly, forget about them.
406
+ best = bytesDedupped
407
+ nextCurrent = current [i :i ]
408
+ }
409
+ v .Dest_offset += bytesDedupped
410
+ nextCurrent = append (nextCurrent , v )
396
411
}
397
- if best < bytesDedupped {
398
- // previous files were doing poorly, forget about them.
399
- best = bytesDedupped
400
- nextValid = valid [i :i ]
412
+ current = nextCurrent
413
+ offset += best
414
+
415
+ if offset == length || best == 0 {
416
+ break
401
417
}
402
- v .Dest_offset += bytesDedupped
403
- nextValid = append (nextValid , v )
404
418
}
405
- valid = nextValid
406
- offset += best
407
- length -= best
408
419
409
- if length == 0 || best == 0 {
420
+ if len ( valid ) <= maxInfos {
410
421
break
411
422
}
423
+ valid = valid [maxInfos :]
412
424
}
413
425
totalDedupped .Add (dedupped )
414
426
}
0 commit comments