@@ -286,12 +286,10 @@ impl<T: Change> Tracker<T> {
286
286
CopySource :: FromSetOfModifiedFiles => { }
287
287
CopySource :: FromSetOfModifiedFilesAndAllSources => {
288
288
push_source_tree ( & mut |change, location| {
289
- assert ! (
290
- self . try_push_change( change, location) . is_none( ) ,
291
- "we must accept every change"
292
- ) ;
293
- // make sure these aren't viable to be emitted anymore.
294
- self . items . last_mut ( ) . expect ( "just pushed" ) . emitted = true ;
289
+ if self . try_push_change ( change, location) . is_none ( ) {
290
+ // make sure these aren't viable to be emitted anymore.
291
+ self . items . last_mut ( ) . expect ( "just pushed" ) . emitted = true ;
292
+ }
295
293
} )
296
294
. map_err ( |err| emit:: Error :: GetItemsForExhaustiveCopyDetection ( Box :: new ( err) ) ) ?;
297
295
self . items . sort_by ( by_id_and_location) ;
@@ -341,6 +339,10 @@ impl<T: Change> Tracker<T> {
341
339
) -> Result < ( ) , emit:: Error > {
342
340
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
343
341
let needs_second_pass = !needs_exact_match ( percentage) ;
342
+
343
+ // https://github.com/git/git/blob/cc01bad4a9f566cf4453c7edd6b433851b0835e2/diffcore-rename.c#L350-L369
344
+ // We would need a hashmap to be able to skip the limit here; without one, performance would be too bad.
345
+ // This also means we don't find all renames if we hit the rename limit.
344
346
if self . match_pairs ( cb, None /* by identity */ , kind, out, diff_cache, objects, filter) ? == Action :: Cancel {
345
347
return Ok ( ( ) ) ;
346
348
}
@@ -384,10 +386,35 @@ impl<T: Change> Tracker<T> {
384
386
filter : Option < fn ( & T ) -> bool > ,
385
387
) -> Result < Action , emit:: Error > {
386
388
let mut dest_ofs = 0 ;
389
+ let mut num_checks = 0 ;
390
+ let max_checks = {
391
+ let limit = self . rewrites . limit . saturating_pow ( 2 ) ;
392
+ // There can be trees with a lot of entries and pathological search behaviour, as they can be repeated
393
+ // and then have a lot of similar hashes. This also means we have to search a lot of candidates which
394
+ // can be too slow despite best attempts. So play it safe and detect such cases 'roughly' by the number of items.
395
+ if self . items . len ( ) < 100_000 {
396
+ 0
397
+ } else {
398
+ limit
399
+ }
400
+ } ;
401
+
387
402
while let Some ( ( mut dest_idx, dest) ) = self . items [ dest_ofs..] . iter ( ) . enumerate ( ) . find_map ( |( idx, item) | {
388
403
( !item. emitted
389
404
&& matches ! ( item. change. kind( ) , ChangeKind :: Addition )
390
- && filter. map_or ( true , |f| f ( & item. change ) ) )
405
+ && filter. map_or_else (
406
+ || {
407
+ self . rewrites . track_empty
408
+ // We always want to keep track of entries that are involved in a directory rename.
409
+ // Note that this may still match them up arbitrarily if empty, but empty is empty.
410
+ || matches ! ( item. change. relation( ) , Some ( Relation :: ChildOfParent ( _) ) )
411
+ || {
412
+ let id = item. change . id ( ) ;
413
+ id != gix_hash:: ObjectId :: empty_blob ( id. kind ( ) )
414
+ }
415
+ } ,
416
+ |f| f ( & item. change ) ,
417
+ ) )
391
418
. then_some ( ( idx, item) )
392
419
} ) {
393
420
dest_idx += dest_ofs;
@@ -403,6 +430,7 @@ impl<T: Change> Tracker<T> {
403
430
objects,
404
431
diff_cache,
405
432
& self . path_backing ,
433
+ & mut num_checks,
406
434
) ?
407
435
. map ( |( src_idx, src, diff) | {
408
436
let ( id, entry_mode) = src. change . id_and_entry_mode ( ) ;
@@ -420,6 +448,12 @@ impl<T: Change> Tracker<T> {
420
448
src_idx,
421
449
)
422
450
} ) ;
451
+ if max_checks != 0 && num_checks > max_checks {
452
+ gix_trace:: warn!(
453
+ "Cancelled rename matching as there were too many iterations ({num_checks} > {max_checks})"
454
+ ) ;
455
+ return Ok ( Action :: Cancel ) ;
456
+ }
423
457
let Some ( ( src, src_idx) ) = src else {
424
458
continue ;
425
459
} ;
@@ -631,6 +665,7 @@ fn find_match<'a, T: Change>(
631
665
objects : & impl gix_object:: FindObjectOrHeader ,
632
666
diff_cache : & mut crate :: blob:: Platform ,
633
667
path_backing : & [ u8 ] ,
668
+ num_checks : & mut usize ,
634
669
) -> Result < Option < SourceTuple < ' a , T > > , emit:: Error > {
635
670
let ( item_id, item_mode) = item. change . id_and_entry_mode ( ) ;
636
671
if needs_exact_match ( percentage) || item_mode. is_link ( ) {
@@ -651,6 +686,7 @@ fn find_match<'a, T: Change>(
651
686
}
652
687
let res = items[ range. clone ( ) ] . iter ( ) . enumerate ( ) . find_map ( |( mut src_idx, src) | {
653
688
src_idx += range. start ;
689
+ * num_checks += 1 ;
654
690
( src_idx != item_idx && src. is_source_for_destination_of ( kind, item_mode) ) . then_some ( ( src_idx, src, None ) )
655
691
} ) ;
656
692
if let Some ( src) = res {
@@ -685,6 +721,7 @@ fn find_match<'a, T: Change>(
685
721
) ?;
686
722
let prep = diff_cache. prepare_diff ( ) ?;
687
723
stats. num_similarity_checks += 1 ;
724
+ * num_checks += 1 ;
688
725
match prep. operation {
689
726
Operation :: InternalDiff { algorithm } => {
690
727
let tokens =
0 commit comments