Skip to content

Commit 155b5e1

Browse files
authored
Merge pull request #1630 from GitoxideLabs/diff-fix
diff fix
2 parents f186c23 + 53fa8ab commit 155b5e1

File tree

4 files changed

+606
-15
lines changed

4 files changed

+606
-15
lines changed

gix-diff/src/rewrites/tracker.rs

+36-5
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,10 @@ impl<T: Change> Tracker<T> {
172172
.relation()
173173
.filter(|_| matches!(change_kind, ChangeKind::Addition | ChangeKind::Deletion));
174174
let entry_kind = change.entry_mode().kind();
175-
if let (None | Some(Relation::ChildOfParent(_)), EntryKind::Commit | EntryKind::Tree) = (relation, entry_kind) {
175+
if entry_kind == EntryKind::Commit {
176+
return Some(change);
177+
}
178+
if let (None, EntryKind::Tree) = (relation, entry_kind) {
176179
return Some(change);
177180
};
178181

@@ -221,27 +224,46 @@ impl<T: Change> Tracker<T> {
221224
PushSourceTreeFn: FnMut(&mut dyn FnMut(T, &BStr)) -> Result<(), E>,
222225
E: std::error::Error + Send + Sync + 'static,
223226
{
227+
fn is_parent(change: &impl Change) -> bool {
228+
matches!(change.relation(), Some(Relation::Parent(_)))
229+
}
224230
diff_cache.options.skip_internal_diff_if_external_is_configured = false;
225231

232+
// The point of this is to optimize for identity-based lookups, which should be easy to find
233+
// by partitioning.
226234
fn by_id_and_location<T: Change>(a: &Item<T>, b: &Item<T>) -> std::cmp::Ordering {
227235
a.change
228236
.id()
229237
.cmp(b.change.id())
230238
.then_with(|| a.path.start.cmp(&b.path.start).then(a.path.end.cmp(&b.path.end)))
231239
}
232-
self.items.sort_by(by_id_and_location);
233240

234241
let mut out = Outcome {
235242
options: self.rewrites,
236243
..Default::default()
237244
};
245+
self.items.sort_by(by_id_and_location);
246+
247+
// Rewrites by directory can be pruned out quickly, quickly pruning candidates
248+
// for the following per-item steps.
249+
self.match_pairs_of_kind(
250+
visit::SourceKind::Rename,
251+
&mut cb,
252+
None, /* by identity for parents */
253+
&mut out,
254+
diff_cache,
255+
objects,
256+
Some(is_parent),
257+
)?;
258+
238259
self.match_pairs_of_kind(
239260
visit::SourceKind::Rename,
240261
&mut cb,
241262
self.rewrites.percentage,
242263
&mut out,
243264
diff_cache,
244265
objects,
266+
None,
245267
)?;
246268

247269
if let Some(copies) = self.rewrites.copies {
@@ -252,6 +274,7 @@ impl<T: Change> Tracker<T> {
252274
&mut out,
253275
diff_cache,
254276
objects,
277+
None,
255278
)?;
256279

257280
match copies.source {
@@ -275,6 +298,7 @@ impl<T: Change> Tracker<T> {
275298
&mut out,
276299
diff_cache,
277300
objects,
301+
None,
278302
)?;
279303
}
280304
}
@@ -299,6 +323,7 @@ impl<T: Change> Tracker<T> {
299323
}
300324

301325
impl<T: Change> Tracker<T> {
326+
#[allow(clippy::too_many_arguments)]
302327
fn match_pairs_of_kind(
303328
&mut self,
304329
kind: visit::SourceKind,
@@ -307,10 +332,11 @@ impl<T: Change> Tracker<T> {
307332
out: &mut Outcome,
308333
diff_cache: &mut crate::blob::Platform,
309334
objects: &impl gix_object::FindObjectOrHeader,
335+
filter: Option<fn(&T) -> bool>,
310336
) -> Result<(), emit::Error> {
311337
// we try to cheaply reduce the set of possibilities first, before possibly looking more exhaustively.
312338
let needs_second_pass = !needs_exact_match(percentage);
313-
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects)? == Action::Cancel {
339+
if self.match_pairs(cb, None /* by identity */, kind, out, diff_cache, objects, filter)? == Action::Cancel {
314340
return Ok(());
315341
}
316342
if needs_second_pass {
@@ -335,12 +361,13 @@ impl<T: Change> Tracker<T> {
335361
}
336362
};
337363
if !is_limited {
338-
self.match_pairs(cb, percentage, kind, out, diff_cache, objects)?;
364+
self.match_pairs(cb, percentage, kind, out, diff_cache, objects, None)?;
339365
}
340366
}
341367
Ok(())
342368
}
343369

370+
#[allow(clippy::too_many_arguments)]
344371
fn match_pairs(
345372
&mut self,
346373
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
@@ -349,10 +376,14 @@ impl<T: Change> Tracker<T> {
349376
stats: &mut Outcome,
350377
diff_cache: &mut crate::blob::Platform,
351378
objects: &impl gix_object::FindObjectOrHeader,
379+
filter: Option<fn(&T) -> bool>,
352380
) -> Result<Action, emit::Error> {
353381
let mut dest_ofs = 0;
354382
while let Some((mut dest_idx, dest)) = self.items[dest_ofs..].iter().enumerate().find_map(|(idx, item)| {
355-
(!item.emitted && matches!(item.change.kind(), ChangeKind::Addition)).then_some((idx, item))
383+
(!item.emitted
384+
&& matches!(item.change.kind(), ChangeKind::Addition)
385+
&& filter.map_or(true, |f| f(&item.change)))
386+
.then_some((idx, item))
356387
}) {
357388
dest_idx += dest_ofs;
358389
dest_ofs = dest_idx + 1;

gix-diff/tests/diff/rewrites/tracker.rs

+9-8
Original file line numberDiff line numberDiff line change
@@ -650,9 +650,9 @@ fn simple_directory_rename_by_id() -> crate::Result {
650650
},
651651
"d/subdir".into(),
652652
)
653-
.is_some(),
654-
"trees that are children are simply ignored. It's easier to compare views of worktrees (`gix-dirwalk`) \
655-
and trees/indices that way."
653+
.is_none(),
654+
"trees that are children are kept and matched. That way, they can quickly be pruned which is done first.\
655+
Those who don't need them can prune them in a later step."
656656
);
657657
assert!(track
658658
.try_push_change(
@@ -664,7 +664,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
664664
},
665665
"d-renamed/subdir".into(),
666666
)
667-
.is_some());
667+
.is_none());
668668
let _odb = util::add_retained_blobs(
669669
&mut track,
670670
[
@@ -692,22 +692,23 @@ fn simple_directory_rename_by_id() -> crate::Result {
692692
assert_eq!(dst.change.relation, Some(Relation::Parent(1)));
693693
assert_eq!(dst.change.mode.kind(), EntryKind::Tree);
694694
}
695-
1..=4 => {
695+
1..=5 => {
696696
let src = src.unwrap();
697697
let (expected_src, expected_dst) = &[
698698
("d/a", "d-renamed/a"),
699699
("d/c", "d-renamed/c"),
700700
("d/b", "d-renamed/b"),
701+
("d/subdir", "d-renamed/subdir"),
701702
("d/subdir/d", "d-renamed/subdir/d"),
702703
][calls - 1];
703704
assert_eq!(src.location, expected_src);
704705
assert_eq!(dst.location, expected_dst);
705706
}
706-
5 => {
707+
6 => {
707708
assert_eq!(src, None);
708709
assert_eq!(dst.location, "a");
709710
}
710-
6 => {
711+
7 => {
711712
assert_eq!(src, None);
712713
assert_eq!(dst.location, "b");
713714
}
@@ -723,7 +724,7 @@ fn simple_directory_rename_by_id() -> crate::Result {
723724
..Default::default()
724725
}
725726
);
726-
assert_eq!(calls, 7, "Should not have too few calls");
727+
assert_eq!(calls, 8, "Should not have too few calls");
727728
Ok(())
728729
}
729730

gix/tests/fixtures/make_diff_repos.sh

+6
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,10 @@ git init jj-trackcopy-1
1414
rm -f "$index"
1515
git update-index --index-info < "$ROOT/assets/jj-trackcopy-1/47bd6f4aa4a7eeef8b01ce168c6c771bdfffcbd3.tree"
1616
git commit --allow-empty -F "$ROOT/assets/jj-trackcopy-1/47bd6f4aa4a7eeef8b01ce168c6c771bdfffcbd3.msg"
17+
18+
git checkout -f HEAD
19+
git mv cli c
20+
git commit -m "renamed cli to c"
21+
22+
rm -Rf c/
1723
)

0 commit comments

Comments
 (0)