Skip to content

Commit feef98d

Browse files
committed
feat: support rename tracking across changed directories
1 parent d9218e5 commit feef98d

File tree

6 files changed

+230
-12
lines changed

6 files changed

+230
-12
lines changed

gix-diff/src/rewrites/mod.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
use crate::tree::visit::ChangeId;
12
use crate::Rewrites;
3+
use std::collections::BTreeSet;
24

35
/// Types related to the rename tracker for renames, rewrites and copies.
46
pub mod tracker;
@@ -12,6 +14,8 @@ pub struct Tracker<T> {
1214
path_backing: Vec<u8>,
1315
/// How to track copies and/or rewrites.
1416
rewrites: Rewrites,
17+
/// Previously emitted relation ids of rewrite pairs, with `(deleted source, added destination)`.
18+
child_renames: BTreeSet<(ChangeId, ChangeId)>,
1519
}
1620

1721
/// Determine in which set of files to search for copies.

gix-diff/src/rewrites/tracker.rs

+71-8
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use std::ops::Range;
1010
use bstr::{BStr, ByteSlice};
1111
use gix_object::tree::{EntryKind, EntryMode};
1212

13+
use crate::rewrites::tracker::visit::SourceKind;
1314
use crate::tree::visit::{Action, ChangeId, Relation};
1415
use crate::{
1516
blob::{platform::prepare_diff::Operation, DiffLineStats, ResourceKind},
@@ -155,6 +156,7 @@ impl<T: Change> Tracker<T> {
155156
items: vec![],
156157
path_backing: vec![],
157158
rewrites,
159+
child_renames: Default::default(),
158160
}
159161
}
160162
}
@@ -244,8 +246,9 @@ impl<T: Change> Tracker<T> {
244246
};
245247
self.items.sort_by(by_id_and_location);
246248

247-
// Rewrites by directory can be pruned out quickly, quickly pruning candidates
248-
// for the following per-item steps.
249+
// Rewrites by directory (without local changes) can be pruned out quickly,
250+
// by finding only parents, their counterpart, and then all children can be matched by
251+
// relationship ID.
249252
self.match_pairs_of_kind(
250253
visit::SourceKind::Rename,
251254
&mut cb,
@@ -266,6 +269,8 @@ impl<T: Change> Tracker<T> {
266269
None,
267270
)?;
268271

272+
self.match_renamed_directories(&mut cb)?;
273+
269274
if let Some(copies) = self.rewrites.copies {
270275
self.match_pairs_of_kind(
271276
visit::SourceKind::Copy,
@@ -387,6 +392,7 @@ impl<T: Change> Tracker<T> {
387392
}) {
388393
dest_idx += dest_ofs;
389394
dest_ofs = dest_idx + 1;
395+
self.items[dest_idx].location(&self.path_backing);
390396
let src = find_match(
391397
&self.items,
392398
dest,
@@ -434,18 +440,25 @@ impl<T: Change> Tracker<T> {
434440
return Ok(Action::Cancel);
435441
}
436442

437-
if let Some((Relation::Parent(src), Relation::Parent(dst))) = relations {
438-
let res = self.emit_child_renames_matching_identity(cb, kind, src, dst)?;
439-
if res == Action::Cancel {
440-
return Ok(Action::Cancel);
443+
match relations {
444+
Some((Relation::Parent(src), Relation::Parent(dst))) => {
445+
let res = self.emit_child_renames_matching_identity(cb, kind, src, dst)?;
446+
if res == Action::Cancel {
447+
return Ok(Action::Cancel);
448+
}
441449
}
450+
Some((Relation::ChildOfParent(src), Relation::ChildOfParent(dst))) => {
451+
self.child_renames.insert((src, dst));
452+
}
453+
_ => {}
442454
}
443455
}
444456
Ok(Action::Continue)
445457
}
446458

447459
/// Emit the children of `src_parent_id` and `dst_parent_id` as pairs of exact matches, which are assumed
448460
/// as `src` and `dst` were an exact match (so all children have to match exactly).
461+
/// Note that we intentionally do not record them as their parents will be emitted, too.
449462
fn emit_child_renames_matching_identity(
450463
&mut self,
451464
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
@@ -504,6 +517,56 @@ impl<T: Change> Tracker<T> {
504517
}
505518
Ok(Action::Continue)
506519
}
520+
521+
/// Find directories with relation id that haven't been emitted yet and store them for lookup.
522+
/// Then use the previously stored emitted renames with relation id to learn which directories they 'link'
523+
/// and emit them, too.
524+
/// Note that this works whenever top-level directories are renamed because they are always added and deleted,
525+
/// and we only match those. Thus, one rewrite inside the directory is enough.
526+
fn match_renamed_directories(
527+
&mut self,
528+
cb: &mut impl FnMut(visit::Destination<'_, T>, Option<visit::Source<'_, T>>) -> Action,
529+
) -> Result<(), emit::Error> {
530+
fn unemitted_directory_matching_relation_id<T: Change>(items: &[Item<T>], child_id: ChangeId) -> Option<usize> {
531+
items.iter().position(|i| {
532+
!i.emitted && matches!(i.change.relation(), Some(Relation::Parent(pid)) if pid == child_id)
533+
})
534+
}
535+
for (deleted_child_id, added_child_id) in &self.child_renames {
536+
let Some(src_idx) = unemitted_directory_matching_relation_id(&self.items, *deleted_child_id) else {
537+
continue;
538+
};
539+
let Some(dst_idx) = unemitted_directory_matching_relation_id(&self.items, *added_child_id) else {
540+
// This could go wrong in case there are mismatches, so be defensive here.
541+
// But generally, we'd expect the destination item to exist.
542+
continue;
543+
};
544+
545+
let (src_item, dst_item) = (&self.items[src_idx], &self.items[dst_idx]);
546+
let entry_mode = src_item.change.entry_mode();
547+
let location = src_item.location(&self.path_backing);
548+
let src = visit::Source {
549+
entry_mode,
550+
id: src_item.change.id().to_owned(),
551+
kind: SourceKind::Rename,
552+
location,
553+
change: &src_item.change,
554+
diff: None,
555+
};
556+
let location = dst_item.location(&self.path_backing);
557+
let change = dst_item.change.clone();
558+
let dst = visit::Destination { change, location };
559+
let res = cb(dst, Some(src));
560+
561+
self.items[src_idx].emitted = true;
562+
self.items[dst_idx].emitted = true;
563+
564+
if res == Action::Cancel {
565+
return Ok(());
566+
}
567+
}
568+
Ok(())
569+
}
507570
}
508571

509572
fn filename(path: &BStr) -> &BStr {
@@ -572,8 +635,8 @@ fn find_match<'a, T: Change>(
572635
let (item_id, item_mode) = item.change.id_and_entry_mode();
573636
if needs_exact_match(percentage) || item_mode.is_link() {
574637
let first_idx = items.partition_point(|a| a.change.id() < item_id);
575-
let range = items.get(first_idx..).map(|items| {
576-
let end = items
638+
let range = items.get(first_idx..).map(|slice| {
639+
let end = slice
577640
.iter()
578641
.position(|a| a.change.id() != item_id)
579642
.map_or(items.len(), |idx| first_idx + idx);

gix-diff/tests/diff/rewrites/tracker.rs

+12-4
Original file line numberDiff line numberDiff line change
@@ -588,6 +588,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
588588
(Change::addition(), "b", "firt\nsecond\n"),
589589
],
590590
);
591+
591592
let mut calls = 0;
592593
let out = util::assert_emit_with_objects(
593594
&mut track,
@@ -600,10 +601,17 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
600601
assert_eq!(src.location, expected_src);
601602
assert_eq!(dst.location, expected_dst);
602603
}
603-
3..=6 => {
604+
3 => {
605+
assert_eq!(src.unwrap().location, "d");
606+
assert_eq!(
607+
dst.location, "d-renamed",
608+
"it can now track modified and renamed directories"
609+
);
610+
}
611+
4 => {
604612
assert_eq!(src, None);
605-
let expected_dst = ["d", "d-renamed", "d/subdir/d"][calls - 3];
606-
assert_eq!(dst.location, expected_dst);
613+
assert_eq!(dst.change.kind, ChangeKind::Deletion);
614+
assert_eq!(dst.location, "d/subdir/d");
607615
}
608616
_ => unreachable!("Should have expected emission call {calls}"),
609617
}
@@ -620,7 +628,7 @@ fn directory_renames_by_id_can_fail_gracefully() -> crate::Result {
620628
..Default::default()
621629
}
622630
);
623-
assert_eq!(calls, 6, "Should not have too few calls");
631+
assert_eq!(calls, 5, "Should not have too few calls");
624632
Ok(())
625633
}
626634

gix-diff/tests/diff/tree_with_rewrites.rs

+131
Original file line numberDiff line numberDiff line change
@@ -1904,6 +1904,137 @@ rename to gix-sec/tests/sec.rs
19041904
Ok(())
19051905
}
19061906

1907+
#[test]
1908+
fn realistic_renames_3_without_identity() -> crate::Result {
1909+
let (changes, out) = collect_changes_opts(
1910+
"r4-base",
1911+
"r4-dir-rename-non-identity",
1912+
Options {
1913+
location: Some(Location::Path),
1914+
rewrites: Some(Rewrites {
1915+
copies: None,
1916+
percentage: None,
1917+
limit: 0,
1918+
}),
1919+
},
1920+
)?;
1921+
1922+
// Look how nicely it captures and associates this directory rename.
1923+
insta::assert_debug_snapshot!(changes.into_iter()
1924+
.filter(|c| !c.entry_mode().is_tree() ||
1925+
c.relation().map_or(false, |r| matches!(r, Relation::Parent(_)))
1926+
).collect::<Vec<_>>(), @r#"
1927+
[
1928+
Rewrite {
1929+
source_location: "src/plumbing/options.rs",
1930+
source_entry_mode: EntryMode(
1931+
33188,
1932+
),
1933+
source_relation: Some(
1934+
ChildOfParent(
1935+
2,
1936+
),
1937+
),
1938+
source_id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba),
1939+
diff: None,
1940+
entry_mode: EntryMode(
1941+
33188,
1942+
),
1943+
id: Sha1(00750edc07d6415dcc07ae0351e9397b0222b7ba),
1944+
location: "src/plumbing-renamed/options/mod.rs",
1945+
relation: Some(
1946+
ChildOfParent(
1947+
1,
1948+
),
1949+
),
1950+
copy: false,
1951+
},
1952+
Rewrite {
1953+
source_location: "src/plumbing/mod.rs",
1954+
source_entry_mode: EntryMode(
1955+
33188,
1956+
),
1957+
source_relation: Some(
1958+
ChildOfParent(
1959+
2,
1960+
),
1961+
),
1962+
source_id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f),
1963+
diff: None,
1964+
entry_mode: EntryMode(
1965+
33188,
1966+
),
1967+
id: Sha1(0cfbf08886fca9a91cb753ec8734c84fcbe52c9f),
1968+
location: "src/plumbing-renamed/mod.rs",
1969+
relation: Some(
1970+
ChildOfParent(
1971+
1,
1972+
),
1973+
),
1974+
copy: false,
1975+
},
1976+
Rewrite {
1977+
source_location: "src/plumbing/main.rs",
1978+
source_entry_mode: EntryMode(
1979+
33188,
1980+
),
1981+
source_relation: Some(
1982+
ChildOfParent(
1983+
2,
1984+
),
1985+
),
1986+
source_id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d),
1987+
diff: None,
1988+
entry_mode: EntryMode(
1989+
33188,
1990+
),
1991+
id: Sha1(d00491fd7e5bb6fa28c517a0bb32b8b506539d4d),
1992+
location: "src/plumbing-renamed/main.rs",
1993+
relation: Some(
1994+
ChildOfParent(
1995+
1,
1996+
),
1997+
),
1998+
copy: false,
1999+
},
2000+
Rewrite {
2001+
source_location: "src/plumbing",
2002+
source_entry_mode: EntryMode(
2003+
16384,
2004+
),
2005+
source_relation: Some(
2006+
Parent(
2007+
2,
2008+
),
2009+
),
2010+
source_id: Sha1(b9d41dcdbd92fcab2fb6594d04f2ad99b3472621),
2011+
diff: None,
2012+
entry_mode: EntryMode(
2013+
16384,
2014+
),
2015+
id: Sha1(202702465d7bb291153629dc2e8b353afe9cbdae),
2016+
location: "src/plumbing-renamed",
2017+
relation: Some(
2018+
Parent(
2019+
1,
2020+
),
2021+
),
2022+
copy: false,
2023+
},
2024+
]
2025+
"#);
2026+
2027+
let out = out.expect("tracking enabled");
2028+
assert_eq!(
2029+
out.num_similarity_checks, 0,
2030+
"similarity checks disabled, and not necessary"
2031+
);
2032+
assert_eq!(out.num_similarity_checks_skipped_for_rename_tracking_due_to_limit, 0);
2033+
assert_eq!(out.num_similarity_checks_skipped_for_copy_tracking_due_to_limit, 0);
2034+
2035+
Ok(())
2036+
}
2037+
19072038
mod util {
19082039
use gix_diff::rewrites;
19092040
use gix_object::{FindExt, TreeRefIter};
Binary file not shown.

gix-diff/tests/fixtures/make_diff_for_rewrites_repo.sh

+12
Original file line numberDiff line numberDiff line change
@@ -794,4 +794,16 @@ git -c diff.renames=1 show HEAD~2 > baseline-2.with-renames
794794
git -c diff.renames=0 show HEAD~4 > baseline.no-renames
795795
git -c diff.renames=1 show HEAD~4 > baseline.with-renames
796796

797+
echo 1 >src/plumbing/main.rs
798+
echo 2 >src/plumbing/mod.rs
799+
echo 3 >src/plumbing/options.rs
800+
git add src/plumbing && git commit -m "r4-base"
801+
store_tree "r4-base"
802+
803+
mkdir src/plumbing/options
804+
git mv src/plumbing/options.rs src/plumbing/options/mod.rs
805+
git mv src/plumbing src/plumbing-renamed
806+
git commit -m "r4-dir-rename-non-identity"
807+
store_tree "r4-dir-rename-non-identity"
808+
797809
mv ../*.tree .

0 commit comments

Comments
 (0)