Skip to content

Commit bb718e8

Browse files
committed
feat: add tree() and commit() merge support, on par with merge-ORT as far as tests go.
Note that this judgement of quality is based on a limited number of partially complex tests, and it's likely that in practice there will be deviations of sorts. Also, given the complexity of the implementation it is definitely under-tested, but with that it's mostly on par with Git, unfortunately. On the bright side, some of the tests are very taxing and I'd hope this means something for real-world quality.
1 parent de8ca41 commit bb718e8

File tree

15 files changed

+3455
-20
lines changed

15 files changed

+3455
-20
lines changed

Cargo.lock

+8-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

+9-3
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,20 @@ Check out the [performance discussion][gix-diff-performance] as well.
338338

339339
### gix-merge
340340

341-
* [x] three-way merge analysis of **blobs** with choice of how to resolve conflicts
341+
* [x] three-way content-merge analysis of **blobs** with choice of how to resolve conflicts
342+
- [x] respect git attributes and drivers.
342343
- [ ] choose how to resolve conflicts on the data-structure
343-
- [ ] produce a new blob based on data-structure containing possible resolutions
344+
- [ ] more efficient handling of paths with `merge=binary` attributes (do not load them into memory)
345+
- [x] produce a new blob based on data-structure containing possible resolutions
344346
- [x] `merge` style
345347
- [x] `diff3` style
346348
- [x] `zdiff` style
349+
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
347350
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
348-
* [ ] diff-heuristics match Git perfectly
351+
* [x] **tree**-diff-heuristics match Git for its test-cases
352+
- [ ] a way to generate an index with stages
353+
- *currently the data it provides won't generate index entries, and possibly can't be used for it yet*
354+
- [ ] submodule merges (*right now they count as conflicts if they differ*)
349355
* [x] API documentation
350356
* [ ] Examples
351357

gix-merge/Cargo.toml

+15-12
Original file line numberDiff line numberDiff line change
@@ -15,33 +15,36 @@ workspace = true
1515
doctest = false
1616

1717
[features]
18-
default = ["blob"]
19-
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
20-
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-quote"]
2118
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
2219
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
2320

2421
[dependencies]
2522
gix-hash = { version = "^0.15.0", path = "../gix-hash" }
2623
gix-object = { version = "^0.45.0", path = "../gix-object" }
27-
gix-filter = { version = "^0.14.0", path = "../gix-filter", optional = true }
28-
gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true }
29-
gix-command = { version = "^0.3.10", path = "../gix-command", optional = true }
30-
gix-path = { version = "^0.10.12", path = "../gix-path", optional = true }
31-
gix-fs = { version = "^0.12.0", path = "../gix-fs", optional = true }
32-
gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile", optional = true }
33-
gix-trace = { version = "^0.1.11", path = "../gix-trace", optional = true }
34-
gix-quote = { version = "^0.4.13", path = "../gix-quote", optional = true }
24+
gix-filter = { version = "^0.14.0", path = "../gix-filter" }
25+
gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"] }
26+
gix-command = { version = "^0.3.10", path = "../gix-command" }
27+
gix-path = { version = "^0.10.12", path = "../gix-path" }
28+
gix-fs = { version = "^0.12.0", path = "../gix-fs" }
29+
gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile" }
30+
gix-trace = { version = "^0.1.11", path = "../gix-trace" }
31+
gix-quote = { version = "^0.4.13", path = "../gix-quote" }
32+
gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] }
33+
gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" }
34+
gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] }
3535

3636
thiserror = "1.0.63"
37-
imara-diff = { version = "0.1.7", optional = true }
37+
imara-diff = { version = "0.1.7" }
3838
bstr = { version = "1.5.0", default-features = false }
3939
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
4040

4141
document-features = { version = "0.2.0", optional = true }
4242

4343
[dev-dependencies]
4444
gix-testtools = { path = "../tests/tools" }
45+
gix-odb = { path = "../gix-odb" }
46+
gix-utils = { version = "^0.1.12", path = "../gix-utils" }
47+
termtree = "0.5.1"
4548
pretty_assertions = "1.4.0"
4649

4750
[package.metadata.docs.rs]

gix-merge/src/blob/platform/merge.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::path::PathBuf;
77
pub struct Options {
88
/// If `true`, the resources being merged are contained in a virtual ancestor,
99
/// which is the case when merge bases are merged into one.
10+
/// This flag affects the choice of merge drivers.
1011
pub is_virtual_ancestor: bool,
1112
/// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side.
1213
pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,

gix-merge/src/commit.rs

+213
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,213 @@
1+
/// The error returned by [`commit()`](crate::commit()).
2+
#[derive(Debug, thiserror::Error)]
3+
#[allow(missing_docs)]
4+
pub enum Error {
5+
#[error(transparent)]
6+
MergeBase(#[from] gix_revision::merge_base::Error),
7+
#[error(transparent)]
8+
MergeTree(#[from] crate::tree::Error),
9+
#[error("Failed to write tree for merged merge-base or virtual commit")]
10+
WriteObject(gix_object::write::Error),
11+
#[error("No common ancestor between {our_commit_id} and {their_commit_id}")]
12+
NoMergeBase {
13+
/// The commit on our side that was to be merged.
14+
our_commit_id: gix_hash::ObjectId,
15+
/// The commit on their side that was to be merged.
16+
their_commit_id: gix_hash::ObjectId,
17+
},
18+
#[error("Could not find ancestor, our or their commit to extract tree from")]
19+
FindCommit(#[from] gix_object::find::existing_object::Error),
20+
}
21+
22+
/// A way to configure [`commit()`](crate::commit()).
23+
#[derive(Default, Debug, Clone)]
24+
pub struct Options {
25+
/// If `true`, merging unrelated commits is allowed, with the merge-base being assumed as empty tree.
26+
pub allow_missing_merge_base: bool,
27+
/// Options to define how trees should be merged.
28+
pub tree_merge: crate::tree::Options,
29+
/// If `true`, do not merge multiple merge-bases into one. Instead, just use the first one.
30+
// TODO: test
31+
#[doc(alias = "no_recursive", alias = "git2")]
32+
pub use_first_merge_base: bool,
33+
}
34+
35+
/// The result of [`commit()`](crate::commit()).
36+
#[derive(Clone)]
37+
pub struct Outcome<'a> {
38+
/// The outcome of the actual tree-merge.
39+
pub tree_merge: crate::tree::Outcome<'a>,
40+
/// The tree id of the base commit we used. This is either…
41+
/// * the single merge-base we found
42+
/// * the first of multiple merge-bases if [`use_first_merge_base`](Options::use_first_merge_base) was `true`.
43+
/// * the merged tree of all merge-bases, which then isn't linked to an actual commit.
44+
/// * an empty tree, if [`allow_missing_merge_base`](Options::allow_missing_merge_base) is enabled.
45+
pub merge_base_tree_id: gix_hash::ObjectId,
46+
/// The object ids of all the commits which were found to be merge-bases, or `None` if there was no merge-base.
47+
pub merge_bases: Option<Vec<gix_hash::ObjectId>>,
48+
/// A list of virtual commits that were created to merge multiple merge-bases into one.
49+
/// As they are not reachable by anything they will be garbage collected, but knowing them provides options.
50+
pub virtual_merge_bases: Vec<gix_hash::ObjectId>,
51+
}
52+
53+
pub(super) mod function {
54+
use crate::commit::{Error, Options};
55+
use gix_object::FindExt;
56+
use std::borrow::Cow;
57+
58+
/// Like [`tree()`](crate::tree()), but it takes only two commits, `our_commit` and `their_commit` to automatically
59+
/// compute the merge-bases among them.
60+
/// If there are multiple merge bases, these will be auto-merged into one, recursively, if
61+
/// [`use_first_merge_base`](Options::use_first_merge_base) is `false`.
62+
///
63+
/// `labels` are names where [`current`](crate::blob::builtin_driver::text::Labels::current) is a name for `our_commit`
64+
/// and [`other`](crate::blob::builtin_driver::text::Labels::other) is a name for `their_commit`.
65+
/// If [`ancestor`](crate::blob::builtin_driver::text::Labels::ancestor) is unset, it will be set by us based on the
66+
/// merge-bases of `our_commit` and `their_commit`.
67+
///
68+
/// The `graph` is used to find the merge-base between `our_commit` and `their_commit`, and can also act as cache
69+
/// to speed up subsequent merge-base queries.
70+
///
71+
/// Use `abbreviate_hash(id)` to shorten the given `id` according to standard git shortening rules. It's used in case
72+
/// the ancestor-label isn't explicitly set so that the merge base label becomes the shortened `id`.
73+
/// Note that it's a dyn closure only to make it possible to recursively call this function in case of multiple merge-bases.
74+
///
75+
/// `objects` is also written to: to store merged trees and virtual commits if merging multiple merge-bases into one
76+
/// is allowed and there are multiple merge bases, and to write merged buffers as blobs.
77+
///
78+
/// ### Performance
79+
///
80+
/// Note that `objects` *should* have an object cache to greatly accelerate tree-retrieval.
81+
///
82+
/// ### Notes
83+
///
84+
/// When merging merge-bases recursively, the options are adjusted automatically to act like Git, i.e. merge binary
85+
/// blobs and resolve with *ours*.
86+
///
87+
/// ### Deviation
88+
///
89+
/// * It's known that certain conflicts around symbolic links can be auto-resolved. We don't have an option for this
90+
/// at all, yet, primarily as Git seems to not implement the *ours*/*theirs* choice in other places even though it
91+
/// reasonably could. So we leave it to the caller to continue processing the returned tree at will.
92+
/// * Git treats symbolic links, when merged, like binaries choosing one over the other, which is also affected by
93+
/// which side is chosen. In our case, they always conflict. TODO: fix this, with custom merge-strategy for symlinks.
94+
#[allow(clippy::too_many_arguments)]
95+
pub fn commit<'objects>(
96+
our_commit: gix_hash::ObjectId,
97+
their_commit: gix_hash::ObjectId,
98+
labels: crate::blob::builtin_driver::text::Labels<'_>,
99+
graph: &mut gix_revwalk::Graph<'_, '_, gix_revwalk::graph::Commit<gix_revision::merge_base::Flags>>,
100+
diff_resource_cache: &mut gix_diff::blob::Platform,
101+
blob_merge: &mut crate::blob::Platform,
102+
objects: &'objects (impl gix_object::FindObjectOrHeader + gix_object::Write),
103+
abbreviate_hash: &mut dyn FnMut(&gix_hash::oid) -> String,
104+
options: Options,
105+
) -> Result<super::Outcome<'objects>, Error> {
106+
let merge_bases = gix_revision::merge_base(our_commit, &[their_commit], graph)?;
107+
let mut virtual_merge_bases = Vec::new();
108+
let mut state = gix_diff::tree::State::default();
109+
let mut commit_to_tree =
110+
|commit_id: gix_hash::ObjectId| objects.find_commit(&commit_id, &mut state.buf1).map(|c| c.tree());
111+
112+
let (merge_base_tree_id, ancestor_name): (_, Cow<'_, str>) = match merge_bases.clone() {
113+
Some(base_commit) if base_commit.len() == 1 => {
114+
(commit_to_tree(base_commit[0])?, abbreviate_hash(&base_commit[0]).into())
115+
}
116+
Some(mut base_commits) => {
117+
let virtual_base_tree = if options.use_first_merge_base {
118+
let first = *base_commits.first().expect("if Some() there is at least one.");
119+
commit_to_tree(first)?
120+
} else {
121+
let mut merged_commit_id = base_commits.pop().expect("at least one base");
122+
let mut options = options.clone();
123+
options.tree_merge.blob_merge.is_virtual_ancestor = true;
124+
options.tree_merge.blob_merge.resolve_binary_with =
125+
Some(crate::blob::builtin_driver::binary::ResolveWith::Ours);
126+
let labels = crate::blob::builtin_driver::text::Labels {
127+
current: Some("Temporary merge branch 1".into()),
128+
other: Some("Temporary merge branch 2".into()),
129+
..labels
130+
};
131+
while let Some(next_commit_id) = base_commits.pop() {
132+
options.tree_merge.call_depth += 1;
133+
let mut out = commit(
134+
merged_commit_id,
135+
next_commit_id,
136+
labels,
137+
graph,
138+
diff_resource_cache,
139+
blob_merge,
140+
objects,
141+
abbreviate_hash,
142+
options.clone(),
143+
)?;
144+
let merged_tree_id = out
145+
.tree_merge
146+
.tree
147+
.write(|tree| objects.write(tree))
148+
.map_err(Error::WriteObject)?;
149+
150+
merged_commit_id =
151+
create_virtual_commit(objects, merged_commit_id, next_commit_id, merged_tree_id)?;
152+
153+
virtual_merge_bases.extend(out.virtual_merge_bases);
154+
virtual_merge_bases.push(merged_commit_id);
155+
}
156+
commit_to_tree(merged_commit_id)?
157+
};
158+
(virtual_base_tree, "merged common ancestors".into())
159+
}
160+
None => {
161+
if options.allow_missing_merge_base {
162+
(gix_hash::ObjectId::empty_tree(our_commit.kind()), "empty tree".into())
163+
} else {
164+
return Err(Error::NoMergeBase {
165+
our_commit_id: our_commit,
166+
their_commit_id: their_commit,
167+
});
168+
}
169+
}
170+
};
171+
172+
let mut labels = labels; // TODO(borrowchk): this re-assignment shouldn't be needed.
173+
if labels.ancestor.is_none() {
174+
labels.ancestor = Some(ancestor_name.as_ref().into());
175+
}
176+
177+
let our_tree_id = objects.find_commit(&our_commit, &mut state.buf1)?.tree();
178+
let their_tree_id = objects.find_commit(&their_commit, &mut state.buf1)?.tree();
179+
180+
let outcome = crate::tree(
181+
&merge_base_tree_id,
182+
&our_tree_id,
183+
&their_tree_id,
184+
labels,
185+
objects,
186+
|buf| objects.write_buf(gix_object::Kind::Blob, buf),
187+
&mut state,
188+
diff_resource_cache,
189+
blob_merge,
190+
options.tree_merge,
191+
)?;
192+
193+
Ok(super::Outcome {
194+
tree_merge: outcome,
195+
merge_bases,
196+
merge_base_tree_id,
197+
virtual_merge_bases,
198+
})
199+
}
200+
201+
fn create_virtual_commit(
202+
objects: &(impl gix_object::Find + gix_object::Write),
203+
parent_a: gix_hash::ObjectId,
204+
parent_b: gix_hash::ObjectId,
205+
tree_id: gix_hash::ObjectId,
206+
) -> Result<gix_hash::ObjectId, Error> {
207+
let mut buf = Vec::new();
208+
let mut commit: gix_object::Commit = objects.find_commit(&parent_a, &mut buf)?.into();
209+
commit.parents = vec![parent_a, parent_b].into();
210+
commit.tree = tree_id;
211+
objects.write(&commit).map_err(Error::WriteObject)
212+
}
213+
}

gix-merge/src/lib.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,10 @@
22
#![forbid(unsafe_code)]
33

44
///
5-
#[cfg(feature = "blob")]
65
pub mod blob;
6+
///
7+
pub mod commit;
8+
pub use commit::function::commit;
9+
///
10+
pub mod tree;
11+
pub use tree::function::tree;

0 commit comments

Comments
 (0)