Skip to content

Commit bd2327c

Browse files
committed
feat: add tree() and commit() merge support, on par with merge-ORT as far as tests go.
Note that this judgement of quality is based on a limited number of partially complex tests, and it's likely that in practice there will be deviations of sorts. Also, given the complexity of the implementation it is definitely under-tested, but with that it's mostly on par with Git, unfortunately. On the bright side, some of the tests are very taxing and I'd hope this means something for real-world quality.
1 parent 9954bf8 commit bd2327c

File tree

15 files changed

+3564
-20
lines changed

15 files changed

+3564
-20
lines changed

Cargo.lock

+8-2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crate-status.md

+9-3
Original file line numberDiff line numberDiff line change
@@ -338,14 +338,20 @@ Check out the [performance discussion][gix-diff-performance] as well.
338338

339339
### gix-merge
340340

341-
* [x] three-way merge analysis of **blobs** with choice of how to resolve conflicts
341+
* [x] three-way content-merge analysis of **blobs** with choice of how to resolve conflicts
342+
- [x] respect git attributes and drivers.
342343
- [ ] choose how to resolve conflicts on the data-structure
343-
- [ ] produce a new blob based on data-structure containing possible resolutions
344+
- [ ] more efficient handling of paths with `merge=binary` attributes (do not load them into memory)
345+
- [x] produce a new blob based on data-structure containing possible resolutions
344346
- [x] `merge` style
345347
- [x] `diff3` style
346348
- [x] `zdiff` style
349+
- [ ] various newlines-related options during the merge (see https://git-scm.com/docs/git-merge#Documentation/git-merge.txt-ignore-space-change).
347350
- [ ] a way to control inter-hunk merging based on proximity (maybe via `gix-diff` feature which could use the same)
348-
* [ ] diff-heuristics match Git perfectly
351+
* [x] **tree**-diff-heuristics match Git for its test-cases
352+
- [ ] a way to generate an index with stages
353+
- *currently the data it provides won't generate index entries, and possibly can't be used for it yet*
354+
- [ ] submodule merges (*right now they count as conflicts if they differ*)
349355
* [x] API documentation
350356
* [ ] Examples
351357

gix-merge/Cargo.toml

+15-12
Original file line numberDiff line numberDiff line change
@@ -15,33 +15,36 @@ workspace = true
1515
doctest = false
1616

1717
[features]
18-
default = ["blob"]
19-
## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation.
20-
blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-quote"]
2118
## Data structures implement `serde::Serialize` and `serde::Deserialize`.
2219
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"]
2320

2421
[dependencies]
2522
gix-hash = { version = "^0.15.0", path = "../gix-hash" }
2623
gix-object = { version = "^0.45.0", path = "../gix-object" }
27-
gix-filter = { version = "^0.14.0", path = "../gix-filter", optional = true }
28-
gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true }
29-
gix-command = { version = "^0.3.10", path = "../gix-command", optional = true }
30-
gix-path = { version = "^0.10.12", path = "../gix-path", optional = true }
31-
gix-fs = { version = "^0.12.0", path = "../gix-fs", optional = true }
32-
gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile", optional = true }
33-
gix-trace = { version = "^0.1.11", path = "../gix-trace", optional = true }
34-
gix-quote = { version = "^0.4.13", path = "../gix-quote", optional = true }
24+
gix-filter = { version = "^0.14.0", path = "../gix-filter" }
25+
gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"] }
26+
gix-command = { version = "^0.3.10", path = "../gix-command" }
27+
gix-path = { version = "^0.10.12", path = "../gix-path" }
28+
gix-fs = { version = "^0.12.0", path = "../gix-fs" }
29+
gix-tempfile = { version = "^15.0.0", path = "../gix-tempfile" }
30+
gix-trace = { version = "^0.1.11", path = "../gix-trace" }
31+
gix-quote = { version = "^0.4.13", path = "../gix-quote" }
32+
gix-revision = { version = "^0.30.0", path = "../gix-revision", default-features = false, features = ["merge_base"] }
33+
gix-revwalk = { version = "^0.16.0", path = "../gix-revwalk" }
34+
gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] }
3535

3636
thiserror = "1.0.63"
37-
imara-diff = { version = "0.1.7", optional = true }
37+
imara-diff = { version = "0.1.7" }
3838
bstr = { version = "1.5.0", default-features = false }
3939
serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] }
4040

4141
document-features = { version = "0.2.0", optional = true }
4242

4343
[dev-dependencies]
4444
gix-testtools = { path = "../tests/tools" }
45+
gix-odb = { path = "../gix-odb" }
46+
gix-utils = { version = "^0.1.12", path = "../gix-utils" }
47+
termtree = "0.5.1"
4548
pretty_assertions = "1.4.0"
4649

4750
[package.metadata.docs.rs]

gix-merge/src/blob/platform/merge.rs

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use std::path::PathBuf;
77
pub struct Options {
88
/// If `true`, the resources being merged are contained in a virtual ancestor,
99
/// which is the case when merge bases are merged into one.
10+
/// This flag affects the choice of merge drivers.
1011
pub is_virtual_ancestor: bool,
1112
/// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side.
1213
pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,

gix-merge/src/commit.rs

+227
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,227 @@
1+
/// The error returned by [`commit()`](crate::commit()).
2+
#[derive(Debug, thiserror::Error)]
3+
#[allow(missing_docs)]
4+
pub enum Error {
5+
#[error(transparent)]
6+
MergeBase(#[from] gix_revision::merge_base::Error),
7+
#[error(transparent)]
8+
MergeTree(#[from] crate::tree::Error),
9+
#[error("Failed to write tree for merged merge-base or virtual commit")]
10+
WriteObject(gix_object::write::Error),
11+
#[error("No common ancestor between {our_commit_id} and {their_commit_id}")]
12+
NoMergeBase {
13+
/// The commit on our side that was to be merged.
14+
our_commit_id: gix_hash::ObjectId,
15+
/// The commit on their side that was to be merged.
16+
their_commit_id: gix_hash::ObjectId,
17+
},
18+
#[error(
19+
"Conflicts occurred when trying to resolve multiple merge-bases by merging them. This is most certainly a bug."
20+
)]
21+
VirtualMergeBaseConflict,
22+
#[error("Could not find ancestor, our or their commit to extract tree from")]
23+
FindCommit(#[from] gix_object::find::existing_object::Error),
24+
}
25+
26+
/// A way to configure [`commit()`](crate::commit()).
27+
#[derive(Default, Debug, Clone)]
28+
pub struct Options {
29+
/// If `true`, merging unrelated commits is allowed, with the merge-base being assumed as empty tree.
30+
pub allow_missing_merge_base: bool,
31+
/// Options to define how trees should be merged.
32+
pub tree_merge: crate::tree::Options,
33+
/// If `true`, do not merge multiple merge-bases into one. Instead, just use the first one.
34+
// TODO: test
35+
#[doc(alias = "no_recursive", alias = "git2")]
36+
pub use_first_merge_base: bool,
37+
}
38+
39+
/// The result of [`commit()`](crate::commit()).
40+
#[derive(Clone)]
41+
pub struct Outcome<'a> {
42+
/// The outcome of the actual tree-merge.
43+
pub tree_merge: crate::tree::Outcome<'a>,
44+
/// The tree id of the base commit we used. This is either…
45+
/// * the single merge-base we found
46+
/// * the first of multiple merge-bases if [`use_first_merge_base`](Options::use_first_merge_base) was `true`.
47+
/// * the merged tree of all merge-bases, which then isn't linked to an actual commit.
48+
/// * an empty tree, if [`allow_missing_merge_base`](Options::allow_missing_merge_base) is enabled.
49+
pub merge_base_tree_id: gix_hash::ObjectId,
50+
/// The object ids of all the commits which were found to be merge-bases, or `None` if there was no merge-base.
51+
pub merge_bases: Option<Vec<gix_hash::ObjectId>>,
52+
/// A list of virtual commits that were created to merge multiple merge-bases into one.
53+
/// As they are not reachable by anything they will be garbage collected, but knowing them provides options.
54+
pub virtual_merge_bases: Vec<gix_hash::ObjectId>,
55+
}
56+
57+
pub(super) mod function {
58+
use crate::blob::builtin_driver;
59+
use crate::commit::{Error, Options};
60+
use crate::tree::UnresolvedConflict;
61+
use gix_object::FindExt;
62+
use std::borrow::Cow;
63+
64+
/// Like [`tree()`](crate::tree()), but it takes only two commits, `our_commit` and `their_commit` to automatically
65+
/// compute the merge-bases among them.
66+
/// If there are multiple merge bases, these will be auto-merged into one, recursively, if
67+
/// [`use_first_merge_base`](Options::use_first_merge_base) is `false`.
68+
///
69+
/// `labels` are names where [`current`](crate::blob::builtin_driver::text::Labels::current) is a name for `our_commit`
70+
/// and [`other`](crate::blob::builtin_driver::text::Labels::other) is a name for `their_commit`.
71+
/// If [`ancestor`](crate::blob::builtin_driver::text::Labels::ancestor) is unset, it will be set by us based on the
72+
/// merge-bases of `our_commit` and `their_commit`.
73+
///
74+
/// The `graph` is used to find the merge-base between `our_commit` and `their_commit`, and can also act as cache
75+
/// to speed up subsequent merge-base queries.
76+
///
77+
/// Use `abbreviate_hash(id)` to shorten the given `id` according to standard git shortening rules. It's used in case
78+
/// the ancestor-label isn't explicitly set so that the merge base label becomes the shortened `id`.
79+
/// Note that it's a dyn closure only to make it possible to recursively call this function in case of multiple merge-bases.
80+
///
81+
/// `objects` is also used for writing: trees and virtual commits are written only if it's allowed to merge multiple
82+
/// merge-bases into one and there are multiple merge bases, while merged buffers are written as blobs.
83+
///
84+
/// ### Performance
85+
///
86+
/// Note that `objects` *should* have an object cache to greatly accelerate tree-retrieval.
87+
///
88+
/// ### Notes
89+
///
90+
/// When merging merge-bases recursively, the options are adjusted automatically to act like Git, i.e. merge binary
91+
/// blobs and resolve with *ours*, while resorting to using the base/ancestor in case of unresolvable conflicts.
92+
///
93+
/// ### Deviation
94+
///
95+
/// * It's known that certain conflicts around symbolic links can be auto-resolved. We don't have an option for this
96+
/// at all, yet, primarily as Git seems to not implement the *ours*/*theirs* choice in other places even though it
97+
/// reasonably could. So we leave it to the caller to continue processing the returned tree at will.
98+
#[allow(clippy::too_many_arguments)]
99+
pub fn commit<'objects>(
100+
our_commit: gix_hash::ObjectId,
101+
their_commit: gix_hash::ObjectId,
102+
labels: builtin_driver::text::Labels<'_>,
103+
graph: &mut gix_revwalk::Graph<'_, '_, gix_revwalk::graph::Commit<gix_revision::merge_base::Flags>>,
104+
diff_resource_cache: &mut gix_diff::blob::Platform,
105+
blob_merge: &mut crate::blob::Platform,
106+
objects: &'objects (impl gix_object::FindObjectOrHeader + gix_object::Write),
107+
abbreviate_hash: &mut dyn FnMut(&gix_hash::oid) -> String,
108+
options: Options,
109+
) -> Result<super::Outcome<'objects>, Error> {
110+
let merge_bases = gix_revision::merge_base(our_commit, &[their_commit], graph)?;
111+
let mut virtual_merge_bases = Vec::new();
112+
let mut state = gix_diff::tree::State::default();
113+
let mut commit_to_tree =
114+
|commit_id: gix_hash::ObjectId| objects.find_commit(&commit_id, &mut state.buf1).map(|c| c.tree());
115+
116+
let (merge_base_tree_id, ancestor_name): (_, Cow<'_, str>) = match merge_bases.clone() {
117+
Some(base_commit) if base_commit.len() == 1 => {
118+
(commit_to_tree(base_commit[0])?, abbreviate_hash(&base_commit[0]).into())
119+
}
120+
Some(mut base_commits) => {
121+
let virtual_base_tree = if options.use_first_merge_base {
122+
let first = *base_commits.first().expect("if Some() there is at least one.");
123+
commit_to_tree(first)?
124+
} else {
125+
let mut merged_commit_id = base_commits.pop().expect("at least one base");
126+
let mut options = options.clone();
127+
options.tree_merge.allow_lossy_resolution = true;
128+
options.tree_merge.blob_merge.is_virtual_ancestor = true;
129+
options.tree_merge.blob_merge.text.conflict = builtin_driver::text::Conflict::ResolveWithOurs;
130+
let favor_ancestor = Some(builtin_driver::binary::ResolveWith::Ancestor);
131+
options.tree_merge.blob_merge.resolve_binary_with = favor_ancestor;
132+
options.tree_merge.symlink_conflicts = favor_ancestor;
133+
let labels = builtin_driver::text::Labels {
134+
current: Some("Temporary merge branch 1".into()),
135+
other: Some("Temporary merge branch 2".into()),
136+
..labels
137+
};
138+
while let Some(next_commit_id) = base_commits.pop() {
139+
options.tree_merge.marker_size_multiplier += 1;
140+
let mut out = commit(
141+
merged_commit_id,
142+
next_commit_id,
143+
labels,
144+
graph,
145+
diff_resource_cache,
146+
blob_merge,
147+
objects,
148+
abbreviate_hash,
149+
options.clone(),
150+
)?;
151+
// This shouldn't happen, but if for some buggy reason it does, we rather bail.
152+
if out
153+
.tree_merge
154+
.has_unresolved_conflicts(UnresolvedConflict::ConflictMarkers)
155+
{
156+
return Err(Error::VirtualMergeBaseConflict);
157+
}
158+
let merged_tree_id = out
159+
.tree_merge
160+
.tree
161+
.write(|tree| objects.write(tree))
162+
.map_err(Error::WriteObject)?;
163+
164+
merged_commit_id =
165+
create_virtual_commit(objects, merged_commit_id, next_commit_id, merged_tree_id)?;
166+
167+
virtual_merge_bases.extend(out.virtual_merge_bases);
168+
virtual_merge_bases.push(merged_commit_id);
169+
}
170+
commit_to_tree(merged_commit_id)?
171+
};
172+
(virtual_base_tree, "merged common ancestors".into())
173+
}
174+
None => {
175+
if options.allow_missing_merge_base {
176+
(gix_hash::ObjectId::empty_tree(our_commit.kind()), "empty tree".into())
177+
} else {
178+
return Err(Error::NoMergeBase {
179+
our_commit_id: our_commit,
180+
their_commit_id: their_commit,
181+
});
182+
}
183+
}
184+
};
185+
186+
let mut labels = labels; // TODO(borrowchk): this re-assignment shouldn't be needed.
187+
if labels.ancestor.is_none() {
188+
labels.ancestor = Some(ancestor_name.as_ref().into());
189+
}
190+
191+
let our_tree_id = objects.find_commit(&our_commit, &mut state.buf1)?.tree();
192+
let their_tree_id = objects.find_commit(&their_commit, &mut state.buf1)?.tree();
193+
194+
let outcome = crate::tree(
195+
&merge_base_tree_id,
196+
&our_tree_id,
197+
&their_tree_id,
198+
labels,
199+
objects,
200+
|buf| objects.write_buf(gix_object::Kind::Blob, buf),
201+
&mut state,
202+
diff_resource_cache,
203+
blob_merge,
204+
options.tree_merge,
205+
)?;
206+
207+
Ok(super::Outcome {
208+
tree_merge: outcome,
209+
merge_bases,
210+
merge_base_tree_id,
211+
virtual_merge_bases,
212+
})
213+
}
214+
215+
fn create_virtual_commit(
216+
objects: &(impl gix_object::Find + gix_object::Write),
217+
parent_a: gix_hash::ObjectId,
218+
parent_b: gix_hash::ObjectId,
219+
tree_id: gix_hash::ObjectId,
220+
) -> Result<gix_hash::ObjectId, Error> {
221+
let mut buf = Vec::new();
222+
let mut commit: gix_object::Commit = objects.find_commit(&parent_a, &mut buf)?.into();
223+
commit.parents = vec![parent_a, parent_b].into();
224+
commit.tree = tree_id;
225+
objects.write(&commit).map_err(Error::WriteObject)
226+
}
227+
}

gix-merge/src/lib.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,10 @@
22
#![forbid(unsafe_code)]
33

44
///
5-
#[cfg(feature = "blob")]
65
pub mod blob;
6+
///
7+
pub mod commit;
8+
pub use commit::function::commit;
9+
///
10+
pub mod tree;
11+
pub use tree::function::tree;

0 commit comments

Comments
 (0)