From ec771b453aee586ea677baf056614d83c2d4c5e5 Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Mon, 9 Sep 2024 11:42:28 +0200
Subject: [PATCH 01/10] add performance notes related to an arena-backed interned BString compatible type

---
 crate-status.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/crate-status.md b/crate-status.md
index b34dffa0754..df1a09e9e40 100644
--- a/crate-status.md
+++ b/crate-status.md
@@ -196,6 +196,9 @@ The top-level crate that acts as hub to all functionality provided by the `gix-*
 * [x] probe capabilities
 * [x] symlink creation and removal
 * [x] file snapshots
+* [ ] **BString Interner with Arena-Backing and arbitrary value association**
+    - probably based on [`internment`](https://docs.rs/internment/latest/internment/struct.Arena.html#),
+      but needs `bumpalo` support to avoid item allocations/boxing, and avoid internal `Mutex`. (key type is pointer based).
 
 ### gix-fs
 * [x] probe capabilities
@@ -215,6 +218,7 @@ The top-level crate that acts as hub to all functionality provided by the `gix-*
 * [x] [name validation][tagname-validation]
 * [x] transform borrowed to owned objects
 * [x] edit trees efficiently and write changes back
+    - [ ] See if `gix-fs::InternedMap` improves performance.
 * [x] API documentation
 * [ ] Some examples

From 65efcb7624031ae478056fbb49a07ef176f0c96b Mon Sep 17 00:00:00 2001
From: Sebastian Thiel
Date: Fri, 30 Aug 2024 09:49:29 +0200
Subject: [PATCH 02/10] add the `gix-merge` crate for capturing merge algorithms

---
 Cargo.lock               |  4 ++++
 Cargo.toml               |  1 +
 README.md                |  5 +++--
 crate-status.md          |  9 +++++++++
 gix-merge/Cargo.toml     | 18 ++++++++++++++++++
 gix-merge/LICENSE-APACHE |  1 +
 gix-merge/LICENSE-MIT    |  1 +
 gix-merge/src/lib.rs     |  2 ++
 8 files changed, 39 insertions(+), 2 deletions(-)
 create mode 100644 gix-merge/Cargo.toml
 create mode 120000 gix-merge/LICENSE-APACHE
 create mode 120000 gix-merge/LICENSE-MIT
 create mode 100644 gix-merge/src/lib.rs

diff --git a/Cargo.lock b/Cargo.lock
index ad92273b749..fbc4bc1a3a4 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2040,6 +2040,10 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "gix-merge"
+version = "0.0.0"
+
 [[package]]
 name = "gix-negotiate"
 version = "0.15.0"
diff --git a/Cargo.toml b/Cargo.toml
index c4fe1097bc4..6e5b2dfe1bf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -243,6 +243,7 @@ members = [
     "gix-object",
     "gix-glob",
     "gix-diff",
+    "gix-merge",
     "gix-date",
     "gix-traverse",
     "gix-dir",
diff --git a/README.md b/README.md
index 49bbcf1150f..5d5ca7e9f88 100644
--- a/README.md
+++ b/README.md
@@ -130,10 +130,11 @@ is usable to some extent.
* [gix-submodule](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-submodule) * [gix-status](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-status) * [gix-worktree-state](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-worktree-state) - * `gitoxide-core` -* **very early** _(possibly without any documentation and many rough edges)_ * [gix-date](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-date) * [gix-dir](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-dir) + * `gitoxide-core` +* **very early** _(possibly without any documentation and many rough edges)_ + * [gix-merge](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-merge) * **idea** _(just a name placeholder)_ * [gix-note](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-note) * [gix-fetchhead](https://github.com/Byron/gitoxide/blob/main/crate-status.md#gix-fetchhead) diff --git a/crate-status.md b/crate-status.md index df1a09e9e40..ad686714572 100644 --- a/crate-status.md +++ b/crate-status.md @@ -326,6 +326,15 @@ Check out the [performance discussion][gix-diff-performance] as well. * [ ] working with hunks of data * [x] API documentation * [ ] Examples + +### gix-merge + +* [ ] three-way merge analysis of blobs with choice of how to resolve conflicts + - [ ] choose how to resolve conflicts on the data-structure + - [ ] produce a new blob based on data-structure containing possible resolutions + - [ ] `merge` style + - [ ] `diff3` style + - [ ] `zdiff` style [gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74 diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml new file mode 100644 index 00000000000..2114995cf07 --- /dev/null +++ b/gix-merge/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "gix-merge" +version = "0.0.0" +repository = "https://github.com/Byron/gitoxide" +license = "MIT OR Apache-2.0" +description = "A crate of the gitoxide project implementing merge algorithms" +authors = ["Sebastian Thiel "] +edition = "2021" +rust-version = "1.65" + +[lints] +workspace = true + +[lib] +doctest = false + +[dependencies] + diff --git a/gix-merge/LICENSE-APACHE b/gix-merge/LICENSE-APACHE new file mode 120000 index 00000000000..965b606f331 --- /dev/null +++ b/gix-merge/LICENSE-APACHE @@ -0,0 +1 @@ +../LICENSE-APACHE \ No newline at end of file diff --git a/gix-merge/LICENSE-MIT b/gix-merge/LICENSE-MIT new file mode 120000 index 00000000000..76219eb72e8 --- /dev/null +++ b/gix-merge/LICENSE-MIT @@ -0,0 +1 @@ +../LICENSE-MIT \ No newline at end of file diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs new file mode 100644 index 00000000000..3a6cd994a53 --- /dev/null +++ b/gix-merge/src/lib.rs @@ -0,0 +1,2 @@ +#![deny(rust_2018_idioms)] +#![forbid(unsafe_code)] From fe7ecd00c7c7091ea0225a5c437abeca85ce0dca Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 12 Sep 2024 11:06:26 +0200 Subject: [PATCH 03/10] feat: Add `blob::pipeline::WorktreeRoots::is_unset()` That way it's easy to determine if a worktree root has any root set. 
--- gix-diff/src/blob/pipeline.rs | 12 +++++++++++- gix-diff/src/blob/platform.rs | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/gix-diff/src/blob/pipeline.rs b/gix-diff/src/blob/pipeline.rs index 45018218426..b9c727e4ca8 100644 --- a/gix-diff/src/blob/pipeline.rs +++ b/gix-diff/src/blob/pipeline.rs @@ -22,6 +22,7 @@ pub struct WorktreeRoots { pub new_root: Option, } +/// Access impl WorktreeRoots { /// Return the root path for the given `kind` pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { @@ -30,6 +31,11 @@ impl WorktreeRoots { ResourceKind::NewOrDestination => self.new_root.as_deref(), } } + + /// Return `true` if all worktree roots are unset. + pub fn is_unset(&self) -> bool { + self.new_root.is_none() && self.old_root.is_none() + } } /// Data as part of an [Outcome]. @@ -184,6 +190,8 @@ impl Pipeline { /// Access impl Pipeline { /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](Driver::name) to support binary searches. pub fn drivers(&self) -> &[super::Driver] { &self.drivers } @@ -445,7 +453,7 @@ impl Pipeline { } } .map_err(|err| { - convert_to_diffable::Error::CreateTempfile { + convert_to_diffable::Error::StreamCopy { source: err, rela_path: rela_path.to_owned(), } @@ -533,6 +541,8 @@ impl Driver { pub fn prepare_binary_to_text_cmd(&self, path: &Path) -> Option { let command: &BStr = self.binary_to_text_command.as_ref()?.as_ref(); let cmd = gix_command::prepare(gix_path::from_bstr(command).into_owned()) + // TODO: Add support for an actual Context, validate it *can* match Git + .with_context(Default::default()) .with_shell() .stdin(Stdio::null()) .stdout(Stdio::piped()) diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 6a550bc2dcf..4c540cce85d 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -184,7 +184,7 @@ pub mod prepare_diff { use crate::blob::platform::Resource; - /// The kind of operation that was performed during the [`diff`](super::Platform::prepare_diff()) operation. + /// The kind of operation that should be performed based on the configuration of the resources involved in the diff. #[derive(Debug, Copy, Clone, Eq, PartialEq)] pub enum Operation<'a> { /// The [internal diff algorithm](imara_diff::diff) should be called with the provided arguments. From 25c68067d87ddcb476f973af6d7e29f9533166a8 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Thu, 12 Sep 2024 11:11:39 +0200 Subject: [PATCH 04/10] use new `WorktreeRoot` API provided by `gix-diff` --- gix/src/repository/diff.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gix/src/repository/diff.rs b/gix/src/repository/diff.rs index e2efb11ec14..4f98ebe52f3 100644 --- a/gix/src/repository/diff.rs +++ b/gix/src/repository/diff.rs @@ -38,10 +38,10 @@ impl Repository { mode, self.attributes_only( &index, - if worktree_roots.new_root.is_some() || worktree_roots.old_root.is_some() { - gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping - } else { + if worktree_roots.is_unset() { gix_worktree::stack::state::attributes::Source::IdMapping + } else { + gix_worktree::stack::state::attributes::Source::WorktreeThenIdMapping }, )? 
.inner, From 9efa09f10d042dc4d5db4edf4589594450a30b31 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 30 Aug 2024 09:57:16 +0200 Subject: [PATCH 05/10] Sketch the entire API surface to capture all parts of blob-merges --- Cargo.lock | 16 + gix-diff/src/blob/platform.rs | 1 + gix-merge/Cargo.toml | 26 ++ gix-merge/src/blob/builtin_driver.rs | 156 ++++++++++ gix-merge/src/blob/mod.rs | 154 +++++++++ gix-merge/src/blob/pipeline.rs | 436 ++++++++++++++++++++++++++ gix-merge/src/blob/platform.rs | 447 +++++++++++++++++++++++++++ gix-merge/src/lib.rs | 4 + 8 files changed, 1240 insertions(+) create mode 100644 gix-merge/src/blob/builtin_driver.rs create mode 100644 gix-merge/src/blob/mod.rs create mode 100644 gix-merge/src/blob/pipeline.rs create mode 100644 gix-merge/src/blob/platform.rs diff --git a/Cargo.lock b/Cargo.lock index fbc4bc1a3a4..47d8945ec56 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2043,6 +2043,22 @@ dependencies = [ [[package]] name = "gix-merge" version = "0.0.0" +dependencies = [ + "bstr", + "document-features", + "gix-command", + "gix-filter", + "gix-fs 0.11.3", + "gix-hash 0.14.2", + "gix-object 0.44.0", + "gix-path 0.10.11", + "gix-tempfile 14.0.2", + "gix-trace 0.1.10", + "gix-worktree 0.36.0", + "imara-diff", + "serde", + "thiserror", +] [[package]] name = "gix-negotiate" diff --git a/gix-diff/src/blob/platform.rs b/gix-diff/src/blob/platform.rs index 4c540cce85d..495d23bd433 100644 --- a/gix-diff/src/blob/platform.rs +++ b/gix-diff/src/blob/platform.rs @@ -383,6 +383,7 @@ impl Platform { /// /// If one of the resources is binary, the operation reports an error as such resources don't make their data available /// which is required for the external diff to run. + // TODO: fix this - the diff shouldn't fail if binary (or large) files are used, just copy them into tempfiles. pub fn prepare_diff_command( &self, diff_command: BString, diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index 2114995cf07..b75d4cb3845 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -14,5 +14,31 @@ workspace = true [lib] doctest = false +[features] +default = ["blob"] +## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. +blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace"] +## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
+serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] + [dependencies] +gix-hash = { version = "^0.14.2", path = "../gix-hash" } +gix-object = { version = "^0.44.0", path = "../gix-object" } +gix-filter = { version = "^0.13.0", path = "../gix-filter", optional = true } +gix-worktree = { version = "^0.36.0", path = "../gix-worktree", default-features = false, features = ["attributes"], optional = true } +gix-command = { version = "^0.3.9", path = "../gix-command", optional = true } +gix-path = { version = "^0.10.11", path = "../gix-path", optional = true } +gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true } +gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true } +gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true } + +thiserror = "1.0.63" +imara-diff = { version = "0.1.7", optional = true } +bstr = { version = "1.5.0", default-features = false } +serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"] } + +document-features = { version = "0.2.0", optional = true } +[package.metadata.docs.rs] +all-features = true +features = ["document-features"] diff --git a/gix-merge/src/blob/builtin_driver.rs b/gix-merge/src/blob/builtin_driver.rs new file mode 100644 index 00000000000..cacef327ac6 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver.rs @@ -0,0 +1,156 @@ +use crate::blob::BuiltinDriver; + +impl BuiltinDriver { + /// Return the name of this instance. + pub fn as_str(&self) -> &str { + match self { + BuiltinDriver::Text => "text", + BuiltinDriver::Binary => "binary", + BuiltinDriver::Union => "union", + } + } + + /// Get all available built-in drivers. + pub fn all() -> &'static [Self] { + &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union] + } + + /// Try to match one of our variants to `name`, case-sensitive, and return its instance. + pub fn by_name(name: &str) -> Option { + Self::all().iter().find(|variant| variant.as_str() == name).copied() + } +} + +/// +pub mod binary { + use crate::blob::Resolution; + + /// What to do when having to pick a side to resolve a conflict. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ResolveWith { + /// Chose the ancestor to resolve a conflict. + Ancestor, + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, + } + + /// Tell the caller of [`merge()`] which side was picked + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, + } + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. + pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(ResolveWith::Ours) => (Pick::Ours, Resolution::Complete), + Some(ResolveWith::Theirs) => (Pick::Theirs, Resolution::Complete), + Some(ResolveWith::Ancestor) => (Pick::Ancestor, Resolution::Complete), + } + } +} + +/// +pub mod text { + use crate::blob::Resolution; + + /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express + /// merge conflicts in the resulting file. 
+ #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ConflictStyle { + /// Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, + /// hiding the base version entirely. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + ///``` + #[default] + Merge, + /// Show non-minimized hunks of local changes, the base, and the incoming (other) changes. + /// + /// This mode does not hide any information. + /// ``` + /// <<<<<<< local + /// line1-changed-by-both + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line1-changed-by-both + /// line2-changed + /// >>>>>>> incoming + ///``` + Diff3, + /// Like [`Diff3](Self::Diff3), but will show *minimized* hunks of local change and the incoming (other) changes, + /// as well as non-minimized hunks of the base. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + /// ``` + ZealousDiff3, + } + + /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Options { + /// How to visualize conflicts in merged files. + pub conflict_style: ConflictStyle, + /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` + pub marker_size: usize, + /// Decide what to do to automatically resolve conflicts. + /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. + pub on_conflict: Option, + } + + impl Default for Options { + fn default() -> Self { + Options { + conflict_style: Default::default(), + marker_size: 7, + on_conflict: None, + } + } + } + + /// What to do to resolve a conflict. + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub enum ResolveWith { + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, + /// Place our and their lines one after another, in any order + Union, + } + + /// Merge `current` and `other` with `ancestor` as base according to `opts`. + /// + /// Place the merged result in `out` and return the resolution. + pub fn merge(_out: &mut Vec, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution { + todo!("text merge"); + } +} diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs new file mode 100644 index 00000000000..f14a517d5ec --- /dev/null +++ b/gix-merge/src/blob/mod.rs @@ -0,0 +1,154 @@ +// TODO: remove this - only needed while &mut Vec isn't used. +#![allow(clippy::ptr_arg)] + +use bstr::BString; +use std::path::PathBuf; + +/// +pub mod builtin_driver; +/// +pub mod pipeline; +/// +pub mod platform; + +/// Identify a merge resolution. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Resolution { + /// Everything could be resolved during the merge. + Complete, + /// A conflict is still present. + Conflict, +} + +/// A way to classify a resource suitable for merging. +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub enum ResourceKind { + /// Our side of the state. + CurrentOrOurs, + /// Their side of the state. 
OtherOrTheirs,
+    /// The state of the common base of both ours and theirs.
+    CommonAncestorOrBase,
+}
+
+/// Define a driver program that merges
+///
+/// Some values are related to diffing, some are related to conversions.
+#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
+pub enum BuiltinDriver {
+    /// Perform a merge between text-sources such that conflicts are marked according to
+    /// `merge.conflictStyle` in the Git configuration.
+    ///
+    /// If any of the inputs, *base*, *ours* or *theirs* looks like non-text/binary,
+    /// the [`Binary`](Self::Binary) driver will be used instead.
+    ///
+    /// Also see [`builtin_driver::text::ConflictStyle`].
+    #[default]
+    Text,
+    /// Merge 'unmergable' content by choosing *ours* or *theirs*, without performing
+    /// an actual merge.
+    ///
+    /// Note that if the merge operation is for a virtual ancestor (a merge for merge-bases),
+    /// then *ours* will always be chosen.
+    Binary,
+    /// Merge text-sources and resolve conflicts by adding conflicting lines one after another,
+    /// in random order, without adding conflict markers either.
+    ///
+    /// This can be useful for files that change a lot, but will remain usable merely by adding
+    /// all changed lines.
+    Union,
+}
+
+/// Define a driver program that merges
+///
+/// Some values are related to diffing, some are related to conversions.
+#[derive(Default, Debug, Clone, PartialEq, Eq)]
+pub struct Driver {
+    /// The name of the driver, as referred to by `[merge "name"]` in the git configuration.
+    pub name: BString,
+    /// The human-readable version of `name`, only to be used for displaying driver-information to the user.
+    pub display_name: BString,
+    /// The command to execute to perform the merge entirely like `<command> %O %A %B %L %P %S %X %Y`.
+    ///
+    /// * **%O**
+    ///     - the common ancestor version, or *base*.
+    /// * **%A**
+    ///     - the current version, or *ours*.
+    /// * **%B**
+    ///     - the other version, or *theirs*.
+    /// * **%L**
+    ///     - The conflict-marker size as positive number.
+    /// * **%P**
+    ///     - The path in which the merged result will be stored.
+    /// * **%S**
+    ///     - The conflict-label for the common ancestor or *base*.
+    /// * **%X**
+    ///     - The conflict-label for the current version or *ours*.
+    /// * **%Y**
+    ///     - The conflict-label for the other version or *theirs*.
+    ///
+    /// Note that conflict-labels are behind the conflict markers, to annotate them.
+    ///
+    /// A typical invocation with all arguments substituted could then look like this:
+    ///
+    /// ```
+    /// .merge_file_nR2Qs1 .merge_file_WYXCJe .merge_file_UWbzrm 7 file e2a2970 HEAD feature
+    /// ```
+    pub command: BString,
+    /// If set, this is the `name` of the driver to use when a virtual-merge-base is created, as a merge of all
+    /// available merge-bases if there are more than one.
+    ///
+    /// This value can also be special built-in drivers named `text`, `binary` or `union`. Note that user-defined
+    /// drivers with the same name will be preferred over built-in ones, but only for files whose git attributes
+    /// specified the driver by *name*.
+    pub recursive: Option<BString>,
+}
+
+/// A conversion pipeline to take an object or path from what's stored in Git to what can be merged, while
+/// following the guidance of git-attributes at the respective path to learn how the merge should be performed.
+/// +/// Depending on the source, different conversions are performed: +/// +/// * `worktree on disk` -> `object for storage in git` +/// * `object` -> `possibly renormalized object` +/// - Renormalization means that the `object` is converted to what would be checked out into the work-tree, +/// just to turn it back into an object. +#[derive(Clone)] +pub struct Pipeline { + /// A way to read data directly from the worktree. + pub roots: pipeline::WorktreeRoots, + /// A pipeline to convert objects from the worktree to Git, and also from Git to the worktree, and back to Git. + pub filter: gix_filter::Pipeline, + /// Options affecting the way we read files. + pub options: pipeline::Options, + /// All available merge drivers. + /// + /// They are referenced in git-attributes by name, and we hand out indices into this array. + drivers: Vec, + /// Pre-configured attributes to obtain additional merge-related information. + attrs: gix_filter::attributes::search::Outcome, + /// A buffer to produce disk-accessible paths from worktree roots. + path: PathBuf, +} + +/// A utility for gathering and processing all state necessary to perform a three-way merge. +/// +/// It can re-use buffers if all three parts of participating in the merge are +/// set repeatedly. +#[derive(Clone)] +pub struct Platform { + /// The current version (ours). + current: Option, + /// The ancestor version (base). + ancestor: Option, + /// The other version (theirs). + other: Option, + + /// A way to convert objects into a diff-able format. + pub filter: Pipeline, + /// A way to access `.gitattributes` + pub attr_stack: gix_worktree::Stack, + + /// The way we convert resources into mergeable states. + filter_mode: pipeline::Mode, +} diff --git a/gix-merge/src/blob/pipeline.rs b/gix-merge/src/blob/pipeline.rs new file mode 100644 index 00000000000..90adb615051 --- /dev/null +++ b/gix-merge/src/blob/pipeline.rs @@ -0,0 +1,436 @@ +use super::{BuiltinDriver, Pipeline, ResourceKind}; +use bstr::{BStr, ByteSlice}; +use gix_filter::attributes; +use gix_filter::driver::apply::{Delay, MaybeDelayed}; +use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome}; +use gix_object::tree::EntryKind; +use std::io::Read; +use std::path::{Path, PathBuf}; + +/// Options for use in a [`Pipeline`]. +#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// The amount of bytes that an object has to reach before being treated as binary. + /// These objects will not be queried, nor will their data be processed in any way. + /// If `0`, no file is ever considered binary due to their size. + /// + /// Note that for files stored in `git`, what counts is their stored, decompressed size, + /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets + /// them. + /// However, if they are to be retrieved from the worktree, the worktree size is what matters, + /// even though that also might be a `git-lfs` file which is small in Git. + pub large_file_threshold_bytes: u64, + /// Capabilities of the file system which affect how we read worktree files. + pub fs: gix_fs::Capabilities, + /// Define which driver to use if the `merge` attribute for a resource is unspecified. + /// + /// This is the value of the `merge.default` git configuration. + pub default_driver: Option, +} + +/// The specific way to convert a resource. +#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Mode { + /// Prepare resources as they are stored in `git`. 
+ /// + /// This is naturally the case when object-ids are used, but a conversion is needed + /// when data is read from a worktree. + #[default] + ToGit, + /// For sources that are object-ids, convert them to what *would* be stored in the worktree, + /// and back to what *would* be stored in Git. + /// + /// Sources that are located in a worktree are merely converted to what *would* be stored in Git. + /// + /// This is useful to prevent merge conflicts due to inconcistent whitespace. + Renormalize, +} + +/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree. +#[derive(Clone, Debug, Default)] +pub struct WorktreeRoots { + /// The worktree root where the current (or our) version of the resource is present. + pub current_root: Option, + /// The worktree root where the other (or their) version of the resource is present. + pub other_root: Option, + /// The worktree root where containing the resource of the common ancestor of our and their version. + pub common_ancestor_root: Option, +} + +impl WorktreeRoots { + /// Return the root path for the given `kind` + pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> { + match kind { + ResourceKind::CurrentOrOurs => self.current_root.as_deref(), + ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(), + ResourceKind::OtherOrTheirs => self.other_root.as_deref(), + } + } + + /// Return `true` if all worktree roots are unset. + pub fn is_unset(&self) -> bool { + self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none() + } +} + +/// Lifecycle +impl Pipeline { + /// Create a new instance of a pipeline which produces blobs suitable for merging. + /// + /// `roots` allow to read worktree files directly, and `worktree_filter` is used + /// to transform object database data directly. `drivers` further configure individual paths. + /// `options` are used to further configure the way we act.. + pub fn new( + roots: WorktreeRoots, + worktree_filter: gix_filter::Pipeline, + mut drivers: Vec, + options: Options, + ) -> Self { + drivers.sort_by(|a, b| a.name.cmp(&b.name)); + Pipeline { + roots, + filter: worktree_filter, + drivers, + options, + attrs: { + let mut out = gix_filter::attributes::search::Outcome::default(); + out.initialize_with_selection(&Default::default(), Some("merge")); + out + }, + path: Default::default(), + } + } +} + +/// Access +impl Pipeline { + /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](super::Driver::name) to support binary searches. + pub fn drivers(&self) -> &[super::Driver] { + &self.drivers + } +} + +/// Data as part of an [Outcome]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub enum Data { + /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`]. + Buffer, + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be merged. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in git, and once it's the size of file + /// in the worktree - both can differ a lot depending on filters. 
+ size: u64, + }, +} + +/// The selection of the driver to use by a resource obtained with [`Pipeline::convert_to_mergeable()`]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] +pub enum DriverChoice { + /// Use the given built-in driver to perform the merge. + BuiltIn(BuiltinDriver), + /// Use the user-provided driver program using the index into [the pipelines driver array](Pipeline::drivers(). + Index(usize), +} + +impl Default for DriverChoice { + fn default() -> Self { + DriverChoice::BuiltIn(Default::default()) + } +} + +/// The outcome returned by [Pipeline::convert_to_mergeable()]. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub struct Outcome { + /// If available, an index into the `drivers` field to access more diff-related information of the driver for items + /// at the given path, as previously determined by git-attributes. + /// + /// * `merge` is set + /// - Use the [`BuiltinDriver::Text`] + /// * `-merge` is unset + /// - Use the [`BuiltinDriver::Binary`] + /// * `!merge` is unspecified + /// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. + /// * `merge=name` + /// - Search for a user-configured or built-in driver called `name`. + /// - If not found, silently default to [`BuiltinDriver::Text`] + /// + /// Note that drivers are queried even if there is no object available. + pub driver: DriverChoice, + /// The data itself, suitable for diffing, and if the object or worktree item is present at all. + /// Otherwise, it's `None`. + pub data: Option, +} + +/// +pub mod convert_to_mergeable { + use std::collections::TryReserveError; + + use bstr::BString; + use gix_object::tree::EntryKind; + + /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")] + InvalidEntryKind { rela_path: BString, actual: EntryKind }, + #[error("Entry at '{rela_path}' could not be read as symbolic link")] + ReadLink { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be opened for reading or read from")] + OpenOrRead { rela_path: BString, source: std::io::Error }, + #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")] + StreamCopy { rela_path: BString, source: std::io::Error }, + #[error(transparent)] + FindObject(#[from] gix_object::find::existing_object::Error), + #[error(transparent)] + ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error), + #[error(transparent)] + ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error), + #[error("Memory allocation failed")] + OutOfMemory(#[from] TryReserveError), + } +} + +/// Conversion +impl Pipeline { + /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`. + /// The resulting merge-able data is written into `out`, if it's not too large or considered binary. + /// The returned [`Outcome`] contains information on how to use `out`, or if it's filled at all. + /// + /// `attributes` must be returning the attributes at `rela_path`, and `objects` must be usable if `kind` is + /// a resource in the object database, i.e. if no worktree root is available. It's notable that if a worktree root + /// is present for `kind`, then a `rela_path` is used to access it on disk. 
+ /// + /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case + /// [a root](WorktreeRoots) is present, then `out` will be left cleared and [Outcome::data] will be `None`. + /// This is useful to simplify the calling code as empty buffers signal that nothing is there. + /// + /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode. + /// Only blobs are allowed. + /// + /// Use `convert` to control what kind of the resource will be produced. + #[allow(clippy::too_many_arguments)] + pub fn convert_to_mergeable( + &mut self, + id: &gix_hash::oid, + mode: EntryKind, + rela_path: &BStr, + kind: ResourceKind, + attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome), + objects: &dyn gix_object::FindObjectOrHeader, + convert: Mode, + out: &mut Vec, + ) -> Result { + if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) { + return Err(convert_to_mergeable::Error::InvalidEntryKind { + rela_path: rela_path.to_owned(), + actual: mode, + }); + } + + out.clear(); + attributes(rela_path, &mut self.attrs); + let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); + let driver = match attr.assignment.state { + attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), + attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), + attributes::StateRef::Value(name) => { + let name = name.as_bstr(); + self.drivers + .binary_search_by(|d| d.name.as_bstr().cmp(name)) + .ok() + .map(DriverChoice::Index) + .or_else(|| { + name.to_str() + .ok() + .and_then(BuiltinDriver::by_name) + .map(DriverChoice::BuiltIn) + }) + .unwrap_or_default() + } + attributes::StateRef::Unspecified => self + .options + .default_driver + .map(DriverChoice::BuiltIn) + .unwrap_or_default(), + }; + match self.roots.by_kind(kind) { + Some(root) => { + self.path.clear(); + self.path.push(root); + self.path.push(gix_path::from_bstr(rela_path)); + let size_in_bytes = (self.options.large_file_threshold_bytes > 0) + .then(|| { + none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + }) + }) + .transpose()?; + let data = match size_in_bytes { + Some(None) => None, // missing as identified by the size check + Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::Binary { size }), + _ => { + let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + + if let Some(file) = file { + match convert { + Mode::ToGit | Mode::Renormalize => { + let res = self.filter.convert_to_git( + file, + gix_path::from_bstr(rela_path).as_ref(), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(mut file) => { + file.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Process(mut stream) => { + stream.read_to_end(out).map_err(|err| { + convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + } + } + + Some(if is_binary_buf(out) { + let size = 
out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + }) + } else { + None + } + } + }; + Ok(Outcome { driver, data }) + } + None => { + let data = if id.is_null() { + None + } else { + let header = objects + .try_header(id) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + let is_binary = self.options.large_file_threshold_bytes > 0 + && header.size > self.options.large_file_threshold_bytes; + let data = if is_binary { + Data::Binary { size: header.size } + } else { + objects + .try_find(id, out) + .map_err(gix_object::find::existing_object::Error::Find)? + .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; + + if convert == Mode::Renormalize { + let res = self + .filter + .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + + match res { + ToWorktreeOutcome::Unchanged(_) => {} + ToWorktreeOutcome::Buffer(src) => { + out.clear(); + out.try_reserve(src.len())?; + out.extend_from_slice(src); + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, out).map_err(|err| { + convert_to_mergeable::Error::StreamCopy { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + }; + } + + let res = self.filter.convert_to_git( + &**out, + &gix_path::from_bstr(rela_path), + attributes, + &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + )?; + + match res { + ToGitOutcome::Unchanged(_) => {} + ToGitOutcome::Process(mut stream) => { + stream + .read_to_end(out) + .map_err(|err| convert_to_mergeable::Error::OpenOrRead { + rela_path: rela_path.to_owned(), + source: err, + })?; + } + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); + } + } + + if is_binary_buf(out) { + let size = out.len() as u64; + out.clear(); + Data::Binary { size } + } else { + Data::Buffer + } + }; + Some(data) + }; + Ok(Outcome { driver, data }) + } + } + } +} + +fn none_if_missing(res: std::io::Result) -> std::io::Result> { + match res { + Ok(data) => Ok(Some(data)), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(None), + Err(err) => Err(err), + } +} + +fn is_binary_buf(buf: &[u8]) -> bool { + let buf = &buf[..buf.len().min(8000)]; + buf.contains(&0) +} diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs new file mode 100644 index 00000000000..497b9bf887e --- /dev/null +++ b/gix-merge/src/blob/platform.rs @@ -0,0 +1,447 @@ +use bstr::{BStr, BString}; + +use crate::blob::pipeline::DriverChoice; +use crate::blob::{pipeline, Pipeline, Platform, ResourceKind}; + +/// A stored value representing a resource that participates in a merge. +#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] +pub(super) struct Resource { + /// The `id` of the value, or `null` if it's only living in a worktree. + id: gix_hash::ObjectId, + /// The repository-relative path where the resource lives in the tree. + rela_path: BString, + /// The outcome of converting a resource into a diffable format using [Pipeline::convert_to_mergeable()]. + conversion: pipeline::Outcome, + /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`. 
+ mode: gix_object::tree::EntryKind, + /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary + /// or the resource doesn't exist. + buffer: Vec, +} + +/// A blob or executable ready to be merged in one way or another. +#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] +pub struct ResourceRef<'a> { + /// The data itself, suitable for merging, and if the object or worktree item is present at all. + pub data: resource::Data<'a>, + /// The location of the resource, relative to the working tree. + pub rela_path: &'a BStr, + /// Which driver to use according to the resource's configuration. + pub driver_choice: DriverChoice, + /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at + /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which + /// after its 'to-git' conversion never had its hash computed. + pub id: &'a gix_hash::oid, +} + +/// +pub mod resource { + use crate::blob::{ + pipeline, + platform::{Resource, ResourceRef}, + }; + + impl<'a> ResourceRef<'a> { + pub(super) fn new(cache: &'a Resource) -> Self { + ResourceRef { + data: cache.conversion.data.map_or(Data::Missing, |data| match data { + pipeline::Data::Buffer => Data::Buffer(&cache.buffer), + pipeline::Data::Binary { size } => Data::Binary { size }, + }), + driver_choice: cache.conversion.driver, + rela_path: cache.rela_path.as_ref(), + id: &cache.id, + } + } + } + + /// The data of a mergeable resource, as it could be determined and computed previously. + #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] + pub enum Data<'a> { + /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. + Missing, + /// The textual data as processed and ready for merging, i.e. suitable for storage in Git. + Buffer(&'a [u8]), + /// The size that the binary blob had at the given revision, without having applied filters, as it's either + /// considered binary or above the big-file threshold. + /// + /// In this state, the binary file cannot be merged. + Binary { + /// The size of the object prior to performing any filtering or as it was found on disk. + /// + /// Note that technically, the size isn't always representative of the same 'state' of the + /// content, as once it can be the size of the blob in Git, and once it's the size of file + /// in the worktree. + size: u64, + }, + } + + impl<'a> Data<'a> { + /// Return ourselves as slice of bytes if this instance stores data. + pub fn as_slice(&self) -> Option<&'a [u8]> { + match self { + Data::Buffer(d) => Some(d), + Data::Binary { .. } | Data::Missing => None, + } + } + } +} + +/// +pub mod set_resource { + use bstr::BString; + + use crate::blob::{pipeline, ResourceKind}; + + /// The error returned by [Platform::set_resource](super::Platform::set_resource). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Can only diff blobs, not {mode:?}")] + InvalidMode { mode: gix_object::tree::EntryKind }, + #[error("Failed to read {kind:?} worktree data from '{rela_path}'")] + Io { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error(transparent)] + ConvertToMergeable(#[from] pipeline::convert_to_mergeable::Error), + } +} + +/// +pub mod merge { + use crate::blob::pipeline::DriverChoice; + use crate::blob::platform::ResourceRef; + use crate::blob::{builtin_driver, BuiltinDriver, Driver, Resolution}; + use bstr::BString; + + /// The product of a [`prepare_merge()`](crate::blob::Platform::prepare_merge_state()) call to finally + /// perform the merge and retrieve the merge results. + #[derive(Copy, Clone)] + pub struct State<'parent> { + /// The platform that hosts the resources, used to access drivers. + pub(super) parent: &'parent super::Platform, + /// The current or our side of the merge operation. + pub current: ResourceRef<'parent>, + /// The ancestor or base of the merge operation. + pub ancestor: ResourceRef<'parent>, + /// The other or their side of the merge operation. + pub other: ResourceRef<'parent>, + } + + #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + pub struct Options { + /// If `true`, the resources being merged are contained in a virtual ancestor, + /// which is the case when merge bases are merged into one. + pub is_virtual_ancestor: bool, + /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible and it picks a side. + pub resolve_binary_with: Option, + /// Options for the builtin [text driver](BuiltinDriver::Text). + pub text: builtin_driver::text::Options, + } + + /// + pub mod prepare_external_driver { + use std::ops::{Deref, DerefMut}; + + use crate::blob::ResourceKind; + use bstr::BString; + + /// The error returned by [State::prepare_merge_command()](super::State::prepare_external_driver()). + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")] + SourceOrDestinationAreBinary, + #[error( + "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created" + )] + CreateTempfile { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error( + "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command" + )] + WriteTempfile { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + } + + /// The product of a [`prepare_external_driver`](super::State::prepare_external_driver()) operation. + /// + /// This type acts like [`std::process::Command`], ready to run, with `stderr` set to *inherit*, + /// but `stdin` closed and `stdout` setup to be captured. + // TODO: remove this + #[allow(dead_code)] + pub struct Command { + /// The pre-configured command + cmd: std::process::Command, + /// A tempfile holding the *current* (ours) state of the resource. + current: gix_tempfile::Handle, + /// A tempfile holding the *ancestor* (base) state of the resource. + ancestor: gix_tempfile::Handle, + /// A tempfile holding the *other* (their) state of the resource. 
+ other: gix_tempfile::Handle, + } + + impl Deref for Command { + type Target = std::process::Command; + + fn deref(&self) -> &Self::Target { + &self.cmd + } + } + + impl DerefMut for Command { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.cmd + } + } + } + + /// + pub mod builtin_merge { + /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](super::State::builtin_merge). + pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, + /// New data was produced with the result of the merge, to be found in the buffer that was passed to + /// [builtin_merge()](super::State::builtin_merge). + Buffer, + } + } + + /// The error returned by [State::merge()]. + #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error(transparent)] + PrepareExternalDriver(#[from] prepare_external_driver::Error), + } + + /// Plumbing + impl<'parent> State<'parent> { + /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources, + /// prepare the invocation and temporary files needed to launch it according to protocol. + /// + /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge. + /// + /// ### Deviation + /// + /// We allow passing more context than Git would by taking a whole `context`, it's up to the caller to decide how much is filled. + pub fn prepare_external_driver( + &self, + _merge_command: BString, + _context: gix_command::Context, + ) -> Result { + todo!("prepare command") + } + + /// Perform the merge according to our resources and + /// Note that if the *pick* wasn't [`Buffer`](builtin_merge::Pick::Buffer), then `out` will not have been cleared. + pub fn builtin_merge( + &self, + _out: &mut Vec, + _driver: BuiltinDriver, + _opts: Options, + ) -> (builtin_merge::Pick, Resolution) { + todo!("do full merge") + } + + /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err` + /// with the built-in driver to use instead. + pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> { + match self.current.driver_choice { + DriverChoice::BuiltIn(builtin) => Err(builtin), + DriverChoice::Index(idx) => self.parent.filter.drivers.get(idx).ok_or(BuiltinDriver::default()), + } + } + } + + /// Convenience + impl<'parent> State<'parent> { + /// Perform the merge, possibly invoking an external merge command, and store the result in `out`. + /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`. + pub fn merge( + &self, + _out: &mut Vec, + _opts: Options, + _context: gix_command::Context, + ) -> Result { + match self.configured_driver() { + Ok(driver) => { + let _cmd = self.prepare_external_driver(driver.command.clone(), _context)?; + todo!("invoke command and copy result") + } + Err(_builtin) => { + todo!("call builtins and copy results") + } + } + } + } +} + +/// +pub mod prepare_merge { + /// The error returned by [Platform::prepare_merge()](super::Platform::prepare_merge_state()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The 'current', 'ancestor' or 'other' resource for the merge operation were not set")] + UnsetResource, + #[error("Tried to merge 'current' and 'other' where at least one of them is removed")] + CurrentOrOtherRemoved, + } +} + +/// Lifecycle +impl Platform { + /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able. + /// `filter_mode` decides how to do that specifically. + /// Use `attr_stack` to access attributes pertaining worktree filters and merge settings. + pub fn new(filter: Pipeline, filter_mode: pipeline::Mode, attr_stack: gix_worktree::Stack) -> Self { + Platform { + current: None, + ancestor: None, + other: None, + filter, + filter_mode, + attr_stack, + } + } +} + +/// Preparation +impl Platform { + /// Store enough information about a resource to eventually use it in a merge, where… + /// + /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either + /// be a resource in the worktree, or it's considered a non-existing, deleted object. + /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided + /// for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root + /// is available for `kind`. + /// * `mode` is the kind of object (only blobs and links are allowed) + /// * `rela_path` is the relative path as seen from the (work)tree root. + /// * `kind` identifies the side of the merge this resource will be used for. + /// * `objects` provides access to the object database in case the resource can't be read from a worktree. + pub fn set_resource( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + self.set_resource_inner(id, mode, rela_path, kind, objects) + } + + /// Returns the resource of the given kind if it was set. + pub fn resource(&self, kind: ResourceKind) -> Option> { + let cache = match kind { + ResourceKind::CurrentOrOurs => self.current.as_ref(), + ResourceKind::CommonAncestorOrBase => self.ancestor.as_ref(), + ResourceKind::OtherOrTheirs => self.other.as_ref(), + }?; + ResourceRef::new(cache).into() + } + + /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources. 
+ pub fn prepare_merge_state(&self) -> Result, prepare_merge::Error> { + let current = self.current.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let ancestor = self.ancestor.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let other = self.other.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + + let out = merge::State { + parent: self, + current: ResourceRef::new(current), + ancestor: ResourceRef::new(ancestor), + other: ResourceRef::new(other), + }; + + match (current.conversion.data, other.conversion.data) { + (None, None) => Err(prepare_merge::Error::CurrentOrOtherRemoved), + (_, _) => Ok(out), + } + } +} + +impl Platform { + fn set_resource_inner( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), set_resource::Error> { + if !matches!( + mode, + gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable + ) { + return Err(set_resource::Error::InvalidMode { mode }); + } + let entry = + self.attr_stack + .at_entry(rela_path, None, objects) + .map_err(|err| set_resource::Error::Attributes { + source: err, + kind, + rela_path: rela_path.to_owned(), + })?; + + let storage = match kind { + ResourceKind::OtherOrTheirs => &mut self.other, + ResourceKind::CommonAncestorOrBase => &mut self.ancestor, + ResourceKind::CurrentOrOurs => &mut self.current, + }; + + let mut buf_storage = Vec::new(); + let out = self.filter.convert_to_mergeable( + &id, + mode, + rela_path, + kind, + &mut |_, out| { + let _ = entry.matching_attributes(out); + }, + objects, + self.filter_mode, + storage.as_mut().map_or(&mut buf_storage, |s| &mut s.buffer), + )?; + + match storage { + None => { + *storage = Some(Resource { + id, + rela_path: rela_path.to_owned(), + conversion: out, + mode, + buffer: buf_storage, + }); + } + Some(storage) => { + storage.id = id; + storage.rela_path = rela_path.to_owned(); + storage.conversion = out; + storage.mode = mode; + } + }; + Ok(()) + } +} diff --git a/gix-merge/src/lib.rs b/gix-merge/src/lib.rs index 3a6cd994a53..8e608c53ab4 100644 --- a/gix-merge/src/lib.rs +++ b/gix-merge/src/lib.rs @@ -1,2 +1,6 @@ #![deny(rust_2018_idioms)] #![forbid(unsafe_code)] + +/// +#[cfg(feature = "blob")] +pub mod blob; From 07628465a0a3f047ec809d287c9a4567b4acd607 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Wed, 25 Sep 2024 17:22:51 +0200 Subject: [PATCH 06/10] Implement `text` and `binary` merge algorithms, also with baseline tests for correctness. 
--- Cargo.lock | 2 + crate-status.md | 16 +- gix-merge/Cargo.toml | 4 + gix-merge/src/blob/builtin_driver.rs | 156 ----- gix-merge/src/blob/builtin_driver/binary.rs | 43 ++ gix-merge/src/blob/builtin_driver/mod.rs | 30 + .../src/blob/builtin_driver/text/function.rs | 248 +++++++ gix-merge/src/blob/builtin_driver/text/mod.rs | 89 +++ .../src/blob/builtin_driver/text/utils.rs | 474 +++++++++++++ gix-merge/src/blob/mod.rs | 14 +- gix-merge/src/blob/platform.rs | 2 +- .../generated-archives/text-baseline.tar | Bin 0 -> 390144 bytes gix-merge/tests/fixtures/text-baseline.sh | 659 ++++++++++++++++++ gix-merge/tests/merge/blob/builtin_driver.rs | 214 ++++++ gix-merge/tests/merge/blob/mod.rs | 1 + gix-merge/tests/merge/main.rs | 4 + 16 files changed, 1788 insertions(+), 168 deletions(-) delete mode 100644 gix-merge/src/blob/builtin_driver.rs create mode 100644 gix-merge/src/blob/builtin_driver/binary.rs create mode 100644 gix-merge/src/blob/builtin_driver/mod.rs create mode 100644 gix-merge/src/blob/builtin_driver/text/function.rs create mode 100644 gix-merge/src/blob/builtin_driver/text/mod.rs create mode 100644 gix-merge/src/blob/builtin_driver/text/utils.rs create mode 100644 gix-merge/tests/fixtures/generated-archives/text-baseline.tar create mode 100644 gix-merge/tests/fixtures/text-baseline.sh create mode 100644 gix-merge/tests/merge/blob/builtin_driver.rs create mode 100644 gix-merge/tests/merge/blob/mod.rs create mode 100644 gix-merge/tests/merge/main.rs diff --git a/Cargo.lock b/Cargo.lock index 47d8945ec56..5912a30127d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2053,9 +2053,11 @@ dependencies = [ "gix-object 0.44.0", "gix-path 0.10.11", "gix-tempfile 14.0.2", + "gix-testtools", "gix-trace 0.1.10", "gix-worktree 0.36.0", "imara-diff", + "pretty_assertions", "serde", "thiserror", ] diff --git a/crate-status.md b/crate-status.md index ad686714572..c0e24adabf2 100644 --- a/crate-status.md +++ b/crate-status.md @@ -324,19 +324,23 @@ Check out the [performance discussion][gix-diff-performance] as well. 
* [x] prepare invocation of external diff program - [ ] pass meta-info * [ ] working with hunks of data +* [ ] diff-heuristics match Git perfectly * [x] API documentation * [ ] Examples +[gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74 + ### gix-merge -* [ ] three-way merge analysis of blobs with choice of how to resolve conflicts +* [x] three-way merge analysis of blobs with choice of how to resolve conflicts - [ ] choose how to resolve conflicts on the data-structure - [ ] produce a new blob based on data-structure containing possible resolutions - - [ ] `merge` style - - [ ] `diff3` style - - [ ] `zdiff` style - -[gix-diff-performance]: https://github.com/Byron/gitoxide/discussions/74 + - [x] `merge` style + - [x] `diff3` style + - [x] `zdiff` style +* [ ] diff-heuristics match Git perfectly +* [x] API documentation + * [ ] Examples ### gix-traverse diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index b75d4cb3845..6d8da010147 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -39,6 +39,10 @@ serde = { version = "1.0.114", optional = true, default-features = false, featur document-features = { version = "0.2.0", optional = true } +[dev-dependencies] +gix-testtools = { path = "../tests/tools" } +pretty_assertions = "1.4.0" + [package.metadata.docs.rs] all-features = true features = ["document-features"] diff --git a/gix-merge/src/blob/builtin_driver.rs b/gix-merge/src/blob/builtin_driver.rs deleted file mode 100644 index cacef327ac6..00000000000 --- a/gix-merge/src/blob/builtin_driver.rs +++ /dev/null @@ -1,156 +0,0 @@ -use crate::blob::BuiltinDriver; - -impl BuiltinDriver { - /// Return the name of this instance. - pub fn as_str(&self) -> &str { - match self { - BuiltinDriver::Text => "text", - BuiltinDriver::Binary => "binary", - BuiltinDriver::Union => "union", - } - } - - /// Get all available built-in drivers. - pub fn all() -> &'static [Self] { - &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union] - } - - /// Try to match one of our variants to `name`, case-sensitive, and return its instance. - pub fn by_name(name: &str) -> Option { - Self::all().iter().find(|variant| variant.as_str() == name).copied() - } -} - -/// -pub mod binary { - use crate::blob::Resolution; - - /// What to do when having to pick a side to resolve a conflict. - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub enum ResolveWith { - /// Chose the ancestor to resolve a conflict. - Ancestor, - /// Chose our side to resolve a conflict. - Ours, - /// Chose their side to resolve a conflict. - Theirs, - } - - /// Tell the caller of [`merge()`] which side was picked - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub enum Pick { - /// Chose the ancestor. - Ancestor, - /// Chose our side. - Ours, - /// Chose their side. - Theirs, - } - - /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. - /// - /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. 
- pub fn merge(on_conflict: Option) -> (Pick, Resolution) { - match on_conflict { - None => (Pick::Ours, Resolution::Conflict), - Some(ResolveWith::Ours) => (Pick::Ours, Resolution::Complete), - Some(ResolveWith::Theirs) => (Pick::Theirs, Resolution::Complete), - Some(ResolveWith::Ancestor) => (Pick::Ancestor, Resolution::Complete), - } - } -} - -/// -pub mod text { - use crate::blob::Resolution; - - /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express - /// merge conflicts in the resulting file. - #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub enum ConflictStyle { - /// Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, - /// hiding the base version entirely. - /// - /// ``` - /// line1-changed-by-both - /// <<<<<<< local - /// line2-to-be-changed-in-incoming - /// ======= - /// line2-changed - /// >>>>>>> incoming - ///``` - #[default] - Merge, - /// Show non-minimized hunks of local changes, the base, and the incoming (other) changes. - /// - /// This mode does not hide any information. - /// ``` - /// <<<<<<< local - /// line1-changed-by-both - /// line2-to-be-changed-in-incoming - /// ||||||| 9a8d80c - /// line1-to-be-changed-by-both - /// line2-to-be-changed-in-incoming - /// ======= - /// line1-changed-by-both - /// line2-changed - /// >>>>>>> incoming - ///``` - Diff3, - /// Like [`Diff3](Self::Diff3), but will show *minimized* hunks of local change and the incoming (other) changes, - /// as well as non-minimized hunks of the base. - /// - /// ``` - /// line1-changed-by-both - /// <<<<<<< local - /// line2-to-be-changed-in-incoming - /// ||||||| 9a8d80c - /// line1-to-be-changed-by-both - /// line2-to-be-changed-in-incoming - /// ======= - /// line2-changed - /// >>>>>>> incoming - /// ``` - ZealousDiff3, - } - - /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub struct Options { - /// How to visualize conflicts in merged files. - pub conflict_style: ConflictStyle, - /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` - pub marker_size: usize, - /// Decide what to do to automatically resolve conflicts. - /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. - pub on_conflict: Option, - } - - impl Default for Options { - fn default() -> Self { - Options { - conflict_style: Default::default(), - marker_size: 7, - on_conflict: None, - } - } - } - - /// What to do to resolve a conflict. - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] - pub enum ResolveWith { - /// Chose our side to resolve a conflict. - Ours, - /// Chose their side to resolve a conflict. - Theirs, - /// Place our and their lines one after another, in any order - Union, - } - - /// Merge `current` and `other` with `ancestor` as base according to `opts`. - /// - /// Place the merged result in `out` and return the resolution. - pub fn merge(_out: &mut Vec, _current: &[u8], _ancestor: &[u8], _other: &[u8], _opts: Options) -> Resolution { - todo!("text merge"); - } -} diff --git a/gix-merge/src/blob/builtin_driver/binary.rs b/gix-merge/src/blob/builtin_driver/binary.rs new file mode 100644 index 00000000000..6d4a9696584 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/binary.rs @@ -0,0 +1,43 @@ +/// What to do when having to pick a side to resolve a conflict. 
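+///
+/// For instance (purely descriptive of the logic in [`merge()`](function::merge) below):
+/// passing `None` keeps our side but reports a conflict, while `Some(ResolveWith::Ancestor)`
+/// picks the ancestor and reports the merge as complete.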
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum ResolveWith { + /// Chose the ancestor to resolve a conflict. + Ancestor, + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, +} + +/// Tell the caller of [`merge()`](function::merge) which side was picked. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum Pick { + /// Chose the ancestor. + Ancestor, + /// Chose our side. + Ours, + /// Chose their side. + Theirs, +} + +pub(super) mod function { + use crate::blob::builtin_driver::binary::{Pick, ResolveWith}; + use crate::blob::Resolution; + + /// As this algorithm doesn't look at the actual data, it returns a choice solely based on logic. + /// + /// It always results in a conflict with `current` being picked unless `on_conflict` is not `None`. + pub fn merge(on_conflict: Option) -> (Pick, Resolution) { + match on_conflict { + None => (Pick::Ours, Resolution::Conflict), + Some(resolve) => ( + match resolve { + ResolveWith::Ours => Pick::Ours, + ResolveWith::Theirs => Pick::Theirs, + ResolveWith::Ancestor => Pick::Ancestor, + }, + Resolution::Complete, + ), + } + } +} diff --git a/gix-merge/src/blob/builtin_driver/mod.rs b/gix-merge/src/blob/builtin_driver/mod.rs new file mode 100644 index 00000000000..ecbc1f93373 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/mod.rs @@ -0,0 +1,30 @@ +use crate::blob::BuiltinDriver; + +impl BuiltinDriver { + /// Return the name of this instance. + pub fn as_str(&self) -> &str { + match self { + BuiltinDriver::Text => "text", + BuiltinDriver::Binary => "binary", + BuiltinDriver::Union => "union", + } + } + + /// Get all available built-in drivers. + pub fn all() -> &'static [Self] { + &[BuiltinDriver::Text, BuiltinDriver::Binary, BuiltinDriver::Union] + } + + /// Try to match one of our variants to `name`, case-sensitive, and return its instance. + pub fn by_name(name: &str) -> Option { + Self::all().iter().find(|variant| variant.as_str() == name).copied() + } +} + +/// +pub mod binary; +pub use binary::function::merge as binary; + +/// +pub mod text; +pub use text::function::merge as text; diff --git a/gix-merge/src/blob/builtin_driver/text/function.rs b/gix-merge/src/blob/builtin_driver/text/function.rs new file mode 100644 index 00000000000..a69b9a1a58d --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/text/function.rs @@ -0,0 +1,248 @@ +use crate::blob::builtin_driver::text::utils::{ + assure_ends_with_nl, contains_lines, detect_line_ending, detect_line_ending_or_nl, fill_ancestor, + hunks_differ_in_diff3, take_intersecting, tokens, write_ancestor, write_conflict_marker, write_hunks, + zealously_contract_hunks, CollectHunks, Hunk, Side, +}; +use crate::blob::builtin_driver::text::{ConflictStyle, Options, ResolveWith}; +use crate::blob::Resolution; +use bstr::BStr; + +/// Merge `current` and `other` with `ancestor` as base according to `opts`. +/// +/// Use `current_label`, `other_label` and `ancestor_label` to annotate conflict sections. +/// +/// `input` is for reusing memory for lists of tokens, but note that it grows indefinitely +/// while tokens for `current`, `ancestor` and `other` are added. +/// Place the merged result in `out` (cleared before use) and return the resolution. +/// +/// # Important +/// +/// *The caller* is responsible for clearing `input`, otherwise tokens will accumulate. +/// This idea is to save time if the input is known to be very similar. 
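+///
+/// # Examples
+///
+/// A minimal usage sketch via the public re-export (buffers, labels and options are made up for illustration):
+///
+/// ```no_run
+/// let mut out = Vec::new();
+/// let mut input = imara_diff::intern::InternedInput::default();
+/// let _resolution = gix_merge::blob::builtin_driver::text(
+///     &mut out,
+///     &mut input,
+///     b"ours",
+///     None,
+///     b"base",
+///     None,
+///     b"theirs",
+///     None,
+///     Default::default(),
+/// );
+/// ```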
+#[allow(clippy::too_many_arguments)] +pub fn merge<'a>( + out: &mut Vec, + input: &mut imara_diff::intern::InternedInput<&'a [u8]>, + current: &'a [u8], + current_label: Option<&BStr>, + ancestor: &'a [u8], + ancestor_label: Option<&BStr>, + other: &'a [u8], + other_label: Option<&BStr>, + opts: Options, +) -> Resolution { + out.clear(); + input.update_before(tokens(ancestor)); + input.update_after(tokens(current)); + + let current_hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Current, + hunks: Default::default(), + }, + ); + + let current_tokens = std::mem::take(&mut input.after); + input.update_after(tokens(other)); + + let mut hunks = imara_diff::diff( + opts.diff_algorithm, + input, + CollectHunks { + side: Side::Other, + hunks: current_hunks, + }, + ); + + hunks.sort_by(|a, b| a.before.start.cmp(&b.before.start)); + let mut hunks = hunks.into_iter().peekable(); + let mut intersecting = Vec::new(); + let mut ancestor_integrated_until = 0; + let mut resolution = Resolution::Complete; + let mut filled_hunks = Vec::with_capacity(2); + while let Some(hunk) = hunks.next() { + if take_intersecting(&hunk, &mut hunks, &mut intersecting) { + fill_ancestor(&hunk.before, &mut intersecting); + + let filled_hunks_side = hunk.side; + filled_hunks.clear(); + filled_hunks.push(hunk); + fill_ancestor( + &intersecting + .first() + .zip(intersecting.last()) + .map(|(f, l)| f.before.start..l.before.end) + .expect("at least one entry"), + &mut filled_hunks, + ); + match opts.on_conflict { + None => { + let (hunks_front_and_back, num_hunks_front) = match opts.conflict_style { + ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => { + zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens) + } + ConflictStyle::Diff3 => (Vec::new(), 0), + }; + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + let last_hunk = back_hunks + .last() + .or(their_hunks.last()) + .or(our_hunks.last()) + .or(front_hunks.last()) + .expect("at least one hunk"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + + write_hunks(front_hunks, input, ¤t_tokens, out); + if their_hunks.is_empty() { + write_hunks(our_hunks, input, ¤t_tokens, out); + } else if our_hunks.is_empty() { + // TODO: assure we run into this - currently no test triggers this. Can this happen at all? + write_hunks(their_hunks, input, ¤t_tokens, out); + } else { + // DEVIATION: this makes tests (mostly) pass, but probably is very different from what Git does. 
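+ // Pick the newline style for the conflict markers from the text just before
+ // the conflict if there is any, otherwise from our hunks, and fall back to `\n`
+ // (see `detect_line_ending()` in the `utils` module).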
+ let hunk_storage; + let nl = detect_line_ending( + if front_hunks.is_empty() { + hunk_storage = Hunk { + before: ancestor_integrated_until..first_hunk.before.start, + after: Default::default(), + side: Side::Ancestor, + }; + std::slice::from_ref(&hunk_storage) + } else { + front_hunks + }, + input, + ¤t_tokens, + ) + .or_else(|| detect_line_ending(our_hunks, input, ¤t_tokens)) + .unwrap_or(b"\n".into()); + match opts.conflict_style { + ConflictStyle::Merge => { + if contains_lines(our_hunks) || contains_lines(their_hunks) { + resolution = Resolution::Conflict; + write_conflict_marker(out, b'<', current_label, opts.marker_size, nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, opts.marker_size, nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, opts.marker_size, nl); + } + } + ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { + if contains_lines(our_hunks) || contains_lines(their_hunks) { + if hunks_differ_in_diff3( + opts.conflict_style, + our_hunks, + their_hunks, + input, + ¤t_tokens, + ) { + resolution = Resolution::Conflict; + write_conflict_marker(out, b'<', current_label, opts.marker_size, nl); + write_hunks(our_hunks, input, ¤t_tokens, out); + let ancestor_hunk = Hunk { + before: first_hunk.before.start..last_hunk.before.end, + after: Default::default(), + side: Side::Ancestor, + }; + let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); + let ancestor_nl = + detect_line_ending_or_nl(ancestor_hunk, input, ¤t_tokens); + write_conflict_marker(out, b'|', ancestor_label, opts.marker_size, ancestor_nl); + write_hunks(ancestor_hunk, input, ¤t_tokens, out); + write_conflict_marker(out, b'=', None, opts.marker_size, nl); + write_hunks(their_hunks, input, ¤t_tokens, out); + write_conflict_marker(out, b'>', other_label, opts.marker_size, nl); + } else { + write_hunks(our_hunks, input, ¤t_tokens, out); + } + } + } + } + } + write_hunks(back_hunks, input, ¤t_tokens, out); + ancestor_integrated_until = last_hunk.before.end; + } + Some(resolve) => { + match resolve { + ResolveWith::Ours | ResolveWith::Theirs => { + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let hunks_to_write = if resolve == ResolveWith::Ours { + our_hunks + } else { + their_hunks + }; + if let Some(first_hunk) = hunks_to_write.first() { + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + } + write_hunks(hunks_to_write, input, ¤t_tokens, out); + if let Some(last_hunk) = hunks_to_write.last() { + ancestor_integrated_until = last_hunk.before.end; + } + } + ResolveWith::Union => { + let (hunks_front_and_back, num_hunks_front) = + zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens); + + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") + } + }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + write_hunks(front_hunks, input, ¤t_tokens, out); + 
assure_ends_with_nl(out, detect_line_ending_or_nl(front_hunks, input, ¤t_tokens)); + write_hunks(our_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(our_hunks, input, ¤t_tokens)); + write_hunks(their_hunks, input, ¤t_tokens, out); + if !back_hunks.is_empty() { + assure_ends_with_nl(out, detect_line_ending_or_nl(their_hunks, input, ¤t_tokens)); + } + write_hunks(back_hunks, input, ¤t_tokens, out); + let last_hunk = back_hunks + .last() + .or(their_hunks.last()) + .or(our_hunks.last()) + .or(front_hunks.last()) + .expect("at least one hunk"); + ancestor_integrated_until = last_hunk.before.end; + } + }; + } + } + } else { + write_ancestor(input, ancestor_integrated_until, hunk.before.start as usize, out); + ancestor_integrated_until = hunk.before.end; + write_hunks(std::slice::from_ref(&hunk), input, ¤t_tokens, out); + } + } + write_ancestor(input, ancestor_integrated_until, input.before.len(), out); + + resolution +} diff --git a/gix-merge/src/blob/builtin_driver/text/mod.rs b/gix-merge/src/blob/builtin_driver/text/mod.rs new file mode 100644 index 00000000000..73d3f123cc6 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/text/mod.rs @@ -0,0 +1,89 @@ +/// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express +/// merge conflicts in the resulting file. +#[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum ConflictStyle { + /// Only show the zealously minified conflicting lines of the local changes and the incoming (other) changes, + /// hiding the base version entirely. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + ///``` + #[default] + Merge, + /// Show non-minimized hunks of local changes, the base, and the incoming (other) changes. + /// + /// This mode does not hide any information. + /// ``` + /// <<<<<<< local + /// line1-changed-by-both + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line1-changed-by-both + /// line2-changed + /// >>>>>>> incoming + ///``` + Diff3, + /// Like [`Diff3](Self::Diff3), but will show *minimized* hunks of local change and the incoming (other) changes, + /// as well as non-minimized hunks of the base. + /// + /// ``` + /// line1-changed-by-both + /// <<<<<<< local + /// line2-to-be-changed-in-incoming + /// ||||||| 9a8d80c + /// line1-to-be-changed-by-both + /// line2-to-be-changed-in-incoming + /// ======= + /// line2-changed + /// >>>>>>> incoming + /// ``` + ZealousDiff3, +} + +/// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). +#[derive(Copy, Clone, Debug, Eq, PartialEq)] +pub struct Options { + /// Determine of the diff will be performed. + /// Defaults to [`imara_diff::Algorithm::Myers`]. + pub diff_algorithm: imara_diff::Algorithm, + /// How to visualize conflicts in merged files. + pub conflict_style: ConflictStyle, + /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` + pub marker_size: usize, + /// Decide what to do to automatically resolve conflicts. + /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. 
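+ ///
+ /// For instance (illustrative only), `Options { on_conflict: Some(ResolveWith::Union), ..Default::default() }`
+ /// writes both sides into the result instead of emitting conflict markers.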
+ pub on_conflict: Option, +} + +impl Default for Options { + fn default() -> Self { + Options { + conflict_style: Default::default(), + marker_size: 7, + on_conflict: None, + diff_algorithm: imara_diff::Algorithm::Myers, + } + } +} + +/// What to do to resolve a conflict. +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] +pub enum ResolveWith { + /// Chose our side to resolve a conflict. + Ours, + /// Chose their side to resolve a conflict. + Theirs, + /// Place our and their lines one after another, in any order + Union, +} + +pub(super) mod function; +mod utils; diff --git a/gix-merge/src/blob/builtin_driver/text/utils.rs b/gix-merge/src/blob/builtin_driver/text/utils.rs new file mode 100644 index 00000000000..9d3db8d5599 --- /dev/null +++ b/gix-merge/src/blob/builtin_driver/text/utils.rs @@ -0,0 +1,474 @@ +use crate::blob::builtin_driver::text::ConflictStyle; +use bstr::{BStr, ByteSlice, ByteVec}; +use std::iter::Peekable; +use std::ops::Range; + +/// Used only when `diff3` is the conflict style as `zdiff3` automatically reduces hunks into nothing. +/// Here we check if all hunks are the same. +pub fn hunks_differ_in_diff3( + style: ConflictStyle, + a: &[Hunk], + b: &[Hunk], + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> bool { + if style != ConflictStyle::Diff3 { + return true; + } + + let tokens_for_hunk = + |hunk: &Hunk| -> &[imara_diff::intern::Token] { tokens_for_side(hunk.side, input, current_tokens) }; + + a.iter() + .flat_map(tokens_for_hunk) + .ne(b.iter().flat_map(tokens_for_hunk)) +} + +pub fn contains_lines(hunks: &[Hunk]) -> bool { + hunks.iter().any(|h| !h.after.is_empty()) +} + +/// ## Deviation +/// +/// This implementation definitely isn't the same as in Git, primarily because it seemed impossible +/// to understand what's going on there without investing more time than it seemed worth. +pub fn detect_line_ending( + hunks: &[Hunk], + input: &mut imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> Option<&'static BStr> { + fn is_eol_crlf( + hunks: &[Hunk], + input: &mut imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + ) -> Option { + let (range, side) = hunks.iter().rev().find_map(|h| { + (!h.after.is_empty()) + .then_some((&h.after, h.side)) + .or((!h.before.is_empty()).then_some((&h.before, Side::Ancestor))) + })?; + + let tokens = tokens_for_side(side, input, current_tokens); + { + let last_line = tokens + .get(range.end as usize - 1) + .map(|token| &input.interner[*token])?; + if last_line.last() == Some(&b'\n') { + return last_line.get(last_line.len().checked_sub(2)?).map(|c| *c == b'\r'); + } + } + let second_to_last_line = tokens + .get(range.end.checked_sub(2)? as usize) + .map(|token| &input.interner[*token])?; + second_to_last_line + .get(second_to_last_line.len().checked_sub(2)?) 
+ .map(|c| *c == b'\r') + } + is_eol_crlf(hunks, input, current_tokens).map(|is_crlf| if is_crlf { b"\r\n".into() } else { b"\n".into() }) +} + +pub fn detect_line_ending_or_nl( + hunks: &[Hunk], + input: &mut imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> &'static BStr { + detect_line_ending(hunks, input, current_tokens).unwrap_or(b"\n".into()) +} + +fn tokens_for_side<'a>( + side: Side, + input: &'a imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &'a [imara_diff::intern::Token], +) -> &'a [imara_diff::intern::Token] { + match side { + Side::Current => current_tokens, + Side::Other => &input.after, + Side::Ancestor => &input.before, + } +} + +pub fn assure_ends_with_nl(out: &mut Vec, nl: &BStr) { + if !out.is_empty() && !out.ends_with(b"\n") { + out.push_str(nl); + } +} + +pub fn write_conflict_marker(out: &mut Vec, marker: u8, label: Option<&BStr>, marker_size: usize, nl: &BStr) { + assure_ends_with_nl(out, nl); + out.extend(std::iter::repeat(marker).take(marker_size)); + if let Some(label) = label { + out.push(b' '); + out.extend_from_slice(label); + } + out.push_str(nl); +} + +pub fn write_ancestor(input: &imara_diff::intern::InternedInput<&[u8]>, from: u32, to: usize, out: &mut Vec) { + if to < from as usize { + return; + } + if let Some(tokens) = input.before.get(from as usize..to) { + write_tokens(&input.interner, tokens, out); + } +} + +/// Look at all hunks in `in_out` and fill in the ancestor in the range of `ancestor_range`. +/// This is all based on knowing the ranges are sequences of tokens. +pub fn fill_ancestor(Range { start, end }: &Range, in_out: &mut Vec) { + fn is_nonzero(num: &u32) -> bool { + *num > 0 + } + if in_out.is_empty() { + return; + } + let first = &in_out[0]; + let mut first_idx = 0; + if let Some(lines_to_add) = first.before.start.checked_sub(*start).filter(is_nonzero) { + in_out.insert(0, ancestor_hunk(*start, lines_to_add)); + first_idx += 1; + } + + let mut added_hunks = false; + for (idx, next_idx) in (first_idx..in_out.len()).map(|idx| (idx, idx + 1)) { + let Some(next_hunk) = in_out.get(next_idx) else { break }; + let hunk = &in_out[idx]; + if let Some(lines_to_add) = next_hunk.after.start.checked_sub(hunk.after.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(hunk.after.end, lines_to_add)); + added_hunks = true; + } + } + let in_out_len = in_out.len(); + if added_hunks { + in_out[first_idx..in_out_len].sort_by_key(|hunk| hunk.before.start); + } + + let last = &in_out[in_out_len - 1]; + if let Some(lines_to_add) = end.checked_sub(last.before.end).filter(is_nonzero) { + in_out.push(ancestor_hunk(last.before.end, lines_to_add)); + } +} + +fn ancestor_hunk(start: u32, num_lines: u32) -> Hunk { + let range = start..start + num_lines; + Hunk { + before: range.clone(), + after: range, + side: Side::Ancestor, + } +} + +/// Reduce the area of `a_hunks` and the hunks in `b_hunks` so that only those lines that are +/// actually different remain. Note that we have to compare the resolved values, not only the tokens, +/// so `current_tokens` is expected to be known to the `input` (and its `interner`). +/// Hunks from all input arrays maybe removed in the process from the front and back, in case they +/// are entirely equal to what's in `hunk`. Note also that `a_hunks` and `b_hunks` are treated to be consecutive, +/// so [`fill_ancestor()`] must have been called beforehand, and are assumed to covert the same space in the +/// ancestor buffer. +/// Use `mode` to determine how hunks may be handled. 
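+/// (Contraction happens line by line from the front and from the back of the conflicting region;
+/// the equal lines that were split off are returned so the caller can write them outside of the
+/// conflict markers.)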
+/// +/// Return a new vector of all the hunks that were removed from front and back, with partial hunks inserted, +/// along with the amount of hunks that go front, with the remaining going towards the back. +// TODO: refactor so hunks and their associated data can go into an array for easier handling. +#[must_use] +pub fn zealously_contract_hunks( + a_hunks: &mut Vec, + b_hunks: &mut Vec, + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], +) -> (Vec, usize) { + let line_content = |token_idx: u32, side: Side| { + let tokens = match side { + Side::Current => current_tokens, + Side::Other => &input.after, + Side::Ancestor => &input.before, + }; + &input.interner[tokens[token_idx as usize]] + }; + let (mut last_a_hunk_idx, mut last_b_hunk_idx) = (0, 0); + let (mut out, hunks_in_front) = { + let (mut remove_leading_a_hunks_from, mut remove_leading_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_till, mut b_hunk_token_equal_till) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks(a_hunks).zip(iterate_hunks(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + a_hunk_token_equal_till = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_till = None; + last_b_hunk_idx = b_hunk_idx; + } + if a_line == b_line { + (remove_leading_a_hunks_from, remove_leading_b_hunks_from) = (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_till, b_hunk_token_equal_till) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + let mut out = Vec::with_capacity(remove_leading_a_hunks_from.unwrap_or_else(|| { + if a_hunk_token_equal_till.is_some() { + 1 + } else { + 0 + } + })); + truncate_hunks_from_from_front( + a_hunks, + remove_leading_a_hunks_from, + a_hunk_token_equal_till, + Some(&mut out), + ); + truncate_hunks_from_from_front(b_hunks, remove_leading_b_hunks_from, b_hunk_token_equal_till, None); + let hunks_in_front = out.len(); + (out, hunks_in_front) + }; + + (last_a_hunk_idx, last_b_hunk_idx) = (0, 0); + { + let (mut remove_trailing_a_hunks_from, mut remove_trailing_b_hunks_from) = (None, None); + let (mut a_hunk_token_equal_from, mut b_hunk_token_equal_from) = (None, None); + for ((a_token_idx, a_hunk_idx, a_hunk_side), (b_token_idx, b_hunk_idx, b_hunk_side)) in + iterate_hunks_rev(a_hunks).zip(iterate_hunks_rev(b_hunks)) + { + let a_line = line_content(a_token_idx, a_hunk_side).as_bstr(); + let b_line = line_content(b_token_idx, b_hunk_side).as_bstr(); + + if last_a_hunk_idx != a_hunk_idx { + a_hunk_token_equal_from = None; + last_a_hunk_idx = a_hunk_idx; + } + if last_b_hunk_idx != b_hunk_idx { + b_hunk_token_equal_from = None; + last_b_hunk_idx = b_hunk_idx; + } + + if a_line == b_line { + (remove_trailing_a_hunks_from, remove_trailing_b_hunks_from) = (Some(a_hunk_idx), Some(b_hunk_idx)); + (a_hunk_token_equal_from, b_hunk_token_equal_from) = (Some(a_token_idx), Some(b_token_idx)); + } else { + break; + } + } + + truncate_hunks_from_from_back( + a_hunks, + remove_trailing_a_hunks_from, + a_hunk_token_equal_from, + Some(&mut out), + ); + truncate_hunks_from_from_back(b_hunks, remove_trailing_b_hunks_from, b_hunk_token_equal_from, None); + } + + (out, hunks_in_front) +} + +fn range_by_side(hunk: &mut Hunk) -> &mut Range { + match hunk.side { + Side::Current | Side::Other => 
&mut hunk.after, + Side::Ancestor => &mut hunk.before, + } +} +fn truncate_hunks_from_from_front( + hunks: &mut Vec, + hunks_to_remove_until_idx: Option, + hunk_token_equal_till: Option, + mut out_hunks: Option<&mut Vec>, +) { + let Some(hunks_to_remove_until_idx) = hunks_to_remove_until_idx else { + assert!(hunk_token_equal_till.is_none()); + return; + }; + let mut last_index_to_remove = Some(hunks_to_remove_until_idx); + let hunk = &mut hunks[hunks_to_remove_until_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_till) = hunk_token_equal_till { + let orig_start = range.start; + let new_start = hunk_token_equal_till + 1; + range.start = new_start; + if Range::::is_empty(range) { + range.start = orig_start; + } else if let Some(out) = out_hunks.as_deref_mut() { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = orig_start; + new_range.end = new_start; + + out.push(removed_hunk); + } else { + last_index_to_remove = hunks_to_remove_until_idx.checked_sub(1); + } + } + if let Some(last_index_to_remove) = last_index_to_remove { + let mut current_idx = 0; + hunks.retain(|hunk| { + if current_idx > last_index_to_remove { + true + } else { + current_idx += 1; + if let Some(out) = out_hunks.as_deref_mut() { + out.push(hunk.clone()); + } + false + } + }); + } +} + +fn truncate_hunks_from_from_back( + hunks: &mut Vec, + remove_trailing_hunks_from_idx: Option, + hunk_token_equal_from: Option, + mut out_hunks: Option<&mut Vec>, +) { + let Some(mut remove_trailing_hunks_from_idx) = remove_trailing_hunks_from_idx else { + assert!(hunk_token_equal_from.is_none()); + return; + }; + + let hunk = &mut hunks[remove_trailing_hunks_from_idx]; + let range = range_by_side(hunk); + if let Some(hunk_token_equal_from) = hunk_token_equal_from { + let orig_end = range.end; + let new_end = hunk_token_equal_from; + range.end = new_end; + if Range::::is_empty(range) { + range.end = orig_end; + } else if let Some(out) = out_hunks.as_deref_mut() { + remove_trailing_hunks_from_idx += 1; + let mut removed_hunk = hunk.clone(); + let new_range = range_by_side(&mut removed_hunk); + + new_range.start = new_end; + new_range.end = orig_end; + + out.push(removed_hunk); + } else { + remove_trailing_hunks_from_idx += 1; + } + } + if let Some(out) = out_hunks { + out.extend_from_slice(&hunks[remove_trailing_hunks_from_idx..]); + } + hunks.truncate(remove_trailing_hunks_from_idx); +} + +/// Return an iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. +fn iterate_hunks(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) +} + +/// Return a reverse iterator over `(token_idx, hunk_idx, hunk_side)` from `hunks`. 
+fn iterate_hunks_rev(hunks: &[Hunk]) -> impl Iterator + '_ { + hunks.iter().enumerate().rev().flat_map(|(hunk_idx, hunk)| { + match hunk.side { + Side::Current | Side::Other => &hunk.after, + Side::Ancestor => &hunk.before, + } + .clone() + .rev() + .map(move |idx| (idx, hunk_idx, hunk.side)) + }) +} + +pub fn write_hunks( + hunks: &[Hunk], + input: &imara_diff::intern::InternedInput<&[u8]>, + current_tokens: &[imara_diff::intern::Token], + out: &mut Vec, +) { + for hunk in hunks { + let (tokens, range) = match hunk.side { + Side::Current => (current_tokens, &hunk.after), + Side::Other => (input.after.as_slice(), &hunk.after), + Side::Ancestor => (input.before.as_slice(), &hunk.before), + }; + write_tokens(&input.interner, &tokens[usize_range(range)], out); + } +} + +fn usize_range(range: &Range) -> Range { + range.start as usize..range.end as usize +} + +fn write_tokens( + interner: &imara_diff::intern::Interner<&[u8]>, + tokens: &[imara_diff::intern::Token], + out: &mut Vec, +) { + for token in tokens { + out.extend_from_slice(interner[*token]); + } +} + +/// Find all hunks in `iter` which aren't from the same side as `hunk` and intersect with it. +/// Return `true` if `out` is non-empty after the operation, indicating overlapping hunks were found. +pub fn take_intersecting(hunk: &Hunk, iter: &mut Peekable>, out: &mut Vec) -> bool { + out.clear(); + while iter + .peek() + .filter(|b_hunk| { + b_hunk.side != hunk.side + && (hunk.before.contains(&b_hunk.before.start) + || (hunk.before.is_empty() && hunk.before.start == b_hunk.before.start)) + }) + .is_some() + { + out.extend(iter.next()); + } + !out.is_empty() +} + +pub fn tokens(input: &[u8]) -> imara_diff::sources::ByteLines<'_, true> { + imara_diff::sources::byte_lines_with_terminator(input) +} + +#[derive(Debug, Copy, Clone, Eq, PartialEq)] +pub enum Side { + Current, + Other, + /// A special marker that is just used to be able to mix-in hunks that only point to the ancestor. + /// Only `before` matters then. + Ancestor, +} + +#[derive(Debug, Clone)] +pub struct Hunk { + pub before: Range, + pub after: Range, + pub side: Side, +} + +pub struct CollectHunks { + pub hunks: Vec, + pub side: Side, +} + +impl imara_diff::Sink for CollectHunks { + type Out = Vec; + + fn process_change(&mut self, before: Range, after: Range) { + self.hunks.push(Hunk { + before, + after, + side: self.side, + }); + } + + fn finish(self) -> Self::Out { + self.hunks + } +} diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs index f14a517d5ec..408a58cbf52 100644 --- a/gix-merge/src/blob/mod.rs +++ b/gix-merge/src/blob/mod.rs @@ -11,16 +11,20 @@ pub mod pipeline; /// pub mod platform; -/// Identify a merge resolution. +/// Define if a merge is conflicted or not. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] pub enum Resolution { /// Everything could be resolved during the merge. + /// + /// Conflicts may have been resolved automatically, depending on the options. Complete, - /// A conflict is still present. + /// A conflict is still present in the form of conflict markers. + /// + /// Note that this won't be the case if conflicts were automatically resolved. Conflict, } -/// A way to classify a resource suitable for merging. +/// A way to classify the side of a resource for merging. #[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq, Hash)] pub enum ResourceKind { /// Our side of the state. 
@@ -31,7 +35,7 @@ pub enum ResourceKind { CommonAncestorOrBase, } -/// Define a driver program that merges +/// Define a built-in way of performing a three-way merge, including auto-resolution support. /// /// Some values are related to diffing, some are related to conversions. #[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] @@ -59,7 +63,7 @@ pub enum BuiltinDriver { Union, } -/// Define a driver program that merges +/// Define a driver program that performs a three-way merge. /// /// Some values are related to diffing, some are related to conversions. #[derive(Default, Debug, Clone, PartialEq, Eq)] diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs index 497b9bf887e..6b6175ee408 100644 --- a/gix-merge/src/blob/platform.rs +++ b/gix-merge/src/blob/platform.rs @@ -137,7 +137,7 @@ pub mod merge { pub other: ResourceRef<'parent>, } - #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] + #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Options { /// If `true`, the resources being merged are contained in a virtual ancestor, /// which is the case when merge bases are merged into one. diff --git a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar new file mode 100644 index 0000000000000000000000000000000000000000..b7ffc607c766aa40a4a2948d5aae7d703082d780 GIT binary patch literal 390144 zcmeHwZIkP^k?#2#R{1}$YCpujIhx@+lbkqJPL9v6w-Wn9>?-GOZMira&I}Vt9E;R> z@ow&KzfDP`NCG54vH?+=mXkRW33LP9c)Ib@0B&wyDgRjT34Q-n@h^PZ|L&jJmKWHT zV>#~reds#=D_?oZfWa=yqpfL9X0@+ct#XqI>LKvUee&?&E|N_iZ{sXaHjnsx3(*YI zzwHE0NczWD1w)b5|Lwj-+^pm6V|>q~AM>Elf8cp-+?T&CC$L{N@o)Q90QO=Gv!Quj zIR0(JFHVJe{?QEuGIQXyZJywy$_;=d)_k7nO{9`GghfeAL z>-aDIGGV_*a98q*ga4}AhE+u&{v8(oo=^D4l7Ana690wIC%pGb>N?+X@PC*QdP=f9 zecVRt>0=|)wj|9zg*5Ex2{SxTIh1{Ut4p{85Iru+3LhJyD@b9qWf8X&#!oP<0*2F)J|22qw zRxCL9KaBs6y#S0E01^H<{y*?&{$EQwY2u&!zZRj-h{qzZI#YLAIfP{Lt?`FOmIsY`;DJ^Q@5Oe;bNY^uwwmznd+g3Ia5Kw2dDf<44!{ z(W`>Jn`x_nA*1vr<>V0LEwh$OKh3PQxdy|h*ltBi?jhS$$D!+o82Y{EEW*FV#(zQh zC(Zx%s15Cvb;NfGQvxBHXfJ|L6aSUfM=iS;|IGZa>(l%%st&q{mH0pScS3)coUZkG z6aQ6D4z;>M{M%l;|L=N%OY^^|THq72sy;hmpD&^7e%i!8dg%9Fcq06B#{Yp8(E5*# zmPg}%9a5ezk0$<)t3i07-*Wi382=y8`mdfgv+w_x%f8LVPz?r)`M)yUyP2fwFIK&X za^}yeV!fL=>Jxr6=Fp?#J}yn|9OnNJL%*f)563>W`~SX8>%V72EBq0v| z5hK42A`$ifZI|)?o>Ls9rbD7HeE)~6|0ln$qcQ5|(ZqkPcZOIL5&kVZZ2N!DqWzzU z8t5jq$3HFpbqnaq-!$=$9{RnPo(TUAJO1~afZ~62G(B1WPvd_b+R)FViGTFaZz=q9 z{J-x}{GXP_K>lBgm}kpmlP;D|(dIE;ME5wSJ+oCU{QuDSZ!Zk!{BKs2=uaB@KU4g- zZ87KnxFMYXPV+zL>ShR<<|^65rfnF1a2Ee;rbuFAom> z^~Zm+>wli*(*AE9t%mTgL(22z(Zv6G;=eim-wiFg|650^A^hu*@_czT@qeE9Zw~%F zoBID+8V%uJi2CY#D8<}59dG9`oE4wL-^Mr<@xfsc}#NEr5hgqZG|E1|MXQC ztf2qG_kS1qzum?UnL=3)*S~FrwoUq1sFRX(rT^PJdQ{X1M(E!TJUais)RD@6Px^m~ zqkAPsWU!hWtq$~a3#vhlf-F>WpWugB* z`|rY8e{K5@8$c-jOMx!hB>&BX0ie0c7KN$L%*7%F!@e!P9NWX}xI>1vgJNH&k=hjh2PhyACJ`Y}!K%`DE%^Z_Uw3u8V!nfdu{bL3Ev zSewa%`6=C*AEFJouGHLRdk?k#;`fmG|J?jn8vS?i<^PeJYBhkpV*lgouimzvWBwH3 z-*TAyzbuc=|H4#3CoP!z`91XG9ha5t)g+j2>m-Q-2X)VfAtNZq5tP-`A-EbN9iBpKS=)yl+hx&*Z(rz zJS2~;H+Vk_{r8Rk9CrQ>!hb0K7f%D-l>Td(ZsY$pes!Avdq}r%8T5M?1j2N%`K4(Y zzbeLt>-0W`U-@@Fg`Q7D1+Zj~p;Bj8lA8DalzFChP~N*{H)b^R+k{%zm?@3sFP zd;UY{(EiW5o>YA=ul{MYgX$O{qqrXMNBGZ!+qwS^|NYkZztvShjV#s4A^R+j;qJ2p zMhIDcXOy=Gd~dM-l^26I$kjF?&}@V%-4}ElQde{Qn-Kq&&&>b(HqHO3RC`662XGdn z-xCNciCx#12L3C%k6TC4_|F1g-&+5J(^2@R z`5#;*bWp3`|6TUhJCf4lE0jP3|7wnZ6ZZcdcKq*$a3>g@|D?Xb(fD7zjOWaufqyl} zzm35^oc~7se;w_H@UKJ4^X1XNznbIUg!t!-|9y-4|5_RijsLZXdA3Yg{G%WLCd9wZ 
z#{YYk6HxzOL#r9#|7kH#gI;v8XyCuL>WayK%=}+K_rGAOppzD@^>vEoMZ9R>U(NAv zLjTX1|MP5{{J)L{IKuyzSAjR0PPKGI@jp7$hQ_0zL^a313GvSx|GRYlBbtu;2$BEq zlg{k$Wy$(^72j5oU*_>a<VE=Ox!A3_GC?0-KBFW5+7iT5xwiyD^qiFXW?t$## zHv(bUVv}x8a^3%&M3x^QfT*$soH~@JkK1T{D)v!%jh@P>y*3`7T5aQTz3nz$G+J=u z4H5%x%T~dtqs6OW)rkak%sLRFjv*I0D%c4SrGn+5sq`Rixr(Fh-niBLLha!RWwnQO zjMW}D5LSDB!dLC#5W-w!)o4v-(19pS7M)1KXVQTnd`4X4V6qV)29uQ}QVz0ZuyTCd zZq&!eM}^$2(mV4|=Hm+vVvcXxP5StFD1#p4;4Ufqa@ zVHS*w>^T*%!y?S7h@F;XeuX?X*L{g=R7`=zn^RGRmhu^nVoajd0uJ$5 zKGlq4^ZCi_G@sW^VDnAG>NQ^#@;Ml~=JVl9xE8EQ6pvYlHB!Z{)3T^#*kL8qGUXy( z6-xnnRk0N!5rpvbE?yk;USMHSoRGRmHdzc1O*b`g3X`6oBurX|urP50)xy*#>q zDlpeV2um}Af_GS(ODKA$<+_~0cUZy8sQ{Feoy)(3;tQ5mD(<|WITRc|~# z8N|lpy0vb+XjtaPo0BnXJRZt`Ye^dvkr*9lo@Po8zYP_vIxUqtW*wGA9Yd}qP{B@s zToo*jG`m*;j|tYv{r##qNpRihM)BS)$hApNM^hOAmSG3t)H3BFPZdi60#&gcfW$2v?f^cs<8VR`8 ztC523lQdpWEkLiuYH(6q5P_A_SrnmV(q94no?OlnHEY9U5E&3AgV?e$StKkSlSgD|m<&29 zGQd(Y<(p-N23gw6sM0`7eMyxXWa%%dVgo$IWmIdFr@4%(Nqeejm#2&UsnE$ovW!{> zo|Nv1mx?vzyi`0##7oEZKD>mih32KgEnKfh;juqQAq8GyF2xjD{&^KtV9n=M*j@`g zm!e8q-MJKZiY0Y8a}=CGLF z4__V^%kAocV^VvQv*xQeInT)UX5d=e-W;q&>rJk@0=*Www`4P`h`?%JQdNYO{qia! zu>O}jyd)|2J*dl^@ShlG3KR# z(OzR-)tKNp=2(%@-eZoH8Rb#A#}?d_tefB_!!BU2N8py=Oa&EK{W%p?XxZmiSb_DPUx9lq^qh(-X@%!hWTB;f zMIS8^fJWYXM;%5XEck@J|El;GKJ9<^&uq&JY|F76w|H;+p5?#tF_?>J?6N%Cnx-h3 z@cq7OwaN)I%p-wF|F?DTJ+jD!Xadh`>%IJKIf47CrT;MSpuLRbHz?Z+*S~EzhHH3+ zZ~Wo#F@-In>YW*(@xO81j4b(R{PFM+XRq~!jX~b}z zkxS=1`TtNl4<)=F2?+6Db==0+S&VfaE# zT}xUa{u@ED<7zF!KWF~WvRs<~A1Cc$xlZ_}@&7Q%T}Nsm{tx5-M)3B~nv3wy`hUlE zsQ*7y%%jqu@K64KlmIV0QxV>?a2CZOJ=x-b@`b(siC3`#{{!Q{-4NFQDgGNz2VIo9 z^^Xzxec&px{|>}@x8{Ff_(%P}E}daMw+y?A{O)NBRTQA{Bm7rJefMOj!iM(o<>fTd z-(jw~jQQ@#T$^*)ho)mzm}oU5(4z%}$nOIy5&mJ@PaFTCXVdymLs5!;6!a8s*2 z2LHbA68@(OULt>i|8fVYL~=@UWfJJYKSJd9L0BUEv*UlqvIC0$)X~&v{I5gG^X1Wl ze}u^I@%VRr+W)Pi)e!!5NO`_Idhm}B`7MWkiyi-m4)y=FG#VQJYZ3EonTYT|E%I9q z|FGG!J^vRtA&viaG@4QVpO*7Cl|LJIMqx?U`|L9PS79KtLM~M8E!#~IWhd$w7i~h;~ zYZ3EonPkcOc@?WIUg7@-#(!I$$TD9z{AM}qG>&+{&sQ>4}TyHD>+X*~c|K-UN z`&qf`@7NBC?!Z3g&nR|;*o!4x-252dFYZ1q?$Z2e|I}IJ>EbS~-X)twvRS5UxZb&n z3_p^qLWqv!ni*w@meWmrMLHI=iY`AEpf-m=p{=~ObCeTWtT&H`2>*cnHvT;;p#A?m zSz&=sZ2>+b%zZbZ4 z{})e|*w1MEk1f&5qIbkUTC6v(_CowS%=r&KTnb42f1WUrpYi+uy;k9@^M9vVhw>bW zV+*F4zELgXC_S`TZ(bop_~(rO-H`l0Pn77-`1miL_Q4#z7)0<)5~$-JBi5T&4H5o1 z=YK=^H}(H{vc!JI$Ny;mUtg*tSB?x4oUKl%R-ab4Zx+9@!z%s{-4hO zR;H4Qa83Wpg1|-gA8vbSkN;dRpz&X&B!yqf)BnmIU%fq?g28_@zB2w~{J9G6uBSTn z@q8mN$_b#qA9EDB%9WaWuKIFKX#Cdr9Yv8U)N(iJx=vIDfs62O!CsHn{ErvHncy`4 z)2%nGHzoc{gx7IPxWtn3eCgEjU)g));tKH(u^uh_yEfhbgscTVxi(e^^ ztDYQkWySbs{J(4Y6#s>+1}?c0|3%-Ou+J6IRX?obUsVve2>+b%zwZUq|JTv*O8nFK zUx&2k$D@w_j-peuh|$N%9weg{B=e}^6ay8-q8wY0nv|6>17<9{s*(afZd|HJtI*bCVIxJN0C z2>%@a?^_iAha!6skrMyp|3#9z&i9-2c#P*_8?V#%N=^bV@INsA+i{@(Px0UC+gw}! zDuTd;_TOR7|Mt9))_>G%|J5tgEcW`CwqYE)iEj%Gvn*CFNk@?h{^h4kQ+S%iPvV*CGe{|BD@`zSH^ zPd4UVoPUVpO>Ia}4h}dkT^cX+$#?GW82l>;0vF-mV#oi17gGGUj<&<#pT_?>#6BAy z4E~P`LYP6|WAIPwzj~So;a`uO=gNh_zk(p}5%~B0fad>nv>C#`4k^!<2Os~qF<>L` z58FR&n*Z0(YzY4vggj#w4E_}afsetz2X}(g`A_N_4&h(DjOWaO!M}na@G;0w00@00KtI|7&P9gntb}o-vE#bU&>6CreeuEbu=t{@Zrp{1=M< zR;7^>-Mao&#C{i?ME2i-yPjI-f4Bjh2~Oj`lb%$4EBF7j_W$ke!EF@_cs$Jet?_mH z2=GI~Drl&24$xP|k8r}z-yu`$B%nj4+Dro-1Efm7BDxLyD~SCr_>1swxy<=LzD4n$ zBGsY1FY$jk-~VleSULv?MZB{o(!hUZ?{SMQ#y@-hk3;AG;VPknS{wgglPm6!)O9{? 
z;J@m|;nr1%e}_H)r??ZE`v16k;882_Pv?L0q;vhx8u(Wb`&~#-gn!QX-w!OB|JBj@ zO8nFKUx&2k$D@J&;}6W(@1n+H{M+sMKR*m9{zFG=De?c6A+P|&VclEtb{)97s zG~m#o=1v|B{40q4E*OjOZ@J9)-wQ&D|L#;8;)@gh4*(}PA`15cNU`|Gjr}e}Ai}@x zG4ua!=urH(j^=a1zm)$k4*)0Uu0uH*cr@@|Tam?NEy6#?|GR!b{eMgybkREDzg(-Q zmA@`oUE}ix{trLUI{+g5J2o@^_aG0=|7&P)C;UtK|JEVkebGPEMbMzW$t)W9R}lMM z6i|eJj{o-^n*X1yTNwf;{FDEek=M1QS+3$}yEt_GRn@J)|Iqkv59a?V{#%tsPIT+~ zKUe&>4SRjs@!z3K>%S*0sry#;`Wo$^iUTyhtNtr{evO1xz>r}Xr6=)?a+0WA`)OpU z%`wo)f$H=sOSz$&bH#tdUXK?3eVgJxWmPyS-3j26$WZ1Bl|TdkmEFg!jBxzN;@@>C z{sUJD9n=W_9fElwj~n=}x^uXd7vtY+`+wK9DgFaj3w&zzemg$}J-ucLH1L0>_;1em z-wz=sjP`%1Z+SHSS1;o^b7&{yJYOCS{GTiSn;rlA z7Muw~{$G8&q4B?Z8PAymi+}X+FJb&g;pi<72_3W=`ae_rFK7Jc*>wMNhsJOpOWS{n|G*XeOsF;Rf3Emn z*8cmhL-W5nn#eL;Kd<7CN?8^De_;Hl1qZ-T{HIc_9EczFfBPZHpBBq>^RP;md4|h) zg#KL!03-e5s)0+5uYcS~U{U|yXYc>^L!ajVxe7ym`1ZTMe)H`Qe>1o7W0ITe__Yz| z<|cVcR^}T1n)h+MP4XM#n~k|jm#O(2Wy@U@o5@`Qsm*1&g2&POB!6vWyXSaoMnCVO zHIRFVmwA$IlFW?ncl#8O{*4iBk~O?v$M6Tt&HH$p7kTd?(Z_uO%mbM$UMG2!ng7^j zNuESduIFv~oCAYRx{f!HYPY^Iz6B~!-YhYr_1$h20lCzKOUl>reUh5H_1zYVx=w*d z_S)E_8#7PV@m*@JqgC|U{CNlE0}A;LGAwsMD*L_hKG{A-@F*$>-$l#aYL~%J_>(+m z5hN^%ou#WNkDw465Dl_M^pfkqKWZfK2>geR<B z|D#3%i}24G|A!&n|DvT~QvaX&|D66mY9#P*{5zKI(*7SE27vHS_~+msH4=C@{_W86 zX#S7zPxJq(+dxPEe~*8xNZ=9phwEP`{!g{;3;N{$$^UcwKWZef2>-nKzmWR>I^cid z_}3z$t7g*S|4}2sMfm5;{{=Sn|FyI`^8e)jIsP9t5_|;y;rLIA|I=Xr2>*ot0{@%% zgMD+iO79djx)JzyT#x4e6{vzCdD-j#6#vU8bl`)s|G!SMEZICR{_<`S<%_SsdpD3y zU-AO~1NZ;9zGu_^k3OLyz3bFJ;ua8*{SQOt{-402`#+F0+)swF|HZb4S?>R+b}Aos zEZ#B}>lCy+-@PK$F6z!oRFRk5N|dK?@HWF#$L)90uFdL}yH>jKkGBOxgn!>*;y*)& z?tkb9F4Eh}!9TCb&!YdwDHxJ+G?3}WKi*;RLi}^~e|m5yFrELVz5y`!r~RKVa{njW zo2oh@lN+H3|Gf2IH>CNW$%2<4aM}2ms30Zjl|Z-u$J+uT!aqCycZw|J|9dru^OC_o zjsI1i6>&l1f4nUqWAJZ>wEl~O@O~elXT<1wpiFpLHtsL ze}}#Q-46rm|BEF%J;mkVpX2|hSO3LR=*BW z7I4w_zYJp5A*d_m(S?7!=yYw%d350)FZz27{vrO`wJH8vL$hJNc@Q)Y$JqG`FXjA`ROS2>VYZ3Eo zne^fx=>mXa*>C9jpY2inr;bKL_}3xj`SR$(KVJ0rDE}W=Hr@ZDqty`pbx3)>Ji73Y z7yUgB{|@#4wKN*SzZNmimPr@>595D~=%cp4b z7?0iN(OJi0{694Q+jcC9|L&A1-V2`oQG>sss3Ycod^e!+AD))GDUto}md#6e!^J;l z@HYr?1pb4-rua`C?T_%UL(22z!Nosj@V5m1!9KjQEibSw$J)>PShf%UqxcUE&4#Og z%;4W~`uFJmUk%C!7F_*f27gNEpR@lH?ti57zjd?#^8Y%dJYODM{9^`xj>Nz3Q~bA% zRzvvLA?5k<;Nl-M_;Vco1J9@Nzm8Tz_}3xj`SR$-KT`1LIQ;t_o&TVv(GdQ%hC#`7BSD32^asE!Ji}X?^FL@OPeA5YZ3EonQ-xs8T>g8|3OIc zKU&%h;a`iGXUn9-|A!=hs!jmT7RhF@jo0b>_*BbkEHR_?-EI}VkGH9rY|LR-T7LfnWLWNiRQ7x0eX@Ow%zPIWgzuu|Znewc zC;UmCvj`HJ%XE{at0<4mY`4j8j6WVe@O=Heia*{8L|L0#qT`z;f)^pXq}C=y1PkoH z#oqtvICeM)|1BlksfjKd|2Y0X-%T0-O;y2J^Go7?(vAQD{(191 zPUzA89~}+jvhh#zKQ#ZN10~nXL(=|F+7U9s{=2qI`@eLwI*g^8|6_LqiSQqUVe9-4$A$yr>HH53 zP4BYtKYIL65fsO^C5iu9)W+~eDZ)Q*{y%gm{%g3Thv&F#{Nv33PYi$>F7z4Yki`F_ z9YF&8^XC6;+av$4qnTVb{zv!2}F=iG=CKTsRO5~cu3m+Njt&>_MaR7 z>5~0drSb;d%eMd2|7$37#E%mHoAh}4{|FkQECv1t?*DQ;oA!T+^~d-YkNUJ_dj!miTv=g?LV?GULZLZ|C6@*a3web|A9^Ce{*Gt{gCjFEz!%O$Krp| zRv&KVN8sOcUGo22VIn^y{38qW!pO1suVUTMYBC1@j!*GFXbR{dMELI!&I@?O;(yXs zAFjkk;6DiH{6DTNu^$rtu_bz0^jQ2)+Umir{0RJquqZ_Fe_UzeJ|z6(%Jb5wvG|`9 z>d&qI2>ko|^&&1cd!4@fs3Gs z=Qw=Y|8n;K*j^aW{!gwjkslKNkp+5TIe zHxH|1nWI;r!2i(r?=W=f{tt9bumm~!M~?s(*?)Hbx9vMFt^csv!hgcp|KdQwP4bki z%r*Qq@8fuzz%38oY|K@B;dx1yvTbGi9RNILGnN*i`PjWW#&J2S&}DF z{MvZlrq4Mr*re-t1F3fFn|YkZg|8Hi6nXf^jsPEl|G*3A{0AM)fbhSD>utd9jK2U! 
zhnke};Nc%T0(=Di;re&-|H_nJBh29cqVE5~+5b!U|9D&M|2>ZXZm}c4Mfm6L|8!~n zrv_7HUoRW~%Ea{==_(u5EXrA z@PAS9KZ=C%g2Q zk71h0gZcSK^UE(y>kIf*JTW1tKi_RP=BGFC>tFEyQa;jUlX@&em(to^<(%MNXa;y?TJhV`!3Z2!S@YvzCVo8R!v z_TKV8HC)KSll)RRf%Xl3JUeq+Z8!j@QaGMpj*h2&OCL9LHFAN>mBTrdyu9VN_Tu1) z+IQA*3D=fEfb@u})IH?js+i_p-lr zIa&g}_($3ZCc-~E{&#HKrufg%N*$0%|$ zdbZ~OLI>sqv|OU|!tei4jsHpg|8gzh6lii<4y%^>II$(jYh>7%n-jfW{MS}Mr<7TQ z{{ri6{KJJog#Rf*m&{w>U;X?a2cwM;GK&9_R2O=(Ui>3n1tP*fJO5{gF8P0SUGoUe z+W!xpG^;uV6#tWKvzxnBdIu2}eIu-1{3C4y6XD-++WmhAuKy+f&%-PBGu8N~_#SMH z;nFMdzmB&5jJJzT{9%=B;>A<6xrZ;0i`9cOnPGwdf$`sd=u!OlWWh@ikkJ3+FmRFm zcdhpL&w&VVTK|zy-{d6rXH1B8k104sYJX-phW8n>tl5{hiY9`*pBe6F#&I&IAmx9? z1UC1Ps!dFMM*D`QGy)m?PYnYX;oq{E{-5p9`M*=tDxDYb4?s42=?s+csw5zT|EXc% zLi|IlM|=JkE&!tWUkxqofPW2)h{Y@=AcOy^<2JGeV*G~;{{4{le~uLQplk*Hp))=x z4<)`H3CQ4oY8bc(|D5r^13SRU|7&PwH2&8hHrSU%@3|xeNJ8X~t;rthh|I*U%Ci(v|8k?NB7G0@eB7^_hYU`*X zi15$x|Ca4j{MRU956ra3Kdt@^l-l(qm%;zkFmMt6IpcrJrSpHaG_pzlpT_@Mbfl7r z4F0Evfs63Z@&C3@@qa4SUePB1Uy0&Hei7k+xm`U>6-pzy~(fZF++{xrG?7w^ncwr6dG2jCt!1r0JMWAN<_kif{J(=2s1ciUg zXt_I$6#gd$fQ#^N*$n>efYyJym4^7{fPcWT7H5lCa3X~w0V(`fYyRU{3h{5T>pwQw z1Umm`oVkSO!oiPAx^Z2r$b-ZJf!en zTW=l1Uxa^-|F=9i6O`uv$4Glvo&)}A@o$*at|zq={wD^2i}25x|FZ&@*8g<0vdR8m zHU^yHe{`sGFb^sGPYeJT;h*FG9q0g%{~xT);Tg#P50}|B<*4wVWLa_E_g~&EqI~i7 zckdRPbg@h~536LEPpoi(|AFz}e&{(@O06YhVsY5hk= ze^b-E{qC>deEY-S%x(ObyeH?F-{KoiZW3JL=YCcEVau>yB za+g4AbD6H-ar8dPUmMx(Io_Jl&%0<1o}`;3GvoW+J_V$IV?>)|4e!@6`~maw zKHla<-g`*&abE!QKqiaVNgie9KXzG?ClQqEd7D1xz+jWE;|-+Rt#77Qp7Otv!~e7Z z@Dcd8JujsGpN95H_}3uh8MBbX|Fi(`;rNH!KLYaqI-1Sfbe(K=*~$N#wwnas!dW|Fi&bG5(qHzvB@8C+k**0QLW6q;$P$ zC5_#Yhr zJ_7%i>(Ko_I@%54Ux$?E%R>(T(*nT7_;1hu759JB`42i;%|+v%=Km%)5jp%%3ji16 zpPB!&JnH{X)~yVIi^ji91u0Fh1my5PEdYE3{_TL`|9bU?^^)+9CCSUAQ{q3`<>?|z z*3YZtAz4O6=q6f)3j7a@|Mq-1|5?kfL^}Wbc@=-W<>-H6)c2|R2<*S@dUh-R8;<{_ z`yWmMRQQdF|1jE@!SY&06%WXb7JtKB49lZY#T0RA+*CKj^5W56Gq=X_IMik>Wa8I{ zRa_$4{bryXBuKug#3GS+xRg+%^NnvI#vinoC1$j~+pQu9_D{`Z1Hu3INowxacU#y< zuufqIK=#_$q#H9&*703xuA^1-+WdJ3y9a>6_dh^}xhD8$Y+knZvSx6`8jPtpCtB1l4^b`(K@3GtBWx&2|m*gAycZ{-5UmQH?>L{hzk$=TuCA z{kPcpKgY6Y{C^Ul!f&ko=lAGw&i}fh8G41f979@&|IxdCPNfmx-)7H$wL{yY@&8GJ z>b|k~AJz?0Yc0ybE{_Y|s^K3G9|#?Q@VAawsCq03tzoEZk3?Co1SH%1q(TYoKd1i( z@qa#z|0fAuGBa!c)!JIJosx>tl&lZ`({_EGDj>i=XZ;8Ie<7X!c9NjFZ!G>X`hUab zy?7Rf)_-XIhvI+L;l8BYXaA?|`YSoSdL^*`oc^Cd}qMBdHCO02GFaCaf4SVx)^V1i`{}`s3JeZ$gk8{=PwQRIF6JWGCwf6C3>fyf8AMkqEv-9E}+|EXa@4wmGXlO_3uZ%M#% zG%N{N7BT}!ij^c&;vf3|-nsh!?EQbx{|{*XS5k$Ok}>^%y8maAJTEPCiGR)x4F{_( zMgM>3`9A^l3F!PEEDdzi>C->f0YHNBpY6a|PtE&ZVfD`sX#TfbZD?;8`!93;k8@ic z4Zz*J^~NYZ!jRE7GMkvB4$*OLMZ3v_g^|3!wm%|Gs@mpaAyaXw-CNEr1*V~4;UyC4 zwf{&50152B6~J77)BeNypH1U`9qo~||Gf1-T!YTz@LJAgNZU?YXOwF_z4))Zyd15r zR3iNQaMpJd|2CKeivR4AC+72W;GeLGsljQqdhuU%s;BV~G*!tHlWx3BOL%F zu>Y|Cr#=4%_ka4d|3|g%3;L}6=UFz~`G1=K)1g^89%B5D9rd613-E8lWgqSN{}3(! 
zr1+1M2$g+f@jtuqKZRV-jY-Pd|4S#_YyWGpMN{M?u>UaC%iRALS}xuHK1JrTd0G3< z>;GZTY*G(URzIrK_2M7t03ZSWIqN@m==ik%L$&S;`Yiru*Z-3^Oo-xti=!Lpa9tgU zxh@{P_8;j0Ac6ho^#2^oqx~OR8X9Z=ssHzq?inue5Bq=J^X>l)+Vg+3|5pq8q5t>Y zU%&bGhrhvD9*;>5^GvU~ku7tL|I;L-D`r!}zcVZR`?UW@N3&!6|0V7Jrz3+VH<1$m z>uCGWc)NgZz$)3qi>GLF4__V^tB1)f_UC^N-2d+fu>ME;|EJ4dL_w_o>8AjQ#(z%W zwfcV${{iPe(D|<-?M;7ge`Yw$mj7;JP}}(L8SRO=o@~`lBEvPx^<3X*2e|bRs4Yd2 z)t|2Rih>0GC+`9m;oq{`=YPP7kRHWE?y1SIf( zpFBLcQ+9!i@Xs0lJGM>xKXkM-8vpB%@_czn;D7Qi;4%1jVNV$OfA#H#@ULFRbLJp{ z|H->RMfkUv@xNm`)c@DgY-s$iMa;8hB8&eiyFf+wx8bhG=J}6~0|LD+=S{@SkpS%l9gny3zw|&CDTE!o!lmFKu=GihS@xM$r536LEFFqvs zQ@YC+e|d*i<^ul%^dt^f3C4(DC5+ru`B zzib#d$0pR`zl%KXM^Mm7T%AR4z(%EfVO%|Kz~$qmV@SXUG2# z{YCNLTAElL|Lv0>dE@Y^Pk0#0)A(PDmeeun!~f*K@A3GzVaAx||J13yqD=U&MDZfO z^x=PU;P+9YBL5G;p6&5}7+N&{r=vmE@n3v*KkDF0Jmw++A^xWXejh6-!oOp+&wsIl zkm5hIG?6;~NA>^9An?8MptU?LN<(DQhyU7|>m-gM{B!(&;D>bo!z7_gX0GG^0Ab0D zlho20pcdj?_3chCPhC07u;J)%*MP%=t0TR? zH_HF2qrFdAYtso7!mU!LLul9Vj}`9CuovMU4*PH8-wQli|78TJ;6aJk?+KC$q&t6- zHT+khJ=mod;-6jraV?7f##RHDUQPd%Yv(4M@KC6KnColB|GO642~Yd~CJ0?JYrPjI znT3*Fkpyb^#|rmlq$9#VXZ+`T4#od7f>iK;#(xTgdO;*>_{R$OWokbL{}lhvh*8M{ z!oL!+UJTJ1{;|S+nYxSc5AWOkf6u4*Uq+599<=q35bntoQ>1@b>}l)Y^&PtZhY_TL z2fg;cjO{#464JE30v()_WDWnd^%>j}iSW;v|99Px#(%h4;8Qz6oBy7kUbh5l_{R$O zWCSL{KWF~OrSsnyF)De`>;Gx}Uy01;M6`x~tZ+}JdLsOD{J(F}`45a7RXia7uR^Mq zL-RQ4yH4)!S1|;GuhR{B;S2l^jQ?~(&!zZJbWN}XIr>Kr1QpqT&t>Aje8-~oe^y)g zPYyAhWiOD|8mh1$>P>*`~$L!FmAvV;I;S$Xn9E} z;0Wf0JmTTMviL|c5aJ)O-irV80-yGOAc?=1OdJ1%`d)Ee-Af++tBxE}ZO7o>r~4m~ zq~A~GfPd()7hWH_%Y^@ad0pFE9{#ZdfkpV|jQ>MBr1SrDv^5(4@8>*k>k|<#$p-T9 ze|+|oWO@3yjn-3R!p7j=4JiJL@Lyk+sO?Lq`9HL#!NWgxAg~Dk7CZhA9P0n0>9~&& zjsN?kbN%l+@jun+*F-16Kiu}&p8xY*+W%XVpt`RE`~SyPf-*4tv|3OGhV$CfOYK{$ z(;6OSfxb`}iSW0{~ty80wM?epK$z=#=Zj4yolF4{2!j-bpS;8=gj~4wEsg- z^Bd^@%ZPAZv#FMiDE>!}?l8FU6p0-OF2X;@{|5o=38wRZFf`mxgZzKLe6IcNPd_~- zdHONAkAGUndGymy3MyRSe`x%-?}QZptw0xTl8pXup($+gB>&V-IsUiM{=;6McKnx1 z "$output" || true + + echo "$ours" "$base" "$theirs" "$output" "$@" >> baseline.cases +} + +mkdir simple +(cd simple + echo -e "line1-changed-by-both\nline2-to-be-changed-in-incoming" > ours.blob + echo -e "line1-to-be-changed-by-both\nline2-to-be-changed-in-incoming" > base.blob + echo -e "line1-changed-by-both\nline2-changed" > theirs.blob +) + +# one big change includes multiple smaller ones +mkdir multi-change +(cd multi-change + cat < base.blob +0 +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +Y +Y +8 +Z +EOF + + cat < theirs.blob +T +T +T +T +T +T +T +T +T +T +EOF +) + +# a change with deletion/clearing our file +mkdir clear-ours +(cd clear-ours + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + touch ours.blob + + cat < theirs.blob +T +T +T +T +T +EOF +) + +# a change with deletion/clearing their file +mkdir clear-theirs +(cd clear-theirs + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +O +O +O +O +O +EOF + + touch theirs.blob +) + +# differently sized changes +mkdir ours-2-lines-theirs-1-line +(cd ours-2-lines-theirs-1-line + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +1 +X +X +4 +5 +EOF + + cat < theirs.blob +0 +1 +Y +3 +4 +5 +EOF +) + +# partial match +mkdir partial-match +(cd partial-match + cat < base.blob +0 +1 +2 +3 +4 +5 +EOF + + cat < ours.blob +0 +X1 +X2 +X3 +X4 +5 +EOF + + cat < theirs.blob +0 +X1 +2 +X3 +X4 +5 +EOF +) + +# based on 'unique merge base' from 'diff3-conflict-markers' +mkdir unique-merge-base-with-insertion +(cd unique-merge-base-with-insertion + cat < base.blob +1 +2 +3 +4 +5 +EOF + + # no trailing newline + echo -n 
$'1\n2\n3\n4\n5\n7' > ours.blob + echo -n $'1\n2\n3\n4\n5\nsix' > theirs.blob +) + +mkdir zdiff3-basic +(cd zdiff3-basic + cat < base.blob +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +1 +2 +3 +4 +A +B +C +D +E +7 +8 +9 +EOF + + cat < theirs.blob +1 +2 +3 +4 +A +X +C +Y +E +7 +8 +9 +EOF +) + +mkdir zdiff3-middlecommon +(cd zdiff3-middlecommon + cat < base.blob +1 +2 +3 +AA +4 +5 +BB +6 +7 +8 +EOF + + cat < ours.blob +1 +2 +3 +CC +4 +5 +DD +6 +7 +8 +EOF + + cat < theirs.blob +1 +2 +3 +EE +4 +5 +FF +6 +7 +8 +EOF +) + + +mkdir zdiff3-interesting +(cd zdiff3-interesting + cat < base.blob +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +1 +2 +3 +4 +A +B +C +D +E +F +G +H +I +J +7 +8 +9 +EOF + + cat < theirs.blob +1 +2 +3 +4 +A +B +C +5 +6 +G +H +I +J +7 +8 +9 +EOF +) + +mkdir zdiff3-evil +(cd zdiff3-evil + cat < base.blob +1 +2 +3 +4 +5 +6 +7 +8 +9 +EOF + + cat < ours.blob +1 +2 +3 +4 +X +A +B +C +7 +8 +9 +EOF + + cat < theirs.blob +1 +2 +3 +4 +Y +A +B +C +B +C +7 +8 +9 +EOF +) + +mkdir no-change-add +(cd no-change-add + + echo -e " line 1\n\n line 2" > base.blob + echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > ours.blob + cp ours.blob theirs.blob +) + +mkdir no-change-remove +(cd no-change-remove + + echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > base.blob + echo -e " line 1\n\n line 2" > ours.blob + cp ours.blob theirs.blob +) + +mkdir complex +(cd complex + cat <base.blob +Dominus regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +EOF + + cat <new1.blob +Dominus regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +Nam et si ambulavero in medio umbrae mortis, +non timebo mala, quoniam tu mecum es: +virga tua et baculus tuus ipsa me consolata sunt. +EOF + + cat <new2.blob +Dominus regit me, et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +EOF + + cat <new3.blob +DOMINUS regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +EOF + + cat <new4.blob +Dominus regit me, et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +EOF + + cat <new5.blob +Dominus regit me, +et nihil mihi deerit. +In loco pascuae ibi me collocavit, +super aquam refectionis educavit me; +animam meam convertit, +deduxit me super semitas jusitiae, +propter nomen suum. +Nam et si ambulavero in medio umbrae mortis, +non timebo mala, quoniam TU mecum es: +virga tua et baculus tuus ipsa me consolata sunt. +EOF + + echo -n "propter nomen suum." 
>>new4.blob + + cat <base.c +int f(int x, int y) +{ + if (x == 0) + { + return y; + } + return x; +} + +int g(size_t u) +{ + while (u < 30) + { + u++; + } + return u; +} +EOF + + cat <ours.c +int g(size_t u) +{ + while (u < 30) + { + u++; + } + return u; +} + +int h(int x, int y, int z) +{ + if (z == 0) + { + return x; + } + return y; +} +EOF + + cat <theirs.c +int f(int x, int y) +{ + if (x == 0) + { + return y; + } + return x; +} + +int g(size_t u) +{ + while (u > 34) + { + u--; + } + return u; +} +EOF + + mkdir no-change + (cd no-change + cp ../base.blob . + cp base.blob ours.blob + cp base.blob theirs.blob + ) + mkdir no-conflict + (cd no-conflict + cp ../new1.blob ours.blob + cp ../base.blob base.blob + cp ../new2.blob theirs.blob + ) + mkdir no-conflict-too + (cd no-conflict-too + cp ../base.blob ours.blob + cp ../base.blob base.blob + cp ../new2.blob theirs.blob + ) + mkdir they-changed + (cd they-changed + touch ours.blob base.blob + cp ../new2.blob theirs.blob + ) + mkdir missing-LF-at-EOF + (cd missing-LF-at-EOF + cp ../new1.blob ours.blob + cp ../base.blob base.blob + cp ../new4.blob theirs.blob + ) + mkdir missing-LF-at-EOF-no-conflict + (cd missing-LF-at-EOF-no-conflict + cp ../new4.blob ours.blob + cp ../new2.blob base.blob + cp ../new3.blob theirs.blob + ) + mkdir with-conflicts + (cd with-conflicts + cp ../new1.blob ours.blob + cp ../base.blob base.blob + cp ../new3.blob theirs.blob + ) + mkdir with-conflicts-in-removed-tail + (cd with-conflicts-in-removed-tail + cp ../base.blob ours.blob + cp ../new1.blob base.blob + cp ../new5.blob theirs.blob + ) + mkdir auto-simplification + (cd auto-simplification + sed -e "s/deerit.\$/deerit;/" -e "s/me;\$/me./" <../new5.blob >ours.blob + cp ../new5.blob base.blob + sed -e "s/deerit.\$/deerit,/" -e "s/me;\$/me,/" <../new5.blob >theirs.blob + ) + mkdir auto-simplification2 + (cd auto-simplification2 + sed -e "s/deerit./&%%%%/" -e "s/locavit,/locavit;/" <../auto-simplification/ours.blob | tr % "\012" >ours.blob + cp ../new5.blob base.blob + sed -e "s/deerit./&%%%%/" -e "s/locavit,/locavit --/" <../auto-simplification/theirs.blob | tr % "\012" >theirs.blob + ) + mkdir conflict-without-LF + (cd conflict-without-LF + printf "line1\nline2\nline3" >base.blob + printf "line1\nline2\nline3x" >ours.blob + printf "line1\nline2\nline3y" >theirs.blob + ) + + mkdir marker-newline-handling-crlf + (cd marker-newline-handling-crlf + printf "1\\r\\n2\\r\\n3" >base.blob + printf "1\\r\\n2\\r\\n4" >ours.blob + printf "1\\r\\n2\\r\\n5" >theirs.blob + ) + + mkdir marker-newline-handling-lf + (cd marker-newline-handling-lf + printf "1\\r\\n2\\n3" >base.blob + printf "1\\r\\n2\\n4" >ours.blob + printf "1\\r\\n2\\n5" >theirs.blob + ) + + mkdir marker-newline-handling-lf2 + (cd marker-newline-handling-lf2 + printf "1\\r\\n2\\r\\n3" >base.blob + printf "1\\r\\n2\\n4" >ours.blob + printf "1\\r\\n2\\n5" >theirs.blob + ) + + mkdir spurious-c-conflicts + (cd spurious-c-conflicts + cp ../base.c base.blob + cp ../ours.c ours.blob + cp ../theirs.c theirs.blob + ) +) + + +for dir in simple \ + multi-change \ + clear-ours \ + clear-theirs \ + ours-2-lines-theirs-1-line \ + partial-match \ + unique-merge-base-with-insertion \ + zdiff3-basic \ + zdiff3-middlecommon \ + zdiff3-interesting \ + zdiff3-evil \ + no-change-add \ + no-change-remove \ + complex/no-change \ + complex/no-conflict \ + complex/no-conflict-too \ + complex/they-changed \ + complex/missing-LF-at-EOF \ + complex/missing-LF-at-EOF-no-conflict \ + complex/with-conflicts \ + 
complex/with-conflicts-in-removed-tail \ + complex/auto-simplification \ + complex/auto-simplification2 \ + complex/conflict-without-LF \ + complex/marker-newline-handling-crlf \ + complex/marker-newline-handling-lf \ + complex/marker-newline-handling-lf2 \ + complex/spurious-c-conflicts; do + DIR=$dir + baseline ours base theirs merge + baseline ours base theirs diff3 --diff3 + baseline ours base theirs zdiff3 --zdiff3 + baseline ours base theirs merge-ours --ours + baseline ours base theirs merge-theirs --theirs + baseline ours base theirs merge-union --union + ( + export GIT_CONFIG_COUNT=1 GIT_CONFIG_KEY_0=diff.algorithm GIT_CONFIG_KEY_VALUE=0=histogram + baseline ours base theirs diff3-histogram --diff3 + baseline ours base theirs zdiff3-histogram --zdiff3 + ) +done \ No newline at end of file diff --git a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs new file mode 100644 index 00000000000..073280e0afd --- /dev/null +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -0,0 +1,214 @@ +use gix_merge::blob::builtin_driver::binary::{Pick, ResolveWith}; +use gix_merge::blob::{builtin_driver, Resolution}; + +#[test] +fn binary() { + assert_eq!( + builtin_driver::binary(None), + (Pick::Ours, Resolution::Conflict), + "by default it picks ours and marks it as conflict" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ancestor)), + (Pick::Ancestor, Resolution::Complete), + "Otherwise we can pick anything and it will mark it as complete" + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Ours)), + (Pick::Ours, Resolution::Complete) + ); + assert_eq!( + builtin_driver::binary(Some(ResolveWith::Theirs)), + (Pick::Theirs, Resolution::Complete) + ); +} + +mod text { + use bstr::ByteSlice; + use gix_merge::blob::Resolution; + use pretty_assertions::assert_str_eq; + + const DIVERGING: &[&str] = &[ + // Somehow, on in zdiff mode, it's different, and I wasn't able to figure out the rule properly. + // Now we prefer ancestor/before newlines and somewhat ignore our hunks. It's probably a minor issue in practice. + // gix: "1\r\n2\n<<<<<<< complex/marker-newline-handling-lf2/ours.blob\n4\r\n||||||| complex/marker-newline-handling-lf2/base.blob\r\n2\r\n3\n=======\n5\n>>>>>>> complex/marker-newline-handling-lf2/theirs.blob\n" + // git: "1\r\n2\n<<<<<<< complex/marker-newline-handling-lf2/ours.blob\n4 \n||||||| complex/marker-newline-handling-lf2/base.blob \n2\r\n3\n=======\n5\n>>>>>>> complex/marker-newline-handling-lf2/theirs.blob\n" + "complex/marker-newline-handling-lf2/zdiff3.merged", + "complex/marker-newline-handling-lf2/zdiff3-histogram.merged", + // This is related to Git seemingly extending a hunk to increase overlap (see diff3) + "zdiff3-interesting/merge.merged", + "zdiff3-interesting/merge-ours.merged", + "zdiff3-interesting/diff3.merged", + "zdiff3-interesting/diff3-histogram.merged", + "zdiff3-interesting/zdiff3.merged", + "zdiff3-interesting/zdiff3-histogram.merged", + "zdiff3-interesting/merge-union.merged", + // Git can extend hunks, similar to above, but the effect is not as noticeable. + // Implementing this would be interesting, to figure out when the hunk processing should apply. + "zdiff3-evil/merge.merged", + "zdiff3-evil/merge-union.merged", + // Git seems to merge to hunks if they are close together to get a less noisy diff. 
+ "zdiff3-middlecommon/merge.merged", + "zdiff3-middlecommon/merge-union.merged", + // Git has special character handling, which does magic to prevent conflicts + "complex/auto-simplification/merge.merged", + "complex/auto-simplification/merge-union.merged", + // Git has special newline handling when diffing, + // which auto-inserts a newline when it was removed, kind of. + "complex/missing-LF-at-EOF/merge.merged", + "complex/missing-LF-at-EOF/diff3.merged", + "complex/missing-LF-at-EOF/diff3-histogram.merged", + "complex/missing-LF-at-EOF/zdiff3.merged", + "complex/missing-LF-at-EOF/zdiff3-histogram.merged", + "complex/missing-LF-at-EOF/merge-ours.merged", + "complex/missing-LF-at-EOF/merge-theirs.merged", + "complex/missing-LF-at-EOF/merge-union.merged", + // Git has different diff-slider-heuristics so diffs can be different. + // See https://github.com/mhagger/diff-slider-tools. + "complex/spurious-c-conflicts/merge.merged", + "complex/spurious-c-conflicts/merge-union.merged", + "complex/spurious-c-conflicts/diff3-histogram.merged", + "complex/spurious-c-conflicts/zdiff3-histogram.merged", + ]; + + // TODO: fix all of these eventually + fn is_case_diverging(case: &baseline::Expectation) -> bool { + DIVERGING.iter().any(|name| case.name == *name) + } + + #[test] + fn run_baseline() -> crate::Result { + let root = gix_testtools::scripted_fixture_read_only("text-baseline.sh")?; + let cases = std::fs::read_to_string(root.join("baseline.cases"))?; + let mut out = Vec::new(); + let mut num_diverging = 0; + let mut num_cases = 0; + for case in baseline::Expectations::new(&root, &cases) { + num_cases += 1; + let mut input = imara_diff::intern::InternedInput::default(); + let actual = gix_merge::blob::builtin_driver::text( + &mut out, + &mut input, + &case.ours, + Some(case.ours_marker.as_str().as_ref()), + &case.base, + Some(case.base_marker.as_str().as_ref()), + &case.theirs, + Some(case.theirs_marker.as_str().as_ref()), + case.options, + ); + if is_case_diverging(&case) { + num_diverging += 1; + } else { + let expected_resolution = if case.expected.contains_str("<<<<<<<") { + Resolution::Conflict + } else { + Resolution::Complete + }; + assert_eq!(out.as_bstr(), case.expected); + assert_str_eq!( + out.as_bstr().to_str_lossy(), + case.expected.to_str_lossy(), + "{}: output mismatch\n{}", + case.name, + out.as_bstr() + ); + assert_eq!(actual, expected_resolution, "{}: resolution mismatch", case.name,); + } + } + + assert_eq!( + num_diverging, + DIVERGING.len(), + "Number of expected diverging cases must match the actual one - probably the implementation improved" + ); + assert_eq!( + (num_diverging as f32 / num_cases as f32) * 100.0, + 12.053572, + "Just to show the percentage of skipped tests - this should get better" + ); + Ok(()) + } + + mod baseline { + use bstr::BString; + use gix_merge::blob::builtin_driver::text::{ConflictStyle, ResolveWith}; + use std::path::Path; + + #[derive(Debug)] + pub struct Expectation { + pub ours: BString, + pub ours_marker: String, + pub theirs: BString, + pub theirs_marker: String, + pub base: BString, + pub base_marker: String, + pub name: BString, + pub expected: BString, + pub options: gix_merge::blob::builtin_driver::text::Options, + } + + pub struct Expectations<'a> { + root: &'a Path, + lines: std::str::Lines<'a>, + } + + impl<'a> Expectations<'a> { + pub fn new(root: &'a Path, cases: &'a str) -> Self { + Expectations { + root, + lines: cases.lines(), + } + } + } + + impl Iterator for Expectations<'_> { + type Item = Expectation; + + fn next(&mut self) 
-> Option { + let line = self.lines.next()?; + let mut words = line.split(' '); + let (Some(ours), Some(base), Some(theirs), Some(output)) = + (words.next(), words.next(), words.next(), words.next()) + else { + panic!("need at least the input and output") + }; + + let read = |rela_path: &str| read_blob(self.root, rela_path); + + let mut options = gix_merge::blob::builtin_driver::text::Options::default(); + for arg in words { + match arg { + "--diff3" => options.conflict_style = ConflictStyle::Diff3, + "--zdiff3" => options.conflict_style = ConflictStyle::ZealousDiff3, + "--ours" => options.on_conflict = Some(ResolveWith::Ours), + "--theirs" => options.on_conflict = Some(ResolveWith::Theirs), + "--union" => options.on_conflict = Some(ResolveWith::Union), + _ => panic!("Unknown argument to parse into options: '{arg}'"), + } + } + if output.contains("histogram") { + options.diff_algorithm = imara_diff::Algorithm::Histogram; + } + + Some(Expectation { + ours: read(ours), + ours_marker: ours.into(), + theirs: read(theirs), + theirs_marker: theirs.into(), + base: read(base), + base_marker: base.into(), + expected: read(output), + name: output.into(), + options, + }) + } + } + + fn read_blob(root: &Path, rela_path: &str) -> BString { + std::fs::read(root.join(rela_path)) + .unwrap_or_else(|_| panic!("Failed to read '{rela_path}' in '{}'", root.display())) + .into() + } + } +} diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs new file mode 100644 index 00000000000..f781f63e485 --- /dev/null +++ b/gix-merge/tests/merge/blob/mod.rs @@ -0,0 +1 @@ +mod builtin_driver; diff --git a/gix-merge/tests/merge/main.rs b/gix-merge/tests/merge/main.rs new file mode 100644 index 00000000000..05375cb2279 --- /dev/null +++ b/gix-merge/tests/merge/main.rs @@ -0,0 +1,4 @@ +#[cfg(feature = "blob")] +mod blob; + +pub use gix_testtools::Result; From 21bca6c771eaf9d643a4de4b644bccd64f52cf9c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 27 Sep 2024 16:11:40 +0200 Subject: [PATCH 07/10] fix: `ValueRef::as_bstr()` now uses the correct lifetime. This makes it more usable as the value lives longer than the ref itself. --- gix-attributes/src/state.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gix-attributes/src/state.rs b/gix-attributes/src/state.rs index 8f2a466bcbf..0e9b41a2d75 100644 --- a/gix-attributes/src/state.rs +++ b/gix-attributes/src/state.rs @@ -23,9 +23,9 @@ impl<'a> ValueRef<'a> { } /// Access and conversions -impl ValueRef<'_> { +impl<'a> ValueRef<'a> { /// Access this value as byte string. - pub fn as_bstr(&self) -> &BStr { + pub fn as_bstr(&self) -> &'a BStr { self.0.as_bytes().as_bstr() } From ad9587aa7162c0bd443c0369ed7096360364129a Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 27 Sep 2024 19:34:41 +0200 Subject: [PATCH 08/10] improve docs around worktree-filters and renormalization --- gix-filter/src/eol/convert_to_git.rs | 6 ++++-- gix-filter/src/pipeline/convert.rs | 10 +++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/gix-filter/src/eol/convert_to_git.rs b/gix-filter/src/eol/convert_to_git.rs index c83f96428a0..88ed8784f0e 100644 --- a/gix-filter/src/eol/convert_to_git.rs +++ b/gix-filter/src/eol/convert_to_git.rs @@ -57,8 +57,10 @@ pub(crate) mod function { /// Return `true` if `buf` was written or `false` if nothing had to be done. 
/// Depending on the state in `buf`, `index_object` is called to write the version of `src` as stored in the index /// into the buffer and if it is a blob, or return `Ok(None)` if no such object exists. - /// If renormalization is desired, let it return `Ok(None)` at all times to not let it have any influence over the - /// outcome of this function. + /// + /// *If renormalization is desired*, let it return `Ok(None)` at all times to not let it have any influence over the + /// outcome of this function. Otherwise, it will check if the in-index buffer already has newlines that it would now + /// want to change, and avoid doing so as what's in Git should be what's desired (except for when *renormalizing*). /// If `round_trip_check` is not `None`, round-tripping will be validated and handled accordingly. pub fn convert_to_git( src: &[u8], diff --git a/gix-filter/src/pipeline/convert.rs b/gix-filter/src/pipeline/convert.rs index 0572dd451b1..4962296656d 100644 --- a/gix-filter/src/pipeline/convert.rs +++ b/gix-filter/src/pipeline/convert.rs @@ -91,7 +91,7 @@ impl Pipeline { self.options.eol_config, )?; - let mut in_buffer = false; + let mut in_src_buffer = false; // this is just an approximation, but it's as good as it gets without reading the actual input. let would_convert_eol = eol::convert_to_git( b"\r\n", @@ -119,13 +119,13 @@ impl Pipeline { } self.bufs.clear(); read.read_to_end(&mut self.bufs.src)?; - in_buffer = true; + in_src_buffer = true; } } - if !in_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { + if !in_src_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) { self.bufs.clear(); src.read_to_end(&mut self.bufs.src)?; - in_buffer = true; + in_src_buffer = true; } if let Some(encoding) = encoding { @@ -158,7 +158,7 @@ impl Pipeline { if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest)? { self.bufs.swap(); } - Ok(if in_buffer { + Ok(if in_src_buffer { ToGitOutcome::Buffer(&self.bufs.src) } else { ToGitOutcome::Unchanged(src) From a6f3e30017343c01ba61c49fe74ffc69e443a33c Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Fri, 27 Sep 2024 10:22:01 +0200 Subject: [PATCH 09/10] add all relevant tests for the merge processing pipeline --- gix-merge/src/blob/mod.rs | 15 +- gix-merge/src/blob/pipeline.rs | 248 +++----- gix-merge/src/blob/platform.rs | 168 ++++- .../generated-archives/make_blob_repo.tar | Bin 0 -> 74240 bytes gix-merge/tests/fixtures/make_blob_repo.sh | 22 + gix-merge/tests/merge/blob/builtin_driver.rs | 4 +- gix-merge/tests/merge/blob/mod.rs | 51 ++ gix-merge/tests/merge/blob/pipeline.rs | 433 +++++++++++++ gix-merge/tests/merge/blob/platform.rs | 584 ++++++++++++++++++ 9 files changed, 1310 insertions(+), 215 deletions(-) create mode 100644 gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar create mode 100644 gix-merge/tests/fixtures/make_blob_repo.sh create mode 100644 gix-merge/tests/merge/blob/pipeline.rs create mode 100644 gix-merge/tests/merge/blob/platform.rs diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs index 408a58cbf52..ab4094e9eaf 100644 --- a/gix-merge/src/blob/mod.rs +++ b/gix-merge/src/blob/mod.rs @@ -125,12 +125,6 @@ pub struct Pipeline { pub filter: gix_filter::Pipeline, /// Options affecting the way we read files. pub options: pipeline::Options, - /// All available merge drivers. - /// - /// They are referenced in git-attributes by name, and we hand out indices into this array. 
- drivers: Vec, - /// Pre-configured attributes to obtain additional merge-related information. - attrs: gix_filter::attributes::search::Outcome, /// A buffer to produce disk-accessible paths from worktree roots. path: PathBuf, } @@ -152,7 +146,14 @@ pub struct Platform { pub filter: Pipeline, /// A way to access `.gitattributes` pub attr_stack: gix_worktree::Stack, - + /// Further configuration that affects the merge. + pub options: platform::Options, + /// All available merge drivers. + /// + /// They are referenced in git-attributes by name, and we hand out indices into this array. + drivers: Vec, + /// Pre-configured attributes to obtain additional merge-related information. + attrs: gix_filter::attributes::search::Outcome, /// The way we convert resources into mergeable states. filter_mode: pipeline::Mode, } diff --git a/gix-merge/src/blob/pipeline.rs b/gix-merge/src/blob/pipeline.rs index 90adb615051..776d908e203 100644 --- a/gix-merge/src/blob/pipeline.rs +++ b/gix-merge/src/blob/pipeline.rs @@ -1,6 +1,5 @@ -use super::{BuiltinDriver, Pipeline, ResourceKind}; -use bstr::{BStr, ByteSlice}; -use gix_filter::attributes; +use super::{Pipeline, ResourceKind}; +use bstr::BStr; use gix_filter::driver::apply::{Delay, MaybeDelayed}; use gix_filter::pipeline::convert::{ToGitOutcome, ToWorktreeOutcome}; use gix_object::tree::EntryKind; @@ -8,7 +7,7 @@ use std::io::Read; use std::path::{Path, PathBuf}; /// Options for use in a [`Pipeline`]. -#[derive(Default, Clone, Copy, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] pub struct Options { /// The amount of bytes that an object has to reach before being treated as binary. /// These objects will not be queried, nor will their data be processed in any way. @@ -20,12 +19,6 @@ pub struct Options { /// However, if they are to be retrieved from the worktree, the worktree size is what matters, /// even though that also might be a `git-lfs` file which is small in Git. pub large_file_threshold_bytes: u64, - /// Capabilities of the file system which affect how we read worktree files. - pub fs: gix_fs::Capabilities, - /// Define which driver to use if the `merge` attribute for a resource is unspecified. - /// - /// This is the value of the `merge.default` git configuration. - pub default_driver: Option, } /// The specific way to convert a resource. @@ -78,50 +71,30 @@ impl Pipeline { /// Create a new instance of a pipeline which produces blobs suitable for merging. /// /// `roots` allow to read worktree files directly, and `worktree_filter` is used - /// to transform object database data directly. `drivers` further configure individual paths. - /// `options` are used to further configure the way we act.. - pub fn new( - roots: WorktreeRoots, - worktree_filter: gix_filter::Pipeline, - mut drivers: Vec, - options: Options, - ) -> Self { - drivers.sort_by(|a, b| a.name.cmp(&b.name)); + /// to transform object database data directly. + /// `options` are used to further configure the way we act. + pub fn new(roots: WorktreeRoots, worktree_filter: gix_filter::Pipeline, options: Options) -> Self { Pipeline { roots, filter: worktree_filter, - drivers, options, - attrs: { - let mut out = gix_filter::attributes::search::Outcome::default(); - out.initialize_with_selection(&Default::default(), Some("merge")); - out - }, path: Default::default(), } } } /// Access -impl Pipeline { - /// Return all drivers that this instance was initialized with. 
- /// - /// They are sorted by [`name`](super::Driver::name) to support binary searches. - pub fn drivers(&self) -> &[super::Driver] { - &self.drivers - } -} +impl Pipeline {} -/// Data as part of an [Outcome]. +/// Data as returned by [`Pipeline::convert_to_mergeable()`]. #[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] pub enum Data { /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`]. Buffer, - /// The size that the binary blob had at the given revision, without having applied filters, as it's either - /// considered binary or above the big-file threshold. + /// The file or blob is above the big-file threshold and cannot be processed. /// - /// In this state, the binary file cannot be merged. - Binary { + /// In this state, the file cannot be merged. + TooLarge { /// The size of the object prior to performing any filtering or as it was found on disk. /// /// Note that technically, the size isn't always representative of the same 'state' of the @@ -131,44 +104,6 @@ pub enum Data { }, } -/// The selection of the driver to use by a resource obtained with [`Pipeline::convert_to_mergeable()`]. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] -pub enum DriverChoice { - /// Use the given built-in driver to perform the merge. - BuiltIn(BuiltinDriver), - /// Use the user-provided driver program using the index into [the pipelines driver array](Pipeline::drivers(). - Index(usize), -} - -impl Default for DriverChoice { - fn default() -> Self { - DriverChoice::BuiltIn(Default::default()) - } -} - -/// The outcome returned by [Pipeline::convert_to_mergeable()]. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] -pub struct Outcome { - /// If available, an index into the `drivers` field to access more diff-related information of the driver for items - /// at the given path, as previously determined by git-attributes. - /// - /// * `merge` is set - /// - Use the [`BuiltinDriver::Text`] - /// * `-merge` is unset - /// - Use the [`BuiltinDriver::Binary`] - /// * `!merge` is unspecified - /// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. - /// * `merge=name` - /// - Search for a user-configured or built-in driver called `name`. - /// - If not found, silently default to [`BuiltinDriver::Text`] - /// - /// Note that drivers are queried even if there is no object available. - pub driver: DriverChoice, - /// The data itself, suitable for diffing, and if the object or worktree item is present at all. - /// Otherwise, it's `None`. - pub data: Option, -} - /// pub mod convert_to_mergeable { use std::collections::TryReserveError; @@ -202,15 +137,18 @@ pub mod convert_to_mergeable { /// Conversion impl Pipeline { /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`. - /// The resulting merge-able data is written into `out`, if it's not too large or considered binary. - /// The returned [`Outcome`] contains information on how to use `out`, or if it's filled at all. + /// The resulting merge-able data is written into `out`, if it's not too large. + /// The returned [`Data`] contains information on how to use `out`, which will be cleared if it is `None`, indicating + /// that no object was found at the location *on disk* - it's always an error to provide an object ID that doesn't exist + /// in the object database. 
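+ ///
+ /// A minimal call could look like this, where `id`, `odb` and `buf` stand in for the caller's object id,
+ /// object database and output buffer:
+ /// `pipeline.convert_to_mergeable(&id, EntryKind::Blob, "file".into(), ResourceKind::CurrentOrOurs, &mut |_, _| {}, &odb, Mode::ToGit, &mut buf)?`.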
/// - /// `attributes` must be returning the attributes at `rela_path`, and `objects` must be usable if `kind` is - /// a resource in the object database, i.e. if no worktree root is available. It's notable that if a worktree root - /// is present for `kind`, then a `rela_path` is used to access it on disk. + /// `attributes` must be returning the attributes at `rela_path` and is used for obtaining worktree filter settings, + /// and `objects` must be usable if `kind` is a resource in the object database, + /// i.e. if no worktree root is available. It's notable that if a worktree root is present for `kind`, + /// then a `rela_path` is used to access it on disk. /// /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case - /// [a root](WorktreeRoots) is present, then `out` will be left cleared and [Outcome::data] will be `None`. + /// [a root](WorktreeRoots) is present, then `out` will be left cleared and the output data will be `None`. /// This is useful to simplify the calling code as empty buffers signal that nothing is there. /// /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode. @@ -228,7 +166,7 @@ impl Pipeline { objects: &dyn gix_object::FindObjectOrHeader, convert: Mode, out: &mut Vec, - ) -> Result { + ) -> Result, convert_to_mergeable::Error> { if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) { return Err(convert_to_mergeable::Error::InvalidEntryKind { rela_path: rela_path.to_owned(), @@ -237,31 +175,6 @@ impl Pipeline { } out.clear(); - attributes(rela_path, &mut self.attrs); - let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); - let driver = match attr.assignment.state { - attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), - attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), - attributes::StateRef::Value(name) => { - let name = name.as_bstr(); - self.drivers - .binary_search_by(|d| d.name.as_bstr().cmp(name)) - .ok() - .map(DriverChoice::Index) - .or_else(|| { - name.to_str() - .ok() - .and_then(BuiltinDriver::by_name) - .map(DriverChoice::BuiltIn) - }) - .unwrap_or_default() - } - attributes::StateRef::Unspecified => self - .options - .default_driver - .map(DriverChoice::BuiltIn) - .unwrap_or_default(), - }; match self.roots.by_kind(kind) { Some(root) => { self.path.clear(); @@ -279,7 +192,7 @@ impl Pipeline { .transpose()?; let data = match size_in_bytes { Some(None) => None, // missing as identified by the size check - Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::Binary { size }), + Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }), _ => { let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| { convert_to_mergeable::Error::OpenOrRead { @@ -295,7 +208,13 @@ impl Pipeline { file, gix_path::from_bstr(rela_path).as_ref(), attributes, - &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), + &mut |buf| { + if convert == Mode::Renormalize { + Ok(None) + } else { + objects.try_find(id, buf).map(|obj| obj.map(|_| ())) + } + }, )?; match res { @@ -324,19 +243,13 @@ impl Pipeline { } } - Some(if is_binary_buf(out) { - let size = out.len() as u64; - out.clear(); - Data::Binary { size } - } else { - Data::Buffer - }) + Some(Data::Buffer) } else { None } } }; - Ok(Outcome { driver, data }) + Ok(data) } None => { let data = if id.is_null() { @@ -349,7 
+262,7 @@ impl Pipeline { let is_binary = self.options.large_file_threshold_bytes > 0 && header.size > self.options.large_file_threshold_bytes; let data = if is_binary { - Data::Binary { size: header.size } + Data::TooLarge { size: header.size } } else { objects .try_find(id, out) @@ -357,66 +270,62 @@ impl Pipeline { .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?; if convert == Mode::Renormalize { - let res = self - .filter - .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + { + let res = self + .filter + .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?; + + match res { + ToWorktreeOutcome::Unchanged(_) => {} + ToWorktreeOutcome::Buffer(src) => { + out.clear(); + out.try_reserve(src.len())?; + out.extend_from_slice(src); + } + ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { + std::io::copy(&mut stream, out).map_err(|err| { + convert_to_mergeable::Error::StreamCopy { + rela_path: rela_path.to_owned(), + source: err, + } + })?; + } + ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { + unreachable!("we prohibit this") + } + }; + } + + let res = self.filter.convert_to_git( + &**out, + &gix_path::from_bstr(rela_path), + attributes, + &mut |_buf| Ok(None), + )?; match res { - ToWorktreeOutcome::Unchanged(_) => {} - ToWorktreeOutcome::Buffer(src) => { - out.clear(); - out.try_reserve(src.len())?; - out.extend_from_slice(src); - } - ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => { - std::io::copy(&mut stream, out).map_err(|err| { - convert_to_mergeable::Error::StreamCopy { + ToGitOutcome::Unchanged(_) => {} + ToGitOutcome::Process(mut stream) => { + stream + .read_to_end(out) + .map_err(|err| convert_to_mergeable::Error::OpenOrRead { rela_path: rela_path.to_owned(), source: err, - } - })?; + })?; } - ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => { - unreachable!("we prohibit this") + ToGitOutcome::Buffer(buf) => { + out.clear(); + out.try_reserve(buf.len())?; + out.extend_from_slice(buf); } - }; - } - - let res = self.filter.convert_to_git( - &**out, - &gix_path::from_bstr(rela_path), - attributes, - &mut |buf| objects.try_find(id, buf).map(|obj| obj.map(|_| ())), - )?; - - match res { - ToGitOutcome::Unchanged(_) => {} - ToGitOutcome::Process(mut stream) => { - stream - .read_to_end(out) - .map_err(|err| convert_to_mergeable::Error::OpenOrRead { - rela_path: rela_path.to_owned(), - source: err, - })?; - } - ToGitOutcome::Buffer(buf) => { - out.clear(); - out.try_reserve(buf.len())?; - out.extend_from_slice(buf); } } - if is_binary_buf(out) { - let size = out.len() as u64; - out.clear(); - Data::Binary { size } - } else { - Data::Buffer - } + Data::Buffer }; Some(data) }; - Ok(Outcome { driver, data }) + Ok(data) } } } @@ -429,8 +338,3 @@ fn none_if_missing(res: std::io::Result) -> std::io::Result> { Err(err) => Err(err), } } - -fn is_binary_buf(buf: &[u8]) -> bool { - let buf = &buf[..buf.len().min(8000)]; - buf.contains(&0) -} diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs index 6b6175ee408..f749e03c3ec 100644 --- a/gix-merge/src/blob/platform.rs +++ b/gix-merge/src/blob/platform.rs @@ -1,7 +1,6 @@ -use bstr::{BStr, BString}; - -use crate::blob::pipeline::DriverChoice; -use crate::blob::{pipeline, Pipeline, Platform, ResourceKind}; +use crate::blob::{pipeline, BuiltinDriver, Pipeline, Platform, ResourceKind}; +use bstr::{BStr, BString, ByteSlice}; +use gix_filter::attributes; /// A stored value representing a resource that 
participates in a merge. #[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] @@ -10,8 +9,8 @@ pub(super) struct Resource { id: gix_hash::ObjectId, /// The repository-relative path where the resource lives in the tree. rela_path: BString, - /// The outcome of converting a resource into a diffable format using [Pipeline::convert_to_mergeable()]. - conversion: pipeline::Outcome, + /// The outcome of converting a resource into a mergable format using [Pipeline::convert_to_mergeable()]. + data: Option, /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`. mode: gix_object::tree::EntryKind, /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary @@ -26,14 +25,51 @@ pub struct ResourceRef<'a> { pub data: resource::Data<'a>, /// The location of the resource, relative to the working tree. pub rela_path: &'a BStr, - /// Which driver to use according to the resource's configuration. - pub driver_choice: DriverChoice, /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which /// after its 'to-git' conversion never had its hash computed. pub id: &'a gix_hash::oid, } +/// Options for use in a [`Platform`]. +#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] +pub struct Options { + /// Define which driver to use by name if the `merge` attribute for a resource is unspecified. + /// + /// This is the value of the `merge.default` git configuration. + pub default_driver: Option, +} + +/// The selection of the driver to use by a resource obtained with [`Pipeline::convert_to_mergeable()`]. +/// +/// If available, an index into the `drivers` field to access more diff-related information of the driver for items +/// at the given path, as previously determined by git-attributes. +/// +/// * `merge` is set +/// - Use the [`BuiltinDriver::Text`] +/// * `-merge` is unset +/// - Use the [`BuiltinDriver::Binary`] +/// * `!merge` is unspecified +/// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. +/// * `merge=name` +/// - Search for a user-configured or built-in driver called `name`. +/// - If not found, silently default to [`BuiltinDriver::Text`] +/// +/// Note that drivers are queried even if there is no object available. +#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] +pub enum DriverChoice { + /// Use the given built-in driver to perform the merge. + BuiltIn(BuiltinDriver), + /// Use the user-provided driver program using the index into [the pipelines driver array](Pipeline::drivers(). 
+ Index(usize), +} + +impl Default for DriverChoice { + fn default() -> Self { + DriverChoice::BuiltIn(Default::default()) + } +} + /// pub mod resource { use crate::blob::{ @@ -44,11 +80,10 @@ pub mod resource { impl<'a> ResourceRef<'a> { pub(super) fn new(cache: &'a Resource) -> Self { ResourceRef { - data: cache.conversion.data.map_or(Data::Missing, |data| match data { + data: cache.data.map_or(Data::Missing, |data| match data { pipeline::Data::Buffer => Data::Buffer(&cache.buffer), - pipeline::Data::Binary { size } => Data::Binary { size }, + pipeline::Data::TooLarge { size } => Data::Binary { size }, }), - driver_choice: cache.conversion.driver, rela_path: cache.rela_path.as_ref(), id: &cache.id, } @@ -118,7 +153,7 @@ pub mod set_resource { /// pub mod merge { - use crate::blob::pipeline::DriverChoice; + use crate::blob::platform::DriverChoice; use crate::blob::platform::ResourceRef; use crate::blob::{builtin_driver, BuiltinDriver, Driver, Resolution}; use bstr::BString; @@ -135,6 +170,9 @@ pub mod merge { pub ancestor: ResourceRef<'parent>, /// The other or their side of the merge operation. pub other: ResourceRef<'parent>, + /// Which driver to use according to the resource's configuration, + /// using the path of `current` to read git-attributes. + pub driver_choice: DriverChoice, } #[derive(Copy, Clone, Debug, Eq, PartialEq)] @@ -267,9 +305,9 @@ pub mod merge { /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err` /// with the built-in driver to use instead. pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> { - match self.current.driver_choice { + match self.driver_choice { DriverChoice::BuiltIn(builtin) => Err(builtin), - DriverChoice::Index(idx) => self.parent.filter.drivers.get(idx).ok_or(BuiltinDriver::default()), + DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()), } } } @@ -299,14 +337,21 @@ pub mod merge { /// pub mod prepare_merge { + use crate::blob::ResourceKind; + use bstr::BString; + /// The error returned by [Platform::prepare_merge()](super::Platform::prepare_merge_state()). #[derive(Debug, thiserror::Error)] #[allow(missing_docs)] pub enum Error { #[error("The 'current', 'ancestor' or 'other' resource for the merge operation were not set")] UnsetResource, - #[error("Tried to merge 'current' and 'other' where at least one of them is removed")] - CurrentOrOtherRemoved, + #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] + Attributes { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, } } @@ -315,18 +360,44 @@ impl Platform { /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able. /// `filter_mode` decides how to do that specifically. /// Use `attr_stack` to access attributes pertaining worktree filters and merge settings. - pub fn new(filter: Pipeline, filter_mode: pipeline::Mode, attr_stack: gix_worktree::Stack) -> Self { + /// `drivers` are the list of available merge drivers that individual paths can refer to by means of git attributes. + /// `options` further configure the operation. 
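+ ///
+ /// Roughly, with `filter`, `attr_stack` and `drivers` prepared by the caller, construction could look like
+ /// `Platform::new(filter, pipeline::Mode::ToGit, attr_stack, drivers, Options::default())`.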
+ pub fn new( + filter: Pipeline, + filter_mode: pipeline::Mode, + attr_stack: gix_worktree::Stack, + mut drivers: Vec, + options: Options, + ) -> Self { + drivers.sort_by(|a, b| a.name.cmp(&b.name)); Platform { + drivers, current: None, ancestor: None, other: None, filter, filter_mode, attr_stack, + attrs: { + let mut out = attributes::search::Outcome::default(); + out.initialize_with_selection(&Default::default(), Some("merge")); + out + }, + options, } } } +/// Access +impl Platform { + /// Return all drivers that this instance was initialized with. + /// + /// They are sorted by [`name`](super::Driver::name) to support binary searches. + pub fn drivers(&self) -> &[super::Driver] { + &self.drivers + } +} + /// Preparation impl Platform { /// Store enough information about a resource to eventually use it in a merge, where… @@ -351,33 +422,62 @@ impl Platform { self.set_resource_inner(id, mode, rela_path, kind, objects) } - /// Returns the resource of the given kind if it was set. - pub fn resource(&self, kind: ResourceKind) -> Option> { - let cache = match kind { - ResourceKind::CurrentOrOurs => self.current.as_ref(), - ResourceKind::CommonAncestorOrBase => self.ancestor.as_ref(), - ResourceKind::OtherOrTheirs => self.other.as_ref(), - }?; - ResourceRef::new(cache).into() - } - /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources. - pub fn prepare_merge_state(&self) -> Result, prepare_merge::Error> { + /// Note that no additional validation is performed here to facilitate inspection. + pub fn prepare_merge_state( + &mut self, + objects: &impl gix_object::Find, + ) -> Result, prepare_merge::Error> { let current = self.current.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; let ancestor = self.ancestor.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; let other = self.other.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; + let entry = self + .attr_stack + .at_entry(current.rela_path.as_bstr(), None, objects) + .map_err(|err| prepare_merge::Error::Attributes { + source: err, + kind: ResourceKind::CurrentOrOurs, + rela_path: current.rela_path.clone(), + })?; + entry.matching_attributes(&mut self.attrs); + let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); + let driver = match attr.assignment.state { + attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), + attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), + attributes::StateRef::Value(_) | attributes::StateRef::Unspecified => { + let name = match attr.assignment.state { + attributes::StateRef::Value(name) => Some(name.as_bstr()), + attributes::StateRef::Unspecified => { + self.options.default_driver.as_ref().map(|name| name.as_bstr()) + } + _ => unreachable!("only value and unspecified are possible here"), + }; + name.and_then(|name| { + self.drivers + .binary_search_by(|d| d.name.as_bstr().cmp(name)) + .ok() + .map(DriverChoice::Index) + .or_else(|| { + name.to_str() + .ok() + .and_then(BuiltinDriver::by_name) + .map(DriverChoice::BuiltIn) + }) + }) + .unwrap_or_default() + } + }; + let out = merge::State { parent: self, + driver_choice: driver, current: ResourceRef::new(current), ancestor: ResourceRef::new(ancestor), other: ResourceRef::new(other), }; - match (current.conversion.data, other.conversion.data) { - (None, None) => Err(prepare_merge::Error::CurrentOrOtherRemoved), - (_, _) => Ok(out), - } + Ok(out) } } @@ -430,7 +530,7 @@ impl Platform { *storage = Some(Resource { id, 
rela_path: rela_path.to_owned(), - conversion: out, + data: out, mode, buffer: buf_storage, }); @@ -438,7 +538,7 @@ impl Platform { Some(storage) => { storage.id = id; storage.rela_path = rela_path.to_owned(); - storage.conversion = out; + storage.data = out; storage.mode = mode; } }; diff --git a/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar b/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar new file mode 100644 index 0000000000000000000000000000000000000000..ee7859571ca7ed197f13a05aeeec84a1513be7be GIT binary patch literal 74240 zcmeHw4RmB#ah`U;5H;XqCyP1c*yL@GSV=SLUv;Y`&1jZc&Fqer`C~@Ifagd1y8E@% zJFR{_{klgZW@mi@EZ8J)@J|Ro`61XQ2|oe6*ybF94Q8E~Kd%!Ua_l50hF~CeeCz}p z`y`mn_f_5Z`gONPEos!_UAE`2sQdN3_3BpDty@*MZryldsPF%h_?OFOhxix&I{(VE zR5CM_N~V+PN%=iDna&Pn`;Hl2(DWL1-Lh`^RWh{D;BFb+hQa--k4asF>t|L3ylf&T9|V*sE0Pt2b@etcnZ z>B!-Q#p7ol*!8!P|C{UoY={0&rBkWNf&SlhPILn-SIdpT63|VTXUh|r|K)1Yxz@iC z_n7~wWG>Y?|5M4S^kDw?k0k-*!waY9@#RkZ_x#d#{`!f(`;D(%@&Dd2H1v0VWc7_d zH8k}1e_?3oj=w;gyYSzIzkdE~?C*W^((teC`NoI-cIquBUq18hZw&n)CCP3y>g9a1 z;dp2_Byc_weY^kHz%?P2ci7thqd%9H z{`hP9zoE0geeL=0jK1|HABknY`}HS3_3g8-e#1z8=#IShf4owV{`hP9Ki1j*_kQ5_ ze*g8Ks~11$E?8f?^S(d$$;V#!KZk~$Sk;YjKnmw$|1BzT@Yle3V+Wk)eek~?e$J^+ zJoTacYoGHI%TK<@I~=>VZ|Db_)v{Y9ITX&v-}a9M4*nWAZ|a0|;+M`>-*@Q6Zy$f^ zCzpQbb^3 z)%|Bb_OiyWpV@lhZ)tU&n^D1ICil=0JiZ=YD+ z2DXF$iB-F-#!6eOTl+%(Z|eVNyX^mTE|nYD|F=f=E%i=K&t!9{WOl|bOqJ~EndyRE z$W5nbvc=qVK9_T5a+y>*lP?Zp1Y08KGp>=${}tE0+#l61^w^31hru{l|8IjdwfLW? zHNBNs!;KYIoWf zd#poM%ef{@x2O^;ag? zm9nP*@V8vO>Rxt=R(_qWVs)qBl&?Cmnq9a&mKwL#R-CGZ-gU<=G(5fvtpJ12EW27X zZ}!ljvEoQ?06{Ry5ts&j{*SXb?O+0IuS!bF?~ z13_nf?LyUSz_wP}+_!EOos!+GG$t&s2}EoiD!O5WO%IeRm-udL&8=T9SC^H_1|bX_ zky=1@wN2ewb-|Vv9jq1G1NL}Hxn$^NmFn)QKn%tT0EUk;+Jd{fT5jxDUgX^dsk7Pp ztyQ~GSOE|=Ge9i`GQnr-mR)zu;6X=M+e_=Jvj$R=xfp1pTx$(Q`4Vpz9`wxv5~+fZ z?p3E=FBctwWT*%Ld1uAGT6UZD{Sdk}2aOal7cKKn#a#n^%G6}5U^K)5NSn^Bt^h;A zSxg3Lkm@zJTy0n-w~m2TuR7H-`dX_l$XYH4$9++ERML%{D0roXwN?fx*H+3RZos-+ z^Z90oM&wnP!2V^t5!}`h%XBNwMikQANQ%>Qp~8XLE+M~0okifbLb zCv8PX_D1oQ=%gYyh~ddPK@-VU`!du81hI+wN5luM^JYdjUn!ASLQT zk`)K?D?fs9Ubzlw;BX;xmpuvhUQHimp6dV z*@$ADV(dXsG1>wZ41fsBeN796r?S>qbM>u|)m6$DTXf=5CgZ^d2yxl20BODh%R5*E z!9xcRz<~+I%5S~}pqS}zB0DOr;31?My6&>$)C`BLid(QN0gufv+H{xQqrWE+Q1tS$U_i<{0MsFO5^e7ST>8HKvS>8CiE?Zar4P;K-%|S0%gZfx^_vAU8Bk zrQ*-KdGNhR5#z*`SE?>Bv>(%m5yuQI_!&yX@u z@L_Ued2zXvth-HX&4%&bi7#VFT;lB_pIOmD(Tc~Tl&7Z02X%)W0jQeYSn#+8_xe;8WCiW!qF7V{cllXW0~Grj@Q3`YW@ zAuzy2dn4+N@P+uLHw`zUU?2*eS}QuD%U?7MciwGQilRY`Q5P0mN-DLxkddgO4O-j751)_hSp5^(!E&q?6+sz%oJl6NiRsB3++gRXDWNxCYF_yugoG3Zhg$;dL)+jMn;cQ!MdW{93Bg2=P|zRLsS+mJgV~ z>PEs=s)|q5@EyttbdetNgle5LKB|%%)n#>F{4$^tlYjUi;jK6jbJs!xzz>#s8+q5Q zI5g-SxCgK)vB+TE>Kv-SHVYgK5eAHJuv$RN_eA5C)$;LZAgsfatBy9Xtdl+=T#8P` zX*eKb4@{&T7g3>jHyV72fY=EV;?k`5M#ARXVb!nGJW)RgG_XtHyj6hWrnwalja(3d z0BkyTwd}S!eBTa)7^#)i;>1o#K(Ah~ceaFzy4#Eu$cVv`E}Q@ni4{6}-;}?ZXuxKm z*+98;Bs_p_kXdUU{y6=T3JlGNnIm)JNnnn^Gr;F`G=gcOpC_Xk@3-SPRJCWxTwJz=_pG1lA-MUmp$Fa`5>4j@B4}`-bI& zd@9JY>@{n2iOdV-W%Q5QJ6j`3>i~vz&^n;w!3Q18!EwK(`jp0!Gs7^Hh*~jM$ECen z-xWyLFqnW0o@zxHD``GBs{r8>SJ6X=yX?s*%F9w zhw(_KsP+QU*}e@6%;jGFWz>O+GQ9(Bv8CQY>v0OGG1B1@R9*V<5?iEaTdTV?Px%O7 z8S8RmQE2GXcoqvw;c89P?GIBJb1N5^%RN6ovV)*Q$} zh@nZKXf279>Z^^H5dOi4bl_?<{Nbo)UP;s!$l(xw7vW8JzIDPH zX?rW!;)xSsv@d;iQG%^C)UZDc`7Fe4lx7XrLTDu$Jc)T0cp>ow8brQUB%>jpHG3Jx zlCM1hOBLJjFjU(xMXHT%${wYgkrJ-V?WE8j@VqA>4%5J(;h>B6Hb%TDmUy+I*vZ#0 z!-BcrhNX4SJ>t`1;ld6816(79`PM-z@J6ll!Fy5|wXRiR1}uW^qkflBES+>2*BQX` z8eWLXj8H~7*N-z|yS@z?gn(>J1iHFjhm0HJA>fn0FtuhQL?t@Xsgh;7ZX(F@zLjBz 
z!o-Z7ZAEA|$9@Az>l{RA0nne?IO$swVeW+FO?zx-l5LEyT3nstu^Vmjl>pa{V?iWEWMVh>R9;h$B_|k-$X>0B?lpv z{>3J41LRQrr-q;_gW0z20TuxxCgfS8rQXw@5 zix|(~u_`cl$(FUit#4>VTKm3F>!uF$TL3G5q(zGiY5?iQp+ef#=4C^pPwbErTk9i27 zyJD+EBKG3WBb>oqLIxwazYNSsAaJc(wCfCm)qt7V@8fIy4UYr5BZ&0|{=I&7%SC)P zeAge$7yi=XiY6W*8tASukz`GcX>5sN8%rn)%D1rR9wJeyxCl^juX$O}t2gMF2}h&n z7EVR2G42!)Q`qIp*iG`VtBvpqiKx+)X1$`Jm0NYnBDy_F<5c%W>Fk(K7^#tbd8qGS z!qW)0M(Ok5@KgiFqEL*_Lrz`9H;MOHM?ro>Kh27VE91boSnd4 zi5fiE#6#u}cueEcCobR)V>4@+^l9Nkt3zJ}2*&DwK#LJhY3&3EKXW61E$rg)cIXWI z&~;Q7o30i3XF%Ww<-GBk=}A;z{adlB#hD4l)~wM>=)mxSAW!cBdrg)FMbEr+%Uen` zhFc@S)>0(S$VCjh=?D!rYlvDpMiE6dc-D$(KntNIGaWP{oFFi5JdV9=>i5_+sG$Hv zlyMQ}e6&9txt`h=9eG9cMr5{cURKBaUv=usKCZjw2lSl(neO?Iq~O8%kK3RCTK4~~ z&wtFZ4krMU6Y^v>ei8N$tW?5Rf{kbSXQup3+xBtZ`=S+||E|}FX`pD=roOzU*ZfcCy6V4V zrUvuhXHj4OZgu`+t%4^E?*I;Qf%*KcjS|`XK#sQLXJZ-{o2z+Ef~#Sl2g!bNCSdC; zZ%Yh0!(@(d&qXB@t8X2)2=WC>Zr>KgzEb?@1w1_emBq$ViLH9eyQBts%>Q&Qo$0jy zr;@qB`On)SDcha@478{z8;-YZu<~mov-Go<nf1VM1`w zH1H|dKa>LCN8Ypx`kGGIX!nqzTyR3>1)kNKb)?|P(HzDpiclu4VYGTn5`Z6IG|mZg zifg0XLXJWdiOH3kPe6k?fO_z55C zQ>zhiAzzWKAjeN)bv^9>GLtd^FCXS%6BFF!57J-zAW2-nlJGo{1;oi#q72qM1)D1i zIL6Z}%+4sd*sl^lF&4v{fg|EW^4&wgQ#Pi2S@M@2ybxhv#3~Kln5E8?j5EkI=TSCf zh(pF0;AEh&4RJ}&$6*g(@zj>P)~2^ z{)J;lc!bA1I&<>u>3KXzN0{n8H!>NCB${4b&fy_3zhcG8@w|N*5iMjJz{y3>vsvW< zhq2YfrFbu%CgO24+cT0FX8^1N3&|8S$sBFiOpFj0$>cy<TiA&N zHhahQgP)Ca=Hz`Q7>Gl7l6$5@TRd`lY2omZ6N|{wkgTGrHR>fUT^Nh+J$PX}zIPO4SDuEs!?f`gCdDQJxMM>it1WCf{5z%0Br zdUKjU&?r}%5jR9r5bAEF&=Mte-@c6~1aJsJZfD(SU;q8%nY*;`8$^1n|0w<2<^N~0 zQv?5hJAB+}+pVtua-L6&eBG9^+!|AUsbXaj@>Q!j6vwm#EJWN(&W|SdOe$dDwUb5Hmv*kUm|i+nxj#UM{M6Yq zN0&~XT3k4JVrl;5*%OQYDU|YDGBT_iJkldNQbr1Dg?lzP={L-sonhxH&c{XZe@$oZ zLRV7&K^vo*rIDu9NrHzENK!*gzK3Le1c|x~FZzl0n*K3zT|n&FJ9i^UV`GX8vdu_c zZiuutE<}%FhXhJ=tZePGQsektwlTJ;OLi)QgMNQ=6bR}6lDF!j8Z1szu5kkhx!=3A z3~a9dQ>oNcSN_LTIyvzFZiiGr|A%eQNQ64xL*ax%W50Y}v+GstyLkF-;ne)>?8%x_ zP4B0Na`S`{nor9(Wa?Pwj4+}3*h-^O^JWu?5^N}B%wMMTtu*uTvYYT_E`gd5cqld) ztZ~$;gdfnX!ToDMa{&{;;f^xb3Q00$eu5nEY&kEouY`4>X5+{+vKK5KrsjFzv7iKk zl^z!m6&s#OkR5kfEHX`&p#K5qlr;h57&>35u8P@TDl#Pl$rwN;41i#{mSO1=v~TP-n|wxGPPkz8+`-r11!FmgbNQXmk1jhk%`Upg63MHfisV)jhJhZjFr? 
zP9PtflwZ=3N+ozZFji_|HL_ zVL-V-54z}f_dIoK}zGCC3`^u6(bp=Hn+qOEM6EZP(@p8ZIl2DR8n3{nM z*Vfn-o>LqfamR%V!#eGdFXN$Z3LeJ}UN0aYlN4KtIV_;Hwf+qOHmMuwq6E@ zu@UK5E~2OO5usbcSeVPQvB(8D8;ER*AOy4t2US{`u%ZW}_&*VTe|#*EgJ4MQgT z2!Q+kW0G&u9ITNEiogYVqCe}az)>ZeKY}FzKv@$p2z9hwc%uot zO{UKMawMNZ4{0`yk^cluJlftRm@&gWri+}kxtB*klB5MDL$i(q9bdd-2ceE*2T9!6 z!6IT#td=#Q}m)aHb>>9GrY=Z_vcyd(rRpBGL{Sff65tk}bheYr7) z2pa1M8w_(6u`mH;Bh5MxGI$(3yW41k9z#c z8O4|isyLKFYYk;eOB7%^kTs@T*Gje}j}in<1f`cTS8Sig0f9gkv0;lc4~PUI;TsE8 z*_*Hw0Zl}dNS?eQ3jS7EQvw))*|Z_>BVo`lsTS4|NJQZx(SA6Tf?5R7fyJ^EZ>WMt zSZ1-jL{XEvQuLy1Rw<(-kq<=Y5+8HU`eUwE)+GR1?MA9aHvn@`7X^F6K&I5J11%}( zgGC*cE@kyqwE|C8lvH?Gl8_)~3uh0t6toKZjdhcA!ooo>RI_rfp!7vll~#8vnRv*g z>sgqai5vbv?6g82G%$#-LKy>)XiNi#C_w_)TvzF8}B5p^3sdXDsYliJWsLsak{)h@1h%`Xc(W_i>$2uSPJIYle5r=FTivX=M-dgia2@N&j#d0d7KKstupk6c zf9)5}BWnc8YYtY5{00+4o`Bmj^^Mte&`OG`8qwyYwfP8?R}(n(%NlS{RG@%cLA@I> z*5iAzGC&iu?u&)+GYZ)0=|hL(UT%N=A{D(F7lwNst#F+Rw(Qku%IbW5Ww0%i_;L0}+4 zjrFKis^AbLB#i4dH@1ZBg~lwOr@l3ghOs)-8Z@w!0I1pP(w}cG zApL?l&jgRY71n`-`;GF9IM9GK`)av}a|N;xW5pCXZL{meA{4uRGghN#hf=vEUMp`l zbP&d(iRd^j@b>jpJWiSJfiodeqdrup2Lo%4ioiEFVR~ZG?b3_wL+D}c$y^Gg)v%u4 zm|Ec--k;H2BgWsSLkJwiX}xrk>T-0|TlYq-M;@_8%!9)EG7hke8r2f&h*6&_S0~f6 zvky3p`L&{9R$Gflk-(FFjL5fFy;?xWdgi z#M=?s8oXmG6bMCZTItTanEOKHOT+n3IlJk%~tpSw0egw5Uds( z&i+9;1cdL9r^B%4(=+=&lx{33CRmPHbcZWkM?R3Bv|?7c?d2}EM$ajZsqAED{$D18oPfdo zzdi-~_jjxFAGYJo4gjmnFeM7}zx@!fIHYA%(lG~n`g;JNTmk}6O}lvpNC#&!1ZWe7 zC#)d~t4&(1qMTa31ga8+wov}1jIyEO;+#0yi(OWf;WfmWg<+3GI@1y+Jt_p(P{m8D zt4rn1793R?ZH0nhrZXonsGgz~d*u9ZxEPISJj4U4u|sF(7ZyalY4A|Wl{1wss9_GR z$g(7HEnI~@wpS=KKY#?-yzZ6XSV}&fhHjcv?2 z6ZRtc0eLC-1~?Ke5;tB>k7~9G0+ia?L;5?Mq!nzk6 z7n1t~X{(-M;SdNMz7uT~fK7G{j?W^&o*6JGCkyB(-cv`4Ax_~kY5;Ex5lm+STPCP0 zMIanM#wTVuwlx?O=4O?hFIUPGBiB0PR#2VZvrEjrurh1T#KND5L}vC=p#Y z2w*)&ehU(GYuxJqm=b+h6Ej7~7Ri&c&nT$D6_jKc6_E`S|6Igt5i;j*tY-`2%^hML zw-I0RAK`x4vH7J#$Bxa-bI`FO%3WEpM=xGLI^ZMLTEU7H#wGM`3c{iiJEENzj-0$d za)jZVS?f@PnTuc*D;AS>?Ls4=V`TeC<|b=+YkDaJ4q=;BBy+aP+7Sd@B2`^A$3+2o zZd{=5Fo-ivr40dW^jXjsu}F&+Rp3-5@rzOg zp5KUG(TBn5sV10&-e)5?Lll~f+=leD!lf^7GJ_)|ji2emW7VP-I09A-fcw9KR8|#f zaYYf}*I+5d0eP{IZ0=l!qVAM;ZfuwtOdN!xezjmlG8QvQp8<1G9Jqgr*Z}g5DbQj6 z)tx*N#&4_rmr7+aUG;x68QlLku>W?<)-88zxBuqd+IqdbywX6X%(#`twQUpp|7q(% z-ePObqwECI!cYSQiewQgB1)e@Wd;Wc7%GIGVnNYx!oMj5Ou>r!tEs3t0AcNxhuyqh z-$IRN)^pL7w7o>%@B$M+WzqysgYcXvKhmfK46699Ya582kI;6DyHNQxYuM>?Y`r)I zXU=$$%o@^xu)Om08@Ekq?rKRFcF?L9ri6-;q-c#{F;;6%a#-atF=q$=;uP(OS|mzT z5OA~dCpgt!E|`}AC9zwm$xSIdpd?3B#cUd_I@cPbK4!Ahi?E`Q&LD`YJLZIi*eXO# zZr@pxBl+0FK)wZ+`~Y#k;$E$p=cX_yft88d8d%fJvpK#sLT>BGw6+RXR+BBehs8+J zW(Akw%ptEA#Ur({9!uM5c@h2XAuciA`fA>-lnaoxk`#A;2$4>+uqCbiDD%RV6Ouq| zVOi9humJ+3kvgWe>!n0KmqC+fD{qiI+Ykcx-kV^mEz)KAN6sYQ^c7+ZaOXHwBkcs2 zbX{I?5=1h8#QT9A7vf#Jpnz@M7C^-U?BuT^_K!_?oQHuy(y&gxlBS3<4BzkNB7T_3 z)f6HH2|?wh9>CSZ??NzC%@JbZMuq5z87G3n2;oZZu#`GE0dt&J5E;=m%>sJ?@mrV+ zOm?Hlm2lm_dPukw%X=fB`BJ&Mq+jQRTXd1MCX30An8V>D+0eeE%>tAjYUN$?h(sGK zKi|5AuOdhlvoSLJK~&g(u$EEO2YYYjMm)Hm#t4a)DHdVJ&suZ}bi@P#RBV>YwLfvW z4Iwmk?!$zJP65AH3Q=Jbg{GP*VL|bed_4%iPNjH+R__u!ETuY#6VT$4&!3U&W(wWl zi51wB4~4p?Z_=IHcKUCF?yXrD!b6JzwH{!?fdp_?4f%!N9dC;UNGb;(lFbL1(NbbM z+!Zuecp*`1vCh{Z*i(X7O9qHc4~5Glc$spk?J>-+NTky>!t$!BL=0l&K*x|`UBVe6 zpg(6Y&cI>Pvspayq}yIPFR=N^oW=7=W@5;87y@?CaHu?N2F)Xm8b*LyU@Dfjl(ewi z*XXm@FgIYvULHc7JJqXpO?HWg6PL#PBEb8`E!p=S9z$8Oxr0bSVecErnuYH-mbg$& zAkk9LOI?(o=p8c80ZBNk$Y1k8cEIEo^@suaWK^uCKo&8iOIw`B+}7h?c<(hF|lBO$F5h( z)Vf&Gu)6@qR5Pu^^yVA1dcR~llP*-dwgA8w!`FiO$jQlI^uzzaeaY?Suz{GlrcBlZ zGXmyM$Qq1Ov_u9d^2#gf{JL-W=rv&U=|Q%0(4(?ggvTmcl2IA8nY4HKGBmEQe4{T# z6G~Pt@T;_EP|H{)RvQ?>(fWt9;AGIj)r}Jf+0={7d{h&`f2~>N8HtFH1F+r&sxE@c 
zFJ!u3Wn>b+OYR7sYPb*A95S&5KSuoP1QI3?xQYeZMoC!M!;y2$+-IU-Al7<2fZCJ( zHB^jxK)iYVBXXb_bm@?AYD^)cCd{*jj7W_l3>Ll?6KI(v1Ey{EFkJ|lejaKxA!g`{ zkJjZ@s}||?Iz5n(%4kQje0a4$Hnqg?t7|>;?p; z*g&?7u`T`zG8F`G8^IMh&fo`>B*28B-NzHwob}2PgOY>D9%V`hDFBFyPtdx9oqn7= zZh!@2$XvfURfV9P)>AMy1n?qcSl%T8D7mAAkS)~GrkS)V!A0JfsTj3vAL+fp#750a z6ExEx8ZQy?*FiG^>8ewL;zOd#sRw1dOl9FSoZ&NM;nm&U*0;zHtikIh;70^ zHfX~~_~*AQ09x>$b)Wvo-}R6~t6@9X3tbI;fc%>m6z8Rl-_igKH`@qgLa2`ZJqhTy z36{u6Bywm1L*V)~FJaJjB?8_-l^>GQ6s)RHDqk~s)R>Zt0ho!e7Oc@p(&s+ugZlx+ zHO7=#&;l3Tv(EJO7j2?qv@yrUuMij350wBuREfWm%6sPJJ=6dOmDUmgQG<6wRZP=@ zh4p+Tp-vA1>!SOOPU3}1ymi(BMfg_aYGXd3f@Q~xhV%g7)Cl(=->8oszU#s5zfM{#XgM{M=*=6Lv9vVZfLu%iFrD*Cr^^+V}U|I2-z z`~Y3Ae}cbBQ~>DA|4OHGgZ!_4@S+oD_mye;{ftOVFWLAX4T;An6+n>85X!0_e>3|p4PPHc z0H<&dd@?;ryAb!kPh|)BU%g3t%Pq*8t^Bi6(=*vzDw&= zl7;v=_S3QRAhphoJj9qcKG!8C2*4jh#VC^;6RzmI2lujLzks$MiufbwzQ?*B*-yAQ zMvwKO0H()`D{Mo|>uyl%vQ^!&#&)-CqDWSIQ{bF%7LW2ii+Z{UUz(xlo? zZkY;XtJoRdW9^NfckPS(@aVv(><|TEZ2wF@d0VgfpG>CS58`<%`p~X}ge`naMcQ+uWEV z|2MP$lc@F75&xZ<8q|O3WL00jU%TU#pa0y+)5-kNuirWJgT*^9_T?=Dpifr{w9Ws_ zRA12Ck^Y~|4A%d?aHAb)2mce(gEiTMaWXYCm9mkcHD#9yPHxgEPU5$8 zJB(i6dg_1hxy*cXl+|LUDX_aAxQzrA(n z0|-xd3bf7tKJ5eSK>s8DJIMdK9kSELe_rj~hnMb%|L_U6>1ON<^FjV!BVpn{iPY1` z|3^;Hfd4xa>FxAtoByfl#MY90w(|y!|83^~r*d8K|EXLyGqC@*N9G;1zIKQ6*3U$K zBl$yz=f1h`JMrIo|Cg%2a_Whth4;MpIsfkH(a$`Yd++bQ!CU|Arwd>C8SmkT)(?FA z1Ci9f`l0LZ|KJmc&K1A;(@#9`b3gUi@4oE?{~`b3cYpMk&;RQ``_1ou^~loQ_x|bY ze*BdOKl}A}{^oG>M?d!O|NLV=k^S(WUHLF#ef-eD|3p5W%}+UY8ub^mGs*N!sWd&EEfteF8@F^9 zl9SW9LMgLNGoh#d-@X4km75yu|MrpX0chXPSv7K#h z6ZF#m8=U{mrE&xP-@&TBd^P(2qnZ5C#osy}d+_8hAOHG;um7c=`NV_YPJjE`?>&%w z&%WH_pO~IZ{Mg>-{ox(|>Bax@9b?~m{fmaKL{9(gJOBw)t=8`|`RS>3^L68pMD4!j3ke9sExe3%Ge_GMmg#mvAx0R2ojeWUe#?t3O%H zO`=jej(erIDJH%2{|5P=*n}M9f44EJPhUdz{};aaHpk7|M!I(?La&DpFnaSVlUa!bTRD|khPafW~Xz-F9rGmp|kg-7r*%S#RotBmH+!s-~0LZJ+VT^WS>zm!A5u|NW8=yyn_F|I3rtt-J2}g?pCY z@4Wy0$xr;nU7!E>hyG;slN0ay#4D12{G|8EFU0=A(5sslzuOjm7#kc?SNBDC*hox!bc-KX}8f zQs8#P`+Cj)WHQ~c{v!)9J+S|8mA6~zZx7b~t&r*awoOQF_icc;2*^l`vF=YQAwpThk=1N*;^ED1n6gIOQY;@S8?e7HUj zy!ZUyApVyb-2c*Nwg`CP|3C-a4pKiY-?6lW0HAvf7sD>CIt_bisSj9k{%^DXYdV`A zp{%y{_oNVd(Qvv^B a +echo b > b +echo union > union +echo e > e-no-attr +echo unset > unset +echo unspecified > unspecified + +cat <.gitattributes +a merge=a +b merge=b +union merge=union +missing merge=missing +unset -merge +unspecified !merge +EOF + +git add . 
&& git commit -m "init" diff --git a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs index 073280e0afd..d42ec7aa823 100644 --- a/gix-merge/tests/merge/blob/builtin_driver.rs +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -123,8 +123,8 @@ mod text { "Number of expected diverging cases must match the actual one - probably the implementation improved" ); assert_eq!( - (num_diverging as f32 / num_cases as f32) * 100.0, - 12.053572, + ((num_diverging as f32 / num_cases as f32) * 100.0) as usize, + 12, "Just to show the percentage of skipped tests - this should get better" ); Ok(()) diff --git a/gix-merge/tests/merge/blob/mod.rs b/gix-merge/tests/merge/blob/mod.rs index f781f63e485..57d9205d79a 100644 --- a/gix-merge/tests/merge/blob/mod.rs +++ b/gix-merge/tests/merge/blob/mod.rs @@ -1 +1,52 @@ mod builtin_driver; +mod pipeline; +mod platform; + +mod util { + use std::collections::HashMap; + + use gix_hash::oid; + use gix_object::{bstr::BString, find::Error}; + + #[derive(Default)] + pub struct ObjectDb { + data_by_id: HashMap, + } + + impl gix_object::FindHeader for ObjectDb { + fn try_header(&self, id: &oid) -> Result, Error> { + match self.data_by_id.get(&id.to_owned()) { + Some(data) => Ok(Some(gix_object::Header { + kind: gix_object::Kind::Blob, + size: data.len() as u64, + })), + None => Ok(None), + } + } + } + + impl gix_object::Find for ObjectDb { + fn try_find<'a>(&self, id: &oid, buffer: &'a mut Vec) -> Result>, Error> { + match self.data_by_id.get(&id.to_owned()) { + Some(data) => { + buffer.clear(); + buffer.extend_from_slice(data); + Ok(Some(gix_object::Data { + kind: gix_object::Kind::Blob, + data: buffer.as_slice(), + })) + } + None => Ok(None), + } + } + } + + impl ObjectDb { + /// Insert `data` and return its hash. That can be used to find it again. 
+ pub fn insert(&mut self, data: &str) -> gix_hash::ObjectId { + let id = gix_object::compute_hash(gix_hash::Kind::Sha1, gix_object::Kind::Blob, data.as_bytes()); + self.data_by_id.insert(id, data.into()); + id + } + } +} diff --git a/gix-merge/tests/merge/blob/pipeline.rs b/gix-merge/tests/merge/blob/pipeline.rs new file mode 100644 index 00000000000..080a9d601f6 --- /dev/null +++ b/gix-merge/tests/merge/blob/pipeline.rs @@ -0,0 +1,433 @@ +use crate::blob::util::ObjectDb; +use bstr::ByteSlice; +use gix_filter::eol; +use gix_filter::eol::AutoCrlf; +use gix_merge::blob::pipeline::{self, Mode, WorktreeRoots}; +use gix_merge::blob::{Pipeline, ResourceKind}; +use gix_object::tree::EntryKind; + +const ALL_MODES: [pipeline::Mode; 2] = [pipeline::Mode::ToGit, pipeline::Mode::Renormalize]; + +#[test] +fn without_transformation() -> crate::Result { + for mode in ALL_MODES { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = Pipeline::new( + WorktreeRoots { + common_ancestor_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + default_options(), + ); + + let does_not_matter = gix_hash::Kind::Sha1.null(); + let mut buf = Vec::new(); + let a_name = "a"; + let a_content = "a-content"; + std::fs::write(tmp.path().join(a_name), a_content.as_bytes())?; + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!(buf.as_bstr(), a_content, "there is no transformations configured"); + + let link_name = "link"; + gix_fs::symlink::create(a_name.as_ref(), &tmp.path().join(link_name))?; + let err = filter + .convert_to_mergeable( + &does_not_matter, + EntryKind::Link, + link_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + ) + .unwrap_err(); + + assert!( + matches!(err, pipeline::convert_to_mergeable::Error::InvalidEntryKind {rela_path,actual} + if rela_path == link_name && actual == EntryKind::Link) + ); + assert_eq!( + buf.len(), + 9, + "input buffers are cleared only if we think they are going to be used" + ); + drop(tmp); + + let mut db = ObjectDb::default(); + let b_content = "b-content"; + let id = db.insert(b_content); + + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &db, + mode, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!( + buf.as_bstr(), + b_content, + "there is no transformations configured, it fetched the data from the ODB" + ); + + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, None, "the lack of object in the database isn't a problem"); + + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + "does not exist on disk".into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, None, "the lack of file on disk is fine as well"); + } + + Ok(()) +} + +#[test] +fn binary_below_large_file_threshold() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = Pipeline::new( + WorktreeRoots { + current_root: Some(tmp.path().to_owned()), + 
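+ // Only the current/"ours" root points at the worktree here; the other roots stay unset.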
..Default::default() + }, + gix_filter::Pipeline::default(), + pipeline::Options { + large_file_threshold_bytes: 5, + }, + ); + + let does_not_matter = gix_hash::Kind::Sha1.null(); + let mut buf = Vec::new(); + let a_name = "a"; + let binary_content = "a\0b"; + std::fs::write(tmp.path().join(a_name), binary_content.as_bytes())?; + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::BlobExecutable, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer), "binary data can still be merged"); + assert_eq!(buf.as_bstr(), binary_content); + + let mut db = ObjectDb::default(); + let id = db.insert(binary_content); + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &db, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!(buf.as_bstr(), binary_content); + + Ok(()) +} + +#[test] +fn above_large_file_threshold() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = gix_merge::blob::Pipeline::new( + WorktreeRoots { + current_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + pipeline::Options { + large_file_threshold_bytes: 4, + }, + ); + + let does_not_matter = gix_hash::Kind::Sha1.null(); + let mut buf = Vec::new(); + let a_name = "a"; + let large_content = "hello"; + std::fs::write(tmp.path().join(a_name), large_content.as_bytes())?; + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::BlobExecutable, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!( + out, + Some(pipeline::Data::TooLarge { size: 5 }), + "it indicates that the file is too large" + ); + assert_eq!(buf.len(), 0, "it should avoid querying that data in the first place"); + + drop(tmp); + let mut db = ObjectDb::default(); + let id = db.insert(large_content); + + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &db, + pipeline::Mode::ToGit, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::TooLarge { size: 5 })); + assert_eq!( + buf.len(), + 0, + "it won't have queried the blob, first it checks the header" + ); + + Ok(()) +} + +#[test] +fn non_existing() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let mut filter = Pipeline::new( + WorktreeRoots { + common_ancestor_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + default_options(), + ); + + let null = gix_hash::Kind::Sha1.null(); + let mut buf = vec![1]; + let a_name = "a"; + assert!( + !tmp.path().join(a_name).exists(), + "precondition: worktree file doesn't exist" + ); + let out = filter.convert_to_mergeable( + &null, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + )?; + assert_eq!( + out, None, + "it's OK for a resource to not exist on disk - they'd then count as deleted" + ); + assert_eq!(buf.len(), 0, "always cleared"); + + drop(tmp); + + buf.push(1); + let out = filter.convert_to_mergeable( + &null, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &gix_object::find::Never, 
+ pipeline::Mode::ToGit, + &mut buf, + )?; + + assert_eq!( + out, None, + "the root path isn't configured and the object database returns nothing" + ); + assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); + + let some_id = gix_hash::ObjectId::from_hex(b"45c160c35c17ad264b96431cceb9793160396e99")?; + let err = filter + .convert_to_mergeable( + &some_id, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &gix_object::find::Never, + pipeline::Mode::ToGit, + &mut buf, + ) + .unwrap_err(); + assert!( + matches!( + err, + gix_merge::blob::pipeline::convert_to_mergeable::Error::FindObject( + gix_object::find::existing_object::Error::NotFound { .. } + ), + ), + "missing object database ids are always an error (even though missing objects on disk are allowed)" + ); + Ok(()) +} + +#[test] +fn worktree_filter() -> crate::Result { + let tmp = gix_testtools::tempfile::TempDir::new()?; + let filter = gix_filter::Pipeline::new( + Default::default(), + gix_filter::pipeline::Options { + eol_config: eol::Configuration { + auto_crlf: AutoCrlf::Enabled, + ..Default::default() + }, + ..Default::default() + }, + ); + let mut filter = gix_merge::blob::Pipeline::new( + WorktreeRoots { + common_ancestor_root: Some(tmp.path().to_owned()), + ..Default::default() + }, + filter, + default_options(), + ); + + let mut db = ObjectDb::default(); + let a_name = "a"; + let mut buf = Vec::new(); + let a_content = "a-content\r\n"; + std::fs::write(tmp.path().join(a_name), a_content.as_bytes())?; + for mode in ALL_MODES { + let does_not_matter = gix_hash::Kind::Sha1.null(); + let out = filter.convert_to_mergeable( + &does_not_matter, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &gix_object::find::Never, + mode, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!( + buf.as_bstr(), + "a-content\n", + "worktree files need to be converted back to what's stored in Git" + ); + + let id = db.insert(a_content); + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CommonAncestorOrBase, + &mut |_, _| {}, + &db, + mode, + &mut buf, + )?; + assert_eq!(out, Some(pipeline::Data::Buffer)); + match mode { + Mode::ToGit => { + assert_eq!( + buf.as_bstr(), + "a-content\r\n", + "if an object with CRLF already exists, we don't 'renormalize' it, it's a feature" + ); + } + Mode::Renormalize => { + assert_eq!( + buf.as_bstr(), + "a-content\n", + "we can also do it if the file exists both on disk and is known to the ODB" + ); + } + } + } + + drop(tmp); + + let b_content = "b-content\n"; + let id = db.insert(b_content); + + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::CurrentOrOurs, + &mut |_, _| {}, + &db, + pipeline::Mode::ToGit, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!(buf.as_bstr(), b_content, "no work is done for what's already in Git"); + + let mut db = ObjectDb::default(); + let b_content = "b-content\r\n"; + let id = db.insert(b_content); + let out = filter.convert_to_mergeable( + &id, + EntryKind::Blob, + a_name.into(), + ResourceKind::OtherOrTheirs, + &mut |_, _| {}, + &db, + pipeline::Mode::Renormalize, + &mut buf, + )?; + + assert_eq!(out, Some(pipeline::Data::Buffer)); + assert_eq!( + buf.as_bstr(), + "b-content\n", + "we see what would have been stored if the file was checked out and checked in again.\ + It explicitly ignores what's in Git already (or it wouldn't do 
anyting)" + ); + + Ok(()) +} + +fn default_options() -> pipeline::Options { + pipeline::Options { + large_file_threshold_bytes: 0, + } +} diff --git a/gix-merge/tests/merge/blob/platform.rs b/gix-merge/tests/merge/blob/platform.rs new file mode 100644 index 00000000000..6865e097f4d --- /dev/null +++ b/gix-merge/tests/merge/blob/platform.rs @@ -0,0 +1,584 @@ +use gix_merge::blob::{pipeline, ResourceKind}; +use gix_object::tree::EntryKind; +use gix_worktree::stack::state::attributes; + +use gix_merge::blob::Platform; + +#[test] +fn ancestor_and_current_and_other_do_not_exist() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::default()); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "also-missing".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "can't-be-found-in-odb".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::BlobExecutable, + "can't-be-found-in-odb".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + )?; + + let state = platform + .prepare_merge_state(&gix_object::find::Never) + .expect("no validation is done here, let the caller inspect"); + assert_eq!(state.ancestor.data.as_slice(), None); + assert_eq!(state.current.data.as_slice(), None); + assert_eq!(state.other.data.as_slice(), None); + Ok(()) +} + +mod set_resource { + use crate::blob::platform::new_platform; + use gix_merge::blob::{pipeline, ResourceKind}; + use gix_object::tree::EntryKind; + + #[test] + fn invalid_resource_types() { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + for (mode, name) in [(EntryKind::Commit, "Commit"), (EntryKind::Tree, "Tree")] { + assert_eq!( + platform + .set_resource( + gix_hash::Kind::Sha1.null(), + mode, + "a".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + ) + .unwrap_err() + .to_string(), + format!("Can only diff blobs, not {name}") + ); + } + } +} + +fn new_platform( + drivers: impl IntoIterator, + filter_mode: gix_merge::blob::pipeline::Mode, +) -> Platform { + let root = gix_testtools::scripted_fixture_read_only("make_blob_repo.sh").expect("valid fixture"); + let attributes = gix_worktree::Stack::new( + &root, + gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::new( + Default::default(), + None, + attributes::Source::WorktreeThenIdMapping, + Default::default(), + )), + gix_worktree::glob::pattern::Case::Sensitive, + Vec::new(), + Vec::new(), + ); + let filter = gix_merge::blob::Pipeline::new( + gix_merge::blob::pipeline::WorktreeRoots { + common_ancestor_root: Some(root.clone()), + ..Default::default() + }, + gix_filter::Pipeline::default(), + Default::default(), + ); + Platform::new( + filter, + filter_mode, + attributes, + drivers.into_iter().collect(), + Default::default(), + ) +} + +// +// #[test] +// fn with_driver() -> crate::Result { +// let root = gix_testtools::scripted_fixture_read_only("make_blob_repo.sh")?; +// let print_all = "echo $@ %O %A %B %L %P %S %X %Y"; +// let print_script_args = "echo $@"; +// let mut attributes = gix_worktree::Stack::new( +// &root, +// gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::new( +// Default::default(), +// None, +// attributes::Source::WorktreeThenIdMapping, +// Default::default(), +// )), +// gix_worktree::glob::pattern::Case::Sensitive, +// Vec::new(), +// 
Vec::new(), +// ); +// let mut filter = gix_merge::blob::Pipeline::new( +// WorktreeRoots { +// common_ancestor_root: Some(root.clone()), +// ..Default::default() +// }, +// gix_filter::Pipeline::default(), +// vec![ +// gix_merge::blob::Driver { +// name: "a".into(), +// command: print_all.into(), +// ..Default::default() +// }, +// gix_merge::blob::Driver { +// name: "b".into(), +// command: print_script_args.into(), +// ..Default::default() +// }, +// gix_merge::blob::Driver { +// name: "union".into(), +// ..Default::default() +// }, +// gix_merge::blob::Driver { +// name: "missing".into(), +// ..Default::default() +// }, +// ], +// pipeline::Options { +// default_driver: Some("binary".into()), +// ..crate::blob::pipeline::default_options() +// }, +// ); +// +// let mut buf = Vec::new(); +// let does_not_matter = gix_hash::Kind::Sha1.null(); +// let path = "unspecified"; +// let platform = attributes.at_entry(path, None, &gix_object::find::Never)?; +// let out = filter.convert_to_mergeable( +// &does_not_matter, +// EntryKind::Blob, +// path.into(), +// ResourceKind::CommonAncestorOrBase, +// &mut |_, out| { +// let _ = platform.matching_attributes(out); +// }, +// &gix_object::find::Never, +// pipeline::Mode::ToGit, +// &mut buf, +// )?; +// assert_eq!( +// out.driver, +// DriverChoice::BuiltIn(BuiltinDriver::Binary), +// "fall through to what's set in options" +// ); +// assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// assert_eq!(buf.as_bstr(), "unspecified\n"); +// +// let path = "union"; +// let platform = attributes.at_entry(path, None, &gix_object::find::Never)?; +// let out = filter.convert_to_mergeable( +// &does_not_matter, +// EntryKind::Blob, +// path.into(), +// ResourceKind::CommonAncestorOrBase, +// &mut |_, out| { +// let _ = platform.matching_attributes(out); +// }, +// &gix_object::find::Never, +// pipeline::Mode::ToGit, +// &mut buf, +// )?; +// let driver_idx = 3; +// assert_eq!( +// out.driver, +// DriverChoice::Index(driver_idx), +// "it finds explicit drivers first before it searches built-in ones" +// ); +// assert_eq!( +// filter.drivers()[driver_idx].name, +// "union", +// "it has re-sorted the drivers internally, which is why it's read-only" +// ); +// assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// assert_eq!(buf.as_bstr(), "union\n"); +// // +// // let mut db = ObjectDb::default(); +// // let null = gix_hash::Kind::Sha1.null(); +// // let mut buf = Vec::new(); +// // let platform = attributes.at_entry("a", None, &gix_object::find::Never)?; +// // let worktree_modes = [ +// // pipeline::Mode::ToWorktreeAndBinaryToText, +// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, +// // ]; +// // let all_modes = [ +// // pipeline::Mode::ToGit, +// // pipeline::Mode::ToWorktreeAndBinaryToText, +// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, +// // ]; +// // for mode in worktree_modes { +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "a".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(0)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied"); +// // } +// // +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "a".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = 
platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // pipeline::Mode::ToGit, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(0)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!(buf.as_bstr(), "a\n", "unconditionally use git according to mode"); +// // +// // let id = db.insert("a\n"); +// // for mode in worktree_modes { +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Blob, +// // "a".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(0)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied"); +// // } +// // +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Blob, +// // "a".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // pipeline::Mode::ToGit, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(0)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "a\n", +// // "no filter was applied in this mode, also when using the ODB" +// // ); +// // +// // let platform = attributes.at_entry("missing", None, &gix_object::find::Never)?; +// // for mode in all_modes { +// // buf.push(1); +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Link, +// // "missing".into(), /* does not actually exist */ +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); +// // assert_eq!(out.data, None); +// // assert_eq!(buf.len(), 0, "always cleared"); +// // +// // buf.push(1); +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Link, +// // "missing".into(), /* does not actually exist */ +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); +// // assert_eq!(out.data, None); +// // assert_eq!(buf.len(), 0, "always cleared"); +// // +// // buf.push(1); +// // let id = db.insert("link-target"); +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Link, +// // "missing".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "link-target", +// // "no matter what, links always look the same." 
+// // ); +// // } +// +// // let platform = attributes.at_entry("b", None, &gix_object::find::Never)?; +// // for mode in all_modes { +// // buf.push(1); +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "b".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // +// // assert_eq!(out.driver_index, Some(1)); +// // assert_eq!( +// // out.data, +// // Some(pipeline::Data::Binary { size: 2 }), +// // "binary value comes from driver, and it's always respected with worktree source" +// // ); +// // assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); +// // } +// // +// // let id = db.insert("b\n"); +// // for mode in all_modes { +// // buf.push(1); +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Blob, +// // "b".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // mode, +// // &mut buf, +// // )?; +// // +// // assert_eq!(out.driver_index, Some(1)); +// // assert_eq!( +// // out.data, +// // Some(pipeline::Data::Binary { size: 2 }), +// // "binary value comes from driver, and it's always respected with DB source" +// // ); +// // assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); +// // } +// // +// // let platform = attributes.at_entry("c", None, &gix_object::find::Never)?; +// // for mode in worktree_modes { +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "c".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(2)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "to-text\nc\n", +// // "filter was applied, it overrides binary=true" +// // ); +// // } +// // +// // let id = db.insert("c\n"); +// // for mode in worktree_modes { +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Blob, +// // "c".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(2)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "to-text\nc\n", +// // "filter was applied, it overrides binary=true" +// // ); +// // } +// // +// // let platform = attributes.at_entry("unset", None, &gix_object::find::Never)?; +// // for mode in all_modes { +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "unset".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!( +// // out.driver_index, None, +// // "no driver is associated, as `diff` is explicitly unset" +// // ); +// // assert_eq!( +// // out.data, +// // Some(pipeline::Data::Binary { size: 6 }), +// // "unset counts as binary" +// // ); +// // assert_eq!(buf.len(), 0); +// // } +// // +// // let id = db.insert("unset\n"); +// // for mode in all_modes { +// // let out = filter.convert_to_diffable( +// // 
&id, +// // EntryKind::Blob, +// // "unset".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!( +// // out.driver_index, None, +// // "no driver is associated, as `diff` is explicitly unset" +// // ); +// // assert_eq!( +// // out.data, +// // Some(pipeline::Data::Binary { size: 6 }), +// // "unset counts as binary" +// // ); +// // assert_eq!(buf.len(), 0); +// // } +// // +// // let platform = attributes.at_entry("d", None, &gix_object::find::Never)?; +// // let id = db.insert("d-in-db"); +// // for mode in worktree_modes { +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "d".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(3)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "to-text\nd\n", +// // "the worktree + text conversion was triggered for worktree source" +// // ); +// // +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Blob, +// // "d".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // mode, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, Some(3)); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "to-text\nd-in-db", +// // "the worktree + text conversion was triggered for db source" +// // ); +// // } +// // +// // let platform = attributes.at_entry("e-no-attr", None, &gix_object::find::Never)?; +// // let out = filter.convert_to_diffable( +// // &null, +// // EntryKind::Blob, +// // "e-no-attr".into(), +// // ResourceKind::OldOrSource, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &gix_object::find::Never, +// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, None); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "e\n", +// // "no text filter, so git conversion was applied for worktree source" +// // ); +// // +// // let id = db.insert("e-in-db"); +// // let out = filter.convert_to_diffable( +// // &id, +// // EntryKind::Blob, +// // "e-no-attr".into(), +// // ResourceKind::NewOrDestination, +// // &mut |_, out| { +// // let _ = platform.matching_attributes(out); +// // }, +// // &db, +// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, +// // &mut buf, +// // )?; +// // assert_eq!(out.driver_index, None); +// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); +// // assert_eq!( +// // buf.as_bstr(), +// // "e-in-db", +// // "no text filter, so git conversion was applied for ODB source" +// // ); +// +// Ok(()) +// } From eb37dc36d8c42f5a7714c641244ce4a13111b0a1 Mon Sep 17 00:00:00 2001 From: Sebastian Thiel Date: Sat, 28 Sep 2024 15:26:46 +0200 Subject: [PATCH 10/10] add platform tests and implementation That way, the platform can be used to perform actual merges. This will also be a good chance to try the API. 
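For orientation, the intended call sequence for the new platform API looks roughly like the sketch below. This is an illustrative editor's note only, not part of the diff: `set_resource()` and the `ResourceKind` variants come from the platform code in this series, while the exact signatures of `prepare_merge()` and `PlatformRef::merge()` are approximated from the doc comments added further down; the path, labels, and object ids are placeholders.

```rust
use gix_merge::blob::{builtin_driver, Platform, ResourceKind};
use gix_object::tree::EntryKind;

// Sketch: drive a single-file three-way merge through the platform.
// `platform` is assumed to have been built with `Platform::new(..)` elsewhere.
fn merge_file(
    platform: &mut Platform,
    base_id: gix_hash::ObjectId,
    ours_id: gix_hash::ObjectId,
    theirs_id: gix_hash::ObjectId,
    objects: &(impl gix_object::Find + gix_object::FindObjectOrHeader),
) -> Result<Vec<u8>, Box<dyn std::error::Error>> {
    // Register all three sides; the pipeline converts each into a mergeable buffer.
    platform.set_resource(base_id, EntryKind::Blob, "file.txt".into(), ResourceKind::CommonAncestorOrBase, objects)?;
    platform.set_resource(ours_id, EntryKind::Blob, "file.txt".into(), ResourceKind::CurrentOrOurs, objects)?;
    platform.set_resource(theirs_id, EntryKind::Blob, "file.txt".into(), ResourceKind::OtherOrTheirs, objects)?;

    // Resolve the driver via git-attributes and obtain a `PlatformRef` (signature approximated).
    let merge_ref = platform.prepare_merge(objects, Default::default())?;

    // Perform the merge; labels end up next to the conflict markers (call shape approximated).
    let mut out = Vec::new();
    let labels = builtin_driver::text::Labels {
        ancestor: Some("base".into()),
        current: Some("ours".into()),
        other: Some("theirs".into()),
    };
    // `_pick` tells whether `out` or one of the input resources holds the result;
    // this sketch assumes the common `Pick::Buffer` case.
    let (_pick, _resolution) = merge_ref.merge(&mut out, labels, gix_command::Context::default())?;
    Ok(out)
}
```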
--- Cargo.lock | 1 + gix-merge/Cargo.toml | 3 +- .../src/blob/builtin_driver/text/function.rs | 147 +-- gix-merge/src/blob/builtin_driver/text/mod.rs | 61 +- .../src/blob/builtin_driver/text/utils.rs | 1 - gix-merge/src/blob/mod.rs | 26 +- gix-merge/src/blob/platform.rs | 547 -------- gix-merge/src/blob/platform/merge.rs | 417 ++++++ gix-merge/src/blob/platform/mod.rs | 127 ++ gix-merge/src/blob/platform/prepare_merge.rs | 100 ++ gix-merge/src/blob/platform/resource.rs | 50 + gix-merge/src/blob/platform/set_resource.rs | 103 ++ .../generated-archives/make_blob_repo.tar | Bin 74240 -> 74240 bytes .../generated-archives/text-baseline.tar | Bin 390144 -> 402944 bytes gix-merge/tests/fixtures/make_blob_repo.sh | 4 +- gix-merge/tests/fixtures/text-baseline.sh | 9 + gix-merge/tests/merge/blob/builtin_driver.rs | 36 +- gix-merge/tests/merge/blob/platform.rs | 1115 +++++++++-------- gix-merge/tests/merge/main.rs | 2 + 19 files changed, 1576 insertions(+), 1173 deletions(-) delete mode 100644 gix-merge/src/blob/platform.rs create mode 100644 gix-merge/src/blob/platform/merge.rs create mode 100644 gix-merge/src/blob/platform/mod.rs create mode 100644 gix-merge/src/blob/platform/prepare_merge.rs create mode 100644 gix-merge/src/blob/platform/resource.rs create mode 100644 gix-merge/src/blob/platform/set_resource.rs diff --git a/Cargo.lock b/Cargo.lock index 5912a30127d..13e98214f49 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2052,6 +2052,7 @@ dependencies = [ "gix-hash 0.14.2", "gix-object 0.44.0", "gix-path 0.10.11", + "gix-quote 0.4.12", "gix-tempfile 14.0.2", "gix-testtools", "gix-trace 0.1.10", diff --git a/gix-merge/Cargo.toml b/gix-merge/Cargo.toml index 6d8da010147..93a5ae5b664 100644 --- a/gix-merge/Cargo.toml +++ b/gix-merge/Cargo.toml @@ -17,7 +17,7 @@ doctest = false [features] default = ["blob"] ## Enable diffing of blobs using imara-diff, which also allows for a generic rewrite tracking implementation. -blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace"] +blob = ["dep:imara-diff", "dep:gix-filter", "dep:gix-worktree", "dep:gix-path", "dep:gix-fs", "dep:gix-command", "dep:gix-tempfile", "dep:gix-trace", "dep:gix-quote"] ## Data structures implement `serde::Serialize` and `serde::Deserialize`. 
serde = ["dep:serde", "gix-hash/serde", "gix-object/serde"] @@ -31,6 +31,7 @@ gix-path = { version = "^0.10.11", path = "../gix-path", optional = true } gix-fs = { version = "^0.11.3", path = "../gix-fs", optional = true } gix-tempfile = { version = "^14.0.0", path = "../gix-tempfile", optional = true } gix-trace = { version = "^0.1.10", path = "../gix-trace", optional = true } +gix-quote = { version = "^0.4.12", path = "../gix-quote", optional = true } thiserror = "1.0.63" imara-diff = { version = "0.1.7", optional = true } diff --git a/gix-merge/src/blob/builtin_driver/text/function.rs b/gix-merge/src/blob/builtin_driver/text/function.rs index a69b9a1a58d..bb800ce47e2 100644 --- a/gix-merge/src/blob/builtin_driver/text/function.rs +++ b/gix-merge/src/blob/builtin_driver/text/function.rs @@ -3,13 +3,12 @@ use crate::blob::builtin_driver::text::utils::{ hunks_differ_in_diff3, take_intersecting, tokens, write_ancestor, write_conflict_marker, write_hunks, zealously_contract_hunks, CollectHunks, Hunk, Side, }; -use crate::blob::builtin_driver::text::{ConflictStyle, Options, ResolveWith}; +use crate::blob::builtin_driver::text::{Conflict, ConflictStyle, Labels, Options}; use crate::blob::Resolution; -use bstr::BStr; /// Merge `current` and `other` with `ancestor` as base according to `opts`. /// -/// Use `current_label`, `other_label` and `ancestor_label` to annotate conflict sections. +/// Use `labels` to annotate conflict sections. /// /// `input` is for reusing memory for lists of tokens, but note that it grows indefinitely /// while tokens for `current`, `ancestor` and `other` are added. @@ -23,12 +22,14 @@ use bstr::BStr; pub fn merge<'a>( out: &mut Vec, input: &mut imara_diff::intern::InternedInput<&'a [u8]>, + Labels { + ancestor: ancestor_label, + current: current_label, + other: other_label, + }: Labels<'_>, current: &'a [u8], - current_label: Option<&BStr>, ancestor: &'a [u8], - ancestor_label: Option<&BStr>, other: &'a [u8], - other_label: Option<&BStr>, opts: Options, ) -> Resolution { out.clear(); @@ -77,9 +78,9 @@ pub fn merge<'a>( .expect("at least one entry"), &mut filled_hunks, ); - match opts.on_conflict { - None => { - let (hunks_front_and_back, num_hunks_front) = match opts.conflict_style { + match opts.conflict { + Conflict::Keep { style, marker_size } => { + let (hunks_front_and_back, num_hunks_front) = match style { ConflictStyle::Merge | ConflictStyle::ZealousDiff3 => { zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens) } @@ -130,28 +131,22 @@ pub fn merge<'a>( ) .or_else(|| detect_line_ending(our_hunks, input, ¤t_tokens)) .unwrap_or(b"\n".into()); - match opts.conflict_style { + match style { ConflictStyle::Merge => { if contains_lines(our_hunks) || contains_lines(their_hunks) { resolution = Resolution::Conflict; - write_conflict_marker(out, b'<', current_label, opts.marker_size, nl); + write_conflict_marker(out, b'<', current_label, marker_size, nl); write_hunks(our_hunks, input, ¤t_tokens, out); - write_conflict_marker(out, b'=', None, opts.marker_size, nl); + write_conflict_marker(out, b'=', None, marker_size, nl); write_hunks(their_hunks, input, ¤t_tokens, out); - write_conflict_marker(out, b'>', other_label, opts.marker_size, nl); + write_conflict_marker(out, b'>', other_label, marker_size, nl); } } ConflictStyle::Diff3 | ConflictStyle::ZealousDiff3 => { if contains_lines(our_hunks) || contains_lines(their_hunks) { - if hunks_differ_in_diff3( - opts.conflict_style, - our_hunks, - their_hunks, - input, - ¤t_tokens, - ) { + if 
hunks_differ_in_diff3(style, our_hunks, their_hunks, input, ¤t_tokens) { resolution = Resolution::Conflict; - write_conflict_marker(out, b'<', current_label, opts.marker_size, nl); + write_conflict_marker(out, b'<', current_label, marker_size, nl); write_hunks(our_hunks, input, ¤t_tokens, out); let ancestor_hunk = Hunk { before: first_hunk.before.start..last_hunk.before.end, @@ -161,11 +156,11 @@ pub fn merge<'a>( let ancestor_hunk = std::slice::from_ref(&ancestor_hunk); let ancestor_nl = detect_line_ending_or_nl(ancestor_hunk, input, ¤t_tokens); - write_conflict_marker(out, b'|', ancestor_label, opts.marker_size, ancestor_nl); + write_conflict_marker(out, b'|', ancestor_label, marker_size, ancestor_nl); write_hunks(ancestor_hunk, input, ¤t_tokens, out); - write_conflict_marker(out, b'=', None, opts.marker_size, nl); + write_conflict_marker(out, b'=', None, marker_size, nl); write_hunks(their_hunks, input, ¤t_tokens, out); - write_conflict_marker(out, b'>', other_label, opts.marker_size, nl); + write_conflict_marker(out, b'>', other_label, marker_size, nl); } else { write_hunks(our_hunks, input, ¤t_tokens, out); } @@ -176,64 +171,60 @@ pub fn merge<'a>( write_hunks(back_hunks, input, ¤t_tokens, out); ancestor_integrated_until = last_hunk.before.end; } - Some(resolve) => { - match resolve { - ResolveWith::Ours | ResolveWith::Theirs => { - let (our_hunks, their_hunks) = match filled_hunks_side { - Side::Current => (&filled_hunks, &intersecting), - Side::Other => (&intersecting, &filled_hunks), - Side::Ancestor => { - unreachable!("initial hunks are never ancestors") - } - }; - let hunks_to_write = if resolve == ResolveWith::Ours { - our_hunks - } else { - their_hunks - }; - if let Some(first_hunk) = hunks_to_write.first() { - write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); - } - write_hunks(hunks_to_write, input, ¤t_tokens, out); - if let Some(last_hunk) = hunks_to_write.last() { - ancestor_integrated_until = last_hunk.before.end; - } + Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs => { + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") } - ResolveWith::Union => { - let (hunks_front_and_back, num_hunks_front) = - zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens); + }; + let hunks_to_write = if opts.conflict == Conflict::ResolveWithOurs { + our_hunks + } else { + their_hunks + }; + if let Some(first_hunk) = hunks_to_write.first() { + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + } + write_hunks(hunks_to_write, input, ¤t_tokens, out); + if let Some(last_hunk) = hunks_to_write.last() { + ancestor_integrated_until = last_hunk.before.end; + } + } + Conflict::ResolveWithUnion => { + let (hunks_front_and_back, num_hunks_front) = + zealously_contract_hunks(&mut filled_hunks, &mut intersecting, input, ¤t_tokens); - let (our_hunks, their_hunks) = match filled_hunks_side { - Side::Current => (&filled_hunks, &intersecting), - Side::Other => (&intersecting, &filled_hunks), - Side::Ancestor => { - unreachable!("initial hunks are never ancestors") - } - }; - let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); - let first_hunk = front_hunks - .first() - .or(our_hunks.first()) - .expect("at least one hunk to write"); - write_ancestor(input, ancestor_integrated_until, 
first_hunk.before.start as usize, out); - write_hunks(front_hunks, input, ¤t_tokens, out); - assure_ends_with_nl(out, detect_line_ending_or_nl(front_hunks, input, ¤t_tokens)); - write_hunks(our_hunks, input, ¤t_tokens, out); - assure_ends_with_nl(out, detect_line_ending_or_nl(our_hunks, input, ¤t_tokens)); - write_hunks(their_hunks, input, ¤t_tokens, out); - if !back_hunks.is_empty() { - assure_ends_with_nl(out, detect_line_ending_or_nl(their_hunks, input, ¤t_tokens)); - } - write_hunks(back_hunks, input, ¤t_tokens, out); - let last_hunk = back_hunks - .last() - .or(their_hunks.last()) - .or(our_hunks.last()) - .or(front_hunks.last()) - .expect("at least one hunk"); - ancestor_integrated_until = last_hunk.before.end; + let (our_hunks, their_hunks) = match filled_hunks_side { + Side::Current => (&filled_hunks, &intersecting), + Side::Other => (&intersecting, &filled_hunks), + Side::Ancestor => { + unreachable!("initial hunks are never ancestors") } }; + let (front_hunks, back_hunks) = hunks_front_and_back.split_at(num_hunks_front); + let first_hunk = front_hunks + .first() + .or(our_hunks.first()) + .expect("at least one hunk to write"); + write_ancestor(input, ancestor_integrated_until, first_hunk.before.start as usize, out); + write_hunks(front_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(front_hunks, input, ¤t_tokens)); + write_hunks(our_hunks, input, ¤t_tokens, out); + assure_ends_with_nl(out, detect_line_ending_or_nl(our_hunks, input, ¤t_tokens)); + write_hunks(their_hunks, input, ¤t_tokens, out); + if !back_hunks.is_empty() { + assure_ends_with_nl(out, detect_line_ending_or_nl(their_hunks, input, ¤t_tokens)); + } + write_hunks(back_hunks, input, ¤t_tokens, out); + let last_hunk = back_hunks + .last() + .or(their_hunks.last()) + .or(our_hunks.last()) + .or(front_hunks.last()) + .expect("at least one hunk"); + ancestor_integrated_until = last_hunk.before.end; } } } else { diff --git a/gix-merge/src/blob/builtin_driver/text/mod.rs b/gix-merge/src/blob/builtin_driver/text/mod.rs index 73d3f123cc6..1c4287dc7be 100644 --- a/gix-merge/src/blob/builtin_driver/text/mod.rs +++ b/gix-merge/src/blob/builtin_driver/text/mod.rs @@ -1,3 +1,5 @@ +use bstr::BStr; + /// The way the built-in [text driver](crate::blob::BuiltinDriver::Text) will express /// merge conflicts in the resulting file. #[derive(Default, Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] @@ -48,27 +50,31 @@ pub enum ConflictStyle { ZealousDiff3, } +/// The set of labels to annotate conflict markers with. +/// +/// That way it becomes clearer where the content of conflicts are originating from. +#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)] +pub struct Labels<'a> { + pub ancestor: Option<&'a BStr>, + pub current: Option<&'a BStr>, + pub other: Option<&'a BStr>, +} + /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). #[derive(Copy, Clone, Debug, Eq, PartialEq)] pub struct Options { /// Determine of the diff will be performed. /// Defaults to [`imara_diff::Algorithm::Myers`]. pub diff_algorithm: imara_diff::Algorithm, - /// How to visualize conflicts in merged files. - pub conflict_style: ConflictStyle, - /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` - pub marker_size: usize, - /// Decide what to do to automatically resolve conflicts. + /// Decide what to do to automatically resolve conflicts, or to keep them /// If `None`, add conflict markers according to `conflict_style` and `marker_size`. 
- pub on_conflict: Option, + pub conflict: Conflict, } impl Default for Options { fn default() -> Self { Options { - conflict_style: Default::default(), - marker_size: 7, - on_conflict: None, + conflict: Default::default(), diff_algorithm: imara_diff::Algorithm::Myers, } } @@ -76,13 +82,42 @@ impl Default for Options { /// What to do to resolve a conflict. #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub enum ResolveWith { +pub enum Conflict { + /// Keep the conflict by marking it in the source file. + Keep { + /// How to visualize conflicts in merged files. + style: ConflictStyle, + /// The amount of markers to draw, defaults to 7, i.e. `<<<<<<<` + marker_size: usize, + }, /// Chose our side to resolve a conflict. - Ours, + ResolveWithOurs, /// Chose their side to resolve a conflict. - Theirs, + ResolveWithTheirs, /// Place our and their lines one after another, in any order - Union, + ResolveWithUnion, +} + +impl Conflict { + /// The amount of conflict marker characters to print by default. + pub const DEFAULT_MARKER_SIZE: usize = 7; + + /// The amount of conflict markers to print if this instance contains them, or `None` otherwise + pub fn marker_size(&self) -> Option { + match self { + Conflict::Keep { marker_size, .. } => Some(*marker_size), + Conflict::ResolveWithOurs | Conflict::ResolveWithTheirs | Conflict::ResolveWithUnion => None, + } + } +} + +impl Default for Conflict { + fn default() -> Self { + Conflict::Keep { + style: Default::default(), + marker_size: Conflict::DEFAULT_MARKER_SIZE, + } + } } pub(super) mod function; diff --git a/gix-merge/src/blob/builtin_driver/text/utils.rs b/gix-merge/src/blob/builtin_driver/text/utils.rs index 9d3db8d5599..1aab3e47f08 100644 --- a/gix-merge/src/blob/builtin_driver/text/utils.rs +++ b/gix-merge/src/blob/builtin_driver/text/utils.rs @@ -168,7 +168,6 @@ fn ancestor_hunk(start: u32, num_lines: u32) -> Hunk { /// /// Return a new vector of all the hunks that were removed from front and back, with partial hunks inserted, /// along with the amount of hunks that go front, with the remaining going towards the back. -// TODO: refactor so hunks and their associated data can go into an array for easier handling. #[must_use] pub fn zealously_contract_hunks( a_hunks: &mut Vec, diff --git a/gix-merge/src/blob/mod.rs b/gix-merge/src/blob/mod.rs index ab4094e9eaf..07f544a2e23 100644 --- a/gix-merge/src/blob/mod.rs +++ b/gix-merge/src/blob/mod.rs @@ -1,6 +1,7 @@ // TODO: remove this - only needed while &mut Vec isn't used. #![allow(clippy::ptr_arg)] +use crate::blob::platform::{DriverChoice, ResourceRef}; use bstr::BString; use std::path::PathBuf; @@ -83,7 +84,7 @@ pub struct Driver { /// * **%L** /// - The conflict-marker size as positive number. /// * **%P** - /// - The path in which the merged result will be stored. + /// - The path in which the merged result would be stored, as workspace-relative path, of the current/ours side. /// * **%S** /// - The conflict-label for the common ancestor or *base*. /// * **%X** @@ -98,6 +99,8 @@ pub struct Driver { /// ``` /// .merge_file_nR2Qs1 .merge_file_WYXCJe .merge_file_UWbzrm 7 file e2a2970 HEAD feature /// ``` + /// + /// The driver is expected to leave its version in the file at `%A`, by overwriting it. pub command: BString, /// If `true`, this is the `name` of the driver to use when a virtual-merge-base is created, as a merge of all /// available merge-bases if there are more than one. 
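As a hedged aside on the `%`-placeholders documented above (editor's illustration, not part of the diff; `merge-custom` is a made-up program): a user-provided driver is declared with a command template, and the platform substitutes tempfile paths and labels into it before running it through a shell.

```rust
use gix_merge::blob::Driver;

fn main() {
    // %O/%A/%B are substituted with tempfile paths holding base/ours/theirs,
    // %L with the conflict-marker size, %P with the (quoted) path of the 'ours' side,
    // and %S/%X/%Y with the base/ours/theirs conflict labels.
    // The program is expected to overwrite the file at %A with the merged result.
    let driver = Driver {
        name: "custom".into(),
        command: "merge-custom %O %A %B --markers %L --path %P --labels %S %X %Y".into(),
        ..Default::default()
    };
    // Drivers are handed to `Platform::new(..)`; a path opts in via its `merge` attribute,
    // e.g. `*.dat merge=custom` in .gitattributes.
    let _ = driver;
}
```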
@@ -157,3 +160,24 @@ pub struct Platform { /// The way we convert resources into mergeable states. filter_mode: pipeline::Mode, } + +/// The product of a [`prepare_merge()`](Platform::prepare_merge()) call to finally +/// perform the merge and retrieve the merge results. +#[derive(Copy, Clone)] +pub struct PlatformRef<'parent> { + /// The platform that hosts the resources, used to access drivers. + pub(super) parent: &'parent Platform, + /// The current or our side of the merge operation. + pub current: ResourceRef<'parent>, + /// The ancestor or base of the merge operation. + pub ancestor: ResourceRef<'parent>, + /// The other or their side of the merge operation. + pub other: ResourceRef<'parent>, + /// Which driver to use according to the resource's configuration, + /// using the path of `current` to read git-attributes. + pub driver: DriverChoice, + /// Possibly processed options for use when performing the actual merge. + /// + /// They may be inspected before the merge, or altered at will. + pub options: platform::merge::Options, +} diff --git a/gix-merge/src/blob/platform.rs b/gix-merge/src/blob/platform.rs deleted file mode 100644 index f749e03c3ec..00000000000 --- a/gix-merge/src/blob/platform.rs +++ /dev/null @@ -1,547 +0,0 @@ -use crate::blob::{pipeline, BuiltinDriver, Pipeline, Platform, ResourceKind}; -use bstr::{BStr, BString, ByteSlice}; -use gix_filter::attributes; - -/// A stored value representing a resource that participates in a merge. -#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)] -pub(super) struct Resource { - /// The `id` of the value, or `null` if it's only living in a worktree. - id: gix_hash::ObjectId, - /// The repository-relative path where the resource lives in the tree. - rela_path: BString, - /// The outcome of converting a resource into a mergable format using [Pipeline::convert_to_mergeable()]. - data: Option, - /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`. - mode: gix_object::tree::EntryKind, - /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary - /// or the resource doesn't exist. - buffer: Vec, -} - -/// A blob or executable ready to be merged in one way or another. -#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] -pub struct ResourceRef<'a> { - /// The data itself, suitable for merging, and if the object or worktree item is present at all. - pub data: resource::Data<'a>, - /// The location of the resource, relative to the working tree. - pub rela_path: &'a BStr, - /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at - /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which - /// after its 'to-git' conversion never had its hash computed. - pub id: &'a gix_hash::oid, -} - -/// Options for use in a [`Platform`]. -#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)] -pub struct Options { - /// Define which driver to use by name if the `merge` attribute for a resource is unspecified. - /// - /// This is the value of the `merge.default` git configuration. - pub default_driver: Option, -} - -/// The selection of the driver to use by a resource obtained with [`Pipeline::convert_to_mergeable()`]. -/// -/// If available, an index into the `drivers` field to access more diff-related information of the driver for items -/// at the given path, as previously determined by git-attributes. 
-/// -/// * `merge` is set -/// - Use the [`BuiltinDriver::Text`] -/// * `-merge` is unset -/// - Use the [`BuiltinDriver::Binary`] -/// * `!merge` is unspecified -/// - Use [`Options::default_driver`] or [`BuiltinDriver::Text`]. -/// * `merge=name` -/// - Search for a user-configured or built-in driver called `name`. -/// - If not found, silently default to [`BuiltinDriver::Text`] -/// -/// Note that drivers are queried even if there is no object available. -#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)] -pub enum DriverChoice { - /// Use the given built-in driver to perform the merge. - BuiltIn(BuiltinDriver), - /// Use the user-provided driver program using the index into [the pipelines driver array](Pipeline::drivers(). - Index(usize), -} - -impl Default for DriverChoice { - fn default() -> Self { - DriverChoice::BuiltIn(Default::default()) - } -} - -/// -pub mod resource { - use crate::blob::{ - pipeline, - platform::{Resource, ResourceRef}, - }; - - impl<'a> ResourceRef<'a> { - pub(super) fn new(cache: &'a Resource) -> Self { - ResourceRef { - data: cache.data.map_or(Data::Missing, |data| match data { - pipeline::Data::Buffer => Data::Buffer(&cache.buffer), - pipeline::Data::TooLarge { size } => Data::Binary { size }, - }), - rela_path: cache.rela_path.as_ref(), - id: &cache.id, - } - } - } - - /// The data of a mergeable resource, as it could be determined and computed previously. - #[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)] - pub enum Data<'a> { - /// The object is missing, either because it didn't exist in the working tree or because its `id` was null. - Missing, - /// The textual data as processed and ready for merging, i.e. suitable for storage in Git. - Buffer(&'a [u8]), - /// The size that the binary blob had at the given revision, without having applied filters, as it's either - /// considered binary or above the big-file threshold. - /// - /// In this state, the binary file cannot be merged. - Binary { - /// The size of the object prior to performing any filtering or as it was found on disk. - /// - /// Note that technically, the size isn't always representative of the same 'state' of the - /// content, as once it can be the size of the blob in Git, and once it's the size of file - /// in the worktree. - size: u64, - }, - } - - impl<'a> Data<'a> { - /// Return ourselves as slice of bytes if this instance stores data. - pub fn as_slice(&self) -> Option<&'a [u8]> { - match self { - Data::Buffer(d) => Some(d), - Data::Binary { .. } | Data::Missing => None, - } - } - } -} - -/// -pub mod set_resource { - use bstr::BString; - - use crate::blob::{pipeline, ResourceKind}; - - /// The error returned by [Platform::set_resource](super::Platform::set_resource). 
- #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error("Can only diff blobs, not {mode:?}")] - InvalidMode { mode: gix_object::tree::EntryKind }, - #[error("Failed to read {kind:?} worktree data from '{rela_path}'")] - Io { - rela_path: BString, - kind: ResourceKind, - source: std::io::Error, - }, - #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] - Attributes { - rela_path: BString, - kind: ResourceKind, - source: std::io::Error, - }, - #[error(transparent)] - ConvertToMergeable(#[from] pipeline::convert_to_mergeable::Error), - } -} - -/// -pub mod merge { - use crate::blob::platform::DriverChoice; - use crate::blob::platform::ResourceRef; - use crate::blob::{builtin_driver, BuiltinDriver, Driver, Resolution}; - use bstr::BString; - - /// The product of a [`prepare_merge()`](crate::blob::Platform::prepare_merge_state()) call to finally - /// perform the merge and retrieve the merge results. - #[derive(Copy, Clone)] - pub struct State<'parent> { - /// The platform that hosts the resources, used to access drivers. - pub(super) parent: &'parent super::Platform, - /// The current or our side of the merge operation. - pub current: ResourceRef<'parent>, - /// The ancestor or base of the merge operation. - pub ancestor: ResourceRef<'parent>, - /// The other or their side of the merge operation. - pub other: ResourceRef<'parent>, - /// Which driver to use according to the resource's configuration, - /// using the path of `current` to read git-attributes. - pub driver_choice: DriverChoice, - } - - #[derive(Copy, Clone, Debug, Eq, PartialEq)] - pub struct Options { - /// If `true`, the resources being merged are contained in a virtual ancestor, - /// which is the case when merge bases are merged into one. - pub is_virtual_ancestor: bool, - /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible and it picks a side. - pub resolve_binary_with: Option, - /// Options for the builtin [text driver](BuiltinDriver::Text). - pub text: builtin_driver::text::Options, - } - - /// - pub mod prepare_external_driver { - use std::ops::{Deref, DerefMut}; - - use crate::blob::ResourceKind; - use bstr::BString; - - /// The error returned by [State::prepare_merge_command()](super::State::prepare_external_driver()). - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")] - SourceOrDestinationAreBinary, - #[error( - "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created" - )] - CreateTempfile { - rela_path: BString, - kind: ResourceKind, - source: std::io::Error, - }, - #[error( - "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command" - )] - WriteTempfile { - rela_path: BString, - kind: ResourceKind, - source: std::io::Error, - }, - } - - /// The product of a [`prepare_external_driver`](super::State::prepare_external_driver()) operation. - /// - /// This type acts like [`std::process::Command`], ready to run, with `stderr` set to *inherit*, - /// but `stdin` closed and `stdout` setup to be captured. - // TODO: remove this - #[allow(dead_code)] - pub struct Command { - /// The pre-configured command - cmd: std::process::Command, - /// A tempfile holding the *current* (ours) state of the resource. 
- current: gix_tempfile::Handle, - /// A tempfile holding the *ancestor* (base) state of the resource. - ancestor: gix_tempfile::Handle, - /// A tempfile holding the *other* (their) state of the resource. - other: gix_tempfile::Handle, - } - - impl Deref for Command { - type Target = std::process::Command; - - fn deref(&self) -> &Self::Target { - &self.cmd - } - } - - impl DerefMut for Command { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.cmd - } - } - } - - /// - pub mod builtin_merge { - /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](super::State::builtin_merge). - pub enum Pick { - /// Chose the ancestor. - Ancestor, - /// Chose our side. - Ours, - /// Chose their side. - Theirs, - /// New data was produced with the result of the merge, to be found in the buffer that was passed to - /// [builtin_merge()](super::State::builtin_merge). - Buffer, - } - } - - /// The error returned by [State::merge()]. - #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error(transparent)] - PrepareExternalDriver(#[from] prepare_external_driver::Error), - } - - /// Plumbing - impl<'parent> State<'parent> { - /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources, - /// prepare the invocation and temporary files needed to launch it according to protocol. - /// - /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge. - /// - /// ### Deviation - /// - /// We allow passing more context than Git would by taking a whole `context`, it's up to the caller to decide how much is filled. - pub fn prepare_external_driver( - &self, - _merge_command: BString, - _context: gix_command::Context, - ) -> Result { - todo!("prepare command") - } - - /// Perform the merge according to our resources and - /// Note that if the *pick* wasn't [`Buffer`](builtin_merge::Pick::Buffer), then `out` will not have been cleared. - pub fn builtin_merge( - &self, - _out: &mut Vec, - _driver: BuiltinDriver, - _opts: Options, - ) -> (builtin_merge::Pick, Resolution) { - todo!("do full merge") - } - - /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err` - /// with the built-in driver to use instead. - pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> { - match self.driver_choice { - DriverChoice::BuiltIn(builtin) => Err(builtin), - DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()), - } - } - } - - /// Convenience - impl<'parent> State<'parent> { - /// Perform the merge, possibly invoking an external merge command, and store the result in `out`. - /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`. - pub fn merge( - &self, - _out: &mut Vec, - _opts: Options, - _context: gix_command::Context, - ) -> Result { - match self.configured_driver() { - Ok(driver) => { - let _cmd = self.prepare_external_driver(driver.command.clone(), _context)?; - todo!("invoke command and copy result") - } - Err(_builtin) => { - todo!("call builtins and copy results") - } - } - } - } -} - -/// -pub mod prepare_merge { - use crate::blob::ResourceKind; - use bstr::BString; - - /// The error returned by [Platform::prepare_merge()](super::Platform::prepare_merge_state()). 
- #[derive(Debug, thiserror::Error)] - #[allow(missing_docs)] - pub enum Error { - #[error("The 'current', 'ancestor' or 'other' resource for the merge operation were not set")] - UnsetResource, - #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")] - Attributes { - rela_path: BString, - kind: ResourceKind, - source: std::io::Error, - }, - } -} - -/// Lifecycle -impl Platform { - /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able. - /// `filter_mode` decides how to do that specifically. - /// Use `attr_stack` to access attributes pertaining worktree filters and merge settings. - /// `drivers` are the list of available merge drivers that individual paths can refer to by means of git attributes. - /// `options` further configure the operation. - pub fn new( - filter: Pipeline, - filter_mode: pipeline::Mode, - attr_stack: gix_worktree::Stack, - mut drivers: Vec, - options: Options, - ) -> Self { - drivers.sort_by(|a, b| a.name.cmp(&b.name)); - Platform { - drivers, - current: None, - ancestor: None, - other: None, - filter, - filter_mode, - attr_stack, - attrs: { - let mut out = attributes::search::Outcome::default(); - out.initialize_with_selection(&Default::default(), Some("merge")); - out - }, - options, - } - } -} - -/// Access -impl Platform { - /// Return all drivers that this instance was initialized with. - /// - /// They are sorted by [`name`](super::Driver::name) to support binary searches. - pub fn drivers(&self) -> &[super::Driver] { - &self.drivers - } -} - -/// Preparation -impl Platform { - /// Store enough information about a resource to eventually use it in a merge, where… - /// - /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either - /// be a resource in the worktree, or it's considered a non-existing, deleted object. - /// If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided - /// for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root - /// is available for `kind`. - /// * `mode` is the kind of object (only blobs and links are allowed) - /// * `rela_path` is the relative path as seen from the (work)tree root. - /// * `kind` identifies the side of the merge this resource will be used for. - /// * `objects` provides access to the object database in case the resource can't be read from a worktree. - pub fn set_resource( - &mut self, - id: gix_hash::ObjectId, - mode: gix_object::tree::EntryKind, - rela_path: &BStr, - kind: ResourceKind, - objects: &impl gix_object::FindObjectOrHeader, - ) -> Result<(), set_resource::Error> { - self.set_resource_inner(id, mode, rela_path, kind, objects) - } - - /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources. - /// Note that no additional validation is performed here to facilitate inspection. 
- pub fn prepare_merge_state( - &mut self, - objects: &impl gix_object::Find, - ) -> Result, prepare_merge::Error> { - let current = self.current.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; - let ancestor = self.ancestor.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; - let other = self.other.as_ref().ok_or(prepare_merge::Error::UnsetResource)?; - - let entry = self - .attr_stack - .at_entry(current.rela_path.as_bstr(), None, objects) - .map_err(|err| prepare_merge::Error::Attributes { - source: err, - kind: ResourceKind::CurrentOrOurs, - rela_path: current.rela_path.clone(), - })?; - entry.matching_attributes(&mut self.attrs); - let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'diff'"); - let driver = match attr.assignment.state { - attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text), - attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary), - attributes::StateRef::Value(_) | attributes::StateRef::Unspecified => { - let name = match attr.assignment.state { - attributes::StateRef::Value(name) => Some(name.as_bstr()), - attributes::StateRef::Unspecified => { - self.options.default_driver.as_ref().map(|name| name.as_bstr()) - } - _ => unreachable!("only value and unspecified are possible here"), - }; - name.and_then(|name| { - self.drivers - .binary_search_by(|d| d.name.as_bstr().cmp(name)) - .ok() - .map(DriverChoice::Index) - .or_else(|| { - name.to_str() - .ok() - .and_then(BuiltinDriver::by_name) - .map(DriverChoice::BuiltIn) - }) - }) - .unwrap_or_default() - } - }; - - let out = merge::State { - parent: self, - driver_choice: driver, - current: ResourceRef::new(current), - ancestor: ResourceRef::new(ancestor), - other: ResourceRef::new(other), - }; - - Ok(out) - } -} - -impl Platform { - fn set_resource_inner( - &mut self, - id: gix_hash::ObjectId, - mode: gix_object::tree::EntryKind, - rela_path: &BStr, - kind: ResourceKind, - objects: &impl gix_object::FindObjectOrHeader, - ) -> Result<(), set_resource::Error> { - if !matches!( - mode, - gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable - ) { - return Err(set_resource::Error::InvalidMode { mode }); - } - let entry = - self.attr_stack - .at_entry(rela_path, None, objects) - .map_err(|err| set_resource::Error::Attributes { - source: err, - kind, - rela_path: rela_path.to_owned(), - })?; - - let storage = match kind { - ResourceKind::OtherOrTheirs => &mut self.other, - ResourceKind::CommonAncestorOrBase => &mut self.ancestor, - ResourceKind::CurrentOrOurs => &mut self.current, - }; - - let mut buf_storage = Vec::new(); - let out = self.filter.convert_to_mergeable( - &id, - mode, - rela_path, - kind, - &mut |_, out| { - let _ = entry.matching_attributes(out); - }, - objects, - self.filter_mode, - storage.as_mut().map_or(&mut buf_storage, |s| &mut s.buffer), - )?; - - match storage { - None => { - *storage = Some(Resource { - id, - rela_path: rela_path.to_owned(), - data: out, - mode, - buffer: buf_storage, - }); - } - Some(storage) => { - storage.id = id; - storage.rela_path = rela_path.to_owned(); - storage.data = out; - storage.mode = mode; - } - }; - Ok(()) - } -} diff --git a/gix-merge/src/blob/platform/merge.rs b/gix-merge/src/blob/platform/merge.rs new file mode 100644 index 00000000000..1c2ec1a48b4 --- /dev/null +++ b/gix-merge/src/blob/platform/merge.rs @@ -0,0 +1,417 @@ +use crate::blob::{builtin_driver, PlatformRef, Resolution}; +use std::io::Read; +use std::path::PathBuf; + +/// Options for the use in the 
[`PlatformRef::merge()`] call. +#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)] +pub struct Options { + /// If `true`, the resources being merged are contained in a virtual ancestor, + /// which is the case when merge bases are merged into one. + pub is_virtual_ancestor: bool, + /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side. + pub resolve_binary_with: Option, + /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text). + pub text: builtin_driver::text::Options, +} + +/// The error returned by [`PlatformRef::merge()`]. +#[derive(Debug, thiserror::Error)] +#[allow(missing_docs)] +pub enum Error { + #[error("At least one resource was too large to be processed")] + ResourceTooLarge, + #[error(transparent)] + PrepareExternalDriver(#[from] inner::prepare_external_driver::Error), + #[error("Failed to launch external merge driver: {cmd}")] + SpawnExternalDriver { cmd: String, source: std::io::Error }, + #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")] + ExternalDriverFailure { + status: std::process::ExitStatus, + cmd: String, + }, + #[error("IO failed when dealing with merge-driver output")] + ExternalDriverIO(#[from] std::io::Error), +} + +/// The product of a [`PlatformRef::prepare_external_driver()`] operation. +/// +/// This type allows to creation of [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*, +/// but `stdin` closed. +/// It's expected to leave its result in the file substituted at `current` which is then supposed to be read back from there. +// TODO: remove dead-code annotation +#[allow(dead_code)] +pub struct Command { + /// The pre-configured command + cmd: std::process::Command, + /// A tempfile holding the *current* (ours) state of the resource. + current: gix_tempfile::Handle, + /// The path at which `current` is located, for reading the result back from later. + current_path: PathBuf, + /// A tempfile holding the *ancestor* (base) state of the resource. + ancestor: gix_tempfile::Handle, + /// A tempfile holding the *other* (their) state of the resource. + other: gix_tempfile::Handle, +} + +// Just to keep things here but move them a level up later. +pub(super) mod inner { + /// + pub mod prepare_external_driver { + use crate::blob::builtin_driver::text::Conflict; + use crate::blob::platform::{merge, DriverChoice}; + use crate::blob::{builtin_driver, BuiltinDriver, Driver, PlatformRef, ResourceKind}; + use bstr::{BString, ByteVec}; + use gix_tempfile::{AutoRemove, ContainingDirectory}; + use std::io::Write; + use std::ops::{Deref, DerefMut}; + use std::path::{Path, PathBuf}; + use std::process::Stdio; + + /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()). 
+ #[derive(Debug, thiserror::Error)] + #[allow(missing_docs)] + pub enum Error { + #[error("The resource of kind {kind:?} was too large to be processed")] + ResourceTooLarge { kind: ResourceKind }, + #[error( + "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created" + )] + CreateTempfile { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + #[error( + "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command" + )] + WriteTempfile { + rela_path: BString, + kind: ResourceKind, + source: std::io::Error, + }, + } + + /// Plumbing + impl<'parent> PlatformRef<'parent> { + /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources, + /// prepare the invocation and temporary files needed to launch it according to protocol. + /// See the documentation of [`Driver::command`] for possible substitutions. + /// + /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge. + /// + /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file) + /// to read back the result into a suitable buffer. + /// + /// ### Deviation + /// + /// * We allow passing more context than Git would by taking a whole `context`, + /// it's up to the caller to decide how much is filled. + /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness. + pub fn prepare_external_driver( + &self, + merge_command: BString, + builtin_driver::text::Labels { + ancestor, + current, + other, + }: builtin_driver::text::Labels<'_>, + context: gix_command::Context, + ) -> Result { + fn write_data( + data: &[u8], + ) -> std::io::Result<(gix_tempfile::Handle, PathBuf)> { + let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?; + file.write_all(data)?; + let mut path = Default::default(); + file.with_mut(|f| { + f.path().clone_into(&mut path); + })?; + let file = file.close()?; + Ok((file, path)) + } + + let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge { + kind: ResourceKind::CommonAncestorOrBase, + })?; + let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge { + kind: ResourceKind::CurrentOrOurs, + })?; + let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge { + kind: ResourceKind::OtherOrTheirs, + })?; + + let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile { + rela_path: self.ancestor.rela_path.into(), + kind: ResourceKind::CommonAncestorOrBase, + source: err, + })?; + let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile { + rela_path: self.current.rela_path.into(), + kind: ResourceKind::CurrentOrOurs, + source: err, + })?; + let (theirs_tmp, theirs_path) = write_data(theirs).map_err(|err| Error::CreateTempfile { + rela_path: self.other.rela_path.into(), + kind: ResourceKind::OtherOrTheirs, + source: err, + })?; + + let mut cmd = BString::from(Vec::with_capacity(merge_command.len())); + let mut count = 0; + for token in merge_command.split(|b| *b == b'%') { + count += 1; + let token = if count > 1 { + match token.first() { + Some(&b'O') => { + cmd.push_str(gix_path::into_bstr(&base_path).as_ref()); + &token[1..] + } + Some(&b'A') => { + cmd.push_str(gix_path::into_bstr(&ours_path).as_ref()); + &token[1..] 
+                            }
+                            Some(&b'B') => {
+                                cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
+                                &token[1..]
+                            }
+                            Some(&b'L') => {
+                                let marker_size = self
+                                    .options
+                                    .text
+                                    .conflict
+                                    .marker_size()
+                                    .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
+                                cmd.push_str(format!("{marker_size}"));
+                                &token[1..]
+                            }
+                            Some(&b'P') => {
+                                cmd.push_str(gix_quote::single(self.current.rela_path));
+                                &token[1..]
+                            }
+                            Some(&b'S') => {
+                                cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
+                                &token[1..]
+                            }
+                            Some(&b'X') => {
+                                cmd.push_str(gix_quote::single(current.unwrap_or_default()));
+                                &token[1..]
+                            }
+                            Some(&b'Y') => {
+                                cmd.push_str(gix_quote::single(other.unwrap_or_default()));
+                                &token[1..]
+                            }
+                            Some(_other) => {
+                                cmd.push(b'%');
+                                token
+                            }
+                            None => b"%",
+                        }
+                    } else {
+                        token
+                    };
+                    cmd.extend_from_slice(token);
+                }
+
+                Ok(merge::Command {
+                    cmd: gix_command::prepare(gix_path::from_bstring(cmd))
+                        .with_context(context)
+                        .with_shell()
+                        .stdin(Stdio::null())
+                        .stdout(Stdio::inherit())
+                        .stderr(Stdio::inherit())
+                        .into(),
+                    current: ours_tmp,
+                    current_path: ours_path,
+                    ancestor: base_tmp,
+                    other: theirs_tmp,
+                })
+            }
+
+            /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
+            /// with the built-in driver to use instead.
+            pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
+                match self.driver {
+                    DriverChoice::BuiltIn(builtin) => Err(builtin),
+                    DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
+                }
+            }
+        }
+
+        impl std::fmt::Debug for merge::Command {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                self.cmd.fmt(f)
+            }
+        }
+
+        impl Deref for merge::Command {
+            type Target = std::process::Command;
+
+            fn deref(&self) -> &Self::Target {
+                &self.cmd
+            }
+        }
+
+        impl DerefMut for merge::Command {
+            fn deref_mut(&mut self) -> &mut Self::Target {
+                &mut self.cmd
+            }
+        }
+
+        impl merge::Command {
+            /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
+            /// Calling this makes sense only after the merge command has finished successfully.
+            pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
+                std::fs::File::open(&self.current_path)
+            }
+        }
+    }
+
+    ///
+    pub mod builtin_merge {
+        use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution};
+
+        /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge).
+        #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
+        pub enum Pick {
+            /// In a binary merge, the ancestor was chosen.
+            ///
+            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
+            Ancestor,
+            /// In a binary merge, our side was chosen.
+            ///
+            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
+            Ours,
+            /// In a binary merge, their side was chosen.
+            ///
+            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
+            Theirs,
+            /// New data was produced with the result of the merge, to be found in the buffer that was passed to
+            /// [builtin_merge()](PlatformRef::builtin_merge).
+            /// This happens for any merge that isn't a binary merge.
+            Buffer,
+        }
+
+        /// Plumbing
+        impl<'parent> PlatformRef<'parent> {
+            /// Perform the merge using the given `driver`, possibly placing the output in `out`.
+            /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
+            /// Use `labels` to annotate conflict sections in case of a text-merge.
+            /// Returns `None` if one of the buffers is too large, making a merge impossible.
+            /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
+            /// and one has to take the data from the respective resource.
+            pub fn builtin_merge(
+                &self,
+                driver: BuiltinDriver,
+                out: &mut Vec<u8>,
+                input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
+                labels: builtin_driver::text::Labels<'_>,
+            ) -> Option<(Pick, Resolution)> {
+                let base = self.ancestor.data.as_slice()?;
+                let ours = self.current.data.as_slice()?;
+                let theirs = self.other.data.as_slice()?;
+                let driver = if driver != BuiltinDriver::Binary
+                    && (is_binary_buf(ours) || is_binary_buf(theirs) || is_binary_buf(base))
+                {
+                    BuiltinDriver::Binary
+                } else {
+                    driver
+                };
+                Some(match driver {
+                    BuiltinDriver::Text => {
+                        let resolution =
+                            builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
+                        (Pick::Buffer, resolution)
+                    }
+                    BuiltinDriver::Binary => {
+                        let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
+                        let pick = match pick {
+                            builtin_driver::binary::Pick::Ours => Pick::Ours,
+                            builtin_driver::binary::Pick::Theirs => Pick::Theirs,
+                            builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
+                        };
+                        (pick, resolution)
+                    }
+                    BuiltinDriver::Union => {
+                        let resolution = builtin_driver::text(
+                            out,
+                            input,
+                            labels,
+                            ours,
+                            base,
+                            theirs,
+                            builtin_driver::text::Options {
+                                conflict: builtin_driver::text::Conflict::ResolveWithUnion,
+                                ..self.options.text
+                            },
+                        );
+                        (Pick::Buffer, resolution)
+                    }
+                })
+            }
+        }
+
+        fn is_binary_buf(buf: &[u8]) -> bool {
+            let buf = &buf[..buf.len().min(8000)];
+            buf.contains(&0)
+        }
+    }
+}
+
+/// Convenience
+impl<'parent> PlatformRef<'parent> {
+    /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
+    /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
+    /// to indicate it's `out`.
+    /// Use `labels` to annotate conflict sections in case of a text-merge.
+    /// The merge is configured by the options set when preparing the merge, and possible merge driver command executions are affected by `context`.
+    ///
+    /// Note that at this stage, non-existing input data will simply default to an empty buffer when running the actual merge algorithm.
+    /// Too-large resources will result in an error.
+    ///
+    /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
+    /// in terms of buffer handling to make it more useful in the face of missing local files.
+    pub fn merge(
+        &self,
+        out: &mut Vec<u8>,
+        labels: builtin_driver::text::Labels<'_>,
+        context: gix_command::Context,
+    ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
+        match self.configured_driver() {
+            Ok(driver) => {
+                let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context)?;
+                let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
+                    cmd: format!("{:?}", cmd.cmd),
+                    source: err,
+                })?;
+                if !status.success() {
+                    return Err(Error::ExternalDriverFailure {
+                        cmd: format!("{:?}", cmd.cmd),
+                        status,
+                    });
+                }
+                out.clear();
+                cmd.open_result_file()?.read_to_end(out)?;
+                Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
+            }
+            Err(builtin) => {
+                let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
+                out.clear();
+                let (pick, resolution) = self
+                    .builtin_merge(builtin, out, &mut input, labels)
+                    .ok_or(Error::ResourceTooLarge)?;
+                Ok((pick, resolution))
+            }
+        }
+    }
+
+    /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
+    /// Return `None` if the buffer is too large, or if the `pick` corresponds to a buffer (that was written separately).
+    pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Option<&'parent [u8]> {
+        match pick {
+            inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice(),
+            inner::builtin_merge::Pick::Ours => self.current.data.as_slice(),
+            inner::builtin_merge::Pick::Theirs => self.other.data.as_slice(),
+            inner::builtin_merge::Pick::Buffer => None,
+        }
+    }
+}
diff --git a/gix-merge/src/blob/platform/mod.rs b/gix-merge/src/blob/platform/mod.rs
new file mode 100644
index 00000000000..14b33d03fd5
--- /dev/null
+++ b/gix-merge/src/blob/platform/mod.rs
@@ -0,0 +1,127 @@
+use crate::blob::{pipeline, BuiltinDriver, Pipeline, Platform};
+use bstr::{BStr, BString};
+use gix_filter::attributes;
+
+/// A stored value representing a resource that participates in a merge.
+#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
+pub(super) struct Resource {
+    /// The `id` of the value, or `null` if it's only living in a worktree.
+    id: gix_hash::ObjectId,
+    /// The repository-relative path where the resource lives in the tree.
+    rela_path: BString,
+    /// The outcome of converting a resource into a mergeable format using [Pipeline::convert_to_mergeable()].
+    data: Option<pipeline::Data>,
+    /// The kind of the resource we are looking at. Only possible values are `Blob` and `BlobExecutable`.
+    mode: gix_object::tree::EntryKind,
+    /// A possibly empty buffer, depending on `data` which may indicate the data is considered binary
+    /// or the resource doesn't exist.
+    buffer: Vec<u8>,
+}
+
+/// A blob or executable ready to be merged in one way or another.
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub struct ResourceRef<'a> {
+    /// The data itself, suitable for merging, along with whether the object or worktree item is present at all.
+    pub data: resource::Data<'a>,
+    /// The location of the resource, relative to the working tree.
+    pub rela_path: &'a BStr,
+    /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at
+    /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which
+    /// after its 'to-git' conversion never had its hash computed.
+    pub id: &'a gix_hash::oid,
+}
+
+/// Options for use in [`Platform::new()`].
+#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
+pub struct Options {
+    /// Define which driver to use by name if the `merge` attribute for a resource is unspecified.
+    ///
+    /// This is the value of the `merge.default` git configuration.
+    pub default_driver: Option<BString>,
+}
+
+/// The selection of the driver to use for a resource, as obtained with [`Platform::prepare_merge()`].
+///
+/// If available, an index into the `drivers` field provides access to more merge-related information about the driver for items
+/// at the given path, as previously determined by git-attributes.
+///
+/// * `merge` is set
+///     - Use the [`BuiltinDriver::Text`]
+/// * `-merge` is unset
+///     - Use the [`BuiltinDriver::Binary`]
+/// * `!merge` is unspecified
+///     - Use [`Options::default_driver`] or [`BuiltinDriver::Text`].
+/// * `merge=name`
+///     - Search for a user-configured or built-in driver called `name`.
+///     - If not found, silently default to [`BuiltinDriver::Text`]
+///
+/// Note that drivers are queried even if there is no object available.
+#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug, Hash)]
+pub enum DriverChoice {
+    /// Use the given built-in driver to perform the merge.
+    BuiltIn(BuiltinDriver),
+    /// Use the user-provided driver program using the index into [the platform drivers array](Platform::drivers()).
+    Index(usize),
+}
+
+impl Default for DriverChoice {
+    fn default() -> Self {
+        DriverChoice::BuiltIn(Default::default())
+    }
+}
+
+/// Lifecycle
+impl Platform {
+    /// Create a new instance with a way to `filter` data from the object database and turn it into something that is merge-able.
+    /// `filter_mode` decides how to do that specifically.
+    /// Use `attr_stack` to access attributes pertaining to worktree filters and merge settings.
+    /// `drivers` are the list of available merge drivers that individual paths can refer to by means of git attributes.
+    /// `options` further configure the operation.
+    pub fn new(
+        filter: Pipeline,
+        filter_mode: pipeline::Mode,
+        attr_stack: gix_worktree::Stack,
+        mut drivers: Vec<super::Driver>,
+        options: Options,
+    ) -> Self {
+        drivers.sort_by(|a, b| a.name.cmp(&b.name));
+        Platform {
+            drivers,
+            current: None,
+            ancestor: None,
+            other: None,
+            filter,
+            filter_mode,
+            attr_stack,
+            attrs: {
+                let mut out = attributes::search::Outcome::default();
+                out.initialize_with_selection(&Default::default(), Some("merge"));
+                out
+            },
+            options,
+        }
+    }
+}
+
+/// Access
+impl Platform {
+    /// Return all drivers that this instance was initialized with.
+    ///
+    /// They are sorted by [`name`](super::Driver::name) to support binary searches.
+    pub fn drivers(&self) -> &[super::Driver] {
+        &self.drivers
+    }
+}
+
+///
+pub mod set_resource;
+
+///
+pub mod resource;
+
+///
+pub mod merge;
+pub use merge::inner::{builtin_merge, prepare_external_driver};
+
+///
+pub mod prepare_merge;
diff --git a/gix-merge/src/blob/platform/prepare_merge.rs b/gix-merge/src/blob/platform/prepare_merge.rs
new file mode 100644
index 00000000000..24ffb5af32e
--- /dev/null
+++ b/gix-merge/src/blob/platform/prepare_merge.rs
@@ -0,0 +1,100 @@
+use crate::blob::platform::{merge, DriverChoice, ResourceRef};
+use crate::blob::{BuiltinDriver, Platform, PlatformRef, ResourceKind};
+use bstr::{BStr, BString, ByteSlice};
+use gix_filter::attributes;
+
+/// The error returned by [Platform::prepare_merge()](Platform::prepare_merge()).
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error {
+    #[error("The 'current', 'ancestor' or 'other' resource for the merge operation was not set")]
+    UnsetResource,
+    #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")]
+    Attributes {
+        rela_path: BString,
+        kind: ResourceKind,
+        source: std::io::Error,
+    },
+}
+
+/// Preparation
+impl Platform {
+    /// Prepare all state needed for performing a merge, using all [previously set](Self::set_resource()) resources.
+    /// `objects` is used to possibly look up attribute files when obtaining merge-related attributes.
+    ///
+    /// `options` are to be used when merging later, and they may be altered to implement correct binary merges
+    /// in the presence of [virtual merge bases](merge::Options::is_virtual_ancestor).
+    ///
+    /// Note that no additional validation is performed here to facilitate inspection, which means that
+    /// resource buffers might still be too large to be merged, preventing a successful merge at a later time.
+    pub fn prepare_merge(
+        &mut self,
+        objects: &impl gix_object::Find,
+        mut options: merge::Options,
+    ) -> Result<PlatformRef<'_>, Error> {
+        let current = self.current.as_ref().ok_or(Error::UnsetResource)?;
+        let ancestor = self.ancestor.as_ref().ok_or(Error::UnsetResource)?;
+        let other = self.other.as_ref().ok_or(Error::UnsetResource)?;
+
+        let entry = self
+            .attr_stack
+            .at_entry(current.rela_path.as_bstr(), None, objects)
+            .map_err(|err| Error::Attributes {
+                source: err,
+                kind: ResourceKind::CurrentOrOurs,
+                rela_path: current.rela_path.clone(),
+            })?;
+        entry.matching_attributes(&mut self.attrs);
+        let attr = self.attrs.iter_selected().next().expect("pre-initialized with 'merge'");
+        let mut driver = match attr.assignment.state {
+            attributes::StateRef::Set => DriverChoice::BuiltIn(BuiltinDriver::Text),
+            attributes::StateRef::Unset => DriverChoice::BuiltIn(BuiltinDriver::Binary),
+            attributes::StateRef::Value(_) | attributes::StateRef::Unspecified => {
+                let name = match attr.assignment.state {
+                    attributes::StateRef::Value(name) => Some(name.as_bstr()),
+                    attributes::StateRef::Unspecified => {
+                        self.options.default_driver.as_ref().map(|name| name.as_bstr())
+                    }
+                    _ => unreachable!("only value and unspecified are possible here"),
+                };
+                self.find_driver_by_name(name)
+            }
+        };
+        if let Some(recursive_driver_name) = match driver {
+            DriverChoice::Index(idx) => self.drivers.get(idx),
+            _ => None,
+        }
+        .and_then(|driver| driver.recursive.as_deref())
+        .filter(|_| options.is_virtual_ancestor)
+        {
+            driver = self.find_driver_by_name(Some(recursive_driver_name.as_bstr()));
+            options.resolve_binary_with = Some(crate::blob::builtin_driver::binary::ResolveWith::Ours);
+        }
+
+        let out = PlatformRef {
+            parent: self,
+            driver,
+            current: ResourceRef::new(current),
+            ancestor: ResourceRef::new(ancestor),
+            other: ResourceRef::new(other),
+            options,
+        };
+        Ok(out)
+    }
+
+    fn find_driver_by_name(&self, name: Option<&BStr>) -> DriverChoice {
+        name.and_then(|name| {
+            self.drivers
+                .binary_search_by(|d| d.name.as_bstr().cmp(name))
+                .ok()
+                .map(DriverChoice::Index)
+                .or_else(|| {
+                    name.to_str()
+                        .ok()
+                        .and_then(BuiltinDriver::by_name)
+                        .map(DriverChoice::BuiltIn)
+                })
+        })
+        .unwrap_or_default()
+    }
+}
diff --git a/gix-merge/src/blob/platform/resource.rs b/gix-merge/src/blob/platform/resource.rs
new file mode 100644
index 00000000000..ed646c94233
--- /dev/null
+++ b/gix-merge/src/blob/platform/resource.rs
@@ -0,0 +1,50 @@
+use crate::blob::{
+    pipeline,
+    platform::{Resource, ResourceRef},
+};
+
+impl<'a> ResourceRef<'a> {
+    pub(super) fn new(cache: &'a Resource) -> Self {
+        ResourceRef {
+            data: cache.data.map_or(Data::Missing, |data| match data {
+                pipeline::Data::Buffer => Data::Buffer(&cache.buffer),
+                pipeline::Data::TooLarge { size } => Data::TooLarge { size },
+            }),
+            rela_path: cache.rela_path.as_ref(),
+            id: &cache.id,
+        }
+    }
+}
+
+/// The data of a mergeable resource, as it could be determined and computed previously.
+#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
+pub enum Data<'a> {
+    /// The object is missing, either because it didn't exist in the working tree or because its `id` was null.
+    /// Such data equals an empty buffer.
+    Missing,
+    /// The textual data as processed and ready for merging, i.e. suitable for storage in Git.
+    Buffer(&'a [u8]),
+    /// The file or blob is above the big-file threshold and cannot be processed.
+    ///
+    /// In this state, the file cannot be merged.
+    TooLarge {
+        /// The size of the object prior to performing any filtering or as it was found on disk.
+        ///
+        /// Note that technically, the size isn't always representative of the same 'state' of the
+        /// content, as at times it is the size of the blob in Git, and at other times the size of the file
+        /// in the worktree.
+        size: u64,
+    },
+}
+
+impl<'a> Data<'a> {
+    /// Return ourselves as a slice of bytes if this instance stores data.
+    /// Note that missing data is interpreted as an empty slice, to facilitate additions and deletions.
+    pub fn as_slice(&self) -> Option<&'a [u8]> {
+        match self {
+            Data::Buffer(d) => Some(d),
+            Data::Missing => Some(&[]),
+            Data::TooLarge { .. } => None,
+        }
+    }
+}
diff --git a/gix-merge/src/blob/platform/set_resource.rs b/gix-merge/src/blob/platform/set_resource.rs
new file mode 100644
index 00000000000..377642e2355
--- /dev/null
+++ b/gix-merge/src/blob/platform/set_resource.rs
@@ -0,0 +1,103 @@
+use bstr::{BStr, BString};
+
+use crate::blob::platform::Resource;
+use crate::blob::{pipeline, Platform, ResourceKind};
+
+/// The error returned by [Platform::set_resource](Platform::set_resource).
+#[derive(Debug, thiserror::Error)]
+#[allow(missing_docs)]
+pub enum Error {
+    #[error("Can only merge blobs, not {mode:?}")]
+    InvalidMode { mode: gix_object::tree::EntryKind },
+    #[error("Failed to read {kind:?} worktree data from '{rela_path}'")]
+    Io {
+        rela_path: BString,
+        kind: ResourceKind,
+        source: std::io::Error,
+    },
+    #[error("Failed to obtain attributes for {kind:?} resource at '{rela_path}'")]
+    Attributes {
+        rela_path: BString,
+        kind: ResourceKind,
+        source: std::io::Error,
+    },
+    #[error(transparent)]
+    ConvertToMergeable(#[from] pipeline::convert_to_mergeable::Error),
+}
+
+/// Preparation
+impl Platform {
+    /// Store enough information about a resource to eventually use it in a merge, where…
+    ///
+    /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either
+    ///   be a resource in the worktree, or it's considered a non-existing, deleted object.
+    ///   If an `id` is known, i.e. the hash of the object as it is (or would be) stored in `git`, then it should be provided
+    ///   for completeness. Note that it's not expected to be in `objects` if `rela_path` is set and a worktree-root
+    ///   is available for `kind`.
+    /// * `mode` is the kind of object (only `Blob` and `BlobExecutable` are allowed)
+    /// * `rela_path` is the relative path as seen from the (work)tree root.
+    /// * `kind` identifies the side of the merge this resource will be used for.
+ /// * `objects` provides access to the object database in case the resource can't be read from a worktree. + pub fn set_resource( + &mut self, + id: gix_hash::ObjectId, + mode: gix_object::tree::EntryKind, + rela_path: &BStr, + kind: ResourceKind, + objects: &impl gix_object::FindObjectOrHeader, + ) -> Result<(), Error> { + if !matches!( + mode, + gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable + ) { + return Err(Error::InvalidMode { mode }); + } + let entry = self + .attr_stack + .at_entry(rela_path, None, objects) + .map_err(|err| Error::Attributes { + source: err, + kind, + rela_path: rela_path.to_owned(), + })?; + + let storage = match kind { + ResourceKind::OtherOrTheirs => &mut self.other, + ResourceKind::CommonAncestorOrBase => &mut self.ancestor, + ResourceKind::CurrentOrOurs => &mut self.current, + }; + + let mut buf_storage = Vec::new(); + let out = self.filter.convert_to_mergeable( + &id, + mode, + rela_path, + kind, + &mut |_, out| { + let _ = entry.matching_attributes(out); + }, + objects, + self.filter_mode, + storage.as_mut().map_or(&mut buf_storage, |s| &mut s.buffer), + )?; + + match storage { + None => { + *storage = Some(Resource { + id, + rela_path: rela_path.to_owned(), + data: out, + mode, + buffer: buf_storage, + }); + } + Some(storage) => { + storage.id = id; + storage.rela_path = rela_path.to_owned(); + storage.data = out; + storage.mode = mode; + } + }; + Ok(()) + } +} diff --git a/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar b/gix-merge/tests/fixtures/generated-archives/make_blob_repo.tar index ee7859571ca7ed197f13a05aeeec84a1513be7be..9105e6caa792b3db34251677ebb38f21751ab371 100644 GIT binary patch delta 1627 zcmb7EZA?>F7`~?ztx78bQq8o{#jg?z=YIDZ$CeG23F68UnP}DB+j}d_2n^dq9SCK` zp!JJ(t}vM&plo9(N*M@>SY=KS87M|0j_5MyL^l}d5GQk&TDk@QxF09)kLSG4dCvPj z@0;By&2E$)vj$8>A&Q`opiqcG42KAWC<;jefeyEhcL8|-$a*{@A9;NnGJDXvqeU@EIsEqI)e}59V)W)*aXeI@tB0|mSvx(L7`<5A zD86wgZ_zjZn)0pd{FAr8ayloJI5n!pi0ni2dXr|oVDjn;S@M~OM~q&c%Z`N;QP2CK zNgWn#2JRc7)Hxe0?tx2&q7pi z0qJ#6IZeI$_{bBy1`D%6Z;xNr3jZvF+bVX9EWoys&kEz;I;bp&`5kL=wwl)eJZNaA}{*os9;)*ZapSx^8!E#(;Lzj==1m2+NO7wQI_FhM_hAOWrE6PxwJ&yF-Bte*Gr+O6p|xmst*8a z1B@^TS`NiHf`+*09?|duZlHL`69h&wC~IU{BS|s&lfpf2I-pXLqkm7MG&N$TA7vnF z%>{3p<1fT%V&1(IlhancNbl*aPE~Me&)!6RQK3^Y`IElv?ld&i`@CEfaPW#l41%dpC`r8sjv)lho*QDNGLFI$okSW9qTN{IpTXffk39f ZPzKe~G-Nd9l;jPfupUq|G!^*C{sFR>M_B*> delta 1511 zcmb7EZBSHI7`|tB#RXkNTSXcn!Ngq=_MCIS1wTh8Sp!i)9Z~S^-n&SJrV9<`D5jtz z76YsqU>_*D6+r@v!VHNZOBza8kvTO^>X0~ogo&b4>ULRnsj)xyP4R7>a=mIC%3m^-FS}0oN2R4_3YnRk(@j~Kl2txO zxOI+&6?)O5xAPf2MRrM0m=HbvFWtJSD#O6ZJ^8Jr@p%Lgh6HN%W4C~e9)Vr9On-gg zV3UqF%x+Fb50_`F_eXB7_Hvk;lhMQF+7^r%;_3@NaC0(xxZSpQJ=}>?@or8=54XqG zJ!YZq=Nl{DN_{V?MSOGBHuZtLtIuq(~~RQrXsPu@VvV(AZCo26~n=@EqbXW)Nw@g2lu@OhXD=ER0FySWYk_o~I;^ zE*cx<7Wf^GU?@33LlS+yU}TSi@D~+p9b@l41{IoSp9;+dl>+@!vKcGMf0smZ$>%c1 zTk?EJApLB78J4EJO>`{ak@J=Pd-J*rMtx_hE~`17{Q2GX$5$7ohR*4G zd-dYkZ9OIJzG0JU&)l!gp{wt{-k;)H(|4z)DSXy4a^vi1df8yu3BoZgWgDpL*7zta z;;`2fu#)m#yj=ryq5-m`fJJ1HSXzR-Ny4PW$PQx?aROz*G|BUV#CY#rZt!6dJG`Zr z)nKk^QR3OSNaK>@aXrSJb;XUwe%#+*9s>_Xu#QHKGz3rg`3?Ly{?h}x8#^_`CckAi z@&~+pxmFuGpZLCD%5piN4WRCd+@~lE_jv^?S#O_rG+>%wkmt>?3~k{=ESVugv5@9i z5pq1k(UKe#nl>S13k@;2BT*kbks|*)AQE{8bVdzwkO@){l4$9L#a%OEXXy1EuTGDQ zjkG*Ex8Z1nE2hBu$vp=>fA`v_l?NOj&r1r&iRTLwjrAA*{HVO+uq$KZ)}LBSw^t{8 z^YU(|-$0P@Jh6LAu2NDUNy+u|-BY$%JLaJ1^%hl$zFp8Mq$ z@ApnkOkSFvV%Z+jb~8HaJcxdJEH+>Ka><;of@8vGUD9w?eT0toUZncY4r1{cM>Sc7MQAU_T+ iU{?XPJpicv8HS-r`Prr;vo@q>%iGom8lD6JrTTAut}NRC diff --git 
a/gix-merge/tests/fixtures/generated-archives/text-baseline.tar b/gix-merge/tests/fixtures/generated-archives/text-baseline.tar index b7ffc607c766aa40a4a2948d5aae7d703082d780..8bd4e8f2244dd7a8d08f0d24d4c0d6fcb7aaa715 100644 GIT binary patch delta 1671 zcmZp8A>MFCVng#SDPv;;BL+hQ6Ei~tBLgF20|o;FLql_OLk5G%iJB?f8TT@>F>YqR zbKB50Co?ZqH#ILMGcR2?IU_MIJyk!yw5V7wDJMTk0gGf(VsR>l! zPt^mHDO_0O$+Jla)1=88K8aD!E8IvHOg?a5o<=5XKTxEe$=Cwr;bS@KS%4fU)48TF zOHJPYlaofq+XynM)5hq@uODhn{~*rDJ^cY6W6X9hKE|HFwgkqu1g5qG=C%ZuWeKbi z(~dB*@S*03iGo4XQ=YM!F`F`&LUPb^MrKBKsG{jy6PP2Xr&us6iJPG3IumnKGf1wR z%*gnkIgyuZv!Kv>#)%4iAZrjNOu=b_nUNtd|AK7AZ^8_mCYYEQ5#@$CFcXY1d|_;2 zj&K80BFq!~D4qaDo7{ARCrl<271(hEhncx4v=GAQ8DK2QLk(iXVUW3@i7~=OxWaX^ z_5;bu@?H)&3^6k>B`Rpwzyby%dQAJ_rr`vY0wi!%r3ld^AHiL%NVn$I$UQ07m6H@~tGYj3)yv*%_ PIjrv)xBq?3%FF=(Ba9on delta 110 zcmV-!0FnQI%ou>^7qElc1Tiu*Ft>;91bG3n+~P5pU>X4omyHquT(@)*0gO?RAS#Ar z0fuA(hGYYVWCW&U1z5J~1%@z}hK2zV1~fA@HUKb_u_|Pf0RjIEIWRXiI5IIaEp={b Qw-Im!?*Wk@1*RSb2+d3-A^-pY diff --git a/gix-merge/tests/fixtures/make_blob_repo.sh b/gix-merge/tests/fixtures/make_blob_repo.sh index 13af2c5c6bb..8f4d23f38ec 100644 --- a/gix-merge/tests/fixtures/make_blob_repo.sh +++ b/gix-merge/tests/fixtures/make_blob_repo.sh @@ -3,7 +3,7 @@ set -eu -o pipefail git init -q -echo a > a +echo just-set > just-set echo b > b echo union > union echo e > e-no-attr @@ -11,7 +11,7 @@ echo unset > unset echo unspecified > unspecified cat <.gitattributes -a merge=a +just-set merge b merge=b union merge=union missing merge=missing diff --git a/gix-merge/tests/fixtures/text-baseline.sh b/gix-merge/tests/fixtures/text-baseline.sh index 47e160c56af..17d954aa278 100644 --- a/gix-merge/tests/fixtures/text-baseline.sh +++ b/gix-merge/tests/fixtures/text-baseline.sh @@ -615,6 +615,14 @@ EOF ) ) +mkdir line-ending-change +(cd line-ending-change + + echo -e "a\n" > base.blob + echo -e "a\r\n" > ours.blob + echo -e "a\n" > theirs.blob +) + for dir in simple \ multi-change \ @@ -629,6 +637,7 @@ for dir in simple \ zdiff3-evil \ no-change-add \ no-change-remove \ + line-ending-change \ complex/no-change \ complex/no-conflict \ complex/no-conflict-too \ diff --git a/gix-merge/tests/merge/blob/builtin_driver.rs b/gix-merge/tests/merge/blob/builtin_driver.rs index d42ec7aa823..b0d7afa8f85 100644 --- a/gix-merge/tests/merge/blob/builtin_driver.rs +++ b/gix-merge/tests/merge/blob/builtin_driver.rs @@ -89,12 +89,10 @@ mod text { let actual = gix_merge::blob::builtin_driver::text( &mut out, &mut input, + case.labels(), &case.ours, - Some(case.ours_marker.as_str().as_ref()), &case.base, - Some(case.base_marker.as_str().as_ref()), &case.theirs, - Some(case.theirs_marker.as_str().as_ref()), case.options, ); if is_case_diverging(&case) { @@ -124,7 +122,7 @@ mod text { ); assert_eq!( ((num_diverging as f32 / num_cases as f32) * 100.0) as usize, - 12, + 11, "Just to show the percentage of skipped tests - this should get better" ); Ok(()) @@ -132,7 +130,7 @@ mod text { mod baseline { use bstr::BString; - use gix_merge::blob::builtin_driver::text::{ConflictStyle, ResolveWith}; + use gix_merge::blob::builtin_driver::text::{Conflict, ConflictStyle}; use std::path::Path; #[derive(Debug)] @@ -148,6 +146,16 @@ mod text { pub options: gix_merge::blob::builtin_driver::text::Options, } + impl Expectation { + pub fn labels(&self) -> gix_merge::blob::builtin_driver::text::Labels<'_> { + gix_merge::blob::builtin_driver::text::Labels { + ancestor: Some(self.base_marker.as_str().as_ref()), + current: Some(self.ours_marker.as_str().as_ref()), + other: 
Some(self.theirs_marker.as_str().as_ref()), + } + } + } + pub struct Expectations<'a> { root: &'a Path, lines: std::str::Lines<'a>, @@ -178,12 +186,18 @@ mod text { let mut options = gix_merge::blob::builtin_driver::text::Options::default(); for arg in words { - match arg { - "--diff3" => options.conflict_style = ConflictStyle::Diff3, - "--zdiff3" => options.conflict_style = ConflictStyle::ZealousDiff3, - "--ours" => options.on_conflict = Some(ResolveWith::Ours), - "--theirs" => options.on_conflict = Some(ResolveWith::Theirs), - "--union" => options.on_conflict = Some(ResolveWith::Union), + options.conflict = match arg { + "--diff3" => Conflict::Keep { + style: ConflictStyle::Diff3, + marker_size: 7, + }, + "--zdiff3" => Conflict::Keep { + style: ConflictStyle::ZealousDiff3, + marker_size: 7, + }, + "--ours" => Conflict::ResolveWithOurs, + "--theirs" => Conflict::ResolveWithTheirs, + "--union" => Conflict::ResolveWithUnion, _ => panic!("Unknown argument to parse into options: '{arg}'"), } } diff --git a/gix-merge/tests/merge/blob/platform.rs b/gix-merge/tests/merge/blob/platform.rs index 6865e097f4d..d03a3ddc960 100644 --- a/gix-merge/tests/merge/blob/platform.rs +++ b/gix-merge/tests/merge/blob/platform.rs @@ -1,42 +1,603 @@ -use gix_merge::blob::{pipeline, ResourceKind}; -use gix_object::tree::EntryKind; use gix_worktree::stack::state::attributes; use gix_merge::blob::Platform; -#[test] -fn ancestor_and_current_and_other_do_not_exist() -> crate::Result { - let mut platform = new_platform(None, pipeline::Mode::default()); - platform.set_resource( - gix_hash::Kind::Sha1.null(), - EntryKind::Blob, - "also-missing".into(), - ResourceKind::CommonAncestorOrBase, - &gix_object::find::Never, - )?; - - platform.set_resource( - gix_hash::Kind::Sha1.null(), - EntryKind::Blob, - "can't-be-found-in-odb".into(), - ResourceKind::CurrentOrOurs, - &gix_object::find::Never, - )?; - platform.set_resource( - gix_hash::Kind::Sha1.null(), - EntryKind::BlobExecutable, - "can't-be-found-in-odb".into(), - ResourceKind::OtherOrTheirs, - &gix_object::find::Never, - )?; - - let state = platform - .prepare_merge_state(&gix_object::find::Never) - .expect("no validation is done here, let the caller inspect"); - assert_eq!(state.ancestor.data.as_slice(), None); - assert_eq!(state.current.data.as_slice(), None); - assert_eq!(state.other.data.as_slice(), None); - Ok(()) +mod merge { + use crate::blob::platform::new_platform; + use crate::blob::util::ObjectDb; + use bstr::{BStr, ByteSlice}; + use gix_merge::blob::builtin_driver::text::ConflictStyle; + use gix_merge::blob::platform::builtin_merge::Pick; + use gix_merge::blob::platform::DriverChoice; + use gix_merge::blob::{builtin_driver, pipeline, platform, BuiltinDriver, Resolution, ResourceKind}; + use gix_object::tree::EntryKind; + use std::process::Stdio; + + #[test] + fn builtin_text_uses_binary_if_needed() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "a".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + let mut db = ObjectDb::default(); + for (content, kind) in [ + ("ours", ResourceKind::CurrentOrOurs), + ("theirs\0", ResourceKind::OtherOrTheirs), + ] { + let id = db.insert(content); + platform.set_resource( + id, + EntryKind::Blob, + "path matters only for attribute lookup".into(), + kind, + &db, + )?; + } + let mut platform_ref = platform.prepare_merge(&db, Default::default())?; + assert_eq!( + platform_ref.driver, + 
DriverChoice::BuiltIn(BuiltinDriver::Text), + "it starts out at the default text driver" + ); + + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Ours, Resolution::Conflict), + "it detected the binary buffer, ran the binary merge with default conflict resolution" + ); + + platform_ref.options.resolve_binary_with = Some(builtin_driver::binary::ResolveWith::Theirs); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Theirs, Resolution::Complete), + "the auto-binary driver respects its own options" + ); + Ok(()) + } + + #[test] + fn builtin_with_conflict() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + let mut db = ObjectDb::default(); + for (content, kind) in [ + ("ours", ResourceKind::CurrentOrOurs), + ("theirs", ResourceKind::OtherOrTheirs), + ] { + let id = db.insert(content); + platform.set_resource(id, EntryKind::Blob, "b".into(), kind, &db)?; + } + + let mut platform_ref = platform.prepare_merge(&db, Default::default())?; + assert_eq!(platform_ref.driver, DriverChoice::BuiltIn(BuiltinDriver::Text)); + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Conflict)); + assert_eq!( + buf.as_bstr(), + r#"<<<<<<< current label +ours +======= +theirs +>>>>>>> other label +"#, + "default options apply, hence the 'merge' style conflict" + ); + platform_ref.options.text.conflict = builtin_driver::text::Conflict::Keep { + style: ConflictStyle::Diff3, + marker_size: 3, + }; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Conflict)); + + assert_eq!( + buf.as_bstr(), + r#"<<< current label +ours +||| ancestor label +b +=== +theirs +>>> other label +"#, + "options apply correctly" + ); + + platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithOurs; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Buffer, Resolution::Complete), + "it's actually unclear now if there ever was a conflict, but we *could* compute it" + ); + assert_eq!(buf.as_bstr(), "ours"); + + platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithTheirs; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete)); + assert_eq!(buf.as_bstr(), "theirs"); + + platform_ref.options.text.conflict = builtin_driver::text::Conflict::ResolveWithUnion; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete)); + assert_eq!(buf.as_bstr(), "ours\ntheirs"); + + platform_ref.driver = DriverChoice::BuiltIn(BuiltinDriver::Union); + platform_ref.options.text.conflict = builtin_driver::text::Conflict::default(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete)); + assert_eq!(buf.as_bstr(), "ours\ntheirs"); + + platform_ref.driver = DriverChoice::BuiltIn(BuiltinDriver::Binary); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + 
(Pick::Ours, Resolution::Conflict), + "binary merges choose ours but conflict by default" + ); + assert!(buf.is_empty(), "it tells us where to get the content from"); + assert_eq!( + platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), + "ours", + "getting access to the content is simplified" + ); + + for (expected, expected_pick, resolve) in [ + ("ours", Pick::Ours, builtin_driver::binary::ResolveWith::Ours), + ("theirs", Pick::Theirs, builtin_driver::binary::ResolveWith::Theirs), + ("b\n", Pick::Ancestor, builtin_driver::binary::ResolveWith::Ancestor), + ] { + platform_ref.options.resolve_binary_with = Some(resolve); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (expected_pick, Resolution::Complete)); + assert_eq!(platform_ref.buffer_by_pick(res.0).unwrap().as_bstr(), expected); + } + + Ok(()) + } + + #[test] + fn with_external() -> crate::Result { + let mut platform = new_platform( + [gix_merge::blob::Driver { + name: "b".into(), + command: + "for arg in %O %A %B %L %P %S %X %Y %F; do echo $arg >> \"%A\"; done; cat \"%O\" \"%B\" >> \"%A\"" + .into(), + ..Default::default() + }], + pipeline::Mode::ToGit, + ); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + let mut db = ObjectDb::default(); + for (content, kind) in [ + ("ours", ResourceKind::CurrentOrOurs), + ("theirs", ResourceKind::OtherOrTheirs), + ] { + let id = db.insert(content); + platform.set_resource(id, EntryKind::Blob, "b".into(), kind, &db)?; + } + + let platform_ref = platform.prepare_merge(&db, Default::default())?; + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!(res, (Pick::Buffer, Resolution::Complete), "merge drivers always merge "); + let mut lines = cleaned_driver_lines(&buf)?; + for tmp_file in lines.by_ref().take(3) { + assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); + } + + let lines: Vec<_> = lines.collect(); + assert_eq!( + lines, + [ + "7", + "b", + "ancestor label", + "current label", + "other label", + "%F", + "b", + "theirs" + ], + "we handle word-splitting and definitely pick-up what's written into the %A buffer" + ); + + let id = db.insert("binary\0"); + platform.set_resource(id, EntryKind::Blob, "b".into(), ResourceKind::OtherOrTheirs, &db)?; + let platform_ref = platform.prepare_merge(&db, Default::default())?; + let res = platform_ref.merge(&mut buf, default_labels(), Default::default())?; + assert_eq!( + res, + (Pick::Buffer, Resolution::Complete), + "merge drivers deal with binary themselves" + ); + let mut lines = cleaned_driver_lines(&buf)?; + for tmp_file in lines.by_ref().take(3) { + assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); + } + let lines: Vec<_> = lines.collect(); + assert_eq!( + lines, + [ + "7", + "b", + "ancestor label", + "current label", + "other label", + "%F", + "b", + "binary\0" + ], + "in this case, the binary lines are just taken verbatim" + ); + + Ok(()) + } + + #[test] + fn missing_buffers_are_empty_buffers() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "just-set".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + // Two deletions + for kind in [ResourceKind::CurrentOrOurs, ResourceKind::OtherOrTheirs] { + platform.set_resource( + gix_hash::Kind::Sha1.null(), + 
EntryKind::Blob, + "does not matter for driver".into(), + kind, + &gix_object::find::Never, + )?; + } + + let platform_ref = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + + let mut buf = Vec::new(); + let res = platform_ref.merge(&mut buf, Default::default(), Default::default())?; + assert_eq!( + res, + (Pick::Buffer, Resolution::Complete), + "both versions are deleted, an actual merge happened" + ); + assert!( + buf.is_empty(), + "the new buffer is considered empty, both sides were deleted, too" + ); + + let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); + let res = platform_ref.builtin_merge(BuiltinDriver::Text, &mut buf, &mut input, Default::default()); + assert_eq!( + res, + Some((Pick::Buffer, Resolution::Complete)), + "both versions are deleted" + ); + assert!(buf.is_empty(), "the result is the same on direct invocation"); + + let print_all = "for arg in $@ %O %A %B %L %P %S %X %Y %F; do echo $arg; done"; + let mut cmd = platform_ref.prepare_external_driver(print_all.into(), default_labels(), Default::default())?; + let stdout = cmd.stdout(Stdio::piped()).output()?.stdout; + let mut lines = cleaned_driver_lines(&stdout)?; + for tmp_file in lines.by_ref().take(3) { + assert!(tmp_file.contains_str(&b".tmp"[..]), "{tmp_file}"); + } + let lines: Vec<_> = lines.collect(); + assert_eq!( + lines, + [ + "7", + "does not matter for driver", + "ancestor label", + "current label", + "other label", + "%F" + ], + "word splitting is prevented thanks to proper quoting" + ); + Ok(()) + } + + #[test] + fn one_buffer_too_large() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.filter.options.large_file_threshold_bytes = 9; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "just-set".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + platform.filter.roots.other_root = platform.filter.roots.common_ancestor_root.clone(); + platform.filter.roots.current_root = platform.filter.roots.common_ancestor_root.clone(); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unspecified".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + )?; + + let platform_ref = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!(platform_ref.other.data, platform::resource::Data::TooLarge { size: 12 }); + + let mut out = Vec::new(); + let err = platform_ref + .merge(&mut out, Default::default(), Default::default()) + .unwrap_err(); + assert!(matches!(err, platform::merge::Error::ResourceTooLarge)); + + let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]); + assert_eq!( + platform_ref.builtin_merge(BuiltinDriver::Text, &mut out, &mut input, Default::default(),), + None + ); + + let err = platform_ref + .prepare_external_driver("bogus".into(), Default::default(), Default::default()) + .unwrap_err(); + assert!(matches!( + err, + platform::prepare_external_driver::Error::ResourceTooLarge { .. 
} + )); + Ok(()) + } + + fn cleaned_driver_lines(buf: &[u8]) -> std::io::Result> { + let current_dir = gix_path::into_bstr(std::env::current_dir()?); + Ok(buf + .lines() + .map(move |line| line.strip_prefix(current_dir.as_bytes()).unwrap_or(line).as_bstr())) + } + + fn default_labels() -> builtin_driver::text::Labels<'static> { + builtin_driver::text::Labels { + ancestor: Some("ancestor label".into()), + current: Some("current label".into()), + other: Some("other label".into()), + } + } +} + +mod prepare_merge { + use crate::blob::platform::new_platform; + use gix_merge::blob::platform::{resource, DriverChoice}; + use gix_merge::blob::{builtin_driver, pipeline, BuiltinDriver, ResourceKind}; + use gix_object::tree::EntryKind; + + #[test] + fn ancestor_and_current_and_other_do_not_exist() -> crate::Result { + let mut platform = new_platform(None, pipeline::Mode::ToGit); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "also-missing".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "can't-be-found-in-odb".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::BlobExecutable, + "can't-be-found-in-odb".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + )?; + + let state = platform + .prepare_merge(&gix_object::find::Never, Default::default()) + .expect("no validation is done here, let the caller inspect"); + assert_eq!(state.ancestor.data, resource::Data::Missing); + assert_eq!(state.current.data, resource::Data::Missing); + assert_eq!(state.other.data, resource::Data::Missing); + Ok(()) + } + + #[test] + fn driver_selection() -> crate::Result { + let mut platform = new_platform( + [ + gix_merge::blob::Driver { + name: "union".into(), + ..Default::default() + }, + gix_merge::blob::Driver { + name: "to proof it will be sorted".into(), + ..Default::default() + }, + gix_merge::blob::Driver { + name: "b".into(), + recursive: Some("for-recursion".into()), + ..Default::default() + }, + gix_merge::blob::Driver { + name: "for-recursion".into(), + recursive: Some("should not be looked up".into()), + ..Default::default() + }, + ], + pipeline::Mode::ToGit, + ); + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "just-set".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "does not matter for driver".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::BlobExecutable, + "also does not matter for driver".into(), + ResourceKind::OtherOrTheirs, + &gix_object::find::Never, + )?; + + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "`merge` attribute means text" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unset".into(), + ResourceKind::CommonAncestorOrBase, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "`-merge` attribute means binary, but it looked up 'current' which is still at some bogus worktree path" + ); + + platform.set_resource( + 
gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unset".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Binary), + "`-merge` attribute means binary" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "unspecified".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "`!merge` attribute means the hardcoded default" + ); + + platform.options.default_driver = Some("union".into()); + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + let expected_idx = 3; + assert_eq!( + prepared.driver, + DriverChoice::Index(expected_idx), + "`!merge` attribute will also pick up the 'merge.default' configuration, and find the name in passed drivers first.\ + Note that the index is 1, even though it was 0 when passing the drivers - they are sorted by name." + ); + assert_eq!(platform.drivers()[expected_idx].name, "union"); + + platform.options.default_driver = Some("binary".into()); + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Binary), + "`!merge` attribute will also pick up the 'merge.default' configuration, non-overridden builtin filters work as well" + ); + + platform.options.default_driver = Some("Binary".into()); + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + assert_eq!( + prepared.driver, + DriverChoice::BuiltIn(BuiltinDriver::Text), + "'merge.default' is case-sensitive" + ); + + platform.set_resource( + gix_hash::Kind::Sha1.null(), + EntryKind::Blob, + "b".into(), + ResourceKind::CurrentOrOurs, + &gix_object::find::Never, + )?; + let prepared = platform.prepare_merge(&gix_object::find::Never, Default::default())?; + let expected_idx = 0; + assert_eq!(prepared.driver, DriverChoice::Index(expected_idx)); + assert_eq!( + platform.drivers()[expected_idx].name, + "b", + "by default, even if recursive is specified, it doesn't look it up" + ); + + let prepared = platform.prepare_merge( + &gix_object::find::Never, + gix_merge::blob::platform::merge::Options { + is_virtual_ancestor: true, + resolve_binary_with: None, + ..Default::default() + }, + )?; + let expected_idx = 1; + assert_eq!(prepared.driver, DriverChoice::Index(expected_idx),); + assert_eq!( + prepared.options.resolve_binary_with, + Some(builtin_driver::binary::ResolveWith::Ours), + "it automatically adjusts the merge mode for binary operations to work for bases" + ); + assert_eq!( + platform.drivers()[expected_idx].name, + "for-recursion", + "It looks up the final driver, including recursion, it only looks it up once though" + ); + Ok(()) + } } mod set_resource { @@ -98,487 +659,3 @@ fn new_platform( Default::default(), ) } - -// -// #[test] -// fn with_driver() -> crate::Result { -// let root = gix_testtools::scripted_fixture_read_only("make_blob_repo.sh")?; -// let print_all = "echo $@ %O %A %B %L %P %S %X %Y"; -// let print_script_args = "echo $@"; -// let mut attributes = gix_worktree::Stack::new( -// &root, -// gix_worktree::stack::State::AttributesStack(gix_worktree::stack::state::Attributes::new( -// Default::default(), -// None, -// 
attributes::Source::WorktreeThenIdMapping, -// Default::default(), -// )), -// gix_worktree::glob::pattern::Case::Sensitive, -// Vec::new(), -// Vec::new(), -// ); -// let mut filter = gix_merge::blob::Pipeline::new( -// WorktreeRoots { -// common_ancestor_root: Some(root.clone()), -// ..Default::default() -// }, -// gix_filter::Pipeline::default(), -// vec![ -// gix_merge::blob::Driver { -// name: "a".into(), -// command: print_all.into(), -// ..Default::default() -// }, -// gix_merge::blob::Driver { -// name: "b".into(), -// command: print_script_args.into(), -// ..Default::default() -// }, -// gix_merge::blob::Driver { -// name: "union".into(), -// ..Default::default() -// }, -// gix_merge::blob::Driver { -// name: "missing".into(), -// ..Default::default() -// }, -// ], -// pipeline::Options { -// default_driver: Some("binary".into()), -// ..crate::blob::pipeline::default_options() -// }, -// ); -// -// let mut buf = Vec::new(); -// let does_not_matter = gix_hash::Kind::Sha1.null(); -// let path = "unspecified"; -// let platform = attributes.at_entry(path, None, &gix_object::find::Never)?; -// let out = filter.convert_to_mergeable( -// &does_not_matter, -// EntryKind::Blob, -// path.into(), -// ResourceKind::CommonAncestorOrBase, -// &mut |_, out| { -// let _ = platform.matching_attributes(out); -// }, -// &gix_object::find::Never, -// pipeline::Mode::ToGit, -// &mut buf, -// )?; -// assert_eq!( -// out.driver, -// DriverChoice::BuiltIn(BuiltinDriver::Binary), -// "fall through to what's set in options" -// ); -// assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// assert_eq!(buf.as_bstr(), "unspecified\n"); -// -// let path = "union"; -// let platform = attributes.at_entry(path, None, &gix_object::find::Never)?; -// let out = filter.convert_to_mergeable( -// &does_not_matter, -// EntryKind::Blob, -// path.into(), -// ResourceKind::CommonAncestorOrBase, -// &mut |_, out| { -// let _ = platform.matching_attributes(out); -// }, -// &gix_object::find::Never, -// pipeline::Mode::ToGit, -// &mut buf, -// )?; -// let driver_idx = 3; -// assert_eq!( -// out.driver, -// DriverChoice::Index(driver_idx), -// "it finds explicit drivers first before it searches built-in ones" -// ); -// assert_eq!( -// filter.drivers()[driver_idx].name, -// "union", -// "it has re-sorted the drivers internally, which is why it's read-only" -// ); -// assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// assert_eq!(buf.as_bstr(), "union\n"); -// // -// // let mut db = ObjectDb::default(); -// // let null = gix_hash::Kind::Sha1.null(); -// // let mut buf = Vec::new(); -// // let platform = attributes.at_entry("a", None, &gix_object::find::Never)?; -// // let worktree_modes = [ -// // pipeline::Mode::ToWorktreeAndBinaryToText, -// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, -// // ]; -// // let all_modes = [ -// // pipeline::Mode::ToGit, -// // pipeline::Mode::ToWorktreeAndBinaryToText, -// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, -// // ]; -// // for mode in worktree_modes { -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "a".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(0)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied"); -// // } -// // -// // let out = 
filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "a".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // pipeline::Mode::ToGit, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(0)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!(buf.as_bstr(), "a\n", "unconditionally use git according to mode"); -// // -// // let id = db.insert("a\n"); -// // for mode in worktree_modes { -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Blob, -// // "a".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(0)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!(buf.as_bstr(), "to-text\na\n", "filter was applied"); -// // } -// // -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Blob, -// // "a".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // pipeline::Mode::ToGit, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(0)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "a\n", -// // "no filter was applied in this mode, also when using the ODB" -// // ); -// // -// // let platform = attributes.at_entry("missing", None, &gix_object::find::Never)?; -// // for mode in all_modes { -// // buf.push(1); -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Link, -// // "missing".into(), /* does not actually exist */ -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); -// // assert_eq!(out.data, None); -// // assert_eq!(buf.len(), 0, "always cleared"); -// // -// // buf.push(1); -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Link, -// // "missing".into(), /* does not actually exist */ -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); -// // assert_eq!(out.data, None); -// // assert_eq!(buf.len(), 0, "always cleared"); -// // -// // buf.push(1); -// // let id = db.insert("link-target"); -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Link, -// // "missing".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(4), "despite missing, we get driver information"); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "link-target", -// // "no matter what, links always look the same." 
-// // ); -// // } -// -// // let platform = attributes.at_entry("b", None, &gix_object::find::Never)?; -// // for mode in all_modes { -// // buf.push(1); -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "b".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // -// // assert_eq!(out.driver_index, Some(1)); -// // assert_eq!( -// // out.data, -// // Some(pipeline::Data::Binary { size: 2 }), -// // "binary value comes from driver, and it's always respected with worktree source" -// // ); -// // assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); -// // } -// // -// // let id = db.insert("b\n"); -// // for mode in all_modes { -// // buf.push(1); -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Blob, -// // "b".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // mode, -// // &mut buf, -// // )?; -// // -// // assert_eq!(out.driver_index, Some(1)); -// // assert_eq!( -// // out.data, -// // Some(pipeline::Data::Binary { size: 2 }), -// // "binary value comes from driver, and it's always respected with DB source" -// // ); -// // assert_eq!(buf.len(), 0, "it's always cleared before any potential use"); -// // } -// // -// // let platform = attributes.at_entry("c", None, &gix_object::find::Never)?; -// // for mode in worktree_modes { -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "c".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(2)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "to-text\nc\n", -// // "filter was applied, it overrides binary=true" -// // ); -// // } -// // -// // let id = db.insert("c\n"); -// // for mode in worktree_modes { -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Blob, -// // "c".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(2)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "to-text\nc\n", -// // "filter was applied, it overrides binary=true" -// // ); -// // } -// // -// // let platform = attributes.at_entry("unset", None, &gix_object::find::Never)?; -// // for mode in all_modes { -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "unset".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!( -// // out.driver_index, None, -// // "no driver is associated, as `diff` is explicitly unset" -// // ); -// // assert_eq!( -// // out.data, -// // Some(pipeline::Data::Binary { size: 6 }), -// // "unset counts as binary" -// // ); -// // assert_eq!(buf.len(), 0); -// // } -// // -// // let id = db.insert("unset\n"); -// // for mode in all_modes { -// // let out = filter.convert_to_diffable( -// // 
&id, -// // EntryKind::Blob, -// // "unset".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!( -// // out.driver_index, None, -// // "no driver is associated, as `diff` is explicitly unset" -// // ); -// // assert_eq!( -// // out.data, -// // Some(pipeline::Data::Binary { size: 6 }), -// // "unset counts as binary" -// // ); -// // assert_eq!(buf.len(), 0); -// // } -// // -// // let platform = attributes.at_entry("d", None, &gix_object::find::Never)?; -// // let id = db.insert("d-in-db"); -// // for mode in worktree_modes { -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "d".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(3)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "to-text\nd\n", -// // "the worktree + text conversion was triggered for worktree source" -// // ); -// // -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Blob, -// // "d".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // mode, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, Some(3)); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "to-text\nd-in-db", -// // "the worktree + text conversion was triggered for db source" -// // ); -// // } -// // -// // let platform = attributes.at_entry("e-no-attr", None, &gix_object::find::Never)?; -// // let out = filter.convert_to_diffable( -// // &null, -// // EntryKind::Blob, -// // "e-no-attr".into(), -// // ResourceKind::OldOrSource, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &gix_object::find::Never, -// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, None); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "e\n", -// // "no text filter, so git conversion was applied for worktree source" -// // ); -// // -// // let id = db.insert("e-in-db"); -// // let out = filter.convert_to_diffable( -// // &id, -// // EntryKind::Blob, -// // "e-no-attr".into(), -// // ResourceKind::NewOrDestination, -// // &mut |_, out| { -// // let _ = platform.matching_attributes(out); -// // }, -// // &db, -// // pipeline::Mode::ToGitUnlessBinaryToTextIsPresent, -// // &mut buf, -// // )?; -// // assert_eq!(out.driver_index, None); -// // assert_eq!(out.data, Some(pipeline::Data::Buffer)); -// // assert_eq!( -// // buf.as_bstr(), -// // "e-in-db", -// // "no text filter, so git conversion was applied for ODB source" -// // ); -// -// Ok(()) -// } diff --git a/gix-merge/tests/merge/main.rs b/gix-merge/tests/merge/main.rs index 05375cb2279..9f7a6989d2c 100644 --- a/gix-merge/tests/merge/main.rs +++ b/gix-merge/tests/merge/main.rs @@ -1,3 +1,5 @@ +extern crate core; + #[cfg(feature = "blob")] mod blob;
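
Editor's note (not part of the patch series): for orientation, here is a minimal usage sketch of how the pieces introduced above are meant to fit together, distilled from the platform tests in this series. The helper name `merge_one_file`, the path `file.txt`, the labels, and the generic `objects` parameter are illustrative assumptions; everything else mirrors calls the tests themselves exercise.

use gix_merge::blob::{builtin_driver, Platform, ResourceKind};
use gix_object::tree::EntryKind;

/// A hypothetical helper (editor's sketch): register the three sides of a merge,
/// select a driver via git-attributes, run the merge, and return the merged bytes.
fn merge_one_file<T>(
    platform: &mut Platform,
    objects: &T,
    (base, ours, theirs): (gix_hash::ObjectId, gix_hash::ObjectId, gix_hash::ObjectId),
) -> Result<Vec<u8>, Box<dyn std::error::Error>>
where
    T: gix_object::Find + gix_object::FindObjectOrHeader,
{
    // Register all three sides; a null id would mean the content lives in the worktree (or is deleted).
    for (id, kind) in [
        (base, ResourceKind::CommonAncestorOrBase),
        (ours, ResourceKind::CurrentOrOurs),
        (theirs, ResourceKind::OtherOrTheirs),
    ] {
        platform.set_resource(id, EntryKind::Blob, "file.txt".into(), kind, objects)?;
    }

    // Resolve the driver from git-attributes and obtain a read-only view for merging.
    let platform_ref = platform.prepare_merge(objects, Default::default())?;

    // Labels end up in conflict markers; the default `gix_command::Context` suffices here.
    let labels = builtin_driver::text::Labels {
        ancestor: Some("base".into()),
        current: Some("ours".into()),
        other: Some("theirs".into()),
    };
    let mut out = Vec::new();
    let (pick, _resolution) = platform_ref.merge(&mut out, labels, Default::default())?;

    // `out` only holds the result for `Pick::Buffer`; otherwise copy the picked resource's buffer.
    Ok(match platform_ref.buffer_by_pick(pick) {
        Some(picked_side) => picked_side.to_owned(),
        None => out,
    })
}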