diff --git a/.bootc-dev-infra-commit.txt b/.bootc-dev-infra-commit.txt index 1163ccea..439a61ce 100644 --- a/.bootc-dev-infra-commit.txt +++ b/.bootc-dev-infra-commit.txt @@ -1 +1 @@ -a9c43f8d6fcc95ed00563878bf8632ffa159ee3c +56e4f615d38cc4a923f6a7e2a174a0c05a962451 diff --git a/.devcontainer/debian/devcontainer.json b/.devcontainer/debian/devcontainer.json new file mode 100644 index 00000000..f1b43a85 --- /dev/null +++ b/.devcontainer/debian/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "bootc-devenv-debian", + "image": "ghcr.io/bootc-dev/devenv-debian", + "customizations": { + "vscode": { + // Arbitrary, but most of our code is in one of these two + "extensions": [ + "rust-lang.rust-analyzer", + "golang.Go" + ] + }, + "devaipod": { + // When running under devaipod, use minimal capabilities + // (SYS_ADMIN, NET_ADMIN, etc.) instead of full --privileged. + "nestedContainers": true + } + }, + "features": {}, + // Use privileged mode for broad compatibility (Codespaces, Docker, + // stock devcontainer CLI). devaipod overrides this with tighter + // security via the nestedContainers customization above. + "privileged": true, + "postCreateCommand": { + // Our init script + "devenv-init": "sudo /usr/local/bin/devenv-init.sh" + }, + "remoteEnv": { + "PATH": "${containerEnv:PATH}:/usr/local/cargo/bin" + } +} diff --git a/.devcontainer/ubuntu/devcontainer.json b/.devcontainer/ubuntu/devcontainer.json new file mode 100644 index 00000000..8a6167ac --- /dev/null +++ b/.devcontainer/ubuntu/devcontainer.json @@ -0,0 +1,30 @@ +{ + "name": "bootc-devenv-ubuntu", + "image": "ghcr.io/bootc-dev/devenv-ubuntu", + "customizations": { + "vscode": { + // Arbitrary, but most of our code is in one of these two + "extensions": [ + "rust-lang.rust-analyzer", + "golang.Go" + ] + }, + "devaipod": { + // When running under devaipod, use minimal capabilities + // (SYS_ADMIN, NET_ADMIN, etc.) instead of full --privileged. 
+ "nestedContainers": true + } + }, + "features": {}, + // Use privileged mode for broad compatibility (Codespaces, Docker, + // stock devcontainer CLI). devaipod overrides this with tighter + // security via the nestedContainers customization above. + "privileged": true, + "postCreateCommand": { + // Our init script + "devenv-init": "sudo /usr/local/bin/devenv-init.sh" + }, + "remoteEnv": { + "PATH": "${containerEnv:PATH}:/usr/local/cargo/bin" + } +} diff --git a/.github/workflows/bootc-revdep.yml b/.github/workflows/bootc-revdep.yml index d5dc1c4d..db3f608f 100644 --- a/.github/workflows/bootc-revdep.yml +++ b/.github/workflows/bootc-revdep.yml @@ -44,3 +44,8 @@ jobs: - name: Build and test bootc with local composefs-rs run: just bootc/test + env: + # Use bootc branch with OpenConfig API compatibility + # TODO: revert to main once bootc-dev/bootc#2044 is merged + COMPOSEFS_BOOTC_REPO: https://github.com/cgwalters/bootc + COMPOSEFS_BOOTC_REF: prep-composefs-manifest diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d208b7cc..1f661796 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -47,6 +47,8 @@ jobs: name: Unprivileged smoke test runs-on: ubuntu-24.04 steps: + - name: Enable fs-verity on / + run: sudo tune2fs -O verity $(findmnt -vno SOURCE /) - uses: actions/checkout@v5 - uses: bootc-dev/actions/bootc-ubuntu-setup@main - uses: dtolnay/rust-toolchain@stable @@ -91,12 +93,18 @@ jobs: fail-fast: false matrix: include: - - name: centos + - name: centos-stream9 + base_image: quay.io/centos-bootc/centos-bootc:stream9 + cfsctl_features: rhel9 + - name: centos-stream10 base_image: quay.io/centos-bootc/centos-bootc:stream10 + cfsctl_features: pre-6.15 - name: debian base_image: ghcr.io/bootcrew/debian-bootc:latest + cfsctl_features: oci env: COMPOSEFS_BASE_IMAGE: ${{ matrix.base_image }} + COMPOSEFS_CFSCTL_FEATURES: ${{ matrix.cfsctl_features }} steps: - uses: actions/checkout@v5 diff --git a/Containerfile b/Containerfile 
index 59c80c3e..b9dd4374 100644 --- a/Containerfile +++ b/Containerfile @@ -14,6 +14,7 @@ # to clear stale build caches that may be incompatible across distros. ARG base_image=quay.io/centos-bootc/centos-bootc:stream10 +ARG cfsctl_features=pre-6.15 # -- source snapshot (keeps layer graph clean) -- FROM scratch AS src @@ -21,6 +22,7 @@ COPY . /src # -- build stage -- FROM ${base_image} AS build +ARG cfsctl_features COPY --from=src /src/contrib /src/contrib RUN /src/contrib/packaging/install-build-deps.sh @@ -39,7 +41,7 @@ RUN --network=none \ --mount=type=cache,target=/src/target \ --mount=type=cache,target=/root/.cargo/registry \ --mount=type=cache,target=/root/.cargo/git \ - cargo build --release -p cfsctl -p integration-tests && \ + cargo build --release -p cfsctl --features="${cfsctl_features}" -p integration-tests && \ cp /src/target/release/cfsctl /usr/bin/cfsctl && \ cp /src/target/release/cfsctl-integration-tests /usr/bin/cfsctl-integration-tests diff --git a/Justfile b/Justfile index 356deb15..ce6725c1 100644 --- a/Justfile +++ b/Justfile @@ -25,6 +25,8 @@ check-feature-combos: cargo clippy -p cfsctl --no-default-features -- -D warnings cargo clippy -p cfsctl --no-default-features --features oci -- -D warnings cargo clippy -p cfsctl --no-default-features --features http -- -D warnings + cargo clippy -p composefs-oci -- -D warnings + cargo clippy -p composefs-oci --features boot -- -D warnings # Run rustfmt check fmt-check: @@ -42,8 +44,14 @@ check: clippy check-feature-combos fmt-check test # just base_image=quay.io/centos-bootc/centos-bootc:stream10 test-integration-vm base_image := env("COMPOSEFS_BASE_IMAGE", "ghcr.io/bootcrew/debian-bootc:latest") +# cfsctl feature flags for the container build. 
Defaults match the base_image: +# debian (>= 6.15 kernel): no compat features needed +# centos stream10 (6.12): pre-6.15 +# centos stream9 (5.14): rhel9 +cfsctl_features := env("COMPOSEFS_CFSCTL_FEATURES", "pre-6.15") + # Derive test image name from base_image -_test_image := if base_image =~ "debian" { "localhost/composefs-rs-test-debian:latest" } else { "localhost/composefs-rs-test:latest" } +_test_image := if base_image =~ "debian" { "localhost/composefs-rs-test-debian:latest" } else if base_image =~ "stream9" { "localhost/composefs-rs-test-c9s:latest" } else { "localhost/composefs-rs-test:latest" } # Run unprivileged integration tests against the cfsctl binary (no root, no VM) test-integration: build @@ -51,7 +59,7 @@ test-integration: build # Build the test container image for VM-based integration tests _integration-container-build: - podman build --build-arg base_image={{base_image}} -t {{_test_image}} . + podman build --build-arg base_image={{base_image}} --build-arg cfsctl_features={{cfsctl_features}} -t {{_test_image}} . # Run all integration tests including privileged VM tests (requires podman + libvirt) test-integration-vm: build _integration-container-build diff --git a/bootc/Justfile b/bootc/Justfile index 3bf81778..c9390491 100644 --- a/bootc/Justfile +++ b/bootc/Justfile @@ -45,7 +45,7 @@ patch: clone set -euo pipefail cd "$COMPOSEFS_BOOTC_PATH" - # Check if already patched + # Check if already patched by us (either the appended comment or the inline marker) if grep -q 'Patched by composefs-rs' Cargo.toml 2>/dev/null; then echo "bootc already patched for composefs-rs" exit 0 @@ -53,16 +53,24 @@ patch: clone echo "Patching bootc Cargo.toml to use $_COMPOSEFS_SRC" - # Add [patch] section with the real local path. + # Point the composefs-rs patch at the local checkout. # bootc only depends on `cfsctl` which re-exports the other composefs-rs crates. 
# bootc's Justfile will auto-detect this via `cargo xtask local-rust-deps` # and bind-mount it into the container build (mapping /home -> /var/home as needed) - { - echo '' - echo '# Patched by composefs-rs CI to test against local composefs-rs' - echo '[patch."https://github.com/composefs/composefs-rs"]' - echo "cfsctl = { path = \"$_COMPOSEFS_SRC/crates/cfsctl\" }" - } >> Cargo.toml + if grep -q '\[patch\."https://github.com/composefs/composefs-rs"\]' Cargo.toml; then + # Patch section already exists (e.g. on a dev branch) — replace the cfsctl line + sed -i '/\[patch\."https:\/\/github.com\/composefs\/composefs-rs"\]/,/^$\|^\[/{ + s|^cfsctl = .*|cfsctl = { path = "'"$_COMPOSEFS_SRC"'/crates/cfsctl" } # Patched by composefs-rs| + }' Cargo.toml + else + # No patch section yet — append one + { + echo '' + echo '# Patched by composefs-rs CI to test against local composefs-rs' + echo '[patch."https://github.com/composefs/composefs-rs"]' + echo "cfsctl = { path = \"$_COMPOSEFS_SRC/crates/cfsctl\" }" + } >> Cargo.toml + fi # Patch the workspace lints to allow missing_docs for composefs-rs crates # bootc has workspace.lints.rust.missing_docs = "deny" but composefs-rs has undocumented items diff --git a/crates/cfsctl/Cargo.toml b/crates/cfsctl/Cargo.toml index 35ef1108..bfb56522 100644 --- a/crates/cfsctl/Cargo.toml +++ b/crates/cfsctl/Cargo.toml @@ -27,11 +27,12 @@ clap = { version = "4.5.0", default-features = false, features = ["std", "help", comfy-table = { version = "7.1", default-features = false } composefs = { workspace = true } composefs-boot = { workspace = true } -composefs-oci = { workspace = true, optional = true } +composefs-oci = { workspace = true, optional = true, features = ["boot"] } composefs-http = { workspace = true, optional = true } env_logger = { version = "0.11.0", default-features = false } hex = { version = "0.4.0", default-features = false } rustix = { version = "1.0.0", default-features = false, features = ["fs", "process"] } +serde = { 
version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } tokio = { version = "1.24.2", default-features = false, features = ["io-std", "io-util"] } diff --git a/crates/cfsctl/src/lib.rs b/crates/cfsctl/src/lib.rs index bbcbf949..70498b9b 100644 --- a/crates/cfsctl/src/lib.rs +++ b/crates/cfsctl/src/lib.rs @@ -37,6 +37,7 @@ use clap::{Parser, Subcommand, ValueEnum}; use comfy_table::{presets::UTF8_FULL, Table}; use rustix::fs::CWD; +use serde::Serialize; #[cfg(feature = "oci")] use composefs_boot::write_boot; @@ -53,6 +54,23 @@ use composefs::{ tree::RegularFile, }; +/// JSON output wrapper for `cfsctl fsck --json`. +#[derive(Serialize)] +struct FsckJsonOutput { + ok: bool, + #[serde(flatten)] + result: composefs::repository::FsckResult, +} + +/// JSON output wrapper for `cfsctl oci fsck --json`. +#[cfg(feature = "oci")] +#[derive(Serialize)] +struct OciFsckJsonOutput { + ok: bool, + #[serde(flatten)] + result: composefs_oci::OciFsckResult, +} + /// cfsctl #[derive(Debug, Parser)] #[clap(name = "cfsctl", version)] @@ -135,6 +153,9 @@ enum OciCommand { image: String, /// optional reference name for the manifest, use as 'ref/' elsewhere name: Option, + /// Also generate a bootable EROFS image from the pulled OCI image + #[arg(long)] + bootable: bool, }, /// List all tagged OCI images in the repository #[clap(name = "images")] @@ -185,33 +206,21 @@ enum OciCommand { #[clap(long, conflicts_with = "dumpfile")] json: bool, }, + /// Mount an OCI image's composefs EROFS at the given mountpoint + Mount { + /// Image reference (tag name or manifest digest) + image: String, + /// Target mountpoint + mountpoint: String, + /// Mount the bootable variant instead of the regular EROFS image + #[arg(long)] + bootable: bool, + }, /// Compute the composefs image object id of the rootfs of a stored OCI image ComputeId { #[clap(flatten)] config_opts: OCIConfigFilesystemOptions, }, - /// Create the 
composefs image of the rootfs of a stored OCI image, commit it to the repo, and print its image object ID - CreateImage { - #[clap(flatten)] - config_opts: OCIConfigFilesystemOptions, - /// optional reference name for the image, use as 'ref/' elsewhere - #[clap(long)] - image_name: Option, - }, - /// Seal a stored OCI image by creating a cloned manifest with embedded verity digest (a.k.a. composefs image object ID) - /// in the repo, then prints the stream and verity digest of the new sealed manifest - Seal { - #[clap(flatten)] - config_opts: OCIConfigOptions, - }, - /// Mounts a stored and sealed OCI image by looking up its composefs image. Note that the composefs image must be built - /// and committed to the repo first - Mount { - /// the name of the target OCI manifest stream, either a stream ID in format oci-config-: or a reference in 'ref/' - name: String, - /// the mountpoint - mountpoint: String, - }, /// Create the composefs image of the rootfs of a stored OCI image, perform bootable transformation, commit it to the repo, /// then configure boot for the image by writing new boot resources and bootloader entries to boot partition. Performs /// state preparation for composefs-setup-root consumption as well. Note that state preparation here is not suitable for @@ -229,6 +238,18 @@ enum OciCommand { #[clap(long)] cmdline: Vec, }, + /// Check integrity of OCI images in the repository + /// + /// Verifies manifest and config content digests, layer references, seal + /// consistency, and delegates to the underlying repository fsck for object + /// integrity and splitstream validation. 
+ Fsck { + /// Check only the named image instead of all tagged images + image: Option, + /// Output results as JSON (always exits 0 unless the check itself fails) + #[clap(long)] + json: bool, + }, } /// Common options for reading a filesystem from a path @@ -317,6 +338,16 @@ enum Command { #[clap(long)] backing_path_only: bool, }, + /// Check repository integrity + /// + /// Verifies fsverity digests of all objects, validates stream and image + /// symlinks, and checks splitstream internal consistency. Exits with + /// a non-zero status if corruption is found. + Fsck { + /// Output results as JSON (always exits 0 unless the check itself fails) + #[clap(long)] + json: bool, + }, #[cfg(feature = "http")] Fetch { url: String, name: String }, } @@ -502,24 +533,48 @@ where let fs = load_filesystem_from_oci_image(&repo, config_opts)?; fs.print_dumpfile()?; } + OciCommand::Mount { + ref image, + ref mountpoint, + bootable, + } => { + let img = if image.starts_with("sha256:") { + composefs_oci::oci_image::OciImage::open(&repo, image, None)? + } else { + composefs_oci::oci_image::OciImage::open_ref(&repo, image)? 
+ }; + let erofs_id = if bootable { + match img.boot_image_ref() { + Some(id) => id, + None => anyhow::bail!( + "No boot EROFS image linked — try pulling with --bootable" + ), + } + } else { + match img.image_ref() { + Some(id) => id, + None => anyhow::bail!( + "No composefs EROFS image linked — try re-pulling the image" + ), + } + }; + repo.mount_at(&erofs_id.to_hex(), mountpoint.as_str())?; + } OciCommand::ComputeId { config_opts } => { let fs = load_filesystem_from_oci_image(&repo, config_opts)?; let id = fs.compute_image_id(); println!("{}", id.to_hex()); } - OciCommand::CreateImage { - config_opts, - ref image_name, + OciCommand::Pull { + ref image, + name, + bootable, } => { - let fs = load_filesystem_from_oci_image(&repo, config_opts)?; - let image_id = fs.commit_image(&repo, image_name.as_deref())?; - println!("{}", image_id.to_id()); - } - OciCommand::Pull { ref image, name } => { // If no explicit name provided, use the image reference as the tag let tag_name = name.as_deref().unwrap_or(image); + let repo_arc = Arc::new(repo); let (result, stats) = - composefs_oci::pull_image(&Arc::new(repo), image, Some(tag_name), None).await?; + composefs_oci::pull_image(&repo_arc, image, Some(tag_name), None).await?; println!("manifest {}", result.manifest_digest); println!("config {}", result.config_digest); @@ -532,6 +587,12 @@ where stats.bytes_copied, stats.bytes_inlined, ); + + if bootable { + let image_verity = + composefs_oci::generate_boot_image(&repo_arc, &result.manifest_digest)?; + println!("Boot image: {}", image_verity.to_hex()); + } } OciCommand::ListImages { json } => { let images = composefs_oci::oci_image::list_images(&repo)?; @@ -543,7 +604,7 @@ where } else { let mut table = Table::new(); table.load_preset(UTF8_FULL); - table.set_header(["NAME", "DIGEST", "ARCH", "SEALED", "LAYERS", "REFS"]); + table.set_header(["NAME", "DIGEST", "ARCH", "LAYERS", "REFS"]); for img in images { let digest_short = img @@ -560,12 +621,10 @@ where } else { 
&img.architecture }; - let sealed = if img.sealed { "yes" } else { "no" }; table.add_row([ img.name.as_str(), digest_display, arch, - sealed, &img.layer_count.to_string(), &img.referrer_count.to_string(), ]); @@ -633,25 +692,6 @@ where composefs_oci::layer_tar(&repo, layer, &mut out)?; } } - OciCommand::Seal { - config_opts: - OCIConfigOptions { - ref config_name, - ref config_verity, - }, - } => { - let verity = verity_opt(config_verity)?; - let (digest, verity) = - composefs_oci::seal(&Arc::new(repo), config_name, verity.as_ref())?; - println!("config {digest}"); - println!("verity {}", verity.to_id()); - } - OciCommand::Mount { - ref name, - ref mountpoint, - } => { - composefs_oci::mount(&repo, name, mountpoint, None)?; - } OciCommand::PrepareBoot { config_opts: OCIConfigOptions { @@ -696,12 +736,27 @@ where create_dir_all(state.join("etc/upper"))?; create_dir_all(state.join("etc/work"))?; } + OciCommand::Fsck { image, json } => { + let result = if let Some(ref name) = image { + composefs_oci::oci_fsck_image(&repo, name).await? + } else { + composefs_oci::oci_fsck(&repo).await? 
+ }; + if json { + let output = OciFsckJsonOutput { + ok: result.is_ok(), + result, + }; + serde_json::to_writer_pretty(std::io::stdout().lock(), &output)?; + println!(); + } else { + print!("{result}"); + if !result.is_ok() { + anyhow::bail!("OCI integrity check failed"); + } + } + } }, - Command::ComputeId { fs_opts } => { - let fs = load_filesystem_from_ondisk_fs(&fs_opts, &repo)?; - let id = fs.compute_image_id(); - println!("{}", id.to_hex()); - } Command::CreateImage { fs_opts, ref image_name, @@ -710,6 +765,11 @@ where let id = fs.commit_image(&repo, image_name.as_deref())?; println!("{}", id.to_id()); } + Command::ComputeId { fs_opts } => { + let fs = load_filesystem_from_ondisk_fs(&fs_opts, &repo)?; + let id = fs.compute_image_id(); + println!("{}", id.to_hex()); + } Command::CreateDumpfile { fs_opts } => { let fs = load_filesystem_from_ondisk_fs(&fs_opts, &repo)?; fs.print_dumpfile()?; @@ -768,6 +828,22 @@ where )?, }; } + Command::Fsck { json } => { + let result = repo.fsck().await?; + if json { + let output = FsckJsonOutput { + ok: result.is_ok(), + result, + }; + serde_json::to_writer_pretty(std::io::stdout().lock(), &output)?; + println!(); + } else { + print!("{result}"); + if !result.is_ok() { + anyhow::bail!("repository integrity check failed"); + } + } + } #[cfg(feature = "http")] Command::Fetch { url, name } => { let (digest, verity) = composefs_http::download(&url, &name, Arc::new(repo)).await?; diff --git a/crates/composefs-oci/Cargo.toml b/crates/composefs-oci/Cargo.toml index eda0e0ec..59083dfc 100644 --- a/crates/composefs-oci/Cargo.toml +++ b/crates/composefs-oci/Cargo.toml @@ -10,28 +10,38 @@ repository.workspace = true rust-version.workspace = true version.workspace = true +[features] +test = ["tar", "composefs/test"] +boot = ["composefs-boot"] + [dependencies] anyhow = { version = "1.0.87", default-features = false } fn-error-context = "0.2" async-compression = { version = "0.4.0", default-features = false, features = ["tokio", "zstd", 
"gzip"] } bytes = { version = "1", default-features = false } composefs = { workspace = true } +composefs-boot = { workspace = true, optional = true } containers-image-proxy = { version = "0.9.2", default-features = false } hex = { version = "0.4.0", default-features = false } indicatif = { version = "0.17.0", default-features = false, features = ["tokio"] } oci-spec = { version = "0.8.0", default-features = false } rustix = { version = "1.0.0", features = ["fs"] } serde = { version = "1.0", default-features = false, features = ["derive"] } +thiserror = { version = "2.0.0", default-features = false } serde_json = { version = "1.0", default-features = false, features = ["std"] } sha2 = { version = "0.10.1", default-features = false } +tar = { version = "0.4.38", default-features = false, optional = true } tar-core = "0.1.0" -tokio = { version = "1.24.2", features = ["rt-multi-thread"] } +tokio = { version = "1.24.2", features = ["macros", "rt-multi-thread"] } tokio-util = { version = "0.7", default-features = false, features = ["io"] } [dev-dependencies] +cap-std = { version = "3.4.5", default-features = false } +cap-tempfile = { version = "3.4.5", default-features = false } similar-asserts = "1.7.0" tar = { version = "0.4.38", default-features = false } composefs = { workspace = true, features = ["test"] } +composefs-boot = { workspace = true } once_cell = "1.21.3" proptest = "1" tempfile = "3.8.0" diff --git a/crates/composefs-oci/src/boot.rs b/crates/composefs-oci/src/boot.rs new file mode 100644 index 00000000..35c0f373 --- /dev/null +++ b/crates/composefs-oci/src/boot.rs @@ -0,0 +1,288 @@ +//! Boot image management for OCI containers. +//! +//! A bootable EROFS image is a derived artifact from an OCI container image +//! that filters out some components (such as the UKI) to avoid circular references. 
+ +use std::sync::Arc; + +use anyhow::Result; + +use composefs::fsverity::FsVerityHashValue; +use composefs::repository::Repository; + +/// The name used for the bootable image reference in the config. +pub const BOOT_IMAGE_REF_NAME: &str = "cfs-oci-for-bootable"; + +/// Generate a bootable EROFS image from a pulled OCI manifest (idempotent). +#[cfg(feature = "boot")] +pub fn generate_boot_image( + repo: &Arc>, + manifest_digest: &str, +) -> Result { + if let Some(existing) = boot_image(repo, manifest_digest)? { + return Ok(existing); + } + + let erofs_id = crate::ensure_oci_composefs_erofs_boot(repo, manifest_digest, None, None)? + .expect("container image should produce boot EROFS"); + + Ok(erofs_id) +} + +/// Returns the boot EROFS image verity, if one exists. +pub fn boot_image( + repo: &Repository, + manifest_digest: &str, +) -> Result> { + crate::composefs_boot_erofs_for_manifest(repo, manifest_digest, None) +} + +/// Remove the bootable EROFS image reference (idempotent). +/// +/// The EROFS image itself is garbage-collected on the next `repo.gc()`. 
+pub fn remove_boot_image( + repo: &Arc>, + manifest_digest: &str, +) -> Result<()> { + let img = crate::oci_image::OciImage::open(repo, manifest_digest, None)?; + + if img.boot_image_ref().is_none() { + return Ok(()); + } + + if !img.is_container_image() { + anyhow::bail!("not a container image"); + } + + // Read original config JSON to preserve its exact bytes + let config_json = img.read_config_json(repo)?; + + let (_config_digest, new_config_verity) = crate::write_config_raw( + repo, + &config_json, + img.layer_refs().clone(), + img.image_ref(), + None, // no boot image + )?; + + let manifest_json = img.read_manifest_json(repo)?; + let layer_verities = img.layer_refs().clone(); + + crate::oci_image::rewrite_manifest( + repo, + &manifest_json, + manifest_digest, + &new_config_verity, + &layer_verities, + None, + )?; + + Ok(()) +} + +#[cfg(all(test, feature = "boot"))] +mod test { + use super::*; + use composefs::fsverity::Sha256HashValue; + use composefs::test::TestRepo; + use composefs_boot::bootloader::get_boot_resources; + + use crate::oci_image::OciImage; + use crate::test_util; + + #[tokio::test] + async fn test_boot_image_none_before_generate() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + let result = boot_image(repo, &img.manifest_digest).unwrap(); + assert!(result.is_none(), "no boot image should exist yet"); + } + + #[tokio::test] + async fn test_generate_boot_image() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + let image_verity = generate_boot_image(repo, &img.manifest_digest).unwrap(); + + let found = boot_image(repo, &img.manifest_digest).unwrap(); + assert_eq!(found, Some(image_verity.clone())); + + // Open by tag since manifest was rewritten + let oci = OciImage::open_ref(repo, "myapp:v1").unwrap(); + assert_eq!(oci.boot_image_ref(), 
Some(&image_verity)); + + let plain_image = crate::image::create_filesystem(repo, &img.config_digest, None).unwrap(); + let plain_verity = plain_image.compute_image_id(); + assert_ne!( + image_verity, plain_verity, + "boot-transformed image should differ from non-transformed image" + ); + } + + #[tokio::test] + async fn test_generate_boot_image_idempotent() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + let v1 = generate_boot_image(repo, &img.manifest_digest).unwrap(); + let v2 = generate_boot_image(repo, &img.manifest_digest).unwrap(); + assert_eq!(v1, v2); + } + + #[tokio::test] + async fn test_remove_boot_image() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + generate_boot_image(repo, &img.manifest_digest).unwrap(); + assert!(boot_image(repo, &img.manifest_digest).unwrap().is_some()); + + remove_boot_image(repo, &img.manifest_digest).unwrap(); + assert!( + boot_image(repo, &img.manifest_digest).unwrap().is_none(), + "boot image should be gone after remove" + ); + + let oci = OciImage::open_ref(repo, "myapp:v1").unwrap(); + assert!(oci.is_container_image()); + + let gc = repo.gc(&[]).unwrap(); + assert_eq!( + gc.images_pruned, 1, + "exactly the EROFS image should be pruned" + ); + } + + #[tokio::test] + async fn test_remove_boot_image_idempotent() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + remove_boot_image(repo, &img.manifest_digest).unwrap(); + + generate_boot_image(repo, &img.manifest_digest).unwrap(); + remove_boot_image(repo, &img.manifest_digest).unwrap(); + remove_boot_image(repo, &img.manifest_digest).unwrap(); + + assert!(boot_image(repo, &img.manifest_digest).unwrap().is_none()); + } + + #[tokio::test] + async fn 
test_boot_image_gc_preserves_when_tagged() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + let image_verity = generate_boot_image(repo, &img.manifest_digest).unwrap(); + + let gc = repo.gc(&[]).unwrap(); + assert_eq!(gc.images_pruned, 0); + assert_eq!(gc.streams_pruned, 0); + + let oci = OciImage::open_ref(repo, "myapp:v1").unwrap(); + assert_eq!(oci.boot_image_ref(), Some(&image_verity)); + } + + #[tokio::test] + async fn test_boot_image_gc_collects_after_untag() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + generate_boot_image(repo, &img.manifest_digest).unwrap(); + + crate::oci_image::untag_image(repo, "myapp:v1").unwrap(); + + let gc = repo.gc(&[]).unwrap(); + assert!(gc.objects_removed > 0); + assert_eq!(gc.images_pruned, 1); + assert!(gc.streams_pruned > 0); + + let gc2 = repo.gc(&[]).unwrap(); + assert_eq!(gc2.objects_removed, 0); + assert_eq!(gc2.images_pruned, 0); + assert_eq!(gc2.streams_pruned, 0); + } + + #[tokio::test] + async fn test_remove_boot_image_then_gc_preserves_oci() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + generate_boot_image(repo, &img.manifest_digest).unwrap(); + + remove_boot_image(repo, &img.manifest_digest).unwrap(); + let gc = repo.gc(&[]).unwrap(); + assert_eq!(gc.images_pruned, 1); + + let oci = OciImage::open_ref(repo, "myapp:v1").unwrap(); + assert!(oci.is_container_image()); + assert!(oci.boot_image_ref().is_none()); + } + + #[tokio::test] + async fn test_boot_content_assertion() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("myapp:v1"), 1).await; + + let boot_verity = generate_boot_image(repo, &img.manifest_digest).unwrap(); + + 
let fs = crate::image::create_filesystem(repo, &img.config_digest, None).unwrap(); + let boot_entries = get_boot_resources(&fs, repo).unwrap(); + assert_eq!(boot_entries.len(), 2); + assert!(boot_entries.iter().any(|e| matches!( + e, + composefs_boot::bootloader::BootEntry::UsrLibModulesVmLinuz(_) + )),); + assert!(boot_entries + .iter() + .any(|e| matches!(e, composefs_boot::bootloader::BootEntry::Type2(_))),); + + let plain_fs = crate::image::create_filesystem(repo, &img.config_digest, None).unwrap(); + let plain_verity = plain_fs.commit_image(repo, None).unwrap(); + assert_ne!(boot_verity, plain_verity); + } + + #[tokio::test] + async fn test_boot_content_uki() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_bootable_image(repo, Some("uki:v1"), 1).await; + + let boot_verity = generate_boot_image(repo, &img.manifest_digest).unwrap(); + + let fs = crate::image::create_filesystem(repo, &img.config_digest, None).unwrap(); + let boot_entries = get_boot_resources(&fs, repo).unwrap(); + assert_eq!(boot_entries.len(), 2); + assert!(boot_entries + .iter() + .any(|e| matches!(e, composefs_boot::bootloader::BootEntry::Type2(_))),); + assert!(boot_entries.iter().any(|e| matches!( + e, + composefs_boot::bootloader::BootEntry::UsrLibModulesVmLinuz(_) + )),); + + let plain_fs = crate::image::create_filesystem(repo, &img.config_digest, None).unwrap(); + let plain_verity = plain_fs.commit_image(repo, None).unwrap(); + assert_ne!(boot_verity, plain_verity); + } +} diff --git a/crates/composefs-oci/src/image.rs b/crates/composefs-oci/src/image.rs index 7ddc33a4..4744e548 100644 --- a/crates/composefs-oci/src/image.rs +++ b/crates/composefs-oci/src/image.rs @@ -104,7 +104,9 @@ pub fn create_filesystem( ) -> Result> { let mut filesystem = FileSystem::new(Stat::uninitialized()); - let (config, map) = crate::open_config(repo, config_name, config_verity)?; + let oc = crate::open_config(repo, config_name, config_verity)?; + let config 
= oc.config; + let map = oc.layer_refs; for diff_id in config.rootfs().diff_ids() { let layer_verity = map diff --git a/crates/composefs-oci/src/lib.rs b/crates/composefs-oci/src/lib.rs index 362d49ed..7629e5e9 100644 --- a/crates/composefs-oci/src/lib.rs +++ b/crates/composefs-oci/src/lib.rs @@ -8,21 +8,26 @@ //! - Pulling container images from registries using skopeo //! - Converting OCI image layers from tar format to composefs split streams //! - Creating mountable filesystems from OCI image configurations -//! - Sealing containers with fs-verity hashes for integrity verification - #![forbid(unsafe_code)] +pub mod boot; pub mod image; pub mod oci_image; pub mod skopeo; pub mod tar; +/// Test utilities for building OCI images from dumpfile strings. +#[cfg(any(test, feature = "test"))] +#[allow(missing_docs, missing_debug_implementations)] +#[doc(hidden)] +pub mod test_util; + // Re-export the composefs crate for consumers who only need composefs-oci pub use composefs; use std::{collections::HashMap, sync::Arc}; -use anyhow::{bail, ensure, Context, Result}; +use anyhow::{ensure, Result}; use containers_image_proxy::ImageProxyConfig; use oci_spec::image::ImageConfiguration; use sha2::{Digest, Sha256}; @@ -36,11 +41,21 @@ use composefs::{ use crate::skopeo::{OCI_CONFIG_CONTENT_TYPE, TAR_LAYER_CONTENT_TYPE}; use crate::tar::get_entry; +/// Named ref key for the EROFS image derived from this OCI config. +pub const IMAGE_REF_KEY: &str = "composefs.image"; + +/// Named ref key for the boot EROFS image derived from this OCI config. 
+pub const BOOT_IMAGE_REF_KEY: &str = "composefs.image.boot"; + // Re-export key types for convenience +#[cfg(feature = "boot")] +pub use boot::generate_boot_image; +pub use boot::{boot_image, remove_boot_image, BOOT_IMAGE_REF_NAME}; pub use oci_image::{ add_referrer, layer_dumpfile, layer_info, layer_tar, list_images, list_referrers, list_refs, - remove_referrer, remove_referrers_for_subject, resolve_ref, tag_image, untag_image, ImageInfo, - LayerInfo, OciImage, SplitstreamInfo, OCI_REF_PREFIX, + oci_fsck, oci_fsck_image, remove_referrer, remove_referrers_for_subject, resolve_ref, + tag_image, untag_image, ImageInfo, LayerInfo, OciFsckError, OciFsckResult, OciImage, + SplitstreamInfo, OCI_REF_PREFIX, }; pub use skopeo::pull_image; @@ -118,6 +133,28 @@ pub struct PullResult { type ContentAndVerity = (String, ObjectID); +/// Parsed OCI config and its associated references. +pub struct OpenConfig { + /// The parsed OCI image configuration. + pub config: ImageConfiguration, + /// Map from layer diff_id to its fs-verity object ID. + pub layer_refs: HashMap, ObjectID>, + /// The EROFS image ObjectID linked to this config, if any. + pub image_ref: Option, + /// The boot EROFS image ObjectID linked to this config, if any. 
+ pub boot_image_ref: Option, +} + +impl std::fmt::Debug for OpenConfig { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_struct("OpenConfig") + .field("layer_refs", &self.layer_refs) + .field("image_ref", &self.image_ref) + .field("boot_image_ref", &self.boot_image_ref) + .finish_non_exhaustive() + } +} + fn layer_identifier(diff_id: &str) -> String { format!("oci-layer-{diff_id}") } @@ -189,7 +226,7 @@ pub async fn pull( ) -> Result> { let (config_digest, config_verity, stats) = skopeo::pull(repo, imgref, reference, img_proxy_config).await?; - Ok(PullResult { + Ok(crate::PullResult { config_digest, config_verity, stats, @@ -204,14 +241,16 @@ fn hash(bytes: &[u8]) -> String { /// Opens and parses a container configuration. /// -/// Reads the OCI image configuration from the repository and returns both the parsed -/// configuration and a digest map containing fs-verity hashes for all referenced layers. +/// Reads the OCI image configuration from the repository and returns an [`OpenConfig`] +/// containing the parsed configuration, a digest map of layer fs-verity hashes, and an +/// optional EROFS image ObjectID if one has been linked to this config. /// /// If verity is provided, it's used directly. Otherwise, the name must be a sha256 digest /// and the corresponding verity hash will be looked up (which is more expensive) and the content /// will be hashed and compared to the provided digest. /// -/// Returns the parsed image configuration and the map of layer references. +/// The returned layer refs map does not contain the [`IMAGE_REF_KEY`] — that is +/// returned separately in [`OpenConfig::image_ref`]. /// /// Note: if the verity value is known and trusted then the layer fs-verity values can also be /// trusted. 
If not, then you can use the layer map to find objects that are ostensibly the layers @@ -220,8 +259,8 @@ pub fn open_config( repo: &Repository, config_digest: &str, verity: Option<&ObjectID>, -) -> Result<(ImageConfiguration, HashMap, ObjectID>)> { - let (data, named_refs) = oci_image::read_external_splitstream( +) -> Result> { + let (data, mut named_refs) = oci_image::read_external_splitstream( repo, &config_identifier(config_digest), verity, @@ -236,8 +275,58 @@ pub fn open_config( ); } + let image_ref = named_refs.remove(IMAGE_REF_KEY); + let boot_image_ref = named_refs.remove(BOOT_IMAGE_REF_KEY); let config = ImageConfiguration::from_reader(&data[..])?; - Ok((config, named_refs)) + Ok(OpenConfig { + config, + layer_refs: named_refs, + image_ref, + boot_image_ref, + }) +} + +/// Returns the composefs EROFS ObjectID referenced by the given OCI config, if any. +pub fn composefs_erofs_for_config( + repo: &Repository, + config_digest: &str, + verity: Option<&ObjectID>, +) -> Result> { + let oc = open_config(repo, config_digest, verity)?; + Ok(oc.image_ref) +} + +/// Returns the composefs EROFS ObjectID for an OCI image identified by manifest, if any. +/// +/// This opens the manifest to find the config, then reads the config's +/// [`IMAGE_REF_KEY`] named ref. +pub fn composefs_erofs_for_manifest( + repo: &Repository, + manifest_digest: &str, + manifest_verity: Option<&ObjectID>, +) -> Result> { + let img = oci_image::OciImage::open(repo, manifest_digest, manifest_verity)?; + Ok(img.image_ref().cloned()) +} + +/// Returns the boot EROFS ObjectID from the given OCI config, if any. +pub fn composefs_boot_erofs_for_config( + repo: &Repository, + config_digest: &str, + verity: Option<&ObjectID>, +) -> Result> { + let oc = open_config(repo, config_digest, verity)?; + Ok(oc.boot_image_ref) +} + +/// Returns the boot EROFS ObjectID for an OCI image identified by manifest, if any. 
+pub fn composefs_boot_erofs_for_manifest( + repo: &Repository, + manifest_digest: &str, + manifest_verity: Option<&ObjectID>, +) -> Result> { + let img = oci_image::OciImage::open(repo, manifest_digest, manifest_verity)?; + Ok(img.boot_image_ref().cloned()) } /// Writes a container configuration to the repository. @@ -246,64 +335,174 @@ pub fn open_config( /// provided layer reference map. The configuration is stored as an external object so /// fsverity can be independently enabled on it. /// +/// If `image` is provided, a named ref with key [`IMAGE_REF_KEY`] is added to the +/// splitstream pointing to the EROFS image's ObjectID. This ensures the GC walk keeps +/// the EROFS image alive as long as the config is reachable. +/// +/// If `boot_image` is provided, a named ref with key [`BOOT_IMAGE_REF_KEY`] is added +/// pointing to the boot EROFS image's ObjectID. +/// /// Returns a tuple of (sha256 content hash, fs-verity hash value). pub fn write_config( repo: &Arc>, config: &ImageConfiguration, refs: HashMap, ObjectID>, + image: Option<&ObjectID>, + boot_image: Option<&ObjectID>, ) -> Result> { let json = config.to_string()?; - let json_bytes = json.as_bytes(); - let config_digest = hash(json_bytes); + write_config_raw(repo, json.as_bytes(), refs, image, boot_image) +} + +/// Rewrites a container configuration in the repository from raw JSON bytes. +/// +/// Like [`write_config`], but takes pre-serialized JSON bytes instead of an +/// `ImageConfiguration`. This must be used when rewriting an existing config +/// (e.g. to add EROFS image refs) to preserve the original JSON bytes and +/// avoid changing the sha256 content digest. 
+pub fn write_config_raw( + repo: &Arc>, + config_json: &[u8], + refs: HashMap, ObjectID>, + image: Option<&ObjectID>, + boot_image: Option<&ObjectID>, +) -> Result> { + let config_digest = hash(config_json); let mut stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); for (name, value) in &refs { stream.add_named_stream_ref(name, value) } - stream.write_external(json_bytes)?; + if let Some(image_id) = image { + stream.add_named_stream_ref(IMAGE_REF_KEY, image_id); + } + if let Some(boot_id) = boot_image { + stream.add_named_stream_ref(BOOT_IMAGE_REF_KEY, boot_id); + } + stream.write_external(config_json)?; let id = repo.write_stream(stream, &config_identifier(&config_digest), None)?; Ok((config_digest, id)) } -/// Seals a container by computing its filesystem fs-verity hash and adding it to the config. +/// Ensures a composefs EROFS image exists for the given OCI container image, +/// linking it to the config splitstream so GC keeps it alive through the tag chain. +/// +/// This performs the following steps: +/// 1. Opens the manifest and config to get the image configuration +/// 2. Creates a composefs `FileSystem` from the OCI layers +/// 3. Commits the filesystem as an EROFS image to the repository +/// 4. Rewrites the config splitstream with an [`IMAGE_REF_KEY`] named ref +/// pointing to the EROFS image's ObjectID +/// 5. Rewrites the manifest splitstream with the updated config verity +/// 6. If `tag` is provided, updates the tag to point to the new manifest /// -/// Creates the complete filesystem from all layers, computes its fs-verity hash, and stores -/// this hash in the container config labels under "containers.composefs.fsverity". This allows -/// the container to be mounted with integrity protection. +/// Calling this multiple times is safe — a new EROFS image is generated each +/// time (though usually identical via object dedup) and the config+manifest +/// splitstreams are rewritten. 
The old splitstream objects become unreferenced +/// and are collected by the next GC. /// -/// Returns a tuple of (sha256 content hash, fs-verity hash value) for the updated configuration. -pub fn seal( +/// Returns the EROFS image's ObjectID (fs-verity digest). +fn ensure_oci_composefs_erofs( repo: &Arc>, - config_name: &str, - config_verity: Option<&ObjectID>, -) -> Result> { - let (mut config, refs) = open_config(repo, config_name, config_verity)?; - let mut myconfig = config.config().clone().context("no config!")?; - let labels = myconfig.labels_mut().get_or_insert_with(HashMap::new); - let fs = crate::image::create_filesystem(repo, config_name, config_verity)?; - let id = fs.compute_image_id(); - labels.insert("containers.composefs.fsverity".to_string(), id.to_hex()); - config.set_config(Some(myconfig)); - write_config(repo, &config, refs) + manifest_digest: &str, + manifest_verity: Option<&ObjectID>, + tag: Option<&str>, +) -> Result> { + let img = oci_image::OciImage::open(repo, manifest_digest, manifest_verity)?; + if !img.is_container_image() { + return Ok(None); + } + + // Build the composefs filesystem from all layers + let fs = image::create_filesystem(repo, img.config_digest(), Some(img.config_verity()))?; + + // Commit as EROFS image (no name — the GC link comes from the config ref) + let erofs_id = fs.commit_image(repo, None)?; + + // Read original config JSON to preserve its exact bytes (and thus its + // sha256 digest) when rewriting the splitstream with the new EROFS ref. + let config_json = img.read_config_json(repo)?; + + // Rewrite config with the EROFS image ref, using layer refs from the + // OciImage (which already stripped the old image ref if any). + // Preserve any existing boot image ref. 
+ let (_config_digest, new_config_verity) = write_config_raw( + repo, + &config_json, + img.layer_refs().clone(), + Some(&erofs_id), + img.boot_image_ref(), + )?; + + // Read original manifest JSON for rewriting + let manifest_json = img.read_manifest_json(repo)?; + + // Rewrite manifest with updated config verity, preserving layer verities. + // The layer_refs from OciImage are the same as the manifest's layer refs + // (both ultimately come from the config's diff_id → verity map). + let layer_verities = img.layer_refs().clone(); + + let (_new_manifest_digest, _new_manifest_verity) = oci_image::rewrite_manifest( + repo, + &manifest_json, + manifest_digest, + &new_config_verity, + &layer_verities, + tag, + )?; + + Ok(Some(erofs_id)) } -/// Mounts a sealed container filesystem at the specified mountpoint. -/// -/// Reads the container configuration to extract the fs-verity hash from the -/// "containers.composefs.fsverity" label, then mounts the corresponding filesystem. -/// The container must have been previously sealed using `seal()`. -/// -/// Returns an error if the container is not sealed or if mounting fails. -pub fn mount( - repo: &Repository, - name: &str, - mountpoint: &str, - verity: Option<&ObjectID>, -) -> Result<()> { - let (config, _map) = open_config(repo, name, verity)?; - let Some(id) = config.get_config_annotation("containers.composefs.fsverity") else { - bail!("Can only mount sealed containers"); - }; - repo.mount_at(id, mountpoint) +/// Boot-variant counterpart to [`ensure_oci_composefs_erofs`]; applies +/// `transform_for_boot` before committing. 
+#[cfg(feature = "boot")] +fn ensure_oci_composefs_erofs_boot( + repo: &Arc>, + manifest_digest: &str, + manifest_verity: Option<&ObjectID>, + tag: Option<&str>, +) -> Result> { + use composefs_boot::BootOps; + + let img = oci_image::OciImage::open(repo, manifest_digest, manifest_verity)?; + if !img.is_container_image() { + return Ok(None); + } + + // Build the composefs filesystem from all layers, then transform for boot + let mut fs = image::create_filesystem(repo, img.config_digest(), Some(img.config_verity()))?; + fs.transform_for_boot(repo)?; + + // Commit as EROFS image + let boot_erofs_id = fs.commit_image(repo, None)?; + + // Read original config JSON to preserve its exact bytes + let config_json = img.read_config_json(repo)?; + + // Rewrite config with the boot EROFS image ref, preserving the existing image ref + let (_config_digest, new_config_verity) = write_config_raw( + repo, + &config_json, + img.layer_refs().clone(), + img.image_ref(), + Some(&boot_erofs_id), + )?; + + // Read original manifest JSON for rewriting + let manifest_json = img.read_manifest_json(repo)?; + + let layer_verities = img.layer_refs().clone(); + + let (_new_manifest_digest, _new_manifest_verity) = oci_image::rewrite_manifest( + repo, + &manifest_json, + manifest_digest, + &new_config_verity, + &layer_verities, + tag, + )?; + + Ok(Some(boot_erofs_id)) } #[cfg(test)] @@ -387,19 +586,21 @@ mod test { let mut refs = HashMap::new(); refs.insert("sha256:abc123def456".into(), Sha256HashValue::EMPTY); - let (config_digest, config_verity) = write_config(&repo, &config, refs.clone()).unwrap(); + let (config_digest, config_verity) = + write_config(&repo, &config, refs.clone(), None, None).unwrap(); assert!(config_digest.starts_with("sha256:")); - let (opened_config, opened_refs) = - open_config(&repo, &config_digest, Some(&config_verity)).unwrap(); - assert_eq!(opened_config.architecture().to_string(), "amd64"); - assert_eq!(opened_config.os().to_string(), "linux"); - 
assert_eq!(opened_refs.len(), 1); - assert!(opened_refs.contains_key("sha256:abc123def456")); + let oc = open_config(&repo, &config_digest, Some(&config_verity)).unwrap(); + assert_eq!(oc.config.architecture().to_string(), "amd64"); + assert_eq!(oc.config.os().to_string(), "linux"); + assert_eq!(oc.layer_refs.len(), 1); + assert!(oc.layer_refs.contains_key("sha256:abc123def456")); + assert!(oc.image_ref.is_none()); + assert!(oc.boot_image_ref.is_none()); - let (opened_config2, _) = open_config(&repo, &config_digest, None).unwrap(); - assert_eq!(opened_config2.architecture().to_string(), "amd64"); + let oc2 = open_config(&repo, &config_digest, None).unwrap(); + assert_eq!(oc2.config.architecture().to_string(), "amd64"); } #[test] @@ -422,7 +623,8 @@ mod test { .build() .unwrap(); - let (config_digest, config_verity) = write_config(&repo, &config, HashMap::new()).unwrap(); + let (config_digest, config_verity) = + write_config(&repo, &config, HashMap::new(), None, None).unwrap(); // Re-open the splitstream and check that the config JSON is stored // as an external object reference (not inline). 
This is important @@ -481,7 +683,8 @@ mod test { .build() .unwrap(); - let (config_digest, _config_verity) = write_config(&repo, &config, HashMap::new()).unwrap(); + let (config_digest, _config_verity) = + write_config(&repo, &config, HashMap::new(), None, None).unwrap(); let bad_digest = "sha256:0000000000000000000000000000000000000000000000000000000000000000"; let result = open_config::(&repo, bad_digest, None); @@ -491,6 +694,356 @@ mod test { assert!(result.is_ok()); } + #[test] + fn test_config_with_image_ref() { + use oci_spec::image::{ImageConfigurationBuilder, RootFsBuilder}; + + let repo_dir = tempdir(); + let repo = Arc::new(Repository::::open_path(CWD, &repo_dir).unwrap()); + + let rootfs = RootFsBuilder::default() + .typ("layers") + .diff_ids(vec!["sha256:abc123def456".to_string()]) + .build() + .unwrap(); + + let config = ImageConfigurationBuilder::default() + .architecture("amd64") + .os("linux") + .rootfs(rootfs) + .build() + .unwrap(); + + let mut refs = HashMap::new(); + let layer_id = Sha256HashValue::EMPTY; + refs.insert("sha256:abc123def456".into(), layer_id); + + // Use a fake EROFS image ID + let fake_erofs_id: Sha256HashValue = + composefs::fsverity::compute_verity(b"fake-erofs-image"); + + let (config_digest, config_verity) = + write_config(&repo, &config, refs.clone(), Some(&fake_erofs_id), None).unwrap(); + + // Reopen and verify + let oc = open_config(&repo, &config_digest, Some(&config_verity)).unwrap(); + assert_eq!( + oc.layer_refs.len(), + 1, + "layer refs should not include image ref" + ); + assert!(oc.layer_refs.contains_key("sha256:abc123def456")); + assert_eq!( + oc.image_ref, + Some(fake_erofs_id.clone()), + "image ref should be returned" + ); + + // Also verify via the convenience function + let img_ref = + composefs_erofs_for_config(&repo, &config_digest, Some(&config_verity)).unwrap(); + assert_eq!(img_ref, Some(fake_erofs_id)); + } + + #[test] + fn test_config_without_image_ref() { + use 
oci_spec::image::{ImageConfigurationBuilder, RootFsBuilder}; + + let repo_dir = tempdir(); + let repo = Arc::new(Repository::::open_path(CWD, &repo_dir).unwrap()); + + let rootfs = RootFsBuilder::default() + .typ("layers") + .diff_ids(vec!["sha256:abc123def456".to_string()]) + .build() + .unwrap(); + + let config = ImageConfigurationBuilder::default() + .architecture("amd64") + .os("linux") + .rootfs(rootfs) + .build() + .unwrap(); + + let mut refs = HashMap::new(); + refs.insert("sha256:abc123def456".into(), Sha256HashValue::EMPTY); + + let (config_digest, config_verity) = + write_config(&repo, &config, refs.clone(), None, None).unwrap(); + + let oc = open_config(&repo, &config_digest, Some(&config_verity)).unwrap(); + assert_eq!(oc.layer_refs.len(), 1); + assert!(oc.layer_refs.contains_key("sha256:abc123def456")); + assert!(oc.image_ref.is_none(), "no image ref should be present"); + + let img_ref = + composefs_erofs_for_config(&repo, &config_digest, Some(&config_verity)).unwrap(); + assert!(img_ref.is_none()); + } + + #[tokio::test] + async fn test_ensure_oci_composefs_erofs() { + use composefs::test::TestRepo; + + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = test_util::create_base_image(repo, Some("test:v1")).await; + + // Create the EROFS image and link it to the config + let erofs_id = ensure_oci_composefs_erofs( + repo, + &img.manifest_digest, + Some(&img.manifest_verity), + Some("test:v1"), + ) + .unwrap() + .expect("container image should produce EROFS"); + + // The EROFS image should exist in the repository + assert!( + repo.open_image(&erofs_id.to_hex()).is_ok(), + "EROFS image should be accessible" + ); + + // The manifest+config were rewritten with the EROFS ref + let oci = oci_image::OciImage::open_ref(repo, "test:v1").unwrap(); + assert_ne!( + oci.manifest_verity(), + &img.manifest_verity, + "manifest should have been rewritten with new config verity" + ); + assert_eq!( + oci.image_ref(), + Some(&erofs_id), + "config 
should reference the EROFS image" + ); + // Also verify via the convenience functions + let erofs_ref = + composefs_erofs_for_config(repo, oci.config_digest(), Some(oci.config_verity())) + .unwrap(); + assert_eq!(erofs_ref, Some(erofs_id.clone())); + + let erofs_ref2 = + composefs_erofs_for_manifest(repo, &img.manifest_digest, Some(oci.manifest_verity())) + .unwrap(); + assert_eq!(erofs_ref2, Some(erofs_id.clone())); + + // Verify the EROFS content by round-tripping through erofs_to_filesystem + let erofs_data = repo.read_object(&erofs_id).unwrap(); + let fs = + composefs::erofs::reader::erofs_to_filesystem::(&erofs_data).unwrap(); + let mut dump = Vec::new(); + composefs::dumpfile::write_dumpfile(&mut dump, &fs).unwrap(); + let dump = String::from_utf8(dump).unwrap(); + similar_asserts::assert_eq!( + dump, + "\ +/ 0 40755 6 0 0 0 0.0 - - - +/etc 0 40755 2 0 0 0 0.0 - - - +/etc/hostname 9 100644 1 0 0 0 0.0 - testhost\\x0a - +/etc/os-release 23 100644 1 0 0 0 0.0 - ID\\x3dtest\\x0aVERSION_ID\\x3d1.0\\x0a - +/etc/passwd 34 100644 1 0 0 0 0.0 - root:x:0:0:root:/root:/usr/bin/sh\\x0a - +/tmp 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 5 0 0 0 0.0 - - - +/usr/bin 0 40755 2 0 0 0 0.0 - - - +/usr/bin/busybox 22 100755 1 0 0 0 0.0 - busybox-binary-content - +/usr/bin/cat 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/cp 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/ls 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/mv 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/myapp 25 100755 1 0 0 0 0.0 - #!/usr/bin/sh\\x0aecho\\x20hello\\x0a - +/usr/bin/rm 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/sh 7 120777 1 0 0 0 0.0 busybox - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/share 0 40755 3 0 0 0 0.0 - - - +/usr/share/myapp 0 40755 2 0 0 0 0.0 - - - +/usr/share/myapp/data.txt 16 100644 1 0 0 0 0.0 - application-data - +/var 0 40755 2 0 0 0 0.0 - - - +" + ); + } + + #[tokio::test] + async fn test_ensure_oci_composefs_erofs_gc() { + use composefs::test::TestRepo; + + let test_repo = TestRepo::::new(); + 
let repo = &test_repo.repo; + + let img = test_util::create_base_image(repo, Some("gctest:v1")).await; + + // After pull, nothing is garbage + let dry = repo.gc_dry_run(&[]).unwrap(); + assert_eq!(dry.objects_removed, 0); + assert_eq!(dry.streams_pruned, 0); + assert_eq!(dry.images_pruned, 0); + + let erofs_id = ensure_oci_composefs_erofs( + repo, + &img.manifest_digest, + Some(&img.manifest_verity), + Some("gctest:v1"), + ) + .unwrap() + .expect("container image should produce EROFS"); + + // ensure_oci_composefs_erofs rewrites config+manifest, leaving 2 old splitstream + // objects unreferenced (the original config and manifest splitstreams) + let gc1 = repo.gc(&[]).unwrap(); + assert_eq!( + gc1.objects_removed, 2, + "old config+manifest splitstream objects" + ); + assert_eq!(gc1.streams_pruned, 0); + assert_eq!(gc1.images_pruned, 0); + + // After GC, everything is clean — EROFS survives via config ref + let dry = repo.gc_dry_run(&[]).unwrap(); + assert_eq!(dry.objects_removed, 0); + assert!( + repo.open_image(&erofs_id.to_hex()).is_ok(), + "EROFS image should survive GC while tagged" + ); + + // Untag and GC — everything gets collected + oci_image::untag_image(repo, "gctest:v1").unwrap(); + let gc2 = repo.gc(&[]).unwrap(); + // 10 objects: 5 layer splitstreams + config JSON + manifest JSON + // + EROFS image + new config splitstream + new manifest splitstream + assert_eq!(gc2.objects_removed, 10, "all objects collected after untag"); + // 7 streams: 5 layers + 1 config + 1 manifest (tag ref removed by untag) + assert_eq!(gc2.streams_pruned, 7, "all stream symlinks pruned"); + // 1 image: the EROFS symlink under images/ + assert_eq!(gc2.images_pruned, 1, "EROFS image symlink pruned"); + + assert!( + repo.open_image(&erofs_id.to_hex()).is_err(), + "EROFS image should be collected after untag + GC" + ); + + // Repo is completely empty now + let dry = repo.gc_dry_run(&[]).unwrap(); + assert_eq!(dry.objects_removed, 0); + assert_eq!(dry.streams_pruned, 0); + 
assert_eq!(dry.images_pruned, 0); + } + + /// Verify that rewriting a config splitstream (to add an EROFS image ref) + /// preserves the original config JSON bytes — even when those bytes use + /// non-canonical formatting that differs from `ImageConfiguration::to_string()`. + /// + /// Regression test: `ensure_oci_composefs_erofs` previously re-serialized + /// the config through `config.to_string()`, producing different bytes (and + /// a different sha256 digest), which caused `oci fsck` to report a + /// `config-digest-mismatch`. + #[tokio::test] + async fn test_config_rewrite_preserves_noncanonical_json() { + use composefs::test::TestRepo; + use serde_json::ser::{PrettyFormatter, Serializer}; + + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + // Create a normal image with well-formed layers + let _img = test_util::create_base_image(repo, Some("nc:v1")).await; + + // Read back the original config JSON + let oci_before = oci_image::OciImage::open_ref(repo, "nc:v1").unwrap(); + let canonical_json = oci_before.read_config_json(repo).unwrap(); + + // Re-serialize through serde_json::Value with PrettyFormatter to + // get different bytes (tab indentation) while remaining + // semantically identical JSON. + let value: serde_json::Value = serde_json::from_slice(&canonical_json).unwrap(); + let mut buf = Vec::new(); + let formatter = PrettyFormatter::with_indent(b"\t"); + let mut ser = Serializer::with_formatter(&mut buf, formatter); + serde::Serialize::serialize(&value, &mut ser).unwrap(); + let noncanonical_json = buf; + + // Sanity: the two serializations must differ in bytes but parse + // identically. 
+ assert_ne!( + canonical_json.as_slice(), + noncanonical_json.as_slice(), + "pretty-printed JSON should differ from canonical" + ); + let reparsed: serde_json::Value = serde_json::from_slice(&noncanonical_json).unwrap(); + assert_eq!(value, reparsed, "non-canonical JSON must parse identically"); + + // Now overwrite the config splitstream with the non-canonical bytes. + let (_new_config_digest, new_config_verity) = write_config_raw( + repo, + &noncanonical_json, + oci_before.layer_refs().clone(), + None, + None, + ) + .unwrap(); + let new_config_digest = hash(&noncanonical_json); + + // Rewrite the manifest to reference the non-canonical config. + use oci_spec::image::{ + DescriptorBuilder, Digest as OciDigest, ImageManifestBuilder, MediaType, + }; + use std::str::FromStr; + + let old_manifest = oci_before.manifest(); + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageConfig) + .digest(OciDigest::from_str(&new_config_digest).unwrap()) + .size(noncanonical_json.len() as u64) + .build() + .unwrap(); + let new_manifest = ImageManifestBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageManifest) + .config(config_descriptor) + .layers(old_manifest.layers().clone()) + .build() + .unwrap(); + + let new_manifest_json = new_manifest.to_string().unwrap(); + let new_manifest_digest = hash(new_manifest_json.as_bytes()); + + oci_image::untag_image(repo, "nc:v1").unwrap(); + let (_md, new_manifest_verity) = oci_image::write_manifest( + repo, + &new_manifest, + &new_manifest_digest, + &new_config_verity, + oci_before.layer_refs(), + Some("nc:v1"), + ) + .unwrap(); + + // Now the real test: ensure_oci_composefs_erofs rewrites the config + // to add an EROFS image ref. The config digest MUST be preserved. 
+ let erofs_id = ensure_oci_composefs_erofs( + repo, + &new_manifest_digest, + Some(&new_manifest_verity), + Some("nc:v1"), + ) + .unwrap() + .expect("should produce EROFS"); + + let oci_after = oci_image::OciImage::open_ref(repo, "nc:v1").unwrap(); + assert_eq!( + oci_after.config_digest(), + &new_config_digest, + "config digest must be preserved after EROFS rewrite" + ); + assert_eq!(oci_after.image_ref(), Some(&erofs_id)); + + let stored_json = oci_after.read_config_json(repo).unwrap(); + assert_eq!( + stored_json, noncanonical_json, + "raw config JSON bytes must survive round-trip through EROFS rewrite" + ); + } + #[test] fn test_import_stats_display() { let stats = ImportStats { diff --git a/crates/composefs-oci/src/oci_image.rs b/crates/composefs-oci/src/oci_image.rs index aa125d07..25fd7a49 100644 --- a/crates/composefs-oci/src/oci_image.rs +++ b/crates/composefs-oci/src/oci_image.rs @@ -106,6 +106,10 @@ pub struct OciImage { config: Option, /// Map from layer diff_id to its fs-verity object ID layer_refs: HashMap, ObjectID>, + /// The EROFS image ObjectID linked to this config, if any + image_ref: Option, + /// The boot EROFS image ObjectID linked to this config, if any + boot_image_ref: Option, /// The fs-verity ID of the manifest splitstream manifest_verity: ObjectID, } @@ -151,7 +155,7 @@ impl OciImage { )?; // Try to parse as ImageConfiguration, but don't fail for artifacts - let (config, layer_refs) = match manifest.config().media_type() { + let (config, mut layer_refs) = match manifest.config().media_type() { MediaType::ImageConfig => { let config = ImageConfiguration::from_reader(&config_data[..])?; (Some(config), config_named_refs) @@ -174,6 +178,10 @@ impl OciImage { } }; + // Strip the EROFS image ref from layer_refs (it's not a layer) + let image_ref = layer_refs.remove(crate::IMAGE_REF_KEY); + let boot_image_ref = layer_refs.remove(crate::BOOT_IMAGE_REF_KEY); + let manifest_verity = if let Some(v) = verity { v.clone() } else { @@ -188,6 +196,8 
@@ impl OciImage { config_verity, config, layer_refs, + image_ref, + boot_image_ref, manifest_verity, }) } @@ -223,11 +233,31 @@ impl OciImage { &self.config_digest } + /// Returns the config fs-verity hash. + pub fn config_verity(&self) -> &ObjectID { + &self.config_verity + } + /// Returns the OCI config, if this is a container image. pub fn config(&self) -> Option<&ImageConfiguration> { self.config.as_ref() } + /// Returns the layer refs map (diff_id → fs-verity ObjectID). + pub fn layer_refs(&self) -> &HashMap, ObjectID> { + &self.layer_refs + } + + /// Returns the EROFS image ObjectID linked to this config, if any. + pub fn image_ref(&self) -> Option<&ObjectID> { + self.image_ref.as_ref() + } + + /// Returns the boot EROFS image ObjectID linked to this config, if any. + pub fn boot_image_ref(&self) -> Option<&ObjectID> { + self.boot_image_ref.as_ref() + } + /// Returns the image architecture (empty string for artifacts). pub fn architecture(&self) -> String { self.config @@ -249,18 +279,6 @@ impl OciImage { self.config.as_ref().and_then(|c| c.created().as_deref()) } - /// Returns the composefs seal digest, if sealed. - pub fn seal_digest(&self) -> Option<&str> { - self.config - .as_ref() - .and_then(|c| c.get_config_annotation("containers.composefs.fsverity")) - } - - /// Returns whether this image has been sealed. - pub fn is_sealed(&self) -> bool { - self.seal_digest().is_some() - } - /// Opens an artifact layer's backing object by index, returning a /// read-only file descriptor to the raw blob data. 
/// @@ -387,11 +405,21 @@ impl OciImage { .map(|(digest, _verity)| serde_json::json!({ "digest": digest })) .collect(); - Ok(serde_json::json!({ + let mut result = serde_json::json!({ "manifest": manifest_value, "config": config_value, "referrers": referrers_value, - })) + }); + + if let Some(ref erofs_id) = self.image_ref { + result["composefs_erofs"] = serde_json::json!(erofs_id.to_hex()); + } + + if let Some(ref boot_id) = self.boot_image_ref { + result["composefs_boot_erofs"] = serde_json::json!(boot_id.to_hex()); + } + + Ok(result) } } @@ -499,8 +527,6 @@ pub struct ImageInfo { pub os: String, /// Creation timestamp pub created: Option, - /// Whether sealed with composefs - pub sealed: bool, /// Number of layers/blobs pub layer_count: usize, /// Number of OCI referrers (signatures, attestations, etc.) @@ -524,7 +550,6 @@ pub fn list_images( architecture: img.architecture(), os: img.os(), created: img.created().map(String::from), - sealed: img.is_sealed(), layer_count: img.layer_descriptors().len(), referrer_count, }); @@ -594,6 +619,46 @@ pub fn write_manifest( Ok((manifest_digest.to_string(), id)) } +/// Rewrites a manifest splitstream with updated named refs. +/// +/// Unlike [`write_manifest`], this always writes the splitstream even if the +/// content identifier already exists. This is needed when the manifest JSON +/// hasn't changed but the config splitstream's verity has (e.g., because an +/// EROFS image ref was added to the config). +/// +/// If `reference` is provided, the manifest is also tagged with that name. 
+pub(crate) fn rewrite_manifest( + repo: &Arc>, + manifest_json: &[u8], + manifest_digest: &str, + config_verity: &ObjectID, + layer_verities: &HashMap, ObjectID>, + reference: Option<&str>, +) -> Result<(String, ObjectID)> { + let content_id = manifest_identifier(manifest_digest); + + let config_digest = { + let manifest = ImageManifest::from_reader(manifest_json)?; + manifest.config().digest().to_string() + }; + + let mut stream = repo.create_stream(OCI_MANIFEST_CONTENT_TYPE); + + let config_key = format!("config:{config_digest}"); + stream.add_named_stream_ref(&config_key, config_verity); + + for (diff_id, verity) in layer_verities { + stream.add_named_stream_ref(diff_id, verity); + } + + stream.write_external(manifest_json)?; + + let oci_ref = reference.map(oci_ref_path); + let id = repo.write_stream(stream, &content_id, oci_ref.as_deref())?; + + Ok((manifest_digest.to_string(), id)) +} + /// Checks if a manifest exists. pub fn has_manifest( repo: &Repository, @@ -921,6 +986,421 @@ pub fn cleanup_dangling_referrers( Ok(removed) } +// ============================================================================= +// Filesystem Consistency Checks (fsck) +// ============================================================================= + +/// A structured error found during an OCI-level consistency check. +/// +/// Each variant corresponds to a specific kind of OCI metadata integrity +/// problem. The `Display` implementation produces a kebab-case error type +/// prefix followed by the image name/context and any relevant details. 
+#[derive(Debug, Clone, serde::Serialize, thiserror::Error)] +#[serde(tag = "type", rename_all = "kebab-case")] +#[non_exhaustive] +#[allow(missing_docs)] +pub enum OciFsckError { + #[error("fsck: manifest-read-failed: {name}: {detail}")] + ManifestReadFailed { name: String, detail: String }, + + #[error("fsck: manifest-digest-mismatch: {name}: expected {expected}, got {actual}")] + ManifestDigestMismatch { + name: String, + expected: String, + actual: String, + }, + + #[error("fsck: manifest-parse-failed: {name}: {detail}")] + ManifestParseFailed { name: String, detail: String }, + + #[error("fsck: config-ref-missing: {name}: {digest}")] + ConfigRefMissing { name: String, digest: String }, + + #[error("fsck: config-read-failed: {name}: {detail}")] + ConfigReadFailed { name: String, detail: String }, + + #[error("fsck: config-digest-mismatch: {name}: expected {expected}, got {actual}")] + ConfigDigestMismatch { + name: String, + expected: String, + actual: String, + }, + + #[error("fsck: config-parse-failed: {name}: {detail}")] + ConfigParseFailed { name: String, detail: String }, + + #[error("fsck: layer-ref-missing: {name}: {diff_id}")] + #[serde(rename_all = "camelCase")] + LayerRefMissing { name: String, diff_id: String }, + + #[error("fsck: layer-stream-missing: {name}: {diff_id}")] + #[serde(rename_all = "camelCase")] + LayerStreamMissing { name: String, diff_id: String }, + + #[error("fsck: layer-check-failed: {name}: {diff_id}: {detail}")] + #[serde(rename_all = "camelCase")] + LayerCheckFailed { + name: String, + diff_id: String, + detail: String, + }, + + #[error("fsck: layer-object-missing: {name}: {diff_id}: {detail}")] + #[serde(rename_all = "camelCase")] + LayerObjectMissing { + name: String, + diff_id: String, + detail: String, + }, + + #[error("fsck: seal-image-missing: {name}: {digest}: {detail}")] + SealImageMissing { + name: String, + digest: String, + detail: String, + }, + + #[error("fsck: artifact-layer-ref-missing: {name}: {digest}")] + 
ArtifactLayerRefMissing { name: String, digest: String }, + + #[error("fsck: artifact-layer-object-missing: {name}: {digest}: {detail}")] + ArtifactLayerObjectMissing { + name: String, + digest: String, + detail: String, + }, + + #[error("fsck: ref-resolve-failed: {name}: {detail}")] + RefResolveFailed { name: String, detail: String }, +} + +/// Results from an OCI-level filesystem consistency check. +/// +/// Returned by [`oci_fsck`] and [`oci_fsck_image`] to report integrity status +/// of OCI images stored in the repository. This includes checks at both the +/// OCI metadata level (manifest/config digests, layer references) and the +/// underlying repository level (object integrity, splitstream validity). +#[derive(Debug, Clone, Default, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct OciFsckResult { + pub(crate) repo_result: composefs::repository::FsckResult, + pub(crate) images_checked: u64, + pub(crate) images_corrupted: u64, + pub(crate) errors: Vec, +} + +impl OciFsckResult { + /// Returns true if no corruption or errors were found at any level. + pub fn is_ok(&self) -> bool { + debug_assert!( + self.images_corrupted == 0 || !self.errors.is_empty(), + "images_corrupted is non-zero but no OCI error messages recorded" + ); + self.repo_result.is_ok() && self.errors.is_empty() + } + + /// Results from the underlying repository fsck. + pub fn repo_result(&self) -> &composefs::repository::FsckResult { + &self.repo_result + } + + /// Number of OCI images checked. + pub fn images_checked(&self) -> u64 { + self.images_checked + } + + /// Number of OCI images with issues. + pub fn images_corrupted(&self) -> u64 { + self.images_corrupted + } + + /// OCI-level errors found during the check. 
+ pub fn errors(&self) -> &[OciFsckError] { + &self.errors + } +} + +impl std::fmt::Display for OciFsckResult { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.repo_result)?; + writeln!( + f, + "oci images: {}/{} ok", + self.images_checked.saturating_sub(self.images_corrupted), + self.images_checked + )?; + if !self.errors.is_empty() { + writeln!(f, "oci errors: {}", self.errors.len())?; + for err in &self.errors { + writeln!(f, " - {err}")?; + } + } + Ok(()) + } +} + +/// Run a full OCI-aware consistency check on the repository. +/// +/// This performs the underlying repository fsck (object integrity, splitstream +/// validation, symlink checks) and then additionally validates all tagged OCI +/// images: manifest digest verification, config digest verification, layer +/// reference existence, and seal consistency. +pub async fn oci_fsck( + repo: &Repository, +) -> Result { + let repo_result = repo.fsck().await?; + let mut result = OciFsckResult { + repo_result, + ..Default::default() + }; + + // Check all tagged OCI images + let refs = list_refs(repo).context("listing OCI refs")?; + for (name, manifest_digest) in refs { + fsck_single_image(repo, &name, &manifest_digest, &mut result); + } + + Ok(result) +} + +/// Run an OCI-aware consistency check on a single image by tag name. +/// +/// Performs the underlying repository fsck, then validates the specified image. 
+pub async fn oci_fsck_image( + repo: &Repository, + name: &str, +) -> Result { + let repo_result = repo.fsck().await?; + let mut result = OciFsckResult { + repo_result, + ..Default::default() + }; + + let (manifest_digest, _verity) = match resolve_ref(repo, name) { + Ok(v) => v, + Err(e) => { + result.images_corrupted += 1; + result.images_checked += 1; + result.errors.push(OciFsckError::RefResolveFailed { + name: name.to_string(), + detail: e.to_string(), + }); + return Ok(result); + } + }; + + fsck_single_image(repo, name, &manifest_digest, &mut result); + Ok(result) +} + +/// Internal: validate a single OCI image's metadata integrity. +fn fsck_single_image( + repo: &Repository, + name: &str, + manifest_digest: &str, + result: &mut OciFsckResult, +) { + result.images_checked += 1; + let error_count_before = result.errors.len(); + + // 1. Verify manifest content hash + let manifest_id = manifest_identifier(manifest_digest); + let (manifest_data, manifest_named_refs) = match read_external_splitstream( + repo, + &manifest_id, + None, + Some(OCI_MANIFEST_CONTENT_TYPE), + ) { + Ok(v) => v, + Err(e) => { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ManifestReadFailed { + name: name.to_string(), + detail: e.to_string(), + }); + return; + } + }; + + let computed_digest = hash(&manifest_data); + if manifest_digest != computed_digest { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ManifestDigestMismatch { + name: name.to_string(), + expected: manifest_digest.to_string(), + actual: computed_digest, + }); + return; + } + + // 2. Parse manifest + let manifest = match ImageManifest::from_reader(&manifest_data[..]) { + Ok(m) => m, + Err(e) => { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ManifestParseFailed { + name: name.to_string(), + detail: e.to_string(), + }); + return; + } + }; + + // 3. 
Verify config reference exists in manifest's named refs + let config_digest = manifest.config().digest().to_string(); + let config_key = format!("config:{config_digest}"); + let config_verity = match manifest_named_refs.get(config_key.as_str()) { + Some(v) => v.clone(), + None => { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ConfigRefMissing { + name: name.to_string(), + digest: config_digest, + }); + return; + } + }; + + // 4. Verify config content hash + let config_id = crate::config_identifier(&config_digest); + let (config_data, config_named_refs) = match read_external_splitstream( + repo, + &config_id, + Some(&config_verity), + Some(OCI_CONFIG_CONTENT_TYPE), + ) { + Ok(v) => v, + Err(e) => { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ConfigReadFailed { + name: name.to_string(), + detail: e.to_string(), + }); + return; + } + }; + + let computed_config = hash(&config_data); + if config_digest != computed_config { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ConfigDigestMismatch { + name: name.to_string(), + expected: config_digest, + actual: computed_config, + }); + return; + } + + // 5. 
Parse config and verify layer references + let is_container = matches!(manifest.config().media_type(), MediaType::ImageConfig); + + if is_container { + let config = match ImageConfiguration::from_reader(&config_data[..]) { + Ok(c) => c, + Err(e) => { + result.images_corrupted += 1; + result.errors.push(OciFsckError::ConfigParseFailed { + name: name.to_string(), + detail: e.to_string(), + }); + return; + } + }; + + // Verify each layer diff_id has a corresponding named ref and stream + for diff_id in config.rootfs().diff_ids() { + let layer_verity = match config_named_refs.get(diff_id.as_str()) { + Some(v) => v, + None => { + result.errors.push(OciFsckError::LayerRefMissing { + name: name.to_string(), + diff_id: diff_id.to_string(), + }); + continue; + } + }; + + // Check the layer stream exists + let layer_id = crate::layer_identifier(diff_id); + match repo.has_stream(&layer_id) { + Ok(Some(_)) => {} + Ok(None) => { + result.errors.push(OciFsckError::LayerStreamMissing { + name: name.to_string(), + diff_id: diff_id.to_string(), + }); + } + Err(e) => { + result.errors.push(OciFsckError::LayerCheckFailed { + name: name.to_string(), + diff_id: diff_id.to_string(), + detail: e.to_string(), + }); + } + } + + // Verify the layer's object exists + match repo.open_object(layer_verity) { + Ok(_) => {} + Err(e) => { + result.errors.push(OciFsckError::LayerObjectMissing { + name: name.to_string(), + diff_id: diff_id.to_string(), + detail: e.to_string(), + }); + } + } + } + + // 6. 
If sealed, verify the seal image exists + if let Some(seal_digest) = config.get_config_annotation("containers.composefs.fsverity") { + match repo.open_image(seal_digest) { + Ok(_) => {} + Err(e) => { + result.errors.push(OciFsckError::SealImageMissing { + name: name.to_string(), + digest: seal_digest.to_string(), + detail: e.to_string(), + }); + } + } + } + } else { + // Artifact: verify layer references from manifest named refs + for layer_desc in manifest.layers() { + let layer_digest = layer_desc.digest().to_string(); + match manifest_named_refs.get(layer_digest.as_str()) { + Some(verity) => { + // Verify the layer object exists + match repo.open_object(verity) { + Ok(_) => {} + Err(e) => { + result + .errors + .push(OciFsckError::ArtifactLayerObjectMissing { + name: name.to_string(), + digest: layer_digest, + detail: e.to_string(), + }); + } + } + } + None => { + result.errors.push(OciFsckError::ArtifactLayerRefMissing { + name: name.to_string(), + digest: layer_digest, + }); + } + } + } + } + + // Count at most once per image + if result.errors.len() > error_count_before { + result.images_corrupted += 1; + } +} + // ============================================================================= // Layer Inspection // ============================================================================= @@ -2586,4 +3066,566 @@ mod test { // Idempotent: calling again on an already-empty subject is fine remove_referrers_for_subject(repo, &subject_digest).unwrap(); } + + // ==================== OCI Fsck Tests ==================== + + #[tokio::test] + async fn test_oci_fsck_healthy_image() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + create_test_image(repo, Some("healthy:v1"), "amd64"); + + let result = oci_fsck(repo).await.unwrap(); + + assert!( + result.is_ok(), + "oci_fsck should pass on healthy repo: {result}" + ); + assert_eq!(result.images_checked, 1); + assert_eq!(result.images_corrupted, 0); + assert!(result.repo_result.is_ok()); + 
assert!(result.errors.is_empty()); + } + + #[tokio::test] + async fn test_oci_fsck_detects_corrupt_manifest() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let (manifest_digest, manifest_verity, _) = + create_test_image(repo, Some("corrupt:v1"), "amd64"); + + // The manifest is stored as an external object in a splitstream. + // Find the object file that holds the manifest JSON and corrupt it. + let manifest_id = manifest_identifier(&manifest_digest); + let mut stream = repo + .open_stream(&manifest_id, Some(&manifest_verity), None) + .unwrap(); + + let mut object_refs: Vec = Vec::new(); + stream + .get_object_refs(|id| object_refs.push(id.clone())) + .unwrap(); + assert!( + !object_refs.is_empty(), + "manifest should have an external object ref" + ); + + // Corrupt the first (manifest JSON) object on disk. + // Objects may be immutable due to fs-verity, so delete and recreate. + let obj = &object_refs[0]; + let hex = obj.to_hex(); + let (dir, file) = hex.split_at(2); + let obj_path = test_repo.path().join(format!("objects/{dir}/{file}")); + std::fs::remove_file(&obj_path).unwrap(); + std::fs::write(&obj_path, b"not valid manifest json").unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + + // The underlying repo fsck should detect the corrupted object + assert!( + !result.is_ok(), + "oci_fsck should fail with corrupted manifest object: {result}" + ); + assert!( + result.repo_result().objects_corrupted() > 0, + "repo fsck should detect corrupted object" + ); + } + + #[tokio::test] + async fn test_oci_fsck_detects_missing_layer() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let (manifest_digest, manifest_verity, _) = + create_test_image(repo, Some("missing-layer:v1"), "amd64"); + + // Open the image to find the layer diff_id + let img = OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).unwrap(); + let diff_ids = img.layer_diff_ids(); + assert_eq!(diff_ids.len(), 1); + + // Find the layer 
stream and its backing splitstream object, then + // delete the stream symlink so the layer appears missing. + let layer_id = crate::layer_identifier(diff_ids[0]); + let stream_symlink = test_repo.path().join(format!("streams/{layer_id}")); + std::fs::remove_file(&stream_symlink).unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + + assert!( + !result.is_ok(), + "oci_fsck should detect missing layer: {result}" + ); + assert!( + result.images_corrupted > 0, + "should report corrupted OCI image" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("layer-stream-missing")), + "errors should mention missing layer stream: {:?}", + result.errors + ); + } + + // ==================== Additional OCI Fsck Gap Tests ==================== + + #[tokio::test] + async fn test_oci_fsck_detects_config_digest_mismatch() { + // Exercises fsck_single_image config digest mismatch (line ~1109). + // Corrupts the config JSON object so its sha256 hash no longer + // matches the digest recorded in the manifest. 
+ let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let (manifest_digest, manifest_verity, config_digest) = + create_test_image(repo, Some("config-corrupt:v1"), "amd64"); + + // Open image to get config verity, then find and corrupt the config object + let img = OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).unwrap(); + let config_verity = img.config_verity.clone(); + drop(img); + + let config_id = crate::config_identifier(&config_digest); + let mut stream = repo + .open_stream(&config_id, Some(&config_verity), None) + .unwrap(); + let mut config_obj_refs: Vec = Vec::new(); + stream + .get_object_refs(|id| config_obj_refs.push(id.clone())) + .unwrap(); + assert!(!config_obj_refs.is_empty()); + + // Corrupt the config object — replace with valid JSON that has + // a different hash + let obj = &config_obj_refs[0]; + let hex = obj.to_hex(); + let (prefix, rest) = hex.split_at(2); + let dir = + cap_std::fs::Dir::open_ambient_dir(test_repo.path(), cap_std::ambient_authority()) + .unwrap(); + let obj_rel = format!("objects/{prefix}/{rest}"); + dir.remove_file(&obj_rel).unwrap(); + // Write valid JSON config but with modified content + dir.write( + &obj_rel, + br#"{"architecture":"arm64","os":"linux","rootfs":{"type":"layers","diff_ids":[]}}"#, + ) + .unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + + // The repo-level fsck will flag the object digest mismatch, + // which makes the overall result not ok. + assert!( + !result.is_ok(), + "oci_fsck should detect config corruption: {result}" + ); + } + + #[tokio::test] + async fn test_oci_fsck_detects_missing_config_named_ref() { + // Exercises the "manifest missing config reference" branch (line ~1079). + // Deletes the config named ref from the manifest splitstream by + // rewriting the manifest splitstream without the config named ref. 
+ // + // Approach: create a manifest splitstream that stores the manifest + // JSON externally but has NO named ref for the config, then point + // the oci ref to it. + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + // Build a valid manifest JSON + let layer_data = b"fake-layer-data"; + let layer_digest = hash(layer_data); + + let mut layer_stream = repo.create_stream(crate::skopeo::TAR_LAYER_CONTENT_TYPE); + layer_stream.write_external(layer_data).unwrap(); + let layer_verity = repo + .write_stream(layer_stream, &crate::layer_identifier(&layer_digest), None) + .unwrap(); + + let rootfs = RootFsBuilder::default() + .typ("layers") + .diff_ids(vec![layer_digest.clone()]) + .build() + .unwrap(); + let cfg = ConfigBuilder::default().build().unwrap(); + let config = ImageConfigurationBuilder::default() + .architecture("amd64") + .os("linux") + .rootfs(rootfs) + .config(cfg) + .build() + .unwrap(); + let config_json = config.to_string().unwrap(); + let config_digest = hash(config_json.as_bytes()); + + // Store config normally + let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + config_stream.add_named_stream_ref(&layer_digest, &layer_verity); + config_stream + .write_external(config_json.as_bytes()) + .unwrap(); + let _config_verity = repo + .write_stream( + config_stream, + &crate::config_identifier(&config_digest), + None, + ) + .unwrap(); + + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageConfig) + .digest(OciDigest::from_str(&config_digest).unwrap()) + .size(config_json.len() as u64) + .build() + .unwrap(); + let layer_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageLayerGzip) + .digest(OciDigest::from_str(&layer_digest).unwrap()) + .size(layer_data.len() as u64) + .build() + .unwrap(); + let manifest = ImageManifestBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageManifest) + .config(config_descriptor) + .layers(vec![layer_descriptor]) + 
.build() + .unwrap(); + + let manifest_json = manifest.to_string().unwrap(); + let manifest_digest = hash(manifest_json.as_bytes()); + + // Store manifest WITHOUT config named ref — this is the bug we test + let manifest_id = manifest_identifier(&manifest_digest); + let mut manifest_stream = repo.create_stream(OCI_MANIFEST_CONTENT_TYPE); + // Deliberately omit: manifest_stream.add_named_stream_ref(...) + manifest_stream + .write_external(manifest_json.as_bytes()) + .unwrap(); + let _manifest_verity = repo + .write_stream(manifest_stream, &manifest_id, None) + .unwrap(); + + // Create the OCI ref pointing to this manifest + let ref_path = oci_ref_path("no-config-ref:v1"); + let stream_path = format!("streams/{manifest_id}"); + repo.symlink(&format!("streams/refs/{ref_path}"), &stream_path) + .unwrap(); + + let result = oci_fsck_image(repo, "no-config-ref:v1").await.unwrap(); + + assert!( + !result.is_ok(), + "oci_fsck should detect missing config ref: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("config-ref-missing")), + "errors should mention missing config reference: {:?}", + result.errors + ); + } + + #[tokio::test] + async fn test_oci_fsck_healthy_artifact() { + // Exercises the artifact validation path (line ~1183). + // Creates a non-container artifact and verifies oci_fsck passes. 
+ let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + // Create an artifact with non-ImageConfig media type + let blob_data = b"artifact-content-for-fsck-test"; + let (blob_digest, blob_verity) = write_blob(repo, blob_data).unwrap(); + + let empty_config = b"{}"; + let config_digest = hash(empty_config); + let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + config_stream.write_external(empty_config).unwrap(); + let config_verity = repo + .write_stream( + config_stream, + &crate::config_identifier(&config_digest), + None, + ) + .unwrap(); + + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::EmptyJSON) // NOT ImageConfig + .digest(OciDigest::from_str(&config_digest).unwrap()) + .size(empty_config.len() as u64) + .build() + .unwrap(); + let layer_descriptor = DescriptorBuilder::default() + .media_type(MediaType::Other("application/octet-stream".to_string())) + .digest(OciDigest::from_str(&blob_digest).unwrap()) + .size(blob_data.len() as u64) + .build() + .unwrap(); + let manifest = ImageManifestBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageManifest) + .config(config_descriptor) + .layers(vec![layer_descriptor]) + .build() + .unwrap(); + + let mut layer_verities = HashMap::new(); + layer_verities.insert(blob_digest.clone().into_boxed_str(), blob_verity); + + let manifest_json = manifest.to_string().unwrap(); + let manifest_digest = hash(manifest_json.as_bytes()); + + write_manifest( + repo, + &manifest, + &manifest_digest, + &config_verity, + &layer_verities, + Some("artifact-fsck:v1"), + ) + .unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + assert!( + result.is_ok(), + "oci_fsck should pass for healthy artifact: {result}" + ); + assert_eq!(result.images_checked, 1); + assert_eq!(result.images_corrupted, 0); + } + + #[tokio::test] + async fn test_oci_fsck_detects_missing_artifact_layer_ref() { + // Exercises the artifact "manifest missing layer reference" branch + // 
(line ~1198). Creates an artifact where the manifest named refs + // don't include the layer digest. + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let blob_data = b"artifact-blob-missing-ref"; + let (blob_digest, _blob_verity) = write_blob(repo, blob_data).unwrap(); + + let empty_config = b"{}"; + let config_digest = hash(empty_config); + let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + config_stream.write_external(empty_config).unwrap(); + let config_verity = repo + .write_stream( + config_stream, + &crate::config_identifier(&config_digest), + None, + ) + .unwrap(); + + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::EmptyJSON) + .digest(OciDigest::from_str(&config_digest).unwrap()) + .size(empty_config.len() as u64) + .build() + .unwrap(); + let layer_descriptor = DescriptorBuilder::default() + .media_type(MediaType::Other("application/wasm".to_string())) + .digest(OciDigest::from_str(&blob_digest).unwrap()) + .size(blob_data.len() as u64) + .build() + .unwrap(); + let manifest = ImageManifestBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageManifest) + .config(config_descriptor) + .layers(vec![layer_descriptor]) + .build() + .unwrap(); + + // Deliberately pass empty layer_verities — no layer refs in manifest + let layer_verities: HashMap, Sha256HashValue> = HashMap::new(); + + let manifest_json = manifest.to_string().unwrap(); + let manifest_digest = hash(manifest_json.as_bytes()); + + write_manifest( + repo, + &manifest, + &manifest_digest, + &config_verity, + &layer_verities, + Some("artifact-no-layer-ref:v1"), + ) + .unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + + assert!( + !result.is_ok(), + "oci_fsck should detect missing artifact layer ref: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("artifact-layer-ref-missing")), + "errors should mention missing layer reference: {:?}", + result.errors + ); + } + + 
#[tokio::test] + async fn test_oci_fsck_image_unresolvable_ref() { + // Exercises oci_fsck_image with an unresolvable ref (line ~1011). + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let result = oci_fsck_image(repo, "nonexistent:tag").await.unwrap(); + + assert!(!result.is_ok(), "should fail for nonexistent ref"); + assert_eq!(result.images_checked, 1); + assert_eq!(result.images_corrupted, 1); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("ref-resolve-failed")), + "errors should mention cannot resolve ref: {:?}", + result.errors + ); + } + + #[tokio::test] + async fn test_oci_fsck_multiple_images_partial_corruption() { + // Verifies that oci_fsck checks ALL images and correctly counts + // corrupted vs healthy ones when there's a mix. + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + // Create two healthy images + create_test_image(repo, Some("healthy1:v1"), "amd64"); + let (manifest_digest2, manifest_verity2, _) = + create_test_image(repo, Some("corrupt1:v1"), "arm64"); + + // Corrupt the second image's layer + let img = OciImage::open(repo, &manifest_digest2, Some(&manifest_verity2)).unwrap(); + let diff_ids = img.layer_diff_ids(); + let layer_id = crate::layer_identifier(diff_ids[0]); + let dir = + cap_std::fs::Dir::open_ambient_dir(test_repo.path(), cap_std::ambient_authority()) + .unwrap(); + dir.remove_file(format!("streams/{layer_id}")).unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + + assert!(!result.is_ok(), "should detect corruption: {result}"); + assert_eq!(result.images_checked, 2); + assert_eq!( + result.images_corrupted, 1, + "only one image should be corrupt" + ); + } + + #[tokio::test] + async fn test_oci_fsck_detects_missing_layer_named_ref_in_config() { + // Exercises the "config missing layer reference" branch (line ~1134). + // Creates a container image where the config splitstream is missing + // the named ref for a layer diff_id. 
+ let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let layer_data = b"layer-for-missing-ref-test"; + let layer_digest = hash(layer_data); + + let mut layer_stream = repo.create_stream(crate::skopeo::TAR_LAYER_CONTENT_TYPE); + layer_stream.write_external(layer_data).unwrap(); + let layer_verity = repo + .write_stream(layer_stream, &crate::layer_identifier(&layer_digest), None) + .unwrap(); + + let rootfs = RootFsBuilder::default() + .typ("layers") + .diff_ids(vec![layer_digest.clone()]) + .build() + .unwrap(); + let cfg = ConfigBuilder::default().build().unwrap(); + let config = ImageConfigurationBuilder::default() + .architecture("amd64") + .os("linux") + .rootfs(rootfs) + .config(cfg) + .build() + .unwrap(); + let config_json = config.to_string().unwrap(); + let config_digest = hash(config_json.as_bytes()); + + // Store config WITHOUT the layer named ref — this is the bug + let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + // Deliberately omit: config_stream.add_named_stream_ref(&layer_digest, &layer_verity); + config_stream + .write_external(config_json.as_bytes()) + .unwrap(); + let config_verity = repo + .write_stream( + config_stream, + &crate::config_identifier(&config_digest), + None, + ) + .unwrap(); + + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageConfig) + .digest(OciDigest::from_str(&config_digest).unwrap()) + .size(config_json.len() as u64) + .build() + .unwrap(); + let layer_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageLayerGzip) + .digest(OciDigest::from_str(&layer_digest).unwrap()) + .size(layer_data.len() as u64) + .build() + .unwrap(); + let manifest = ImageManifestBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageManifest) + .config(config_descriptor) + .layers(vec![layer_descriptor]) + .build() + .unwrap(); + + let mut layer_verities = HashMap::new(); + layer_verities.insert(layer_digest.into_boxed_str(), layer_verity); 
+ let manifest_json = manifest.to_string().unwrap(); + let manifest_digest = hash(manifest_json.as_bytes()); + + write_manifest( + repo, + &manifest, + &manifest_digest, + &config_verity, + &layer_verities, + Some("missing-layer-ref:v1"), + ) + .unwrap(); + + let result = oci_fsck(repo).await.unwrap(); + + assert!( + !result.is_ok(), + "oci_fsck should detect missing layer ref in config: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("layer-ref-missing")), + "errors should mention config missing layer reference: {:?}", + result.errors + ); + } } diff --git a/crates/composefs-oci/src/skopeo.rs b/crates/composefs-oci/src/skopeo.rs index e0c44e35..4523cbb3 100644 --- a/crates/composefs-oci/src/skopeo.rs +++ b/crates/composefs-oci/src/skopeo.rs @@ -465,9 +465,22 @@ pub async fn pull_image( .await .with_context(|| format!("Unable to pull container image {imgref}"))?; - if let Some(name) = reference { - tag_image(repo, &result.manifest_digest, name)?; + // Generate the composefs EROFS image and link it to the config splitstream. + // For container images this rewrites the config+manifest with the EROFS ref + // and tags the final manifest. Artifacts are skipped and tagged as-is. + let erofs = crate::ensure_oci_composefs_erofs( + repo, + &result.manifest_digest, + Some(&result.manifest_verity), + reference, + )?; + if erofs.is_none() { + // Not a container image (artifact) — tag the manifest directly + if let Some(name) = reference { + tag_image(repo, &result.manifest_digest, name)?; + } } + Ok((result, stats)) } diff --git a/crates/composefs-oci/src/test_util.rs b/crates/composefs-oci/src/test_util.rs new file mode 100644 index 00000000..6c811d71 --- /dev/null +++ b/crates/composefs-oci/src/test_util.rs @@ -0,0 +1,762 @@ +/// Shared test utilities for composefs-oci. 
+/// +/// Provides helpers to build multi-layer OCI images from composefs dumpfile +/// strings, so that `transform_for_boot` actually extracts boot entries and +/// produces a filesystem different from the raw OCI one. +/// +/// Each layer is a `&str` in standard composefs dumpfile format: +/// +/// ```text +/// /path size mode nlink uid gid rdev mtime payload content digest +/// ``` +/// +/// For example: +/// +/// ```text +/// /usr/bin 0 40755 2 0 0 0 0.0 - - - +/// /usr/bin/hello 5 100644 1 0 0 0 0.0 - world - +/// /usr/bin/sh 0 120777 1 0 0 0 0.0 busybox - - +/// ``` +use std::collections::HashMap; +use std::io::Read as _; +use std::sync::Arc; + +use composefs::dumpfile_parse::{Entry, Item}; +use composefs::fsverity::Sha256HashValue; +use composefs::repository::Repository; +use containers_image_proxy::oci_spec::image::{ + ConfigBuilder, DescriptorBuilder, Digest as OciDigest, ImageConfigurationBuilder, + ImageManifestBuilder, MediaType, RootFsBuilder, +}; +use rustix::fs::FileType; +use sha2::{Digest, Sha256}; +use std::str::FromStr; + +use crate::oci_image::write_manifest; +use crate::skopeo::OCI_CONFIG_CONTENT_TYPE; + +fn hash(bytes: &[u8]) -> String { + let mut context = Sha256::new(); + context.update(bytes); + format!("sha256:{}", hex::encode(context.finalize())) +} + +/// Convert composefs dumpfile lines into tar bytes. +/// +/// Parses each line as a composefs [`Entry`] and builds the corresponding +/// tar entry. The root directory (`/`) is skipped since tar archives don't +/// include it. Only regular files (inline), directories, and symlinks are +/// supported — this is sufficient for test images. 
+fn dumpfile_to_tar(dumpfile: &str) -> Vec { + let mut builder = ::tar::Builder::new(vec![]); + + for line in dumpfile.lines() { + let line = line.trim(); + if line.is_empty() { + continue; + } + + let entry = + Entry::parse(line).unwrap_or_else(|e| panic!("bad dumpfile line {line:?}: {e}")); + + // Skip the root directory — tar doesn't need it + if entry.path.as_ref() == std::path::Path::new("/") { + continue; + } + + // Strip leading / for tar paths + let path = entry + .path + .to_str() + .expect("non-UTF8 path") + .trim_start_matches('/'); + + let ty = FileType::from_raw_mode(entry.mode); + match ty { + FileType::Directory => { + let mut header = ::tar::Header::new_ustar(); + header.set_uid(entry.uid.into()); + header.set_gid(entry.gid.into()); + header.set_mode(entry.mode & 0o7777); + header.set_entry_type(::tar::EntryType::Directory); + header.set_size(0); + builder + .append_data(&mut header, path, std::io::empty()) + .unwrap(); + } + FileType::RegularFile => match &entry.item { + Item::RegularInline { content, .. } => { + let mut header = ::tar::Header::new_ustar(); + header.set_uid(entry.uid.into()); + header.set_gid(entry.gid.into()); + header.set_mode(entry.mode & 0o7777); + header.set_entry_type(::tar::EntryType::Regular); + header.set_size(content.len() as u64); + builder + .append_data(&mut header, path, &content[..]) + .unwrap(); + } + Item::Regular { size, .. } => { + // External file with no inline content — create sized entry + let mut header = ::tar::Header::new_ustar(); + header.set_uid(entry.uid.into()); + header.set_gid(entry.gid.into()); + header.set_mode(entry.mode & 0o7777); + header.set_entry_type(::tar::EntryType::Regular); + header.set_size(*size); + builder + .append_data(&mut header, path, std::io::repeat(0u8).take(*size)) + .unwrap(); + } + other => panic!("unexpected regular file item variant: {other:?}"), + }, + FileType::Symlink => { + let target = match &entry.item { + Item::Symlink { target, .. 
} => target, + other => panic!("expected Symlink item, got {other:?}"), + }; + let mut header = ::tar::Header::new_ustar(); + header.set_uid(entry.uid.into()); + header.set_gid(entry.gid.into()); + header.set_mode(entry.mode & 0o7777); + header.set_entry_type(::tar::EntryType::Symlink); + header.set_size(0); + header + .set_link_name(target.as_ref()) + .expect("failed to set symlink target"); + builder + .append_data(&mut header, path, std::io::empty()) + .unwrap(); + } + other => panic!("unsupported file type in test dumpfile: {other:?}"), + } + } + + builder.into_inner().unwrap() +} + +/// Return value from image creation helpers. +#[allow(dead_code)] +pub struct TestImage { + pub manifest_digest: String, + pub manifest_verity: Sha256HashValue, + pub config_digest: String, +} + +/// Create an OCI image from multiple layers, each described in composefs +/// dumpfile format. +/// +/// For each layer: parses the dumpfile, builds tar bytes, imports via +/// [`import_layer`](crate::import_layer), then assembles a proper OCI +/// config and manifest referencing all layers in order. 
+async fn create_multi_layer_image( + repo: &Arc>, + tag: Option<&str>, + layers: &[&str], +) -> TestImage { + let mut layer_digests = Vec::new(); + let mut layer_verities_map: HashMap, Sha256HashValue> = HashMap::new(); + let mut layer_descriptors = Vec::new(); + + for dumpfile in layers { + let tar_data = dumpfile_to_tar(dumpfile); + let digest = hash(&tar_data); + + let (verity, _stats) = crate::import_layer(repo, &digest, None, &tar_data[..]) + .await + .unwrap(); + + let descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageLayerGzip) + .digest(OciDigest::from_str(&digest).unwrap()) + .size(tar_data.len() as u64) + .build() + .unwrap(); + + layer_verities_map.insert(digest.clone().into_boxed_str(), verity); + layer_digests.push(digest); + layer_descriptors.push(descriptor); + } + + // Build OCI config + let rootfs = RootFsBuilder::default() + .typ("layers") + .diff_ids(layer_digests.clone()) + .build() + .unwrap(); + + let cfg = ConfigBuilder::default().build().unwrap(); + + let config = ImageConfigurationBuilder::default() + .architecture("amd64") + .os("linux") + .rootfs(rootfs) + .config(cfg) + .build() + .unwrap(); + + let config_json = config.to_string().unwrap(); + let config_digest = hash(config_json.as_bytes()); + + let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE); + for (digest, verity) in &layer_verities_map { + config_stream.add_named_stream_ref(digest, verity); + } + config_stream + .write_external(config_json.as_bytes()) + .unwrap(); + let config_verity = repo + .write_stream( + config_stream, + &crate::config_identifier(&config_digest), + None, + ) + .unwrap(); + + // Build OCI manifest + let config_descriptor = DescriptorBuilder::default() + .media_type(MediaType::ImageConfig) + .digest(OciDigest::from_str(&config_digest).unwrap()) + .size(config_json.len() as u64) + .build() + .unwrap(); + + let manifest = ImageManifestBuilder::default() + .schema_version(2u32) + .media_type(MediaType::ImageManifest) + 
.config(config_descriptor) + .layers(layer_descriptors) + .build() + .unwrap(); + + let manifest_json = manifest.to_string().unwrap(); + let manifest_digest = hash(manifest_json.as_bytes()); + + let (_stored_digest, manifest_verity) = write_manifest( + repo, + &manifest, + &manifest_digest, + &config_verity, + &layer_verities_map, + tag, + ) + .unwrap(); + + TestImage { + manifest_digest, + manifest_verity, + config_digest, + } +} + +// --------------------------------------------------------------------------- +// Layer definitions in composefs dumpfile format +// +// Format: /path size mode nlink uid gid rdev mtime payload content digest +// +// Directories: /path 0 40755 2 0 0 0 0.0 - - - +// Inline files: /path 100644 1 0 0 0 0.0 - - +// Executables: /path 100755 1 0 0 0 0.0 - - +// Symlinks: /path 120777 1 0 0 0 0.0 - - +// --------------------------------------------------------------------------- + +const LAYER_ROOT_STRUCTURE: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/bin 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/share 0 40755 2 0 0 0 0.0 - - - +/etc 0 40755 2 0 0 0 0.0 - - - +/var 0 40755 2 0 0 0 0.0 - - - +/tmp 0 40755 2 0 0 0 0.0 - - - +"; + +const LAYER_BUSYBOX: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/bin 0 40755 2 0 0 0 0.0 - - - +/usr/bin/busybox 22 100755 1 0 0 0 0.0 - busybox-binary-content - +/usr/bin/sh 7 120777 1 0 0 0 0.0 busybox - - +"; + +const LAYER_CORE_UTILS: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/bin 0 40755 2 0 0 0 0.0 - - - +/usr/bin/ls 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/cat 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/cp 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/mv 7 120777 1 0 0 0 0.0 busybox - - +/usr/bin/rm 7 120777 1 0 0 0 0.0 busybox - - +"; + +const LAYER_CONFIG: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/etc 0 40755 2 0 0 0 0.0 - - - +/etc/os-release 26 100644 1 0 0 0 0.0 - 
ID=test\\nVERSION_ID=1.0\\n -
+/etc/hostname 9 100644 1 0 0 0 0.0 - testhost\\n -
+/etc/passwd 36 100644 1 0 0 0 0.0 - root:x:0:0:root:/root:/usr/bin/sh\\n -
+";
+
+const LAYER_APP: &str = "\
+/ 0 40755 2 0 0 0 0.0 - - -
+/usr 0 40755 2 0 0 0 0.0 - - -
+/usr/share 0 40755 2 0 0 0 0.0 - - -
+/usr/share/myapp 0 40755 2 0 0 0 0.0 - - -
+/usr/share/myapp/data.txt 16 100644 1 0 0 0 0.0 - application-data -
+/usr/bin 0 40755 2 0 0 0 0.0 - - -
+/usr/bin/myapp 26 100755 1 0 0 0 0.0 - #!/usr/bin/sh\\necho\\x20hello\\n -
+";
+
+const LAYER_BOOT_DIRS: &str = "\
+/ 0 40755 2 0 0 0 0.0 - - -
+/boot 0 40755 2 0 0 0 0.0 - - -
+/boot/EFI 0 40755 2 0 0 0 0.0 - - -
+/boot/EFI/Linux 0 40755 2 0 0 0 0.0 - - -
+/sysroot 0 40755 2 0 0 0 0.0 - - -
+";
+
+const LAYER_KERNEL_MODULES_DIR: &str = "\
+/ 0 40755 2 0 0 0 0.0 - - -
+/usr 0 40755 2 0 0 0 0.0 - - -
+/usr/lib 0 40755 2 0 0 0 0.0 - - -
+/usr/lib/modules 0 40755 2 0 0 0 0.0 - - -
+/usr/lib/modules/6.1.0 0 40755 2 0 0 0 0.0 - - -
+";
+
+// Version-specific boot layers. v1 and v2 share userspace (layers 1-5
+// and 12-20) but ship different kernels, initramfs, modules, and UKIs.
+// This exercises shared-object deduplication in the repo and ensures GC
+// correctly handles content referenced by multiple images.
+ +const LAYER_KERNEL_V1: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.1.0 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.1.0/vmlinuz 28 100755 1 0 0 0 0.0 - fake-kernel-6.1.0-image-v1 - +"; + +const LAYER_KERNEL_V2: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.2.0 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.2.0/vmlinuz 28 100755 1 0 0 0 0.0 - fake-kernel-6.2.0-image-v2 - +"; + +const LAYER_INITRAMFS_V1: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.1.0 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.1.0/initramfs.img 24 100644 1 0 0 0 0.0 - fake-initramfs-6.1.0-v1 - +"; + +const LAYER_INITRAMFS_V2: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.2.0 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.2.0/initramfs.img 24 100644 1 0 0 0 0.0 - fake-initramfs-6.2.0-v2 - +"; + +const LAYER_KERNEL_MODULES_V1: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.1.0 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.1.0/modules.dep 14 100644 1 0 0 0 0.0 - kmod-deps-v1\\n - +/usr/lib/modules/6.1.0/modules.alias 16 100644 1 0 0 0 0.0 - kmod-aliases-v1\\n - +"; + +const LAYER_KERNEL_MODULES_V2: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.2.0 0 40755 2 0 0 0 0.0 - - - +/usr/lib/modules/6.2.0/modules.dep 14 100644 1 0 0 0 0.0 - kmod-deps-v2\\n - 
+/usr/lib/modules/6.2.0/modules.alias 16 100644 1 0 0 0 0.0 - kmod-aliases-v2\\n - +"; + +const LAYER_UKI_V1: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/boot 0 40755 2 0 0 0 0.0 - - - +/boot/EFI 0 40755 2 0 0 0 0.0 - - - +/boot/EFI/Linux 0 40755 2 0 0 0 0.0 - - - +/boot/EFI/Linux/test-6.1.0.efi 21 100755 1 0 0 0 0.0 - MZ-fake-uki-6.1.0-v1 - +"; + +const LAYER_UKI_V2: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/boot 0 40755 2 0 0 0 0.0 - - - +/boot/EFI 0 40755 2 0 0 0 0.0 - - - +/boot/EFI/Linux 0 40755 2 0 0 0 0.0 - - - +/boot/EFI/Linux/test-6.2.0.efi 21 100755 1 0 0 0 0.0 - MZ-fake-uki-6.2.0-v2 - +"; + +const LAYER_SYSTEMD: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/systemd 0 40755 2 0 0 0 0.0 - - - +/usr/lib/systemd/system 0 40755 2 0 0 0 0.0 - - - +/usr/lib/systemd/system/multi-user.target 0 100644 1 0 0 0 0.0 - - - +"; + +const LAYER_SYSROOT_MARKER: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/sysroot 0 40755 2 0 0 0 0.0 - - - +/sysroot/.ostree-root 0 100644 1 0 0 0 0.0 - - - +"; + +const LAYER_LIBS_1: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/libc.so.6 16 100644 1 0 0 0 0.0 - fake-libc-content - +/usr/lib/libm.so.6 16 100644 1 0 0 0 0.0 - fake-libm-content - +"; + +const LAYER_LIBS_2: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/lib 0 40755 2 0 0 0 0.0 - - - +/usr/lib/libpthread.so.0 22 100644 1 0 0 0 0.0 - fake-libpthread-content - +/usr/lib/libdl.so.2 16 100644 1 0 0 0 0.0 - fake-libdl-content - +"; + +const LAYER_LOCALE: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/share 0 40755 2 0 0 0 0.0 - - - +/usr/share/locale 0 40755 2 0 0 0 0.0 - - - +/usr/share/locale/en_US 0 40755 2 0 0 0 0.0 - - - +/usr/share/locale/en_US/LC_MESSAGES 0 40755 2 0 0 0 0.0 - - - +/usr/share/locale/en_US/LC_MESSAGES/messages 11 100644 1 0 0 0 0.0 - fake-locale - +"; + +const 
LAYER_DOCS: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/share 0 40755 2 0 0 0 0.0 - - - +/usr/share/doc 0 40755 2 0 0 0 0.0 - - - +/usr/share/doc/readme.txt 21 100644 1 0 0 0 0.0 - documentation-content - +"; + +const LAYER_NSS_CONFIG: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/etc 0 40755 2 0 0 0 0.0 - - - +/etc/nsswitch.conf 27 100644 1 0 0 0 0.0 - passwd:files\\ngroup:files\\n - +/etc/resolv.conf 22 100644 1 0 0 0 0.0 - nameserver\\x20127.0.0.53\\n - +"; + +const LAYER_ZONEINFO: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/usr 0 40755 2 0 0 0 0.0 - - - +/usr/share 0 40755 2 0 0 0 0.0 - - - +/usr/share/zoneinfo 0 40755 2 0 0 0 0.0 - - - +/usr/share/zoneinfo/UTC 12 100644 1 0 0 0 0.0 - fake-tz-data - +"; + +const LAYER_VAR_LOG: &str = "\ +/ 0 40755 2 0 0 0 0.0 - - - +/var 0 40755 2 0 0 0 0.0 - - - +/var/log 0 40755 2 0 0 0 0.0 - - - +/var/log/.keepdir 0 100644 1 0 0 0 0.0 - - - +"; + +/// Base image layers: a busybox-like app image (5 layers). +const BASE_LAYERS: &[&str] = &[ + LAYER_ROOT_STRUCTURE, + LAYER_BUSYBOX, + LAYER_CORE_UTILS, + LAYER_CONFIG, + LAYER_APP, +]; + +/// Shared userspace layers used by all bootable image versions. +/// These are identical across v1/v2, so the repo deduplicates them. +const SHARED_SYSTEM_LAYERS: &[&str] = &[ + LAYER_SYSTEMD, + LAYER_SYSROOT_MARKER, + LAYER_LIBS_1, + LAYER_LIBS_2, + LAYER_LOCALE, + LAYER_DOCS, + LAYER_NSS_CONFIG, + LAYER_ZONEINFO, + LAYER_VAR_LOG, +]; + +/// Build the full layer list for a bootable image at the given version. 
+fn bootable_layers(version: u32) -> Vec<&'static str> { + let (kernel, initramfs, modules, uki) = match version { + 1 => ( + LAYER_KERNEL_V1, + LAYER_INITRAMFS_V1, + LAYER_KERNEL_MODULES_V1, + LAYER_UKI_V1, + ), + 2 => ( + LAYER_KERNEL_V2, + LAYER_INITRAMFS_V2, + LAYER_KERNEL_MODULES_V2, + LAYER_UKI_V2, + ), + _ => panic!("unsupported test image version: {version}"), + }; + + let mut layers = Vec::with_capacity(20); + // Layers 1-5: base userspace (shared across versions) + layers.extend_from_slice(BASE_LAYERS); + // Layers 6-7: boot directory structure (shared) + layers.push(LAYER_BOOT_DIRS); + layers.push(LAYER_KERNEL_MODULES_DIR); + // Layers 8-11: version-specific boot content + layers.push(kernel); + layers.push(initramfs); + layers.push(modules); + layers.push(uki); + // Layers 12-20: shared system content + layers.extend_from_slice(SHARED_SYSTEM_LAYERS); + layers +} + +/// Create a base (non-bootable) test OCI image with 5 layers. +/// +/// Layers contain a busybox-like userspace: root directory structure, busybox +/// binary with shell symlink, core utility symlinks, configuration files, and +/// a small application. +pub async fn create_base_image( + repo: &Arc>, + tag: Option<&str>, +) -> TestImage { + create_multi_layer_image(repo, tag, BASE_LAYERS).await +} + +/// Create a bootable test OCI image with 20 layers. +/// +/// `version` controls the kernel/initramfs/UKI content: +/// - v1: kernel 6.1.0, UKI test-6.1.0.efi +/// - v2: kernel 6.2.0, UKI test-6.2.0.efi +/// +/// Userspace layers (busybox, libs, systemd, configs) are identical across +/// versions — when both v1 and v2 are pulled into the same repo, the shared +/// layers are deduplicated. This exercises GC correctness with content +/// referenced by multiple images. 
+pub async fn create_bootable_image( + repo: &Arc>, + tag: Option<&str>, + version: u32, +) -> TestImage { + let layers = bootable_layers(version); + create_multi_layer_image(repo, tag, &layers).await +} + +/// Create a base test OCI image in a repository at the given path. +/// +/// This is a convenience wrapper for integration tests that work with repo +/// paths rather than `Repository` handles. Opens the repo, creates the +/// image with `create_base_image`, generates the EROFS, and returns. +pub fn create_test_oci_image(repo_path: &std::path::Path, tag: &str) -> anyhow::Result<()> { + let mut repo = Repository::::open_path(rustix::fs::CWD, repo_path)?; + repo.set_insecure(true); + let repo = Arc::new(repo); + let rt = tokio::runtime::Runtime::new()?; + rt.block_on(create_base_image(&repo, Some(tag))); + ensure_erofs_for_image(&repo, tag)?; + Ok(()) +} + +/// Create a bootable test OCI image in a repository at the given path. +/// +/// Like [`create_test_oci_image`] but builds a 20-layer bootable image +/// (version 1) and generates both the plain EROFS and the boot EROFS. +/// Requires the `boot` feature. +#[cfg(feature = "boot")] +pub fn create_test_bootable_oci_image( + repo_path: &std::path::Path, + tag: &str, +) -> anyhow::Result<()> { + let mut repo = Repository::::open_path(rustix::fs::CWD, repo_path)?; + repo.set_insecure(true); + let repo = Arc::new(repo); + let rt = tokio::runtime::Runtime::new()?; + let img = rt.block_on(create_bootable_image(&repo, Some(tag), 1)); + ensure_erofs_for_image(&repo, tag)?; + crate::boot::generate_boot_image(&repo, &img.manifest_digest)?; + Ok(()) +} + +/// Generate the composefs EROFS for a tagged OCI image and link it to the +/// config splitstream. +/// +/// This is the test-visible wrapper around the crate-internal +/// `ensure_oci_composefs_erofs`. 
Integration tests that create images via +/// `create_base_image` (which bypasses `pull_image`) need this to populate +/// the EROFS ref before testing `cfsctl oci mount`. +pub fn ensure_erofs_for_image( + repo: &Arc>, + tag: &str, +) -> anyhow::Result { + let oci = crate::oci_image::OciImage::open_ref(repo, tag)?; + let erofs_id = crate::ensure_oci_composefs_erofs( + repo, + oci.manifest_digest(), + Some(oci.manifest_verity()), + Some(tag), + )? + .ok_or_else(|| anyhow::anyhow!("image is not a container image"))?; + Ok(erofs_id) +} + +#[cfg(test)] +mod tests { + use super::*; + use composefs::test::TestRepo; + + #[test] + fn test_dumpfile_to_tar_directory() { + let tar_data = dumpfile_to_tar( + "/ 0 40755 2 0 0 0 0.0 - - -\n\ + /mydir 0 40755 2 0 0 0 0.0 - - -\n", + ); + let mut archive = ::tar::Archive::new(&tar_data[..]); + let entries: Vec<_> = archive + .entries() + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(entries.len(), 1); // root is skipped + assert_eq!(entries[0].path().unwrap().to_str().unwrap(), "mydir"); + assert_eq!( + entries[0].header().entry_type(), + ::tar::EntryType::Directory + ); + assert_eq!(entries[0].header().mode().unwrap(), 0o755); + } + + #[test] + fn test_dumpfile_to_tar_file() { + let tar_data = dumpfile_to_tar( + "/ 0 40755 2 0 0 0 0.0 - - -\n\ + /hello 5 100644 1 0 0 0 0.0 - world -\n", + ); + let mut archive = ::tar::Archive::new(&tar_data[..]); + let mut entries = archive.entries().unwrap(); + let mut entry = entries.next().unwrap().unwrap(); + assert_eq!(entry.path().unwrap().to_str().unwrap(), "hello"); + assert_eq!(entry.header().entry_type(), ::tar::EntryType::Regular); + assert_eq!(entry.header().mode().unwrap(), 0o644); + let mut content = String::new(); + std::io::Read::read_to_string(&mut entry, &mut content).unwrap(); + assert_eq!(content, "world"); + } + + #[test] + fn test_dumpfile_to_tar_executable() { + let tar_data = dumpfile_to_tar( + "/ 0 40755 2 0 0 0 0.0 - - -\n\ + /bin/app 14 100755 1 0 0 0 0.0 - 
binary-content -\n", + ); + let mut archive = ::tar::Archive::new(&tar_data[..]); + let entries: Vec<_> = archive + .entries() + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(entries[0].header().mode().unwrap(), 0o755); + } + + #[test] + fn test_dumpfile_to_tar_symlink() { + let tar_data = dumpfile_to_tar( + "/ 0 40755 2 0 0 0 0.0 - - -\n\ + /usr/bin/sh 7 120777 1 0 0 0 0.0 busybox - -\n", + ); + let mut archive = ::tar::Archive::new(&tar_data[..]); + let entries: Vec<_> = archive + .entries() + .unwrap() + .collect::>() + .unwrap(); + assert_eq!(entries.len(), 1); + assert_eq!(entries[0].header().entry_type(), ::tar::EntryType::Symlink); + assert_eq!( + entries[0].link_name().unwrap().unwrap().to_str().unwrap(), + "busybox" + ); + } + + #[tokio::test] + async fn test_create_base_image() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = create_base_image(repo, Some("base:v1")).await; + assert!(img.manifest_digest.starts_with("sha256:")); + assert!(img.config_digest.starts_with("sha256:")); + } + + #[tokio::test] + async fn test_create_bootable_image() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let img = create_bootable_image(repo, Some("boot:v1"), 1).await; + assert!(img.manifest_digest.starts_with("sha256:")); + assert!(img.config_digest.starts_with("sha256:")); + } + + /// v1 and v2 share userspace layers but differ in kernel/UKI. + /// Pulling both into the same repo deduplicates the shared content. 
+ #[tokio::test] + async fn test_versioned_images_share_layers() { + let test_repo = TestRepo::::new(); + let repo = &test_repo.repo; + + let v1 = create_bootable_image(repo, Some("os:v1"), 1).await; + let v2 = create_bootable_image(repo, Some("os:v2"), 2).await; + + // Different manifests (different kernel content) + assert_ne!(v1.manifest_digest, v2.manifest_digest); + // Different configs (different layer digests for kernel layers) + assert_ne!(v1.config_digest, v2.config_digest); + + // Both should be openable + let oci_v1 = crate::oci_image::OciImage::open_ref(repo, "os:v1").unwrap(); + let oci_v2 = crate::oci_image::OciImage::open_ref(repo, "os:v2").unwrap(); + assert!(oci_v1.is_container_image()); + assert!(oci_v2.is_container_image()); + + // Untagging v1 and running GC should collect v1-specific objects + // (its manifest, config, and version-specific layer streams) + // but shared layers must survive for v2. + crate::oci_image::untag_image(repo, "os:v1").unwrap(); + let gc = repo.gc(&[]).unwrap(); + // v1-specific: manifest splitstream + config splitstream + manifest JSON + + // config JSON + 4 version-specific layer splitstreams (kernel, initramfs, + // modules, UKI — each has unique content per version) + assert_eq!(gc.objects_removed, 8, "v1-specific objects collected"); + // 4 v1-specific layer streams + manifest + config = 6 stream symlinks + // (the 16 shared layers are still live via v2) + assert_eq!(gc.streams_pruned, 6, "v1-specific stream symlinks pruned"); + + // v2 should still be fully intact after v1 is GC'd + let oci_v2 = crate::oci_image::OciImage::open_ref(repo, "os:v2").unwrap(); + assert!(oci_v2.is_container_image()); + + // GC again — nothing more should be collected (shared layers are live) + let gc2 = repo.gc(&[]).unwrap(); + assert_eq!(gc2.objects_removed, 0, "no more objects to collect"); + assert_eq!(gc2.streams_pruned, 0, "no more streams to prune"); + assert_eq!(gc2.images_pruned, 0, "no images to prune"); + } +} diff --git 
a/crates/composefs/Cargo.toml b/crates/composefs/Cargo.toml index 3fd55480..39a3bed2 100644 --- a/crates/composefs/Cargo.toml +++ b/crates/composefs/Cargo.toml @@ -18,6 +18,7 @@ test = ["tempfile"] [dependencies] anyhow = { version = "1.0.87", default-features = false } composefs-ioctls = { workspace = true } +serde = { version = "1.0", default-features = false, features = ["derive", "alloc"] } fn-error-context = "0.2" hex = { version = "0.4.0", default-features = false, features = ["std"] } log = { version = "0.4.8", default-features = false } @@ -33,6 +34,8 @@ zstd = { version = "0.13.0", default-features = false } rand = { version = "0.9.1", default-features = true } [dev-dependencies] +cap-std = { version = "3.4.5", default-features = false } +cap-tempfile = { version = "3.4.5", default-features = false } insta = "1.42.2" proptest = "1" similar-asserts = "1.7.0" diff --git a/crates/composefs/src/dumpfile.rs b/crates/composefs/src/dumpfile.rs index 88159bcc..5aaa4927 100644 --- a/crates/composefs/src/dumpfile.rs +++ b/crates/composefs/src/dumpfile.rs @@ -50,32 +50,48 @@ fn write_escaped(writer: &mut impl fmt::Write, bytes: &[u8]) -> fmt::Result { return writer.write_str("\\x2d"); } - write_escaped_raw(writer, bytes) + write_escaped_raw(writer, bytes, EscapeEquals::No) +} + +/// Whether to escape `=` as `\x3d`. +/// +/// The C composefs implementation only escapes `=` in xattr key/value +/// fields where it separates the key from the value. In other fields +/// (paths, content, payload) `=` is a normal graphic character. +#[derive(Clone, Copy)] +enum EscapeEquals { + /// Escape `=` — used for xattr key/value fields. + Yes, + /// Do not escape `=` — used for paths, content, and payload fields. + No, } /// Escape a byte slice without the `-` sentinel logic. /// -/// This corresponds to `print_escaped` with `ESCAPE_EQUAL` (but without -/// `ESCAPE_LONE_DASH`) in the C composefs `composefs-info.c`. 
Used for -/// xattr values where `-` and empty are valid literal values, not -/// sentinels. +/// This corresponds to `print_escaped` in the C composefs +/// `composefs-info.c`. Used for xattr values where `-` and empty are +/// valid literal values, not sentinels. /// -/// Note: we unconditionally escape `=` in all fields, whereas the C code -/// only uses `ESCAPE_EQUAL` for xattr keys and values. This is harmless -/// since `\x3d` round-trips correctly, but means our output for paths -/// containing `=` is slightly more verbose than the C output. -fn write_escaped_raw(writer: &mut impl fmt::Write, bytes: &[u8]) -> fmt::Result { +/// The `escape_eq` parameter controls whether `=` is escaped (only +/// needed in xattr key/value fields where `=` is a separator). +fn write_escaped_raw( + writer: &mut impl fmt::Write, + bytes: &[u8], + escape_eq: EscapeEquals, +) -> fmt::Result { for c in bytes { let c = *c; - // The set of hex-escaped characters matches C `!isgraph(c)` in the - // POSIX locale (i.e. outside 0x21..=0x7E), plus `=` and `\`. - // The C code uses named escapes for `\\`, `\n`, `\r`, `\t` while - // we hex-escape everything uniformly; both forms parse correctly. - if c < b'!' || c == b'=' || c == b'\\' || c > b'~' { - write!(writer, "\\x{c:02x}")?; - } else { - writer.write_char(c as char)?; + // Named escapes matching the C composefs implementation. + match c { + b'\\' => writer.write_str("\\\\")?, + b'\n' => writer.write_str("\\n")?, + b'\r' => writer.write_str("\\r")?, + b'\t' => writer.write_str("\\t")?, + b'=' if matches!(escape_eq, EscapeEquals::Yes) => write!(writer, "\\x{c:02x}")?, + // Hex-escape non-graphic characters (outside 0x21..=0x7E in POSIX locale). 
+ c if !(b'!'..=b'~').contains(&c) => write!(writer, "\\x{c:02x}")?, + c => writer.write_char(c as char)?, } } @@ -117,11 +133,11 @@ fn write_entry( for (key, value) in &*stat.xattrs.borrow() { write!(writer, " ")?; - write_escaped(writer, key.as_bytes())?; + write_escaped_raw(writer, key.as_bytes(), EscapeEquals::Yes)?; write!(writer, "=")?; // Xattr values don't use the `-` sentinel — they're always present // when the key=value pair exists, and empty or `-` are valid values. - write_escaped_raw(writer, value)?; + write_escaped_raw(writer, value, EscapeEquals::Yes)?; } Ok(()) @@ -561,7 +577,7 @@ mod tests { use super::*; use crate::fsverity::Sha256HashValue; - const SIMPLE_DUMP: &str = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + const SIMPLE_DUMP: &str = r#"/ 0 40755 2 0 0 0 1000.0 - - - /empty_file 0 100644 1 0 0 0 1000.0 - - - /small_file 5 100644 1 0 0 0 1000.0 - hello - /symlink 7 120777 1 0 0 0 1000.0 /target - - @@ -592,10 +608,10 @@ mod tests { // The nlink/uid/gid/rdev fields on hardlink lines use `-` here, // matching the C composefs writer convention. The parser must // accept these without trying to parse them as integers. - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /original 11 100644 2 0 0 0 1000.0 - hello_world - /hardlink1 0 @120000 - - - - 0.0 /original - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/hardlink2 0 @120000 - - - - 0.0 /original - - "#; @@ -765,6 +781,75 @@ mod tests { Ok(()) } + /// Helper to escape bytes through write_escaped and return the result. + fn escaped(bytes: &[u8]) -> String { + let mut out = String::new(); + write_escaped(&mut out, bytes).unwrap(); + out + } + + /// Helper to escape bytes through write_escaped_raw with the given mode. 
+ fn escaped_raw(bytes: &[u8], eq: EscapeEquals) -> String { + let mut out = String::new(); + write_escaped_raw(&mut out, bytes, eq).unwrap(); + out + } + + #[test] + fn test_named_escapes() { + // These must use named escapes matching C composefs, not \xHH. + assert_eq!(escaped_raw(b"\\", EscapeEquals::No), "\\\\"); + assert_eq!(escaped_raw(b"\n", EscapeEquals::No), "\\n"); + assert_eq!(escaped_raw(b"\r", EscapeEquals::No), "\\r"); + assert_eq!(escaped_raw(b"\t", EscapeEquals::No), "\\t"); + + // Mixed: named escapes interspersed with literals + assert_eq!(escaped_raw(b"a\nb", EscapeEquals::No), "a\\nb"); + assert_eq!(escaped_raw(b"\t\n\\", EscapeEquals::No), "\\t\\n\\\\"); + } + + #[test] + fn test_non_graphic_hex_escapes() { + // Characters outside 0x21..=0x7E get \xHH + assert_eq!(escaped_raw(b"\x00", EscapeEquals::No), "\\x00"); + assert_eq!(escaped_raw(b"\x1f", EscapeEquals::No), "\\x1f"); + assert_eq!(escaped_raw(b" ", EscapeEquals::No), "\\x20"); // space = 0x20 < '!' + assert_eq!(escaped_raw(b"\x7f", EscapeEquals::No), "\\x7f"); + assert_eq!(escaped_raw(b"\xff", EscapeEquals::No), "\\xff"); + } + + #[test] + fn test_equals_escaping_context() { + // '=' is literal in normal fields (paths, content, payload) + assert_eq!(escaped_raw(b"a=b", EscapeEquals::No), "a=b"); + assert_eq!(escaped(b"key=val"), "key=val"); + + // '=' is escaped in xattr key/value fields + assert_eq!(escaped_raw(b"a=b", EscapeEquals::Yes), "a\\x3db"); + assert_eq!( + escaped_raw(b"overlay.redirect=/foo", EscapeEquals::Yes), + "overlay.redirect\\x3d/foo" + ); + } + + #[test] + fn test_escaped_sentinels() { + // Empty → "-" + assert_eq!(escaped(b""), "-"); + // Lone dash → "\x2d" + assert_eq!(escaped(b"-"), "\\x2d"); + // Dash in context is fine + assert_eq!(escaped(b"a-b"), "a-b"); + } + + #[test] + fn test_graphic_chars_literal() { + // All printable graphic ASCII (0x21..=0x7E) except '\' should be literal + assert_eq!(escaped_raw(b"!", EscapeEquals::No), "!"); + 
assert_eq!(escaped_raw(b"~", EscapeEquals::No), "~"); + assert_eq!(escaped_raw(b"abc/def.txt", EscapeEquals::No), "abc/def.txt"); + } + mod proptest_tests { use super::*; use crate::fsverity::Sha512HashValue; diff --git a/crates/composefs/src/dumpfile_parse.rs b/crates/composefs/src/dumpfile_parse.rs index ea4902c3..dd6acdb9 100644 --- a/crates/composefs/src/dumpfile_parse.rs +++ b/crates/composefs/src/dumpfile_parse.rs @@ -24,6 +24,7 @@ use crate::MAX_INLINE_CONTENT; /// https://github.com/torvalds/linux/blob/47ac09b91befbb6a235ab620c32af719f8208399/include/uapi/linux/limits.h#L13 const PATH_MAX: u32 = 4096; +use crate::SYMLINK_MAX; /// https://github.com/torvalds/linux/blob/47ac09b91befbb6a235ab620c32af719f8208399/include/uapi/linux/limits.h#L15 /// This isn't exposed in libc/rustix, and in any case we should be conservative...if this ever /// gets bumped it'd be a hazard. @@ -33,7 +34,7 @@ const XATTR_LIST_MAX: usize = u16::MAX as usize; // See above const XATTR_SIZE_MAX: usize = u16::MAX as usize; -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] /// An extended attribute entry pub struct Xattr<'k> { /// key @@ -122,8 +123,6 @@ pub enum Item<'p> { }, /// A directory Directory { - /// Size of a directory is not necessarily meaningful - size: u64, /// Number of links nlink: u32, }, @@ -375,16 +374,14 @@ impl<'p> Entry<'p> { (false, u32::from_str_radix(modeval, 8)?) }; - // For hardlinks, the C parser skips the remaining numeric fields - // (nlink, uid, gid, rdev, mtime) and only reads the payload (target - // path). We match that: consume the tokens without parsing them as - // integers, so values like `-` are accepted. + // Per composefs-dump(5): for hardlinks "we ignore all the fields + // except the payload." The C parser does the same (mkcomposefs.c + // bails out early). Skip everything and zero ignored fields. 
if is_hardlink { let ty = FileType::from_raw_mode(mode); if ty == FileType::Directory { anyhow::bail!("Invalid hardlinked directory"); } - // Skip nlink, uid, gid, rdev, mtime for field in ["nlink", "uid", "gid", "rdev", "mtime"] { next(field)?; } @@ -395,7 +392,7 @@ impl<'p> Entry<'p> { path, uid: 0, gid: 0, - mode, + mode: 0, mtime: Mtime { sec: 0, nsec: 0 }, item: Item::Hardlink { target }, xattrs: Vec::new(), @@ -410,7 +407,7 @@ impl<'p> Entry<'p> { let payload = optional_str(next("payload")?); let content = optional_str(next("content")?); let fsverity_digest = optional_str(next("digest")?); - let xattrs = components + let mut xattrs = components .try_fold((Vec::new(), 0usize), |(mut acc, total_namelen), line| { let xattr = Xattr::parse(line)?; // Limit the total length of keys. @@ -422,6 +419,10 @@ impl<'p> Entry<'p> { Ok((acc, total_namelen)) })? .0; + // Canonicalize xattr ordering — the composefs-dump(5) spec doesn't + // define an order, and different implementations emit them differently + // (C uses EROFS on-disk order, Rust uses BTreeMap order). + xattrs.sort(); let ty = FileType::from_raw_mode(mode); let item = { @@ -456,8 +457,10 @@ impl<'p> Entry<'p> { let target = unescape_to_path(payload.ok_or_else(|| anyhow!("Missing payload"))?)?; let targetlen = target.as_os_str().as_bytes().len(); - if targetlen > PATH_MAX as usize { - anyhow::bail!("Target length too large {targetlen}"); + if targetlen > SYMLINK_MAX { + anyhow::bail!( + "Symlink target length {targetlen} exceeds limit {SYMLINK_MAX}" + ); } Item::Symlink { nlink, target } } @@ -474,8 +477,9 @@ impl<'p> Entry<'p> { FileType::Directory => { Self::check_nonregfile(content, fsverity_digest)?; Self::check_rdev(rdev)?; - - Item::Directory { size, nlink } + // Per composefs-dump(5): "SIZE: The size of the file. + // This is ignored for directories." We discard it. 
+ Item::Directory { nlink } } FileType::Socket => { anyhow::bail!("sockets are not supported"); @@ -512,8 +516,10 @@ impl<'p> Entry<'p> { impl Item<'_> { pub(crate) fn size(&self) -> u64 { match self { - Item::Regular { size, .. } | Item::Directory { size, .. } => *size, + Item::Regular { size, .. } => *size, Item::RegularInline { content, .. } => content.len() as u64, + // Directories always report 0; the spec says size is ignored. + Item::Directory { .. } => 0, _ => 0, } } @@ -556,9 +562,14 @@ impl Display for Mtime { impl Display for Entry<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { escape(f, self.path.as_os_str().as_bytes(), EscapeMode::Standard)?; + let hardlink_prefix = if matches!(self.item, Item::Hardlink { .. }) { + "@" + } else { + "" + }; write!( f, - " {} {:o} {} {} {} {} {} ", + " {} {hardlink_prefix}{:o} {} {} {} {} {} ", self.item.size(), self.mode, self.item.nlink(), @@ -858,18 +869,78 @@ mod tests { fn test_parse() { const CONTENT: &str = include_str!("tests/assets/special.dump"); for line in CONTENT.lines() { - // Test a full round trip by parsing, serialize, parsing again + // Test a full round trip by parsing, serializing, parsing again. + // The serialized form may differ from the input (e.g. xattr + // ordering is canonicalized), so we check structural equality + // and that serialization is idempotent. 
let e = Entry::parse(line).unwrap(); let serialized = e.to_string(); - if line != serialized { - dbg!(&line, &e, &serialized); - } - similar_asserts::assert_eq!(line, serialized); let e2 = Entry::parse(&serialized).unwrap(); similar_asserts::assert_eq!(e, e2); + // Serialization must be idempotent + similar_asserts::assert_eq!(serialized, e2.to_string()); } } + #[test] + fn test_canonicalize_directory_size() { + // Directory size should be discarded — any input value becomes 0 + let e = Entry::parse("/ 4096 40755 2 0 0 0 1000.0 - - -").unwrap(); + assert_eq!(e.item.size(), 0); + assert!(e.to_string().starts_with("/ 0 40755")); + + let e = Entry::parse("/ 99999 40755 2 0 0 0 1000.0 - - -").unwrap(); + assert_eq!(e.item.size(), 0); + } + + #[test] + fn test_canonicalize_hardlink_metadata() { + // Hardlink metadata fields should all be zeroed — only path and + // target (payload) are meaningful per composefs-dump(5). + let e = Entry::parse( + "/link 259 @100644 3 1000 1000 0 1695368732.385062094 /original - \ + 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408 \ + security.selinux=foo", + ) + .unwrap(); + + // All metadata zeroed + assert_eq!(e.uid, 0); + assert_eq!(e.gid, 0); + assert_eq!(e.mode, 0); + assert_eq!(e.mtime, Mtime { sec: 0, nsec: 0 }); + assert!(e.xattrs.is_empty()); + + // Target preserved + match &e.item { + Item::Hardlink { target } => assert_eq!(target.as_ref(), Path::new("/original")), + other => panic!("Expected Hardlink, got {other:?}"), + } + + // Serialization uses @0 for mode, zeroed fields + let s = e.to_string(); + assert!(s.contains("@0 0 0 0 0 0.0"), "got: {s}"); + } + + #[test] + fn test_canonicalize_xattr_ordering() { + // Xattrs should be sorted by key regardless of input order + let e = Entry::parse("/ 0 40755 2 0 0 0 0.0 - - - user.z=1 security.ima=2 trusted.a=3") + .unwrap(); + + let keys: Vec<&[u8]> = e.xattrs.iter().map(|x| x.key.as_bytes()).collect(); + assert_eq!( + keys, + vec![b"security.ima".as_slice(), 
b"trusted.a", b"user.z"], + "xattrs should be sorted by key" + ); + + // Re-serialization preserves sorted order + let s = e.to_string(); + let e2 = Entry::parse(&s).unwrap(); + assert_eq!(e, e2); + } + fn parse_all(name: &str, s: &str) -> Result<()> { for line in s.lines() { if line.is_empty() { @@ -886,10 +957,10 @@ mod tests { const CASES: &[(&str, &str)] = &[ ( "content in fifo", - "/ 4096 40755 2 0 0 0 0.0 - - -\n/fifo 0 10777 1 0 0 0 0.0 - foobar -", + "/ 0 40755 2 0 0 0 0.0 - - -\n/fifo 0 10777 1 0 0 0 0.0 - foobar -", ), - ("root with rdev", "/ 4096 40755 2 0 0 42 0.0 - - -"), - ("root with fsverity", "/ 4096 40755 2 0 0 0 0.0 - - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408"), + ("root with rdev", "/ 0 40755 2 0 0 42 0.0 - - -"), + ("root with fsverity", "/ 0 40755 2 0 0 0 0.0 - - 35d02f81325122d77ec1d11baba655bc9bf8a891ab26119a41c50fa03ddfb408"), ]; for (name, case) in CASES.iter().copied() { assert!( @@ -919,7 +990,7 @@ mod tests { #[test] fn test_load_cfs_filtered() -> Result<()> { const FILTERED: &str = - "/ 4096 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2\n\ + "/ 0 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2\n\ /blockdev 0 60777 1 0 0 107690 1633950376.0 - - - trusted.bar=bar-2\n\ /inline 15 100777 1 0 0 0 1633950376.0 - FOOBAR\\nINAFILE\\n - user.foo=bar-2\n"; let mut tmpf = tempfile::tempfile()?; diff --git a/crates/composefs/src/erofs/reader.rs b/crates/composefs/src/erofs/reader.rs index c52f275e..b3a8439d 100644 --- a/crates/composefs/src/erofs/reader.rs +++ b/crates/composefs/src/erofs/reader.rs @@ -1254,22 +1254,45 @@ fn check_metacopy_xattr( Ok(None) } -/// Collect directory entries from an inode, yielding (name_bytes, nid) pairs. -/// Skips "." and "..". +/// Result of scanning a directory's entries, separating '.' and '..' from +/// the normal children. +struct DirEntries<'a> { + /// The nid that '.' points to, if present. + dot_nid: Option, + /// The nid that '..' 
points to, if present. + dotdot_nid: Option, + /// Child entries (everything except '.' and '..'). + children: Vec<(&'a [u8], u64)>, +} + +/// Collect directory entries from an inode, separating '.' and '..' from +/// the normal children. fn dir_entries<'a>( img: &'a Image<'a>, dir_inode: &'a InodeType<'a>, -) -> anyhow::Result> { - let mut entries = Vec::new(); +) -> anyhow::Result> { + let mut result = DirEntries { + dot_nid: None, + dotdot_nid: None, + children: Vec::new(), + }; + + // Closure that processes a single entry + let mut process_entry = |entry: DirectoryEntry<'a>| { + if entry.name == b"." { + result.dot_nid = Some(entry.nid()); + } else if entry.name == b".." { + result.dotdot_nid = Some(entry.nid()); + } else { + result.children.push((entry.name, entry.nid())); + } + }; // Block-based entries for blkid in img.inode_blocks(dir_inode)? { let block = img.directory_block(blkid)?; for entry in block.entries()? { - let entry = entry?; - if entry.name != b"." && entry.name != b".." { - entries.push((entry.name, entry.nid())); - } + process_entry(entry?); } } @@ -1277,46 +1300,131 @@ fn dir_entries<'a>( if let Some(data) = dir_inode.inline() { if let Ok(block) = DirectoryBlock::ref_from_bytes(data) { for entry in block.entries()? { - let entry = entry?; - if entry.name != b"." && entry.name != b".." { - entries.push((entry.name, entry.nid())); - } + process_entry(entry?); } } } - Ok(entries) + Ok(result) } /// Maximum directory nesting depth. PATH_MAX is 4096 on Linux, and directory names /// must be at least 2 bytes (1 char + separator), so the theoretical max is PATH_MAX / 2. const MAX_DIRECTORY_DEPTH: usize = 4096 / 2; +/// Per-leaf nlink tracking for post-traversal validation. +struct NlinkEntry { + /// The on-disk nlink value from the inode header. + expected: u32, + /// Reference to the leaf so we can check Rc::strong_count() later. + leaf: Rc>, +} + +/// Mutable state threaded through the recursive directory traversal. 
+struct TreeBuilder { + /// Map from nid to first-seen leaf for hardlink detection. + hardlinks: HashMap>>, + /// Map from nid to nlink tracking entry for post-traversal validation. + nlink_tracker: HashMap>, +} + +impl TreeBuilder { + fn new() -> Self { + Self { + hardlinks: HashMap::new(), + nlink_tracker: HashMap::new(), + } + } + + /// Validate that every leaf's on-disk nlink matches the number of + /// references found during traversal. + fn validate_nlink(&self) -> anyhow::Result<()> { + for (nid, entry) in &self.nlink_tracker { + // strong_count includes: one per tree insertion + one held by + // nlink_tracker + possibly one in the hardlinks map. + let tracker_refs: usize = 1 + usize::from(self.hardlinks.contains_key(nid)); + let tree_refs = Rc::strong_count(&entry.leaf) + .checked_sub(tracker_refs) + .expect("strong_count must be >= tracker_refs"); + let tree_nlink: u32 = tree_refs + .try_into() + .map_err(|_| anyhow::anyhow!("nlink overflow for inode nid {nid}"))?; + if entry.expected != tree_nlink { + anyhow::bail!( + "nlink mismatch for inode nid {nid}: on-disk nlink is {}, \ + but found {tree_nlink} reference(s) in the directory tree", + entry.expected, + ); + } + } + Ok(()) + } +} + /// Recursively populate a `tree::Directory` from an erofs directory inode. +/// +/// `dir_nid` and `parent_nid` are used to validate that the '.' and '..' +/// entries point to the correct inodes. fn populate_directory( img: &Image, + dir_nid: u64, + parent_nid: u64, dir_inode: &InodeType, dir: &mut tree::Directory, - hardlinks: &mut HashMap>>, + builder: &mut TreeBuilder, depth: usize, ) -> anyhow::Result<()> { if depth >= MAX_DIRECTORY_DEPTH { return Err(ErofsReaderError::DepthExceeded.into()); } - for (name_bytes, nid) in dir_entries(img, dir_inode)? { + let dir_result = dir_entries(img, dir_inode)?; + + // Validate '.' and '..' 
entries + match dir_result.dot_nid { + Some(nid) if nid != dir_nid => { + return Err(ErofsReaderError::InvalidSelfReference.into()); + } + None => { + return Err(ErofsReaderError::InvalidSelfReference.into()); + } + _ => {} + } + match dir_result.dotdot_nid { + Some(nid) if nid != parent_nid => { + return Err(ErofsReaderError::InvalidParentReference.into()); + } + None => { + return Err(ErofsReaderError::InvalidParentReference.into()); + } + _ => {} + } + + let mut n_subdirs: u32 = 0; + for (name_bytes, nid) in dir_result.children { let name = OsStr::from_bytes(name_bytes); let child_inode = img.inode(nid)?; if child_inode.mode().is_dir() { + n_subdirs = n_subdirs + .checked_add(1) + .ok_or_else(|| anyhow::anyhow!("too many subdirectories"))?; let child_stat = stat_from_inode_for_tree(img, &child_inode)?; let mut child_dir = tree::Directory::new(child_stat); - populate_directory(img, &child_inode, &mut child_dir, hardlinks, depth + 1) - .with_context(|| format!("reading directory {:?}", name))?; + populate_directory( + img, + nid, + dir_nid, + &child_inode, + &mut child_dir, + builder, + depth + 1, + ) + .with_context(|| format!("reading directory {:?}", name))?; dir.insert(name, tree::Inode::Directory(Box::new(child_dir))); } else { // Check if this is a hardlink (same nid seen before) - if let Some(existing_leaf) = hardlinks.get(&nid) { + if let Some(existing_leaf) = builder.hardlinks.get(&nid) { dir.insert(name, tree::Inode::Leaf(Rc::clone(existing_leaf))); continue; } @@ -1350,6 +1458,14 @@ fn populate_directory( } S_IFLNK => { let target_data = child_inode.inline().unwrap_or(&[]); + if target_data.len() > crate::SYMLINK_MAX { + anyhow::bail!( + "symlink target for {:?} is {} bytes (max {})", + name, + target_data.len(), + crate::SYMLINK_MAX, + ); + } let target = OsStr::from_bytes(target_data); tree::LeafContent::Symlink(Box::from(target)) } @@ -1363,14 +1479,37 @@ fn populate_directory( let leaf = Rc::new(tree::Leaf { stat, content }); // Track for hardlink 
detection if nlink > 1 - if child_inode.nlink() > 1 { - hardlinks.insert(nid, Rc::clone(&leaf)); + let on_disk_nlink = child_inode.nlink(); + if on_disk_nlink > 1 { + builder.hardlinks.insert(nid, Rc::clone(&leaf)); } + // Track for post-traversal nlink validation + builder + .nlink_tracker + .entry(nid) + .or_insert_with(|| NlinkEntry { + expected: on_disk_nlink, + leaf: Rc::clone(&leaf), + }); + dir.insert(name, tree::Inode::Leaf(leaf)); } } + // Validate directory nlink: should be 2 (for '.' and parent's '..') + // plus one for each child subdirectory's '..' pointing back. + let expected_nlink = n_subdirs + .checked_add(2) + .ok_or_else(|| anyhow::anyhow!("directory nlink overflow"))?; + let actual_nlink = dir_inode.nlink(); + if actual_nlink != expected_nlink { + anyhow::bail!( + "directory nlink mismatch: on-disk nlink is {actual_nlink}, \ + expected {expected_nlink} (2 + {n_subdirs} subdirectories)", + ); + } + Ok(()) } @@ -1380,19 +1519,36 @@ fn populate_directory( /// reconstructs the tree structure, including proper handling of hardlinks /// (via `Rc` sharing), xattr namespace transformations, and metacopy-based /// external file references. +/// +/// Validates structural invariants including: +/// - '.' and '..' entries point to the correct directories +/// - Directory nlink matches 2 + number of subdirectories +/// - Leaf nlink matches the number of references in the tree pub fn erofs_to_filesystem( image_data: &[u8], ) -> anyhow::Result> { let img = Image::open(image_data)?.restrict_to_composefs()?; - let root_inode = img.root()?; + let root_nid = img.sb.root_nid.get() as u64; + let root_inode = img.inode(root_nid)?; let root_stat = stat_from_inode_for_tree(&img, &root_inode)?; let mut fs = tree::FileSystem::new(root_stat); - let mut hardlinks: HashMap>> = HashMap::new(); + let mut builder = TreeBuilder::new(); + + // Root's '..' 
points to itself + populate_directory( + &img, + root_nid, + root_nid, + &root_inode, + &mut fs.root, + &mut builder, + 0, + ) + .context("reading root directory")?; - populate_directory(&img, &root_inode, &mut fs.root, &mut hardlinks, 0) - .context("reading root directory")?; + builder.validate_nlink()?; Ok(fs) } @@ -1407,6 +1563,36 @@ mod tests { }; use std::collections::HashMap; + /// Returns whether `fsck.erofs` is available on the system. + /// The result is cached so the lookup only happens once. + fn have_fsck_erofs() -> bool { + static AVAILABLE: std::sync::OnceLock = std::sync::OnceLock::new(); + *AVAILABLE.get_or_init(|| { + std::process::Command::new("fsck.erofs") + .arg("--help") + .output() + .is_ok() + }) + } + + /// Run `fsck.erofs` on an image and return whether it passed. + /// Returns `None` if `fsck.erofs` is not installed. + fn run_fsck_erofs(image: &[u8]) -> Option { + if !have_fsck_erofs() { + return None; + } + + let temp_dir = tempfile::TempDir::new().unwrap(); + let image_path = temp_dir.path().join("test.erofs"); + std::fs::write(&image_path, image).unwrap(); + + let output = std::process::Command::new("fsck.erofs") + .arg(&image_path) + .output() + .expect("fsck.erofs was detected but failed to run"); + Some(output.status.success()) + } + /// Helper to validate that directory entries can be read correctly fn validate_directory_entries(img: &Image, nid: u64, expected_names: &[&str]) { let inode = img.inode(nid).unwrap(); @@ -1448,8 +1634,8 @@ mod tests { #[test] fn test_empty_directory() { // Create filesystem with empty directory - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/empty_dir 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/empty_dir 0 40755 2 0 0 0 1000.0 - - - "#; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); @@ -1491,8 +1677,8 @@ mod tests { #[test] fn test_directory_with_inline_entries() { // Create filesystem with directory that has a few entries (should be inline) - let 
dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/file1 5 100644 1 0 0 0 1000.0 - hello - /dir1/file2 5 100644 1 0 0 0 1000.0 - world - "#; @@ -1532,8 +1718,8 @@ mod tests { #[test] fn test_directory_with_many_entries() { // Create a directory with many entries to force block storage - let mut dumpfile = String::from("/ 4096 40755 2 0 0 0 1000.0 - - -\n"); - dumpfile.push_str("/bigdir 4096 40755 2 0 0 0 1000.0 - - -\n"); + let mut dumpfile = String::from("/ 0 40755 2 0 0 0 1000.0 - - -\n"); + dumpfile.push_str("/bigdir 0 40755 2 0 0 0 1000.0 - - -\n"); // Add many files to force directory blocks for i in 0..100 { @@ -1585,10 +1771,10 @@ mod tests { #[test] fn test_nested_directories() { // Test deeply nested directory structure - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/a 4096 40755 2 0 0 0 1000.0 - - - -/a/b 4096 40755 2 0 0 0 1000.0 - - - -/a/b/c 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/a 0 40755 2 0 0 0 1000.0 - - - +/a/b 0 40755 2 0 0 0 1000.0 - - - +/a/b/c 0 40755 2 0 0 0 1000.0 - - - /a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello - "#; @@ -1622,12 +1808,12 @@ mod tests { #[test] fn test_mixed_entry_types() { // Test directory with various file types - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/mixed 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/mixed 0 40755 2 0 0 0 1000.0 - - - /mixed/regular 10 100644 1 0 0 0 1000.0 - content123 - /mixed/symlink 7 120777 1 0 0 0 1000.0 /target - - /mixed/fifo 0 10644 1 0 0 0 1000.0 - - - -/mixed/subdir 4096 40755 2 0 0 0 1000.0 - - - +/mixed/subdir 0 40755 2 0 0 0 1000.0 - - - "#; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); @@ -1668,11 +1854,11 @@ mod tests { #[test] fn test_collect_objects_traversal() { // Test that object collection properly traverses all directories - let 
dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/dir1 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/file1 5 100644 1 0 0 0 1000.0 - hello - -/dir2 4096 40755 2 0 0 0 1000.0 - - - -/dir2/subdir 4096 40755 2 0 0 0 1000.0 - - - +/dir2 0 40755 2 0 0 0 1000.0 - - - +/dir2/subdir 0 40755 2 0 0 0 1000.0 - - - /dir2/subdir/file2 5 100644 1 0 0 0 1000.0 - world - "#; @@ -1705,8 +1891,8 @@ mod tests { // . and .. entries). // Generate a C-generated erofs image using mkcomposefs - let dumpfile_content = r#"/ 4096 40755 2 0 0 0 1000.0 - - - -/empty_dir 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile_content = r#"/ 0 40755 2 0 0 0 1000.0 - - - +/empty_dir 0 40755 2 0 0 0 1000.0 - - - "#; // Create temporary files for dumpfile and erofs output @@ -1745,10 +1931,10 @@ mod tests { #[test] fn test_round_trip_basic() { // Full round-trip: dumpfile -> tree -> erofs -> read back -> validate - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /file1 5 100644 1 0 0 0 1000.0 - hello - /file2 6 100644 1 0 0 0 1000.0 - world! - -/dir1 4096 40755 2 0 0 0 1000.0 - - - +/dir1 0 40755 2 0 0 0 1000.0 - - - /dir1/nested 8 100644 1 0 0 0 1000.0 - content1 - "#; @@ -1812,14 +1998,14 @@ mod tests { #[test] fn test_erofs_to_filesystem_empty_root() { - let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n"; + let dumpfile = "/ 0 40755 2 0 0 0 1000.0 - - -\n"; let (orig, rt) = round_trip_dumpfile(dumpfile); assert_eq!(orig, rt); } #[test] fn test_erofs_to_filesystem_inline_files() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /empty 0 100644 1 0 0 0 1000.0 - - - /hello 5 100644 1 0 0 0 1000.0 - hello - /world 6 100644 1 0 0 0 1000.0 - world! 
- @@ -1830,7 +2016,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_symlinks() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /link1 7 120777 1 0 0 0 1000.0 /target - - /link2 11 120777 1 0 0 0 1000.0 /other/path - - "#; @@ -1840,10 +2026,10 @@ mod tests { #[test] fn test_erofs_to_filesystem_nested_dirs() { - let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - - -/a 4096 40755 3 0 0 0 1000.0 - - - -/a/b 4096 40755 3 0 0 0 1000.0 - - - -/a/b/c 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 3 0 0 0 1000.0 - - - +/a 0 40755 3 0 0 0 1000.0 - - - +/a/b 0 40755 3 0 0 0 1000.0 - - - +/a/b/c 0 40755 2 0 0 0 1000.0 - - - /a/b/c/file.txt 5 100644 1 0 0 0 1000.0 - hello - /a/b/other 3 100644 1 0 0 0 1000.0 - abc - "#; @@ -1853,7 +2039,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_devices_and_fifos() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /blk 0 60660 1 0 0 2049 1000.0 - - - /chr 0 20666 1 0 0 1025 1000.0 - - - /fifo 0 10644 1 0 0 0 1000.0 - - - @@ -1865,7 +2051,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_xattrs() { let dumpfile = - "/ 4096 40755 2 0 0 0 1000.0 - - - security.selinux=system_u:object_r:root_t:s0\n\ + "/ 0 40755 2 0 0 0 1000.0 - - - security.selinux=system_u:object_r:root_t:s0\n\ /file 5 100644 1 0 0 0 1000.0 - hello - user.myattr=myvalue\n"; let (orig, rt) = round_trip_dumpfile(dumpfile); assert_eq!(orig, rt); @@ -1875,7 +2061,7 @@ mod tests { fn test_erofs_to_filesystem_escaped_overlay_xattrs() { // The writer escapes trusted.overlay.X to trusted.overlay.overlay.X. // Round-tripping must preserve the original xattr name. 
- let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n\ + let dumpfile = "/ 0 40755 2 0 0 0 1000.0 - - -\n\ /file 5 100644 1 0 0 0 1000.0 - hello - trusted.overlay.custom=val\n"; let (orig, rt) = round_trip_dumpfile(dumpfile); assert_eq!(orig, rt); @@ -1891,7 +2077,7 @@ mod tests { let digest = "a".repeat(64); let pathname = format!("{}/{}", &digest[..2], &digest[2..]); let dumpfile = format!( - "/ 4096 40755 2 0 0 0 1000.0 - - -\n\ + "/ 0 40755 2 0 0 0 1000.0 - - -\n\ /ext 1000000000 100644 1 0 0 0 1000.0 {pathname} - {digest}\n" ); let (orig, rt) = round_trip_dumpfile(&dumpfile); @@ -1900,7 +2086,7 @@ mod tests { #[test] fn test_erofs_to_filesystem_hardlinks() { - let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 2 0 0 0 1000.0 - - - /original 11 100644 2 0 0 0 1000.0 - hello_world - /hardlink 0 @120000 2 0 0 0 0.0 /original - - "#; @@ -1933,10 +2119,10 @@ mod tests { #[test] fn test_erofs_to_filesystem_mixed_types() { - let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - - + let dumpfile = r#"/ 0 40755 3 0 0 0 1000.0 - - - /blk 0 60660 1 0 6 259 1000.0 - - - /chr 0 20666 1 0 6 1025 1000.0 - - - -/dir 4096 40755 2 42 42 0 2000.0 - - - +/dir 0 40755 2 42 42 0 2000.0 - - - /dir/nested 3 100644 1 42 42 0 2000.0 - abc - /fifo 0 10644 1 0 0 0 1000.0 - - - /hello 5 100644 1 1000 1000 0 1500.0 - hello - @@ -1949,7 +2135,7 @@ mod tests { #[test] fn test_restrict_to_composefs_rejects_unsupported_features() { // Build a minimal valid composefs image (just a root directory). - let dumpfile = "/ 4096 40755 2 0 0 0 1000.0 - - -\n"; + let dumpfile = "/ 0 40755 2 0 0 0 1000.0 - - -\n"; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); let base_image = mkfs_erofs(&fs); @@ -2070,6 +2256,198 @@ mod tests { } } + #[test] + fn test_rejects_corrupted_dot_and_dotdot() { + // Build a valid image and corrupt directory '.' and '..' entries + // to verify they are rejected by erofs_to_filesystem(). 
+ let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - - +/dir 4096 40755 2 0 0 0 1000.0 - - - +/file 5 100644 1 0 0 0 1000.0 - hello - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let base_image = mkfs_erofs(&fs); + + // Sanity: unmodified image round-trips fine + erofs_to_filesystem::(&base_image) + .expect("unmodified image should be accepted"); + if let Some(ok) = run_fsck_erofs(&base_image) { + assert!(ok, "fsck.erofs should accept unmodified image"); + } + + // Find the byte positions of '.' entry nids in the image. + // Directory entries are stored inline after the inode header + xattrs. + // Each DirectoryEntryHeader is 12 bytes, with inode_offset at byte 0 (U64). + // Entries are sorted by name, so '.' comes first, then '..'. + let img = Image::open(&base_image).unwrap(); + let root_nid = img.sb.root_nid.get() as u64; + + // Find the child directory's nid + let dir_nid = img.find_child_nid(root_nid, b"dir").unwrap().unwrap(); + + // Locate the child directory's inline data in the raw image. + // The inode is at inodes_start + nid * 32, and the inline data + // follows the header + xattrs. + let dir_inode = img.inode(dir_nid).unwrap(); + let dir_inline = dir_inode.inline().unwrap(); + + // Get byte offset of the inline data within the image + let inline_ptr = dir_inline.as_ptr() as usize; + let image_ptr = base_image.as_ptr() as usize; + let inline_offset = inline_ptr - image_ptr; + drop(img); + + // The inline directory block contains entries sorted by name. + // For /dir, entries are: '.', '..'. + // Each DirectoryEntryHeader is 12 bytes with inode_offset (U64) at offset 0. + + struct Case { + name: &'static str, + // Byte offset of the inode_offset field to corrupt, relative to inline_offset + entry_byte_offset: usize, + expected_error: &'static str, + } + + let cases = [ + Case { + name: "corrupted '.' entry", + entry_byte_offset: 0, // first entry's inode_offset + expected_error: "'.'", + }, + Case { + name: "corrupted '..' 
entry", + entry_byte_offset: 12, // second entry's inode_offset + expected_error: "'..'", + }, + ]; + + for case in &cases { + let mut image = base_image.clone(); + let offset = inline_offset + case.entry_byte_offset; + // Write a bogus nid (0xDEAD) that doesn't match the directory's own nid + image[offset..offset + 8].copy_from_slice(&0xDEADu64.to_le_bytes()); + + let result = erofs_to_filesystem::(&image); + let err = result.expect_err(&format!("{}: should have been rejected", case.name)); + let msg = format!("{err:#}"); + assert!( + msg.contains(case.expected_error), + "{}: expected error containing {:?}, got: {msg}", + case.name, + case.expected_error, + ); + + // Cross-check with fsck.erofs if available + if let Some(ok) = run_fsck_erofs(&image) { + assert!( + !ok, + "{}: fsck.erofs should also reject this corruption", + case.name, + ); + } + } + } + + #[test] + fn test_rejects_corrupted_nlink() { + // Build a valid image and corrupt a leaf inode's nlink field to + // verify nlink validation catches the mismatch. + let dumpfile = r#"/ 4096 40755 2 0 0 0 1000.0 - - - +/file 5 100644 1 0 0 0 1000.0 - hello - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let base_image = mkfs_erofs(&fs); + + // Sanity check + erofs_to_filesystem::(&base_image) + .expect("unmodified image should be accepted"); + + // Find the file inode and corrupt its nlink field. 
+ let img = Image::open(&base_image).unwrap(); + let root_nid = img.sb.root_nid.get() as u64; + let file_nid = img.find_child_nid(root_nid, b"file").unwrap().unwrap(); + + // Compute byte offset of the file's inode in the image + let block_size = img.block_size; + let meta_start = img.sb.meta_blkaddr.get() as usize * block_size; + let inode_byte_offset = meta_start + file_nid as usize * 32; + let is_extended = base_image[inode_byte_offset] & 1 != 0; + drop(img); + + let mut image = base_image.clone(); + if is_extended { + // ExtendedInodeHeader.nlink is U32 at byte offset 44 + let nlink_offset = inode_byte_offset + 44; + image[nlink_offset..nlink_offset + 4].copy_from_slice(&5u32.to_le_bytes()); + } else { + // CompactInodeHeader.nlink is U16 at byte offset 6 + let nlink_offset = inode_byte_offset + 6; + image[nlink_offset..nlink_offset + 2].copy_from_slice(&5u16.to_le_bytes()); + } + + let result = erofs_to_filesystem::(&image); + let err = result.expect_err("corrupted nlink should be rejected"); + let msg = format!("{err:#}"); + assert!( + msg.contains("nlink mismatch"), + "expected nlink mismatch error, got: {msg}", + ); + + // Note: fsck.erofs (as of 1.9) does not validate nlink counts -- + // it reads nlink from disk and trusts it. We intentionally go + // further here. + } + + #[test] + fn test_rejects_corrupted_directory_nlink() { + // Build a valid image and corrupt a directory inode's nlink to + // verify directory nlink validation. 
+ let dumpfile = r#"/ 4096 40755 3 0 0 0 1000.0 - - - +/dir 4096 40755 2 0 0 0 1000.0 - - - +/file 5 100644 1 0 0 0 1000.0 - hello - +"#; + + let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); + let base_image = mkfs_erofs(&fs); + + // Sanity check + erofs_to_filesystem::(&base_image) + .expect("unmodified image should be accepted"); + + // Find the child directory inode and corrupt its nlink + let img = Image::open(&base_image).unwrap(); + let root_nid = img.sb.root_nid.get() as u64; + let dir_nid = img.find_child_nid(root_nid, b"dir").unwrap().unwrap(); + + let block_size = img.block_size; + let meta_start = img.sb.meta_blkaddr.get() as usize * block_size; + let inode_byte_offset = meta_start + dir_nid as usize * 32; + let is_extended = base_image[inode_byte_offset] & 1 != 0; + drop(img); + + let mut image = base_image.clone(); + if is_extended { + // ExtendedInodeHeader.nlink is U32 at byte offset 44 + let nlink_offset = inode_byte_offset + 44; + image[nlink_offset..nlink_offset + 4].copy_from_slice(&99u32.to_le_bytes()); + } else { + // CompactInodeHeader.nlink is U16 at byte offset 6 + let nlink_offset = inode_byte_offset + 6; + image[nlink_offset..nlink_offset + 2].copy_from_slice(&99u16.to_le_bytes()); + } + + let result = erofs_to_filesystem::(&image); + let err = result.expect_err("corrupted directory nlink should be rejected"); + let msg = format!("{err:#}"); + assert!( + msg.contains("nlink mismatch"), + "expected directory nlink mismatch error, got: {msg}", + ); + + // Note: fsck.erofs (as of 1.9) does not validate nlink counts. + } + #[test] fn test_inode_blocks_rejects_oversized_range() { // Build a minimal valid EROFS image, then corrupt the root inode's @@ -2079,7 +2457,7 @@ mod tests { // The corrupted size must be a multiple of block_size so that // additional_bytes() (which uses `size % block_size` for FlatInline) // stays the same and the inode still parses successfully. 
- let dumpfile = "/ 4096 40755 1 0 0 0 0.0 - - -\n"; + let dumpfile = "/ 0 40755 1 0 0 0 0.0 - - -\n"; let fs = dumpfile_to_filesystem::(dumpfile).unwrap(); let mut image = mkfs_erofs(&fs); @@ -2124,8 +2502,7 @@ mod tests { use crate::test::proptest_strategies::{build_filesystem, filesystem_spec}; use proptest::prelude::*; - /// Round-trip a FileSystem through erofs with a given ObjectID type - /// and compare dumpfile output before and after. + /// Round-trip a FileSystem through erofs and compare dumpfile output. fn round_trip_filesystem( fs_orig: &tree::FileSystem, ) { @@ -2138,7 +2515,10 @@ mod tests { let mut rt_output = Vec::new(); write_dumpfile(&mut rt_output, &fs_rt).unwrap(); - assert_eq!(orig_output, rt_output); + similar_asserts::assert_eq!( + String::from_utf8_lossy(&orig_output), + String::from_utf8_lossy(&rt_output) + ); } proptest! { diff --git a/crates/composefs/src/erofs/writer.rs b/crates/composefs/src/erofs/writer.rs index 78858162..33f5a09e 100644 --- a/crates/composefs/src/erofs/writer.rs +++ b/crates/composefs/src/erofs/writer.rs @@ -293,6 +293,12 @@ impl Leaf<'_, ObjectID> { (format::DataLayout::FlatPlain, 0, 0) } tree::LeafContent::Symlink(target) => { + assert!( + target.len() <= crate::SYMLINK_MAX, + "symlink target is {} bytes (max {})", + target.len(), + crate::SYMLINK_MAX, + ); (format::DataLayout::FlatInline, 0, target.len() as u64) } }; diff --git a/crates/composefs/src/lib.rs b/crates/composefs/src/lib.rs index 521a9a73..38d55e1f 100644 --- a/crates/composefs/src/lib.rs +++ b/crates/composefs/src/lib.rs @@ -45,6 +45,13 @@ pub const INLINE_CONTENT_MAX_V0: usize = 64; /// ). pub const MAX_INLINE_CONTENT: usize = 512; +/// Maximum symlink target length in bytes. +/// +/// XFS limits symlink targets to 1024 bytes (`XFS_SYMLINK_MAXLEN`). Since +/// generic Linux containers are commonly backed by XFS, we enforce that +/// limit rather than the Linux VFS `PATH_MAX` of 4096. 
+pub const SYMLINK_MAX: usize = 1024; + /// Internal constants shared across workspace crates. /// /// Not part of the public API — may change without notice. diff --git a/crates/composefs/src/repository.rs b/crates/composefs/src/repository.rs index 908717b7..b3834f25 100644 --- a/crates/composefs/src/repository.rs +++ b/crates/composefs/src/repository.rs @@ -80,6 +80,7 @@ use std::{ collections::{HashMap, HashSet}, ffi::{CStr, CString, OsStr, OsString}, + fmt, fs::{canonicalize, File}, io::{Read, Write}, os::{ @@ -204,6 +205,200 @@ pub struct GcResult { pub streams_pruned: u64, } +/// A structured error found during a filesystem consistency check. +/// +/// Each variant corresponds to a specific kind of repository integrity problem. +/// The `Display` implementation produces a kebab-case error type prefix followed +/// by the path/context and any relevant details, suitable for both human display +/// and structured logging. +#[derive(Debug, Clone, serde::Serialize, thiserror::Error)] +#[serde(tag = "type", rename_all = "kebab-case")] +#[non_exhaustive] +#[allow(missing_docs)] +pub enum FsckError { + #[error("fsck: object-invalid-name: {path}: {detail}")] + ObjectInvalidName { path: String, detail: String }, + + #[error("fsck: object-open-failed: {path}: {detail}")] + ObjectOpenFailed { path: String, detail: String }, + + #[error("fsck: object-digest-mismatch: {path}: measured {measured}")] + ObjectDigestMismatch { path: String, measured: String }, + + #[error("fsck: object-verity-failed: {path}: {detail}")] + ObjectVerityFailed { path: String, detail: String }, + + #[error("fsck: object-verity-missing: {path}")] + ObjectVerityMissing { path: String }, + + #[error("fsck: entry-not-symlink: {path}")] + EntryNotSymlink { path: String }, + + #[error("fsck: broken-symlink: {path}")] + BrokenSymlink { path: String }, + + #[error("fsck: stat-failed: {path}: {detail}")] + StatFailed { path: String, detail: String }, + + #[error("fsck: unexpected-file-type: {path}: 
{detail}")] + UnexpectedFileType { path: String, detail: String }, + + #[error("fsck: stream-open-failed: {path}: {detail}")] + StreamOpenFailed { path: String, detail: String }, + + #[error("fsck: missing-object-ref: {path}: {object_id}")] + #[serde(rename_all = "camelCase")] + MissingObjectRef { path: String, object_id: String }, + + #[error("fsck: stream-read-failed: {path}: {detail}")] + StreamReadFailed { path: String, detail: String }, + + #[error("fsck: missing-named-ref: {path}: ref {ref_name}: {object_id}")] + #[serde(rename_all = "camelCase")] + MissingNamedRef { + path: String, + ref_name: String, + object_id: String, + }, + + #[error("fsck: object-check-failed: {path}: {object_id}: {detail}")] + #[serde(rename_all = "camelCase")] + ObjectCheckFailed { + path: String, + object_id: String, + detail: String, + }, + + #[error("fsck: image-open-failed: {path}: {detail}")] + ImageOpenFailed { path: String, detail: String }, + + #[error("fsck: image-read-failed: {path}: {detail}")] + ImageReadFailed { path: String, detail: String }, + + #[error("fsck: image-invalid: {path}: {detail}")] + ImageInvalid { path: String, detail: String }, + + #[error("fsck: image-missing-object: {path}: {object_id}")] + #[serde(rename_all = "camelCase")] + ImageMissingObject { path: String, object_id: String }, +} + +/// Results from a filesystem consistency check. +/// +/// Returned by [`Repository::fsck`] to report repository integrity status. +#[derive(Debug, Clone, Default, serde::Serialize)] +#[serde(rename_all = "camelCase")] +pub struct FsckResult { + pub(crate) objects_checked: u64, + pub(crate) objects_corrupted: u64, + pub(crate) streams_checked: u64, + pub(crate) streams_corrupted: u64, + pub(crate) images_checked: u64, + pub(crate) images_corrupted: u64, + pub(crate) broken_links: u64, + pub(crate) missing_objects: u64, + pub(crate) errors: Vec, +} + +impl FsckResult { + /// Returns true if no corruption or errors were found. 
+ pub fn is_ok(&self) -> bool { + debug_assert!( + self.objects_corrupted == 0 + && self.streams_corrupted == 0 + && self.images_corrupted == 0 + && self.broken_links == 0 + && self.missing_objects == 0 + || !self.errors.is_empty(), + "corruption counters are non-zero but no error messages recorded" + ); + self.errors.is_empty() + } + + /// Number of objects verified. + pub fn objects_checked(&self) -> u64 { + self.objects_checked + } + + /// Number of objects with bad fsverity digests. + pub fn objects_corrupted(&self) -> u64 { + self.objects_corrupted + } + + /// Number of streams verified. + pub fn streams_checked(&self) -> u64 { + self.streams_checked + } + + /// Number of streams with issues (bad header, missing refs, etc.). + pub fn streams_corrupted(&self) -> u64 { + self.streams_corrupted + } + + /// Number of images verified. + pub fn images_checked(&self) -> u64 { + self.images_checked + } + + /// Number of images with issues. + pub fn images_corrupted(&self) -> u64 { + self.images_corrupted + } + + /// Number of broken symlinks found. + pub fn broken_links(&self) -> u64 { + self.broken_links + } + + /// Number of missing objects referenced by streams. + pub fn missing_objects(&self) -> u64 { + self.missing_objects + } + + /// Errors found during the check. 
+ pub fn errors(&self) -> &[FsckError] { + &self.errors + } +} + +impl fmt::Display for FsckResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "objects: {}/{} ok", + self.objects_checked.saturating_sub(self.objects_corrupted), + self.objects_checked + )?; + writeln!( + f, + "streams: {}/{} ok", + self.streams_checked.saturating_sub(self.streams_corrupted), + self.streams_checked + )?; + writeln!( + f, + "images: {}/{} ok", + self.images_checked.saturating_sub(self.images_corrupted), + self.images_checked + )?; + if self.broken_links > 0 { + writeln!(f, "broken symlinks: {}", self.broken_links)?; + } + if self.missing_objects > 0 { + writeln!(f, "missing objects: {}", self.missing_objects)?; + } + if self.errors.is_empty() { + writeln!(f, "status: ok")?; + } else { + writeln!(f, "status: {} error(s)", self.errors.len())?; + for err in &self.errors { + writeln!(f, " - {err}")?; + } + } + Ok(()) + } +} + impl Repository { /// Return the objects directory. pub fn objects_dir(&self) -> ErrnoResult<&OwnedFd> { @@ -1431,9 +1626,402 @@ impl Repository { Ok(result) } - // fn fsck(&self) -> Result<()> { - // unimplemented!() - // } + /// Check the structural integrity of the repository. + /// + /// Walks all objects, streams, and images in the repository, verifying: + /// - Object fsverity digests match their path-derived identifiers + /// - Stream and image symlinks resolve to existing objects + /// - Stream/image refs resolve to valid entries + /// - Splitstreams have valid headers and reference only existing objects + /// + /// Object directories are checked in parallel using `spawn_blocking`, + /// with concurrency bounded by `available_parallelism()`. + /// + /// Returns a [`FsckResult`] summarizing the findings. Does not modify + /// any repository contents. 
+ #[context("Running filesystem consistency check")] + pub async fn fsck(&self) -> Result { + let mut result = FsckResult::default(); + + // Phase 1: Verify all objects (parallel across object subdirectories) + self.fsck_objects(&mut result) + .await + .context("Checking objects")?; + + // Phase 2: Verify stream symlinks and splitstream integrity + self.fsck_category("streams", &mut result) + .context("Checking streams")?; + + // Phase 3: Verify image symlinks + self.fsck_category("images", &mut result) + .context("Checking images")?; + + Ok(result) + } + + /// Verify all objects in the repository have correct fsverity digests. + /// + /// Each `objects/XX/` subdirectory is checked on a blocking thread via + /// `tokio::task::spawn_blocking`, with bounded concurrency to avoid + /// overwhelming the system with I/O. + async fn fsck_objects(&self, result: &mut FsckResult) -> Result<()> { + // Cap at available CPUs; the work is a mix of I/O (reading objects) + // and CPU (computing verity hashes). + let max_concurrent = available_parallelism().map(|n| n.get()).unwrap_or(4); + let insecure = self.insecure; + + let mut joinset = tokio::task::JoinSet::new(); + let mut partial_results = Vec::new(); + + for first_byte in 0x00..=0xffu8 { + // Drain completed tasks if we're at the concurrency limit + while joinset.len() >= max_concurrent { + partial_results.push(joinset.join_next().await.unwrap()??); + } + + let dirfd = match self.openat( + &format!("objects/{first_byte:02x}"), + OFlags::RDONLY | OFlags::DIRECTORY, + ) { + Ok(fd) => fd, + Err(Errno::NOENT) => continue, + Err(e) => { + Err(e).with_context(|| format!("Opening objects/{first_byte:02x} directory"))? 
+ } + }; + + joinset + .spawn_blocking(move || fsck_object_dir::(dirfd, first_byte, insecure)); + } + + // Drain remaining tasks + while let Some(output) = joinset.join_next().await { + partial_results.push(output??); + } + + // Fold all per-directory results into the main result + for partial in partial_results { + result.objects_checked += partial.objects_checked; + result.objects_corrupted += partial.objects_corrupted; + result.errors.extend(partial.errors); + } + + Ok(()) + } + + /// Verify symlink integrity and splitstream/image validity for a category + /// ("streams" or "images"). + #[context("Checking {category} integrity")] + fn fsck_category(&self, category: &str, result: &mut FsckResult) -> Result<()> { + let is_streams = category == "streams"; + + let Some(category_fd) = self + .openat(category, OFlags::RDONLY | OFlags::DIRECTORY) + .filter_errno(Errno::NOENT) + .with_context(|| format!("Opening {category} directory"))? + else { + return Ok(()); + }; + + // Check first-level symlinks: each should point to an existing object + for item in + Dir::read_from(&category_fd).with_context(|| format!("Reading {category} directory"))? + { + let entry = item.context("Reading directory entry")?; + let filename = entry.file_name(); + if filename == c"." || filename == c".." 
|| filename == c"refs" { + continue; + } + + if is_streams { + result.streams_checked += 1; + } else { + result.images_checked += 1; + } + + if entry.file_type() != FileType::Symlink { + if is_streams { + result.streams_corrupted += 1; + } else { + result.images_corrupted += 1; + } + result.errors.push(FsckError::EntryNotSymlink { + path: format!( + "{category}/{}", + String::from_utf8_lossy(filename.to_bytes()) + ), + }); + continue; + } + + // Check the symlink resolves (follows through to the object) + match statat(&category_fd, filename, AtFlags::empty()) { + Ok(_) => {} + Err(Errno::NOENT) => { + result.broken_links += 1; + if is_streams { + result.streams_corrupted += 1; + } else { + result.images_corrupted += 1; + } + result.errors.push(FsckError::BrokenSymlink { + path: format!( + "{category}/{}", + String::from_utf8_lossy(filename.to_bytes()) + ), + }); + continue; + } + Err(e) => { + result.errors.push(FsckError::StatFailed { + path: format!( + "{category}/{}", + String::from_utf8_lossy(filename.to_bytes()) + ), + detail: e.to_string(), + }); + continue; + } + } + + let name = String::from_utf8_lossy(filename.to_bytes()).to_string(); + if is_streams { + // Validate splitstream contents + self.fsck_splitstream(&name, result); + } else { + // Validate erofs image structure and object references + self.fsck_image(&name, result); + } + } + + // Check refs/ symlinks + let refs_fd = match openat( + &category_fd, + c"refs", + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .filter_errno(Errno::NOENT) + .with_context(|| format!("Opening {category}/refs directory"))? + { + Some(fd) => fd, + None => return Ok(()), + }; + + self.fsck_refs_dir(&refs_fd, category, "", result) + .with_context(|| format!("Checking {category}/refs")) + } + + /// Recursively verify that all ref symlinks resolve to valid entries in the + /// parent category directory. 
+ fn fsck_refs_dir( + &self, + refs_fd: &OwnedFd, + category: &str, + prefix: &str, + result: &mut FsckResult, + ) -> Result<()> { + for item in Dir::read_from(refs_fd) + .with_context(|| format!("Reading {category}/refs/{prefix} directory"))? + { + let entry = item.context("Reading refs directory entry")?; + let filename = entry.file_name(); + if filename == c"." || filename == c".." { + continue; + } + + let name = String::from_utf8_lossy(filename.to_bytes()).to_string(); + let display_path = if prefix.is_empty() { + format!("{category}/refs/{name}") + } else { + format!("{category}/refs/{prefix}/{name}") + }; + + match entry.file_type() { + FileType::Directory => { + let subdir = openat( + refs_fd, + filename, + OFlags::RDONLY | OFlags::DIRECTORY | OFlags::CLOEXEC, + Mode::empty(), + ) + .with_context(|| format!("Opening {display_path}"))?; + let sub_prefix = if prefix.is_empty() { + name.clone() + } else { + format!("{prefix}/{name}") + }; + self.fsck_refs_dir(&subdir, category, &sub_prefix, result)?; + } + FileType::Symlink => { + // The ref should ultimately resolve to a file (following + // the chain: refs/X -> ../../entry -> ../objects/XX/YY) + match statat(refs_fd, filename, AtFlags::empty()) { + Ok(_) => {} + Err(Errno::NOENT) => { + result.broken_links += 1; + result.errors.push(FsckError::BrokenSymlink { + path: display_path.clone(), + }); + } + Err(e) => { + result.errors.push(FsckError::StatFailed { + path: display_path.clone(), + detail: e.to_string(), + }); + } + } + } + other => { + result.errors.push(FsckError::UnexpectedFileType { + path: display_path.clone(), + detail: format!("{other:?}"), + }); + } + } + } + Ok(()) + } + + /// Validate a single splitstream: check header and object references. 
+ fn fsck_splitstream(&self, stream_name: &str, result: &mut FsckResult) { + let stream_path = format!("streams/{stream_name}"); + let mut split_stream = match self.open_stream(stream_name, None, None) { + Ok(s) => s, + Err(e) => { + result.streams_corrupted += 1; + result.errors.push(FsckError::StreamOpenFailed { + path: stream_path, + detail: e.to_string(), + }); + return; + } + }; + + // Check that all object_refs point to existing objects + let check_result = split_stream.get_object_refs(|id| { + let obj_path = Self::format_object_path(id); + match self.openat(&obj_path, OFlags::RDONLY) { + Ok(_) => {} + Err(Errno::NOENT) => { + result.missing_objects += 1; + result.errors.push(FsckError::MissingObjectRef { + path: stream_path.clone(), + object_id: id.to_hex(), + }); + } + Err(e) => { + result.errors.push(FsckError::ObjectCheckFailed { + path: stream_path.clone(), + object_id: id.to_hex(), + detail: e.to_string(), + }); + } + } + }); + if let Err(e) = check_result { + result.streams_corrupted += 1; + result.errors.push(FsckError::StreamReadFailed { + path: stream_path, + detail: e.to_string(), + }); + return; + } + + // Check that all named refs (stream refs) point to existing objects + for (ref_name, ref_id) in split_stream.iter_named_refs() { + // The named ref's object should exist + let obj_path = Self::format_object_path(ref_id); + match self.openat(&obj_path, OFlags::RDONLY) { + Ok(_) => {} + Err(Errno::NOENT) => { + result.missing_objects += 1; + result.errors.push(FsckError::MissingNamedRef { + path: stream_path.clone(), + ref_name: ref_name.to_string(), + object_id: ref_id.to_hex(), + }); + } + Err(e) => { + result.errors.push(FsckError::ObjectCheckFailed { + path: stream_path.clone(), + object_id: ref_id.to_hex(), + detail: format!("checking named ref '{ref_name}': {e}"), + }); + } + } + // The stream entry itself should also exist (but don't double-count). + // Note: the named ref name may not correspond to an actual stream + // entry. 
OCI images use named refs with keys like + // "config:sha256:..." or layer diff_ids that aren't stream names. + // We only warn if the object itself is missing (handled above); + // a missing stream entry with an existing object is benign. + } + } + + /// Validate a single erofs image: parse structure, enforce composefs + /// invariants, and verify all referenced objects exist. + fn fsck_image(&self, image_name: &str, result: &mut FsckResult) { + // Read the image data + let image_path = format!("images/{image_name}"); + let mut data = vec![]; + let fd = match self.openat(&image_path, OFlags::RDONLY) { + Ok(fd) => fd, + Err(e) => { + result.images_corrupted += 1; + result.errors.push(FsckError::ImageOpenFailed { + path: image_path, + detail: e.to_string(), + }); + return; + } + }; + if let Err(e) = File::from(fd).read_to_end(&mut data) { + result.images_corrupted += 1; + result.errors.push(FsckError::ImageReadFailed { + path: image_path, + detail: e.to_string(), + }); + return; + } + + // Parse the erofs image with composefs-specific structural validation + // (header magic, superblock, no unsupported features, etc.) and walk + // the directory tree to collect all referenced object IDs. 
+ let objects = match crate::erofs::reader::collect_objects::(&data) { + Ok(objects) => objects, + Err(e) => { + result.images_corrupted += 1; + result.errors.push(FsckError::ImageInvalid { + path: image_path, + detail: e.to_string(), + }); + return; + } + }; + + // Verify all referenced objects exist + for obj_id in &objects { + let path = Self::format_object_path(obj_id); + match self.openat(&path, OFlags::RDONLY) { + Ok(_) => {} + Err(Errno::NOENT) => { + result.missing_objects += 1; + result.errors.push(FsckError::ImageMissingObject { + path: image_path.clone(), + object_id: obj_id.to_hex(), + }); + } + Err(e) => { + result.errors.push(FsckError::ObjectCheckFailed { + path: image_path.clone(), + object_id: obj_id.to_hex(), + detail: e.to_string(), + }); + } + } + } + } /// Returns a borrowed file descriptor for the repository root. /// @@ -1479,6 +2067,120 @@ impl Repository { } } +/// Verify each object in a single `objects/XX/` subdirectory. +/// +/// This is a free function (not a method) so it can be used with +/// `spawn_blocking` — it captures only owned/`Send` values. +fn fsck_object_dir( + dirfd: OwnedFd, + first_byte: u8, + insecure: bool, +) -> Result { + let mut result = FsckResult::default(); + + for item in Dir::read_from(&dirfd) + .with_context(|| format!("Reading objects/{first_byte:02x} directory"))? + { + let entry = item.context("Reading object directory entry")?; + let filename = entry.file_name(); + if filename == c"." || filename == c".." 
{ + continue; + } + + result.objects_checked += 1; + + let expected_id = + match ObjectID::from_object_dir_and_basename(first_byte, filename.to_bytes()) { + Ok(id) => id, + Err(e) => { + result.objects_corrupted += 1; + result.errors.push(FsckError::ObjectInvalidName { + path: format!( + "objects/{first_byte:02x}/{}", + String::from_utf8_lossy(filename.to_bytes()) + ), + detail: e.to_string(), + }); + continue; + } + }; + + let fd = match openat( + &dirfd, + filename, + OFlags::RDONLY | OFlags::CLOEXEC, + Mode::empty(), + ) { + Ok(fd) => fd, + Err(e) => { + result.objects_corrupted += 1; + result.errors.push(FsckError::ObjectOpenFailed { + path: format!( + "objects/{first_byte:02x}/{}", + String::from_utf8_lossy(filename.to_bytes()) + ), + detail: e.to_string(), + }); + continue; + } + }; + + let Some(measured) = + fsck_measure_object::(fd, &expected_id, insecure, &mut result) + else { + continue; + }; + + if measured != expected_id { + result.objects_corrupted += 1; + result.errors.push(FsckError::ObjectDigestMismatch { + path: format!("objects/{}", expected_id.to_object_pathname()), + measured: measured.to_hex(), + }); + } + } + Ok(result) +} + +/// Measure the verity digest of a single object file. +/// +/// Returns `Some(digest)` on success, or `None` after recording the error +/// in `result` (so the caller can `continue`). 
+fn fsck_measure_object( + fd: OwnedFd, + expected_id: &ObjectID, + insecure: bool, + result: &mut FsckResult, +) -> Option { + if let Ok(digest) = measure_verity::(&fd) { + return Some(digest); + } + + // Kernel measurement failed — in insecure mode, try userspace computation + if insecure { + match Repository::::compute_verity_digest(&mut std::io::BufReader::new( + File::from(fd), + )) { + Ok(digest) => return Some(digest), + Err(e) => { + result.objects_corrupted += 1; + result.errors.push(FsckError::ObjectVerityFailed { + path: format!("objects/{}", expected_id.to_object_pathname()), + detail: e.to_string(), + }); + return None; + } + } + } + + // Not insecure — verity is required but missing/unsupported + result.objects_corrupted += 1; + result.errors.push(FsckError::ObjectVerityMissing { + path: format!("objects/{}", expected_id.to_object_pathname()), + }); + None +} + #[cfg(test)] mod tests { use super::*; @@ -2252,4 +2954,594 @@ mod tests { assert_eq!(result.streams_pruned, 0); Ok(()) } + + // ==================== Fsck Tests ==================== + + #[tokio::test] + async fn test_fsck_empty_repo() -> Result<()> { + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let result = repo.fsck().await?; + + assert!(result.is_ok()); + assert_eq!(result.objects_checked, 0); + assert_eq!(result.objects_corrupted, 0); + assert_eq!(result.streams_checked, 0); + assert_eq!(result.streams_corrupted, 0); + assert_eq!(result.images_checked, 0); + assert_eq!(result.images_corrupted, 0); + assert_eq!(result.broken_links, 0); + assert_eq!(result.missing_objects, 0); + assert!(result.errors.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_healthy_repo_with_objects() -> Result<()> { + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj1 = generate_test_data(32 * 1024, 0xAE); + let obj2 = generate_test_data(64 * 1024, 0xEA); + + let _obj1_id = repo.ensure_object(&obj1)?; + let _obj2_id: 
Sha512HashValue = compute_verity(&obj2); + + let mut writer = repo.create_stream(0); + writer.write_external(&obj2)?; + let _stream_id = repo.write_stream(writer, "test-stream", None)?; + repo.sync()?; + + let result = repo.fsck().await?; + + assert!(result.is_ok(), "fsck should pass: {result}"); + // 3 objects: obj1, obj2, and the splitstream object + assert!(result.objects_checked >= 3); + assert_eq!(result.objects_corrupted, 0); + assert_eq!(result.streams_checked, 1); + assert_eq!(result.streams_corrupted, 0); + assert_eq!(result.broken_links, 0); + assert_eq!(result.missing_objects, 0); + assert!(result.errors.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_corrupted_object() -> Result<()> { + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj = generate_test_data(32 * 1024, 0xAE); + let obj_id = repo.ensure_object(&obj)?; + repo.sync()?; + + // Corrupt the object by replacing the file (objects may be + // immutable due to fs-verity, so we delete and recreate). 
+ let hex = obj_id.to_hex(); + let (dir, file) = hex.split_at(2); + let obj_path = tmp + .path() + .join("repo") + .join(format!("objects/{dir}/{file}")); + std::fs::remove_file(&obj_path)?; + std::fs::write(&obj_path, b"corrupted data")?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect corruption"); + assert!( + result.objects_corrupted > 0, + "should report corrupted objects" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("object-digest-mismatch")), + "errors should mention digest mismatch: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_broken_stream_link() -> Result<()> { + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj = generate_test_data(64 * 1024, 0xEA); + let _obj_verity: Sha512HashValue = compute_verity(&obj); + + let mut writer = repo.create_stream(0); + writer.write_external(&obj)?; + let _stream_id = repo.write_stream(writer, "test-stream", None)?; + repo.sync()?; + + // The stream symlink points to a splitstream object. Find and + // read the symlink target, then delete the backing object. + let stream_symlink = tmp.path().join("repo/streams/test-stream"); + let link_target = std::fs::read_link(&stream_symlink)?; + // link_target is relative to streams/, e.g. "../objects/XX/YY..." 
+ let backing_path = tmp.path().join("repo/streams").join(&link_target); + std::fs::remove_file(&backing_path)?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect broken link"); + assert!( + result.broken_links > 0, + "should report broken links: {result}" + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_missing_stream_object_ref() -> Result<()> { + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj = generate_test_data(64 * 1024, 0xEA); + let obj_verity: Sha512HashValue = compute_verity(&obj); + + // Create a stream with an external reference to the object. + // write_external calls ensure_object internally, so the object + // will exist. + let mut writer = repo.create_stream(0); + writer.write_external(&obj)?; + let _stream_id = repo.write_stream(writer, "test-stream", None)?; + repo.sync()?; + + // Delete the referenced object (but leave the splitstream intact) + let hex = obj_verity.to_hex(); + let (dir, file) = hex.split_at(2); + let obj_path = tmp + .path() + .join("repo") + .join(format!("objects/{dir}/{file}")); + std::fs::remove_file(&obj_path)?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect missing object ref"); + assert!( + result.missing_objects > 0, + "should report missing objects: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("missing-object-ref")), + "errors should mention missing object: {:?}", + result.errors + ); + Ok(()) + } + + // ==================== Additional Fsck Gap Tests ==================== + + fn open_test_repo_dir(tmp: &tempfile::TempDir) -> cap_std::fs::Dir { + cap_std::fs::Dir::open_ambient_dir(tmp.path().join("repo"), cap_std::ambient_authority()) + .unwrap() + } + + #[tokio::test] + async fn test_fsck_detects_non_symlink_in_streams() -> Result<()> { + // Exercises fsck_category non-symlink detection (line ~1695). 
+ // The code checks entry.file_type() != FileType::Symlink and reports + // "not a symlink" for regular files or directories in streams/. + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + repo.sync()?; + + // Create a regular file directly in streams/ (not a symlink) + let dir = open_test_repo_dir(&tmp); + dir.create_dir_all("streams")?; + dir.write("streams/bogus-entry", b"not a symlink")?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect non-symlink in streams"); + assert_eq!(result.streams_corrupted, 1); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("entry-not-symlink")), + "errors should mention non-symlink: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_non_symlink_in_images() -> Result<()> { + // Exercises fsck_category non-symlink detection for the "images" + // category (same code path as streams, but counting images_corrupted). + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + repo.sync()?; + + let dir = open_test_repo_dir(&tmp); + dir.create_dir_all("images")?; + dir.write("images/bogus-image", b"not a symlink")?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect non-symlink in images"); + assert_eq!(result.images_corrupted, 1); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("entry-not-symlink")), + "errors should mention non-symlink: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_broken_ref_symlink() -> Result<()> { + // Exercises fsck_refs_dir broken symlink detection (line ~1804). + // Creates a ref symlink that points to a non-existent stream entry, + // so following the chain refs/X -> ../../stream-entry -> object fails. 
+ let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + repo.sync()?; + + // Create refs directory under streams + let dir = open_test_repo_dir(&tmp); + dir.create_dir_all("streams/refs")?; + + // Create a dangling symlink in refs/ + dir.symlink("../nonexistent-stream", "streams/refs/broken-ref")?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect broken ref symlink"); + assert!(result.broken_links > 0, "should report broken links"); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("broken-symlink") + && e.to_string().contains("refs")), + "errors should mention broken ref symlink: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_refs_dir_unexpected_file_type() -> Result<()> { + // Exercises the "unexpected file type" branch in fsck_refs_dir + // (line ~1817). Regular files in refs/ are neither symlinks nor + // directories — they should be flagged. + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + repo.sync()?; + + let dir = open_test_repo_dir(&tmp); + dir.create_dir_all("streams/refs")?; + + // Put a regular file directly in refs/ + dir.write("streams/refs/stray-file", b"should not be here")?; + + let result = repo.fsck().await?; + + assert!(!result.is_ok(), "fsck should detect unexpected file type"); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("unexpected-file-type")), + "errors should mention unexpected file type: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_refs_dir_recursive() -> Result<()> { + // Exercises the recursive walk in fsck_refs_dir: creates a nested + // subdirectory under refs/ with a broken symlink inside to verify + // the recursion actually descends into subdirs. 
+ let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + repo.sync()?; + + let dir = open_test_repo_dir(&tmp); + dir.create_dir_all("streams/refs/nested/deep")?; + + // Broken symlink in the nested directory + dir.symlink( + "../../../nonexistent-stream", + "streams/refs/nested/deep/broken-nested-ref", + )?; + + let result = repo.fsck().await?; + + assert!( + !result.is_ok(), + "fsck should detect broken symlink in nested refs" + ); + assert!(result.broken_links > 0); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("nested/deep") + && e.to_string().contains("broken-symlink")), + "error should reference the nested path: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_invalid_object_filename() -> Result<()> { + // Exercises fsck_object_dir invalid filename detection (line ~1581). + // Creates a file with a name that can't be parsed as a hex hash + // remainder in objects/XX/. + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + repo.sync()?; + + let dir = open_test_repo_dir(&tmp); + dir.create_dir_all("objects/ab")?; + dir.write("objects/ab/not-a-valid-hex-hash", b"junk")?; + + let result = repo.fsck().await?; + + assert!( + !result.is_ok(), + "fsck should detect invalid object filename" + ); + assert!(result.objects_corrupted > 0); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("object-invalid-name")), + "errors should mention invalid filename: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_broken_image_symlink() -> Result<()> { + // Exercises the broken symlink path in fsck_category for images + // (line ~1711). The stream broken-symlink test covers streams; + // this covers the same logic for images/. 
+ let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj_size: u64 = 32 * 1024; + let obj = generate_test_data(obj_size, 0xBB); + let obj_id = repo.ensure_object(&obj)?; + + let fs = make_test_fs(&obj_id, obj_size); + let image_id = fs.commit_image(&repo, None)?; + repo.sync()?; + + // Delete the backing object that the image symlink points to + let dir = open_test_repo_dir(&tmp); + let image_rel = format!("images/{}", image_id.to_hex()); + let link_target = dir.read_link(&image_rel)?; + let backing_rel = PathBuf::from("images").join(&link_target); + dir.remove_file(&backing_rel)?; + + let result = repo.fsck().await?; + + assert!( + !result.is_ok(), + "fsck should detect broken image symlink: {result}" + ); + assert!(result.broken_links > 0); + assert!(result.images_corrupted > 0); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_missing_named_ref_object() -> Result<()> { + // Exercises fsck_splitstream named ref checking (line ~1869). + // Creates a stream with a named ref pointing to a non-existent + // object, which should be detected as a missing object. 
+ let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj = generate_test_data(64 * 1024, 0xEA); + + // Create stream1 that references obj + let mut writer1 = repo.create_stream(0); + writer1.write_external(&obj)?; + let stream1_id = repo.write_stream(writer1, "test-stream1", None)?; + + // Create stream2 with a named ref to stream1 + let mut writer2 = repo.create_stream(0); + writer2.add_named_stream_ref("test-stream1", &stream1_id); + let _stream2_id = repo.write_stream(writer2, "test-stream2", None)?; + repo.sync()?; + + // Delete the object that the named ref points to (the stream1 splitstream object) + let hex = stream1_id.to_hex(); + let (prefix, rest) = hex.split_at(2); + let repo_dir = open_test_repo_dir(&tmp); + repo_dir.remove_file(format!("objects/{prefix}/{rest}"))?; + + let result = repo.fsck().await?; + + assert!( + !result.is_ok(), + "fsck should detect missing named ref object" + ); + assert!( + result.missing_objects > 0, + "should report missing objects: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("missing-named-ref")), + "errors should mention missing named ref object: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_healthy_repo_with_refs() -> Result<()> { + // Verifies fsck_refs_dir passes on valid refs. Prior tests only + // checked that fsck detects broken refs; this confirms a repo + // with valid refs reports ok. 
+ let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj = generate_test_data(64 * 1024, 0xEA); + + let mut writer = repo.create_stream(0); + writer.write_external(&obj)?; + // write_stream with reference creates a ref symlink + let _stream_id = repo.write_stream(writer, "test-stream", Some("my-ref"))?; + repo.sync()?; + + let result = repo.fsck().await?; + + assert!(result.is_ok(), "fsck should pass with valid refs: {result}"); + assert!(result.errors.is_empty()); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_corrupted_splitstream_object() -> Result<()> { + // Exercises fsck_splitstream failure-to-open path (line ~1829). + // Corrupts the splitstream object so that open_stream fails to + // parse it, which is different from a missing external object ref. + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj = generate_test_data(64 * 1024, 0xEA); + + let mut writer = repo.create_stream(0); + writer.write_external(&obj)?; + let _stream_id = repo.write_stream(writer, "test-stream", None)?; + repo.sync()?; + + // Find the splitstream object path via the stream symlink + let dir = open_test_repo_dir(&tmp); + let link_target = dir.read_link("streams/test-stream")?; + let backing_rel = PathBuf::from("streams").join(&link_target); + + // Corrupt the splitstream object (not the data object it references) + dir.remove_file(&backing_rel)?; + dir.write(&backing_rel, b"corrupted splitstream header")?; + + let result = repo.fsck().await?; + + assert!( + !result.is_ok(), + "fsck should detect corrupted splitstream: {result}" + ); + // The object digest mismatch is detected by object checking, + // and the stream is also flagged because open_stream will fail + // or the object refs check will fail. 
+ assert!( + result.objects_corrupted > 0 || result.streams_corrupted > 0, + "should report corruption: {result}" + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_validates_erofs_image_objects() -> Result<()> { + // Exercises fsck_image: creates a valid erofs image, then deletes + // one of its referenced objects. Fsck should detect the missing + // object via erofs parsing. + let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj_size: u64 = 32 * 1024; + let obj = generate_test_data(obj_size, 0xCC); + let obj_id = repo.ensure_object(&obj)?; + + let fs = make_test_fs(&obj_id, obj_size); + let image_id = fs.commit_image(&repo, None)?; + repo.sync()?; + + // Sanity: fsck passes on a healthy image + let result = repo.fsck().await?; + assert!(result.is_ok(), "healthy image should pass fsck: {result}"); + assert!(result.images_checked > 0, "should have checked the image"); + + // Delete the object referenced by the erofs image + let hex = obj_id.to_hex(); + let (prefix, rest) = hex.split_at(2); + let dir = open_test_repo_dir(&tmp); + dir.remove_file(format!("objects/{prefix}/{rest}"))?; + + let result = repo.fsck().await?; + assert!( + !result.is_ok(), + "fsck should detect missing object referenced by erofs image: {result}" + ); + assert!( + result.missing_objects > 0, + "should report missing objects: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains(&image_id.to_hex()) + && e.to_string().contains("image-missing-object")), + "error should reference the image: {:?}", + result.errors + ); + Ok(()) + } + + #[tokio::test] + async fn test_fsck_detects_corrupt_erofs_image() -> Result<()> { + // Exercises fsck_image: corrupts the erofs image data so that + // parsing fails. The catch_unwind should catch the panic from + // the current erofs reader. 
+ let tmp = tempdir(); + let repo = create_test_repo(&tmp.path().join("repo"))?; + + let obj_size: u64 = 32 * 1024; + let obj = generate_test_data(obj_size, 0xDD); + let obj_id = repo.ensure_object(&obj)?; + + let fs = make_test_fs(&obj_id, obj_size); + let image_id = fs.commit_image(&repo, None)?; + repo.sync()?; + + // Corrupt the erofs image data (replace the backing object) + let hex = image_id.to_hex(); + let (prefix, rest) = hex.split_at(2); + let dir = open_test_repo_dir(&tmp); + let obj_path = format!("objects/{prefix}/{rest}"); + dir.remove_file(&obj_path)?; + dir.write(&obj_path, b"this is not a valid erofs image")?; + + let result = repo.fsck().await?; + assert!( + !result.is_ok(), + "fsck should detect corrupt erofs image: {result}" + ); + assert!( + result + .errors + .iter() + .any(|e| e.to_string().contains("image-invalid") + || e.to_string().contains("digest mismatch")), + "error should mention erofs corruption or digest mismatch: {:?}", + result.errors + ); + Ok(()) + } } diff --git a/crates/composefs/src/splitstream.rs b/crates/composefs/src/splitstream.rs index a0b44d1b..827a02ac 100644 --- a/crates/composefs/src/splitstream.rs +++ b/crates/composefs/src/splitstream.rs @@ -41,6 +41,7 @@ const SPLITSTREAM_MAGIC: [u8; 11] = *b"SplitStream"; const LG_BLOCKSIZE: u8 = 12; // TODO: hard-coded 4k. make this generic later... // Nearly everything in the file is located at an offset indicated by a FileRange. +#[repr(C)] #[derive(Debug, Clone, Copy, FromBytes, Immutable, IntoBytes, KnownLayout)] struct FileRange { start: U64, @@ -48,6 +49,7 @@ struct FileRange { } // The only exception is the header: it is a fixed sized and comes at the start (offset 0). +#[repr(C)] #[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout)] struct SplitstreamHeader { pub magic: [u8; 11], // Contains SPLITSTREAM_MAGIC @@ -59,6 +61,7 @@ struct SplitstreamHeader { } // The info block can be located anywhere, indicated by the "info" FileRange in the header. 
+#[repr(C)] #[derive(Debug, FromBytes, Immutable, IntoBytes, KnownLayout)] struct SplitstreamInfo { pub stream_refs: FileRange, // location of the stream references array diff --git a/crates/composefs/src/test.rs b/crates/composefs/src/test.rs index 22c533e5..7e165f76 100644 --- a/crates/composefs/src/test.rs +++ b/crates/composefs/src/test.rs @@ -67,6 +67,28 @@ impl TestRepo { _tempdir: dir, } } + + /// Returns the filesystem path of the repository root. + /// + /// Useful in tests that need to manipulate the on-disk layout directly + /// (e.g. corruption tests for fsck). + pub fn path(&self) -> &std::path::Path { + self._tempdir.path() + } + + /// Returns a capability-based directory handle for the repository root. + /// + /// Tests should use this instead of raw `std::fs` operations to ensure + /// all filesystem manipulation is scoped to the repository directory. + /// + /// Only available when compiling this crate's own tests (cap-std is a + /// dev-dependency). Cross-crate consumers should construct a + /// `cap_std::fs::Dir` from [`path()`](Self::path) directly. + #[cfg(test)] + pub fn dir(&self) -> cap_std::fs::Dir { + cap_std::fs::Dir::open_ambient_dir(self._tempdir.path(), cap_std::ambient_authority()) + .unwrap() + } } impl Default for TestRepo { @@ -109,8 +131,7 @@ pub(crate) mod proptest_strategies { /// EROFS limit (`EROFS_NAME_LEN`). const NAME_MAX: usize = 255; - /// Maximum symlink target length on Linux (`PATH_MAX`). - const PATH_MAX: usize = 4096; + use crate::SYMLINK_MAX; /// Strategy for valid filenames as OsString. /// @@ -208,8 +229,7 @@ pub(crate) mod proptest_strategies { /// Strategy for symlink targets as OsString. /// /// Symlink targets on Linux are arbitrary bytes except `\0`, up to - /// [`PATH_MAX`] (4096) bytes. We generate a mix of path-like ASCII - /// targets and binary targets, occasionally long. + /// [`SYMLINK_MAX`] (1024) bytes, matching the XFS limit. 
fn symlink_target() -> impl Strategy { prop_oneof![ // Short path-like ASCII target (common case) @@ -219,8 +239,8 @@ pub(crate) mod proptest_strategies { // Binary target with arbitrary bytes (no NUL) 3 => prop::collection::vec(1..=0xFFu8, 1..=100) .prop_map(OsString::from_vec), - // Long ASCII target (up to PATH_MAX) - 1 => proptest::string::string_regex(&format!("[a-zA-Z0-9/._-]{{100,{PATH_MAX}}}")) + // Long ASCII target (up to SYMLINK_MAX) + 1 => proptest::string::string_regex(&format!("[a-zA-Z0-9/._-]{{100,{SYMLINK_MAX}}}")) .expect("valid regex") .prop_map(OsString::from), ] diff --git a/crates/composefs/src/tests/assets/special.dump b/crates/composefs/src/tests/assets/special.dump index 7665fdd7..2afe62f4 100644 --- a/crates/composefs/src/tests/assets/special.dump +++ b/crates/composefs/src/tests/assets/special.dump @@ -1,7 +1,7 @@ -/ 4096 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2 +/ 0 40555 2 0 0 0 1633950376.0 - - - trusted.foo1=bar-1 user.foo2=bar-2 /blockdev 0 60777 1 0 0 107690 1633950376.0 - - - trusted.bar=bar-2 /chardev 0 20777 1 0 0 10769 1633950376.0 - - - trusted.foo=bar-2 -/escaped-xattr 0 100777 1 0 0 0 1633950376.0 - - - trusted.overlay.redirect=/foo\n user.overlay.redirect=/foo\n user.foo=bar-2 +/escaped-xattr 0 100777 1 0 0 0 1633950376.0 - - - trusted.overlay.redirect=/foo\n user.foo=bar-2 user.overlay.redirect=/foo\n /external 42 100755 1 0 0 0 1731497312.0 70/a9125438f7255245f596c54cebb6621cb9a64f062752cf26763c1b690e7340 - 70a9125438f7255245f596c54cebb6621cb9a64f062752cf26763c1b690e7340 /fifo 0 10777 1 0 0 0 1633950376.0 - - - trusted.bar=bar-2 /inline 15 100777 1 0 0 0 1633950376.0 - FOOBAR\nINAFILE\n - user.foo=bar-2 diff --git a/crates/integration-tests/Cargo.toml b/crates/integration-tests/Cargo.toml index 476a096c..f63aa370 100644 --- a/crates/integration-tests/Cargo.toml +++ b/crates/integration-tests/Cargo.toml @@ -15,7 +15,9 @@ path = "src/main.rs" [dependencies] anyhow = "1" cap-std-ext = "4.0" 
-composefs = { workspace = true }
+# Only the test_util module is used — for creating test OCI images.
+# All verification must go through the cfsctl CLI.
+composefs-oci = { workspace = true, features = ["test", "boot"] }
 libtest-mimic = "0.8"
 linkme = "0.3"
 ocidir = "0.6"
diff --git a/crates/integration-tests/src/tests/cli.rs b/crates/integration-tests/src/tests/cli.rs
index 5daa8517..35e3a1d3 100644
--- a/crates/integration-tests/src/tests/cli.rs
+++ b/crates/integration-tests/src/tests/cli.rs
@@ -371,7 +371,7 @@ fn test_oci_pull_and_inspect() -> Result<()> {
 integration_test!(test_oci_pull_and_inspect);
 fn test_oci_layer_inspect() -> Result<()> {
-    use composefs::dumpfile_parse::{Entry, Item};
+    use composefs_oci::composefs::dumpfile_parse::{Entry, Item};
     use std::io::Read;
     use std::path::Path;
@@ -551,3 +551,268 @@ fn test_dump_files() -> Result<()> {
     Ok(())
 }
 integration_test!(test_dump_files);
+
+/// Corrupt the first object found in the repository.
+///
+/// Walks `objects/XX/` directories, deletes the first regular file found,
+/// and replaces it with junk data. Returns an error if no object is found.
+fn corrupt_one_object(repo: &std::path::Path) -> Result<()> {
+    use cap_std_ext::cap_std;
+
+    let dir = cap_std::fs::Dir::open_ambient_dir(repo, cap_std::ambient_authority())?;
+    let objects = std::path::Path::new("objects");
+    for entry in dir.read_dir(objects)? {
+        let entry = entry?;
+        if !entry.file_type()?.is_dir() {
+            continue;
+        }
+        let sub_path = objects.join(entry.file_name());
+        let sub = dir.open_dir(&sub_path)?;
+        for obj_entry in sub.entries()?
{ + let obj_entry = obj_entry?; + if obj_entry.file_type()?.is_file() { + let rel = sub_path.join(obj_entry.file_name()); + dir.remove_file(&rel)?; + dir.write(&rel, b"CORRUPTED DATA")?; + return Ok(()); + } + } + } + anyhow::bail!("no object found to corrupt"); +} + +fn test_fsck_empty_repo() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} fsck --json").read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], true); + assert_eq!(v["objectsChecked"], 0); + assert_eq!(v["objectsCorrupted"], 0); + assert!(v["errors"].as_array().unwrap().is_empty()); + Ok(()) +} +integration_test!(test_fsck_empty_repo); + +fn test_fsck_healthy_repo() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let rootfs = create_test_rootfs(fixture_dir.path())?; + + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} create-image {rootfs} my-image" + ) + .read()?; + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} fsck --json").read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], true); + assert!(v["objectsChecked"].as_u64().unwrap() > 0); + assert_eq!(v["objectsCorrupted"], 0); + assert!(v["errors"].as_array().unwrap().is_empty()); + Ok(()) +} +integration_test!(test_fsck_healthy_repo); + +fn test_fsck_detects_corrupted_object() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let rootfs = create_test_rootfs(fixture_dir.path())?; + + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} create-image {rootfs} my-image" + ) + .read()?; + corrupt_one_object(repo)?; + + let output = cmd!(sh, "{cfsctl} --insecure 
--repo {repo} fsck --json").read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], false); + assert!(v["objectsCorrupted"].as_u64().unwrap() > 0); + assert!(!v["errors"].as_array().unwrap().is_empty()); + Ok(()) +} +integration_test!(test_fsck_detects_corrupted_object); + +/// Verify that without --json, fsck exits non-zero on corruption. +fn test_fsck_nonzero_exit_on_corruption() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let rootfs = create_test_rootfs(fixture_dir.path())?; + + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} create-image {rootfs} my-image" + ) + .read()?; + corrupt_one_object(repo)?; + + cmd!(sh, "{cfsctl} --insecure --repo {repo} fsck") + .run() + .expect_err("fsck without --json should exit non-zero on corruption"); + Ok(()) +} +integration_test!(test_fsck_nonzero_exit_on_corruption); + +fn test_oci_fsck_healthy() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let oci_layout = create_oci_layout(fixture_dir.path())?; + + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci pull oci:{oci_layout} test-image" + ) + .read()?; + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci fsck --json").read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], true); + assert_eq!(v["imagesChecked"], 1); + assert_eq!(v["imagesCorrupted"], 0); + Ok(()) +} +integration_test!(test_oci_fsck_healthy); + +fn test_oci_fsck_detects_corrupted_manifest() -> Result<()> { + use cap_std_ext::cap_std; + + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let oci_layout = create_oci_layout(fixture_dir.path())?; + + 
cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci pull oci:{oci_layout} test-image" + ) + .read()?; + + // Find the manifest stream symlink and corrupt its backing object + let dir = cap_std::fs::Dir::open_ambient_dir(repo, cap_std::ambient_authority())?; + let streams = std::path::Path::new("streams"); + let mut corrupted = false; + for entry in dir.read_dir(streams)? { + let entry = entry?; + let name = entry.file_name(); + if !name.as_encoded_bytes().starts_with(b"oci-manifest-") { + continue; + } + let symlink_rel = streams.join(&name); + let target = dir.read_link(&symlink_rel)?; + let obj_rel = streams.join(&target); + if dir.exists(&obj_rel) { + dir.remove_file(&obj_rel)?; + dir.write(&obj_rel, b"CORRUPTED MANIFEST DATA")?; + corrupted = true; + break; + } + } + assert!(corrupted, "should have found a manifest object to corrupt"); + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} oci fsck --json").read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], false); + Ok(()) +} +integration_test!(test_oci_fsck_detects_corrupted_manifest); + +fn test_oci_fsck_single_image() -> Result<()> { + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let oci_layout = create_oci_layout(fixture_dir.path())?; + + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci pull oci:{oci_layout} test-image" + ) + .read()?; + + // Check a specific image by name + let output = cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci fsck --json test-image" + ) + .read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], true); + assert_eq!(v["imagesChecked"], 1); + + // A nonexistent image is a hard error (not a corruption finding) + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} oci fsck nonexistent-image" + ) + .run() + .expect_err("oci fsck should fail for nonexistent image"); + Ok(()) +} 
+integration_test!(test_oci_fsck_single_image); + +fn test_fsck_detects_broken_image_ref() -> Result<()> { + use cap_std_ext::cap_std; + + let sh = Shell::new()?; + let cfsctl = cfsctl()?; + let repo_dir = tempfile::tempdir()?; + let repo = repo_dir.path(); + let fixture_dir = tempfile::tempdir()?; + let rootfs = create_test_rootfs(fixture_dir.path())?; + + cmd!( + sh, + "{cfsctl} --insecure --repo {repo} create-image {rootfs} my-image" + ) + .read()?; + + // Break the ref chain: images/refs/my-image -> ../HEXID -> ../objects/... + // by removing the intermediate image symlink. + let dir = cap_std::fs::Dir::open_ambient_dir(repo, cap_std::ambient_authority())?; + let refs_path = std::path::Path::new("images/refs"); + let mut broken = false; + if dir.exists(refs_path) { + for entry in dir.read_dir(refs_path)? { + let entry = entry?; + let entry_rel = refs_path.join(entry.file_name()); + if dir.symlink_metadata(&entry_rel)?.is_symlink() { + let target = dir.read_link(&entry_rel)?; + let resolved = refs_path.join(&target); + if dir.symlink_metadata(&resolved).is_ok() { + dir.remove_file(&resolved)?; + broken = true; + break; + } + } + } + } + assert!(broken, "should have found an image ref to break"); + + let output = cmd!(sh, "{cfsctl} --insecure --repo {repo} fsck --json").read()?; + let v: serde_json::Value = serde_json::from_str(&output)?; + assert_eq!(v["ok"], false); + assert!(v["brokenLinks"].as_u64().unwrap() > 0); + Ok(()) +} +integration_test!(test_fsck_detects_broken_image_ref); diff --git a/crates/integration-tests/src/tests/privileged.rs b/crates/integration-tests/src/tests/privileged.rs index 96659a04..41fce1d3 100644 --- a/crates/integration-tests/src/tests/privileged.rs +++ b/crates/integration-tests/src/tests/privileged.rs @@ -13,7 +13,7 @@ use std::path::{Path, PathBuf}; use anyhow::{bail, ensure, Result}; use xshell::{cmd, Shell}; -use crate::{cfsctl, create_test_rootfs, integration_test}; +use crate::{cfsctl, integration_test}; /// Ensure we're 
running as root, or re-exec this test inside a VM. /// @@ -125,45 +125,184 @@ fn privileged_repo_without_insecure() -> Result<()> { } integration_test!(privileged_repo_without_insecure); -fn privileged_create_image() -> Result<()> { - if require_privileged("privileged_create_image")?.is_some() { +/// Build a bootable test OCI image, mount it via `cfsctl oci mount` (plain +/// and `--bootable`), and verify the filesystem content differs correctly. +/// The plain mount should contain /boot/EFI/Linux/test-6.1.0.efi (the UKI), +/// while the bootable mount should have an empty /boot (transform_for_boot +/// clears it) but still have /usr content intact. +fn privileged_oci_bootable_mount() -> Result<()> { + if require_privileged("privileged_oci_bootable_mount")?.is_some() { return Ok(()); } let sh = Shell::new()?; let cfsctl = cfsctl()?; let verity_dir = VerityTempDir::new()?; - let repo = verity_dir.path().join("repo"); - let fixture_dir = tempfile::tempdir()?; - let rootfs = create_test_rootfs(fixture_dir.path())?; + let repo_path = verity_dir.path().join("repo"); + let repo_arg = repo_path.to_str().unwrap(); + let hash = "sha256"; + + composefs_oci::test_util::create_test_bootable_oci_image(&repo_path, "boot-test:v1")?; + + let inspect_output = cmd!( + sh, + "{cfsctl} --insecure --hash {hash} --repo {repo_arg} oci inspect boot-test:v1" + ) + .read()?; + let inspect: serde_json::Value = serde_json::from_str(&inspect_output)?; + ensure!( + inspect.get("composefs_erofs").is_some(), + "inspect should show composefs_erofs field" + ); + ensure!( + inspect.get("composefs_boot_erofs").is_some(), + "inspect should show composefs_boot_erofs field" + ); + + // Plain mount: full filesystem including /boot + let mountpoint1 = tempfile::tempdir()?; + let mp1 = mountpoint1.path().to_str().unwrap(); + cmd!( + sh, + "{cfsctl} --insecure --hash {hash} --repo {repo_arg} oci mount boot-test:v1 {mp1}" + ) + .run()?; + + ensure!( + mountpoint1 + .path() + 
.join("boot/EFI/Linux/test-6.1.0.efi") + .exists(), + "plain mount should contain UKI at /boot/EFI/Linux/test-6.1.0.efi" + ); + + cmd!(sh, "umount {mp1}").run()?; + + // Bootable mount: /boot empty, /usr intact + let mountpoint2 = tempfile::tempdir()?; + let mp2 = mountpoint2.path().to_str().unwrap(); + cmd!( + sh, + "{cfsctl} --insecure --hash {hash} --repo {repo_arg} oci mount --bootable boot-test:v1 {mp2}" + ) + .run()?; + + let boot_dir = mountpoint2.path().join("boot"); + ensure!( + boot_dir.is_dir(), + "bootable mount should have /boot directory" + ); + let boot_entries: Vec<_> = std::fs::read_dir(&boot_dir)?.collect(); + ensure!( + boot_entries.is_empty(), + "bootable mount /boot should be empty, found {} entries", + boot_entries.len() + ); - let output = cmd!(sh, "{cfsctl} --repo {repo} create-image {rootfs}").read()?; ensure!( - !output.trim().is_empty(), - "expected image ID output, got nothing" + !mountpoint2 + .path() + .join("boot/EFI/Linux/test-6.1.0.efi") + .exists(), + "bootable mount should NOT contain UKI" ); + + ensure!( + mountpoint2 + .path() + .join("usr/lib/modules/6.1.0/vmlinuz") + .exists(), + "bootable mount should still have kernel at /usr/lib/modules/6.1.0/vmlinuz" + ); + + let os_release = std::fs::read_to_string(mountpoint2.path().join("etc/os-release"))?; + ensure!( + os_release.contains("ID=test"), + "bootable mount os-release missing ID=test: {os_release:?}" + ); + + cmd!(sh, "umount {mp2}").run()?; + Ok(()) } -integration_test!(privileged_create_image); +integration_test!(privileged_oci_bootable_mount); -fn privileged_create_image_idempotent() -> Result<()> { - if require_privileged("privileged_create_image_idempotent")?.is_some() { +/// Build a test OCI image, mount it via `cfsctl oci mount`, and verify +/// the filesystem content. Uses the library only for image creation (test +/// setup); all verification goes through the CLI. 
+fn privileged_oci_pull_mount() -> Result<()> { + if require_privileged("privileged_oci_pull_mount")?.is_some() { return Ok(()); } let sh = Shell::new()?; let cfsctl = cfsctl()?; let verity_dir = VerityTempDir::new()?; - let repo = verity_dir.path().join("repo"); - let fixture_dir = tempfile::tempdir()?; - let rootfs = create_test_rootfs(fixture_dir.path())?; + let repo_path = verity_dir.path().join("repo"); + let repo_arg = repo_path.to_str().unwrap(); + + // Create a test OCI image with EROFS linked (library used only for setup) + composefs_oci::test_util::create_test_oci_image(&repo_path, "mount-test:v1")?; - let id1 = cmd!(sh, "{cfsctl} --repo {repo} create-image {rootfs}").read()?; - let id2 = cmd!(sh, "{cfsctl} --repo {repo} create-image {rootfs}").read()?; + // test_util creates SHA-256 repos; tell cfsctl to match + let hash = "sha256"; + + // Verify inspect shows the EROFS ref + let inspect_output = cmd!( + sh, + "{cfsctl} --insecure --hash {hash} --repo {repo_arg} oci inspect mount-test:v1" + ) + .read()?; + let inspect: serde_json::Value = serde_json::from_str(&inspect_output)?; ensure!( - id1.trim() == id2.trim(), - "creating the same image twice should produce the same ID: {id1} vs {id2}" + inspect.get("composefs_erofs").is_some(), + "inspect should show composefs_erofs field" ); + + // Mount via cfsctl oci mount + let mountpoint = tempfile::tempdir()?; + let mp = mountpoint.path().to_str().unwrap(); + cmd!( + sh, + "{cfsctl} --insecure --hash {hash} --repo {repo_arg} oci mount mount-test:v1 {mp}" + ) + .run()?; + + // Verify file content at the mountpoint + let hostname = std::fs::read_to_string(mountpoint.path().join("etc/hostname"))?; + ensure!(hostname == "testhost\n", "hostname mismatch: {hostname:?}"); + + let os_release = std::fs::read_to_string(mountpoint.path().join("etc/os-release"))?; + ensure!( + os_release.contains("ID=test"), + "os-release missing ID: {os_release:?}" + ); + + let busybox = 
std::fs::read(mountpoint.path().join("usr/bin/busybox"))?; + ensure!( + busybox == b"busybox-binary-content", + "busybox content mismatch" + ); + + let sh_target = std::fs::read_link(mountpoint.path().join("usr/bin/sh"))?; + ensure!( + sh_target.to_str() == Some("busybox"), + "sh symlink target mismatch: {sh_target:?}" + ); + + let app_data = std::fs::read_to_string(mountpoint.path().join("usr/share/myapp/data.txt"))?; + ensure!( + app_data == "application-data", + "app data mismatch: {app_data:?}" + ); + + ensure!(mountpoint.path().join("tmp").is_dir(), "/tmp missing"); + ensure!(mountpoint.path().join("var").is_dir(), "/var missing"); + ensure!( + mountpoint.path().join("usr/lib").is_dir(), + "/usr/lib missing" + ); + Ok(()) } -integration_test!(privileged_create_image_idempotent); +integration_test!(privileged_oci_pull_mount); diff --git a/doc/plans/oci-sealing-impl.md b/doc/plans/oci-sealing-impl.md deleted file mode 100644 index dea523c9..00000000 --- a/doc/plans/oci-sealing-impl.md +++ /dev/null @@ -1,210 +0,0 @@ -# OCI Sealing Implementation in composefs-rs - -This document describes the implementation of OCI sealing in composefs-rs. For the generic specification applicable to any composefs implementation, see [oci-sealing-spec.md](oci-sealing-spec.md). - - - -## Current Implementation Status - -### What Exists - -The `composefs-oci` crate at `crates/composefs-oci/src/image.rs` already implements the core sealing mechanism. The `seal()` function computes the fsverity digest via `compute_image_id()`, creates an EROFS image from merged layers with whiteouts applied, and stores the digest in `config.labels["containers.composefs.fsverity"]`. A new config with updated labels is written via `write_config()`, returning both the SHA256 config digest and fsverity image digest. - -The implementation includes fsverity computation and verification through the `composefs` crate's fsverity module. 
Config label storage follows the OCI specification with digest mapping from SHA256 to fsverity maintained in split streams. Repository-level integrity verification is provided through `check_stream()` and `check_image()`. Mount operations check for the seal label and use fsverity verification when present. - -All objects in the repository are fsverity-enabled by default, with digests stored using the generic `ObjectID` type parameterized over `FsVerityHashValue`. Images are tracked separately in the `images/` directory, distinct from general objects due to the kernel security model that restricts non-root filesystem mounting. - -### Current Workflow - -The sealing workflow in composefs-rs begins with `create_filesystem()` building the filesystem from OCI layers. Layer tar streams are imported via `import_layer()`, converting them to composefs split streams. Files 64 bytes or smaller are stored inline in the split stream, while larger files are stored in the object store with fsverity digests. Layers are processed in order, applying overlayfs semantics including whiteout handling (`.wh.` files). Hardlinks are tracked properly across layers to maintain filesystem semantics. - -After building the filesystem, `compute_image_id()` generates the EROFS image and computes its fsverity digest. The digest is stored in the config label `containers.composefs.fsverity`. The `write_config()` function writes the new config to the repository with the digest mapping, and both the SHA256 config digest and fsverity image digest are returned. - -For mounting, the `mount()` operation requires the `containers.composefs.fsverity` label to be present. It extracts the image ID from the label and mounts at the specified path with kernel fsverity verification. - -## Repository Architecture - -The composefs-rs repository architecture at `crates/composefs/src/repository.rs` supports sealing without major changes. 
Objects are stored in a content-addressed layout under `objects/XX/YYY...` where `XX` is the first byte of the fsverity digest and `YYY` are the remaining 62 hex characters. All files in `objects/` must have fsverity enabled, enforced via `ensure_verity_equal()`. - -Images are tracked separately in the `images/` directory as symlinks to objects, with refs providing named references and garbage collection roots. Split streams are stored in the `streams/` directory, also as symlinks to objects. The repository has an "insecure" mode for development without fsverity filesystem support, but sealing operations should explicitly fail in this mode. - -Two-level naming allows access by fsverity digest (verified) or by ref name (unverified). The `ensure_stream()` method provides idempotent stream creation with SHA256-based deduplication. Streams can reference other streams via digest maps stored in split stream headers, enabling the layer→config relationship tracking. - -## Required Enhancements - -### Manifest Annotations - -Manifest annotations should be added to indicate sealed images and enable discovery without parsing configs. The sealing operation should add `containers.composefs.sealed` set to `"true"` and optionally `containers.composefs.image.fsverity` containing the image digest. This allows registries to discover sealed images and clients to optimize pull strategies. - -### Per-Layer Digest Annotations - -Per-layer digests enable incremental verification and caching. A `SealedImageInfo` structure should track the image fsverity digest, config SHA256 digest, optional config fsverity digest, and a list of layer seal information. Each `LayerSealInfo` entry should contain the original tar layer digest, the composefs fsverity of the layer, and the split stream digest in the repository. - -During sealing, layer descriptors should be annotated with `containers.composefs.layer.fsverity` after processing each layer. 
This allows verification of individual layers before merging and enables caching where shared layers have known composefs digests. - -### Verification API - -A standalone verification API separate from mounting should be implemented. The verification function should check manifest annotations for the seal flag, fetch and verify the config against the manifest's config descriptor, extract the fsverity digest from the config label, verify annotated layers if present, and optionally verify the image exists in the repository. - -This enables verification before mounting and provides detailed seal information without building the filesystem. The returned `SealedImageInfo` structure contains all digest relationships and layer details. - -### Pull Integration - -The `pull()` function in `crates/composefs-oci/src/image.rs` should be enhanced to handle sealed images. When a verify_seal flag is enabled, the pull operation should check manifest annotations for the sealed flag and verify the seal during pull if present. If the image is sealed and verification passes, some integrity checks can be skipped since the composefs digests are trusted. - -An optimization is that sealed images don't require re-computing digests during import if verification already passed. The pull result should include optional seal information alongside the manifest and config. - -### Push Integration - -Support for pushing sealed images back to registries requires preserving seal annotations through the registry round-trip. The push operation should construct the manifest with seal annotations, push the config with the composefs label, push layers optionally with layer annotations, and push the manifest with seal annotations. - -The challenge is maintaining digest mappings through the registry round-trip, as registries may re-compress or re-package layers while preserving content digests. 
- -### Insecure Mode Handling - -Repository sealing operations should explicitly fail when the repository is in insecure mode. The rationale is that if the repository doesn't enforce fsverity, sealing provides no security benefit. The check should be performed at the beginning of seal operations, returning an error if `repo.is_insecure()` is true. - -## Implementation Phases - -### Phase 1: Core Sealing (Completed) - -Phase 1 is complete with basic `seal()` implementation in `composefs-oci`, fsverity computation and storage, config label with digest, and mount with seal verification. - -### Phase 2: Manifest Annotations (Planned) - -Phase 2 will add manifest annotation support to `seal()`, create the `SealedImageInfo` type, implement the `verify_seal()` API, document the label/annotation schema, and add tests for sealed image workflows. - -Deliverables include `seal()` emitting manifests with annotations, standalone verification without mounting, and updated documentation in `doc/oci.md`. - -### Phase 3: Per-Layer Digests (Planned) - -Phase 3 will record per-layer fsverity during sealing, add layer annotations to manifests, implement incremental verification, and optimize pull for sealed images. - -Deliverables include full `SealedImageInfo` with layer details, layer-by-layer verification API, and performance improvements for sealed pulls. - -### Phase 4: Push/Registry Integration (Planned) - -Phase 4 will implement push support for sealed images, preserve annotations through registry round-trip, test with standard OCI registries, and document registry compatibility. - -Deliverables include bidirectional registry support, a registry compatibility matrix, and integration tests with real registries. - -### Phase 5: Advanced Features (Future) - -Future work includes dumpfile digest support, eager/lazy verification modes, zstd:chunked integration, the three-digest model, and signature integration. 
- -## API Design Considerations - -### Type Safety - -The generic `ObjectID` type parameterized over `FsVerityHashValue` provides type safety for digest handling. Both `Sha256HashValue` and `Sha512HashValue` implement the `FsVerityHashValue` trait with hex encoding/decoding, object pathname format, and algorithm ID constants. - -### Async/Await - -Operations like `seal()` and `pull()` are async to support parallel layer fetching with semaphore-based concurrency control. The repository is wrapped in `Arc` to enable sharing across async contexts. - -### Error Handling - -The codebase uses `anyhow::Result` for error handling with context. Seal operations should provide clear error messages distinguishing between fsverity failures, missing labels, and repository integrity issues. - -### Verification Modes - -Supporting both eager and lazy verification requires a configuration option, potentially as an enum `SealVerificationMode` with variants `Eager`, `Lazy`, and `Never`. Different defaults may apply for user versus system repositories. - -## Integration Points - -### Split Streams - -Split streams at `crates/composefs/src/splitstream.rs` are the intermediate format between OCI tar layers and composefs EROFS images. They contain inline data for small files and references to objects for large files. Split stream headers include digest maps linking SHA256 layer digests to fsverity digests. - -Per-layer sealing should leverage split streams to maintain the digest mapping. The split stream format doesn't need changes but seal metadata should reference split stream digests. - -### EROFS Generation - -EROFS image generation via `mkfs_erofs()` in `crates/composefs/src/erofs/` creates reproducible images from filesystem trees. The EROFS writer handles inline data, shared data, and metadata blocks with deterministic layout. The same input filesystem produces the same EROFS digest. - -Sealing relies on this determinism for verification. 
The EROFS format version may evolve, which is why dumpfile digests are being considered as a format-agnostic alternative. - -### Fsverity Module - -The fsverity module at `crates/composefs/src/fsverity/` provides userspace computation matching kernel behavior and ioctl wrappers for kernel interaction. Digest computation uses a hardcoded 4096-byte block size with no salt support, matching kernel fs-verity defaults. - -Sealing uses `compute_verity()` for userspace digest computation during EROFS generation and `enable_verity_maybe_copy()` to handle ETXTBSY by copying files if needed. Verification uses `measure_verity()` to get kernel-measured digests and `ensure_verity_equal()` to compare against expected values. - -## Open Implementation Questions - -### Config Annotation Method - -The current code calls `config.get_config_annotation()` which actually reads from labels, not annotations. This naming suggests potential confusion between OCI label and annotation semantics. Clarification is needed whether storing in labels is intentional or if annotations should be used for the digest. - -### Sealed Config Mutability - -Sealing modifies config content by adding the label, creating a new SHA256 for the config and breaking existing references to the old config digest. This may be acceptable since the sealed config is a new artifact, but it needs clear documentation about the relationship between sealed and unsealed images. - -### Performance at Scale - -Computing fsverity for large images is expensive as `compute_image_id()` builds the entire EROFS in memory. Streaming approaches or caching strategies should be considered for multi-GB images. The EROFS writer could be enhanced to support streaming output with incremental digest computation. - -### Seal Metadata Persistence - -Optionally persisting `SealedImageInfo` as `.seal.json` alongside images in the repository could enable faster seal information retrieval without re-parsing configs. 
This metadata cache would need invalidation strategies and shouldn't be security-critical. - -### Repository Ref Strategy - -Sealed images have different config digests than unsealed images. The ref strategy for managing variants should avoid keeping both sealed and unsealed versions indefinitely. Garbage collection should understand the relationship between sealed and unsealed images, potentially tracking seal derivation relationships. - -## Testing Strategy - -Testing should cover sealing unsealed images and verifying the config label is added correctly with the expected fsverity digest. Mounting sealed images should verify that fsverity is checked by the kernel. Verification API tests should check correct extraction of seal information from manifest and config. - -Per-layer annotation tests should verify layer digests are computed and annotated correctly. Pull integration tests should verify detection and verification of sealed images during pull. Push integration tests should verify seal metadata is preserved through registry round-trip. - -Negative tests should verify that seal operations fail in insecure mode, mounting fails with incorrect fsverity digest, and verification fails with missing or incorrect labels. - -Performance tests should measure sealing time for various image sizes and verify parallel layer processing performance. - -## Compatibility Considerations - -### OCI Registry Compatibility - -Standard OCI registries should store and serve sealed images without special handling. Unknown labels and annotations are preserved by spec-compliant registries. Testing should verify round-trip through common registries like Docker Hub, Quay, and GitHub Container Registry. - -### Existing Composefs-rs Versions - -The seal format version label enables detection of format changes. Backward compatibility means newer implementations can read older seals. 
Forward compatibility means older implementations should gracefully ignore newer seal formats they don't understand. - -### C Composefs Compatibility - -While composefs-rs aims to become the reference implementation, compatibility with the C composefs implementation should be maintained where feasible. EROFS images and dumpfiles should be interchangeable. Digest computation must match exactly between implementations. - -## Future Implementation Work - -### Dumpfile Digest Support - -Supporting dumpfile digests requires adding `containers.composefs.dumpfile.sha256` label computation during sealing. Verification should support parsing EROFS back to dumpfile format and verifying the digest. Caching the dumpfile→fsverity mapping requires careful security consideration to avoid cache poisoning. - -### zstd:chunked Integration - -Integration with zstd:chunked requires reading and writing TOC metadata with fsverity digests added to entries. The TOC format from the estargz/stargz-snapshotter projects would need extension for fsverity. Direct TOC→dumpfile conversion would enable unified metadata handling. - -### Non-Root Mounting Helper - -A separate composefs-mount-helper service would accept dumpfiles from unprivileged users, generate EROFS images, validate fsverity, and return mount file descriptors. This requires privileged service implementation with careful input validation on the dumpfile format. - -### Signature Integration - -Integrating with cosign or sigstore requires fetching and verifying signatures during pull, associating signatures with sealed images in the repository, and potentially storing signature references in seal metadata. The signature verification should happen before seal verification in the trust chain. - -## References - -See [oci-sealing-spec.md](oci-sealing-spec.md) for the generic specification and complete reference list. 
- -**Implementation references**: -- `crates/composefs-oci/src/image.rs` - OCI image operations including seal() -- `crates/composefs/src/repository.rs` - Repository management -- `crates/composefs/src/fsverity/` - Fsverity computation and verification -- `crates/composefs/src/splitstream.rs` - Split stream format -- `crates/composefs/src/erofs/` - EROFS generation - -**Related composefs-rs issues**: -- Check for existing issues about OCI sealing enhancements -- File new issues for specific implementation work items diff --git a/doc/repository.md b/doc/repository.md index c9c2a3ed..5fd3f9a4 100644 --- a/doc/repository.md +++ b/doc/repository.md @@ -149,3 +149,109 @@ For example: cfsctl mount refs/system/rootfs/some_id /mnt # does not check fs-verity cfsctl mount 974d04eaff[...] /mnt # enforces fs-verity ``` + +## OCI image storage + +OCI container images are stored using streams exclusively. Each OCI artifact +(manifest, config, layer) becomes a splitstream, and OCI "tags" are refs under +`streams/refs/oci/`. + +### Naming conventions + +| OCI artifact | Stream name pattern | Example | +|---------------|------------------------------------|------------------------------------| +| Manifest | `oci-manifest-{manifest_digest}` | `oci-manifest-sha256:abc123...` | +| Config | `oci-config-{config_digest}` | `oci-config-sha256:def456...` | +| Layer | `oci-layer-{diff_id}` | `oci-layer-sha256:ghi789...` | +| Blob | `oci-blob-{blob_digest}` | `oci-blob-sha256:jkl012...` | + +Tags are stored under `streams/refs/oci/` with percent-encoding for +filesystem safety (`/` → `%2F`): + +``` +streams/refs/oci/myimage:latest → ../../oci-manifest-sha256:abc123... +``` + +### Splitstream reference chains + +Each splitstream contains `named_refs` (semantic labels mapping to entries +in the `stream_refs` array) and `object_refs` (raw objects referenced by +the compressed stream data). 
For OCI images the chain is: + +**Manifest splitstream** (`oci-manifest-sha256:...`): + - `object_refs`: the manifest JSON blob + - `named_refs`: + - `config:{config_digest}` → config splitstream verity + - `{diff_id}` → layer splitstream verity (one per layer) + +**Config splitstream** (`oci-config-sha256:...`): + - `object_refs`: the config JSON blob + - `named_refs`: + - `{diff_id}` → layer splitstream verity (one per layer) + +**Layer splitstream** (`oci-layer-sha256:...`): + - `object_refs`: file content objects extracted from the tar + - `named_refs`: none (leaf node) + +Both the manifest and config redundantly reference the layers. The GC +can reach layers from either path. + +### Garbage collection + +The GC walks all refs under `streams/refs/` to find root splitstreams, +then transitively follows `named_refs` (by resolving fs-verity IDs +through a stream name map) and collects `object_refs`. Any object not +reachable from a root is deleted. + +Concretely, for a tagged container image: + + 1. Tag `streams/refs/oci/myimage:v1` resolves to `oci-manifest-sha256:abc` + 2. Walk the manifest: mark its JSON blob and follow `named_refs` to + the config and layer streams + 3. Walk the config: mark its JSON blob and follow `named_refs` to layers + (already visited, skipped) + 4. Walk each layer: mark all file content objects + +When a tag is removed, the manifest and everything reachable only from it +becomes GC-eligible. Layers shared between images survive as long as any +referencing manifest remains tagged. + +### EROFS image tracking via config splitstream refs + +When an EROFS image is generated from an OCI image (via +`create_filesystem` + `commit_image`), its object ID (fs-verity digest) +is stored as a named ref on the config splitstream with the key +`composefs.image`. + +GC walks from tag → manifest → config, and finds the `composefs.image` +named ref. The EROFS object ID is added to the live set, keeping the +EROFS image alive. 
The EROFS image still needs an entry under `images/` +for the kernel mount security model (see above), but `images/` is not a +GC root — the config ref is what keeps the object alive. + +This means a single OCI tag is sufficient to keep the entire image +(manifest, config, layers, and the EROFS image) alive through GC. + +### Bootable image variant + +For bootable images, a second EROFS may be generated after +`transform_for_boot` (stripping `/boot`, etc.). This boot EROFS is +stored as a second named ref on the config, `composefs.image.boot`. + +Since the config splitstream content changes (new named ref), it gets a +new fs-verity digest. This cascades: the manifest must also be +rewritten (its `config:` named ref now points to the new config verity), +producing a new manifest verity. The tag is re-pointed to the new +manifest. The old config and manifest splitstreams become unreferenced +and are collected by GC. + +The result: one tag still keeps everything alive — layers, raw EROFS, +and boot EROFS. + +### Future: sealed images + +For sealed/signed images, the EROFS comes pre-built from the registry as +part of a composefs OCI artifact (referrer pattern). The artifact +splitstream would hold references to the pre-fetched EROFS layers. This +is complementary to the unsealed case — both use the same GC mechanism +(named refs pointing to EROFS objects). 
diff --git a/examples/common/make-image b/examples/common/make-image index 5f4e5b19..91e6a93a 100755 --- a/examples/common/make-image +++ b/examples/common/make-image @@ -4,8 +4,10 @@ set -eux output="$1" -# check that the image doesn't have errors -fsck.erofs tmp/sysroot/composefs/images/* +# check that the images don't have errors +for img in tmp/sysroot/composefs/images/*; do + fsck.erofs "$img" +done fakeroot "${0%/*}/run-repart" tmp/image.raw qemu-img convert -f raw tmp/image.raw -O qcow2 "${output}" diff --git a/renovate.json b/renovate.json new file mode 100644 index 00000000..3e4b91a3 --- /dev/null +++ b/renovate.json @@ -0,0 +1,6 @@ +{ + "$schema": "https://docs.renovatebot.com/renovate-schema.json", + "extends": [ + "local>bootc-dev/infra:renovate-shared-config.json" + ] +}