Skip to content

Commit 6abc58b

Browse files
committed
oci: Support non-tar layers for OCI artifacts
Store non-tar layers (SBOMs, signatures, WASM modules, etc.) as inline splitstreams with OCI_BLOB_CONTENT_TYPE. This allows pulling artifacts like `quay.io/centos-bootc/centos-bootc:...sbom` which have layers with media types like `text/spdx+json` rather than tar archives. The data is stored inline in the splitstream rather than using the tar split machinery. This keeps it readable via open_stream() and properly tracked by GC when referenced from a manifest. Assisted-by: OpenCode (Opus 4) Signed-off-by: Colin Walters <walters@verbum.org>
1 parent d5ac320 commit 6abc58b

File tree

2 files changed

+395
-18
lines changed

2 files changed

+395
-18
lines changed

crates/composefs-oci/src/oci_image.rs

Lines changed: 336 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,12 +123,16 @@ impl<ObjectID: FsVerityHashValue> OciImage<ObjectID> {
123123
MediaType::ImageConfig => {
124124
let mut stream = config_stream;
125125
let config = ImageConfiguration::from_reader(&mut stream)?;
126+
// For container images, layer refs are in the config stream
126127
let refs = stream.into_named_refs();
127128
(Some(config), refs)
128129
}
129130
_ => {
130-
// Artifact - config may not be a valid ImageConfiguration
131-
(None, config_stream.into_named_refs())
131+
// Artifact - layer refs are in the manifest's named refs
132+
// (the config stream has no named refs for artifacts)
133+
let mut refs = named_refs.clone();
134+
refs.remove(config_key.as_str());
135+
(None, refs)
132136
}
133137
};
134138

@@ -219,6 +223,49 @@ impl<ObjectID: FsVerityHashValue> OciImage<ObjectID> {
219223
self.seal_digest().is_some()
220224
}
221225

226+
/// Opens an artifact layer's backing object by index, returning a
227+
/// read-only file descriptor to the raw blob data.
228+
///
229+
/// This only works for non-tar layers (OCI artifacts). Returns an
230+
/// error for tar layers — use the splitstream API for those.
231+
pub fn open_layer_fd(
232+
&self,
233+
repo: &Repository<ObjectID>,
234+
index: usize,
235+
) -> Result<rustix::fd::OwnedFd> {
236+
let descriptor = self
237+
.manifest
238+
.layers()
239+
.get(index)
240+
.with_context(|| format!("Layer index {index} out of range"))?;
241+
242+
ensure!(
243+
!is_tar_media_type(descriptor.media_type()),
244+
"open_layer_fd does not support tar layers (media type: {}); \
245+
use the splitstream API instead",
246+
descriptor.media_type()
247+
);
248+
249+
let diff_id: &str = descriptor.digest().as_ref();
250+
let layer_verity = self
251+
.layer_verity(diff_id)
252+
.with_context(|| format!("No verity for layer {diff_id}"))?;
253+
254+
let content_id = crate::layer_identifier(diff_id);
255+
let mut stream = repo.open_stream(&content_id, Some(layer_verity), None)?;
256+
257+
// Artifact layers are stored as a single object; the splitstream
258+
// exists only for GC tracking.
259+
let mut object_refs = vec![];
260+
stream.get_object_refs(|id| object_refs.push(id.clone()))?;
261+
ensure!(
262+
object_refs.len() == 1,
263+
"Expected exactly 1 external ref for artifact layer, got {}",
264+
object_refs.len()
265+
);
266+
repo.open_object(&object_refs[0])
267+
}
268+
222269
/// Returns the layer diff_ids (for container images).
223270
pub fn layer_diff_ids(&self) -> Vec<&str> {
224271
self.config
@@ -459,6 +506,19 @@ pub fn manifest_identifier(digest: &str) -> String {
459506
format!("oci-manifest-{digest}")
460507
}
461508

509+
/// Returns true if this is a tar-based layer media type.
510+
fn is_tar_media_type(media_type: &MediaType) -> bool {
511+
matches!(
512+
media_type,
513+
MediaType::ImageLayer
514+
| MediaType::ImageLayerGzip
515+
| MediaType::ImageLayerZstd
516+
| MediaType::ImageLayerNonDistributable
517+
| MediaType::ImageLayerNonDistributableGzip
518+
| MediaType::ImageLayerNonDistributableZstd
519+
)
520+
}
521+
462522
/// Returns the reference path for an OCI name.
463523
fn oci_ref_path(name: &str) -> String {
464524
format!("{OCI_REF_PREFIX}{}", encode_tag(name))
@@ -553,6 +613,8 @@ mod test {
553613
ConfigBuilder, DescriptorBuilder, Digest as OciDigest, ImageConfigurationBuilder,
554614
ImageManifestBuilder, RootFsBuilder,
555615
};
616+
use std::fs::File;
617+
use std::io::Read;
556618
use std::str::FromStr;
557619

558620
/// Helper to create a synthetic container image in the repository.
@@ -873,6 +935,278 @@ mod test {
873935
assert_eq!(read_wasm, wasm_bytes);
874936
}
875937

938+
/// Round-trips an OCI 1.1 "empty config" artifact through the repository.
///
/// The config blob is `{}` with media type `application/vnd.oci.empty.v1+json`,
/// the single layer uses a custom media type, and the layer digest doubles as
/// the key that would otherwise be a diff_id.
/// See: https://github.com/opencontainers/image-spec/blob/main/artifacts-guidance.md
#[test]
fn test_oci_artifact_empty_config() {
    let fixture = TestRepo::<Sha256HashValue>::new();
    let repo = &fixture.repo;

    let payload = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
    let payload_digest = hash(payload);

    // Raw layer bytes go in as an object; a small splitstream then points
    // at that object through a single external reference.
    let blob_id = repo.ensure_object(payload).unwrap();
    let layer_verity = {
        let stream_name = crate::layer_identifier(&payload_digest);
        let mut writer = repo.create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE);
        writer.add_external_size(payload.len() as u64);
        writer.write_reference(blob_id.clone()).unwrap();
        repo.write_stream(writer, &stream_name, None).unwrap()
    };

    // `{}` is the OCI 1.1 empty config and has a well-known digest.
    let config_bytes = b"{}";
    let config_digest = hash(config_bytes);
    assert_eq!(
        config_digest,
        "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a"
    );

    // The config is still written as a config splitstream, but nothing is
    // attached to it: layer refs come from the manifest's layer digests.
    let config_verity = {
        let mut writer = repo.create_stream(OCI_CONFIG_CONTENT_TYPE);
        writer.write_inline(config_bytes);
        repo.write_stream(writer, &crate::config_identifier(&config_digest), None)
            .unwrap()
    };

    // Assemble a spec-conformant artifact manifest around an EmptyJSON config.
    let config_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::EmptyJSON)
        .digest(OciDigest::from_str(&config_digest).unwrap())
        .size(config_bytes.len() as u64)
        .build()
        .unwrap();
    // An EmptyJSON config must not be mistaken for an image config.
    assert_ne!(*config_descriptor.media_type(), MediaType::ImageConfig);

    let sbom_media_type = MediaType::Other("text/spdx+json".to_string());
    let layer_descriptor = DescriptorBuilder::default()
        .media_type(sbom_media_type.clone())
        .digest(OciDigest::from_str(&payload_digest).unwrap())
        .size(payload.len() as u64)
        .build()
        .unwrap();

    let manifest = ImageManifestBuilder::default()
        .schema_version(2u32)
        .media_type(MediaType::ImageManifest)
        .config(config_descriptor)
        .layers(vec![layer_descriptor])
        .build()
        .unwrap();

    // The verity map is keyed by the layer digest (same logic as
    // ensure_config_with_layers when !is_image_config).
    let mut layer_verities = HashMap::new();
    layer_verities.insert(payload_digest.clone().into_boxed_str(), layer_verity);

    let manifest_digest = hash(manifest.to_string().unwrap().as_bytes());
    let (_stored_digest, manifest_verity) = write_manifest(
        &repo,
        &manifest,
        &manifest_digest,
        &config_verity,
        &layer_verities,
        Some("my-sbom:v1"),
    )
    .unwrap();

    // The stored image opens as an artifact, not as a container image.
    let artifact = OciImage::open(&repo, &manifest_digest, Some(&manifest_verity)).unwrap();
    assert!(!artifact.is_container_image());
    assert_eq!(artifact.layer_descriptors().len(), 1);
    assert_eq!(artifact.layer_descriptors()[0].media_type(), &sbom_media_type);

    // open_layer_fd hands back a readable fd onto the raw blob.
    let mut blob_file = File::from(artifact.open_layer_fd(&repo, 0).unwrap());
    let mut round_trip = Vec::new();
    blob_file.read_to_end(&mut round_trip).unwrap();
    assert_eq!(round_trip, payload);

    // An index past the last layer is an error.
    assert!(artifact.open_layer_fd(&repo, 1).is_err());

    // While the tag exists, GC removes nothing.
    let tagged_gc = repo.gc(&[]).unwrap();
    assert_eq!(tagged_gc.objects_removed, 0);

    // Once the tag is dropped, GC collects the artifact's objects.
    untag_image(&repo, "my-sbom:v1").unwrap();
    let untagged_gc = repo.gc(&[]).unwrap();
    assert!(untagged_gc.objects_removed > 0);
}
1054+
1055+
/// `open_layer_fd` must refuse a layer whose media type is a tar type.
#[test]
fn test_open_layer_fd_rejects_tar() {
    let fixture = TestRepo::<Sha256HashValue>::new();
    let repo = &fixture.repo;

    // The synthetic image from the shared helper opens as a container image.
    let (image_digest, image_verity, _) = create_test_image(repo, Some("myimage:v1"), "amd64");
    let image = OciImage::open(&repo, &image_digest, Some(&image_verity)).unwrap();
    assert!(image.is_container_image());

    // Asking for layer 0 must fail with the dedicated "tar layers" message.
    let msg = image.open_layer_fd(&repo, 0).unwrap_err().to_string();
    assert!(msg.contains("does not support tar layers"), "got: {msg}");
}
1070+
1071+
/// Checks the storage layout used for a non-tar layer, simulating how
/// `ensure_layer` handles non-tar media types: the raw bytes are stored as
/// a repository object, and a tiny splitstream carries one external
/// reference to that object for GC tracking.
#[test]
fn test_non_tar_layer_storage() {
    let fixture = TestRepo::<Sha256HashValue>::new();
    let repo = &fixture.repo;

    let payload = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
    let payload_digest = hash(payload);

    // Raw bytes first, as a plain repository object.
    let blob_id = repo.ensure_object(payload).unwrap();

    // Then the splitstream holding a single external ref (matches ensure_layer).
    let stream_name = crate::layer_identifier(&payload_digest);
    let written_verity = {
        let mut writer = repo.create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE);
        writer.add_external_size(payload.len() as u64);
        writer.write_reference(blob_id.clone()).unwrap();
        repo.write_stream(writer, &stream_name, None).unwrap()
    };

    // has_stream must report the stream with the verity we just got back.
    let looked_up = repo.has_stream(&stream_name).unwrap();
    assert_eq!(looked_up.as_ref(), Some(&written_verity));

    // Reading the stream back yields exactly the one external ref.
    let mut reader = repo
        .open_stream(
            &stream_name,
            Some(&written_verity),
            Some(crate::skopeo::OCI_BLOB_CONTENT_TYPE),
        )
        .unwrap();
    let mut seen_refs = Vec::new();
    reader
        .get_object_refs(|id| seen_refs.push(id.clone()))
        .unwrap();
    assert_eq!(seen_refs, vec![blob_id.clone()]);

    // The referenced object contains the original bytes.
    let mut blob_file = File::from(repo.open_object(&blob_id).unwrap());
    let mut round_trip = Vec::new();
    blob_file.read_to_end(&mut round_trip).unwrap();
    assert_eq!(round_trip, payload);
}
1118+
1119+
/// Test that a non-tar artifact layer (stored as an external ref)
/// is preserved by GC when referenced from a tagged manifest.
///
/// The test stores the blob object, a layer splitstream referencing it, a
/// minimal `{}` config and a tagged manifest, then runs GC and checks that
/// nothing was removed and the blob is still readable.
#[test]
fn test_non_tar_artifact_gc() {
    // The builder types and `FromStr` come from the test module's imports.
    let test_repo = TestRepo::<Sha256HashValue>::new();
    let repo = &test_repo.repo;

    // Store the raw blob as an object
    let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
    let diff_id = hash(sbom_data);
    let blob_object_id = repo.ensure_object(sbom_data).unwrap();

    // Create a splitstream with external ref (matches ensure_layer)
    let layer_content_id = crate::layer_identifier(&diff_id);
    let mut layer_stream = repo.create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE);
    layer_stream.add_external_size(sbom_data.len() as u64);
    layer_stream
        .write_reference(blob_object_id.clone())
        .unwrap();
    let layer_verity = repo
        .write_stream(layer_stream, &layer_content_id, None)
        .unwrap();

    // Store a minimal config. Only inline bytes are written to this stream,
    // so it carries no named refs of its own.
    let config_bytes = b"{}";
    let config_digest = hash(config_bytes);
    let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE);
    config_stream.write_inline(config_bytes);
    let config_verity = repo
        .write_stream(
            config_stream,
            &crate::config_identifier(&config_digest),
            None,
        )
        .unwrap();

    // Build and store a manifest referencing both
    let config_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::ImageConfig)
        .digest(OciDigest::from_str(&config_digest).unwrap())
        .size(config_bytes.len() as u64)
        .build()
        .unwrap();
    let layer_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::Other("text/spdx+json".to_string()))
        .digest(OciDigest::from_str(&diff_id).unwrap())
        .size(sbom_data.len() as u64)
        .build()
        .unwrap();
    let manifest = ImageManifestBuilder::default()
        .schema_version(2u32)
        .media_type(MediaType::ImageManifest)
        .config(config_descriptor)
        .layers(vec![layer_descriptor])
        .build()
        .unwrap();

    // The layer verity is handed to write_manifest keyed by the layer digest.
    let mut layer_verities = HashMap::new();
    layer_verities.insert(diff_id.clone().into_boxed_str(), layer_verity);

    let manifest_json = manifest.to_string().unwrap();
    let manifest_digest = hash(manifest_json.as_bytes());

    let (_stored_digest, _manifest_verity) = write_manifest(
        &repo,
        &manifest,
        &manifest_digest,
        &config_verity,
        &layer_verities,
        Some("my-sbom:v1"),
    )
    .unwrap();

    // GC should preserve everything. The config stream above has no named
    // refs, so the blob object has to be reachable via
    // manifest → layer splitstream (manifest named ref) → external ref.
    let gc = repo.gc(&[]).unwrap();
    assert_eq!(gc.objects_removed, 0, "tagged artifact should be preserved");

    // Verify we can still get an fd to the raw blob object
    let mut recovered = vec![];
    File::from(repo.open_object(&blob_object_id).unwrap())
        .read_to_end(&mut recovered)
        .unwrap();
    assert_eq!(recovered, sbom_data);
}
1209+
8761210
/// Test storing and listing multiple container images.
8771211
#[test]
8781212
fn test_multiple_images() {

0 commit comments

Comments
 (0)