@@ -123,12 +123,16 @@ impl<ObjectID: FsVerityHashValue> OciImage<ObjectID> {
123123 MediaType :: ImageConfig => {
124124 let mut stream = config_stream;
125125 let config = ImageConfiguration :: from_reader ( & mut stream) ?;
126+ // For container images, layer refs are in the config stream
126127 let refs = stream. into_named_refs ( ) ;
127128 ( Some ( config) , refs)
128129 }
129130 _ => {
130- // Artifact - config may not be a valid ImageConfiguration
131- ( None , config_stream. into_named_refs ( ) )
131+ // Artifact - layer refs are in the manifest's named refs
132+ // (the config stream has no named refs for artifacts)
133+ let mut refs = named_refs. clone ( ) ;
134+ refs. remove ( config_key. as_str ( ) ) ;
135+ ( None , refs)
132136 }
133137 } ;
134138
@@ -219,6 +223,49 @@ impl<ObjectID: FsVerityHashValue> OciImage<ObjectID> {
219223 self . seal_digest ( ) . is_some ( )
220224 }
221225
226+ /// Opens an artifact layer's backing object by index, returning a
227+ /// read-only file descriptor to the raw blob data.
228+ ///
229+ /// This only works for non-tar layers (OCI artifacts). Returns an
230+ /// error for tar layers — use the splitstream API for those.
231+ pub fn open_layer_fd (
232+ & self ,
233+ repo : & Repository < ObjectID > ,
234+ index : usize ,
235+ ) -> Result < rustix:: fd:: OwnedFd > {
236+ let descriptor = self
237+ . manifest
238+ . layers ( )
239+ . get ( index)
240+ . with_context ( || format ! ( "Layer index {index} out of range" ) ) ?;
241+
242+ ensure ! (
243+ !is_tar_media_type( descriptor. media_type( ) ) ,
244+ "open_layer_fd does not support tar layers (media type: {}); \
245+ use the splitstream API instead",
246+ descriptor. media_type( )
247+ ) ;
248+
249+ let diff_id: & str = descriptor. digest ( ) . as_ref ( ) ;
250+ let layer_verity = self
251+ . layer_verity ( diff_id)
252+ . with_context ( || format ! ( "No verity for layer {diff_id}" ) ) ?;
253+
254+ let content_id = crate :: layer_identifier ( diff_id) ;
255+ let mut stream = repo. open_stream ( & content_id, Some ( layer_verity) , None ) ?;
256+
257+ // Artifact layers are stored as a single object; the splitstream
258+ // exists only for GC tracking.
259+ let mut object_refs = vec ! [ ] ;
260+ stream. get_object_refs ( |id| object_refs. push ( id. clone ( ) ) ) ?;
261+ ensure ! (
262+ object_refs. len( ) == 1 ,
263+ "Expected exactly 1 external ref for artifact layer, got {}" ,
264+ object_refs. len( )
265+ ) ;
266+ repo. open_object ( & object_refs[ 0 ] )
267+ }
268+
222269 /// Returns the layer diff_ids (for container images).
223270 pub fn layer_diff_ids ( & self ) -> Vec < & str > {
224271 self . config
@@ -459,6 +506,19 @@ pub fn manifest_identifier(digest: &str) -> String {
459506 format ! ( "oci-manifest-{digest}" )
460507}
461508
509+ /// Returns true if this is a tar-based layer media type.
510+ fn is_tar_media_type ( media_type : & MediaType ) -> bool {
511+ matches ! (
512+ media_type,
513+ MediaType :: ImageLayer
514+ | MediaType :: ImageLayerGzip
515+ | MediaType :: ImageLayerZstd
516+ | MediaType :: ImageLayerNonDistributable
517+ | MediaType :: ImageLayerNonDistributableGzip
518+ | MediaType :: ImageLayerNonDistributableZstd
519+ )
520+ }
521+
462522/// Returns the reference path for an OCI name.
463523fn oci_ref_path ( name : & str ) -> String {
464524 format ! ( "{OCI_REF_PREFIX}{}" , encode_tag( name) )
@@ -553,6 +613,8 @@ mod test {
553613 ConfigBuilder , DescriptorBuilder , Digest as OciDigest , ImageConfigurationBuilder ,
554614 ImageManifestBuilder , RootFsBuilder ,
555615 } ;
616+ use std:: fs:: File ;
617+ use std:: io:: Read ;
556618 use std:: str:: FromStr ;
557619
558620 /// Helper to create a synthetic container image in the repository.
@@ -873,6 +935,278 @@ mod test {
873935 assert_eq ! ( read_wasm, wasm_bytes) ;
874936 }
875937
/// Test the OCI 1.1 empty config artifact pattern from the spec:
/// config is `application/vnd.oci.empty.v1+json`, layers use custom
/// media types, and layer digests are used as diff_ids.
/// See: https://github.com/opencontainers/image-spec/blob/main/artifacts-guidance.md
///
/// Covers opening the stored artifact, reading the layer back through
/// `open_layer_fd`, the out-of-range index error, and GC behaviour before
/// and after untagging.
#[test]
fn test_oci_artifact_empty_config() {
    let test_repo = TestRepo::<Sha256HashValue>::new();
    let repo = &test_repo.repo;

    let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
    let layer_digest = hash(sbom_data);

    // Store the raw layer as an object with external ref splitstream
    let blob_object_id = repo.ensure_object(sbom_data).unwrap();
    let layer_content_id = crate::layer_identifier(&layer_digest);
    let mut layer_stream = repo.create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE);
    layer_stream.add_external_size(sbom_data.len() as u64);
    // The object id is not needed again in this test, so hand it over.
    layer_stream.write_reference(blob_object_id).unwrap();
    let layer_verity = repo
        .write_stream(layer_stream, &layer_content_id, None)
        .unwrap();

    // The OCI 1.1 empty config: `{}` with the well-known digest
    let empty_config = b"{}";
    let config_digest = hash(empty_config);
    assert_eq!(
        config_digest,
        "sha256:44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a"
    );

    // Store the config — for artifacts we still write it as a config
    // splitstream, but it contains no diff_ids-derived named refs.
    // Instead, the layer refs come from the manifest layer digests.
    let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE);
    config_stream.write_inline(empty_config);
    let config_verity = repo
        .write_stream(
            config_stream,
            &crate::config_identifier(&config_digest),
            None,
        )
        .unwrap();

    // Build a spec-conformant artifact manifest with EmptyJSON config
    let config_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::EmptyJSON)
        .digest(OciDigest::from_str(&config_digest).unwrap())
        .size(empty_config.len() as u64)
        .build()
        .unwrap();

    let layer_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::Other("text/spdx+json".to_string()))
        .digest(OciDigest::from_str(&layer_digest).unwrap())
        .size(sbom_data.len() as u64)
        .build()
        .unwrap();

    let manifest = ImageManifestBuilder::default()
        .schema_version(2u32)
        .media_type(MediaType::ImageManifest)
        .config(config_descriptor.clone())
        .layers(vec![layer_descriptor])
        .build()
        .unwrap();

    // Verify: EmptyJSON config is NOT an image config
    assert_ne!(*config_descriptor.media_type(), MediaType::ImageConfig);

    // Store manifest — layer_verities uses the layer digest as key
    // (same logic as ensure_config_with_layers when !is_image_config)
    let mut layer_verities = HashMap::new();
    layer_verities.insert(layer_digest.into_boxed_str(), layer_verity);

    let manifest_json = manifest.to_string().unwrap();
    let manifest_digest = hash(manifest_json.as_bytes());

    // `repo` is already a reference; pass it through without re-borrowing.
    let (_stored_digest, manifest_verity) = write_manifest(
        repo,
        &manifest,
        &manifest_digest,
        &config_verity,
        &layer_verities,
        Some("my-sbom:v1"),
    )
    .unwrap();

    // Verify the image opens and is not a container image
    let opened = OciImage::open(repo, &manifest_digest, Some(&manifest_verity)).unwrap();
    assert!(!opened.is_container_image());
    assert_eq!(opened.layer_descriptors().len(), 1);
    assert_eq!(
        opened.layer_descriptors()[0].media_type(),
        &MediaType::Other("text/spdx+json".to_string())
    );

    // Verify open_layer_fd gives us a readable fd to the raw blob
    let fd = opened.open_layer_fd(repo, 0).unwrap();
    let mut recovered = vec![];
    File::from(fd).read_to_end(&mut recovered).unwrap();
    assert_eq!(recovered, sbom_data);

    // Out of range index should fail
    assert!(opened.open_layer_fd(repo, 1).is_err());

    // Verify GC keeps everything when tagged
    let gc = repo.gc(&[]).unwrap();
    assert_eq!(gc.objects_removed, 0);

    // Verify untagging makes it collectible
    untag_image(repo, "my-sbom:v1").unwrap();
    let gc = repo.gc(&[]).unwrap();
    assert!(gc.objects_removed > 0);
}
1054+
/// Test that open_layer_fd rejects tar layers.
///
/// Uses the synthetic container image from `create_test_image` and checks
/// that asking for layer 0 fails with the "does not support tar layers"
/// message.
#[test]
fn test_open_layer_fd_rejects_tar() {
    let test_repo = TestRepo::<Sha256HashValue>::new();
    let repo = &test_repo.repo;

    let (digest, verity, _) = create_test_image(repo, Some("myimage:v1"), "amd64");
    // `repo` is already a reference; pass it through without re-borrowing.
    let img = OciImage::open(repo, &digest, Some(&verity)).unwrap();
    assert!(img.is_container_image());

    // Tar layer should be rejected
    let err = img.open_layer_fd(repo, 0).unwrap_err();
    let msg = err.to_string();
    assert!(msg.contains("does not support tar layers"), "got: {msg}");
}
1070+
/// Test storing a non-tar layer as a splitstream with a single
/// external reference, simulating how `ensure_layer` handles
/// non-tar media types. The raw bytes go into objects/ and a
/// tiny splitstream holds the reference for GC tracking.
///
/// Checks that the stream can be found by its content id, that it
/// yields exactly the stored object id, and that the object's bytes
/// read back unchanged.
#[test]
fn test_non_tar_layer_storage() {
    let test_repo = TestRepo::<Sha256HashValue>::new();
    let repo = &test_repo.repo;

    let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
    let diff_id = hash(sbom_data);

    // Store the raw bytes as a repository object
    let object_id = repo.ensure_object(sbom_data).unwrap();

    // Create a splitstream with a single external ref (matches ensure_layer)
    let content_id = crate::layer_identifier(&diff_id);
    let mut stream = repo.create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE);
    stream.add_external_size(sbom_data.len() as u64);
    // `object_id` is compared and opened again below, so a clone is passed.
    stream.write_reference(object_id.clone()).unwrap();
    let stream_verity = repo.write_stream(stream, &content_id, None).unwrap();

    // Verify has_stream finds it, and that it reports the verity we got
    // back from write_stream (one comparison covers both presence and value)
    let found = repo.has_stream(&content_id).unwrap();
    assert_eq!(found.as_ref(), Some(&stream_verity));

    // Verify we can get the external ref back from the splitstream
    let mut reader = repo
        .open_stream(
            &content_id,
            Some(&stream_verity),
            Some(crate::skopeo::OCI_BLOB_CONTENT_TYPE),
        )
        .unwrap();
    let mut refs = vec![];
    reader.get_object_refs(|id| refs.push(id.clone())).unwrap();
    assert_eq!(refs.len(), 1);
    assert_eq!(refs[0], object_id);

    // Verify we can open the raw object and read the data back
    let mut recovered = vec![];
    File::from(repo.open_object(&object_id).unwrap())
        .read_to_end(&mut recovered)
        .unwrap();
    assert_eq!(recovered, sbom_data);
}
1118+
/// Test that a non-tar artifact layer (stored as an external ref)
/// is preserved by GC when referenced from a tagged manifest.
///
/// The image is only written and garbage-collected here; it is never
/// opened via `OciImage::open`.
#[test]
fn test_non_tar_artifact_gc() {
    let test_repo = TestRepo::<Sha256HashValue>::new();
    let repo = &test_repo.repo;

    // Store the raw blob as an object
    let sbom_data = br#"{"spdxVersion":"SPDX-2.3","name":"example"}"#;
    let diff_id = hash(sbom_data);
    let blob_object_id = repo.ensure_object(sbom_data).unwrap();

    // Create a splitstream with external ref (matches ensure_layer)
    let layer_content_id = crate::layer_identifier(&diff_id);
    let mut layer_stream = repo.create_stream(crate::skopeo::OCI_BLOB_CONTENT_TYPE);
    layer_stream.add_external_size(sbom_data.len() as u64);
    // `blob_object_id` is opened again at the end, so a clone is passed.
    layer_stream
        .write_reference(blob_object_id.clone())
        .unwrap();
    let layer_verity = repo
        .write_stream(layer_stream, &layer_content_id, None)
        .unwrap();

    // Store a minimal config (inline data only)
    let config_bytes = b"{}";
    let config_digest = hash(config_bytes);
    let mut config_stream = repo.create_stream(OCI_CONFIG_CONTENT_TYPE);
    config_stream.write_inline(config_bytes);
    let config_verity = repo
        .write_stream(
            config_stream,
            &crate::config_identifier(&config_digest),
            None,
        )
        .unwrap();

    // Build and store a manifest referencing both.
    // Note: the config descriptor is labelled as an image config even
    // though its body is `{}`.
    let config_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::ImageConfig)
        .digest(OciDigest::from_str(&config_digest).unwrap())
        .size(config_bytes.len() as u64)
        .build()
        .unwrap();
    let layer_descriptor = DescriptorBuilder::default()
        .media_type(MediaType::Other("text/spdx+json".to_string()))
        .digest(OciDigest::from_str(&diff_id).unwrap())
        .size(sbom_data.len() as u64)
        .build()
        .unwrap();
    let manifest = ImageManifestBuilder::default()
        .schema_version(2u32)
        .media_type(MediaType::ImageManifest)
        .config(config_descriptor)
        .layers(vec![layer_descriptor])
        .build()
        .unwrap();

    let mut layer_verities = HashMap::new();
    layer_verities.insert(diff_id.into_boxed_str(), layer_verity);

    let manifest_json = manifest.to_string().unwrap();
    let manifest_digest = hash(manifest_json.as_bytes());

    // `repo` is already a reference; pass it through without re-borrowing.
    let (_stored_digest, _manifest_verity) = write_manifest(
        repo,
        &manifest,
        &manifest_digest,
        &config_verity,
        &layer_verities,
        Some("my-sbom:v1"),
    )
    .unwrap();

    // GC should preserve everything. The config stream written above holds
    // only inline data, so the blob is presumably kept alive through the
    // layer verities handed to `write_manifest`
    // (manifest → layer splitstream → external ref).
    // NOTE(review): confirm the exact reference chain against `write_manifest`.
    let gc = repo.gc(&[]).unwrap();
    assert_eq!(gc.objects_removed, 0, "tagged artifact should be preserved");

    // Verify we can still get an fd to the raw blob object
    let mut recovered = vec![];
    File::from(repo.open_object(&blob_object_id).unwrap())
        .read_to_end(&mut recovered)
        .unwrap();
    assert_eq!(recovered, sbom_data);
}
1209+
8761210 /// Test storing and listing multiple container images.
8771211 #[ test]
8781212 fn test_multiple_images ( ) {
0 commit comments