Skip to content

Commit b3ff471

Browse files
authored
Merge pull request #19 from cgwalters/various-pax-gnu-bits
Various patches motivated by bootc-dev/bootc#2073
2 parents d6dce93 + 2d098aa commit b3ff471

File tree

4 files changed

+274
-11
lines changed

4 files changed

+274
-11
lines changed

fuzz/generate_corpus.rs

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,9 @@ fn main() {
272272
.unwrap()
273273
.size(0)
274274
.unwrap();
275-
builder.add_pax("SCHILY.xattr.user.test", b"value123");
275+
builder
276+
.add_pax("SCHILY.xattr.user.test", b"value123")
277+
.unwrap();
276278
let hdr = builder.finish_bytes();
277279
let mut archive = hdr;
278280
archive.extend_from_slice(&EOA);
@@ -292,10 +294,12 @@ fn main() {
292294
.unwrap()
293295
.mtime(9999999999)
294296
.unwrap();
295-
builder.add_pax(
296-
"SCHILY.xattr.security.selinux",
297-
b"system_u:object_r:usr_t:s0",
298-
);
297+
builder
298+
.add_pax(
299+
"SCHILY.xattr.security.selinux",
300+
b"system_u:object_r:usr_t:s0",
301+
)
302+
.unwrap();
299303
let hdr = builder.finish_bytes();
300304
let mut archive = hdr;
301305
archive.extend_from_slice(b"payload");
@@ -512,7 +516,7 @@ fn main() {
512516
.unwrap()
513517
.mtime(1700000000)
514518
.unwrap();
515-
builder.add_pax("mtime", b"1700000000.123456789");
519+
builder.add_pax("mtime", b"1700000000.123456789").unwrap();
516520
let hdr = builder.finish_bytes();
517521
let mut archive = hdr;
518522
archive.extend_from_slice(&EOA);

src/builder.rs

Lines changed: 51 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -863,11 +863,23 @@ impl EntryBuilder {
863863
/// Add a custom PAX extension record.
864864
///
865865
/// This is useful for adding metadata that doesn't fit in standard
866-
/// header fields. The PAX extension will be emitted regardless of
867-
/// the extension mode setting.
868-
pub fn add_pax(&mut self, key: &str, value: impl AsRef<[u8]>) -> &mut Self {
866+
/// header fields (e.g. extended attributes, high-precision timestamps).
867+
///
868+
/// # Errors
869+
///
870+
/// Returns [`HeaderError::IncompatibleMode`] if the extension mode is
871+
/// [`ExtensionMode::Gnu`]. PAX records are only emitted in PAX mode;
872+
/// call [`Self::set_extension_mode`] first or use [`Self::new_ustar`]
873+
/// to create a PAX-mode builder.
874+
pub fn add_pax(&mut self, key: &str, value: impl AsRef<[u8]>) -> Result<&mut Self> {
875+
if self.mode != ExtensionMode::Pax {
876+
return Err(HeaderError::IncompatibleMode {
877+
required: ExtensionMode::Pax,
878+
current: self.mode,
879+
});
880+
}
869881
self.pax_mut().add(key, value);
870-
self
882+
Ok(self)
871883
}
872884

873885
/// Get or create the PAX builder for this entry.
@@ -1861,6 +1873,7 @@ mod tests {
18611873
.size(0)
18621874
.unwrap()
18631875
.add_pax("SCHILY.xattr.user.test", b"value")
1876+
.unwrap()
18641877
.entry_type(EntryType::Regular);
18651878

18661879
assert!(builder.needs_extension()); // Due to custom PAX
@@ -1876,6 +1889,40 @@ mod tests {
18761889
assert!(pax_str.contains("SCHILY.xattr.user.test=value"));
18771890
}
18781891

1892+
#[test]
1893+
fn test_add_pax_rejects_gnu_mode() {
1894+
let mut builder = EntryBuilder::new_gnu();
1895+
let result = builder.add_pax("SCHILY.xattr.user.test", b"value");
1896+
assert!(result.is_err());
1897+
let err = result.unwrap_err();
1898+
assert!(
1899+
err.to_string().contains("Pax"),
1900+
"error should mention Pax mode: {err}"
1901+
);
1902+
}
1903+
1904+
#[test]
1905+
fn test_add_pax_after_mode_switch() {
1906+
// Switching from GNU to PAX mode should allow add_pax().
1907+
let mut builder = EntryBuilder::new_gnu();
1908+
assert!(builder.add_pax("key", b"val").is_err());
1909+
1910+
builder.set_extension_mode(ExtensionMode::Pax);
1911+
builder
1912+
.path(b"file.txt")
1913+
.mode(0o644)
1914+
.unwrap()
1915+
.size(0)
1916+
.unwrap()
1917+
.add_pax("SCHILY.xattr.user.test", b"value")
1918+
.unwrap();
1919+
1920+
let blocks = builder.finish();
1921+
// Should have PAX 'x' header + PAX data + main header
1922+
assert!(blocks.len() >= 3);
1923+
assert_eq!(blocks[0].entry_type(), EntryType::XHeader);
1924+
}
1925+
18791926
#[test]
18801927
fn test_entry_builder_extension_mode_switching() {
18811928
let long_path = "x/".repeat(60);

src/lib.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,15 @@ pub enum HeaderError {
9494
detail: String,
9595
},
9696

97+
/// Operation requires a different extension mode.
98+
#[error("operation requires {required:?} extension mode, but builder is in {current:?} mode")]
99+
IncompatibleMode {
100+
/// The mode the operation requires.
101+
required: crate::builder::ExtensionMode,
102+
/// The mode the builder is currently in.
103+
current: crate::builder::ExtensionMode,
104+
},
105+
97106
/// The header checksum does not match the computed value.
98107
#[error("checksum mismatch: expected {expected}, computed {computed}")]
99108
ChecksumMismatch {
@@ -1662,6 +1671,13 @@ pub const PAX_MTIME: &str = "mtime";
16621671
pub const PAX_ATIME: &str = "atime";
16631672
/// PAX extended header key for change time.
16641673
pub const PAX_CTIME: &str = "ctime";
1674+
1675+
// TODO: POSIX defines `hdrcharset` and `charset` PAX keys for signaling
1676+
// non-UTF-8 header values and file data encoding respectively. No major
1677+
// implementation (GNU tar, Go archive/tar, Rust tar crate) acts on these
1678+
// in practice -- they all just accept raw bytes. We should add support if
1679+
// interoperability requires it.
1680+
16651681
/// PAX extended header prefix for SCHILY extended attributes.
16661682
pub const PAX_SCHILY_XATTR: &str = "SCHILY.xattr.";
16671683

@@ -2272,6 +2288,18 @@ mod tests {
22722288
assert_eq!(pax.get("path"), Some("日本語/ファイル.txt"));
22732289
}
22742290

2291+
#[test]
2292+
fn test_pax_non_utf8_value() {
2293+
// PAX values may contain non-UTF-8 bytes in practice.
2294+
// value() should fail but value_bytes() should preserve them.
2295+
let record = b"25 path=etc/\x80\xfe\xff/file.txt\n";
2296+
let mut pax = PaxExtensions::new(record);
2297+
let ext = pax.next().unwrap().unwrap();
2298+
assert_eq!(ext.key().unwrap(), "path");
2299+
assert!(ext.value().is_err(), "non-UTF-8 value should fail value()");
2300+
assert_eq!(ext.value_bytes(), b"etc/\x80\xfe\xff/file.txt");
2301+
}
2302+
22752303
#[test]
22762304
fn test_pax_mtime_fractional() {
22772305
// PAX mtime can have fractional seconds
@@ -4084,7 +4112,8 @@ mod tests {
40844112
.mtime(params.mtime)
40854113
.unwrap()
40864114
.entry_type(EntryType::Regular)
4087-
.add_pax(&params.xattr_key, params.xattr_value.as_bytes());
4115+
.add_pax(&params.xattr_key, params.xattr_value.as_bytes())
4116+
.unwrap();
40884117

40894118
builder.finish()
40904119
}

src/parse.rs

Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2004,6 +2004,93 @@ mod tests {
20042004
}
20052005
}
20062006

2007+
#[test]
2008+
#[allow(invalid_from_utf8)]
2009+
fn test_parser_pax_non_utf8_path() {
2010+
// Non-UTF-8 bytes in PAX path values are accepted, matching the
2011+
// pragmatic behavior of Go's archive/tar and the Rust tar crate.
2012+
// The POSIX spec says PAX path values SHOULD be UTF-8, but real-world
2013+
// archives (e.g. from Docker/BuildKit) may contain non-UTF-8 paths.
2014+
// See bootc-dev/bootc#2073 for a concrete example.
2015+
let non_utf8_path: &[u8] = b"etc/ssl/certs/F\xf5tan\xfas\xedtv\xe1ny.pem";
2016+
assert!(
2017+
core::str::from_utf8(non_utf8_path).is_err(),
2018+
"test data must be non-UTF-8"
2019+
);
2020+
2021+
let mut archive = Vec::new();
2022+
archive.extend(make_pax_header(&[("path", non_utf8_path)]));
2023+
archive.extend_from_slice(&make_header(b"placeholder.pem", 0, b'0'));
2024+
archive.extend(zeroes(1024));
2025+
2026+
let mut parser = Parser::new(Limits::default());
2027+
let event = parser.parse(&archive).unwrap();
2028+
2029+
match event {
2030+
ParseEvent::Entry { entry, .. } => {
2031+
assert_eq!(entry.path.as_ref(), non_utf8_path);
2032+
// The lossy accessor should replace invalid bytes
2033+
let lossy = entry.path_lossy();
2034+
assert!(
2035+
lossy.contains('\u{FFFD}'),
2036+
"lossy conversion should have replacement chars"
2037+
);
2038+
}
2039+
other => panic!("Expected Entry, got {:?}", other),
2040+
}
2041+
}
2042+
2043+
#[test]
2044+
fn test_pax_non_utf8_path_roundtrip() {
2045+
// Verify that a non-UTF-8 PAX path survives a builder -> parser
2046+
// roundtrip. The path must exceed 100 bytes to trigger PAX emission.
2047+
use crate::builder::EntryBuilder;
2048+
2049+
// 101+ byte path with non-UTF-8 bytes embedded
2050+
let mut long_path =
2051+
b"a/very/deep/directory/structure/that/needs/to/exceed/one/hundred/bytes/\xf0\xf1\xf2/"
2052+
.to_vec();
2053+
long_path.extend(b"and/some/more/nested/dirs/to/be/safe/file.bin");
2054+
assert!(long_path.len() > 100, "path must exceed 100 bytes");
2055+
assert!(
2056+
core::str::from_utf8(&long_path).is_err(),
2057+
"path must contain non-UTF-8"
2058+
);
2059+
2060+
let mut builder = EntryBuilder::new_ustar();
2061+
builder
2062+
.path(&long_path)
2063+
.mode(0o644)
2064+
.unwrap()
2065+
.size(5)
2066+
.unwrap()
2067+
.mtime(0)
2068+
.unwrap()
2069+
.uid(0)
2070+
.unwrap()
2071+
.gid(0)
2072+
.unwrap();
2073+
2074+
let mut archive = Vec::new();
2075+
archive.extend_from_slice(&builder.finish_bytes());
2076+
// 5 bytes of content, padded to 512
2077+
let mut content_block = [0u8; 512];
2078+
content_block[..5].copy_from_slice(b"hello");
2079+
archive.extend_from_slice(&content_block);
2080+
archive.extend(zeroes(1024));
2081+
2082+
let mut parser = Parser::new(Limits::default());
2083+
let event = parser.parse(&archive).unwrap();
2084+
2085+
match event {
2086+
ParseEvent::Entry { entry, .. } => {
2087+
assert_eq!(entry.path.as_ref(), long_path.as_slice());
2088+
assert_eq!(entry.size, 5);
2089+
}
2090+
other => panic!("Expected Entry, got {:?}", other),
2091+
}
2092+
}
2093+
20072094
#[test]
20082095
fn test_parser_pax_size_override() {
20092096
// PAX header should override the size in the actual header
@@ -2186,6 +2273,29 @@ mod tests {
21862273
}
21872274
}
21882275

2276+
#[test]
2277+
#[allow(invalid_from_utf8)]
2278+
fn test_parser_pax_non_utf8_linkpath() {
2279+
// Non-UTF-8 bytes in PAX linkpath values should be preserved.
2280+
let non_utf8_target: &[u8] = b"targets/\xc0\xc1invalid.so";
2281+
assert!(core::str::from_utf8(non_utf8_target).is_err());
2282+
2283+
let mut archive = Vec::new();
2284+
archive.extend(make_pax_header(&[("linkpath", non_utf8_target)]));
2285+
archive.extend_from_slice(&make_header(b"link.so", 0, b'2')); // symlink
2286+
archive.extend(zeroes(1024));
2287+
2288+
let mut parser = Parser::new(Limits::default());
2289+
let event = parser.parse(&archive).unwrap();
2290+
2291+
match event {
2292+
ParseEvent::Entry { entry, .. } => {
2293+
assert_eq!(entry.link_target.as_deref(), Some(non_utf8_target.as_ref()));
2294+
}
2295+
other => panic!("Expected Entry, got {:?}", other),
2296+
}
2297+
}
2298+
21892299
// =========================================================================
21902300
// PAX global header tests
21912301
// =========================================================================
@@ -2453,6 +2563,79 @@ mod tests {
24532563
}
24542564
}
24552565

2566+
#[test]
2567+
fn test_parser_pax_before_gnu_long_name() {
2568+
// PAX 'x' -> GNU 'L' -> real entry: this is what tar-rs's builder
2569+
// produces when you call append_pax_extensions() (e.g. for xattrs)
2570+
// followed by append_data() with a long path. The PAX metadata
2571+
// should still be associated with the real entry, and PAX path
2572+
// (if present) should take precedence over the GNU long name.
2573+
//
2574+
// This ordering matters for ecosystem compatibility with bootc
2575+
// (see bootc-dev/bootc#2073).
2576+
let gnu_name =
2577+
"gnu/long/name/that/exceeds/one/hundred/bytes/".to_string() + &"g".repeat(60);
2578+
let xattr_value = b"some xattr value";
2579+
2580+
let mut archive = Vec::new();
2581+
// PAX header first (with xattr but no path -- simulating bootc's
2582+
// copy_entry which strips path/linkpath from PAX)
2583+
archive.extend(make_pax_header(&[(
2584+
"SCHILY.xattr.user.test",
2585+
xattr_value.as_slice(),
2586+
)]));
2587+
// GNU long name second
2588+
archive.extend(make_gnu_long_name(gnu_name.as_bytes()));
2589+
// Real entry last
2590+
archive.extend_from_slice(&make_header(b"placeholder", 0, b'0'));
2591+
archive.extend(zeroes(1024));
2592+
2593+
let mut parser = Parser::new(Limits::default());
2594+
let event = parser.parse(&archive).unwrap();
2595+
2596+
match event {
2597+
ParseEvent::Entry { entry, .. } => {
2598+
// GNU long name should be used (no PAX path to override it)
2599+
assert_eq!(entry.path.as_ref(), gnu_name.as_bytes());
2600+
// PAX xattr should still be preserved
2601+
assert!(entry.pax.is_some());
2602+
let pax = PaxExtensions::new(entry.pax.unwrap());
2603+
let xattr = pax
2604+
.filter_map(|e| e.ok())
2605+
.find(|e| e.key_bytes().starts_with(b"SCHILY.xattr."));
2606+
assert!(xattr.is_some(), "xattr should be preserved");
2607+
assert_eq!(xattr.unwrap().value_bytes(), xattr_value);
2608+
}
2609+
other => panic!("Expected Entry, got {:?}", other),
2610+
}
2611+
}
2612+
2613+
#[test]
2614+
fn test_parser_pax_path_overrides_gnu_long_name_reversed_order() {
2615+
// Same as test_parser_combined_gnu_pax but with reversed ordering:
2616+
// PAX 'x' (with path) -> GNU 'L' -> real entry.
2617+
// PAX path should still win regardless of order.
2618+
let gnu_name = "gnu/long/name/".to_string() + &"g".repeat(100);
2619+
let pax_path = "pax/should/still/win/file.txt";
2620+
2621+
let mut archive = Vec::new();
2622+
// PAX first this time (reversed from test_parser_combined_gnu_pax)
2623+
archive.extend(make_pax_header(&[("path", pax_path.as_bytes())]));
2624+
archive.extend(make_gnu_long_name(gnu_name.as_bytes()));
2625+
archive.extend_from_slice(&make_header(b"header.txt", 0, b'0'));
2626+
archive.extend(zeroes(1024));
2627+
2628+
let mut parser = Parser::new(Limits::default());
2629+
let event = parser.parse(&archive).unwrap();
2630+
2631+
match event {
2632+
ParseEvent::Entry { entry, .. } => {
2633+
assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
2634+
}
2635+
other => panic!("Expected Entry, got {:?}", other),
2636+
}
2637+
}
2638+
24562639
#[test]
24572640
fn test_parser_gnu_long_name_and_link_combined() {
24582641
// Both GNU long name and long link for the same entry

0 commit comments

Comments
 (0)