Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
186 changes: 134 additions & 52 deletions fuzz/fuzz_targets/differential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,63 +9,145 @@
#![no_main]

use libfuzzer_sys::fuzz_target;
use tar_core_testutil::{parse_tar_core, parse_tar_rs};
use tar_core_testutil::{parse_tar_core, parse_tar_rs, OwnedEntry};

/// Print every full 512-byte block of the (post-fixup) data to stderr,
/// either as `<all zeros>` or as a parsed `tar_core::Header` debug dump.
/// Any trailing partial block (< 512 bytes) is ignored, matching the
/// parser's block-aligned view of the input.
fn dump_headers(data: &[u8]) {
    for (i, block) in data.chunks_exact(512).enumerate() {
        let offset = i * 512;
        if block.iter().all(|&b| b == 0) {
            eprintln!("block[{i}] @{offset}: <all zeros>");
        } else {
            // chunks_exact guarantees exactly 512 bytes, so try_into cannot fail.
            let header = tar_core::Header::from_bytes(block.try_into().unwrap());
            eprintln!("block[{i}] @{offset}: {header:?}");
        }
    }
}

/// Assert that tar-core's parse of `data` is equivalent to tar-rs's.
///
/// tar-core is intentionally more lenient than tar-rs in some cases (e.g.
/// all-null numeric fields are accepted as 0), so the check is one-sided:
/// tar-core must produce *at least* as many entries as tar-rs, and the
/// common prefix of entries must match exactly. On any violation, the raw
/// headers and both entry lists are dumped to stderr before panicking.
fn compare_entries(data: &[u8], tar_rs_entries: &[OwnedEntry], tar_core_entries: &[OwnedEntry]) {
    let n_core = tar_core_entries.len();
    let n_rs = tar_rs_entries.len();

    if n_core < n_rs {
        eprintln!("entry count mismatch: tar-core={} tar-rs={}", n_core, n_rs);
        dump_headers(data);
        tar_rs_entries
            .iter()
            .enumerate()
            .for_each(|(i, e)| eprintln!("tar-rs [{i}]: {e:?}"));
        tar_core_entries
            .iter()
            .enumerate()
            .for_each(|(i, e)| eprintln!("tar-core[{i}]: {e:?}"));
        panic!(
            "tar-core parsed fewer entries than tar-rs: tar-core={} tar-rs={}",
            n_core, n_rs,
        );
    }

    // Compare the prefix of tar-core's entries that tar-rs also produced.
    for (i, (rs, core)) in tar_rs_entries.iter().zip(tar_core_entries).enumerate() {
        if rs == core {
            continue;
        }
        eprintln!("mismatch at entry {i}:");
        dump_headers(data);
        eprintln!("  tar-rs:   {rs:?}");
        eprintln!("  tar-core: {core:?}");
        panic!("entry {i} differs between tar-rs and tar-core");
    }
}

/// Preprocess fuzz input to install valid tar header checksums.
///
/// Walks the input in 512-byte steps. Zero blocks (end-of-archive markers)
/// are left alone. Every other block is treated as a candidate header: its
/// checksum field is recomputed and written in place, and the size field is
/// then parsed (best-effort) so the walk can jump over the entry's content
/// blocks to the next header.
///
/// Without this fixup almost every random input dies at the checksum gate,
/// so the parser's deeper logic (PAX extensions, GNU long name/link, sparse
/// files, etc.) would go essentially unexercised.
fn fixup_checksums(data: &mut [u8]) {
    let len = data.len();
    let mut offset = 0;

    while offset + 512 <= len {
        let block = &mut data[offset..offset + 512];

        // End-of-archive marker: nothing to fix here.
        if block.iter().all(|&b| b == 0) {
            offset += 512;
            continue;
        }

        // Per the tar spec, the checksum is the unsigned byte sum of the
        // header with the checksum field itself treated as eight spaces.
        block[148..156].fill(b' ');
        let sum: u64 = block.iter().map(|&b| u64::from(b)).sum();

        // Write it back as 7 octal digits plus a NUL terminator.
        let encoded = format!("{:07o}\0", sum);
        let n = encoded.len().min(8);
        block[148..148 + n].copy_from_slice(&encoded.as_bytes()[..n]);

        // Best-effort skip over the entry's content: parse the size field
        // (bytes 124..136) and round it up to the next 512-byte boundary.
        let mut next = offset + 512;
        if let Some(size) = parse_octal_simple(&block[124..136]) {
            let padded = ((size as usize) + 511) & !511;
            if next + padded <= len {
                next += padded;
            }
        }
        offset = next;
    }
}

/// Best-effort octal parser for the header size field.
///
/// Only plain octal is handled — base-256 (GNU) encoding is not needed
/// because the result is used solely to skip content blocks during checksum
/// fixup. An empty field (all spaces and/or NULs) parses as `Some(0)`;
/// anything that is not valid octal yields `None`.
///
/// Works directly on subslices of the input instead of collecting the digit
/// run into a temporary `Vec`, avoiding an allocation per header block.
fn parse_octal_simple(bytes: &[u8]) -> Option<u64> {
    // Strip leading space padding.
    let mut field = bytes;
    while let [b' ', rest @ ..] = field {
        field = rest;
    }
    // The digit run ends at the first space or NUL terminator.
    let end = field
        .iter()
        .position(|&b| b == b' ' || b == 0)
        .unwrap_or(field.len());
    let digits = &field[..end];
    if digits.is_empty() {
        // An all-padding field is treated as size 0, not as an error.
        return Some(0);
    }
    let s = core::str::from_utf8(digits).ok()?;
    u64::from_str_radix(s, 8).ok()
}

fuzz_target!(|data: &[u8]| {
if data.len() > 256 * 1024 {
return;
}

let tar_rs_entries = parse_tar_rs(data);
let tar_core_entries = parse_tar_core(data);

assert_eq!(
tar_core_entries.len(),
tar_rs_entries.len(),
"entry count mismatch: tar-core={} tar-rs={}",
tar_core_entries.len(),
tar_rs_entries.len(),
);

for i in 0..tar_rs_entries.len() {
let rs = &tar_rs_entries[i];
let core = &tar_core_entries[i];

assert_eq!(
rs.path,
core.path,
"path mismatch at entry {i}: tar-rs={:?} tar-core={:?}",
String::from_utf8_lossy(&rs.path),
String::from_utf8_lossy(&core.path),
);
assert_eq!(rs.size, core.size, "size mismatch at entry {i}");
assert_eq!(
rs.entry_type, core.entry_type,
"entry_type mismatch at entry {i}"
);
assert_eq!(rs.mode, core.mode, "mode mismatch at entry {i}");
assert_eq!(rs.uid, core.uid, "uid mismatch at entry {i}");
assert_eq!(rs.gid, core.gid, "gid mismatch at entry {i}");
assert_eq!(rs.mtime, core.mtime, "mtime mismatch at entry {i}");
assert_eq!(
rs.link_target, core.link_target,
"link_target mismatch at entry {i}"
);
assert_eq!(rs.uname, core.uname, "uname mismatch at entry {i}");
assert_eq!(rs.gname, core.gname, "gname mismatch at entry {i}");
assert_eq!(
rs.dev_major, core.dev_major,
"dev_major mismatch at entry {i}"
);
assert_eq!(
rs.dev_minor, core.dev_minor,
"dev_minor mismatch at entry {i}"
);
assert_eq!(
rs.content, core.content,
"content mismatch at entry {i} (size={})",
rs.size,
);
assert_eq!(rs.xattrs, core.xattrs, "xattr mismatch at entry {i}");
// 90% of the time, fix up checksums to exercise deeper parser logic.
// 10% of the time, pass raw bytes to test checksum validation itself.
let should_fixup = !data.is_empty() && data[0] % 10 != 0;

if should_fixup {
let mut data = data.to_vec();
fixup_checksums(&mut data);
let tar_rs_entries = parse_tar_rs(&data);
let tar_core_entries = parse_tar_core(&data);
compare_entries(&data, &tar_rs_entries, &tar_core_entries);
} else {
let tar_rs_entries = parse_tar_rs(data);
let tar_core_entries = parse_tar_core(data);
compare_entries(data, &tar_rs_entries, &tar_core_entries);
}
});
17 changes: 12 additions & 5 deletions fuzz/fuzz_targets/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ use tar_core::HEADER_SIZE;

/// Drive a parser to completion over `data`, checking invariants on each entry.
/// Returns normally on errors or NeedData — the point is that it must not panic.
fn run_parser(data: &[u8], limits: Limits) {
fn run_parser(data: &[u8], limits: Limits, verify_checksums: bool) {
let mut parser = Parser::new(limits);
parser.set_verify_checksums(verify_checksums);
let mut offset: usize = 0;

loop {
Expand Down Expand Up @@ -99,8 +100,14 @@ fn run_parser(data: &[u8], limits: Limits) {
}

fuzz_target!(|data: &[u8]| {
    // 90% of the time, skip checksum verification to exercise deeper parser
    // logic (PAX extensions, GNU long name/link, sparse files, field parsing,
    // etc.). Random fuzz input almost never has valid checksums, so without
    // this the fuzzer would break immediately on every input.
    //
    // 10% of the time, verify checksums normally to test that code path too.
    let skip_checksums = !data.is_empty() && data[0] % 10 != 0;

    // Permissive limits should accept anything that isn't structurally
    // broken; default limits are stricter (may error on oversized paths/pax)
    // but must never panic.
    run_parser(data, Limits::permissive(), !skip_checksums);
    run_parser(data, Limits::default(), !skip_checksums);
});
36 changes: 29 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1444,6 +1444,18 @@ impl OctU64 {
}
}

/// True if `b` counts as whitespace when trimming tar header fields.
///
/// This is the `u8::is_ascii_whitespace()` set (HT, LF, FF, CR, space)
/// extended with vertical tab (0x0b). Rust's `is_ascii_whitespace` follows
/// the WHATWG definition, which leaves VT out, but real tar implementations
/// (and Rust's `str::trim()`) strip it as whitespace — so a field like
/// `"0000000\x0b"` must still parse.
fn is_tar_whitespace(b: u8) -> bool {
    matches!(b, b'\t' | b'\n' | 0x0b | 0x0c | b'\r' | b' ')
}

/// Parse an octal ASCII field into a u64.
///
/// Octal fields in tar headers are ASCII strings with optional leading
Expand All @@ -1456,17 +1468,27 @@ impl OctU64 {
/// Returns [`HeaderError::InvalidOctal`] if the field contains invalid
/// characters (anything other than spaces, digits 0-7, or null bytes).
pub(crate) fn parse_octal(bytes: &[u8]) -> Result<u64> {
// Tar octal fields are padded with leading spaces and terminated by
// spaces or null bytes. Strip both ends to get the digit run.
let trimmed = bytes
// Tar octal fields are padded with leading spaces/nulls and terminated
// by spaces, tabs, or null bytes. We first truncate at the first null
// (matching how C-string fields work in tar), then trim whitespace from
// both ends to isolate the digit run.
//
// Note: we use `is_tar_whitespace` rather than `u8::is_ascii_whitespace`
// because the latter omits vertical tab (0x0b), which real tar
// implementations treat as whitespace (and Rust's `str::trim()` strips).
let truncated = match bytes.iter().position(|&b| b == 0) {
Some(i) => &bytes[..i],
None => bytes,
};
let trimmed = truncated
.iter()
.position(|&b| b != b' ')
.position(|&b| !is_tar_whitespace(b))
.map(|start| {
let rest = &bytes[start..];
let rest = &truncated[start..];
let end = rest
.iter()
.position(|&b| b == b' ' || b == b'\0')
.unwrap_or(rest.len());
.rposition(|&b| !is_tar_whitespace(b))
.map_or(0, |p| p + 1);
&rest[..end]
})
.unwrap_or(&[]);
Expand Down
Loading
Loading