Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 88 additions & 13 deletions fuzz/fuzz_targets/differential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,10 @@
#![no_main]

use libfuzzer_sys::fuzz_target;
use tar_core_testutil::{parse_tar_core, parse_tar_rs};

fuzz_target!(|data: &[u8]| {
if data.len() > 256 * 1024 {
return;
}

let tar_rs_entries = parse_tar_rs(data);
let tar_core_entries = parse_tar_core(data);
use tar_core_testutil::{parse_tar_core, parse_tar_rs, OwnedEntry};

/// Compare entries parsed by tar-rs and tar-core, asserting equivalence.
fn compare_entries(tar_rs_entries: &[OwnedEntry], tar_core_entries: &[OwnedEntry]) {
assert_eq!(
tar_core_entries.len(),
tar_rs_entries.len(),
Expand All @@ -27,10 +21,7 @@ fuzz_target!(|data: &[u8]| {
tar_rs_entries.len(),
);

for i in 0..tar_rs_entries.len() {
let rs = &tar_rs_entries[i];
let core = &tar_core_entries[i];

for (i, (rs, core)) in tar_rs_entries.iter().zip(tar_core_entries).enumerate() {
assert_eq!(
rs.path,
core.path,
Expand Down Expand Up @@ -68,4 +59,88 @@ fuzz_target!(|data: &[u8]| {
);
assert_eq!(rs.xattrs, core.xattrs, "xattr mismatch at entry {i}");
}
}

/// Preprocess fuzz input so that every candidate tar header carries a valid
/// checksum.
///
/// Walks `data` in 512-byte blocks. All-zero blocks (end-of-archive markers)
/// are left untouched. Every other block is treated as a header: its checksum
/// field (bytes 148..156) is overwritten with the unsigned byte sum of the
/// block, computed with the checksum field itself set to spaces and encoded as
/// seven octal digits followed by a NUL.
///
/// After patching a header, the size field (bytes 124..136) is parsed with
/// `parse_octal_simple` and, if the padded content fits inside `data`, that
/// many bytes are skipped so content blocks are not rewritten. If the size is
/// unparseable or the content would run past the end of `data`, nothing is
/// skipped and the following block is treated as another header.
///
/// This lets the fuzzer get past the checksum verification gate and exercise
/// deeper parsing logic (PAX extensions, GNU long name/link, sparse files,
/// etc.).
fn fixup_checksums(data: &mut [u8]) {
    const BLOCK_SIZE: usize = 512;

    let total = data.len();
    let mut offset = 0;

    // Invariant: `offset <= total`, so `total - offset` never underflows.
    while total - offset >= BLOCK_SIZE {
        let block = &mut data[offset..offset + BLOCK_SIZE];
        offset += BLOCK_SIZE;

        // Skip zero blocks (end-of-archive markers).
        if block.iter().all(|&b| b == 0) {
            continue;
        }

        // The checksum is defined over the header with the checksum field
        // itself filled with spaces.
        block[148..156].fill(b' ');

        // Unsigned sum of all 512 bytes; at most 512 * 255, so it always fits
        // in seven octal digits.
        let mut remaining: u32 = block.iter().map(|&b| u32::from(b)).sum();

        // Encode as 7 zero-padded octal digits (least significant digit
        // written last in the field) followed by a NUL terminator.
        for digit in block[148..155].iter_mut().rev() {
            // Masked to 0..=7, so the narrowing cast is lossless.
            *digit = b'0' + (remaining & 7) as u8;
            remaining >>= 3;
        }
        block[155] = 0;

        // Round the declared size up to a whole number of blocks. The
        // arithmetic stays in u64 and is checked, so oversized values cannot
        // wrap or be truncated on 32-bit targets.
        let padded = parse_octal_simple(&block[124..136])
            .and_then(|size| size.checked_add(511))
            .map(|size| size & !511);

        // usize -> u64 is a widening conversion on all supported targets.
        let available = (total - offset) as u64;
        if let Some(padded) = padded {
            if padded <= available {
                // `padded <= available`, which originated from a usize, so
                // this cast cannot truncate.
                offset += padded as usize;
            }
        }
    }
}

/// Lenient octal parser for a tar header size field.
///
/// Leading ASCII spaces are skipped, then digits are consumed up to the first
/// space or NUL byte (or the end of `bytes`). Base-256 encoding is
/// deliberately not handled, since the result is only used to skip content.
///
/// Returns `Some(0)` when the field contains no digits at all (empty, blank,
/// or starting with NUL). Returns `None` if any consumed byte is not an octal
/// digit (including a sign character) or if the value does not fit in `u64`.
fn parse_octal_simple(bytes: &[u8]) -> Option<u64> {
    bytes
        .iter()
        .copied()
        .skip_while(|&b| b == b' ')
        .take_while(|&b| b != b' ' && b != 0)
        // Accumulate digit by digit; an empty run folds to the initial 0.
        .try_fold(0u64, |acc, b| match b {
            b'0'..=b'7' => acc.checked_mul(8)?.checked_add(u64::from(b - b'0')),
            _ => None,
        })
}

fuzz_target!(|data: &[u8]| {
    // Inputs above 256 KiB are ignored.
    if data.len() > 256 * 1024 {
        return;
    }

    // The first input byte selects the mode: when it is not a multiple of 10
    // (roughly 90% of inputs) checksums are fixed up so deeper parser logic is
    // exercised; otherwise the raw bytes are used, which tests checksum
    // validation itself. Empty input is always passed through raw.
    let should_fixup = data.first().map_or(false, |&first| first % 10 != 0);

    // Backing storage for the patched copy; only initialised in fix-up mode.
    let patched;
    let input: &[u8] = if should_fixup {
        let mut copy = data.to_vec();
        fixup_checksums(&mut copy);
        patched = copy;
        &patched
    } else {
        data
    };

    let tar_rs_entries = parse_tar_rs(input);
    let tar_core_entries = parse_tar_core(input);
    compare_entries(&tar_rs_entries, &tar_core_entries);
});
79 changes: 75 additions & 4 deletions fuzz/fuzz_targets/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,80 @@ fn run_parser(data: &[u8], limits: Limits) {
);
}

/// Preprocess fuzz input so that every candidate tar header carries a valid
/// checksum.
///
/// Walks `data` in 512-byte blocks. All-zero blocks (end-of-archive markers)
/// are left untouched. Every other block is treated as a header: its checksum
/// field (bytes 148..156) is overwritten with the unsigned byte sum of the
/// block, computed with the checksum field itself set to spaces and encoded as
/// seven octal digits followed by a NUL.
///
/// After patching a header, the size field (bytes 124..136) is parsed with
/// `parse_octal_simple` and, if the padded content fits inside `data`, that
/// many bytes are skipped so content blocks are not rewritten. If the size is
/// unparseable or the content would run past the end of `data`, nothing is
/// skipped and the following block is treated as another header.
///
/// This lets the fuzzer get past the checksum verification gate and exercise
/// deeper parsing logic (PAX extensions, GNU long name/link, sparse files,
/// etc.).
fn fixup_checksums(data: &mut [u8]) {
    const BLOCK_SIZE: usize = 512;

    let total = data.len();
    let mut offset = 0;

    // Invariant: `offset <= total`, so `total - offset` never underflows.
    while total - offset >= BLOCK_SIZE {
        let block = &mut data[offset..offset + BLOCK_SIZE];
        offset += BLOCK_SIZE;

        // Skip zero blocks (end-of-archive markers).
        if block.iter().all(|&b| b == 0) {
            continue;
        }

        // The checksum is defined over the header with the checksum field
        // itself filled with spaces.
        block[148..156].fill(b' ');

        // Unsigned sum of all 512 bytes; at most 512 * 255, so it always fits
        // in seven octal digits.
        let mut remaining: u32 = block.iter().map(|&b| u32::from(b)).sum();

        // Encode as 7 zero-padded octal digits (least significant digit
        // written last in the field) followed by a NUL terminator.
        for digit in block[148..155].iter_mut().rev() {
            // Masked to 0..=7, so the narrowing cast is lossless.
            *digit = b'0' + (remaining & 7) as u8;
            remaining >>= 3;
        }
        block[155] = 0;

        // Round the declared size up to a whole number of blocks. The
        // arithmetic stays in u64 and is checked, so oversized values cannot
        // wrap or be truncated on 32-bit targets.
        let padded = parse_octal_simple(&block[124..136])
            .and_then(|size| size.checked_add(511))
            .map(|size| size & !511);

        // usize -> u64 is a widening conversion on all supported targets.
        let available = (total - offset) as u64;
        if let Some(padded) = padded {
            if padded <= available {
                // `padded <= available`, which originated from a usize, so
                // this cast cannot truncate.
                offset += padded as usize;
            }
        }
    }
}

/// Lenient octal parser for a tar header size field.
///
/// Leading ASCII spaces are skipped, then digits are consumed up to the first
/// space or NUL byte (or the end of `bytes`). Base-256 encoding is
/// deliberately not handled, since the result is only used to skip content.
///
/// Returns `Some(0)` when the field contains no digits at all (empty, blank,
/// or starting with NUL). Returns `None` if any consumed byte is not an octal
/// digit (including a sign character) or if the value does not fit in `u64`.
fn parse_octal_simple(bytes: &[u8]) -> Option<u64> {
    bytes
        .iter()
        .copied()
        .skip_while(|&b| b == b' ')
        .take_while(|&b| b != b' ' && b != 0)
        // Accumulate digit by digit; an empty run folds to the initial 0.
        .try_fold(0u64, |acc, b| match b {
            b'0'..=b'7' => acc.checked_mul(8)?.checked_add(u64::from(b - b'0')),
            _ => None,
        })
}

// Fuzz entry point: feeds the input to `run_parser` under both the permissive
// and the default `Limits`.
fuzz_target!(|data: &[u8]| {
// NOTE(review): this span comes from a rendered diff without +/- markers; the
// two unconditional `run_parser` calls below look like the pre-change lines
// that the branch further down replaces — confirm before treating this text
// as the final body.
// Run with permissive limits (should accept anything that isn't structurally broken).
run_parser(data, Limits::permissive());
// Run with default limits (stricter — may error on oversized paths/pax, but must not panic).
run_parser(data, Limits::default());
// 90% of the time, fix up checksums to exercise deeper parser logic.
// 10% of the time, pass raw bytes to test checksum validation itself.
// The mode is chosen by the first input byte: a multiple of 10 (or an empty
// input) selects the raw path.
let should_fixup = !data.is_empty() && data[0] % 10 != 0;

if should_fixup {
// Patch a private copy so the fuzzer-owned input is never mutated.
let mut data = data.to_vec();
fixup_checksums(&mut data);
run_parser(&data, Limits::permissive());
run_parser(&data, Limits::default());
} else {
run_parser(data, Limits::permissive());
run_parser(data, Limits::default());
}
});
24 changes: 23 additions & 1 deletion src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,12 @@ pub struct Parser {
/// When true, entries with empty paths are allowed through instead of
/// returning [`ParseError::EmptyPath`].
allow_empty_path: bool,
/// When false, header checksum verification is skipped. This is useful
/// for fuzzing, where random input almost never has valid checksums,
/// preventing the fuzzer from exercising deeper parser logic.
///
/// Default: `true`.
verify_checksums: bool,
}

impl Parser {
Expand All @@ -655,6 +661,7 @@ impl Parser {
state: State::ReadHeader,
pending: PendingMetadata::default(),
allow_empty_path: false,
verify_checksums: true,
}
}

Expand All @@ -664,6 +671,19 @@ impl Parser {
self.allow_empty_path = allow;
}

/// Control whether header checksums are verified during parsing.
///
/// Stores `verify` in the parser's `verify_checksums` flag. While the flag
/// is `false`, the header-parsing step does not call
/// [`Header::verify_checksum`], so headers are accepted regardless of the
/// contents of their checksum field. This is primarily useful for fuzz
/// testing, where random input almost never produces valid checksums and
/// would otherwise keep the fuzzer from reaching deeper parser code paths.
///
/// Default: `true`.
pub fn set_verify_checksums(&mut self, verify: bool) {
self.verify_checksums = verify;
}

/// Create a new parser with default limits.
#[must_use]
pub fn with_defaults() -> Self {
Expand Down Expand Up @@ -756,7 +776,9 @@ impl Parser {

// Parse header
let header = Header::from_bytes(header_bytes);
header.verify_checksum()?;
if self.verify_checksums {
header.verify_checksum()?;
}

let entry_type = header.entry_type();
let size = header.entry_size()?;
Expand Down
Loading