Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ cargo-fuzz = true
[dependencies]
arbitrary = { version = "1", features = ["derive"] }
libfuzzer-sys = "0.4"
rand = { version = "0.9", features = ["small_rng"] }
tar = "0.4"

[dependencies.tar-core]
Expand Down
104 changes: 79 additions & 25 deletions fuzz/fuzz_targets/differential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,23 @@
//! tar-core must never panic. A secondary goal is that whenever tar-rs
//! successfully parses an entry, tar-core should produce a matching entry
//! with equivalent metadata, xattrs, and identical content bytes.
//!
//! ## Behavioral difference allowlist
//!
//! tar-core is intentionally stricter than tar-rs in some areas. When
//! tar-core rejects an archive that tar-rs accepts, we check whether the
//! error falls into a known category of expected divergence before
//! failing the test. Current allowlisted differences:
//!
//! - **Malformed PAX records**: tar-core propagates PAX parse errors
//! (malformed record format, non-UTF-8 keys). tar-rs silently skips
//! malformed PAX records via `.flatten()`.

#![no_main]

use libfuzzer_sys::fuzz_target;
use tar_core_testutil::{parse_tar_core, parse_tar_rs, OwnedEntry};
use tar_core::parse::{Limits, ParseError};
use tar_core_testutil::{parse_tar_core_detailed, parse_tar_rs, OwnedEntry};

/// Dump the raw 512-byte headers from the (post-fixup) data to stderr.
fn dump_headers(data: &[u8]) {
Expand All @@ -30,33 +42,68 @@ fn dump_headers(data: &[u8]) {
}
}

/// Returns true if the error is a known behavioral difference where
/// tar-core is intentionally stricter than tar-rs.
///
/// When this returns true, tar-rs may have parsed more entries than
/// tar-core, and that's expected.
fn is_allowlisted_divergence(err: &ParseError) -> bool {
    // tar-core rejects malformed PAX records; tar-rs silently skips them.
    let strict_pax = matches!(err, ParseError::Pax(_));
    // Non-UTF-8 PAX keys are likewise rejected by tar-core only.
    let strict_utf8 = matches!(err, ParseError::InvalidUtf8(_));
    strict_pax || strict_utf8
}

/// Compare entries parsed by tar-rs and tar-core, asserting equivalence.
///
/// tar-core is intentionally more lenient than tar-rs in some cases (e.g.
/// all-null numeric fields are accepted as 0), so we only require that
/// tar-core parses *at least* as many entries as tar-rs and that those
/// entries match.
fn compare_entries(data: &[u8], tar_rs_entries: &[OwnedEntry], tar_core_entries: &[OwnedEntry]) {
if tar_core_entries.len() < tar_rs_entries.len() {
eprintln!(
"entry count mismatch: tar-core={} tar-rs={}",
tar_core_entries.len(),
tar_rs_entries.len()
);
dump_headers(data);
for (i, e) in tar_rs_entries.iter().enumerate() {
eprintln!("tar-rs [{i}]: {e:?}");
}
for (i, e) in tar_core_entries.iter().enumerate() {
eprintln!("tar-core[{i}]: {e:?}");
fn compare_entries(
data: &[u8],
tar_rs_entries: &[OwnedEntry],
tar_core_entries: &[OwnedEntry],
tar_core_error: Option<&ParseError>,
) {
let count_mismatch = tar_core_entries.len() < tar_rs_entries.len();

if count_mismatch {
// Check the behavioral difference allowlist: if tar-core stopped
// due to a known stricter-than-tar-rs error, the entry count
// divergence is expected. We still verify the entries that *were*
// parsed match (fall through to the zip comparison below).
let allowlisted = tar_core_error.is_some_and(|err| is_allowlisted_divergence(err));

if !allowlisted {
eprintln!(
"entry count mismatch: tar-core={} tar-rs={}",
tar_core_entries.len(),
tar_rs_entries.len()
);
if let Some(err) = tar_core_error {
eprintln!("tar-core error: {err}");
}
dump_headers(data);
for (i, e) in tar_rs_entries.iter().enumerate() {
eprintln!("tar-rs [{i}]: {e:?}");
}
for (i, e) in tar_core_entries.iter().enumerate() {
eprintln!("tar-core[{i}]: {e:?}");
}
panic!(
"tar-core parsed fewer entries than tar-rs: tar-core={} tar-rs={}",
tar_core_entries.len(),
tar_rs_entries.len(),
);
}
panic!(
"tar-core parsed fewer entries than tar-rs: tar-core={} tar-rs={}",
tar_core_entries.len(),
tar_rs_entries.len(),
);
}

// Compare entries that both parsers produced. When there's an
// allowlisted count mismatch, zip stops at the shorter list,
// verifying that tar-core's entries are correct up to the point
// where it diverged.
for (i, (rs, core)) in tar_rs_entries.iter().zip(tar_core_entries).enumerate() {
if rs != core {
eprintln!("mismatch at entry {i}:");
Expand Down Expand Up @@ -130,6 +177,17 @@ fn parse_octal_simple(bytes: &[u8]) -> Option<u64> {
u64::from_str_radix(s, 8).ok()
}

/// Parse `data` with both tar-rs and tar-core (permissive limits) and
/// assert the results are equivalent, tolerating allowlisted divergences.
fn run_differential(data: &[u8]) {
    let reference_entries = parse_tar_rs(data);
    let detailed = parse_tar_core_detailed(data, Limits::permissive());
    let core_error = detailed.error.as_ref();
    compare_entries(data, &reference_entries, &detailed.entries, core_error);
}

fuzz_target!(|data: &[u8]| {
if data.len() > 256 * 1024 {
return;
Expand All @@ -142,12 +200,8 @@ fuzz_target!(|data: &[u8]| {
if should_fixup {
let mut data = data.to_vec();
fixup_checksums(&mut data);
let tar_rs_entries = parse_tar_rs(&data);
let tar_core_entries = parse_tar_core(&data);
compare_entries(&data, &tar_rs_entries, &tar_core_entries);
run_differential(&data);
} else {
let tar_rs_entries = parse_tar_rs(data);
let tar_core_entries = parse_tar_core(data);
compare_entries(data, &tar_rs_entries, &tar_core_entries);
run_differential(data);
}
});
85 changes: 62 additions & 23 deletions fuzz/fuzz_targets/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,69 @@
//! - Padded size is always >= size and block-aligned (or both zero).
//! - Parsed entry paths are never empty.
//! - Total consumed bytes never exceed the input length.
//!
//! The parser is exercised with variable-length short reads to stress the
//! NeedData/retry path that real callers hit with partial I/O. Each parse
//! call gets a different chunk size from a seeded RNG, simulating realistic
//! non-uniform read patterns.

#![no_main]

use std::mem::size_of;

use libfuzzer_sys::fuzz_target;
use rand::rngs::SmallRng;
use rand::{Rng, SeedableRng};
use tar_core::parse::{Limits, ParseEvent, Parser};
use tar_core::HEADER_SIZE;

/// Drive a parser to completion over `data`, checking invariants on each entry.
/// Returns normally on errors or NeedData — the point is that it must not panic.
fn run_parser(data: &[u8], limits: Limits, verify_checksums: bool) {
/// Max chunk size ceiling for short-read simulation (1 MiB).
///
/// Kept as `u32` so it can be used directly as the upper bound of
/// `rng.random_range(1..=MAX_CHUNK_CEILING)`; call sites cast the drawn
/// value to `usize` when sizing the input window.
const MAX_CHUNK_CEILING: u32 = 1024 * 1024;

/// Drive a parser to completion over `data`, feeding it variable-sized
/// chunks drawn from `rng` to simulate realistic partial reads.
///
/// On NeedData, the exposed window grows to provide the requested minimum.
/// After each successfully processed event, a fresh chunk size is drawn
/// from the RNG so the parser sees different split points throughout.
///
/// Checks invariants on each entry and returns normally on errors or
/// NeedData — the point is that it must not panic.
fn run_parser(data: &[u8], limits: Limits, verify_checksums: bool, rng: &mut SmallRng) {
let mut parser = Parser::new(limits);
parser.set_verify_checksums(verify_checksums);
let mut offset: usize = 0;
let mut window = rng.random_range(1..=MAX_CHUNK_CEILING) as usize;

loop {
assert!(offset <= data.len(), "offset exceeded input length");
let input = &data[offset..];
let remaining = data.len() - offset;
if remaining == 0 {
break;
}

let input = &data[offset..offset + remaining.min(window)];

match parser.parse(input) {
Ok(ParseEvent::NeedData { .. }) => break,
Ok(ParseEvent::NeedData { min_bytes }) => {
if remaining < min_bytes {
break;
}
// Widen the window to satisfy the parser's request and retry.
window = min_bytes;
continue;
}

Ok(ParseEvent::Entry { consumed, entry })
| Ok(ParseEvent::SparseEntry {
consumed, entry, ..
}) => {
// consumed bytes must not exceed remaining input
assert!(
consumed <= input.len(),
"consumed {consumed} > remaining {}",
"consumed {consumed} > input len {}",
input.len()
);

// Padded-size invariants
assert!(
entry.padded_size() >= entry.size,
"padded_size {} < size {}",
Expand All @@ -55,39 +85,39 @@ fn run_parser(data: &[u8], limits: Limits, verify_checksums: bool) {
);
}

// Path must not be empty
assert!(!entry.path.is_empty(), "entry path is empty");

offset += consumed;

// Skip content + padding; if not enough data remains, bail out.
let padded = entry.padded_size() as usize;
if offset.saturating_add(padded) > data.len() {
break;
}
offset += padded;

window = rng.random_range(1..=MAX_CHUNK_CEILING) as usize;
}

Ok(ParseEvent::GlobalExtensions { consumed, .. }) => {
assert!(
consumed <= input.len(),
"GlobalExtensions consumed {consumed} > remaining {}",
"GlobalExtensions consumed {consumed} > input len {}",
input.len()
);
offset += consumed;
window = rng.random_range(1..=MAX_CHUNK_CEILING) as usize;
}

Ok(ParseEvent::End { consumed }) => {
assert!(
consumed <= input.len(),
"End consumed {consumed} > remaining {}",
"End consumed {consumed} > input len {}",
input.len()
);
offset += consumed;
break;
}

// Parse errors are expected on fuzzed input — just stop.
Err(_) => break,
}
}
Expand All @@ -99,15 +129,24 @@ fn run_parser(data: &[u8], limits: Limits, verify_checksums: bool) {
);
}

/// Byte offset where the tar payload begins (after the config/seed header).
///
/// Header layout of each fuzz input: byte 0 selects checksum behavior,
/// bytes 1..9 are a little-endian `u64` seed for the chunk-size RNG.
const PAYLOAD_OFFSET: usize = 1 + size_of::<u64>();

fuzz_target!(|data: &[u8]| {
    // Inputs shorter than the config/seed header carry no payload at all.
    if data.len() < PAYLOAD_OFFSET {
        return;
    }

    // Byte 0: checksum behavior. Verify on multiples of 10 (~10% of
    // inputs); skip otherwise (~90%) so the fuzzer can reach deeper
    // parser logic despite random inputs rarely having valid checksums.
    let verify_checksums = data[0] % 10 == 0;

    // Bytes 1..9: little-endian seed for the chunk-size RNG.
    let seed_bytes: [u8; 8] = data[1..PAYLOAD_OFFSET].try_into().unwrap();
    let mut rng = SmallRng::seed_from_u64(u64::from_le_bytes(seed_bytes));

    let payload = &data[PAYLOAD_OFFSET..];

    // Exercise both limit profiles; the RNG state carries across so the
    // two runs see different chunk-size sequences.
    for limits in [Limits::permissive(), Limits::default()] {
        run_parser(payload, limits, verify_checksums, &mut rng);
    }
});
Loading