Skip to content

Commit 5a33071

Browse files
committed
parse: Remove Limits::strict(), move error leniency to Parser
The strict/lenient toggle for PAX parse errors was a parser behavior flag, not a resource limit — it was misplaced on Limits. Move it to Parser as set_ignore_pax_errors(bool), matching the pattern of set_allow_empty_path and set_verify_checksums. Remove Limits::strict() entirely. The defaults are already safe for untrusted input (1 MiB metadata cap, bounded pending/sparse counts). Callers who want tighter resource limits can set fields directly; callers who want lenient PAX parsing call set_ignore_pax_errors(true). Assisted-by: OpenCode (Claude claude-opus-4-6)
1 parent a6f8436 commit 5a33071

File tree

1 file changed

+51
-61
lines changed

1 file changed

+51
-61
lines changed

src/parse.rs

Lines changed: 51 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ use crate::{
8080
/// let limits = Limits::default();
8181
///
8282
/// // Customize limits
83-
/// let strict_limits = Limits {
83+
/// let limits = Limits {
8484
/// max_metadata_size: 64 * 1024,
8585
/// // Set to libc::PATH_MAX when extracting to disk
8686
/// max_path_len: Some(4096),
@@ -132,13 +132,6 @@ pub struct Limits {
132132
///
133133
/// Default: 10000.
134134
pub max_sparse_entries: usize,
135-
136-
/// When true, PAX extension values that fail to parse (invalid UTF-8,
137-
/// invalid integer for numeric fields like `uid`, `gid`, `size`, `mtime`)
138-
/// cause errors instead of being silently ignored.
139-
///
140-
/// Default: `true`.
141-
pub strict: bool,
142135
}
143136

144137
impl Default for Limits {
@@ -148,7 +141,6 @@ impl Default for Limits {
148141
max_path_len: None,
149142
max_pending_entries: 16,
150143
max_sparse_entries: 10_000,
151-
strict: true,
152144
}
153145
}
154146
}
@@ -171,22 +163,6 @@ impl Limits {
171163
max_path_len: None,
172164
max_pending_entries: usize::MAX,
173165
max_sparse_entries: 1_000_000,
174-
strict: false,
175-
}
176-
}
177-
178-
/// Create strict limits suitable for untrusted archives.
179-
///
180-
/// This sets conservative limits to minimize resource consumption
181-
/// from potentially malicious archives.
182-
#[must_use]
183-
pub fn strict() -> Self {
184-
Self {
185-
max_metadata_size: 64 * 1024, // 64 KiB
186-
max_path_len: Some(4096),
187-
max_pending_entries: 8,
188-
max_sparse_entries: 1000,
189-
strict: true,
190166
}
191167
}
192168

@@ -291,7 +267,7 @@ pub enum ParseError {
291267
#[error("invalid PAX sparse map: {0}")]
292268
InvalidPaxSparseMap(Cow<'static, str>),
293269

294-
/// A PAX extension value failed to parse in strict mode.
270+
/// A PAX extension value failed to parse.
295271
#[error("invalid PAX {key} value: {value:?}")]
296272
InvalidPaxValue {
297273
/// The PAX key (e.g. "uid", "size").
@@ -604,23 +580,23 @@ impl PendingMetadata<'_> {
604580
///
605581
/// Returns `Some((major, minor))` if `GNU.sparse.major` and
606582
/// `GNU.sparse.minor` are both present and parseable, `None` if
607-
/// the keys are absent. In strict mode, malformed values produce
608-
/// errors instead of being silently ignored.
609-
fn pax_sparse_version(pax: &[u8], strict: bool) -> Result<Option<(u64, u64)>> {
583+
/// the keys are absent. When `ignore_errors` is true, malformed values
584+
/// are silently skipped instead of producing errors.
585+
fn pax_sparse_version(pax: &[u8], ignore_errors: bool) -> Result<Option<(u64, u64)>> {
610586
let mut major = None;
611587
let mut minor = None;
612588
for ext in PaxExtensions::new(pax) {
613589
let ext = ext?;
614590
let key = match ext.key() {
615591
Ok(k) => k,
616-
Err(_) if !strict => continue,
592+
Err(_) if ignore_errors => continue,
617593
Err(e) => return Err(ParseError::from(e)),
618594
};
619595
match key {
620596
PAX_GNU_SPARSE_MAJOR => {
621597
let s = match ext.value() {
622598
Ok(s) => s,
623-
Err(_) if !strict => continue,
599+
Err(_) if ignore_errors => continue,
624600
Err(_) => {
625601
return Err(ParseError::InvalidPaxValue {
626602
key: PAX_GNU_SPARSE_MAJOR,
@@ -630,7 +606,7 @@ fn pax_sparse_version(pax: &[u8], strict: bool) -> Result<Option<(u64, u64)>> {
630606
};
631607
match s.parse::<u64>() {
632608
Ok(v) => major = Some(v),
633-
Err(_) if !strict => {}
609+
Err(_) if ignore_errors => {}
634610
Err(_) => {
635611
return Err(ParseError::InvalidPaxValue {
636612
key: PAX_GNU_SPARSE_MAJOR,
@@ -642,7 +618,7 @@ fn pax_sparse_version(pax: &[u8], strict: bool) -> Result<Option<(u64, u64)>> {
642618
PAX_GNU_SPARSE_MINOR => {
643619
let s = match ext.value() {
644620
Ok(s) => s,
645-
Err(_) if !strict => continue,
621+
Err(_) if ignore_errors => continue,
646622
Err(_) => {
647623
return Err(ParseError::InvalidPaxValue {
648624
key: PAX_GNU_SPARSE_MINOR,
@@ -652,7 +628,7 @@ fn pax_sparse_version(pax: &[u8], strict: bool) -> Result<Option<(u64, u64)>> {
652628
};
653629
match s.parse::<u64>() {
654630
Ok(v) => minor = Some(v),
655-
Err(_) if !strict => {}
631+
Err(_) if ignore_errors => {}
656632
Err(_) => {
657633
return Err(ParseError::InvalidPaxValue {
658634
key: PAX_GNU_SPARSE_MINOR,
@@ -725,6 +701,13 @@ pub struct Parser {
725701
///
726702
/// Default: `true`.
727703
verify_checksums: bool,
704+
/// When true, malformed PAX extension values (invalid UTF-8, unparseable
705+
/// integers for uid/gid/size/mtime) are silently skipped instead of
706+
/// producing errors. This matches the behavior of many real-world tar
707+
/// implementations.
708+
///
709+
/// Default: `false`.
710+
ignore_pax_errors: bool,
728711
}
729712

730713
impl Parser {
@@ -736,6 +719,7 @@ impl Parser {
736719
state: State::ReadHeader,
737720
allow_empty_path: false,
738721
verify_checksums: true,
722+
ignore_pax_errors: false,
739723
}
740724
}
741725

@@ -758,6 +742,18 @@ impl Parser {
758742
self.verify_checksums = verify;
759743
}
760744

745+
/// Control whether malformed PAX extension values are silently ignored.
746+
///
747+
/// When set to `true`, PAX values that fail to parse (invalid UTF-8,
748+
/// unparseable integers for `uid`, `gid`, `size`, `mtime`) are skipped
749+
/// instead of producing [`ParseError::InvalidPaxValue`] errors. This
750+
/// matches the lenient behavior of many real-world tar implementations.
751+
///
752+
/// Default: `false` (malformed values produce errors).
753+
pub fn set_ignore_pax_errors(&mut self, ignore: bool) {
754+
self.ignore_pax_errors = ignore;
755+
}
756+
761757
/// Create a new parser with default limits.
762758
#[must_use]
763759
pub fn with_defaults() -> Self {
@@ -918,7 +914,7 @@ impl Parser {
918914
// Check for PAX v1.0 sparse before emitting — it requires
919915
// reading the sparse map from the data stream.
920916
let sparse_version = if let Some(pax) = slices.pax_extensions {
921-
pax_sparse_version(pax, self.limits.strict)?
917+
pax_sparse_version(pax, self.ignore_pax_errors)?
922918
} else {
923919
None
924920
};
@@ -1039,21 +1035,21 @@ impl Parser {
10391035
"missing PAX extensions",
10401036
)))?;
10411037

1042-
let strict = self.limits.strict;
1038+
let ignore_errors = self.ignore_pax_errors;
10431039
let mut real_size = None;
10441040
let mut sparse_name = None;
10451041
for ext in PaxExtensions::new(pax) {
10461042
let ext = ext?;
10471043
let key = match ext.key() {
10481044
Ok(k) => k,
1049-
Err(_) if !strict => continue,
1045+
Err(_) if ignore_errors => continue,
10501046
Err(e) => return Err(ParseError::from(e)),
10511047
};
10521048
match key {
10531049
PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
10541050
let s = match ext.value() {
10551051
Ok(s) => s,
1056-
Err(_) if !strict => continue,
1052+
Err(_) if ignore_errors => continue,
10571053
Err(_) => {
10581054
return Err(ParseError::InvalidPaxValue {
10591055
key: PAX_GNU_SPARSE_REALSIZE,
@@ -1063,7 +1059,7 @@ impl Parser {
10631059
};
10641060
match s.parse::<u64>() {
10651061
Ok(v) => real_size = Some(v),
1066-
Err(_) if !strict => {}
1062+
Err(_) if ignore_errors => {}
10671063
Err(_) => {
10681064
return Err(ParseError::InvalidPaxValue {
10691065
key: PAX_GNU_SPARSE_REALSIZE,
@@ -1345,16 +1341,16 @@ impl Parser {
13451341
let mut pax_sparse_pending_offset: Option<u64> = None;
13461342

13471343
if let Some(pax) = raw_pax {
1348-
let strict = self.limits.strict;
1344+
let ignore_errors = self.ignore_pax_errors;
13491345
let extensions = PaxExtensions::new(pax);
13501346

1351-
// Helper: parse a PAX numeric value, returning Err in strict mode
1352-
// or Ok(None) in lenient mode when the value is unparseable.
1347+
// Helper: parse a PAX numeric value, returning Ok(None) when
1348+
// ignore_pax_errors is set and the value is unparseable.
13531349
let parse_pax_u64 =
13541350
|ext: &crate::PaxExtension<'_>, key: &'static str| -> Result<Option<u64>> {
13551351
let s = match ext.value() {
13561352
Ok(s) => s,
1357-
Err(_) if !strict => return Ok(None),
1353+
Err(_) if ignore_errors => return Ok(None),
13581354
Err(_) => {
13591355
return Err(ParseError::InvalidPaxValue {
13601356
key,
@@ -1364,7 +1360,7 @@ impl Parser {
13641360
};
13651361
match s.parse::<u64>() {
13661362
Ok(v) => Ok(Some(v)),
1367-
Err(_) if !strict => Ok(None),
1363+
Err(_) if ignore_errors => Ok(None),
13681364
Err(_) => Err(ParseError::InvalidPaxValue {
13691365
key,
13701366
value: s.to_owned().into(),
@@ -1406,7 +1402,7 @@ impl Parser {
14061402
// parse only the integer part.
14071403
let s = match ext.value() {
14081404
Ok(s) => s,
1409-
Err(_) if !strict => continue,
1405+
Err(_) if ignore_errors => continue,
14101406
Err(_) => {
14111407
return Err(ParseError::InvalidPaxValue {
14121408
key: PAX_MTIME,
@@ -1417,7 +1413,7 @@ impl Parser {
14171413
let int_part = s.split('.').next().unwrap_or(s);
14181414
match int_part.parse::<u64>() {
14191415
Ok(v) => mtime = v,
1420-
Err(_) if !strict => {}
1416+
Err(_) if ignore_errors => {}
14211417
Err(_) => {
14221418
return Err(ParseError::InvalidPaxValue {
14231419
key: PAX_MTIME,
@@ -1458,7 +1454,7 @@ impl Parser {
14581454
PAX_GNU_SPARSE_MAP => {
14591455
let s = match ext.value() {
14601456
Ok(s) => s,
1461-
Err(_) if !strict => continue,
1457+
Err(_) if ignore_errors => continue,
14621458
Err(_) => {
14631459
return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
14641460
"non-UTF8 sparse map",
@@ -1613,10 +1609,10 @@ mod tests {
16131609
}
16141610

16151611
#[test]
1616-
fn test_strict_limits() {
1617-
let limits = Limits::strict();
1618-
assert_eq!(limits.max_path_len, Some(4096));
1619-
assert!(limits.max_metadata_size < Limits::default().max_metadata_size);
1612+
fn test_permissive_limits_relaxed() {
1613+
let limits = Limits::permissive();
1614+
assert!(limits.max_metadata_size > Limits::default().max_metadata_size);
1615+
assert!(limits.max_pending_entries > Limits::default().max_pending_entries);
16201616
}
16211617

16221618
#[test]
@@ -2860,11 +2856,8 @@ mod tests {
28602856
#[test]
28612857
fn test_lenient_ignores_invalid_pax_uid() {
28622858
let archive = make_archive_with_pax("uid", b"notanumber");
2863-
let limits = Limits {
2864-
strict: false,
2865-
..Default::default()
2866-
};
2867-
let mut parser = Parser::new(limits);
2859+
let mut parser = Parser::new(Limits::default());
2860+
parser.set_ignore_pax_errors(true);
28682861
let event = parser.parse(&archive).unwrap();
28692862
match event {
28702863
ParseEvent::Entry { entry, .. } => {
@@ -2878,11 +2871,8 @@ mod tests {
28782871
#[test]
28792872
fn test_lenient_ignores_invalid_pax_size() {
28802873
let archive = make_archive_with_pax("size", b"xyz");
2881-
let limits = Limits {
2882-
strict: false,
2883-
..Default::default()
2884-
};
2885-
let mut parser = Parser::new(limits);
2874+
let mut parser = Parser::new(Limits::default());
2875+
parser.set_ignore_pax_errors(true);
28862876
let event = parser.parse(&archive).unwrap();
28872877
match event {
28882878
ParseEvent::Entry { entry, .. } => {

0 commit comments

Comments
 (0)