Skip to content

Commit a6f8436

Browse files
committed
parse: Consolidate metadata limits into max_metadata_size
Replace max_pax_size, max_gnu_long_size, and the previous max_path_len with two cleaner limits:

- max_metadata_size: u32 — aggregate budget for all extension data (PAX headers + GNU long name/link) per entry. Default 1 MiB. Consolidates PaxTooLarge/GnuLongTooLarge into MetadataTooLarge.
- max_path_len: Option<u32> — optional filesystem-level path length check. None by default (we're a parser, not a filesystem). Callers extracting to disk should set this to libc::PATH_MAX or equivalent.

Also add Limits::check_path_len() helper and track running metadata size in PendingMetadata.

Assisted-by: OpenCode (Claude claude-opus-4-6)
1 parent 28dd29f commit a6f8436

File tree

2 files changed

+109
-159
lines changed

2 files changed

+109
-159
lines changed

src/parse.rs

Lines changed: 81 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -81,37 +81,36 @@ use crate::{
8181
///
8282
/// // Customize limits
8383
/// let strict_limits = Limits {
84-
/// max_path_len: 1024,
85-
/// max_pax_size: 64 * 1024,
84+
/// max_metadata_size: 64 * 1024,
85+
/// // Set to libc::PATH_MAX when extracting to disk
86+
/// max_path_len: Some(4096),
8687
/// ..Default::default()
8788
/// };
8889
/// ```
8990
#[derive(Debug, Clone, PartialEq, Eq)]
9091
pub struct Limits {
91-
/// Maximum path length in bytes.
92+
/// Maximum total size of all extension metadata for a single entry, in bytes.
9293
///
93-
/// Applies to both file paths and link targets. Paths exceeding this
94-
/// limit will cause a [`ParseError::PathTooLong`] error.
95-
///
96-
/// Default: 4096 bytes (Linux PATH_MAX).
97-
pub max_path_len: usize,
98-
99-
/// Maximum size of PAX extended header data in bytes.
100-
///
101-
/// This limits the total size of a single PAX 'x' entry's content.
102-
/// PAX headers larger than this will cause a [`ParseError::PaxTooLarge`] error.
94+
/// This is an aggregate budget: the combined size of PAX extended headers,
95+
/// GNU long name, and GNU long link data for one file entry must not exceed
96+
/// this limit. Exceeding it will cause a [`ParseError::MetadataTooLarge`]
97+
/// error.
10398
///
10499
/// Default: 1 MiB (1,048,576 bytes).
105-
pub max_pax_size: u64,
100+
pub max_metadata_size: u32,
106101

107-
/// Maximum size of GNU long name/link data in bytes.
102+
/// Optional maximum path length in bytes.
103+
///
104+
/// When set, paths and link targets exceeding this limit will cause a
105+
/// [`ParseError::PathTooLong`] error. When `None`, no path length check
106+
/// is performed (the default).
108107
///
109-
/// GNU 'L' (long name) and 'K' (long link) entries should only contain
110-
/// a single path. Values exceeding this limit will cause a
111-
/// [`ParseError::GnuLongTooLarge`] error.
108+
/// Callers extracting to a real filesystem should set this to
109+
/// `libc::PATH_MAX` (4096 on Linux, 1024 on macOS) or the appropriate
110+
/// platform constant.
112111
///
113-
/// Default: 4096 bytes.
114-
pub max_gnu_long_size: u64,
112+
/// Default: `None`.
113+
pub max_path_len: Option<u32>,
115114

116115
/// Maximum number of consecutive metadata entries before an actual entry.
117116
///
@@ -145,9 +144,8 @@ pub struct Limits {
145144
impl Default for Limits {
146145
fn default() -> Self {
147146
Self {
148-
max_path_len: 4096,
149-
max_pax_size: 1024 * 1024, // 1 MiB
150-
max_gnu_long_size: 4096,
147+
max_metadata_size: 1024 * 1024, // 1 MiB
148+
max_path_len: None,
151149
max_pending_entries: 16,
152150
max_sparse_entries: 10_000,
153151
strict: true,
@@ -169,9 +167,8 @@ impl Limits {
169167
#[must_use]
170168
pub fn permissive() -> Self {
171169
Self {
172-
max_path_len: usize::MAX,
173-
max_pax_size: u64::MAX,
174-
max_gnu_long_size: u64::MAX,
170+
max_metadata_size: u32::MAX,
171+
max_path_len: None,
175172
max_pending_entries: usize::MAX,
176173
max_sparse_entries: 1_000_000,
177174
strict: false,
@@ -185,14 +182,26 @@ impl Limits {
185182
#[must_use]
186183
pub fn strict() -> Self {
187184
Self {
188-
max_path_len: 1024,
189-
max_pax_size: 64 * 1024, // 64 KiB
190-
max_gnu_long_size: 1024,
185+
max_metadata_size: 64 * 1024, // 64 KiB
186+
max_path_len: Some(4096),
191187
max_pending_entries: 8,
192188
max_sparse_entries: 1000,
193189
strict: true,
194190
}
195191
}
192+
193+
/// Check a path length against the configured limit.
194+
///
195+
/// Returns `Ok(())` if the path is within the limit (or no limit is set),
196+
/// or `Err(ParseError::PathTooLong)` if it exceeds it.
197+
pub fn check_path_len(&self, len: usize) -> Result<()> {
198+
if let Some(limit) = self.max_path_len {
199+
if len > limit as usize {
200+
return Err(ParseError::PathTooLong { len, limit });
201+
}
202+
}
203+
Ok(())
204+
}
196205
}
197206

198207
// ============================================================================
@@ -225,25 +234,19 @@ pub enum ParseError {
225234
/// Actual path length.
226235
len: usize,
227236
/// Configured limit.
228-
limit: usize,
237+
limit: u32,
229238
},
230239

231-
/// PAX extended header exceeds configured maximum size.
232-
#[error("PAX header exceeds limit: {size} bytes > {limit} bytes")]
233-
PaxTooLarge {
234-
/// Actual PAX header size.
235-
size: u64,
236-
/// Configured limit.
237-
limit: u64,
238-
},
239-
240-
/// GNU long name/link exceeds configured maximum size.
241-
#[error("GNU long name/link exceeds limit: {size} bytes > {limit} bytes")]
242-
GnuLongTooLarge {
243-
/// Actual GNU long name/link size.
240+
/// Extension metadata exceeds configured maximum size.
241+
///
242+
/// The aggregate size of all extension data (PAX headers, GNU long
243+
/// name/link) for a single entry exceeded [`Limits::max_metadata_size`].
244+
#[error("metadata exceeds limit: {size} bytes > {limit} bytes")]
245+
MetadataTooLarge {
246+
/// Total metadata size that would result.
244247
size: u64,
245248
/// Configured limit.
246-
limit: u64,
249+
limit: u32,
247250
},
248251

249252
/// Duplicate GNU long name entry without an intervening actual entry.
@@ -575,6 +578,8 @@ struct PendingMetadata<'a> {
575578
gnu_long_link: Option<&'a [u8]>,
576579
pax_extensions: Option<&'a [u8]>,
577580
count: usize,
581+
/// Running total of all extension data bytes accumulated so far.
582+
metadata_size: u64,
578583
}
579584

580585
/// Context for GNU sparse entries, passed from `handle_gnu_sparse` to
@@ -880,11 +885,11 @@ impl Parser {
880885
// them here. Routing through emit_entry would fail because
881886
// global headers have arbitrary metadata fields.
882887
EntryType::XGlobalHeader => {
883-
// Check size limit (same as local PAX headers)
884-
if size > self.limits.max_pax_size {
885-
return Err(ParseError::PaxTooLarge {
888+
// Check size limit: the global header's own size is compared against max_metadata_size
889+
if size > self.limits.max_metadata_size as u64 {
890+
return Err(ParseError::MetadataTooLarge {
886891
size,
887-
limit: self.limits.max_pax_size,
892+
limit: self.limits.max_metadata_size,
888893
});
889894
}
890895

@@ -956,25 +961,12 @@ impl Parser {
956961
});
957962
}
958963

959-
// Check size limit
960-
let max_size = match kind {
961-
ExtensionKind::GnuLongName | ExtensionKind::GnuLongLink => {
962-
self.limits.max_gnu_long_size
963-
}
964-
ExtensionKind::Pax => self.limits.max_pax_size,
965-
};
966-
if size > max_size {
967-
return Err(match kind {
968-
ExtensionKind::GnuLongName | ExtensionKind::GnuLongLink => {
969-
ParseError::GnuLongTooLarge {
970-
size,
971-
limit: max_size,
972-
}
973-
}
974-
ExtensionKind::Pax => ParseError::PaxTooLarge {
975-
size,
976-
limit: max_size,
977-
},
964+
// Check aggregate metadata size limit
965+
let new_metadata_size = slices.metadata_size + size;
966+
if new_metadata_size > self.limits.max_metadata_size as u64 {
967+
return Err(ParseError::MetadataTooLarge {
968+
size: new_metadata_size,
969+
limit: self.limits.max_metadata_size,
978970
});
979971
}
980972

@@ -1000,17 +992,13 @@ impl Parser {
1000992
if let Some(trimmed) = data.strip_suffix(&[0]) {
1001993
data = trimmed;
1002994
}
1003-
if data.len() > self.limits.max_path_len {
1004-
return Err(ParseError::PathTooLong {
1005-
len: data.len(),
1006-
limit: self.limits.max_path_len,
1007-
});
1008-
}
995+
self.limits.check_path_len(data.len())?;
1009996
}
1010997

1011-
// Build new pending slices with the added extension data
998+
// Build new pending metadata with the added extension data
1012999
let mut new_slices = PendingMetadata {
10131000
count: slices.count + 1,
1001+
metadata_size: new_metadata_size,
10141002
..slices
10151003
};
10161004
match kind {
@@ -1391,21 +1379,11 @@ impl Parser {
13911379

13921380
match key {
13931381
PAX_PATH => {
1394-
if value.len() > self.limits.max_path_len {
1395-
return Err(ParseError::PathTooLong {
1396-
len: value.len(),
1397-
limit: self.limits.max_path_len,
1398-
});
1399-
}
1382+
self.limits.check_path_len(value.len())?;
14001383
path = Cow::Borrowed(value);
14011384
}
14021385
PAX_LINKPATH => {
1403-
if value.len() > self.limits.max_path_len {
1404-
return Err(ParseError::PathTooLong {
1405-
len: value.len(),
1406-
limit: self.limits.max_path_len,
1407-
});
1408-
}
1386+
self.limits.check_path_len(value.len())?;
14091387
link_target = Some(Cow::Borrowed(value));
14101388
}
14111389
PAX_SIZE => {
@@ -1523,12 +1501,7 @@ impl Parser {
15231501
}
15241502
}
15251503
PAX_GNU_SPARSE_NAME => {
1526-
if value.len() > self.limits.max_path_len {
1527-
return Err(ParseError::PathTooLong {
1528-
len: value.len(),
1529-
limit: self.limits.max_path_len,
1530-
});
1531-
}
1504+
self.limits.check_path_len(value.len())?;
15321505
pax_sparse_name = Some(value);
15331506
}
15341507

@@ -1570,12 +1543,7 @@ impl Parser {
15701543
}
15711544

15721545
// Validate final path length
1573-
if path.len() > self.limits.max_path_len {
1574-
return Err(ParseError::PathTooLong {
1575-
len: path.len(),
1576-
limit: self.limits.max_path_len,
1577-
});
1578-
}
1546+
self.limits.check_path_len(path.len())?;
15791547

15801548
let entry = ParsedEntry {
15811549
header,
@@ -1632,24 +1600,23 @@ mod tests {
16321600
#[test]
16331601
fn test_default_limits() {
16341602
let limits = Limits::default();
1635-
assert_eq!(limits.max_path_len, 4096);
1636-
assert_eq!(limits.max_pax_size, 1024 * 1024);
1637-
assert_eq!(limits.max_gnu_long_size, 4096);
1603+
assert_eq!(limits.max_metadata_size, 1024 * 1024);
1604+
assert_eq!(limits.max_path_len, None);
16381605
assert_eq!(limits.max_pending_entries, 16);
16391606
}
16401607

16411608
#[test]
16421609
fn test_permissive_limits() {
16431610
let limits = Limits::permissive();
1644-
assert_eq!(limits.max_path_len, usize::MAX);
1645-
assert_eq!(limits.max_pax_size, u64::MAX);
1611+
assert_eq!(limits.max_metadata_size, u32::MAX);
1612+
assert_eq!(limits.max_path_len, None);
16461613
}
16471614

16481615
#[test]
16491616
fn test_strict_limits() {
16501617
let limits = Limits::strict();
1651-
assert!(limits.max_path_len < Limits::default().max_path_len);
1652-
assert!(limits.max_pax_size < Limits::default().max_pax_size);
1618+
assert_eq!(limits.max_path_len, Some(4096));
1619+
assert!(limits.max_metadata_size < Limits::default().max_metadata_size);
16531620
}
16541621

16551622
#[test]
@@ -2290,7 +2257,7 @@ mod tests {
22902257

22912258
#[test]
22922259
fn test_parser_global_pax_header_too_large() {
2293-
// Global PAX header exceeding max_pax_size should error
2260+
// Global PAX header exceeding max_metadata_size should error
22942261
let large_value = "x".repeat(1000);
22952262

22962263
let mut archive = Vec::new();
@@ -2302,13 +2269,13 @@ mod tests {
23022269
archive.extend(zeroes(1024));
23032270

23042271
let limits = Limits {
2305-
max_pax_size: 100,
2272+
max_metadata_size: 100,
23062273
..Default::default()
23072274
};
23082275
let mut parser = Parser::new(limits);
23092276
let result = parser.parse(&archive);
23102277

2311-
assert!(matches!(result, Err(ParseError::PaxTooLarge { .. })));
2278+
assert!(matches!(result, Err(ParseError::MetadataTooLarge { .. })));
23122279
}
23132280

23142281
#[test]
@@ -2606,13 +2573,13 @@ mod tests {
26062573
archive.extend(zeroes(1024));
26072574

26082575
let limits = Limits {
2609-
max_gnu_long_size: 100,
2576+
max_metadata_size: 100,
26102577
..Default::default()
26112578
};
26122579
let mut parser = Parser::new(limits);
26132580
let result = parser.parse(&archive);
26142581

2615-
assert!(matches!(result, Err(ParseError::GnuLongTooLarge { .. })));
2582+
assert!(matches!(result, Err(ParseError::MetadataTooLarge { .. })));
26162583
}
26172584

26182585
#[test]
@@ -2625,7 +2592,7 @@ mod tests {
26252592
archive.extend(zeroes(1024));
26262593

26272594
let limits = Limits {
2628-
max_path_len: 100,
2595+
max_path_len: Some(100),
26292596
..Default::default()
26302597
};
26312598
let mut parser = Parser::new(limits);
@@ -2642,7 +2609,7 @@ mod tests {
26422609

26432610
#[test]
26442611
fn test_parser_pax_too_large() {
2645-
// Create a PAX header that exceeds the size limit
2612+
// Create a PAX header that exceeds the metadata size limit
26462613
let large_value = "x".repeat(1000);
26472614

26482615
let mut archive = Vec::new();
@@ -2651,13 +2618,13 @@ mod tests {
26512618
archive.extend(zeroes(1024));
26522619

26532620
let limits = Limits {
2654-
max_pax_size: 100,
2621+
max_metadata_size: 100,
26552622
..Default::default()
26562623
};
26572624
let mut parser = Parser::new(limits);
26582625
let result = parser.parse(&archive);
26592626

2660-
assert!(matches!(result, Err(ParseError::PaxTooLarge { .. })));
2627+
assert!(matches!(result, Err(ParseError::MetadataTooLarge { .. })));
26612628
}
26622629

26632630
// =========================================================================

0 commit comments

Comments
 (0)