Skip to content

Commit 49bd3af

Browse files
committed
Refactor Script::bytes_to_asm_fmt to use iterator
This refactors `Script::bytes_to_asm_fmt`` function to use an iterator instead of index. Such change makes it easier to reason about overflows or out-of-bounds accesses. As a result this also fixes three unlikely overflows and happens to improve formatting to not output space at the beginning in some weird cases. To improve robustness even better it also moves `read_uint` implementation to internal function which returns a more specific error type which can be exhaustively matched on to guarantee correct error handling. Probably because of lack of this the code was previously checking the same condition twice, the second time being unreachable and attempting to behave differently than the first one. Finally this uses macro to deduplicate code which differs only in single number, ensuring the code stays in sync across all branches.
1 parent 23ccc58 commit 49bd3af

File tree

1 file changed

+95
-46
lines changed

1 file changed

+95
-46
lines changed

src/blockdata/script.rs

+95-46
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,22 @@ impl fmt::Display for Error {
154154
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
155155
impl ::std::error::Error for Error {}
156156

157+
// Our internal error proves that we only return these two cases from `read_uint_iter`.
158+
// Since it's private we don't bother with trait impls besides From.
159+
enum UintError {
160+
EarlyEndOfScript,
161+
NumericOverflow,
162+
}
163+
164+
impl From<UintError> for Error {
165+
fn from(error: UintError) -> Self {
166+
match error {
167+
UintError::EarlyEndOfScript => Error::EarlyEndOfScript,
168+
UintError::NumericOverflow => Error::NumericOverflow,
169+
}
170+
}
171+
}
172+
157173
#[cfg(feature="bitcoinconsensus")]
158174
#[doc(hidden)]
159175
impl From<bitcoinconsensus::Error> for Error {
@@ -227,13 +243,38 @@ pub fn read_scriptbool(v: &[u8]) -> bool {
227243
}
228244

229245
/// Read a script-encoded unsigned integer
246+
///
247+
/// ## Errors
248+
///
249+
/// This function returns an error in these cases:
250+
///
251+
/// * `data` is shorter than `size` => `EarlyEndOfScript`
252+
/// * `size` is greater than `u16::max_value / 8` (8191) => `NumericOverflow`
253+
/// * The number being read overflows `usize` => `NumericOverflow`
254+
///
255+
/// Note that this does **not** return an error for `size` between `core::size_of::<usize>()`
256+
/// and `u16::max_value / 8` if there's no overflow.
230257
pub fn read_uint(data: &[u8], size: usize) -> Result<usize, Error> {
258+
read_uint_iter(&mut data.iter(), size).map_err(Into::into)
259+
}
260+
261+
// We internally use implementation based on iterator so that it automatically advances as needed
262+
// Errors are same as above, just different type.
263+
fn read_uint_iter(data: &mut ::core::slice::Iter<'_, u8>, size: usize) -> Result<usize, UintError> {
231264
if data.len() < size {
232-
Err(Error::EarlyEndOfScript)
265+
Err(UintError::EarlyEndOfScript)
266+
} else if size > usize::from(u16::max_value() / 8) {
267+
// Casting to u32 would overflow
268+
Err(UintError::NumericOverflow)
233269
} else {
234270
let mut ret = 0;
235-
for (i, item) in data.iter().take(size).enumerate() {
236-
ret += (*item as usize) << (i * 8);
271+
for (i, item) in data.take(size).enumerate() {
272+
ret = usize::from(*item)
273+
// Casting is safe because we checked above to not repeat the same check in a loop
274+
.checked_shl((i * 8) as u32)
275+
.ok_or(UintError::NumericOverflow)?
276+
.checked_add(ret)
277+
.ok_or(UintError::NumericOverflow)?;
237278
}
238279
Ok(ret)
239280
}
@@ -477,50 +518,62 @@ impl Script {
477518

478519
/// Write the assembly decoding of the script bytes to the formatter.
479520
pub fn bytes_to_asm_fmt(script: &[u8], f: &mut dyn fmt::Write) -> fmt::Result {
480-
let mut index = 0;
481-
while index < script.len() {
482-
let opcode = opcodes::All::from(script[index]);
483-
index += 1;
521+
// This has to be a macro because it needs to break the loop
522+
macro_rules! read_push_data_len {
523+
($iter:expr, $len:expr, $formatter:expr) => {
524+
match read_uint_iter($iter, $len) {
525+
Ok(n) => {
526+
n
527+
},
528+
Err(UintError::EarlyEndOfScript) => {
529+
$formatter.write_str("<unexpected end>")?;
530+
break;
531+
}
532+
// We got the data in a slice which implies it being shorter than `usize::max_value()`
533+
// So if we got overflow, we can confidently say the number is higher than length of
534+
// the slice even though we don't know the exact number. This implies attempt to push
535+
// past end.
536+
Err(UintError::NumericOverflow) => {
537+
$formatter.write_str("<push past end>")?;
538+
break;
539+
}
540+
}
541+
}
542+
}
543+
544+
let mut iter = script.iter();
545+
// Was at least one opcode emitted?
546+
let mut at_least_one = false;
547+
// `iter` needs to be borrowed in `read_push_data_len`, so we have to use `while let` instead
548+
// of `for`.
549+
while let Some(byte) = iter.next() {
550+
let opcode = opcodes::All::from(*byte);
484551

485552
let data_len = if let opcodes::Class::PushBytes(n) = opcode.classify(opcodes::ClassifyContext::Legacy) {
486553
n as usize
487554
} else {
488555
match opcode {
489556
opcodes::all::OP_PUSHDATA1 => {
490-
if script.len() < index + 1 {
491-
f.write_str("<unexpected end>")?;
492-
break;
493-
}
494-
match read_uint(&script[index..], 1) {
495-
Ok(n) => { index += 1; n as usize }
496-
Err(_) => { f.write_str("<bad length>")?; break; }
497-
}
557+
// side effects: may write and break from the loop
558+
read_push_data_len!(&mut iter, 1, f)
498559
}
499560
opcodes::all::OP_PUSHDATA2 => {
500-
if script.len() < index + 2 {
501-
f.write_str("<unexpected end>")?;
502-
break;
503-
}
504-
match read_uint(&script[index..], 2) {
505-
Ok(n) => { index += 2; n as usize }
506-
Err(_) => { f.write_str("<bad length>")?; break; }
507-
}
561+
// side effects: may write and break from the loop
562+
read_push_data_len!(&mut iter, 2, f)
508563
}
509564
opcodes::all::OP_PUSHDATA4 => {
510-
if script.len() < index + 4 {
511-
f.write_str("<unexpected end>")?;
512-
break;
513-
}
514-
match read_uint(&script[index..], 4) {
515-
Ok(n) => { index += 4; n as usize }
516-
Err(_) => { f.write_str("<bad length>")?; break; }
517-
}
565+
// side effects: may write and break from the loop
566+
read_push_data_len!(&mut iter, 4, f)
518567
}
519568
_ => 0
520569
}
521570
};
522571

523-
if index > 1 { f.write_str(" ")?; }
572+
if at_least_one {
573+
f.write_str(" ")?;
574+
} else {
575+
at_least_one = true;
576+
}
524577
// Write the opcode
525578
if opcode == opcodes::all::OP_PUSHBYTES_0 {
526579
f.write_str("OP_0")?;
@@ -530,17 +583,13 @@ impl Script {
530583
// Write any pushdata
531584
if data_len > 0 {
532585
f.write_str(" ")?;
533-
match index.checked_add(data_len) {
534-
Some(end) if end <= script.len() => {
535-
for ch in &script[index..end] {
536-
write!(f, "{:02x}", ch)?;
537-
}
538-
index = end;
539-
},
540-
_ => {
541-
f.write_str("<push past end>")?;
542-
break;
543-
},
586+
if data_len <= iter.len() {
587+
for ch in iter.by_ref().take(data_len) {
588+
write!(f, "{:02x}", ch)?;
589+
}
590+
} else {
591+
f.write_str("<push past end>")?;
592+
break;
544593
}
545594
}
546595
}
@@ -1167,13 +1216,13 @@ mod test {
11671216
assert_eq!(hex_script!("4c").asm(),
11681217
"<unexpected end>");
11691218
assert_eq!(hex_script!("4c0201").asm(),
1170-
" OP_PUSHDATA1 <push past end>");
1219+
"OP_PUSHDATA1 <push past end>");
11711220
assert_eq!(hex_script!("4d").asm(),
11721221
"<unexpected end>");
11731222
assert_eq!(hex_script!("4dffff01").asm(),
1174-
" OP_PUSHDATA2 <push past end>");
1223+
"OP_PUSHDATA2 <push past end>");
11751224
assert_eq!(hex_script!("4effffffff01").asm(),
1176-
" OP_PUSHDATA4 <push past end>");
1225+
"OP_PUSHDATA4 <push past end>");
11771226
}
11781227

11791228
#[test]

0 commit comments

Comments
 (0)