Skip to content

Commit 4de469d

Browse files
committed
EBML: Support parsing VINTs
1 parent a979db0 commit 4de469d

File tree

4 files changed

+307
-1
lines changed

4 files changed

+307
-1
lines changed

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ byteorder = "1.4.3"
1818
# ID3 compressed frames
1919
flate2 = { version = "1.0.26", optional = true }
2020
# Proc macros
21-
lofty_attr = "0.9.0"
21+
lofty_attr = { path = "lofty_attr" }
2222
# Debug logging
2323
log = "0.4.19"
2424
# OGG Vorbis/Opus

src/ebml/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,15 @@
22
mod properties;
33
mod read;
44
mod tag;
5+
mod vint;
56

67
use lofty_attr::LoftyFile;
78

89
// Exports
910

1011
pub use properties::EbmlProperties;
1112
pub use tag::EbmlTag;
13+
pub use vint::VInt;
1214

1315
/// An EBML file
1416
#[derive(LoftyFile, Default)]

src/ebml/vint.rs

Lines changed: 297 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,297 @@
1+
use crate::error::Result;
2+
use crate::macros::err;
3+
4+
use std::io::Read;
5+
6+
use byteorder::{ReadBytesExt, WriteBytesExt};
7+
8+
/// An EBML variable-size integer
9+
///
10+
/// A `VInt` is an unsigned integer composed of up to 8 octets, with 7 usable bits per octet.
11+
///
12+
/// To ensure safe construction of `VInt`s, users must create them through [`VInt::parse`] or [`VInt::from_u64`].
13+
#[repr(transparent)]
14+
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
15+
pub struct VInt(u64);
16+
17+
impl VInt {
18+
// Every octet of encoded length costs one bit for the length marker, leaving 7 usable bits per byte
19+
const USABLE_BITS_PER_BYTE: u64 = 7;
20+
const MAX_OCTET_LENGTH: u64 = 8;
21+
const USABLE_BITS: u64 = Self::MAX_OCTET_LENGTH * Self::USABLE_BITS_PER_BYTE;
22+
23+
const MAX_VALUE: u64 = u64::MAX >> (u64::BITS as u64 - Self::USABLE_BITS);
24+
25+
/// Create a signed `VInt` from a `u64`
26+
///
27+
/// # Errors
28+
///
29+
/// * `uint` cannot fit within the maximum width of 56 bits
30+
///
31+
/// # Examples
32+
///
33+
/// ```rust
34+
/// use lofty::ebml::VInt;
35+
///
36+
/// # fn main() -> lofty::Result<()> {
37+
/// // This value is too large to represent
38+
/// let invalid_vint = VInt::from_u64(u64::MAX);
39+
/// assert!(invalid_vint.is_err());
40+
///
41+
/// // This value is small enough to represent
42+
/// let valid_vint = VInt::from_u64(500)?;
43+
/// # Ok(()) }
44+
/// ```
45+
pub fn from_u64(uint: u64) -> Result<Self> {
46+
if uint > Self::MAX_VALUE {
47+
err!(BadVintSize);
48+
}
49+
50+
Ok(Self(uint))
51+
}
52+
53+
/// Gets the inner value of the `VInt`
54+
///
55+
/// # Examples
56+
///
57+
/// ```rust
58+
/// use lofty::ebml::VInt;
59+
///
60+
/// # fn main() -> lofty::Result<()> {
61+
/// let vint = VInt::from_u64(2)?;
62+
/// assert_eq!(vint.value(), 2);
63+
/// # Ok(()) }
64+
/// ```
65+
pub fn value(&self) -> u64 {
66+
self.0
67+
}
68+
69+
/// Parse a `VInt` from a reader
70+
///
71+
/// `max_length` can be used to specify the maximum number of octets the number should
72+
/// occupy, otherwise it should be `8`.
73+
///
74+
/// # Errors
75+
///
76+
/// * `uint` cannot fit within the maximum width of 54 bits
77+
///
78+
/// # Examples
79+
///
80+
/// ```rust
81+
/// use lofty::ebml::VInt;
82+
///
83+
/// # fn main() -> lofty::Result<()> {
84+
/// // This octet count (9) is too large to represent
85+
/// let mut invalid_vint_reader = &[0b0000_0000_1];
86+
/// let invalid_vint = VInt::parse(&mut &invalid_vint_reader[..], 8);
87+
/// assert!(invalid_vint.is_err());
88+
///
89+
/// // This octet count (4) is too large to represent given our `max_length`
90+
/// let mut invalid_vint_reader2 = &[0b0001_1111];
91+
/// let invalid_vint2 = VInt::parse(&mut &invalid_vint_reader2[..], 3);
92+
/// assert!(invalid_vint2.is_err());
93+
///
94+
/// // This value is small enough to represent
95+
/// let mut valid_vint_reader = &[0b1000_0010];
96+
/// let valid_vint = VInt::parse(&mut &valid_vint_reader[..], 8)?;
97+
/// assert_eq!(valid_vint.value(), 2);
98+
/// # Ok(()) }
99+
/// ```
100+
pub fn parse<R>(reader: &mut R, max_length: u8) -> Result<Self>
101+
where
102+
R: Read,
103+
{
104+
// A value of 0b0000_0000 indicates either an invalid VInt, or one with an octet length > 8
105+
let start = reader.read_u8()?;
106+
if start == 0b0000_0000 {
107+
err!(BadVintSize);
108+
}
109+
110+
let octet_length = (Self::MAX_OCTET_LENGTH as u32) - start.ilog2();
111+
dbg!(octet_length);
112+
if octet_length > 8 || octet_length as u8 > max_length {
113+
err!(BadVintSize);
114+
}
115+
116+
let mut bytes_read = 1;
117+
let mut val = start as u64 ^ (1 << start.ilog2()) as u64;
118+
while bytes_read < octet_length {
119+
bytes_read += 1;
120+
val = (val << 8) | reader.read_u8()? as u64;
121+
}
122+
123+
Ok(Self(val))
124+
}
125+
126+
/// Represents the length of the `VInt` in octets
127+
///
128+
/// NOTE: The value returned will always be <= 8
129+
///
130+
/// # Examples
131+
///
132+
/// ```rust
133+
/// use lofty::ebml::VInt;
134+
///
135+
/// # fn main() -> lofty::Result<()> {
136+
/// // Anything <= 254 will fit into a single octet
137+
/// let vint = VInt::from_u64(100)?;
138+
/// assert_eq!(vint.octet_length(), 1);
139+
///
140+
/// // A larger number will need to
141+
/// let vint = VInt::from_u64(500_000)?;
142+
/// assert_eq!(vint.octet_length(), 3);
143+
/// # Ok(()) }
144+
/// ```
145+
pub fn octet_length(&self) -> u8 {
146+
let mut octets = 0;
147+
let mut v = self.0;
148+
loop {
149+
octets += 1;
150+
151+
v >>= Self::USABLE_BITS_PER_BYTE;
152+
if v == 0 {
153+
break;
154+
}
155+
}
156+
157+
octets
158+
}
159+
160+
/// Converts the `VInt` into a byte Vec
161+
///
162+
/// `length` can be used to specify the number of bytes to use to write the integer. If unspecified,
163+
/// the integer will be represented in the minimum number of bytes.
164+
///
165+
/// # Errors
166+
///
167+
/// * `length` > 8 or `length` == 0
168+
/// * Unable to write to the buffer
169+
///
170+
/// # Examples
171+
///
172+
/// ```rust
173+
/// use lofty::ebml::VInt;
174+
///
175+
/// # fn main() -> lofty::Result<()> {
176+
/// let vint = VInt::from_u64(10)?;
177+
/// let bytes = vint.as_bytes(None)?;
178+
///
179+
/// assert_eq!(bytes, &[0b1000_1010]);
180+
/// # Ok(()) }
181+
/// ```
182+
pub fn as_bytes(&self, length: Option<u8>) -> Result<Vec<u8>> {
183+
let octets: u8;
184+
if let Some(length) = length {
185+
if length > (Self::MAX_OCTET_LENGTH as u8) || length == 0 {
186+
err!(BadVintSize);
187+
}
188+
189+
octets = length;
190+
} else {
191+
octets = self.octet_length()
192+
}
193+
194+
let mut ret = Vec::with_capacity(octets as usize);
195+
196+
let mut val = self.value();
197+
198+
// Add the octet length
199+
val |= 1 << octets * (Self::USABLE_BITS_PER_BYTE as u8);
200+
201+
let mut byte_shift = (octets - 1) as i8;
202+
while byte_shift >= 0 {
203+
ret.write_u8((val >> (byte_shift * 8)) as u8)?;
204+
byte_shift -= 1;
205+
}
206+
207+
Ok(ret)
208+
}
209+
}
210+
211+
#[cfg(test)]
mod tests {
    use crate::ebml::VInt;
    use std::io::Cursor;

    // Every encoding of the value 2, from one octet up to eight. The first byte of each
    // entry carries the length marker, the last byte carries the value.
    const VALID_REPRESENTATIONS_OF_2: [&[u8]; 8] = [
        &[0b1000_0010],
        &[0b0100_0000, 2],
        &[0b0010_0000, 0, 2],
        &[0b0001_0000, 0, 0, 2],
        &[0b0000_1000, 0, 0, 0, 2],
        &[0b0000_0100, 0, 0, 0, 0, 2],
        &[0b0000_0010, 0, 0, 0, 0, 0, 2],
        &[0b0000_0001, 0, 0, 0, 0, 0, 0, 2],
    ];

    // Parses `bytes` with the widest allowed octet length, panicking on failure
    fn parse_unbounded(bytes: &[u8]) -> VInt {
        VInt::parse(&mut Cursor::new(bytes), 8).unwrap()
    }

    #[test]
    fn bytes_to_vint() {
        for representation in VALID_REPRESENTATIONS_OF_2 {
            let parsed = parse_unbounded(representation);
            assert_eq!(parsed.value(), 2);
        }
    }

    #[test]
    fn vint_to_bytes() {
        for representation in VALID_REPRESENTATIONS_OF_2 {
            let vint = parse_unbounded(representation);
            let width = representation.len() as u8;
            let encoded = vint.as_bytes(Some(width)).unwrap();
            assert_eq!(encoded, representation);
        }
    }

    #[test]
    fn large_integers_should_fail() {
        assert!(VInt::from_u64(u64::MAX).is_err());

        // Offsets of 10^3 through 10^18 below `u64::MAX` are all still too large
        for exponent in 3..19 {
            let offset = 10_u64.pow(exponent);
            assert!(VInt::from_u64(u64::MAX - offset).is_err());
        }
    }

    #[test]
    fn maximum_possible_representable_vint() {
        let largest = u64::MAX >> 8;
        assert!(VInt::from_u64(largest).is_ok());
    }

    #[test]
    fn octet_lengths() {
        let largest = u64::MAX >> 8;
        // Dropping 7 bits at a time removes exactly one octet from the encoding
        for dropped in 1u8..=7 {
            let vint = VInt::from_u64(largest >> (dropped * 7)).unwrap();
            assert_eq!(vint.octet_length(), 8 - dropped);
        }
    }
}

src/error.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ pub enum ErrorKind {
5656
/// Arises when attempting to use [`Atom::merge`](crate::mp4::Atom::merge) with mismatching identifiers
5757
AtomMismatch,
5858

59+
/// Arises when an EBML variable-size integer exceeds the maximum allowed size
60+
BadVintSize,
61+
5962
// Conversions for external errors
6063
/// Errors that arise while parsing OGG pages
6164
OggPage(ogg_pager::PageError),
@@ -526,6 +529,10 @@ impl Display for LoftyError {
526529
f,
527530
"MP4 Atom: Attempted to use `Atom::merge()` with mismatching identifiers"
528531
),
532+
ErrorKind::BadVintSize => write!(
533+
f,
534+
"EBML: Attempted to create a VInt with an invalid octet length"
535+
),
529536

530537
// Files
531538
ErrorKind::TooMuchData => write!(

0 commit comments

Comments
 (0)