diff --git a/benches/basic.rs b/benches/basic.rs index 69146d9e..15117186 100644 --- a/benches/basic.rs +++ b/benches/basic.rs @@ -17,7 +17,6 @@ fn count>>(path: &str) -> usize { count += excel .worksheet_range(&s) .unwrap() - .unwrap() .rows() .flat_map(|r| r.iter()) .count(); @@ -44,3 +43,41 @@ fn bench_xlsb(b: &mut Bencher) { fn bench_ods(b: &mut Bencher) { b.iter(|| count::>("tests/issues.ods")); } + +#[bench] +fn bench_xlsx_cells_reader(b: &mut Bencher) { + fn count>>(path: &str) -> usize { + let path = format!("{}/{}", env!("CARGO_MANIFEST_DIR"), path); + let mut excel: Xlsx<_> = open_workbook(&path).expect("cannot open excel file"); + + let sheets = excel.sheet_names().to_owned(); + let mut count = 0; + for s in sheets { + let mut cells_reader = excel.worksheet_cells_reader(&s).unwrap(); + while let Some(_) = cells_reader.next_cell().unwrap() { + count += 1; + } + } + count + } + b.iter(|| count::>("tests/issues.xlsx")); +} + +#[bench] +fn bench_xlsb_cells_reader(b: &mut Bencher) { + fn count>>(path: &str) -> usize { + let path = format!("{}/{}", env!("CARGO_MANIFEST_DIR"), path); + let mut excel: Xlsb<_> = open_workbook(&path).expect("cannot open excel file"); + + let sheets = excel.sheet_names().to_owned(); + let mut count = 0; + for s in sheets { + let mut cells_reader = excel.worksheet_cells_reader(&s).unwrap(); + while let Some(_) = cells_reader.next_cell().unwrap() { + count += 1; + } + } + count + } + b.iter(|| count::>("tests/issues.xlsb")); +} diff --git a/examples/excel_to_csv.rs b/examples/excel_to_csv.rs index 16bfe97a..da68fcc3 100644 --- a/examples/excel_to_csv.rs +++ b/examples/excel_to_csv.rs @@ -24,7 +24,7 @@ fn main() { let dest = sce.with_extension("csv"); let mut dest = BufWriter::new(File::create(dest).unwrap()); let mut xl = open_workbook_auto(&sce).unwrap(); - let range = xl.worksheet_range(&sheet).unwrap().unwrap(); + let range = xl.worksheet_range(&sheet).unwrap(); write_range(&mut dest, &range).unwrap(); } diff --git 
a/examples/search_errors.rs b/examples/search_errors.rs index c8e10b28..2b068022 100644 --- a/examples/search_errors.rs +++ b/examples/search_errors.rs @@ -74,10 +74,7 @@ fn run(f: GlobResult) -> Result<(PathBuf, Option, usize), FileStatus> { let sheets = xl.sheet_names().to_owned(); for s in sheets { - let range = xl - .worksheet_range(&s) - .unwrap() - .map_err(FileStatus::RangeError)?; + let range = xl.worksheet_range(&s).map_err(FileStatus::RangeError)?; cell_errors += range .rows() .flat_map(|r| { diff --git a/src/auto.rs b/src/auto.rs index 332a90fc..61f14d60 100644 --- a/src/auto.rs +++ b/src/auto.rs @@ -105,22 +105,22 @@ where } /// Read worksheet data in corresponding worksheet path - fn worksheet_range(&mut self, name: &str) -> Option, Self::Error>> { + fn worksheet_range(&mut self, name: &str) -> Result, Self::Error> { match *self { - Sheets::Xls(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Xls)), - Sheets::Xlsx(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Xlsx)), - Sheets::Xlsb(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Xlsb)), - Sheets::Ods(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Ods)), + Sheets::Xls(ref mut e) => e.worksheet_range(name).map_err(Error::Xls), + Sheets::Xlsx(ref mut e) => e.worksheet_range(name).map_err(Error::Xlsx), + Sheets::Xlsb(ref mut e) => e.worksheet_range(name).map_err(Error::Xlsb), + Sheets::Ods(ref mut e) => e.worksheet_range(name).map_err(Error::Ods), } } /// Read worksheet formula in corresponding worksheet path - fn worksheet_formula(&mut self, name: &str) -> Option, Self::Error>> { + fn worksheet_formula(&mut self, name: &str) -> Result, Self::Error> { match *self { - Sheets::Xls(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Xls)), - Sheets::Xlsx(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Xlsx)), - Sheets::Xlsb(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Xlsb)), - Sheets::Ods(ref mut 
e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Ods)), + Sheets::Xls(ref mut e) => e.worksheet_formula(name).map_err(Error::Xls), + Sheets::Xlsx(ref mut e) => e.worksheet_formula(name).map_err(Error::Xlsx), + Sheets::Xlsb(ref mut e) => e.worksheet_formula(name).map_err(Error::Xlsb), + Sheets::Ods(ref mut e) => e.worksheet_formula(name).map_err(Error::Ods), } } diff --git a/src/datatype.rs b/src/datatype.rs index 9e3fb76c..f3d508e2 100644 --- a/src/datatype.rs +++ b/src/datatype.rs @@ -360,6 +360,53 @@ where } } +/// An enum to represent all different data types that can appear as +/// a value in a worksheet cell +#[derive(Debug, Clone, PartialEq, Default)] +pub enum DataTypeRef<'a> { + /// Signed integer + Int(i64), + /// Float + Float(f64), + /// String + String(String), + /// Shared String + SharedString(&'a str), + /// Boolean + Bool(bool), + /// Date or Time + DateTime(f64), + /// Duration + Duration(f64), + /// Date, Time or DateTime in ISO 8601 + DateTimeIso(String), + /// Duration in ISO 8601 + DurationIso(String), + /// Error + Error(CellErrorType), + /// Empty cell + #[default] + Empty, +} + +impl<'a> From> for DataType { + fn from(value: DataTypeRef<'a>) -> Self { + match value { + DataTypeRef::Int(v) => DataType::Int(v), + DataTypeRef::Float(v) => DataType::Float(v), + DataTypeRef::String(v) => DataType::String(v), + DataTypeRef::SharedString(v) => DataType::String(v.into()), + DataTypeRef::Bool(v) => DataType::Bool(v), + DataTypeRef::DateTime(v) => DataType::DateTime(v), + DataTypeRef::Duration(v) => DataType::Duration(v), + DataTypeRef::DateTimeIso(v) => DataType::DateTimeIso(v), + DataTypeRef::DurationIso(v) => DataType::DurationIso(v), + DataTypeRef::Error(v) => DataType::Error(v), + DataTypeRef::Empty => DataType::Empty, + } + } +} + #[cfg(all(test, feature = "dates"))] mod date_tests { use super::*; diff --git a/src/de.rs b/src/de.rs index 16a414fa..3debf558 100644 --- a/src/de.rs +++ b/src/de.rs @@ -110,8 +110,7 @@ impl 
RangeDeserializerBuilder<'static, &'static str> { /// fn main() -> Result<(), Error> { /// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR")); /// let mut workbook: Xlsx<_> = open_workbook(path)?; - /// let range = workbook.worksheet_range("Sheet1") - /// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??; + /// let range = workbook.worksheet_range("Sheet1")?; /// /// let mut iter = RangeDeserializerBuilder::new() /// .has_headers(false) @@ -154,8 +153,7 @@ impl<'h, H: AsRef + Clone + 'h> RangeDeserializerBuilder<'h, H> { /// fn main() -> Result<(), Error> { /// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR")); /// let mut workbook: Xlsx<_> = open_workbook(path)?; - /// let range = workbook.worksheet_range("Sheet1") - /// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??; + /// let range = workbook.worksheet_range("Sheet1")?; /// let mut iter = RangeDeserializerBuilder::with_headers(&["value", "label"]).from_range(&range)?; /// /// if let Some(result) = iter.next() { @@ -184,8 +182,7 @@ impl<'h, H: AsRef + Clone + 'h> RangeDeserializerBuilder<'h, H> { /// fn main() -> Result<(), Error> { /// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR")); /// let mut workbook: Xlsx<_> = open_workbook(path)?; - /// let range = workbook.worksheet_range("Sheet1") - /// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??; + /// let range = workbook.worksheet_range("Sheet1")?; /// let mut iter = RangeDeserializerBuilder::new().from_range(&range)?; /// /// if let Some(result) = iter.next() { @@ -220,8 +217,7 @@ impl<'h, H: AsRef + Clone + 'h> RangeDeserializerBuilder<'h, H> { /// fn main() -> Result<(), Error> { /// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR")); /// let mut workbook: Xlsx<_> = open_workbook(path)?; -/// let range = workbook.worksheet_range("Sheet1") -/// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??; +/// let range = workbook.worksheet_range("Sheet1")?; /// /// let mut iter = 
RangeDeserializerBuilder::new().from_range(&range)?; /// diff --git a/src/formats.rs b/src/formats.rs index e945f6f2..3cfae549 100644 --- a/src/formats.rs +++ b/src/formats.rs @@ -1,4 +1,4 @@ -use crate::DataType; +use crate::{datatype::DataTypeRef, DataType}; /// https://learn.microsoft.com/en-us/office/troubleshoot/excel/1900-and-1904-date-system static EXCEL_1900_1904_DIFF: i64 = 1462; @@ -104,18 +104,28 @@ pub fn format_excel_i64(value: i64, format: Option<&CellFormat>, is_1904: bool) } // convert f64 to date, if format == Date -pub fn format_excel_f64(value: f64, format: Option<&CellFormat>, is_1904: bool) -> DataType { +#[inline] +pub fn format_excel_f64_ref<'a>( + value: f64, + format: Option<&CellFormat>, + is_1904: bool, +) -> DataTypeRef<'static> { match format { - Some(CellFormat::DateTime) => DataType::DateTime(if is_1904 { + Some(CellFormat::DateTime) => DataTypeRef::DateTime(if is_1904 { value + EXCEL_1900_1904_DIFF as f64 } else { value }), - Some(CellFormat::TimeDelta) => DataType::Duration(value), - _ => DataType::Float(value), + Some(CellFormat::TimeDelta) => DataTypeRef::Duration(value), + _ => DataTypeRef::Float(value), } } +// convert f64 to date, if format == Date +pub fn format_excel_f64(value: f64, format: Option<&CellFormat>, is_1904: bool) -> DataType { + format_excel_f64_ref(value, format, is_1904).into() +} + /// Ported from openpyxl, MIT License /// https://foss.heptapod.net/openpyxl/openpyxl/-/blob/a5e197c530aaa49814fd1d993dd776edcec35105/openpyxl/styles/tests/test_number_style.py #[test] diff --git a/src/lib.rs b/src/lib.rs index d0154d28..6ab55fcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,7 +15,7 @@ //! let mut workbook: Xlsx<_> = open_workbook(path).expect("Cannot open file"); //! //! // Read whole worksheet data and provide some statistics -//! if let Some(Ok(range)) = workbook.worksheet_range("Sheet1") { +//! if let Ok(range) = workbook.worksheet_range("Sheet1") { //! 
let total_cells = range.get_size().0 * range.get_size().1; //! let non_empty_cells: usize = range.used_cells().count(); //! println!("Found {} cells in 'Sheet1', including {} non empty cells", @@ -49,7 +49,6 @@ //! println!("found {} formula in '{}'", //! workbook //! .worksheet_formula(&s) -//! .expect("sheet not found") //! .expect("error while getting formula") //! .rows().flat_map(|r| r.iter().filter(|f| !f.is_empty())) //! .count(), @@ -74,6 +73,7 @@ mod de; mod errors; pub mod vba; +use datatype::DataTypeRef; use serde::de::DeserializeOwned; use std::borrow::Cow; use std::cmp::{max, min}; @@ -132,6 +132,18 @@ impl fmt::Display for CellErrorType { } } +#[derive(Debug, PartialEq, Default, Clone, Copy)] +pub(crate) struct Dimensions { + pub start: (u32, u32), + pub end: (u32, u32), +} + +impl Dimensions { + pub fn len(&self) -> u64 { + (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64 + } +} + /// Common file metadata /// /// Depending on file type, some extra information may be stored @@ -203,18 +215,21 @@ where /// Creates a new instance. fn new(reader: RS) -> Result; + /// Gets `VbaProject` fn vba_project(&mut self) -> Option, Self::Error>>; + /// Initialize fn metadata(&self) -> &Metadata; + /// Read worksheet data in corresponding worksheet path - fn worksheet_range(&mut self, name: &str) -> Option, Self::Error>>; + fn worksheet_range(&mut self, name: &str) -> Result, Self::Error>; /// Fetch all worksheet data & paths fn worksheets(&mut self) -> Vec<(String, Range)>; /// Read worksheet formula in corresponding worksheet path - fn worksheet_formula(&mut self, _: &str) -> Option, Self::Error>>; + fn worksheet_formula(&mut self, _: &str) -> Result, Self::Error>; /// Get all sheet names of this workbook, in workbook order /// @@ -248,7 +263,7 @@ where /// sheet_name, then the corresponding worksheet. 
fn worksheet_range_at(&mut self, n: usize) -> Option, Self::Error>> { let name = self.sheet_names().get(n)?.to_string(); - self.worksheet_range(&name) + Some(self.worksheet_range(&name)) } /// Get all pictures, tuple as (ext: String, data: Vec) @@ -279,6 +294,7 @@ where pub trait CellType: Default + Clone + PartialEq {} impl CellType for DataType {} +impl<'a> CellType for DataTypeRef<'a> {} impl CellType for String {} impl CellType for usize {} // for tests @@ -616,8 +632,7 @@ impl Range { /// fn main() -> Result<(), Error> { /// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR")); /// let mut workbook: Xlsx<_> = open_workbook(path)?; - /// let mut sheet = workbook.worksheet_range("Sheet1") - /// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??; + /// let mut sheet = workbook.worksheet_range("Sheet1")?; /// let mut iter = sheet.deserialize()?; /// /// if let Some(result) = iter.next() { diff --git a/src/ods.rs b/src/ods.rs index f904c2f4..45c436d2 100644 --- a/src/ods.rs +++ b/src/ods.rs @@ -56,6 +56,8 @@ pub enum OdsError { /// Found found: String, }, + /// Worksheet not found + WorksheetNotFound(String), } from_err!(std::io::Error, OdsError, Io); @@ -67,20 +69,21 @@ from_err!(std::num::ParseFloatError, OdsError, ParseFloat); impl std::fmt::Display for OdsError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - OdsError::Io(e) => write!(f, "I/O error: {}", e), - OdsError::Zip(e) => write!(f, "Zip error: {:?}", e), - OdsError::Xml(e) => write!(f, "Xml error: {}", e), - OdsError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e), - OdsError::Parse(e) => write!(f, "Parse string error: {}", e), - OdsError::ParseInt(e) => write!(f, "Parse integer error: {}", e), - OdsError::ParseFloat(e) => write!(f, "Parse float error: {}", e), - OdsError::ParseBool(e) => write!(f, "Parse bool error: {}", e), - OdsError::InvalidMime(mime) => write!(f, "Invalid MIME type: {:?}", mime), - OdsError::FileNotFound(file) => 
write!(f, "'{}' file not found in archive", file), - OdsError::Eof(node) => write!(f, "Expecting '{}' node, found end of xml file", node), + OdsError::Io(e) => write!(f, "I/O error: {e}"), + OdsError::Zip(e) => write!(f, "Zip error: {e:?}"), + OdsError::Xml(e) => write!(f, "Xml error: {e}"), + OdsError::XmlAttr(e) => write!(f, "Xml attribute error: {e}"), + OdsError::Parse(e) => write!(f, "Parse string error: {e}"), + OdsError::ParseInt(e) => write!(f, "Parse integer error: {e}"), + OdsError::ParseFloat(e) => write!(f, "Parse float error: {e}"), + OdsError::ParseBool(e) => write!(f, "Parse bool error: {e}"), + OdsError::InvalidMime(mime) => write!(f, "Invalid MIME type: {mime:?}"), + OdsError::FileNotFound(file) => write!(f, "'{file}' file not found in archive"), + OdsError::Eof(node) => write!(f, "Expecting '{node}' node, found end of xml file"), OdsError::Mismatch { expected, found } => { - write!(f, "Expecting '{}', found '{}'", expected, found) + write!(f, "Expecting '{expected}', found '{found}'") } + OdsError::WorksheetNotFound(name) => write!(f, "Worksheet '{name}' not found"), } } } @@ -167,8 +170,11 @@ where } /// Read worksheet data in corresponding worksheet path - fn worksheet_range(&mut self, name: &str) -> Option, OdsError>> { - self.sheets.get(name).map(|r| Ok(r.0.to_owned())) + fn worksheet_range(&mut self, name: &str) -> Result, OdsError> { + self.sheets + .get(name) + .ok_or_else(|| OdsError::WorksheetNotFound(name.into())) + .map(|r| r.0.to_owned()) } fn worksheets(&mut self) -> Vec<(String, Range)> { @@ -179,8 +185,11 @@ where } /// Read worksheet data in corresponding worksheet path - fn worksheet_formula(&mut self, name: &str) -> Option, OdsError>> { - self.sheets.get(name).map(|r| Ok(r.1.to_owned())) + fn worksheet_formula(&mut self, name: &str) -> Result, OdsError> { + self.sheets + .get(name) + .ok_or_else(|| OdsError::WorksheetNotFound(name.into())) + .map(|r| r.1.to_owned()) } #[cfg(feature = "picture")] diff --git a/src/xls.rs 
b/src/xls.rs index 2f83677a..b925e014 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -70,6 +70,8 @@ pub enum XlsError { /// Invalid OfficeArt Record #[cfg(feature = "picture")] Art(&'static str), + /// Worksheet not found + WorksheetNotFound(String), } from_err!(std::io::Error, XlsError, Io); @@ -79,11 +81,11 @@ from_err!(crate::vba::VbaError, XlsError, Vba); impl std::fmt::Display for XlsError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - XlsError::Io(e) => write!(f, "I/O error: {}", e), - XlsError::Cfb(e) => write!(f, "Cfb error: {}", e), - XlsError::Vba(e) => write!(f, "Vba error: {}", e), + XlsError::Io(e) => write!(f, "I/O error: {e}"), + XlsError::Cfb(e) => write!(f, "Cfb error: {e}"), + XlsError::Vba(e) => write!(f, "Vba error: {e}"), XlsError::StackLen => write!(f, "Invalid stack length"), - XlsError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: 0x{:0X}", typ, val), + XlsError::Unrecognized { typ, val } => write!(f, "Unrecognized {typ}: 0x{val:0X}"), XlsError::Password => write!(f, "Workbook is password protected"), XlsError::Len { expected, @@ -91,22 +93,22 @@ impl std::fmt::Display for XlsError { typ, } => write!( f, - "Invalid {} length, expected {} maximum, found {}", - typ, expected, found + "Invalid {typ} length, expected {expected} maximum, found {found}", ), XlsError::ContinueRecordTooShort => write!( f, "Continued record too short while reading extended string" ), - XlsError::EoStream(s) => write!(f, "End of stream '{}'", s), + XlsError::EoStream(s) => write!(f, "End of stream '{s}'"), XlsError::InvalidFormula { stack_size } => { - write!(f, "Invalid formula (stack size: {})", stack_size) + write!(f, "Invalid formula (stack size: {stack_size})") } - XlsError::IfTab(iftab) => write!(f, "Invalid iftab {:X}", iftab), - XlsError::Etpg(etpg) => write!(f, "Invalid etpg {:X}", etpg), + XlsError::IfTab(iftab) => write!(f, "Invalid iftab {iftab:X}"), + XlsError::Etpg(etpg) => write!(f, "Invalid etpg 
{etpg:X}"), XlsError::NoVba => write!(f, "No VBA project"), #[cfg(feature = "picture")] - XlsError::Art(s) => write!(f, "Invalid art record '{}'", s), + XlsError::Art(s) => write!(f, "Invalid art record '{s}'"), + XlsError::WorksheetNotFound(name) => write!(f, "Worksheet '{name}' not found"), } } } @@ -220,8 +222,11 @@ impl Reader for Xls { &self.metadata } - fn worksheet_range(&mut self, name: &str) -> Option, XlsError>> { - self.sheets.get(name).map(|r| Ok(r.0.clone())) + fn worksheet_range(&mut self, name: &str) -> Result, XlsError> { + self.sheets + .get(name) + .map(|r| r.0.clone()) + .ok_or_else(|| XlsError::WorksheetNotFound(name.into())) } fn worksheets(&mut self) -> Vec<(String, Range)> { @@ -231,8 +236,11 @@ impl Reader for Xls { .collect() } - fn worksheet_formula(&mut self, name: &str) -> Option, XlsError>> { - self.sheets.get(name).map(|r| Ok(r.1.clone())) + fn worksheet_formula(&mut self, name: &str) -> Result, XlsError> { + self.sheets + .get(name) + .ok_or_else(|| XlsError::WorksheetNotFound(name.into())) + .map(|r| r.1.clone()) } #[cfg(feature = "picture")] diff --git a/src/xlsb/cells_reader.rs b/src/xlsb/cells_reader.rs new file mode 100644 index 00000000..474a0738 --- /dev/null +++ b/src/xlsb/cells_reader.rs @@ -0,0 +1,206 @@ +use crate::{ + datatype::DataTypeRef, + formats::{format_excel_f64_ref, CellFormat}, + utils::{read_f64, read_i32, read_u32, read_usize}, + Cell, CellErrorType, Dimensions, XlsbError, +}; + +use super::{cell_format, parse_formula, wide_str, RecordIter}; + +/// A cells reader for xlsb files +pub struct XlsbCellsReader<'a> { + iter: RecordIter<'a>, + formats: &'a [CellFormat], + strings: &'a [String], + extern_sheets: &'a [String], + metadata_names: &'a [(String, String)], + typ: u16, + row: u32, + is_1904: bool, + dimensions: Dimensions, + buf: Vec, +} + +impl<'a> XlsbCellsReader<'a> { + pub(crate) fn new( + mut iter: RecordIter<'a>, + formats: &'a [CellFormat], + strings: &'a [String], + extern_sheets: &'a [String], + 
metadata_names: &'a [(String, String)], + is_1904: bool, + ) -> Result { + let mut buf = Vec::with_capacity(1024); + // BrtWsDim + let _ = iter.next_skip_blocks( + 0x0094, + &[ + (0x0081, None), // BrtBeginSheet + (0x0093, None), // BrtWsProp + ], + &mut buf, + )?; + let dimensions = parse_dimensions(&buf[..16]); + + // BrtBeginSheetData + let _ = iter.next_skip_blocks( + 0x0091, + &[ + (0x0085, Some(0x0086)), // Views + (0x0025, Some(0x0026)), // AC blocks + (0x01E5, None), // BrtWsFmtInfo + (0x0186, Some(0x0187)), // Col Infos + ], + &mut buf, + )?; + + Ok(XlsbCellsReader { + iter, + formats, + is_1904, + strings, + extern_sheets, + metadata_names, + dimensions, + typ: 0, + row: 0, + buf, + }) + } + + pub(crate) fn dimensions(&self) -> Dimensions { + self.dimensions + } + + pub fn next_cell(&mut self) -> Result>>, XlsbError> { + // loop until end of sheet + let value = loop { + self.buf.clear(); + self.typ = self.iter.read_type()?; + let _ = self.iter.fill_buffer(&mut self.buf)?; + let value = match self.typ { + // 0x0001 => continue, // DataType::Empty, // BrtCellBlank + 0x0002 => { + // BrtCellRk MS-XLSB 2.5.122 + let d100 = (self.buf[8] & 1) != 0; + let is_int = (self.buf[8] & 2) != 0; + self.buf[8] &= 0xFC; + + if is_int { + let v = (read_i32(&self.buf[8..12]) >> 2) as i64; + if d100 { + let v = (v as f64) / 100.0; + format_excel_f64_ref( + v, + cell_format(&self.formats, &self.buf), + self.is_1904, + ) + } else { + DataTypeRef::Int(v) + } + } else { + let mut v = [0u8; 8]; + v[4..].copy_from_slice(&self.buf[8..12]); + let v = read_f64(&v); + let v = if d100 { v / 100.0 } else { v }; + format_excel_f64_ref(v, cell_format(&self.formats, &self.buf), self.is_1904) + } + } + 0x0003 => { + let error = match self.buf[8] { + 0x00 => CellErrorType::Null, + 0x07 => CellErrorType::Div0, + 0x0F => CellErrorType::Value, + 0x17 => CellErrorType::Ref, + 0x1D => CellErrorType::Name, + 0x24 => CellErrorType::Num, + 0x2A => CellErrorType::NA, + 0x2B => 
CellErrorType::GettingData, + c => return Err(XlsbError::CellError(c)), + }; + // BrtCellError + DataTypeRef::Error(error) + } + 0x0004 | 0x000A => DataTypeRef::Bool(self.buf[8] != 0), // BrtCellBool or BrtFmlaBool + 0x0005 | 0x0009 => { + let v = read_f64(&self.buf[8..16]); + format_excel_f64_ref(v, cell_format(&self.formats, &self.buf), self.is_1904) + } // BrtCellReal or BrtFmlaNum + 0x0006 | 0x0008 => { + DataTypeRef::String(wide_str(&self.buf[8..], &mut 0)?.into_owned()) + } // BrtCellSt or BrtFmlaString + 0x0007 => { + // BrtCellIsst + let isst = read_usize(&self.buf[8..12]); + DataTypeRef::SharedString(&self.strings[isst]) + } + 0x0000 => { + // BrtRowHdr + self.row = read_u32(&self.buf); + if self.row > 0x0010_0000 { + return Ok(None); // invalid row + } + continue; + } + 0x0092 => return Ok(None), // BrtEndSheetData + _ => continue, // anything else, ignore and try next, without changing idx + }; + break value; + }; + let col = read_u32(&self.buf); + Ok(Some(Cell::new((self.row, col), value))) + } + + pub fn next_formula(&mut self) -> Result>, XlsbError> { + let value = loop { + self.typ = self.iter.read_type()?; + let _ = self.iter.fill_buffer(&mut self.buf)?; + + let value = match self.typ { + // 0x0001 => continue, // DataType::Empty, // BrtCellBlank + 0x0008 => { + // BrtFmlaString + let cch = read_u32(&self.buf[8..]) as usize; + let formula = &self.buf[14 + cch * 2..]; + let cce = read_u32(formula) as usize; + let rgce = &formula[4..4 + cce]; + parse_formula(rgce, &self.extern_sheets, &self.metadata_names)? + } + 0x0009 => { + // BrtFmlaNum + let formula = &self.buf[18..]; + let cce = read_u32(formula) as usize; + let rgce = &formula[4..4 + cce]; + parse_formula(rgce, &self.extern_sheets, &self.metadata_names)? + } + 0x000A | 0x000B => { + // BrtFmlaBool | BrtFmlaError + let formula = &self.buf[11..]; + let cce = read_u32(formula) as usize; + let rgce = &formula[4..4 + cce]; + parse_formula(rgce, &self.extern_sheets, &self.metadata_names)? 
+ } + 0x0000 => { + // BrtRowHdr + self.row = read_u32(&self.buf); + if self.row > 0x0010_0000 { + return Ok(None); // invalid row + } + continue; + } + 0x0092 => return Ok(None), // BrtEndSheetData + _ => continue, // anything else, ignore and try next, without changing idx + }; + break value; + }; + let col = read_u32(&self.buf); + Ok(Some(Cell::new((self.row, col), value))) + } +} + +fn parse_dimensions(buf: &[u8]) -> Dimensions { + Dimensions { + start: (read_u32(&buf[0..4]), read_u32(&buf[8..12])), + end: (read_u32(&buf[4..8]), read_u32(&buf[12..16])), + } +} diff --git a/src/xlsb.rs b/src/xlsb/mod.rs similarity index 76% rename from src/xlsb.rs rename to src/xlsb/mod.rs index b8b9c2e5..c94a1079 100644 --- a/src/xlsb.rs +++ b/src/xlsb/mod.rs @@ -1,3 +1,7 @@ +mod cells_reader; + +pub use cells_reader::XlsbCellsReader; + use std::borrow::Cow; use std::collections::BTreeMap; use std::io::{BufReader, Read, Seek}; @@ -13,14 +17,11 @@ use quick_xml::Reader as XmlReader; use zip::read::{ZipArchive, ZipFile}; use zip::result::ZipError; -use crate::formats::{ - builtin_format_by_code, detect_custom_number_format, format_excel_f64, CellFormat, -}; +use crate::datatype::DataTypeRef; +use crate::formats::{builtin_format_by_code, detect_custom_number_format, CellFormat}; use crate::utils::{push_column, read_f64, read_i32, read_u16, read_u32, read_usize}; use crate::vba::VbaProject; -use crate::{ - Cell, CellErrorType, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible, -}; +use crate::{Cell, DataType, Metadata, Range, Reader, Sheet, SheetType, SheetVisible}; /// A Xlsb specific error #[derive(Debug)] @@ -74,6 +75,8 @@ pub enum XlsbError { /// value found val: String, }, + /// Worksheet not found + WorksheetNotFound(String), } from_err!(std::io::Error, XlsbError, Io); @@ -83,28 +86,30 @@ from_err!(quick_xml::Error, XlsbError, Xml); impl std::fmt::Display for XlsbError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - 
XlsbError::Io(e) => write!(f, "I/O error: {}", e), - XlsbError::Zip(e) => write!(f, "Zip error: {}", e), - XlsbError::Xml(e) => write!(f, "Xml error: {}", e), - XlsbError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e), - XlsbError::Vba(e) => write!(f, "Vba error: {}", e), + XlsbError::Io(e) => write!(f, "I/O error: {e}"), + XlsbError::Zip(e) => write!(f, "Zip error: {e}"), + XlsbError::Xml(e) => write!(f, "Xml error: {e}"), + XlsbError::XmlAttr(e) => write!(f, "Xml attribute error: {e}"), + XlsbError::Vba(e) => write!(f, "Vba error: {e}"), XlsbError::Mismatch { expected, found } => { - write!(f, "Expecting {}, got {:X}", expected, found) + write!(f, "Expecting {expected}, got {found:X}") } - XlsbError::FileNotFound(file) => write!(f, "File not found: '{}'", file), + XlsbError::FileNotFound(file) => write!(f, "File not found: '{file}'"), XlsbError::StackLen => write!(f, "Invalid stack length"), - XlsbError::UnsupportedType(t) => write!(f, "Unsupported type {:X}", t), - XlsbError::Etpg(t) => write!(f, "Unsupported etpg {:X}", t), - XlsbError::IfTab(t) => write!(f, "Unsupported iftab {:X}", t), - XlsbError::BErr(t) => write!(f, "Unsupported BErr {:X}", t), - XlsbError::Ptg(t) => write!(f, "Unsupported Ptf {:X}", t), - XlsbError::CellError(t) => write!(f, "Unsupported Cell Error code {:X}", t), + XlsbError::UnsupportedType(t) => write!(f, "Unsupported type {t:X}"), + XlsbError::Etpg(t) => write!(f, "Unsupported etpg {t:X}"), + XlsbError::IfTab(t) => write!(f, "Unsupported iftab {t:X}"), + XlsbError::BErr(t) => write!(f, "Unsupported BErr {t:X}"), + XlsbError::Ptg(t) => write!(f, "Unsupported Ptf {t:X}"), + XlsbError::CellError(t) => write!(f, "Unsupported Cell Error code {t:X}"), XlsbError::WideStr { ws_len, buf_len } => write!( f, - "Wide str length exceeds buffer length ({} > {})", - ws_len, buf_len + "Wide str length exceeds buffer length ({ws_len} > {buf_len})", ), - XlsbError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: {}", typ, val), + 
XlsbError::Unrecognized { typ, val } => { + write!(f, "Unrecognized {typ}: {val}") + } + XlsbError::WorksheetNotFound(name) => write!(f, "Worksheet '{name}' not found"), } } } @@ -377,205 +382,24 @@ impl Xlsb { } } - fn worksheet_range_from_path(&mut self, path: &str) -> Result, XlsbError> { - let mut iter = RecordIter::from_zip(&mut self.zip, &path)?; - let mut buf = Vec::with_capacity(1024); - let formats = &self.formats; - // BrtWsDim - let _ = iter.next_skip_blocks( - 0x0094, - &[ - (0x0081, None), // BrtBeginSheet - (0x0093, None), // BrtWsProp - ], - &mut buf, - )?; - let (start, end) = parse_dimensions(&buf[..16]); - let len = (end.0 - start.0 + 1) * (end.1 - start.1 + 1); - let mut cells = if len < 1_000_000 { - Vec::with_capacity(len as usize) - } else { - Vec::new() + /// Get a cells reader for a given worksheet + pub fn worksheet_cells_reader<'a>( + &'a mut self, + name: &str, + ) -> Result, XlsbError> { + let path = match self.sheets.iter().find(|&(n, _)| n == name) { + Some((_, path)) => path.clone(), + None => return Err(XlsbError::WorksheetNotFound(name.into())), }; - - // BrtBeginSheetData - let _ = iter.next_skip_blocks( - 0x0091, - &[ - (0x0085, Some(0x0086)), // Views - (0x0025, Some(0x0026)), // AC blocks - (0x01E5, None), // BrtWsFmtInfo - (0x0186, Some(0x0187)), // Col Infos - ], - &mut buf, - )?; - - // Initialization: first BrtRowHdr - let mut typ: u16; - let mut row = 0u32; - - // loop until end of sheet - loop { - typ = iter.read_type()?; - let _ = iter.fill_buffer(&mut buf)?; - - let value = match typ { - // 0x0001 => continue, // DataType::Empty, // BrtCellBlank - 0x0002 => { - // BrtCellRk MS-XLSB 2.5.122 - let d100 = (buf[8] & 1) != 0; - let is_int = (buf[8] & 2) != 0; - buf[8] &= 0xFC; - - if is_int { - let v = (read_i32(&buf[8..12]) >> 2) as i64; - if d100 { - let v = (v as f64) / 100.0; - format_excel_f64(v, cell_format(formats, &buf), self.is_1904) - } else { - DataType::Int(v) - } - } else { - let mut v = [0u8; 8]; - 
v[4..].copy_from_slice(&buf[8..12]); - let v = read_f64(&v); - let v = if d100 { v / 100.0 } else { v }; - format_excel_f64(v, cell_format(formats, &buf), self.is_1904) - } - } - 0x0003 => { - let error = match buf[8] { - 0x00 => CellErrorType::Null, - 0x07 => CellErrorType::Div0, - 0x0F => CellErrorType::Value, - 0x17 => CellErrorType::Ref, - 0x1D => CellErrorType::Name, - 0x24 => CellErrorType::Num, - 0x2A => CellErrorType::NA, - 0x2B => CellErrorType::GettingData, - c => return Err(XlsbError::CellError(c)), - }; - // BrtCellError - DataType::Error(error) - } - 0x0004 | 0x000A => DataType::Bool(buf[8] != 0), // BrtCellBool or BrtFmlaBool - 0x0005 | 0x0009 => { - let v = read_f64(&buf[8..16]); - format_excel_f64(v, cell_format(formats, &buf), self.is_1904) - } // BrtCellReal or BrtFmlaNum - 0x0006 | 0x0008 => DataType::String(wide_str(&buf[8..], &mut 0)?.into_owned()), // BrtCellSt or BrtFmlaString - 0x0007 => { - // BrtCellIsst - let isst = read_usize(&buf[8..12]); - DataType::String(self.strings[isst].clone()) - } - 0x0000 => { - // BrtRowHdr - row = read_u32(&buf); - if row > 0x0010_0000 { - return Ok(Range::from_sparse(cells)); // invalid row - } - continue; - } - 0x0092 => return Ok(Range::from_sparse(cells)), // BrtEndSheetData - _ => continue, // anything else, ignore and try next, without changing idx - }; - - let col = read_u32(&buf); - match value { - DataType::Empty => (), - DataType::String(s) if s.is_empty() => (), - value => cells.push(Cell::new((row, col), value)), - } - } - } - - fn worksheet_formula_from_path(&mut self, path: String) -> Result, XlsbError> { - let mut iter = RecordIter::from_zip(&mut self.zip, &path)?; - let mut buf = Vec::with_capacity(1024); - - // BrtWsDim - let _ = iter.next_skip_blocks( - 0x0094, - &[ - (0x0081, None), // BrtBeginSheet - (0x0093, None), // BrtWsProp - ], - &mut buf, - )?; - let (start, end) = parse_dimensions(&buf[..16]); - let mut cells = Vec::new(); - if start.0 <= end.0 && start.1 <= end.1 { - let rows = 
(end.0 - start.0 + 1) as usize; - let cols = (end.1 - start.1 + 1) as usize; - let len = rows.saturating_mul(cols); - if len < 1_000_000 { - cells.reserve(len); - } - } - - // BrtBeginSheetData - let _ = iter.next_skip_blocks( - 0x0091, - &[ - (0x0085, Some(0x0086)), // Views - (0x0025, Some(0x0026)), // AC blocks - (0x01E5, None), // BrtWsFmtInfo - (0x0186, Some(0x0187)), // Col Infos - ], - &mut buf, - )?; - - // Initialization: first BrtRowHdr - let mut typ: u16; - let mut row = 0u32; - - // loop until end of sheet - loop { - typ = iter.read_type()?; - let _ = iter.fill_buffer(&mut buf)?; - - let value = match typ { - // 0x0001 => continue, // DataType::Empty, // BrtCellBlank - 0x0008 => { - // BrtFmlaString - let cch = read_u32(&buf[8..]) as usize; - let formula = &buf[14 + cch * 2..]; - let cce = read_u32(formula) as usize; - let rgce = &formula[4..4 + cce]; - parse_formula(rgce, &self.extern_sheets, &self.metadata.names)? - } - 0x0009 => { - // BrtFmlaNum - let formula = &buf[18..]; - let cce = read_u32(formula) as usize; - let rgce = &formula[4..4 + cce]; - parse_formula(rgce, &self.extern_sheets, &self.metadata.names)? - } - 0x000A | 0x000B => { - // BrtFmlaBool | BrtFmlaError - let formula = &buf[11..]; - let cce = read_u32(formula) as usize; - let rgce = &formula[4..4 + cce]; - parse_formula(rgce, &self.extern_sheets, &self.metadata.names)? 
- } - 0x0000 => { - // BrtRowHdr - row = read_u32(&buf); - if row > 0x0010_0000 { - return Ok(Range::from_sparse(cells)); // invalid row - } - continue; - } - 0x0092 => return Ok(Range::from_sparse(cells)), // BrtEndSheetData - _ => continue, // anything else, ignore and try next, without changing idx - }; - - let col = read_u32(&buf); - if !value.is_empty() { - cells.push(Cell::new((row, col), value)); - } - } + let iter = RecordIter::from_zip(&mut self.zip, &path)?; + XlsbCellsReader::new( + iter, + &self.formats, + &self.strings, + &self.extern_sheets, + &self.metadata.names, + self.is_1904, + ) } #[cfg(feature = "picture")] @@ -646,30 +470,40 @@ impl Reader for Xlsb { } /// MS-XLSB 2.1.7.62 - fn worksheet_range(&mut self, name: &str) -> Option, XlsbError>> { - let path = match self.sheets.iter().find(|&(n, _)| n == name) { - Some((_, path)) => path.clone(), - None => return None, - }; - Some(self.worksheet_range_from_path(&path)) + fn worksheet_range(&mut self, name: &str) -> Result, XlsbError> { + let mut cells_reader = self.worksheet_cells_reader(name)?; + let mut cells = Vec::with_capacity(cells_reader.dimensions().len().min(1_000_000) as _); + while let Some(cell) = cells_reader.next_cell()? { + if cell.val != DataTypeRef::Empty { + cells.push(Cell::new(cell.pos, DataType::from(cell.val))); + } + } + Ok(Range::from_sparse(cells)) } /// MS-XLSB 2.1.7.62 - fn worksheet_formula(&mut self, name: &str) -> Option, XlsbError>> { - let path = match self.sheets.iter().find(|&(n, _)| n == name) { - Some((_, path)) => path.clone(), - None => return None, - }; - Some(self.worksheet_formula_from_path(path)) + fn worksheet_formula(&mut self, name: &str) -> Result, XlsbError> { + let mut cells_reader = self.worksheet_cells_reader(name)?; + let mut cells = Vec::with_capacity(cells_reader.dimensions().len().min(1_000_000) as _); + while let Some(cell) = cells_reader.next_formula()? 
{ + if !cell.val.is_empty() { + cells.push(cell); + } + } + Ok(Range::from_sparse(cells)) } /// MS-XLSB 2.1.7.62 fn worksheets(&mut self) -> Vec<(String, Range)> { - let sheets = self.sheets.clone(); + let sheets = self + .sheets + .iter() + .map(|(name, _)| name.clone()) + .collect::>(); sheets .into_iter() - .filter_map(|(name, path)| { - let ws = self.worksheet_range_from_path(&path).ok()?; + .filter_map(|name| { + let ws = self.worksheet_range(&name).ok()?; Some((name, ws)) }) .collect() @@ -681,7 +515,7 @@ impl Reader for Xlsb { } } -struct RecordIter<'a> { +pub(crate) struct RecordIter<'a> { b: [u8; 1], r: BufReader>, } @@ -771,13 +605,6 @@ fn wide_str<'a>(buf: &'a [u8], str_len: &mut usize) -> Result, Xlsb Ok(UTF_16LE.decode(s).0) } -fn parse_dimensions(buf: &[u8]) -> ((u32, u32), (u32, u32)) { - ( - (read_u32(&buf[0..4]), read_u32(&buf[8..12])), - (read_u32(&buf[4..8]), read_u32(&buf[12..16])), - ) -} - /// Formula parsing /// /// [MS-XLSB 2.2.2] diff --git a/src/xlsx/cells_reader.rs b/src/xlsx/cells_reader.rs new file mode 100644 index 00000000..1a768225 --- /dev/null +++ b/src/xlsx/cells_reader.rs @@ -0,0 +1,332 @@ +use quick_xml::{ + events::{attributes::Attribute, BytesStart, Event}, + name::QName, +}; + +use super::{ + get_attribute, get_dimension, get_row, get_row_column, read_string, Dimensions, XlReader, +}; +use crate::{ + datatype::DataTypeRef, + formats::{format_excel_f64_ref, CellFormat}, + Cell, XlsxError, +}; + +/// An xlsx Cell Iterator +pub struct XlsxCellReader<'a> { + xml: XlReader<'a>, + strings: &'a [String], + formats: &'a [CellFormat], + is_1904: bool, + dimensions: Dimensions, + row_index: u32, + col_index: u32, + buf: Vec, + cell_buf: Vec, +} + +impl<'a> XlsxCellReader<'a> { + pub fn new( + mut xml: XlReader<'a>, + strings: &'a [String], + formats: &'a [CellFormat], + is_1904: bool, + ) -> Result { + let mut buf = Vec::with_capacity(1024); + let mut dimensions = Dimensions::default(); + 'xml: loop { + buf.clear(); + match 
xml.read_event_into(&mut buf).map_err(XlsxError::Xml)? { + Event::Start(ref e) => match e.local_name().as_ref() { + b"dimension" => { + for a in e.attributes() { + if let Attribute { + key: QName(b"ref"), + value: rdim, + } = a.map_err(XlsxError::XmlAttr)? + { + dimensions = get_dimension(&rdim)?; + continue 'xml; + } + } + return Err(XlsxError::UnexpectedNode("dimension")); + } + b"sheetData" => break, + _ => (), + }, + Event::Eof => return Err(XlsxError::XmlEof("sheetData")), + _ => (), + } + } + Ok(Self { + xml, + strings, + formats, + is_1904, + dimensions, + row_index: 0, + col_index: 0, + buf: Vec::with_capacity(1024), + cell_buf: Vec::with_capacity(1024), + }) + } + + pub(crate) fn dimensions(&self) -> Dimensions { + self.dimensions + } + + pub fn next_cell(&mut self) -> Result>>, XlsxError> { + loop { + self.buf.clear(); + match self.xml.read_event_into(&mut self.buf) { + Ok(Event::Start(ref row_element)) + if row_element.local_name().as_ref() == b"row" => + { + let attribute = get_attribute(row_element.attributes(), QName(b"r"))?; + if let Some(range) = attribute { + let row = get_row(range)?; + self.row_index = row; + } + } + Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => { + self.row_index += 1; + self.col_index = 0; + } + Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => { + let attribute = get_attribute(c_element.attributes(), QName(b"r"))?; + let pos = if let Some(range) = attribute { + let (row, col) = get_row_column(range)?; + self.col_index = col; + (row, col) + } else { + (self.row_index, self.col_index) + }; + let mut value = DataTypeRef::Empty; + loop { + self.cell_buf.clear(); + match self.xml.read_event_into(&mut self.cell_buf) { + Ok(Event::Start(ref e)) => { + value = read_value( + self.strings, + self.formats, + self.is_1904, + &mut self.xml, + e, + c_element, + )? 
+ } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, + Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + self.col_index += 1; + return Ok(Some(Cell::new(pos, value))); + } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => { + return Ok(None); + } + Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + } + + pub fn next_formula(&mut self) -> Result>, XlsxError> { + loop { + self.buf.clear(); + match self.xml.read_event_into(&mut self.buf) { + Ok(Event::Start(ref row_element)) + if row_element.local_name().as_ref() == b"row" => + { + let attribute = get_attribute(row_element.attributes(), QName(b"r"))?; + if let Some(range) = attribute { + let row = get_row(range)?; + self.row_index = row; + } + } + Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => { + self.row_index += 1; + self.col_index = 0; + } + Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => { + let attribute = get_attribute(c_element.attributes(), QName(b"r"))?; + let pos = if let Some(range) = attribute { + let (row, col) = get_row_column(range)?; + self.col_index = col; + (row, col) + } else { + (self.row_index, self.col_index) + }; + let mut value = None; + loop { + self.cell_buf.clear(); + match self.xml.read_event_into(&mut self.cell_buf) { + Ok(Event::Start(ref e)) => { + if let Some(f) = read_formula(&mut self.xml, e)? 
{ + value = Some(f); + } + } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, + Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + self.col_index += 1; + return Ok(value.map(|value| Cell::new(pos, value))); + } + Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => { + return Ok(None); + } + Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")), + Err(e) => return Err(XlsxError::Xml(e)), + _ => (), + } + } + } +} + +fn read_value<'s>( + strings: &'s [String], + formats: &[CellFormat], + is_1904: bool, + xml: &mut XlReader<'_>, + e: &BytesStart<'_>, + c_element: &BytesStart<'_>, +) -> Result, XlsxError> { + Ok(match e.local_name().as_ref() { + b"is" => { + // inlineStr + read_string(xml, e.name())?.map_or(DataTypeRef::Empty, DataTypeRef::String) + } + b"v" => { + // value + let mut v = String::new(); + let mut v_buf = Vec::new(); + loop { + v_buf.clear(); + match xml.read_event_into(&mut v_buf)? { + Event::Text(t) => v.push_str(&t.unescape()?), + Event::End(end) if end.name() == e.name() => break, + Event::Eof => return Err(XlsxError::XmlEof("v")), + _ => (), + } + } + read_v(v, strings, formats, c_element, is_1904)? + } + b"f" => { + xml.read_to_end_into(e.name(), &mut Vec::new())?; + DataTypeRef::Empty + } + _n => return Err(XlsxError::UnexpectedNode("v, f, or is")), + }) +} + +/// read the contents of a cell +fn read_v<'s>( + v: String, + strings: &'s [String], + formats: &[CellFormat], + c_element: &BytesStart<'_>, + is_1904: bool, +) -> Result, XlsxError> { + let cell_format = match get_attribute(c_element.attributes(), QName(b"s")) { + Ok(Some(style)) => { + let id: usize = std::str::from_utf8(style).unwrap_or("0").parse()?; + formats.get(id) + } + _ => Some(&CellFormat::Other), + }; + match get_attribute(c_element.attributes(), QName(b"t"))? 
{ + Some(b"s") => { + // shared string + let idx: usize = v.parse()?; + Ok(DataTypeRef::SharedString(&strings[idx])) + } + Some(b"b") => { + // boolean + Ok(DataTypeRef::Bool(v != "0")) + } + Some(b"e") => { + // error + Ok(DataTypeRef::Error(v.parse()?)) + } + Some(b"d") => { + // date + Ok(DataTypeRef::DateTimeIso(v)) + } + Some(b"str") => { + // see http://officeopenxml.com/SScontentOverview.php + // str - refers to formula cells + // * indicates calculated value (this case) + // * to the formula string (ignored case + // TODO: Fully support a DataType::Formula representing both Formula string & + // last calculated value? + // + // NB: the result of a formula may not be a numeric value (=A3&" "&A4). + // We do try an initial parse as Float for utility, but fall back to a string + // representation if that fails + v.parse() + .map(DataTypeRef::Float) + .or(Ok(DataTypeRef::String(v))) + } + Some(b"n") => { + // n - number + if v.is_empty() { + Ok(DataTypeRef::Empty) + } else { + v.parse() + .map(|n| format_excel_f64_ref(n, cell_format, is_1904)) + .map_err(XlsxError::ParseFloat) + } + } + None => { + // If type is not known, we try to parse as Float for utility, but fall back to + // String if this fails. + v.parse() + .map(|n| format_excel_f64_ref(n, cell_format, is_1904)) + .or(Ok(DataTypeRef::String(v))) + } + Some(b"is") => { + // this case should be handled in outer loop over cell elements, in which + // case read_inline_str is called instead. Case included here for completeness. 
+ Err(XlsxError::Unexpected( + "called read_value on a cell of type inlineStr", + )) + } + Some(t) => { + let t = std::str::from_utf8(t).unwrap_or("").to_string(); + Err(XlsxError::CellTAttribute(t)) + } + } +} + +fn read_formula<'s>( + xml: &mut XlReader<'_>, + e: &BytesStart<'_>, +) -> Result, XlsxError> { + match e.local_name().as_ref() { + b"is" | b"v" => { + xml.read_to_end_into(e.name(), &mut Vec::new())?; + Ok(None) + } + b"f" => { + let mut f_buf = Vec::with_capacity(512); + let mut f = String::new(); + loop { + match xml.read_event_into(&mut f_buf)? { + Event::Text(t) => f.push_str(&t.unescape()?), + Event::End(end) if end.name() == e.name() => break, + Event::Eof => return Err(XlsxError::XmlEof("f")), + _ => (), + } + f_buf.clear(); + } + Ok(Some(f)) + } + _ => Err(XlsxError::UnexpectedNode("v, f, or is")), + } +} diff --git a/src/xlsx.rs b/src/xlsx/mod.rs similarity index 71% rename from src/xlsx.rs rename to src/xlsx/mod.rs index f2825537..d927049b 100644 --- a/src/xlsx.rs +++ b/src/xlsx/mod.rs @@ -1,3 +1,5 @@ +mod cells_reader; + use std::borrow::Cow; use std::collections::BTreeMap; use std::io::BufReader; @@ -6,22 +8,22 @@ use std::str::FromStr; use log::warn; use quick_xml::events::attributes::{Attribute, Attributes}; -use quick_xml::events::{BytesStart, Event}; +use quick_xml::events::Event; use quick_xml::name::QName; use quick_xml::Reader as XmlReader; use zip::read::{ZipArchive, ZipFile}; use zip::result::ZipError; -use crate::formats::{ - builtin_format_by_id, detect_custom_number_format, format_excel_f64, CellFormat, -}; +use crate::datatype::DataTypeRef; +use crate::formats::{builtin_format_by_id, detect_custom_number_format, CellFormat}; use crate::vba::VbaProject; use crate::{ - Cell, CellErrorType, CellType, DataType, Metadata, Range, Reader, Sheet, SheetType, + Cell, CellErrorType, DataType, Dimensions, Metadata, Range, Reader, Sheet, SheetType, SheetVisible, Table, }; +pub use cells_reader::XlsxCellReader; -type XlsReader<'a> = 
XmlReader<BufReader<ZipFile<'a>>>; +pub(crate) type XlReader<'a> = XmlReader<BufReader<ZipFile<'a>>>; /// Maximum number of rows allowed in an xlsx file pub const MAX_ROWS: u32 = 1_048_576; @@ -48,7 +50,6 @@ pub enum XlsxError { ParseFloat(std::num::ParseFloatError), /// ParseInt error ParseInt(std::num::ParseIntError), - /// Unexpected end of xml XmlEof(&'static str), /// Unexpected node @@ -82,6 +83,10 @@ pub enum XlsxError { CellError(String), /// Workbook is password protected Password, + /// Worksheet not found + WorksheetNotFound(String), + /// Table not found + TableNotFound(String), } from_err!(std::io::Error, XlsxError, Io); @@ -95,41 +100,42 @@ from_err!(std::num::ParseIntError, XlsxError, ParseInt); impl std::fmt::Display for XlsxError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - XlsxError::Io(e) => write!(f, "I/O error: {}", e), - XlsxError::Zip(e) => write!(f, "Zip error: {}", e), - XlsxError::Xml(e) => write!(f, "Xml error: {}", e), - XlsxError::XmlAttr(e) => write!(f, "Xml attribute error: {}", e), - XlsxError::Vba(e) => write!(f, "Vba error: {}", e), - XlsxError::Parse(e) => write!(f, "Parse string error: {}", e), - XlsxError::ParseInt(e) => write!(f, "Parse integer error: {}", e), - XlsxError::ParseFloat(e) => write!(f, "Parse float error: {}", e), - - XlsxError::XmlEof(e) => write!(f, "Unexpected end of xml, expecting '</{}>'", e), - XlsxError::UnexpectedNode(e) => write!(f, "Expecting '{}' node", e), - XlsxError::FileNotFound(e) => write!(f, "File not found '{}'", e), + XlsxError::Io(e) => write!(f, "I/O error: {e}"), + XlsxError::Zip(e) => write!(f, "Zip error: {e}"), + XlsxError::Xml(e) => write!(f, "Xml error: {e}"), + XlsxError::XmlAttr(e) => write!(f, "Xml attribute error: {e}"), + XlsxError::Vba(e) => write!(f, "Vba error: {e}"), + XlsxError::Parse(e) => write!(f, "Parse string error: {e}"), + XlsxError::ParseInt(e) => write!(f, "Parse integer error: {e}"), + XlsxError::ParseFloat(e) => write!(f, "Parse float error: {e}"), + + XlsxError::XmlEof(e)
=> write!(f, "Unexpected end of xml, expecting '</{e}>'"), + XlsxError::UnexpectedNode(e) => write!(f, "Expecting '{e}' node"), + XlsxError::FileNotFound(e) => write!(f, "File not found '{e}'"), XlsxError::RelationshipNotFound => write!(f, "Relationship not found"), XlsxError::Alphanumeric(e) => { - write!(f, "Expecting alphanumeric character, got {:X}", e) + write!(f, "Expecting alphanumeric character, got {e:X}") } XlsxError::NumericColumn(e) => write!( f, - "Numeric character is not allowed for column name, got {}", - e + "Numeric character is not allowed for column name, got {e}", ), XlsxError::DimensionCount(e) => { - write!(f, "Range dimension must be lower than 2. Got {}", e) + write!(f, "Range dimension must be lower than 2. Got {e}") } - XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {:?}", e), + XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {e:?}"), XlsxError::RangeWithoutColumnComponent => { write!(f, "Range is missing the expected column component.") } XlsxError::RangeWithoutRowComponent => { write!(f, "Range is missing the expected row component.") } - XlsxError::Unexpected(e) => write!(f, "{}", e), - XlsxError::Unrecognized { typ, val } => write!(f, "Unrecognized {}: {}", typ, val), - XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{}'", e), + XlsxError::Unexpected(e) => write!(f, "{e}"), + XlsxError::Unrecognized { typ, val } => write!(f, "Unrecognized {typ}: {val}"), + XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{e}'"), + XlsxError::WorksheetNotFound(n) => write!(f, "Worksheet '{n}' not found"), XlsxError::Password => write!(f, "Workbook is password protected"), + XlsxError::TableNotFound(n) => write!(f, "Table '{n}' not found"), } } } @@ -669,34 +675,27 @@ impl Xlsx { /// Get the table by name // TODO: If retrieving multiple tables from a single sheet, get tables by sheet will be more efficient - pub fn table_by_name( - &mut self, - table_name: &str, - ) ->
Option, XlsxError>> { + pub fn table_by_name(&mut self, table_name: &str) -> Result, XlsxError> { let match_table_meta = self .tables .as_ref() .expect("Tables must be loaded before they are referenced") .iter() - .find(|(table, ..)| table == table_name)?; + .find(|(table, ..)| table == table_name) + .ok_or_else(|| XlsxError::TableNotFound(table_name.into()))?; let name = match_table_meta.0.to_owned(); let sheet_name = match_table_meta.1.clone(); let columns = match_table_meta.2.clone(); let start_dim = match_table_meta.3.start; let end_dim = match_table_meta.3.end; - let r_range = self.worksheet_range(&sheet_name)?; - match r_range { - Ok(range) => { - let tbl_rng = range.range(start_dim, end_dim); - Some(Ok(Table { - name, - sheet_name, - columns, - data: tbl_rng, - })) - } - Err(e) => Some(Err(e)), - } + let range = self.worksheet_range(&sheet_name)?; + let tbl_rng = range.range(start_dim, end_dim); + Ok(Table { + name, + sheet_name, + columns, + data: tbl_rng, + }) } } @@ -720,59 +719,49 @@ impl InnerTableMetadata { } } -fn worksheet( - strings: &[String], - formats: &[CellFormat], - mut xml: XlsReader<'_>, - read_data: &mut F, -) -> Result, XlsxError> -where - T: CellType, - F: FnMut( - &[String], - &[CellFormat], - &mut XlsReader<'_>, - &mut Vec>, - ) -> Result<(), XlsxError>, -{ - let mut cells = Vec::with_capacity(1024); - let mut buf = Vec::with_capacity(1024); - 'xml: loop { - buf.clear(); - match xml.read_event_into(&mut buf) { - Ok(Event::Start(ref e)) => { - match e.local_name().as_ref() { - b"dimension" => { - for a in e.attributes() { - if let Attribute { - key: QName(b"ref"), - value: rdim, - } = a.map_err(XlsxError::XmlAttr)? 
- { - let len = get_dimension(&rdim)?.len(); - if len < 1_000_000 { - // it is unlikely to have more than that - // there may be of empty cells - cells.reserve(len as usize); - } - continue 'xml; - } - } - return Err(XlsxError::UnexpectedNode("dimension")); - } - b"sheetData" => { - read_data(strings, formats, &mut xml, &mut cells)?; - break; - } - _ => (), - } +impl Xlsx { + /// Get a reader over all used cells in the given worksheet cell reader + pub fn worksheet_cells_reader<'a>( + &'a mut self, + name: &str, + ) -> Result, XlsxError> { + let (_, path) = self + .sheets + .iter() + .find(|&&(ref n, _)| n == name) + .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))?; + let xml = xml_reader(&mut self.zip, path) + .ok_or_else(|| XlsxError::WorksheetNotFound(name.into()))??; + let is_1904 = self.is_1904; + let strings = &self.strings; + let formats = &self.formats; + XlsxCellReader::new(xml, strings, formats, is_1904) + } + + /// Get worksheet range where shared string values are only borrowed + pub fn worksheet_range_ref<'a>( + &'a mut self, + name: &str, + ) -> Result>, XlsxError> { + let mut cell_reader = self.worksheet_cells_reader(name)?; + let len = cell_reader.dimensions().len(); + let mut cells = Vec::new(); + if len < 100_000 { + cells.reserve(len as usize); + } + loop { + match cell_reader.next_cell() { + Ok(Some(Cell { + val: DataTypeRef::Empty, + .. 
+ })) => (), + Ok(Some(cell)) => cells.push(cell), + Ok(None) => break, + Err(e) => return Err(e), } - Ok(Event::Eof) => break, - Err(e) => return Err(XlsxError::Xml(e)), - _ => (), } + Ok(Range::from_sparse(cells)) } - Ok(Range::from_sparse(cells)) } impl Reader for Xlsx { @@ -803,87 +792,55 @@ impl Reader for Xlsx { } fn vba_project(&mut self) -> Option, XlsxError>> { - self.zip.by_name("xl/vbaProject.bin").ok().map(|mut f| { - let len = f.size() as usize; + let mut f = self.zip.by_name("xl/vbaProject.bin").ok()?; + let len = f.size() as usize; + Some( VbaProject::new(&mut f, len) .map(Cow::Owned) - .map_err(XlsxError::Vba) - }) + .map_err(XlsxError::Vba), + ) } fn metadata(&self) -> &Metadata { &self.metadata } - fn worksheet_range(&mut self, name: &str) -> Option, XlsxError>> { - let xml = match self.sheets.iter().find(|&(n, _)| n == name) { - Some((_, path)) => xml_reader(&mut self.zip, path), - None => return None, - }; - let is_1904 = self.is_1904; - let strings = &self.strings; - let formats = &self.formats; - xml.map(|xml| { - worksheet(strings, formats, xml?, &mut |s, f, xml, cells| { - read_sheet_data(xml, s, f, cells, is_1904) - }) + fn worksheet_range(&mut self, name: &str) -> Result, XlsxError> { + let rge = self.worksheet_range_ref(name)?; + let inner = rge.inner.into_iter().map(|v| v.into()).collect(); + Ok(Range { + start: rge.start, + end: rge.end, + inner, }) } - fn worksheet_formula(&mut self, name: &str) -> Option, XlsxError>> { - let xml = match self.sheets.iter().find(|&(n, _)| n == name) { - Some((_, path)) => xml_reader(&mut self.zip, path), - None => return None, - }; - - let strings = &self.strings; - let formats = &self.formats; - xml.map(|xml| { - worksheet(strings, formats, xml?, &mut |_, _, xml, cells| { - read_sheet(xml, cells, &mut |cells, xml, e, pos, _| { - match e.local_name().as_ref() { - b"is" | b"v" => { - xml.read_to_end_into(e.name(), &mut Vec::new())?; - } - b"f" => { - let mut f_buf = Vec::with_capacity(512); - let mut f = 
String::new(); - loop { - match xml.read_event_into(&mut f_buf)? { - Event::Text(t) => f.push_str(&t.unescape()?), - Event::End(end) if end.name() == e.name() => break, - Event::Eof => return Err(XlsxError::XmlEof("f")), - _ => (), - } - f_buf.clear(); - } - if !f.is_empty() { - cells.push(Cell::new(pos, f)); - } - } - _ => return Err(XlsxError::UnexpectedNode("v, f, or is")), - } - Ok(()) - }) - }) - }) + fn worksheet_formula(&mut self, name: &str) -> Result, XlsxError> { + let mut cell_reader = self.worksheet_cells_reader(name)?; + let len = cell_reader.dimensions().len(); + let mut cells = Vec::new(); + if len < 100_000 { + cells.reserve(len as usize); + } + while let Some(cell) = cell_reader.next_formula()? { + if !cell.val.is_empty() { + cells.push(cell); + } + } + Ok(Range::from_sparse(cells)) } fn worksheets(&mut self) -> Vec<(String, Range)> { - let is_1904 = self.is_1904; - self.sheets - .clone() + let names = self + .sheets + .iter() + .map(|(n, _)| n.clone()) + .collect::>(); + names .into_iter() - .filter_map(|(name, path)| { - let xml = xml_reader(&mut self.zip, &path)?.ok()?; - let range = worksheet( - &self.strings, - &self.formats, - xml, - &mut |s, f, xml, cells| read_sheet_data(xml, s, f, cells, is_1904), - ) - .ok()?; - Some((name, range)) + .filter_map(|n| { + let rge = self.worksheet_range(&n).ok()?; + Some((n, rge)) }) .collect() } @@ -897,7 +854,7 @@ impl Reader for Xlsx { fn xml_reader<'a, RS: Read + Seek>( zip: &'a mut ZipArchive, path: &str, -) -> Option, XlsxError>> { +) -> Option, XlsxError>> { match zip.by_name(path) { Ok(f) => { let mut r = XmlReader::from_reader(BufReader::new(f)); @@ -913,7 +870,10 @@ fn xml_reader<'a, RS: Read + Seek>( } /// search through an Element's attributes for the named one -fn get_attribute<'a>(atts: Attributes<'a>, n: QName) -> Result, XlsxError> { +pub(crate) fn get_attribute<'a>( + atts: Attributes<'a>, + n: QName, +) -> Result, XlsxError> { for a in atts { match a { Ok(Attribute { @@ -927,209 +887,10 @@ 
fn get_attribute<'a>(atts: Attributes<'a>, n: QName) -> Result, Ok(None) } -fn read_sheet( - xml: &mut XlsReader<'_>, - cells: &mut Vec>, - push_cell: &mut F, -) -> Result<(), XlsxError> -where - T: CellType, - F: FnMut( - &mut Vec>, - &mut XlsReader<'_>, - &BytesStart<'_>, - (u32, u32), - &BytesStart<'_>, - ) -> Result<(), XlsxError>, -{ - let mut buf = Vec::with_capacity(1024); - let mut cell_buf = Vec::with_capacity(1024); - - let mut row_index = 0; - let mut col_index = 0; - - loop { - buf.clear(); - match xml.read_event_into(&mut buf) { - Ok(Event::Start(ref row_element)) if row_element.local_name().as_ref() == b"row" => { - let attribute = get_attribute(row_element.attributes(), QName(b"r"))?; - if let Some(range) = attribute { - let row = get_row(range)?; - row_index = row; - } - } - Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => { - row_index += 1; - col_index = 0; - } - Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => { - let attribute = get_attribute(c_element.attributes(), QName(b"r"))?; - - let pos = if let Some(range) = attribute { - let (row, col) = get_row_column(range)?; - col_index = col; - (row, col) - } else { - (row_index, col_index) - }; - - loop { - cell_buf.clear(); - match xml.read_event_into(&mut cell_buf) { - Ok(Event::Start(ref e)) => push_cell(cells, xml, e, pos, c_element)?, - Ok(Event::End(ref e)) if e.local_name().as_ref() == b"c" => break, - Ok(Event::Eof) => return Err(XlsxError::XmlEof("c")), - Err(e) => return Err(XlsxError::Xml(e)), - _ => (), - } - } - col_index += 1; - } - Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => return Ok(()), - Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")), - Err(e) => return Err(XlsxError::Xml(e)), - _ => (), - } - } -} - -/// read sheetData node -fn read_sheet_data( - xml: &mut XlsReader<'_>, - strings: &[String], - formats: &[CellFormat], - cells: &mut Vec>, - is_1904: bool, -) -> Result<(), XlsxError> 
{ - /// read the contents of a cell - fn read_value( - v: String, - strings: &[String], - formats: &[CellFormat], - c_element: &BytesStart<'_>, - is_1904: bool, - ) -> Result { - let cell_format = match get_attribute(c_element.attributes(), QName(b"s")) { - Ok(Some(style)) => { - let id: usize = std::str::from_utf8(style).unwrap_or("0").parse()?; - formats.get(id) - } - _ => Some(&CellFormat::Other), - }; - - match get_attribute(c_element.attributes(), QName(b"t"))? { - Some(b"s") => { - // shared string - let idx: usize = v.parse()?; - Ok(DataType::String(strings[idx].clone())) - } - Some(b"b") => { - // boolean - Ok(DataType::Bool(v != "0")) - } - Some(b"e") => { - // error - Ok(DataType::Error(v.parse()?)) - } - Some(b"d") => { - // date - Ok(DataType::DateTimeIso(v)) - } - Some(b"str") => { - // see http://officeopenxml.com/SScontentOverview.php - // str - refers to formula cells - // * indicates calculated value (this case) - // * to the formula string (ignored case - // TODO: Fully support a DataType::Formula representing both Formula string & - // last calculated value? - // - // NB: the result of a formula may not be a numeric value (=A3&" "&A4). - // We do try an initial parse as Float for utility, but fall back to a string - // representation if that fails - v.parse().map(DataType::Float).or(Ok(DataType::String(v))) - } - Some(b"n") => { - // n - number - if v.is_empty() { - Ok(DataType::Empty) - } else { - v.parse() - .map(|n| format_excel_f64(n, cell_format, is_1904)) - .map_err(XlsxError::ParseFloat) - } - } - None => { - // If type is not known, we try to parse as Float for utility, but fall back to - // String if this fails. - v.parse() - .map(|n| format_excel_f64(n, cell_format, is_1904)) - .or(Ok(DataType::String(v))) - } - Some(b"is") => { - // this case should be handled in outer loop over cell elements, in which - // case read_inline_str is called instead. Case included here for completeness. 
- Err(XlsxError::Unexpected( - "called read_value on a cell of type inlineStr", - )) - } - Some(t) => { - let t = std::str::from_utf8(t).unwrap_or("").to_string(); - Err(XlsxError::CellTAttribute(t)) - } - } - } - - read_sheet(xml, cells, &mut |cells, xml, e, pos, c_element| { - match e.local_name().as_ref() { - b"is" => { - // inlineStr - if let Some(s) = read_string(xml, e.name())? { - cells.push(Cell::new(pos, DataType::String(s))); - } - } - b"v" => { - // value - let mut v = String::new(); - let mut v_buf = Vec::new(); - loop { - v_buf.clear(); - match xml.read_event_into(&mut v_buf)? { - Event::Text(t) => v.push_str(&t.unescape()?), - Event::End(end) if end.name() == e.name() => break, - Event::Eof => return Err(XlsxError::XmlEof("v")), - _ => (), - } - } - match read_value(v, strings, formats, c_element, is_1904)? { - DataType::Empty => (), - v => cells.push(Cell::new(pos, v)), - } - } - b"f" => { - xml.read_to_end_into(e.name(), &mut Vec::new())?; - } - _n => return Err(XlsxError::UnexpectedNode("v, f, or is")), - } - Ok(()) - }) -} - -#[derive(Debug, PartialEq)] -struct Dimensions { - start: (u32, u32), - end: (u32, u32), -} - -impl Dimensions { - fn len(&self) -> u64 { - (self.end.0 - self.start.0 + 1) as u64 * (self.end.1 - self.start.1 + 1) as u64 - } -} - /// converts a text representation (e.g. "A6:G67") of a dimension into integers /// - top left (row, column), /// - bottom right (row, column) -fn get_dimension(dimension: &[u8]) -> Result { +pub(crate) fn get_dimension(dimension: &[u8]) -> Result { let parts: Vec<_> = dimension .split(|c| *c == b':') .map(get_row_column) @@ -1167,7 +928,7 @@ fn get_dimension(dimension: &[u8]) -> Result { /// Converts a text range name into its position (row, column) (0 based index). /// If the row or column component in the range is missing, an Error is returned. 
-fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> { +pub(crate) fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> { let (row, col) = get_row_and_optional_column(range)?; let col = col.ok_or(XlsxError::RangeWithoutColumnComponent)?; Ok((row, col)) @@ -1176,7 +937,7 @@ fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> { /// Converts a text row name into its position (0 based index). /// If the row component in the range is missing, an Error is returned. /// If the text row name also contains a column component, it is ignored. -fn get_row(range: &[u8]) -> Result { +pub(crate) fn get_row(range: &[u8]) -> Result { get_row_and_optional_column(range).map(|(row, _)| row) } @@ -1223,8 +984,8 @@ fn get_row_and_optional_column(range: &[u8]) -> Result<(u32, Option), XlsxE } /// attempts to read either a simple or richtext string -fn read_string( - xml: &mut XlsReader<'_>, +pub(crate) fn read_string( + xml: &mut XlReader<'_>, QName(closing): QName, ) -> Result, XlsxError> { let mut buf = Vec::with_capacity(1024); @@ -1288,60 +1049,65 @@ fn check_for_password_protected(reader: &mut RS) -> Result<(), Ok(()) } -#[test] -fn test_dimensions() { - assert_eq!(get_row_column(b"A1").unwrap(), (0, 0)); - assert_eq!(get_row_column(b"C107").unwrap(), (106, 2)); - assert_eq!( - get_dimension(b"C2:D35").unwrap(), - Dimensions { - start: (1, 2), - end: (34, 3) - } - ); - assert_eq!( - get_dimension(b"A1:XFD1048576").unwrap(), - Dimensions { - start: (0, 0), - end: (1_048_575, 16_383), - } - ); -} +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_dimensions() { + assert_eq!(get_row_column(b"A1").unwrap(), (0, 0)); + assert_eq!(get_row_column(b"C107").unwrap(), (106, 2)); + assert_eq!( + get_dimension(b"C2:D35").unwrap(), + Dimensions { + start: (1, 2), + end: (34, 3) + } + ); + assert_eq!( + get_dimension(b"A1:XFD1048576").unwrap(), + Dimensions { + start: (0, 0), + end: (1_048_575, 16_383), + } + ); + } -#[test] -fn 
test_dimension_length() { - assert_eq!(get_dimension(b"A1:Z99").unwrap().len(), 2_574); - assert_eq!( - get_dimension(b"A1:XFD1048576").unwrap().len(), - 17_179_869_184 - ); -} + #[test] + fn test_dimension_length() { + assert_eq!(get_dimension(b"A1:Z99").unwrap().len(), 2_574); + assert_eq!( + get_dimension(b"A1:XFD1048576").unwrap().len(), + 17_179_869_184 + ); + } -#[test] -fn test_parse_error() { - assert_eq!( - CellErrorType::from_str("#DIV/0!").unwrap(), - CellErrorType::Div0 - ); - assert_eq!(CellErrorType::from_str("#N/A").unwrap(), CellErrorType::NA); - assert_eq!( - CellErrorType::from_str("#NAME?").unwrap(), - CellErrorType::Name - ); - assert_eq!( - CellErrorType::from_str("#NULL!").unwrap(), - CellErrorType::Null - ); - assert_eq!( - CellErrorType::from_str("#NUM!").unwrap(), - CellErrorType::Num - ); - assert_eq!( - CellErrorType::from_str("#REF!").unwrap(), - CellErrorType::Ref - ); - assert_eq!( - CellErrorType::from_str("#VALUE!").unwrap(), - CellErrorType::Value - ); + #[test] + fn test_parse_error() { + assert_eq!( + CellErrorType::from_str("#DIV/0!").unwrap(), + CellErrorType::Div0 + ); + assert_eq!(CellErrorType::from_str("#N/A").unwrap(), CellErrorType::NA); + assert_eq!( + CellErrorType::from_str("#NAME?").unwrap(), + CellErrorType::Name + ); + assert_eq!( + CellErrorType::from_str("#NULL!").unwrap(), + CellErrorType::Null + ); + assert_eq!( + CellErrorType::from_str("#NUM!").unwrap(), + CellErrorType::Num + ); + assert_eq!( + CellErrorType::from_str("#REF!").unwrap(), + CellErrorType::Ref + ); + assert_eq!( + CellErrorType::from_str("#VALUE!").unwrap(), + CellErrorType::Value + ); + } } diff --git a/tests/test.rs b/tests/test.rs index a02cf753..41453f37 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -39,7 +39,7 @@ fn issue_2() { let path = format!("{}/tests/issues.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue2").unwrap().unwrap(); + let range = 
excel.worksheet_range("issue2").unwrap(); range_eq!( range, [ @@ -58,7 +58,7 @@ fn issue_3() { let path = format!("{}/tests/issue3.xlsm", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("Sheet1").unwrap().unwrap(); + let range = excel.worksheet_range("Sheet1").unwrap(); range_eq!(range, [[Float(1.), String("a".to_string())]]); } @@ -70,7 +70,7 @@ fn issue_4() { let path = format!("{}/tests/issues.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue5").unwrap().unwrap(); + let range = excel.worksheet_range("issue5").unwrap(); range_eq!(range, [[Float(0.5)]]); } @@ -82,7 +82,7 @@ fn issue_6() { let path = format!("{}/tests/issues.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue6").unwrap().unwrap(); + let range = excel.worksheet_range("issue6").unwrap(); range_eq!( range, [ @@ -101,7 +101,7 @@ fn error_file() { let path = format!("{}/tests/errors.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("Feuil1").unwrap().unwrap(); + let range = excel.worksheet_range("Feuil1").unwrap(); range_eq!( range, [ @@ -123,7 +123,7 @@ fn issue_9() { let path = format!("{}/tests/issue9.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("Feuil1").unwrap().unwrap(); + let range = excel.worksheet_range("Feuil1").unwrap(); range_eq!( range, [ @@ -157,7 +157,7 @@ fn xlsb() { let path = format!("{}/tests/issues.xlsb", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsb<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue2").unwrap().unwrap(); + let range = excel.worksheet_range("issue2").unwrap(); range_eq!( range, [ @@ -175,7 +175,7 @@ fn xlsx() { let path = 
format!("{}/tests/issues.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue2").unwrap().unwrap(); + let range = excel.worksheet_range("issue2").unwrap(); range_eq!( range, [ @@ -193,7 +193,7 @@ fn xls() { let path = format!("{}/tests/issues.xls", env!("CARGO_MANIFEST_DIR")); let mut excel: Xls<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue2").unwrap().unwrap(); + let range = excel.worksheet_range("issue2").unwrap(); range_eq!( range, [ @@ -217,8 +217,7 @@ fn issue_195() { let mut excel: Xls<_> = open_workbook(&path).expect("can't open wb"); let range = excel .worksheet_range("JLCPCB SMT Parts Library") - .expect("error in wks range") - .expect("sheet not found"); + .expect("error in wks range"); assert_eq!(range.get_size(), (52046, 12)); } @@ -229,7 +228,7 @@ fn ods() { let path = format!("{}/tests/issues.ods", env!("CARGO_MANIFEST_DIR")); let mut excel: Ods<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("datatypes").unwrap().unwrap(); + let range = excel.worksheet_range("datatypes").unwrap(); range_eq!( range, [ @@ -242,7 +241,7 @@ fn ods() { ] ); - let range = excel.worksheet_range("issue2").unwrap().unwrap(); + let range = excel.worksheet_range("issue2").unwrap(); range_eq!( range, [ @@ -252,7 +251,7 @@ fn ods() { ] ); - let range = excel.worksheet_range("issue5").unwrap().unwrap(); + let range = excel.worksheet_range("issue5").unwrap(); range_eq!(range, [[Float(0.5)]]); } @@ -263,7 +262,7 @@ fn ods_covered() { let path = format!("{}/tests/covered.ods", env!("CARGO_MANIFEST_DIR")); let mut excel: Ods<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("sheet1").unwrap().unwrap(); + let range = excel.worksheet_range("sheet1").unwrap(); range_eq!( range, [ @@ -279,7 +278,7 @@ fn special_cells() { let path = format!("{}/tests/special_cells.ods", env!("CARGO_MANIFEST_DIR")); let mut excel: Ods<_> = 
open_workbook(&path).unwrap(); - let range = excel.worksheet_range("sheet1").unwrap().unwrap(); + let range = excel.worksheet_range("sheet1").unwrap(); range_eq!( range, [ @@ -301,7 +300,7 @@ fn special_chrs_xlsx() { let path = format!("{}/tests/issues.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("spc_chrs").unwrap().unwrap(); + let range = excel.worksheet_range("spc_chrs").unwrap(); range_eq!( range, [ @@ -324,7 +323,7 @@ fn special_chrs_xlsb() { let path = format!("{}/tests/issues.xlsb", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsb<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("spc_chrs").unwrap().unwrap(); + let range = excel.worksheet_range("spc_chrs").unwrap(); range_eq!( range, [ @@ -347,7 +346,7 @@ fn special_chrs_ods() { let path = format!("{}/tests/issues.ods", env!("CARGO_MANIFEST_DIR")); let mut excel: Ods<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("spc_chrs").unwrap().unwrap(); + let range = excel.worksheet_range("spc_chrs").unwrap(); range_eq!( range, [ @@ -370,7 +369,7 @@ fn partial_richtext_ods() { let path = format!("{}/tests/richtext_issue.ods", env!("CARGO_MANIFEST_DIR")); let mut excel: Ods<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("datatypes").unwrap().unwrap(); + let range = excel.worksheet_range("datatypes").unwrap(); range_eq!(range, [[String("abc".to_string())]]); } @@ -384,7 +383,7 @@ fn xlsx_richtext_namespaced() { ); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("Sheet1").unwrap().unwrap(); + let range = excel.worksheet_range("Sheet1").unwrap(); range_eq!( range, [[ @@ -525,10 +524,10 @@ fn formula_xlsx() { let sheets = excel.sheet_names().to_owned(); for s in sheets { - let _ = excel.worksheet_formula(&s).unwrap().unwrap(); + let _ = excel.worksheet_formula(&s).unwrap(); } - let formula = 
excel.worksheet_formula("Sheet1").unwrap().unwrap(); + let formula = excel.worksheet_formula("Sheet1").unwrap(); range_eq!(formula, [["B1+OneRange".to_string()]]); } @@ -541,10 +540,10 @@ fn formula_xlsb() { let sheets = excel.sheet_names().to_owned(); for s in sheets { - let _ = excel.worksheet_formula(&s).unwrap().unwrap(); + let _ = excel.worksheet_formula(&s).unwrap(); } - let formula = excel.worksheet_formula("Sheet1").unwrap().unwrap(); + let formula = excel.worksheet_formula("Sheet1").unwrap(); range_eq!(formula, [["B1+OneRange".to_string()]]); } @@ -555,7 +554,7 @@ fn formula_vals_xlsb() { let path = format!("{}/tests/issue_182.xlsb", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsb<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("formula_vals").unwrap().unwrap(); + let range = excel.worksheet_range("formula_vals").unwrap(); range_eq!( range, [[Float(3.)], [String("Ab".to_string())], [Bool(false)]] @@ -569,7 +568,7 @@ fn float_vals_xlsb() { let path = format!("{}/tests/issue_186.xlsb", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsb<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("Sheet1").unwrap().unwrap(); + let range = excel.worksheet_range("Sheet1").unwrap(); range_eq!( range, [ @@ -591,10 +590,10 @@ fn formula_xls() { let sheets = excel.sheet_names().to_owned(); for s in sheets { - let _ = excel.worksheet_formula(&s).unwrap().unwrap(); + let _ = excel.worksheet_formula(&s).unwrap(); } - let formula = excel.worksheet_formula("Sheet1").unwrap().unwrap(); + let formula = excel.worksheet_formula("Sheet1").unwrap(); range_eq!(formula, [["B1+OneRange".to_string()]]); } @@ -606,10 +605,10 @@ fn formula_ods() { let mut excel: Ods<_> = open_workbook(&path).unwrap(); for s in excel.sheet_names().to_owned() { - let _ = excel.worksheet_formula(&s).unwrap().unwrap(); + let _ = excel.worksheet_formula(&s).unwrap(); } - let formula = excel.worksheet_formula("Sheet1").unwrap().unwrap(); + let formula = 
excel.worksheet_formula("Sheet1").unwrap(); range_eq!(formula, [["of:=[.B1]+$$OneRange".to_string()]]); } @@ -620,7 +619,7 @@ fn empty_sheet() { let path = format!("{}/tests/empty_sheet.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); for s in excel.sheet_names().to_owned() { - let range = excel.worksheet_range(&s).unwrap().unwrap(); + let range = excel.worksheet_range(&s).unwrap(); assert_eq!(range.start(), None, "wrong start"); assert_eq!(range.end(), None, "wrong end"); assert_eq!(range.get_size(), (0, 0), "wrong size"); @@ -634,7 +633,7 @@ fn issue_120() { let path = format!("{}/tests/issues.xlsx", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("issue2").unwrap().unwrap(); + let range = excel.worksheet_range("issue2").unwrap(); let end = range.end().unwrap(); let a = range.get_value((0, end.1 + 1)); @@ -677,7 +676,7 @@ fn mul_rk() { env!("CARGO_MANIFEST_DIR") ); let mut xls: Xls<_> = open_workbook(&path).unwrap(); - let range = xls.worksheet_range("Boys").unwrap().unwrap(); + let range = xls.worksheet_range("Boys").unwrap(); assert_eq!(range.get_value((6, 2)), Some(&Float(9.))); } @@ -687,7 +686,7 @@ fn skip_phonetic_text() { let path = format!("{}/tests/rph.xlsx", env!("CARGO_MANIFEST_DIR")); let mut xls: Xlsx<_> = open_workbook(&path).unwrap(); - let range = xls.worksheet_range("Sheet1").unwrap().unwrap(); + let range = xls.worksheet_range("Sheet1").unwrap(); assert_eq!( range.get_value((0, 0)), Some(&String("課きく 毛こ".to_string())) @@ -717,7 +716,6 @@ fn table() { assert_eq!(table_names[1], "OtherTable"); let table = xls .table_by_name("Temperature") - .expect("Table should exist") .expect("Parsing table's sheet should not error"); assert_eq!(table.name(), "Temperature"); assert_eq!(table.columns()[0], "label"); @@ -730,7 +728,6 @@ fn table() { // Check the second table let table = xls .table_by_name("OtherTable") - .expect("Table should exist") 
.expect("Parsing table's sheet should not error"); assert_eq!(table.name(), "OtherTable"); assert_eq!(table.columns()[0], "label2"); @@ -998,7 +995,7 @@ fn issue_221() { let path = format!("{}/tests/issue221.xlsm", env!("CARGO_MANIFEST_DIR")); let mut excel: Xlsx<_> = open_workbook(&path).unwrap(); - let range = excel.worksheet_range("Sheet1").unwrap().unwrap(); + let range = excel.worksheet_range("Sheet1").unwrap(); range_eq!( range, [ @@ -1037,12 +1034,10 @@ fn issue_261() { let range_a = workbook_fixed_by_excel .worksheet_range("Some Sheet") - .unwrap() .unwrap(); let range_b = workbook_with_missing_r_attributes .worksheet_range("Some Sheet") - .unwrap() .unwrap(); assert_eq!(range_a.cells().count(), 462); @@ -1290,7 +1285,7 @@ fn issue304_xls_formula() { setup(); let path = format!("{}/tests/xls_formula.xls", env!("CARGO_MANIFEST_DIR")); let mut wb: Xls<_> = open_workbook(&path).unwrap(); - let formula = wb.worksheet_formula("Sheet1").unwrap().unwrap(); + let formula = wb.worksheet_formula("Sheet1").unwrap(); let mut rows = formula.rows(); assert_eq!(rows.next(), Some(&["A1*2".to_owned()][..])); assert_eq!(rows.next(), Some(&["2*Sheet2!A1".to_owned()][..])); @@ -1303,7 +1298,7 @@ fn issue304_xls_values() { setup(); let path = format!("{}/tests/xls_formula.xls", env!("CARGO_MANIFEST_DIR")); let mut wb: Xls<_> = open_workbook(&path).unwrap(); - let rge = wb.worksheet_range("Sheet1").unwrap().unwrap(); + let rge = wb.worksheet_range("Sheet1").unwrap(); let mut rows = rge.rows(); assert_eq!(rows.next(), Some(&[DataType::Float(10.)][..])); assert_eq!(rows.next(), Some(&[DataType::Float(20.)][..])); @@ -1317,7 +1312,7 @@ fn issue334_xls_values_string() { setup(); let path = format!("{}/tests/xls_ref_String.xls", env!("CARGO_MANIFEST_DIR")); let mut wb: Xls<_> = open_workbook(&path).unwrap(); - let rge = wb.worksheet_range("Sheet1").unwrap().unwrap(); + let rge = wb.worksheet_range("Sheet1").unwrap(); let mut rows = rge.rows(); assert_eq!(rows.next(), 
Some(&[DataType::String("aa".into())][..])); assert_eq!(rows.next(), Some(&[DataType::String("bb".into())][..])); @@ -1513,11 +1508,7 @@ fn issue_374() { assert_eq!("SheetJS", first_sheet_name); - let range = workbook - .worksheet_range(&first_sheet_name) - .unwrap() - .unwrap(); - + let range = workbook.worksheet_range(&first_sheet_name).unwrap(); let second_row = range.rows().nth(1).unwrap(); let cell_text = second_row.get(3).unwrap().to_string();