Skip to content

Commit

Permalink
Merge pull request #370 from tafia/DataTypeRef
Browse files Browse the repository at this point in the history
support DataTypeRef for shared strings and worksheet CellsReader
  • Loading branch information
tafia authored Dec 12, 2023
2 parents 7342e2d + aa19868 commit b72eb4f
Show file tree
Hide file tree
Showing 15 changed files with 1,020 additions and 779 deletions.
39 changes: 38 additions & 1 deletion benches/basic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ fn count<R: Reader<BufReader<File>>>(path: &str) -> usize {
count += excel
.worksheet_range(&s)
.unwrap()
.unwrap()
.rows()
.flat_map(|r| r.iter())
.count();
Expand All @@ -44,3 +43,41 @@ fn bench_xlsb(b: &mut Bencher) {
/// Benchmarks the shared `count` helper on the ODS test workbook.
fn bench_ods(b: &mut Bencher) {
    let workbook_path = "tests/issues.ods";
    b.iter(|| count::<Ods<_>>(workbook_path));
}

/// Benchmarks streaming every cell of the XLSX test workbook through the
/// cell-by-cell reader.
#[bench]
fn bench_xlsx_cells_reader(b: &mut Bencher) {
    /// Opens the workbook at `path` (relative to the crate root), walks every
    /// sheet with a cells reader and returns the number of cells it yielded.
    fn count<R: Reader<BufReader<File>>>(path: &str) -> usize {
        let full_path = format!("{}/{}", env!("CARGO_MANIFEST_DIR"), path);
        let mut workbook: Xlsx<_> = open_workbook(&full_path).expect("cannot open excel file");

        // Copy the names first so the workbook can be borrowed mutably below.
        let sheet_names = workbook.sheet_names().to_owned();
        let mut total = 0;
        for name in sheet_names {
            let mut reader = workbook.worksheet_cells_reader(&name).unwrap();
            // Only the number of cells matters, not their content.
            while reader.next_cell().unwrap().is_some() {
                total += 1;
            }
        }
        total
    }

    b.iter(|| count::<Xlsx<_>>("tests/issues.xlsx"));
}

/// Benchmarks streaming every cell of the XLSB test workbook through the
/// cell-by-cell reader.
#[bench]
fn bench_xlsb_cells_reader(b: &mut Bencher) {
    /// Opens the workbook at `path` (relative to the crate root), walks every
    /// sheet with a cells reader and returns the number of cells it yielded.
    fn count<R: Reader<BufReader<File>>>(path: &str) -> usize {
        let path = format!("{}/{}", env!("CARGO_MANIFEST_DIR"), path);
        let mut excel: Xlsb<_> = open_workbook(&path).expect("cannot open excel file");

        // Copy the names first so the workbook can be borrowed mutably below.
        let sheets = excel.sheet_names().to_owned();
        let mut count = 0;
        for s in sheets {
            let mut cells_reader = excel.worksheet_cells_reader(&s).unwrap();
            // Only the number of cells matters, not their content.
            while cells_reader.next_cell().unwrap().is_some() {
                count += 1;
            }
        }
        count
    }

    // The type argument names the XLSB reader, matching the workbook type
    // opened inside `count` and the `.xlsb` fixture passed to it.
    b.iter(|| count::<Xlsb<_>>("tests/issues.xlsb"));
}
2 changes: 1 addition & 1 deletion examples/excel_to_csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ fn main() {
let dest = sce.with_extension("csv");
let mut dest = BufWriter::new(File::create(dest).unwrap());
let mut xl = open_workbook_auto(&sce).unwrap();
let range = xl.worksheet_range(&sheet).unwrap().unwrap();
let range = xl.worksheet_range(&sheet).unwrap();

write_range(&mut dest, &range).unwrap();
}
Expand Down
5 changes: 1 addition & 4 deletions examples/search_errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,7 @@ fn run(f: GlobResult) -> Result<(PathBuf, Option<usize>, usize), FileStatus> {
let sheets = xl.sheet_names().to_owned();

for s in sheets {
let range = xl
.worksheet_range(&s)
.unwrap()
.map_err(FileStatus::RangeError)?;
let range = xl.worksheet_range(&s).map_err(FileStatus::RangeError)?;
cell_errors += range
.rows()
.flat_map(|r| {
Expand Down
20 changes: 10 additions & 10 deletions src/auto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -105,22 +105,22 @@ where
}

/// Read worksheet data in corresponding worksheet path
fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, Self::Error>> {
fn worksheet_range(&mut self, name: &str) -> Result<Range<DataType>, Self::Error> {
match *self {
Sheets::Xls(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Xls)),
Sheets::Xlsx(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Xlsx)),
Sheets::Xlsb(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Xlsb)),
Sheets::Ods(ref mut e) => e.worksheet_range(name).map(|r| r.map_err(Error::Ods)),
Sheets::Xls(ref mut e) => e.worksheet_range(name).map_err(Error::Xls),
Sheets::Xlsx(ref mut e) => e.worksheet_range(name).map_err(Error::Xlsx),
Sheets::Xlsb(ref mut e) => e.worksheet_range(name).map_err(Error::Xlsb),
Sheets::Ods(ref mut e) => e.worksheet_range(name).map_err(Error::Ods),
}
}

/// Read worksheet formula in corresponding worksheet path
fn worksheet_formula(&mut self, name: &str) -> Option<Result<Range<String>, Self::Error>> {
fn worksheet_formula(&mut self, name: &str) -> Result<Range<String>, Self::Error> {
match *self {
Sheets::Xls(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Xls)),
Sheets::Xlsx(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Xlsx)),
Sheets::Xlsb(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Xlsb)),
Sheets::Ods(ref mut e) => e.worksheet_formula(name).map(|r| r.map_err(Error::Ods)),
Sheets::Xls(ref mut e) => e.worksheet_formula(name).map_err(Error::Xls),
Sheets::Xlsx(ref mut e) => e.worksheet_formula(name).map_err(Error::Xlsx),
Sheets::Xlsb(ref mut e) => e.worksheet_formula(name).map_err(Error::Xlsb),
Sheets::Ods(ref mut e) => e.worksheet_formula(name).map_err(Error::Ods),
}
}

Expand Down
47 changes: 47 additions & 0 deletions src/datatype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -360,6 +360,53 @@ where
}
}

/// An enum to represent all different data types that can appear as
/// a value in a worksheet cell, with the option of borrowing string data.
///
/// Compared with `DataType`, this enum has one extra variant,
/// `SharedString`, which holds a `&'a str` instead of an owned `String`.
/// Every variant converts into the matching `DataType` variant through the
/// `From<DataTypeRef>` impl; `SharedString` becomes an owned
/// `DataType::String`.
#[derive(Debug, Clone, PartialEq, Default)]
pub enum DataTypeRef<'a> {
/// Signed integer
Int(i64),
/// Float
Float(f64),
/// Owned string
String(String),
/// Shared string, borrowed for the lifetime `'a` rather than copied
SharedString(&'a str),
/// Boolean
Bool(bool),
/// Date or Time
DateTime(f64),
/// Duration
Duration(f64),
/// Date, Time or DateTime in ISO 8601
DateTimeIso(String),
/// Duration in ISO 8601
DurationIso(String),
/// Error
Error(CellErrorType),
/// Empty cell; this is the value produced by `Default`
#[default]
Empty,
}

impl<'a> From<DataTypeRef<'a>> for DataType {
fn from(value: DataTypeRef<'a>) -> Self {
match value {
DataTypeRef::Int(v) => DataType::Int(v),
DataTypeRef::Float(v) => DataType::Float(v),
DataTypeRef::String(v) => DataType::String(v),
DataTypeRef::SharedString(v) => DataType::String(v.into()),
DataTypeRef::Bool(v) => DataType::Bool(v),
DataTypeRef::DateTime(v) => DataType::DateTime(v),
DataTypeRef::Duration(v) => DataType::Duration(v),
DataTypeRef::DateTimeIso(v) => DataType::DateTimeIso(v),
DataTypeRef::DurationIso(v) => DataType::DurationIso(v),
DataTypeRef::Error(v) => DataType::Error(v),
DataTypeRef::Empty => DataType::Empty,
}
}
}

#[cfg(all(test, feature = "dates"))]
mod date_tests {
use super::*;
Expand Down
12 changes: 4 additions & 8 deletions src/de.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ impl RangeDeserializerBuilder<'static, &'static str> {
/// fn main() -> Result<(), Error> {
/// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
/// let mut workbook: Xlsx<_> = open_workbook(path)?;
/// let range = workbook.worksheet_range("Sheet1")
/// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??;
/// let range = workbook.worksheet_range("Sheet1")?;
///
/// let mut iter = RangeDeserializerBuilder::new()
/// .has_headers(false)
Expand Down Expand Up @@ -154,8 +153,7 @@ impl<'h, H: AsRef<str> + Clone + 'h> RangeDeserializerBuilder<'h, H> {
/// fn main() -> Result<(), Error> {
/// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
/// let mut workbook: Xlsx<_> = open_workbook(path)?;
/// let range = workbook.worksheet_range("Sheet1")
/// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??;
/// let range = workbook.worksheet_range("Sheet1")?;
/// let mut iter = RangeDeserializerBuilder::with_headers(&["value", "label"]).from_range(&range)?;
///
/// if let Some(result) = iter.next() {
Expand Down Expand Up @@ -184,8 +182,7 @@ impl<'h, H: AsRef<str> + Clone + 'h> RangeDeserializerBuilder<'h, H> {
/// fn main() -> Result<(), Error> {
/// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
/// let mut workbook: Xlsx<_> = open_workbook(path)?;
/// let range = workbook.worksheet_range("Sheet1")
/// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??;
/// let range = workbook.worksheet_range("Sheet1")?;
/// let mut iter = RangeDeserializerBuilder::new().from_range(&range)?;
///
/// if let Some(result) = iter.next() {
Expand Down Expand Up @@ -220,8 +217,7 @@ impl<'h, H: AsRef<str> + Clone + 'h> RangeDeserializerBuilder<'h, H> {
/// fn main() -> Result<(), Error> {
/// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
/// let mut workbook: Xlsx<_> = open_workbook(path)?;
/// let range = workbook.worksheet_range("Sheet1")
/// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??;
/// let range = workbook.worksheet_range("Sheet1")?;
///
/// let mut iter = RangeDeserializerBuilder::new().from_range(&range)?;
///
Expand Down
20 changes: 15 additions & 5 deletions src/formats.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::DataType;
use crate::{datatype::DataTypeRef, DataType};

/// https://learn.microsoft.com/en-us/office/troubleshoot/excel/1900-and-1904-date-system
static EXCEL_1900_1904_DIFF: i64 = 1462;
Expand Down Expand Up @@ -104,18 +104,28 @@ pub fn format_excel_i64(value: i64, format: Option<&CellFormat>, is_1904: bool)
}

// convert f64 to date, if format == Date
pub fn format_excel_f64(value: f64, format: Option<&CellFormat>, is_1904: bool) -> DataType {
#[inline]
pub fn format_excel_f64_ref<'a>(
value: f64,
format: Option<&CellFormat>,
is_1904: bool,
) -> DataTypeRef<'static> {
match format {
Some(CellFormat::DateTime) => DataType::DateTime(if is_1904 {
Some(CellFormat::DateTime) => DataTypeRef::DateTime(if is_1904 {
value + EXCEL_1900_1904_DIFF as f64
} else {
value
}),
Some(CellFormat::TimeDelta) => DataType::Duration(value),
_ => DataType::Float(value),
Some(CellFormat::TimeDelta) => DataTypeRef::Duration(value),
_ => DataTypeRef::Float(value),
}
}

// convert f64 to date, if format == Date
pub fn format_excel_f64(value: f64, format: Option<&CellFormat>, is_1904: bool) -> DataType {
format_excel_f64_ref(value, format, is_1904).into()
}

/// Ported from openpyxl, MIT License
/// https://foss.heptapod.net/openpyxl/openpyxl/-/blob/a5e197c530aaa49814fd1d993dd776edcec35105/openpyxl/styles/tests/test_number_style.py
#[test]
Expand Down
29 changes: 22 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
//! let mut workbook: Xlsx<_> = open_workbook(path).expect("Cannot open file");
//!
//! // Read whole worksheet data and provide some statistics
//! if let Some(Ok(range)) = workbook.worksheet_range("Sheet1") {
//! if let Ok(range) = workbook.worksheet_range("Sheet1") {
//! let total_cells = range.get_size().0 * range.get_size().1;
//! let non_empty_cells: usize = range.used_cells().count();
//! println!("Found {} cells in 'Sheet1', including {} non empty cells",
Expand Down Expand Up @@ -49,7 +49,6 @@
//! println!("found {} formula in '{}'",
//! workbook
//! .worksheet_formula(&s)
//! .expect("sheet not found")
//! .expect("error while getting formula")
//! .rows().flat_map(|r| r.iter().filter(|f| !f.is_empty()))
//! .count(),
Expand All @@ -74,6 +73,7 @@ mod de;
mod errors;
pub mod vba;

use datatype::DataTypeRef;
use serde::de::DeserializeOwned;
use std::borrow::Cow;
use std::cmp::{max, min};
Expand Down Expand Up @@ -132,6 +132,18 @@ impl fmt::Display for CellErrorType {
}
}

/// Inclusive bounding box of an area, described by two corner coordinates.
///
/// Both corners belong to the area, so the default value
/// (`start == end == (0, 0)`) covers exactly one cell.
#[derive(Debug, PartialEq, Default, Clone, Copy)]
pub(crate) struct Dimensions {
    /// First corner of the area (inclusive).
    pub start: (u32, u32),
    /// Last corner of the area (inclusive). Each component is expected to be
    /// greater than or equal to the matching component of `start`.
    pub end: (u32, u32),
}

impl Dimensions {
    /// Returns the number of cells covered by the area, both corners included.
    ///
    /// Each extent is widened to `u64` *before* the inclusive `+ 1` is added,
    /// so an axis spanning the whole `u32` range yields `2^32` instead of
    /// overflowing. The product saturates at `u64::MAX` in the single case
    /// where both axes span the whole `u32` range.
    ///
    /// # Panics
    ///
    /// As with any unchecked `u32` subtraction, builds with overflow checks
    /// enabled panic when a component of `end` is smaller than the matching
    /// component of `start`.
    pub fn len(&self) -> u64 {
        let extent = |first: u32, last: u32| u64::from(last - first) + 1;
        extent(self.start.0, self.end.0).saturating_mul(extent(self.start.1, self.end.1))
    }
}

/// Common file metadata
///
/// Depending on file type, some extra information may be stored
Expand Down Expand Up @@ -203,18 +215,21 @@ where

/// Creates a new instance.
fn new(reader: RS) -> Result<Self, Self::Error>;

/// Gets `VbaProject`
fn vba_project(&mut self) -> Option<Result<Cow<'_, VbaProject>, Self::Error>>;

/// Returns a reference to the workbook's `Metadata`
fn metadata(&self) -> &Metadata;

/// Read worksheet data in corresponding worksheet path
fn worksheet_range(&mut self, name: &str) -> Option<Result<Range<DataType>, Self::Error>>;
fn worksheet_range(&mut self, name: &str) -> Result<Range<DataType>, Self::Error>;

/// Fetch all worksheet data & paths
fn worksheets(&mut self) -> Vec<(String, Range<DataType>)>;

/// Read worksheet formula in corresponding worksheet path
fn worksheet_formula(&mut self, _: &str) -> Option<Result<Range<String>, Self::Error>>;
fn worksheet_formula(&mut self, _: &str) -> Result<Range<String>, Self::Error>;

/// Get all sheet names of this workbook, in workbook order
///
Expand Down Expand Up @@ -248,7 +263,7 @@ where
/// sheet_name, then the corresponding worksheet.
fn worksheet_range_at(&mut self, n: usize) -> Option<Result<Range<DataType>, Self::Error>> {
let name = self.sheet_names().get(n)?.to_string();
self.worksheet_range(&name)
Some(self.worksheet_range(&name))
}

/// Get all pictures, tuple as (ext: String, data: Vec<u8>)
Expand Down Expand Up @@ -279,6 +294,7 @@ where
pub trait CellType: Default + Clone + PartialEq {}

// Marker impls: `CellType` declares no methods, so each impl below only opts a
// type in as a cell type usable wherever a `T: CellType` bound is required.
impl CellType for DataType {}
// Implemented for every lifetime `'a` of the borrowing cell value.
impl<'a> CellType for DataTypeRef<'a> {}
impl CellType for String {}
impl CellType for usize {} // for tests

Expand Down Expand Up @@ -616,8 +632,7 @@ impl<T: CellType> Range<T> {
/// fn main() -> Result<(), Error> {
/// let path = format!("{}/tests/temperature.xlsx", env!("CARGO_MANIFEST_DIR"));
/// let mut workbook: Xlsx<_> = open_workbook(path)?;
/// let mut sheet = workbook.worksheet_range("Sheet1")
/// .ok_or(Error::Msg("Cannot find 'Sheet1'"))??;
/// let mut sheet = workbook.worksheet_range("Sheet1")?;
/// let mut iter = sheet.deserialize()?;
///
/// if let Some(result) = iter.next() {
Expand Down
Loading

0 comments on commit b72eb4f

Please sign in to comment.