Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding in ability to read merge cells from xls and xlsx files. #307

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 54 additions & 5 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ enum CellFormat {

/// A struct representing an old xls format file (CFB)
pub struct Xls<RS> {
sheets: BTreeMap<String, (Range<DataType>, Range<String>)>,
sheets: BTreeMap<String, (Range<DataType>, Range<String>, Vec<Dimensions>)>,
vba: Option<VbaProject>,
metadata: Metadata,
marker: PhantomData<RS>,
Expand Down Expand Up @@ -205,6 +205,18 @@ impl<RS: Read + Seek> Xls<RS> {

Ok(xls)
}

/// Gets the merged-cell dimensions stored for the worksheet called `name`.
///
/// Returns `None` when `self.sheets` has no entry for `name`; otherwise a
/// clone of the stored ranges wrapped in `Ok`.
pub fn worksheet_merge_cells(&mut self, name: &str) -> Option<Result<Vec<Dimensions>, XlsError>> {
    let entry = self.sheets.get(name)?;
    // The third tuple slot holds the merged ranges collected while parsing.
    let merged = entry.2.clone();
    Some(Ok(merged))
}

/// Gets the merged-cell dimensions of the nth worksheet. Shortcut for
/// looking up the nth entry of `sheet_names()` and passing it to
/// `worksheet_merge_cells`.
///
/// Returns `None` when `n` is past the end of `sheet_names()`.
pub fn worksheet_merge_cells_at(&mut self, n: usize) -> Option<Result<Vec<Dimensions>, XlsError>> {
    // Copy the name out first so the borrow of `self` ends before the
    // `&mut self` call below.
    let sheet_name = match self.sheet_names().get(n) {
        Some(found) => found.to_string(),
        None => return None,
    };
    self.worksheet_merge_cells(&sheet_name)
}
}

impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
Expand All @@ -230,7 +242,7 @@ impl<RS: Read + Seek> Reader<RS> for Xls<RS> {
fn worksheets(&mut self) -> Vec<(String, Range<DataType>)> {
self.sheets
.iter()
.map(|(name, (data, _))| (name.to_owned(), data.clone()))
.map(|(name, (data, _, _))| (name.to_owned(), data.clone()))
.collect()
}

Expand Down Expand Up @@ -379,6 +391,7 @@ impl<RS: Read + Seek> Xls<RS> {
let records = RecordIter { stream: sh };
let mut cells = Vec::new();
let mut formulas = Vec::new();
let mut merge_cells = Vec::new();
for record in records {
let r = record?;
match r.typ {
Expand All @@ -394,7 +407,8 @@ impl<RS: Read + Seek> Xls<RS> {
0x0205 => cells.push(parse_bool_err(r.data)?), // 517: BoolErr
0x027E => cells.push(parse_rk(r.data, &self.formats)?), // 636: Rk
0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst
0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats)?, // 189: MulRk
0x00BD => parse_mul_rk(r.data, &mut cells)?, // 189: MulRk
0x00E5 => parse_merge_cells(r.data, &mut merge_cells)?, // 229: Merge Cells
0x000A => break, // 10: EOF,
0x0006 => {
// 6: Formula
Expand All @@ -421,7 +435,7 @@ impl<RS: Read + Seek> Xls<RS> {
}
let range = Range::from_sparse(cells);
let formula = Range::from_sparse(formulas);
sheets.insert(name, (range, formula));
sheets.insert(name, (range, formula, merge_cells));
}

self.sheets = sheets;
Expand Down Expand Up @@ -528,6 +542,26 @@ fn parse_rk(r: &[u8], formats: &[CellFormat]) -> Result<Cell<DataType>, XlsError
))
}

/// Parses a merge-cells record payload and appends every range it holds to
/// `merge_cells`.
///
/// Layout as read here: a `u16` entry count at offset 0, followed by that many
/// 8-byte entries, each made of four `u16` values in the order first row,
/// last row, first column, last column.
///
/// A payload that is shorter than its count announces is read as far as whole
/// entries are available instead of panicking on an out-of-bounds slice.
fn parse_merge_cells(r: &[u8], merge_cells: &mut Vec<Dimensions>) -> Result<(), XlsError> {
    // Too short to even hold the count: nothing to collect.
    if r.len() < 2 {
        return Ok(());
    }
    // Widen to usize up front; the previous `2 + i * 8` was evaluated in u16
    // and overflowed once the count reached 8192.
    let count = usize::from(read_u16(r));

    // `chunks_exact` never yields a partial entry, so every read below is in bounds.
    merge_cells.extend(r[2..].chunks_exact(8).take(count).map(|entry| {
        let rf = read_u16(entry);
        let rl = read_u16(&entry[2..]);
        let cf = read_u16(&entry[4..]);
        let cl = read_u16(&entry[6..]);

        Dimensions {
            start: (rf.into(), cf.into()),
            end: (rl.into(), cl.into()),
        }
    }));

    Ok(())
}

fn parse_mul_rk(
r: &[u8],
cells: &mut Vec<Cell<DataType>>,
Expand Down Expand Up @@ -622,11 +656,26 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<DataType>
Ok(None)
}

struct Dimensions {
#[derive(Debug, Clone)]
pub struct Dimensions {
start: (u32, u32),
end: (u32, u32),
}

impl Dimensions {
    /// Top-left cell of the range, as `(row, column)`.
    #[inline]
    pub fn start(&self) -> (u32, u32) {
        let (row, column) = self.start;
        (row, column)
    }

    /// Bottom-right cell of the range, as `(row, column)`.
    #[inline]
    pub fn end(&self) -> (u32, u32) {
        let (row, column) = self.end;
        (row, column)
    }
}

fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
let (rf, rl, cf, cl) = match r.len() {
10 => (
Expand Down
91 changes: 90 additions & 1 deletion src/xlsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,48 @@ impl<RS: Read + Seek> Xlsx<RS> {
Err(e) => Some(Err(e)),
}
}

/// Gets the merged-cell dimensions of the worksheet called `name`.
///
/// Returns `None` when `name` is not among `self.sheets` or when
/// `xml_reader` yields no reader for the sheet's path. A sheet without a
/// `<mergeCells>` element produces `Some(Ok(vec![]))`.
///
/// # Errors
///
/// Returns `Some(Err(_))` when the sheet XML cannot be opened, when reading
/// an XML event fails, or when `read_merge_cells` fails. Earlier versions
/// panicked on the first case and silently returned an empty list on the last.
pub fn worksheet_merge_cells(&mut self, name: &str) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    let xml = match self.sheets.iter().find(|(n, _)| n == name) {
        Some((_, path)) => xml_reader(&mut self.zip, path),
        None => return None,
    };

    xml.map(|xml_result| {
        // Propagate a failure to open the sheet instead of unwrapping it.
        let mut xml = xml_result?;
        let mut buf = Vec::new();

        loop {
            buf.clear();
            match xml.read_event_into(&mut buf) {
                Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"mergeCells" => {
                    // Only the first <mergeCells> element is read; its
                    // errors are reported to the caller.
                    return read_merge_cells(&mut xml);
                }
                Ok(Event::Eof) => return Ok(Vec::new()),
                Err(e) => return Err(XlsxError::Xml(e)),
                _ => (),
            }
        }
    })
}

/// Gets the merged-cell dimensions of the nth worksheet. Shortcut for
/// taking the nth entry of `sheet_names()` and handing it to
/// `worksheet_merge_cells`.
///
/// Returns `None` when `sheet_names()` has no entry at index `n`.
pub fn worksheet_merge_cells_at(&mut self, n: usize) -> Option<Result<Vec<Dimensions>, XlsxError>> {
    // Take an owned copy so `self` is free to be borrowed mutably afterwards.
    let owned_name = self.sheet_names().get(n).map(|s| s.to_string())?;
    self.worksheet_merge_cells(&owned_name)
}
}

struct InnerTableMetadata {
Expand Down Expand Up @@ -1041,7 +1083,7 @@ fn read_sheet_data(
}

#[derive(Debug, PartialEq)]
struct Dimensions {
pub struct Dimensions {
start: (u32, u32),
end: (u32, u32),
}
Expand All @@ -1050,6 +1092,18 @@ impl Dimensions {
/// Number of cells covered by the range, counting both corner cells.
///
/// Assumes `end` is not before `start` on either axis, as the original
/// expression did.
fn len(&self) -> u64 {
    // Widen before adding 1: `end - start + 1` evaluated in u32 overflows
    // when the span is u32::MAX.
    let rows = u64::from(self.end.0 - self.start.0) + 1;
    let cols = u64::from(self.end.1 - self.start.1) + 1;
    rows * cols
}

/// Top-left cell of the range, as `(row, column)`.
#[inline]
pub fn start(&self) -> (u32, u32) {
    let (row, column) = self.start;
    (row, column)
}

/// Bottom-right cell of the range, as `(row, column)`.
#[inline]
pub fn end(&self) -> (u32, u32) {
    let (row, column) = self.end;
    (row, column)
}
}

/// converts a text representation (e.g. "A6:G67") of a dimension into integers
Expand Down Expand Up @@ -1181,6 +1235,41 @@ fn read_string(
}
}

/// Reads the children of a `<mergeCells>` element and returns one
/// `Dimensions` per `<mergeCell>` that carries a `ref` attribute.
///
/// Expects `xml` to be positioned just after the `<mergeCells>` start tag
/// and consumes events up to and including the matching end tag.
///
/// # Errors
///
/// Fails when an attribute or event cannot be read, when `get_dimension`
/// rejects a `ref` value, or when the document ends before `</mergeCells>`.
fn read_merge_cells(xml: &mut XlsReader<'_>) -> Result<Vec<Dimensions>, XlsxError> {
    let mut buf = Vec::new();
    let mut merge_cells = Vec::new();
    loop {
        buf.clear();
        match xml.read_event_into(&mut buf) {
            Ok(Event::Start(ref e)) if e.local_name().as_ref() == b"mergeCell" => {
                for a in e.attributes() {
                    if let Attribute {
                        key: QName(b"ref"),
                        value: v,
                    } = a.map_err(XlsxError::XmlAttr)?
                    {
                        merge_cells.push(get_dimension(&v)?);
                        // Only the `ref` attribute matters; skip the rest.
                        break;
                    }
                }
            }
            Ok(Event::End(ref e)) if e.local_name().as_ref() == b"mergeCells" => break,
            // Name the element that was left open so the error is actionable
            // (the previous empty string gave the reader nothing to go on).
            Ok(Event::Eof) => return Err(XlsxError::XmlEof("mergeCells")),
            Err(e) => return Err(XlsxError::Xml(e)),
            _ => (),
        }
    }

    Ok(merge_cells)
}

#[test]
fn test_dimensions() {
assert_eq!(get_row_column(b"A1").unwrap(), (0, 0));
Expand Down
Binary file added tests/merge_cells.xls
Binary file not shown.
Binary file added tests/merge_cells.xlsx
Binary file not shown.
42 changes: 42 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,18 @@ macro_rules! range_eq {
};
}

// Asserts that `$merge_cells` holds exactly the ranges listed in `$right`.
//
// `$right` is a sequence of `[start, end]` pairs of `(row, column)` tuples.
// The lengths are compared first so that surplus or missing entries fail the
// test instead of being skipped or surfacing as an index panic.
macro_rules! merge_cells_eq {
    ($merge_cells:expr, $right:expr) => {{
        let got = &$merge_cells;
        let want = &$right;
        assert_eq!(got.len(), want.len(), "Merge cell count mismatch");
        for (i, item) in want.iter().enumerate() {
            assert_eq!(got[i].start(), item[0], "Mismatch at position ({})", i);
            assert_eq!(got[i].end(), item[1], "Mismatch at position ({})", i);
        }
    }};
}

#[test]
fn issue_2() {
setup();
Expand Down Expand Up @@ -956,6 +968,36 @@ fn issue_271() -> Result<(), calamine::Error> {
Ok(())
}

// Checks that the merged ranges of the first sheet of the xlsx fixture
// are the expected three ranges.
#[test]
fn issue_305_merge_cells() {
    let workbook_path = format!(
        "{}/tests/merge_cells.xlsx",
        env!("CARGO_MANIFEST_DIR")
    );
    let mut workbook: Xlsx<_> = open_workbook(&workbook_path).unwrap();

    let lookup = workbook.worksheet_merge_cells_at(0);
    let merged = lookup.unwrap().unwrap();

    let expected = [[(0, 0), (0, 1)], [(1, 0), (3, 0)], [(1, 1), (3, 3)]];
    merge_cells_eq!(merged, expected);
}

// Checks that the merged ranges of the first sheet of the xls fixture
// are the expected three ranges.
#[test]
fn issue_305_merge_cells_xls() {
    let workbook_path = format!(
        "{}/tests/merge_cells.xls",
        env!("CARGO_MANIFEST_DIR")
    );
    let mut workbook: Xls<_> = open_workbook(&workbook_path).unwrap();

    let lookup = workbook.worksheet_merge_cells_at(0);
    let merged = lookup.unwrap().unwrap();

    let expected = [[(0, 0), (0, 1)], [(1, 0), (3, 0)], [(1, 1), (3, 3)]];
    merge_cells_eq!(merged, expected);
}

// cargo test --features picture
#[test]
#[cfg(feature = "picture")]
Expand Down