Skip to content

Commit 577b341

Browse files
authored
Remove dependency on ZipFile.jl and EzXML.jl (#280)
* Remove remaining dependency on ZipFile.jl * Remove overlooked use of ZipFile * No need to close zip io any longer * No need to close zip io any longer * Now also removed dependency on EzXML.jl except for calls to the overloaded findall() and findfirst() functions and a single call to EzXML.unlink!(). * Replace EzXML.findall() with a new find_all_nodes() function that uses the XML.jl API. * Replaced EzXML.findfirst() with find_all_nodes()[begin] * Further changes to unlink rows in write.jl * Yesterday's changes * Finally got SheetRowStreamIterator to work! * Further changes bug fixing failed tests * Force recompile? * Now passing all tests except `escape` * Clean up remaining open and close actions * Final fixes to escape tests * Remove unnecessary data files * Tidy-up * Don't pretty print * Remove last pretty printing example * Simplify regex that undoes pretty printing
1 parent 1b57119 commit 577b341

15 files changed

+691
-593
lines changed

Project.toml

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,15 @@ version = "0.10.5-dev"
88
[deps]
99
Artifacts = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
1010
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
11-
EzXML = "8f5d6c58-4d21-5cfd-889c-e3ad7ee6a615"
1211
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
1312
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
13+
XML = "72c71f33-b9b6-44de-8c94-c961784809e2"
1414
ZipArchives = "49080126-0e18-4c2a-b176-c102e4b3760c"
15-
ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
1615

1716
[compat]
18-
EzXML = "1"
1917
Tables = "1"
18+
XML = "0.3.4"
2019
ZipArchives = "2"
21-
ZipFile = "0.8, 0.9, 0.10"
2220
julia = "1.6"
2321

2422
[extras]

data/escape.xlsx

-10.2 KB
Binary file not shown.

src/XLSX.jl

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,12 @@ module XLSX
44
import Artifacts
55
import Dates
66
import Printf.@printf
7-
import ZipFile
87
import ZipArchives
9-
import EzXML
8+
import XML
109
import Tables
1110
import Base.convert
1211

13-
# https://github.com/fhs/ZipFile.jl/issues/39
14-
if !hasmethod(Base.bytesavailable, Tuple{ZipFile.ReadableFile})
15-
Base.bytesavailable(f::ZipFile.ReadableFile) = f.uncompressedsize - f._pos
16-
end
17-
18-
const SPREADSHEET_NAMESPACE_XPATH_ARG = [ "xpath" => "http://schemas.openxmlformats.org/spreadsheetml/2006/main" ]
12+
const SPREADSHEET_NAMESPACE_XPATH_ARG = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"
1913
const EXCEL_MAX_COLS = 16_384 # total columns supported by Excel per sheet
2014
const EXCEL_MAX_ROWS = 1_048_576 # total rows supported by Excel per sheet (including headers)
2115

src/cell.jl

Lines changed: 60 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,13 @@ Base.hash(c::Cell) = hash(c.ref) + hash(c.datatype) + hash(c.style) + hash(c.val
1818
Base.:(==)(c1::EmptyCell, c2::EmptyCell) = c1.ref == c2.ref
1919
Base.hash(c::EmptyCell) = hash(c.ref) + 10
2020

21-
function find_t_node_recursively(n::EzXML.Node) :: Union{Nothing, EzXML.Node}
22-
if EzXML.nodename(n) == "t"
21+
function find_t_node_recursively(n::XML.LazyNode) :: Union{Nothing, XML.LazyNode}
22+
if XML.tag(n) == "t"
2323
return n
2424
else
25-
for child in EzXML.eachelement(n)
25+
for child in XML.children(n)
2626
result = find_t_node_recursively(child)
27-
if result != nothing
27+
if result !== nothing
2828
return result
2929
end
3030
end
@@ -33,25 +33,27 @@ function find_t_node_recursively(n::EzXML.Node) :: Union{Nothing, EzXML.Node}
3333
return nothing
3434
end
3535

36-
function Cell(c::EzXML.Node)
36+
function Cell(c::XML.LazyNode)
3737
# c (Cell) element is defined at section 18.3.1.4
3838
# t (Cell Data Type) is an enumeration representing the cell's data type. The possible values for this attribute are defined by the ST_CellType simple type (§18.18.11).
3939
# s (Style Index) is the index of this cell's style. Style records are stored in the Styles Part.
4040

41-
@assert EzXML.nodename(c) == "c" "`Cell` Expects a `c` (cell) XML node."
41+
@assert XML.tag(c) == "c" "`Cell` Expects a `c` (cell) XML node."
4242

43-
ref = CellRef(c["r"])
43+
a = XML.attributes(c) # Dict of cell attributes
44+
45+
ref = CellRef(a["r"])
4446

4547
# type
46-
if haskey(c, "t")
47-
t = c["t"]
48+
if haskey(a, "t")
49+
t = a["t"]
4850
else
4951
t = ""
5052
end
5153

5254
# style
53-
if haskey(c, "s")
54-
s = c["s"]
55+
if haskey(a, "s")
56+
s = a["s"]
5557
else
5658
s = ""
5759
end
@@ -62,29 +64,35 @@ function Cell(c::EzXML.Node)
6264
local found_v::Bool = false
6365
local found_f::Bool = false
6466

65-
for c_child_element in EzXML.eachelement(c)
66-
67+
for c_child_element in XML.children(c)
6768
if t == "inlineStr"
68-
69-
if EzXML.nodename(c_child_element) == "is"
69+
if XML.tag(c_child_element) == "is"
7070
t_node = find_t_node_recursively(c_child_element)
71-
if t_node != nothing
72-
v = EzXML.nodecontent(t_node)
71+
if t_node !== nothing
72+
c = XML.children(t_node)
73+
if length(c) == 0
74+
v = ""
75+
elseif length(c) == 1
76+
v= XML.value(c[1])
77+
else
78+
error("Too amny children in `t` node. Expected >=1, found: $(length(c))")
79+
end
7380
end
7481
end
7582

7683
else
77-
if EzXML.nodename(c_child_element) == "v"
84+
if XML.tag(c_child_element) == "v"
7885

7986
# we should have only one v element
8087
if found_v
8188
error("Unsupported: cell $(ref) has more than 1 `v` elements.")
8289
else
8390
found_v = true
8491
end
92+
93+
v = XML.unescape(XML.simple_value(c_child_element))
8594

86-
v = EzXML.nodecontent(c_child_element)
87-
elseif EzXML.nodename(c_child_element) == "f"
95+
elseif XML.tag(c_child_element) == "f"
8896

8997
# we should have only one f element
9098
if found_f
@@ -97,40 +105,54 @@ function Cell(c::EzXML.Node)
97105
end
98106
end
99107
end
100-
101108
return Cell(ref, t, s, v, f)
102109
end
103110

104111
function parse_formula_from_element(c_child_element) :: AbstractFormula
105112

106-
if EzXML.nodename(c_child_element) != "f"
107-
error("Expected nodename `f`. Found: `$(EzXML.nodename(c_child_element))`")
113+
if XML.tag(c_child_element) != "f"
114+
error("Expected nodename `f`. Found: `$(XML.tag(c_child_element))`")
108115
end
109116

110-
formula_string = EzXML.nodecontent(c_child_element)
117+
if XML.is_simple(c_child_element)
118+
formula_string = XML.simple_value(c_child_element)
119+
else
120+
fs = [x for x in XML.children(c_child_element) if XML.nodetype(x) == XML.Text]
121+
if length(fs)==0
122+
formula_string=""
123+
else
124+
formula_string=XML.value(fs[1])
125+
end
126+
end
111127

112-
if haskey(c_child_element, "ref") && haskey(c_child_element, "t") && c_child_element["t"] == "shared"
128+
a = XML.attributes(c_child_element)
113129

114-
haskey(c_child_element, "si") || error("Expected shared formula to have an index. `si` attribute is missing: $c_child_element")
130+
if !isnothing(a)
115131

116-
return ReferencedFormula(
117-
formula_string,
118-
parse(Int, c_child_element["si"]),
119-
c_child_element["ref"],
120-
)
132+
if haskey(a, "ref") && haskey(a, "t") && a["t"] == "shared"
121133

122-
elseif haskey(c_child_element, "t") && c_child_element["t"] == "shared"
134+
haskey(a, "si") || error("Expected shared formula to have an index. `si` attribute is missing: $c_child_element")
123135

124-
haskey(c_child_element, "si") || error("Expected shared formula to have an index. `si` attribute is missing: $c_child_element")
136+
return ReferencedFormula(
137+
formula_string,
138+
parse(Int, a["si"]),
139+
a["ref"],
140+
)
125141

126-
return FormulaReference(
127-
parse(Int, c_child_element["si"]),
128-
)
129-
else
130-
return Formula(formula_string)
142+
elseif haskey(a, "t") && a["t"] == "shared"
143+
144+
haskey(a, "si") || error("Expected shared formula to have an index. `si` attribute is missing: $c_child_element")
145+
146+
return FormulaReference(
147+
parse(Int, a["si"]),
148+
)
149+
end
131150
end
151+
152+
return Formula(formula_string)
132153
end
133154

155+
134156
# Constructor with simple formula string for backward compatibility
135157
function Cell(ref::CellRef, datatype::String, style::String, value::String, formula::String)
136158
return Cell(ref, datatype, style, value, Formula(formula))
@@ -197,7 +219,6 @@ function getdata(ws::Worksheet, cell::Cell) :: CellValueType
197219
return _celldata_datetime(cell.value, isdate1904(ws))
198220

199221
elseif !isempty(cell.style) && styles_is_float(ws, cell.style)
200-
201222
# float
202223
return parse(Float64, cell.value)
203224

0 commit comments

Comments
 (0)