Skip to content

Commit 12e9009

Browse files
ThomasBarnekow authored and tomjebo committed
Refactor WmlComparer (#255)
Refactor WmlComparer and Compare method; beautify ComparisonUnitGroup, ComparisonUnitWord, LCS-related methods, and CorrelatedSequence
1 parent f7d36ea commit 12e9009

30 files changed

+8455
-27
lines changed

OpenXmlPowerTools.Tests/WmlContentAtomListTests.cs

+6-1
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,10 @@ namespace OxPt
3737
{
3838
public class CaTests
3939
{
40+
/*
41+
* This test was removed because it depends on the Coalesce method, which is only ever used
42+
* by this test.
43+
*
4044
[Theory]
4145
[InlineData("CA/CA001-Plain.docx", 60)]
4246
[InlineData("CA/CA002-Bookmark.docx", 7)]
@@ -57,7 +61,7 @@ public class CaTests
5761
//[InlineData("", 0)]
5862
//[InlineData("", 0)]
5963
//[InlineData("", 0)]
60-
64+
6165
public void CA001_ContentAtoms(string name, int contentAtomCount)
6266
{
6367
FileInfo sourceDocx = new FileInfo(Path.Combine(TestUtil.SourceDir.FullName, name));
@@ -96,6 +100,7 @@ public void CA001_ContentAtoms(string name, int contentAtomCount)
96100
Assert.Equal(contentAtomCount, contentAtomList.Count());
97101
}
98102
}
103+
*/
99104

100105
[Theory]
101106
[InlineData("HC009-Test-04.docx")]

OpenXmlPowerTools.sln.DotSettings

+270
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
6+
namespace OpenXmlPowerTools
7+
{
8+
/// <summary>
/// Base class for the nodes of a comparison tree. Every unit carries a correlation status,
/// a SHA1 hash and an optional list of child units in <see cref="Contents" />.
/// </summary>
public abstract class ComparisonUnit
{
    // Cache for DescendantContentAtomsCount; stays null until the count is first requested.
    private int? _descendantContentAtomsCount;

    /// <summary>Correlation status assigned to this unit.</summary>
    public CorrelationStatus CorrelationStatus { get; set; }

    /// <summary>
    /// Direct children of this unit. May be null: derived types are responsible for assigning it,
    /// and not every derived type does.
    /// </summary>
    public List<ComparisonUnit> Contents { get; protected set; }

    /// <summary>SHA1 hash string associated with this unit; assigned by derived types.</summary>
    public string SHA1Hash { get; protected set; }

    /// <summary>
    /// Number of <see cref="ComparisonUnitAtom" /> instances below this unit.
    /// The value is computed on first access and cached afterwards, so later changes to
    /// <see cref="Contents" /> are not reflected.
    /// </summary>
    public int DescendantContentAtomsCount
    {
        get
        {
            if (!_descendantContentAtomsCount.HasValue)
            {
                _descendantContentAtomsCount = DescendantContentAtoms().Count();
            }

            return _descendantContentAtomsCount.Value;
        }
    }

    /// <summary>
    /// Collects all descendants of this unit (not including the unit itself) in depth-first,
    /// parent-before-children order.
    /// </summary>
    private IEnumerable<ComparisonUnit> Descendants()
    {
        var collected = new List<ComparisonUnit>();
        DescendantsInternal(this, collected);
        return collected;
    }

    /// <summary>
    /// Returns every descendant of this unit that is a <see cref="ComparisonUnitAtom" />.
    /// A unit without contents yields an empty sequence.
    /// </summary>
    public IEnumerable<ComparisonUnitAtom> DescendantContentAtoms()
    {
        return Descendants().OfType<ComparisonUnitAtom>();
    }

    /// <summary>
    /// Recursively appends the descendants of <paramref name="comparisonUnit" /> to
    /// <paramref name="comparisonUnitList" />.
    /// </summary>
    private static void DescendantsInternal(
        ComparisonUnit comparisonUnit,
        List<ComparisonUnit> comparisonUnitList)
    {
        // Units that never assigned Contents have nothing below them. Guarding here (rather than
        // only before recursing) also protects the root unit the traversal starts from.
        if (comparisonUnit.Contents == null)
        {
            return;
        }

        foreach (ComparisonUnit child in comparisonUnit.Contents)
        {
            comparisonUnitList.Add(child);
            DescendantsInternal(child, comparisonUnitList);
        }
    }

    /// <summary>
    /// Produces a textual dump of this unit.
    /// </summary>
    /// <param name="indent">Number of leading spaces requested for the dump.</param>
    public abstract string ToString(int indent);

    /// <summary>
    /// Dumps a list of comparison units: a header line followed by one line per unit,
    /// each unit rendered with an indent of 2.
    /// </summary>
    /// <param name="cul">The units to dump.</param>
    internal static string ComparisonUnitListToString(ComparisonUnit[] cul)
    {
        var sb = new StringBuilder();
        sb.Append("Dump Comparision Unit List To String" + Environment.NewLine);

        foreach (ComparisonUnit item in cul)
        {
            sb.Append(item.ToString(2) + Environment.NewLine);
        }

        return sb.ToString();
    }
}
64+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
using System.Collections.Generic;
2+
using System.Linq;
3+
using System.Text;
4+
using System.Xml.Linq;
5+
using DocumentFormat.OpenXml.Packaging;
6+
7+
namespace OpenXmlPowerTools
8+
{
9+
/// <summary>
/// Leaf comparison unit wrapping a single content element together with its ancestor elements
/// and the part it belongs to.
/// </summary>
public class ComparisonUnitAtom : ComparisonUnit
{
    // Width to which the element's local name is padded in dumps.
    private const int XNamePad = 16;

    // Number of leading characters of hashes and unids shown in dumps.
    private const int AbbreviationLength = 8;

    /// <summary>
    /// Creates an atom, derives its correlation status from revision tracking markup and
    /// determines its SHA1 hash.
    /// </summary>
    /// <param name="contentElement">The content element represented by this atom.</param>
    /// <param name="ancestorElements">Ancestors of the content element, ordered from the body to the leaf.</param>
    /// <param name="part">The part containing the content element.</param>
    /// <param name="settings">Comparer settings; consulted for case-insensitive hashing.</param>
    public ComparisonUnitAtom(
        XElement contentElement,
        XElement[] ancestorElements,
        OpenXmlPart part,
        WmlComparerSettings settings)
    {
        ContentElement = contentElement;
        AncestorElements = ancestorElements;
        Part = part;
        RevTrackElement = GetRevisionTrackingElementFromAncestors(contentElement, AncestorElements);

        // No revision tracking element means the content is unchanged; otherwise the element
        // name decides between deleted and inserted.
        if (RevTrackElement == null)
        {
            CorrelationStatus = CorrelationStatus.Equal;
        }
        else if (RevTrackElement.Name == W.del)
        {
            CorrelationStatus = CorrelationStatus.Deleted;
        }
        else if (RevTrackElement.Name == W.ins)
        {
            CorrelationStatus = CorrelationStatus.Inserted;
        }

        // Prefer a hash already stored on the element; compute one only when it is absent.
        var sha1Hash = (string) contentElement.Attribute(PtOpenXml.SHA1Hash);
        if (sha1Hash != null)
        {
            SHA1Hash = sha1Hash;
        }
        else
        {
            string shaHashString = GetSha1HashStringForElement(ContentElement, settings);
            SHA1Hash = WmlComparerUtil.SHA1HashStringForUTF8String(shaHashString);
        }
    }

    /// <summary>
    /// Ancestors of <see cref="ContentElement" />, kept in order from the body to the leaf because
    /// that is the order needed to reassemble the document. Code that needs the nearest ancestor
    /// (e.g. the containing cell) has to search in reverse, i.e. from the leaf back to the body.
    /// </summary>
    public XElement[] AncestorElements { get; }

    /// <summary>The content element represented by this atom.</summary>
    public XElement ContentElement { get; }

    /// <summary>The w:del or w:ins element that applies to this atom, or null if there is none.</summary>
    public XElement RevTrackElement { get; }

    /// <summary>
    /// Unids paired positionally with <see cref="AncestorElements" /> when dumping.
    /// Not set by the constructor, so it is null until assigned.
    /// </summary>
    public string[] AncestorUnids { get; set; }

    public ComparisonUnitAtom ComparisonUnitAtomBefore { get; set; }

    public XElement ContentElementBefore { get; set; }

    /// <summary>The part containing <see cref="ContentElement" />.</summary>
    public OpenXmlPart Part { get; }

    /// <summary>
    /// Builds the string that is hashed for an element: its local name followed by its text value,
    /// upper-cased with the configured culture when the comparison is case-insensitive.
    /// </summary>
    private static string GetSha1HashStringForElement(XElement contentElement, WmlComparerSettings settings)
    {
        string text = contentElement.Value;
        if (settings.CaseInsensitive)
        {
            // NOTE(review): string.ToUpper(CultureInfo) throws for a null culture - confirm that
            // settings.CultureInfo is always set when CaseInsensitive is enabled.
            text = text.ToUpper(settings.CultureInfo);
        }

        return contentElement.Name.LocalName + text;
    }

    /// <summary>
    /// Finds the revision tracking element (w:del or w:ins) for a content element. For w:pPr it is
    /// looked up among the children of the w:rPr child elements; for everything else the first
    /// matching element in <paramref name="ancestors" /> is used.
    /// </summary>
    private static XElement GetRevisionTrackingElementFromAncestors(
        XElement contentElement,
        IEnumerable<XElement> ancestors)
    {
        if (contentElement.Name == W.pPr)
        {
            return contentElement
                .Elements(W.rPr)
                .Elements()
                .FirstOrDefault(e => e.Name == W.del || e.Name == W.ins);
        }

        return ancestors.FirstOrDefault(a => a.Name == W.del || a.Name == W.ins);
    }

    /// <summary>Dumps this atom without indentation.</summary>
    public override string ToString()
    {
        return ToString(0);
    }

    /// <summary>
    /// Dumps this atom: local name, text (for w:t and w:delText), correlation status, abbreviated
    /// hash and the ancestor chain with the unids read from the ancestor elements.
    /// </summary>
    /// <param name="indent">Number of leading spaces.</param>
    public override string ToString(int indent)
    {
        StringBuilder sb = StartDump(indent);
        AppendAncestorsDump(sb, this);
        return sb.ToString();
    }

    /// <summary>
    /// Dumps this atom like <see cref="ToString()" />, but takes the ancestor unids from
    /// <see cref="AncestorUnids" /> instead of the ancestor elements.
    /// </summary>
    public string ToStringAncestorUnids()
    {
        return ToStringAncestorUnids(0);
    }

    private string ToStringAncestorUnids(int indent)
    {
        StringBuilder sb = StartDump(indent);
        AppendAncestorsUnidsDump(sb, this);
        return sb.ToString();
    }

    /// <summary>
    /// Writes the part of the dump shared by both dump flavours: indentation, padded local name,
    /// optional text, correlation status and abbreviated hash.
    /// </summary>
    private StringBuilder StartDump(int indent)
    {
        var sb = new StringBuilder();
        sb.Append("".PadRight(indent));

        var correlationStatus = "";
        if (CorrelationStatus != CorrelationStatus.Nil)
        {
            correlationStatus = $"[{CorrelationStatus.ToString().PadRight(8)}] ";
        }

        string paddedName = PadLocalName(XNamePad, this);
        string hashPrefix = Abbreviate(SHA1Hash);

        // Only text-bearing elements include their value in the dump.
        if (ContentElement.Name == W.t || ContentElement.Name == W.delText)
        {
            sb.AppendFormat(
                "Atom {0}: {1} {2} SHA1:{3} ",
                paddedName,
                ContentElement.Value,
                correlationStatus,
                hashPrefix);
        }
        else
        {
            sb.AppendFormat(
                "Atom {0}: {1} SHA1:{2} ",
                paddedName,
                correlationStatus,
                hashPrefix);
        }

        return sb;
    }

    /// <summary>
    /// Returns at most the first <see cref="AbbreviationLength" /> characters of a value.
    /// Unlike a bare Substring(0, 8) this tolerates null and short values, which can occur
    /// because hashes and unids may be read from attribute values.
    /// </summary>
    private static string Abbreviate(string value)
    {
        if (string.IsNullOrEmpty(value))
        {
            return "";
        }

        return value.Length <= AbbreviationLength ? value : value.Substring(0, AbbreviationLength);
    }

    private static string PadLocalName(int xNamePad, ComparisonUnitAtom item)
    {
        return (item.ContentElement.Name.LocalName + " ").PadRight(xNamePad, '-') + " ";
    }

    /// <summary>
    /// Appends "Ancestors:" followed by the slash-separated ancestor local names, each followed by
    /// its abbreviated unid in brackets when the element has one.
    /// </summary>
    private static void AppendAncestorsDump(StringBuilder sb, ComparisonUnitAtom sr)
    {
        string s = sr
            .AncestorElements.Select(p => p.Name.LocalName + GetUnid(p) + "/")
            .StringConcatenate()
            .TrimEnd('/');

        sb.Append("Ancestors:" + s);
    }

    /// <summary>
    /// Appends "Ancestors:" followed by the slash-separated ancestor local names, each paired with
    /// the abbreviated unid at the same position in <see cref="AncestorUnids" />.
    /// </summary>
    private static void AppendAncestorsUnidsDump(StringBuilder sb, ComparisonUnitAtom sr)
    {
        // AncestorUnids is null until a caller assigns it; treat that as "no unids available"
        // instead of letting Zip throw.
        string[] unids = sr.AncestorUnids ?? new string[0];

        string s = sr.AncestorElements
            .Zip(unids, (a, u) => a.Name.LocalName + "[" + Abbreviate(u) + "]/")
            .StringConcatenate()
            .TrimEnd('/');

        sb.Append("Ancestors:" + s);
    }

    /// <summary>
    /// Returns the element's abbreviated unid wrapped in brackets, or an empty string when the
    /// element has no unid attribute.
    /// </summary>
    private static string GetUnid(XElement p)
    {
        var unid = (string) p.Attribute(PtOpenXml.Unid);
        return unid == null ? "" : "[" + Abbreviate(unid) + "]";
    }
}
212+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using System.Text;
5+
using System.Xml.Linq;
6+
7+
namespace OpenXmlPowerTools
8+
{
9+
/// <summary>
/// Comparison unit that groups other comparison units and takes its hashes from one of the
/// structural ancestors (w:tbl, w:tr, w:tc, w:p, w:txbxContent) of the first atom in the group.
/// </summary>
internal class ComparisonUnitGroup : ComparisonUnit
{
    /// <summary>
    /// Creates a group from the given units and reads the group's hashes from the structural
    /// ancestor at position <paramref name="level" />.
    /// </summary>
    /// <param name="comparisonUnitList">The units making up the group; copied into <c>Contents</c>.</param>
    /// <param name="groupType">The kind of group.</param>
    /// <param name="level">
    /// Zero-based index into the structural ancestors of the group's first atom, counted in the
    /// order in which the atom stores its ancestors.
    /// </param>
    /// <exception cref="OpenXmlPowerToolsException">
    /// The first atom has no structural ancestor at <paramref name="level" />, or the group does not
    /// bottom out in a <see cref="ComparisonUnitWord" />.
    /// </exception>
    public ComparisonUnitGroup(
        IEnumerable<ComparisonUnit> comparisonUnitList,
        ComparisonUnitGroupType groupType,
        int level)
    {
        Contents = comparisonUnitList.ToList();
        ComparisonUnitGroupType = groupType;

        ComparisonUnit first = Contents.First();
        ComparisonUnitAtom comparisonUnitAtom = GetFirstComparisonUnitAtomOfGroup(first);

        // ElementAtOrDefault yields null for an out-of-range level, so the internal-error check
        // below is actually reachable; indexing an array would throw IndexOutOfRangeException
        // before the check could run.
        XElement ancestor = comparisonUnitAtom
            .AncestorElements
            .Where(IsStructuralAncestor)
            .ElementAtOrDefault(level);

        if (ancestor == null)
        {
            throw new OpenXmlPowerToolsException("Internal error: ComparisonUnitGroup");
        }

        SHA1Hash = (string) ancestor.Attribute(PtOpenXml.SHA1Hash);
        CorrelatedSHA1Hash = (string) ancestor.Attribute(PtOpenXml.CorrelatedSHA1Hash);
        StructureSHA1Hash = (string) ancestor.Attribute(PtOpenXml.StructureSHA1Hash);
    }

    /// <summary>The kind of group, as passed to the constructor.</summary>
    public ComparisonUnitGroupType ComparisonUnitGroupType { get; }

    /// <summary>Value of the correlated SHA1 hash attribute of the selected ancestor; null if absent.</summary>
    public string CorrelatedSHA1Hash { get; }

    /// <summary>Value of the structure SHA1 hash attribute of the selected ancestor; null if absent.</summary>
    public string StructureSHA1Hash { get; }

    /// <summary>
    /// Tells whether an ancestor element is one of the element kinds a group can be anchored to.
    /// </summary>
    private static bool IsStructuralAncestor(XElement e)
    {
        return e.Name == W.tbl
               || e.Name == W.tr
               || e.Name == W.tc
               || e.Name == W.p
               || e.Name == W.txbxContent;
    }

    /// <summary>
    /// Descends through the first child of nested groups until a <see cref="ComparisonUnitWord" />
    /// is reached and returns that word's first content item as an atom.
    /// </summary>
    /// <exception cref="OpenXmlPowerToolsException">
    /// The descent reaches a unit that is neither a group nor a word.
    /// </exception>
    private static ComparisonUnitAtom GetFirstComparisonUnitAtomOfGroup(ComparisonUnit group)
    {
        ComparisonUnit current = group;

        // Walk down the first-child chain of nested groups.
        while (current is ComparisonUnitGroup nested)
        {
            current = nested.Contents.First();
        }

        if (!(current is ComparisonUnitWord word))
        {
            throw new OpenXmlPowerToolsException("Internal error: GetFirstComparisonUnitAtomOfGroup");
        }

        return (ComparisonUnitAtom) word.Contents.First();
    }

    /// <summary>
    /// Dumps the group: a header line with the group type and SHA1 hash, followed by the dump of
    /// every child rendered with two additional spaces of indentation.
    /// </summary>
    /// <param name="indent">Number of leading spaces for the header line.</param>
    public override string ToString(int indent)
    {
        var sb = new StringBuilder();
        sb.Append("".PadRight(indent) + "Group Type: " + ComparisonUnitGroupType + " SHA1:" + SHA1Hash + Environment.NewLine);

        foreach (ComparisonUnit child in Contents)
        {
            sb.Append(child.ToString(indent + 2));
        }

        return sb.ToString();
    }
}
74+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
namespace OpenXmlPowerTools
2+
{
3+
/// <summary>
/// Kinds of comparison unit groups.
/// NOTE(review): the members presumably mirror the structural elements a group can be anchored
/// to (paragraph, table, row, cell, text box content) - confirm against the code creating groups.
/// </summary>
internal enum ComparisonUnitGroupType
{
    Paragraph,

    Table,

    Row,

    Cell,

    Textbox
}
11+
}

0 commit comments

Comments
 (0)