Skip to content

Commit 963bf63

Browse files
author
Esty Thomas
committed
Merge branch 'staging'
2 parents c4722b5 + 7e7885a commit 963bf63

File tree

2 files changed

+227
-3
lines changed

2 files changed

+227
-3
lines changed

core/xml2kvp.py

+20-3
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,10 @@ class DelimiterCollision(Exception):
221221
"repeating_element_suffix_count": {
222222
"description": "Boolean to suffix field name with incrementing integer (after first instance, which does not receieve a suffix), e.g. XML ``<foo><bar>42</bar><bar>109</bar></foo>`` would map to ``foo_bar``:``42``, ``foo_bar_#1``:``109`` [Default: ``false``, Overrides: ``skip_repeating_values``]",
223223
"type": "boolean"
224+
},
225+
"add_element_root": {
226+
"description": "xml tag with which to wrap each element as a root",
227+
"type": "string"
224228
}
225229
}
226230
}
@@ -735,6 +739,14 @@ def kvp_to_xml(kvp, handler=None, return_handler=False, serialize_xml=False, **k
735739

736740
# init XMLRecord
737741
xml_record = XMLRecord()
742+
if hasattr(handler, 'add_element_root'):
743+
root_node = handler.add_element_root
744+
if handler.ns_prefix_delim in root_node:
745+
prefix, tag_name = root_node.split(handler.ns_prefix_delim)
746+
tag_name = '{%s}%s' % (handler.nsmap[prefix], tag_name)
747+
else:
748+
tag_name = root_node
749+
xml_record.root_node = etree.Element(tag_name, nsmap=handler.nsmap)
738750

739751
# loop through items
740752
for k, v in kvp.items():
@@ -1146,18 +1158,23 @@ def merge_root_nodes(self):
11461158
Method to merge all nodes from self.nodes
11471159
'''
11481160

1161+
node_list = self.nodes
11491162
# set root with arbitrary first node
1150-
self.root_node = self.nodes[0]
1163+
if self.root_node is None:
1164+
self.root_node = self.nodes[0]
1165+
node_list = self.nodes[1:]
11511166

11521167
# loop through others, add children to root node
1153-
for node in self.nodes[1:]:
1168+
for node in node_list:
11541169

11551170
# get children
11561171
children = node.getchildren()
11571172

11581173
# loop through and add to root node
11591174
for child in children:
11601175
self.root_node.append(child)
1176+
if len(children) == 0 and node.tag != self.root_node.tag:
1177+
self.root_node.append(node)
11611178

11621179
def merge_siblings(self, remove_empty_nodes=True, remove_sibling_hash_attrib=True):
11631180
'''
@@ -1252,4 +1269,4 @@ def serialize(self, pretty_print=True):
12521269
Method to serialize self.root_node to XML
12531270
'''
12541271

1255-
return etree.tostring(self.root_node, pretty_print=pretty_print).decode('utf-8')
1272+
return etree.tostring(self.root_node, pretty_print=pretty_print, xml_declaration=True, encoding="UTF-8").decode('utf-8')

tests/test_xml2kvp.py

+207
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,207 @@
1+
# encoding: utf-8
2+
3+
import importlib.util
4+
spec = importlib.util.spec_from_file_location("xml2kvp", "./core/xml2kvp.py")
5+
xml2kvp = importlib.util.module_from_spec(spec)
6+
spec.loader.exec_module(xml2kvp)
7+
import json
8+
import difflib
9+
import pprint
10+
11+
# Fixture helper: returns the sample OAI-DC record as serialized XML text.
# test_xml_to_kvp() parses this text, while test_kvp_to_xml() and
# test_csv_to_xml() compare their serialized output against it with ``==``,
# so every character of the literal (whitespace included) is significant.
# NOTE(review): any leading indentation of the child elements is not visible
# in this view -- confirm against the checked-in file before editing the literal.
def test_xml():
12+
return '''<?xml version='1.0' encoding='UTF-8'?>
13+
<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dpla="http://dp.la/about/map/" xmlns:edm="http://www.europeana.eu/schemas/edm/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:oai_qdc="http://worldcat.org/xmlschemas/qdc-1.0/" xmlns:schema="http://schema.org">
14+
<dcterms:title>Boğazkale (Boğazköy, Hattusha), Turkey</dcterms:title>
15+
<dcterms:creator>Mellink, Machteld J. (Machteld Johanna)</dcterms:creator>
16+
<dcterms:description>Lion Gate. South side</dcterms:description>
17+
<dcterms:date>1953</dcterms:date>
18+
<dcterms:subject>14th-13th century BC</dcterms:subject>
19+
<dcterms:format>35mm Kodachrome slide</dcterms:format>
20+
<dcterms:type>Image</dcterms:type>
21+
<dcterms:rights>The images included in this collection are licensed under a Creative Commons Attribution-Noncommercial 3.0 United States License http://creativecommons.org/licenses/by-nc/3.0/us/</dcterms:rights>
22+
<dcterms:identifier>dplapa:BRYNMAWR_Mellink_3213</dcterms:identifier>
23+
<edm:isShownAt>http://triptych.brynmawr.edu/cdm/ref/collection/Mellink/id/3213</edm:isShownAt>
24+
<edm:preview>http://triptych.brynmawr.edu/utils/getthumbnail/collection/Mellink/id/3213</edm:preview>
25+
<dcterms:isPartOf>Machteld J. Mellink Collection of Archaeological Site Photography</dcterms:isPartOf>
26+
<edm:dataProvider>Bryn Mawr College</edm:dataProvider>
27+
<edm:provider>PA Digital</edm:provider>
28+
</oai_dc:dc>
29+
'''
30+
31+
# Fixture helper: returns JSON text mapping flattened
# "oai_dc:dc|<prefix>:<tag>" keys to the values of the sample record.
# Callers decode it with json.loads(): it is the expected result in
# test_xml_to_kvp() and the input to test_kvp_to_xml().
# The literal is not a raw string, so the \uXXXX escapes are resolved by
# Python before json.loads() ever sees the text.
def test_kvp():
32+
return '''{
33+
"oai_dc:dc|dcterms:title": "Bo\u011fazkale (Bo\u011fazk\u00f6y, Hattusha), Turkey",
34+
"oai_dc:dc|dcterms:creator": "Mellink, Machteld J. (Machteld Johanna)",
35+
"oai_dc:dc|dcterms:description": "Lion Gate. South side",
36+
"oai_dc:dc|dcterms:date": "1953",
37+
"oai_dc:dc|dcterms:subject": "14th-13th century BC",
38+
"oai_dc:dc|dcterms:format": "35mm Kodachrome slide",
39+
"oai_dc:dc|dcterms:type": "Image",
40+
"oai_dc:dc|dcterms:rights": "The images included in this collection are licensed under a Creative Commons Attribution-Noncommercial 3.0 United States License http://creativecommons.org/licenses/by-nc/3.0/us/",
41+
"oai_dc:dc|dcterms:identifier": "dplapa:BRYNMAWR_Mellink_3213",
42+
"oai_dc:dc|edm:isShownAt": "http://triptych.brynmawr.edu/cdm/ref/collection/Mellink/id/3213",
43+
"oai_dc:dc|edm:preview": "http://triptych.brynmawr.edu/utils/getthumbnail/collection/Mellink/id/3213",
44+
"oai_dc:dc|dcterms:isPartOf": "Machteld J. Mellink Collection of Archaeological Site Photography",
45+
"oai_dc:dc|edm:dataProvider": "Bryn Mawr College",
46+
"oai_dc:dc|edm:provider": "PA Digital"
47+
}'''
48+
49+
# Fixture helper: returns the same sample record as JSON text, but keyed by the
# bare "<prefix>:<tag>" names with no "oai_dc:dc|" root segment.
# test_csv_to_xml() decodes it with json.loads() and passes it to kvp_to_xml()
# together with test_csv_config(), whose "add_element_root" entry names the
# root element ("oai_dc:dc") that these keys omit.
def test_kvp_from_csv():
50+
return '''{
51+
"dcterms:title": "Bo\u011fazkale (Bo\u011fazk\u00f6y, Hattusha), Turkey",
52+
"dcterms:creator": "Mellink, Machteld J. (Machteld Johanna)",
53+
"dcterms:description": "Lion Gate. South side",
54+
"dcterms:date": "1953",
55+
"dcterms:subject": "14th-13th century BC",
56+
"dcterms:format": "35mm Kodachrome slide",
57+
"dcterms:type": "Image",
58+
"dcterms:rights": "The images included in this collection are licensed under a Creative Commons Attribution-Noncommercial 3.0 United States License http://creativecommons.org/licenses/by-nc/3.0/us/",
59+
"dcterms:identifier": "dplapa:BRYNMAWR_Mellink_3213",
60+
"edm:isShownAt": "http://triptych.brynmawr.edu/cdm/ref/collection/Mellink/id/3213",
61+
"edm:preview": "http://triptych.brynmawr.edu/utils/getthumbnail/collection/Mellink/id/3213",
62+
"dcterms:isPartOf": "Machteld J. Mellink Collection of Archaeological Site Photography",
63+
"edm:dataProvider": "Bryn Mawr College",
64+
"edm:provider": "PA Digital"
65+
}
66+
'''
67+
68+
def _base_config(**overrides):
    """Build a fresh XML2kvp keyword-argument dict for the tests.

    The three public config helpers share every setting except
    ``split_values_on_all_fields`` and ``add_element_root``, so the common
    part lives here once.

    Args:
        **overrides: settings to replace or add. Existing keys keep their
            position; new keys are inserted before ``nsmap`` so the key order
            matches the hand-written dicts this helper replaces.

    Returns:
        A new dict on every call, with new nested containers, so callers may
        mutate the result without affecting one another.
    """
    config = {
        "add_literals": {},
        "capture_attribute_values": [],
        "concat_values_on_all_fields": False,
        "concat_values_on_fields": {},
        "copy_to": {},
        "copy_to_regex": {},
        "copy_value_to_regex": {},
        "error_on_delims_collision": False,
        "exclude_attributes": [],
        "exclude_elements": [],
        "include_all_attributes": True,
        "include_attributes": [],
        "include_sibling_id": False,
        "multivalue_delim": "|",
        "node_delim": "|",
        "ns_prefix_delim": ":",
        "remove_copied_key": True,
        "remove_copied_value": False,
        "remove_ns_prefix": False,
        "repeating_element_suffix_count": False,
        "self_describing": False,
        "skip_attribute_ns_declarations": True,
        "skip_repeating_values": True,
        "skip_root": False,
        "split_values_on_all_fields": None,
        "split_values_on_fields": {},
    }
    config.update(overrides)
    # nsmap is added last so it stays the final key, as in the original dicts.
    config["nsmap"] = {
        "dc": "http://purl.org/dc/elements/1.1/",
        "dcterms": "http://purl.org/dc/terms/",
        "edm": "http://www.europeana.eu/schemas/edm/",
        "oai_dc": "http://www.openarchives.org/OAI/2.0/oai_dc/",
        "dpla": "http://dp.la/about/map/",
        "schema": "http://schema.org",
        "oai_qdc": "http://worldcat.org/xmlschemas/qdc-1.0/",
    }
    return config


def test_xml_config():
    """Return the settings used for the XML -> KVP direction (no value splitting)."""
    return _base_config()


def test_kvp_config():
    """Return the settings used for the KVP -> XML direction (split values on ``|``)."""
    return _base_config(split_values_on_all_fields="|")


def test_csv_config():
    """Return the KVP -> XML settings plus ``add_element_root`` set to ``oai_dc:dc``."""
    return _base_config(
        split_values_on_all_fields="|",
        add_element_root="oai_dc:dc",
    )


# These helpers return data, not test results. Because of their ``test_``
# prefix pytest would otherwise collect them as tests and complain about the
# non-None return value, so opt them out of collection explicitly.
test_xml_config.__test__ = False
test_kvp_config.__test__ = False
test_csv_config.__test__ = False
185+
186+
def test_xml_to_kvp():
    """Flatten the sample XML record and compare it with the expected key/value pairs."""
    record = test_xml()
    options = test_xml_config()
    flattened = xml2kvp.XML2kvp.xml_to_kvp(record, **options)

    expected = json.loads(test_kvp())
    assert flattened == expected
    print('xml to kvp test passed!')
190+
191+
def test_kvp_to_xml():
    """Rebuild XML from the rooted key/value pairs and compare it with the sample record."""
    pairs = json.loads(test_kvp())
    options = test_kvp_config()
    serialized = xml2kvp.XML2kvp.kvp_to_xml(pairs, serialize_xml=True, **options)

    assert serialized == test_xml()
    print('kvp to xml test passed!')
197+
198+
def test_csv_to_xml():
    """Rebuild XML from the root-less (CSV-style) pairs and compare it with the sample record."""
    pairs = json.loads(test_kvp_from_csv())
    options = test_csv_config()
    serialized = xml2kvp.XML2kvp.kvp_to_xml(pairs, serialize_xml=True, **options)

    assert serialized == test_xml()
    print('csv to xml test passed!')
204+
205+
# Run the checks only when this file is executed as a script. Calling them
# unconditionally at module level made every import of this module (including
# pytest's collection pass) execute all three checks as a side effect, and an
# assertion failure there surfaces as an import error, not a test failure.
if __name__ == "__main__":
    test_xml_to_kvp()
    test_kvp_to_xml()
    test_csv_to_xml()

0 commit comments

Comments
 (0)