# encoding: utf-8
"""Round-trip checks for the XML2kvp converter (XML <-> key/value pairs)."""

import difflib
import importlib.util
import json
import pprint

# Load xml2kvp directly from its source file instead of importing a package.
# The path is relative, so it is resolved against the current working
# directory: run this script from the directory that contains ``core/``.
spec = importlib.util.spec_from_file_location("xml2kvp", "./core/xml2kvp.py")
xml2kvp = importlib.util.module_from_spec(spec)
spec.loader.exec_module(xml2kvp)

def test_xml():
    """Return the reference OAI-DC record as a serialized XML string.

    This is fixture data shared by the conversion tests, not a test case.
    The tests in this file compare converter output to this string with
    ``==``, so whitespace inside the literal is significant.
    """
    # NOTE(review): child elements are indented with two spaces here; the
    # indentation width was not recoverable from the reviewed copy of this
    # file -- confirm it matches what kvp_to_xml(serialize_xml=True) emits.
    return '''<?xml version='1.0' encoding='UTF-8'?>
<oai_dc:dc xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:dpla="http://dp.la/about/map/" xmlns:edm="http://www.europeana.eu/schemas/edm/" xmlns:oai_dc="http://www.openarchives.org/OAI/2.0/oai_dc/" xmlns:oai_qdc="http://worldcat.org/xmlschemas/qdc-1.0/" xmlns:schema="http://schema.org">
  <dcterms:title>Boğazkale (Boğazköy, Hattusha), Turkey</dcterms:title>
  <dcterms:creator>Mellink, Machteld J. (Machteld Johanna)</dcterms:creator>
  <dcterms:description>Lion Gate. South side</dcterms:description>
  <dcterms:date>1953</dcterms:date>
  <dcterms:subject>14th-13th century BC</dcterms:subject>
  <dcterms:format>35mm Kodachrome slide</dcterms:format>
  <dcterms:type>Image</dcterms:type>
  <dcterms:rights>The images included in this collection are licensed under a Creative Commons Attribution-Noncommercial 3.0 United States License http://creativecommons.org/licenses/by-nc/3.0/us/</dcterms:rights>
  <dcterms:identifier>dplapa:BRYNMAWR_Mellink_3213</dcterms:identifier>
  <edm:isShownAt>http://triptych.brynmawr.edu/cdm/ref/collection/Mellink/id/3213</edm:isShownAt>
  <edm:preview>http://triptych.brynmawr.edu/utils/getthumbnail/collection/Mellink/id/3213</edm:preview>
  <dcterms:isPartOf>Machteld J. Mellink Collection of Archaeological Site Photography</dcterms:isPartOf>
  <edm:dataProvider>Bryn Mawr College</edm:dataProvider>
  <edm:provider>PA Digital</edm:provider>
</oai_dc:dc>
'''


# Named like a test but returns data: opt out of pytest collection so the
# non-None return value is not reported as a faulty test.
test_xml.__test__ = False

def test_kvp():
    """Return the expected key/value pairs for the reference record as JSON text.

    Keys are the root element and child element joined with ``|``
    (for example ``oai_dc:dc|dcterms:title``). This is fixture data, not a
    test case; callers parse the result with ``json.loads``.
    """
    return '''{
    "oai_dc:dc|dcterms:title": "Bo\u011fazkale (Bo\u011fazk\u00f6y, Hattusha), Turkey",
    "oai_dc:dc|dcterms:creator": "Mellink, Machteld J. (Machteld Johanna)",
    "oai_dc:dc|dcterms:description": "Lion Gate. South side",
    "oai_dc:dc|dcterms:date": "1953",
    "oai_dc:dc|dcterms:subject": "14th-13th century BC",
    "oai_dc:dc|dcterms:format": "35mm Kodachrome slide",
    "oai_dc:dc|dcterms:type": "Image",
    "oai_dc:dc|dcterms:rights": "The images included in this collection are licensed under a Creative Commons Attribution-Noncommercial 3.0 United States License http://creativecommons.org/licenses/by-nc/3.0/us/",
    "oai_dc:dc|dcterms:identifier": "dplapa:BRYNMAWR_Mellink_3213",
    "oai_dc:dc|edm:isShownAt": "http://triptych.brynmawr.edu/cdm/ref/collection/Mellink/id/3213",
    "oai_dc:dc|edm:preview": "http://triptych.brynmawr.edu/utils/getthumbnail/collection/Mellink/id/3213",
    "oai_dc:dc|dcterms:isPartOf": "Machteld J. Mellink Collection of Archaeological Site Photography",
    "oai_dc:dc|edm:dataProvider": "Bryn Mawr College",
    "oai_dc:dc|edm:provider": "PA Digital"
}'''


# Named like a test but returns data: opt out of pytest collection so the
# non-None return value is not reported as a faulty test.
test_kvp.__test__ = False

def test_kvp_from_csv():
    """Return CSV-style key/value pairs for the reference record as JSON text.

    Unlike ``test_kvp``, the keys here carry no root-element prefix
    (``dcterms:title`` rather than ``oai_dc:dc|dcterms:title``). This is
    fixture data, not a test case; callers parse it with ``json.loads``.
    """
    return '''{
"dcterms:title": "Bo\u011fazkale (Bo\u011fazk\u00f6y, Hattusha), Turkey",
"dcterms:creator": "Mellink, Machteld J. (Machteld Johanna)",
"dcterms:description": "Lion Gate. South side",
"dcterms:date": "1953",
"dcterms:subject": "14th-13th century BC",
"dcterms:format": "35mm Kodachrome slide",
"dcterms:type": "Image",
"dcterms:rights": "The images included in this collection are licensed under a Creative Commons Attribution-Noncommercial 3.0 United States License http://creativecommons.org/licenses/by-nc/3.0/us/",
"dcterms:identifier": "dplapa:BRYNMAWR_Mellink_3213",
"edm:isShownAt": "http://triptych.brynmawr.edu/cdm/ref/collection/Mellink/id/3213",
"edm:preview": "http://triptych.brynmawr.edu/utils/getthumbnail/collection/Mellink/id/3213",
"dcterms:isPartOf": "Machteld J. Mellink Collection of Archaeological Site Photography",
"edm:dataProvider": "Bryn Mawr College",
"edm:provider": "PA Digital"
}
'''


# Named like a test but returns data: opt out of pytest collection so the
# non-None return value is not reported as a faulty test.
test_kvp_from_csv.__test__ = False

def test_xml_config():
    """Return the keyword arguments used for the XML -> KVP conversion.

    A fresh dict is built on every call, so callers may mutate the result.
    ``split_values_on_all_fields`` is ``None`` in this configuration. This is
    fixture data, not a test case.
    """
    return {
        "add_literals": {},
        "capture_attribute_values": [],
        "concat_values_on_all_fields": False,
        "concat_values_on_fields": {},
        "copy_to": {},
        "copy_to_regex": {},
        "copy_value_to_regex": {},
        "error_on_delims_collision": False,
        "exclude_attributes": [],
        "exclude_elements": [],
        "include_all_attributes": True,
        "include_attributes": [],
        "include_sibling_id": False,
        "multivalue_delim": "|",
        "node_delim": "|",
        "ns_prefix_delim": ":",
        "remove_copied_key": True,
        "remove_copied_value": False,
        "remove_ns_prefix": False,
        "repeating_element_suffix_count": False,
        "self_describing": False,
        "skip_attribute_ns_declarations": True,
        "skip_repeating_values": True,
        "skip_root": False,
        "split_values_on_all_fields": None,
        "split_values_on_fields": {},
        "nsmap": {
            "dc": "http://purl.org/dc/elements/1.1/",
            "dcterms": "http://purl.org/dc/terms/",
            "edm": "http://www.europeana.eu/schemas/edm/",
            "oai_dc": "http://www.openarchives.org/OAI/2.0/oai_dc/",
            "dpla": "http://dp.la/about/map/",
            "schema": "http://schema.org",
            "oai_qdc": "http://worldcat.org/xmlschemas/qdc-1.0/",
        },
    }


# Named like a test but returns data: opt out of pytest collection so the
# non-None return value is not reported as a faulty test.
test_xml_config.__test__ = False

def test_kvp_config():
    """Return the keyword arguments used for the KVP -> XML conversion.

    A fresh dict is built on every call, so callers may mutate the result.
    ``split_values_on_all_fields`` is ``"|"`` in this configuration. This is
    fixture data, not a test case.
    """
    return {
        "add_literals": {},
        "capture_attribute_values": [],
        "concat_values_on_all_fields": False,
        "concat_values_on_fields": {},
        "copy_to": {},
        "copy_to_regex": {},
        "copy_value_to_regex": {},
        "error_on_delims_collision": False,
        "exclude_attributes": [],
        "exclude_elements": [],
        "include_all_attributes": True,
        "include_attributes": [],
        "include_sibling_id": False,
        "multivalue_delim": "|",
        "node_delim": "|",
        "ns_prefix_delim": ":",
        "remove_copied_key": True,
        "remove_copied_value": False,
        "remove_ns_prefix": False,
        "repeating_element_suffix_count": False,
        "self_describing": False,
        "skip_attribute_ns_declarations": True,
        "skip_repeating_values": True,
        "skip_root": False,
        "split_values_on_all_fields": "|",
        "split_values_on_fields": {},
        "nsmap": {
            "dc": "http://purl.org/dc/elements/1.1/",
            "dcterms": "http://purl.org/dc/terms/",
            "edm": "http://www.europeana.eu/schemas/edm/",
            "oai_dc": "http://www.openarchives.org/OAI/2.0/oai_dc/",
            "dpla": "http://dp.la/about/map/",
            "schema": "http://schema.org",
            "oai_qdc": "http://worldcat.org/xmlschemas/qdc-1.0/",
        },
    }


# Named like a test but returns data: opt out of pytest collection so the
# non-None return value is not reported as a faulty test.
test_kvp_config.__test__ = False

def test_csv_config():
    """Return the keyword arguments used for the CSV-style KVP -> XML conversion.

    A fresh dict is built on every call, so callers may mutate the result.
    This configuration sets ``split_values_on_all_fields`` to ``"|"`` and
    additionally carries ``add_element_root`` set to ``"oai_dc:dc"``. This is
    fixture data, not a test case.
    """
    return {
        "add_literals": {},
        "capture_attribute_values": [],
        "concat_values_on_all_fields": False,
        "concat_values_on_fields": {},
        "copy_to": {},
        "copy_to_regex": {},
        "copy_value_to_regex": {},
        "error_on_delims_collision": False,
        "exclude_attributes": [],
        "exclude_elements": [],
        "include_all_attributes": True,
        "include_attributes": [],
        "include_sibling_id": False,
        "multivalue_delim": "|",
        "node_delim": "|",
        "ns_prefix_delim": ":",
        "remove_copied_key": True,
        "remove_copied_value": False,
        "remove_ns_prefix": False,
        "repeating_element_suffix_count": False,
        "self_describing": False,
        "skip_attribute_ns_declarations": True,
        "skip_repeating_values": True,
        "skip_root": False,
        "split_values_on_all_fields": "|",
        "split_values_on_fields": {},
        "add_element_root": "oai_dc:dc",
        "nsmap": {
            "dc": "http://purl.org/dc/elements/1.1/",
            "dcterms": "http://purl.org/dc/terms/",
            "edm": "http://www.europeana.eu/schemas/edm/",
            "oai_dc": "http://www.openarchives.org/OAI/2.0/oai_dc/",
            "dpla": "http://dp.la/about/map/",
            "schema": "http://schema.org",
            "oai_qdc": "http://worldcat.org/xmlschemas/qdc-1.0/",
        },
    }


# Named like a test but returns data: opt out of pytest collection so the
# non-None return value is not reported as a faulty test.
test_csv_config.__test__ = False

def test_xml_to_kvp():
    """Check that the reference XML converts to the expected key/value pairs.

    Raises:
        AssertionError: if the converter output differs from the parsed
            ``test_kvp()`` fixture.
    """
    kvp_output = xml2kvp.XML2kvp.xml_to_kvp(test_xml(), **test_xml_config())
    assert kvp_output == json.loads(test_kvp())
    print('xml to kvp test passed!')

def test_kvp_to_xml():
    """Check that the expected key/value pairs convert back to the reference XML.

    The serialized output is compared to ``test_xml()`` with ``==``, so the
    match must be exact, whitespace included.

    Raises:
        AssertionError: if the serialized XML differs from the fixture.
    """
    xml_output = xml2kvp.XML2kvp.kvp_to_xml(
        json.loads(test_kvp()),
        serialize_xml=True,
        **test_kvp_config()
    )
    assert xml_output == test_xml()
    print('kvp to xml test passed!')

def test_csv_to_xml():
    """Check that CSV-style key/value pairs convert to the reference XML.

    The input keys have no root-element prefix; the configuration from
    ``test_csv_config()`` passes ``add_element_root`` alongside the usual
    options. The serialized output is compared to ``test_xml()`` with ``==``.

    Raises:
        AssertionError: if the serialized XML differs from the fixture.
    """
    xml_output = xml2kvp.XML2kvp.kvp_to_xml(
        json.loads(test_kvp_from_csv()),
        serialize_xml=True,
        **test_csv_config()
    )
    assert xml_output == test_xml()
    print('csv to xml test passed!')

# Run the checks only when executed as a script. A test runner that imports
# this module calls the test functions itself, so running them at import time
# would execute each one twice and abort collection on the first failure.
if __name__ == "__main__":
    test_xml_to_kvp()
    test_kvp_to_xml()
    test_csv_to_xml()