-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path1.0-to-aardvark.py
70 lines (56 loc) · 2.04 KB
/
1.0-to-aardvark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import json
import csv
import os
# Manual changes before run
dir_crosswalk = 'crosswalk.csv'
# add directory of JSON files in the 1.0 schema
dir_old_schema = '1.0/'
# add directory for new JSON files in the Aardvark schema
dir_new_schema = 'aardvark/'

# Load the crosswalk.csv and make it a dictionary that maps the value in the
# first column of each row to the value in the second column.
crosswalk = {}
# newline='' lets the csv module do its own newline handling, so quoted
# fields that contain line breaks are parsed correctly.
with open(dir_crosswalk, encoding='utf8', newline='') as f:
    reader = csv.reader(f)
    # The first row is consumed as a header and never added to the mapping;
    # the default keeps an empty file from raising StopIteration.
    fields = next(reader, None)
    for record in reader:
        # Blank lines (and rows with a single column) carry no mapping.
        if len(record) < 2:
            continue
        old, new = record[0], record[1]
        crosswalk[old] = new
# Function to update the metadata schema
def schema_update(filepath):
    """Rewrite one JSON record with Aardvark keys and save it to the output folder.

    The record is loaded from ``filepath``; every key that appears in the
    module-level ``crosswalk`` mapping is renamed to its mapped name,
    ``gbl_mdVersion_s`` is set to ``"Aardvark"``, ``geoblacklight_version``
    is removed, and the result is passed through ``string2array``. The
    updated record is written with a two-space indent into
    ``dir_new_schema`` under the same file name as the input.

    Args:
        filepath: Path of the JSON file to convert.
    """
    # Open the JSON file with schema GBL 1.0 and load it as a dictionary
    with open(filepath, encoding='utf8') as fr:
        data = json.load(fr)

    # Loop over crosswalk to change dictionary keys; keys without a
    # crosswalk entry are left as they are.
    for old_schema, new_schema in crosswalk.items():
        if old_schema in data:
            data[new_schema] = data.pop(old_schema)

    # Change the metadata type
    data["gbl_mdVersion_s"] = "Aardvark"
    # Remove geoblacklight_version (no-op when the key is absent)
    data.pop("geoblacklight_version", None)
    # Check for multi-valued fields - if so, convert their values to arrays
    data = string2array(data)

    # Derive the output name from the argument itself so the function does
    # not depend on a loop variable that happens to exist at module level.
    if dir_new_schema:
        os.makedirs(dir_new_schema, exist_ok=True)
    filepath_updated = os.path.join(dir_new_schema, os.path.basename(filepath))

    # Write updated JSON to the new folder
    with open(filepath_updated, 'w', encoding='utf8') as fw:
        fw.write(json.dumps(data, indent=2))
# Function to convert fields that end with '_sm' or '_im' to an array
def string2array(dict):
    """Wrap the values of multi-valued fields in a list, in place.

    A field counts as multi-valued when the last ``_``-separated segment of
    its key is ``sm`` or ``im``. Values that are already lists (including
    list subclasses) are left untouched; any other value ``v`` becomes
    ``[v]``.

    Args:
        dict: Mapping of field names to values. It is modified in place.
            (The name shadows the builtin but is kept for compatibility
            with existing callers.)

    Returns:
        The same mapping object that was passed in.
    """
    # Only existing keys are reassigned, so the mapping's size never changes
    # while it is being iterated.
    for key in dict:
        suffix = key.rsplit('_', 1)[-1]
        if suffix in ('sm', 'im'):
            val = dict[key]
            # isinstance rather than an exact type comparison, so list
            # subclasses are not wrapped a second time.
            if not isinstance(val, list):
                dict[key] = [val]
    return dict
# Collect all JSON files in a list
# Iterate the list to update metadata schema
files = [x for x in os.listdir(dir_old_schema) if x.endswith('.json')]
for file in files:
    print(f'Executing {file} ...')
    # os.path.join gives the same path as plain concatenation when the
    # directory ends with a separator, and still works when it does not.
    filepath = os.path.join(dir_old_schema, file)
    schema_update(filepath)