-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinventory_helpers.py
212 lines (189 loc) · 7.51 KB
/
inventory_helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
#!/usr/bin/env python3
"""
Helper functions related to inventories.
"""
import csv
import glob
import os
import sys

import nulrdcscripts.ingest.helpers as helpers
def load_inventory(inventory_path: str, desc_arg: list[str]):
    """
    Loads an inventory with the loader that matches its work type.

    The work type comes from get_work_type(). IMAGE inventories are read by
    image_load_inventory(); every other work type is read by av_load_inventory().

    .. note:: worktype can be IMAGE, AUDIO, or VIDEO

    :param str inventory_path: fullpath to inventory csv
    :param list[str] desc_arg: inventory fields to use for making description
        (only handed to the av loader)
    :returns: inventory data and worktype
    :rtype: tuple of list[dict] and str
    """
    kind = get_work_type(inventory_path)
    rows: list[dict[str, str]] = (
        image_load_inventory(inventory_path)
        if kind == "IMAGE"
        else av_load_inventory(inventory_path, desc_arg)
    )
    return rows, kind
def image_load_inventory(inventory_path: str):
    """
    Loads image inventory.

    Leading lines containing "Name of Person Inventorying" or
    "MEADOW Ingest fields" are skipped; the first line after them is read as
    the csv header. Columns other than the six listed below are ignored.

    :param str inventory_path: fullpath to inventory csv
    :returns: inventory data for each item including filename, label, work_accession_number,
        file_accession_number, role, and description
    :rtype: list of dicts
    :raises KeyError: if one of the returned columns is missing from the inventory
    """
    wanted_columns = (
        "filename",
        "label",
        "work_accession_number",
        "file_accession_number",
        "role",
        "description",
    )
    # newline="" is what the csv module asks for: with the default setting,
    # line endings inside quoted fields are rewritten before the parser sees them
    with open(inventory_path, encoding="utf-8", newline="") as f:
        # skip non fieldname lines
        while True:
            # remember where this line starts so it can be handed back
            stream_index = f.tell()
            line = f.readline()
            if not (
                "Name of Person Inventorying" in line
                or "MEADOW Ingest fields" in line
            ):
                # first non-preamble line (or end of file): rewind so that
                # DictReader reads it as the header
                f.seek(stream_index, os.SEEK_SET)
                break
        reader = csv.DictReader(f, delimiter=",")
        return [{column: row[column] for column in wanted_columns} for row in reader]
def av_load_inventory(inventory_path: str, desc_arg: list[str]):
    """
    Loads av inventory.

    Preamble lines are skipped with skip_non_fieldnames(). The description of
    each item is assembled by get_inventory_description() from the fields that
    get_description_fields() accepts.

    :param str inventory_path: fullpath to inventory csv
    :param list[str] desc_arg: inventory fields to use for making description
    :returns: inventory data for each item including filename, work_accession_number,
        description, and label
    :rtype: list of dicts
    :raises KeyError: if a returned column is missing from the inventory
    """
    # newline="" is what the csv module asks for: with the default setting,
    # line endings inside quoted fields are rewritten before the parser sees them
    with open(inventory_path, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(skip_non_fieldnames(f), delimiter=",")
        # fieldnames is None when nothing follows the preamble; hand over an
        # empty list so the field check sees "no columns" rather than None
        description_fields = get_description_fields(desc_arg, reader.fieldnames or [])
        return [
            {
                "filename": row["filename"],
                "work_accession_number": row["work_accession_number"],
                "description": get_inventory_description(row, description_fields),
                "label": row["label"],
            }
            for row in reader
        ]
def check_inventory(inventory_path: str):
    """
    Checks given inventory is a csv and exists.
    Prints an error and exits with status 1 if not.

    :param str inventory_path: fullpath to inventory csv
    :raises SystemExit: if the path does not end in ".csv" or is not an existing file
    """
    # sys.exit rather than quit(): quit() is an interactive helper installed by
    # the site module, and it reports success (status 0) to the calling shell
    if not inventory_path.endswith(".csv"):
        print("\n--- ERROR: " + inventory_path + " is not a csv file ---\n")
        sys.exit(1)
    if not os.path.isfile(inventory_path):
        print("\n--- ERROR: " + inventory_path + " is not a file ---\n")
        sys.exit(1)
def find_inventory(dir: str):
    """
    Searches for inventory csv in given directory.
    Returns None if no inventory is found.

    .. note::
        Will choose the first valid file in sorted path order.
        Valid file: csv whose file name does not contain "_ingest.csv"
        or "qc_log.csv" (ingest sheet or qc log)

    :param str dir: fullpath to search directory
    :returns: path of the chosen inventory csv, or None
    :rtype: str or None
    """
    # escape the directory so characters such as "[" in a folder name are
    # matched literally instead of being read as glob syntax
    pattern = os.path.join(glob.escape(dir), "*.csv")
    # glob gives no ordering guarantee; sorting makes the choice repeatable
    for candidate in sorted(glob.glob(pattern)):
        # judge the file name only, so a parent folder name cannot rule a file out
        name = os.path.basename(candidate)
        if "_ingest.csv" in name or "qc_log.csv" in name:
            continue
        return candidate
    # will only reach here if no valid file is found
    return None
def get_inventory_description(row: dict[str, str], description_fields: list[str]):
    """
    Generates inventory description based on description fields.

    Values are taken in the order of description_fields and joined with "; ".
    Empty values are left out.

    :param dict row: inventory dict for item
    :param list[str] description_fields: inventory fields to use for making description
    :returns: inventory description for file
    :rtype: str
    :raises KeyError: if a description field is not a key of row
    """
    values = (row[header] for header in description_fields)
    # drop empty values so they do not leave dangling "; " separators
    return "; ".join(value for value in values if value)
def get_work_type(inventory_path: str):
    """
    Determines work type based on inventory fieldnames.

    The header is the first line left after skip_non_fieldnames(). The checks
    run in the order IMAGE, AUDIO, VIDEO and the first match wins. Prints an
    error and exits with status 1 if no format-specific column is present.

    :param str inventory_path: fullpath to inventory csv
    :returns: worktype as 'IMAGE', 'AUDIO', or 'VIDEO'
    :rtype: str
    :raises SystemExit: if the work type cannot be determined
    """
    with open(inventory_path, encoding="utf-8", newline="") as f:
        reader = csv.DictReader(skip_non_fieldnames(f), delimiter=",")
        # fieldnames is None when nothing follows the preamble; treat that as
        # "no columns" so the explanatory error below is what the user sees
        inventory_fields = set(reader.fieldnames or [])

    if "Width (cm.)" in inventory_fields:
        return "IMAGE"
    if not inventory_fields.isdisjoint({"speed IPS", "Speed IPS"}):
        return "AUDIO"
    if not inventory_fields.isdisjoint({"video standard", "Region", "stock", "Stock"}):
        return "VIDEO"

    print("\n---ERROR: Unable to determine work_type. ---\n")
    print("make sure that your inventory has the necessary format-specific columns")
    print('IMAGE: "Width (cm.)"')
    print('AUDIO: "speed IPS"')
    print('VIDEO: "video standard", "Region" or "Stock"')
    # sys.exit rather than quit(): quit() is an interactive helper installed by
    # the site module, and it reports success (status 0) to the calling shell
    sys.exit(1)
def get_description_fields(desc_arg: list[str], inventory_fields: list[str]):
    """
    Checks the inventory contains necessary fields for description creation.
    Prompts user to continue if there are missing fields.
    If the user continues, removes missing fields from description fields.
    If the user declines, exits with status 1.

    :param list desc_arg: description fields to check; when empty, the
        default ["description"] is returned without checking the inventory
    :param list inventory_fields: inventory fields(columns); None is treated as no columns
    :returns: valid description fields, in the order given in desc_arg
    :rtype: list of str
    :raises SystemExit: if fields are missing and the user chooses not to continue
    """
    if not desc_arg:
        return ["description"]

    available = inventory_fields or []
    # find missing description fields
    missing_fields = [field for field in desc_arg if field not in available]
    if missing_fields:
        print("+++ WARNING: Your inventory is missing the following columns +++")
        print(missing_fields)
        if not helpers.yn_check("SKIP COLUMNS AND CONTINUE?"):
            sys.exit(1)

    # remove missing fields; filter the requested fields themselves, keeping
    # the caller's order
    return [field for field in desc_arg if field not in missing_fields]
def skip_non_fieldnames(f):
    """
    Moves an open inventory file past its leading non-fieldname lines.

    Lines are read from the current position while they contain
    "Name of Person Inventorying" or "MEADOW Ingest fields". The file is then
    repositioned to the start of the first line that contains neither, so the
    next read returns that line.

    :param f: inventory file
    :type f: TextIOWrapper
    :return: the same file object, positioned after the extraneous lines
    :rtype: TextIOWrapper
    """
    # offset of the line about to be examined
    line_start = f.tell()
    text = f.readline()
    while "Name of Person Inventorying" in text or "MEADOW Ingest fields" in text:
        line_start = f.tell()
        text = f.readline()
    # the last line read was not a preamble line; hand it back to the caller
    f.seek(line_start, os.SEEK_SET)
    return f