Skip to content

Commit bb29e08

Browse files
committed
ENH Make crude harmonization script idempotent
1 parent 8ebfb35 commit bb29e08

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

db_harmonisation/construct_groot_mappings.py

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -157,21 +157,29 @@ def combine_groot_mappings(argannot_path, resfinder_path, card_path, missing_pat
157 157
card_groot_mapping,
158 158
missing_groot_mapping
159 159
]).sort_values(by=['Original ID'])
160-
comb_groot_mapping.to_csv('./mapping/groot_ARO_mapping.tsv', sep='\t', index=False)
160+
oname_aro = 'mapping/groot_ARO_mapping.tsv'
161+
comb_groot_mapping.to_csv(oname_aro, sep='\t', index=False)
161 162

162 163
groot_missing_genes = []
163 164
with open('./manual_curation/groot_missing.fasta', 'r') as ifile:
164 165
for record in SeqIO.parse(ifile, 'fasta'):
165 166
groot_missing_genes.append(record.id)
166 167

167 168
groot_manual_curation = pd.DataFrame(list(set(groot_missing_genes) - set(comb_groot_mapping['Original ID'])), columns=['Original ID'])
168-
groot_manual_curation.to_csv('./manual_curation/groot_curation.tsv', sep='\t', index=False)
169+
oname_manual = 'manual_curation/groot_curation.tsv'
170+
groot_manual_curation.to_csv(oname_manual, sep='\t', index=False)
171+
return oname_aro, oname_manual
172+
173+
@TaskGenerator
174+
def copy_file(oname, dest):
175+
os.makedirs(os.path.dirname(dest), exist_ok=True)
176+
os.rename(oname, dest)
177+
return dest
169 178

170 179
def get_groot_aro_mapping():
171 180
argannot_input = get_groot_argannot_db()
172 181
resfinder_input = get_groot_resfinder_db()
173 182
card_input = get_groot_card_db()
174 183
missing_input = get_groot_missing()
175-
combine_groot_mappings(argannot_input, resfinder_input, card_input, missing_input)
176-
barrier()
177-
os.rename('./mapping/groot_ARO_mapping.tsv', '../argnorm/data/groot_ARO_mapping.tsv')
184+
onames = combine_groot_mappings(argannot_input, resfinder_input, card_input, missing_input)
185+
copy_file(onames[0], '../argnorm/data/groot_ARO_mapping.tsv')

0 commit comments

Comments
 (0)