-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathsample_receipt_to_tsv.py
51 lines (34 loc) · 1.44 KB
/
sample_receipt_to_tsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This script extracts the sample identifiers contained in the XML data
returned when a collection of samples is registered in the ENA.
"""
import argparse
import xml.etree.cElementTree as et
def main(input_file, output_file):
    """Extract sample identifiers from a receipt XML file into a TSV file.

    Every ``SAMPLE`` element found anywhere in the XML tree produces one
    output row with three tab-separated columns, in this order:

    1. the ``alias`` attribute of the ``SAMPLE`` element,
    2. the ``accession`` attribute of its first ``EXT_ID`` child
       (presumably the BioSample accession -- confirm against the receipt
       format),
    3. the ``accession`` attribute of the ``SAMPLE`` element itself.

    A missing attribute is rendered as the text ``None``; a missing
    ``EXT_ID`` child is rendered the same way instead of aborting the run.
    When the XML contains no ``SAMPLE`` element the output file is created
    empty.

    Args:
        input_file: Path to the XML file to parse.
        output_file: Path to the TSV file to create (overwritten if present).

    Raises:
        xml.etree.ElementTree.ParseError: If the input is not well-formed XML.
        OSError: If the input cannot be read or the output cannot be written.
    """
    root = et.parse(input_file).getroot()

    rows = []
    for sample in root.iter('SAMPLE'):
        alias = sample.get('alias')
        ena_accession = sample.get('accession')
        # Look the child up by tag rather than by position: indexing the
        # first child fails on a SAMPLE without children and picks the wrong
        # element if something else precedes EXT_ID.
        ext_id = sample.find('EXT_ID')
        biosample = ext_id.get('accession') if ext_id is not None else None
        rows.append(f'{alias}\t{biosample}\t{ena_accession}\n')

    # Write output TSV. The encoding is explicit so non-ASCII aliases are
    # written the same way regardless of the platform's locale.
    with open(output_file, 'w', encoding='utf-8') as outfile:
        outfile.writelines(rows)
def parse_arguments():
    """Parse the command line and return the resulting namespace.

    Two string options are defined, both mandatory: ``-i/--input-file``
    (stored as ``input_file``) and ``-o/--output-file`` (stored as
    ``output_file``). The module docstring is used as the help description.

    Returns:
        argparse.Namespace: Namespace holding ``input_file`` and
        ``output_file``.
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )

    # (flags, destination attribute, help text) for each mandatory option.
    option_table = (
        (
            ('-i', '--input-file'),
            'input_file',
            'Path to the input XML file with the data returned when the samples were registered.',
        ),
        (
            ('-o', '--output-file'),
            'output_file',
            'Path to the output TSV file with the sample identifiers extracted from the input XML file.',
        ),
    )
    for flags, destination, help_text in option_table:
        cli.add_argument(
            *flags,
            type=str,
            required=True,
            dest=destination,
            help=help_text,
        )

    return cli.parse_args()
if __name__ == '__main__':
    # Script entry point: read the command-line options and hand them to
    # main() as keyword arguments.
    args = parse_arguments()
    main(input_file=args.input_file, output_file=args.output_file)