-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpractical_4_parser.py
More file actions
29 lines (19 loc) · 859 Bytes
/
Copy pathpractical_4_parser.py
File metadata and controls
29 lines (19 loc) · 859 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
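# This script takes the XML output of a BLAST run and prints the best hit
# for each query to standard output (redirect it to a file to save it).
# Each line has four space-separated fields:
# query_title query_hsp_sequence hit_title hit_hsp_sequence
# Usage: <script> blast_output.xml reference_proteome target_proteome
# (the two proteome names are read but not currently written to the output)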
import sys
import re
from Bio.Blast import NCBIXML
# Command-line arguments: the BLAST XML output file, followed by the
# reference and target proteome names.
blastOutputXMLFile = sys.argv[1]
# NOTE(review): the two proteome names are read (so three arguments are
# still required) but are not used in the output below -- confirm whether
# they are meant to be printed.
referenceProteome = sys.argv[2]
targetProteome = sys.argv[3]

# Matches the "gnl|BL_ORD_ID|<digits> " marker that is removed from hit
# titles. Raw string so the backslashes reach the regex engine untouched;
# compiled once here instead of once per BLAST record.
blastOrdinalIdPattern = re.compile(r"gnl\|BL_ORD_ID\|\d* ")

# The parser reads lazily from the handle, so all iteration has to happen
# inside the `with` block; the file is closed when the block exits, even if
# parsing fails part-way through.
with open(blastOutputXMLFile) as blastOutputXMLHandle:
    for aSingleBlastRecord in NCBIXML.parse(blastOutputXMLHandle):
        # Queries without any hit produce no output line.
        if not aSingleBlastRecord.alignments:
            continue

        # Only the first listed hit is reported for each query.
        description = aSingleBlastRecord.descriptions[0]
        alignment = aSingleBlastRecord.alignments[0]
        firstHsp = alignment.hsps[0]

        reftitle = aSingleBlastRecord.query
        title = blastOrdinalIdPattern.sub("", description.title)

        # One line per query: query title, query sequence of the first HSP,
        # cleaned hit title, subject sequence of the first HSP.
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(' '.join([reftitle, firstHsp.query, title, firstHsp.sbjct]))