Skip to content

Commit 621a17e

Browse files
committed
Handle different databases for the different BLAST applications
1 parent 1e6ee72 commit 621a17e

6 files changed

+134
-118
lines changed

Diff for: BioPythonUtils.py

+23-34
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import time
77
import sys
88
import os
9+
import json
910
# BioPython 1.68 is bundled with this package
1011
sys.path.append(os.path.dirname(__file__))
1112
from Bio import SeqIO, Entrez
@@ -14,13 +15,15 @@
1415
from Bio.Alphabet import IUPAC
1516
from Bio.Blast import NCBIWWW
1617

17-
# Use globals which can be set by show_quick_panel()
18+
# Get BLAST details from the JSON config file
19+
app_info = json.load(open(os.path.join(os.path.dirname(__file__),
20+
"config.json")))
21+
blast_formats = app_info['blast_formats']
22+
blast_info = app_info['blast_info']
23+
# Globals which are used or set by show_quick_panel()
1824
blast_db = None
1925
blast_app = None
2026
blast_format = None
21-
blast_apps = ['blastp', 'blastn', 'blastx', 'tblastn', 'tblastx']
22-
blast_formats = ['HTML', 'Text', 'ASN.1', 'XML']
23-
blast_dbs = ['nr', 'refseq', 'swissprot', 'pat', 'month', 'pdb', 'env_nr']
2427

2528

2629
# "Download Sequence by Search"
@@ -31,10 +34,6 @@ def run(self, edit):
3134
entrez_retmax = sublime.load_settings(
3235
'BioPythonUtils.sublime-settings').get('entrez_retmax')
3336

34-
# Default is 20
35-
if not entrez_retmax:
36-
entrez_retmax = 20
37-
3837
email_for_eutils = sublime.load_settings(
3938
'BioPythonUtils.sublime-settings').get('email_for_eutils')
4039

@@ -403,14 +402,6 @@ def run(self, edit):
403402
sublime.error_message("No BLAST database specified")
404403
return
405404

406-
if not blast_app:
407-
sublime.error_message("No BLAST application specified")
408-
return
409-
410-
if not blast_format:
411-
sublime.error_message("No BLAST format specified")
412-
return
413-
414405
# 1 page is written for each report if there are multiple selections
415406
for region in self.view.sel():
416407
seq_str = self.view.substr(region)
@@ -451,43 +442,41 @@ def run(self, edit):
451442
class SelectBlastDatabase(sublime_plugin.WindowCommand):
452443

453444
def run(self):
454-
global blast_dbs
455-
sublime.active_window().show_quick_panel(blast_dbs, setBlastDatabase)
445+
sublime.active_window().show_quick_panel(
446+
blast_info[blast_app], setBlastDatabase)
447+
448+
449+
def setBlastDatabase(index):
450+
global blast_db
451+
if index > -1:
452+
blast_db = blast_info[blast_app][index]
456453

457454

458455
class SelectBlastApplication(sublime_plugin.WindowCommand):
459456

460457
def run(self):
461-
global blast_apps
462458
sublime.active_window().show_quick_panel(
463-
blast_apps, setBlastApplication)
459+
list(blast_info.keys()), setBlastApplication)
460+
461+
462+
def setBlastApplication(index):
463+
global blast_app
464+
if index > -1:
465+
blast_app = list(blast_info.keys())[index]
464466

465467

466468
class SelectBlastFormat(sublime_plugin.WindowCommand):
467469

468470
def run(self):
469-
global blast_formats
470471
sublime.active_window().show_quick_panel(blast_formats, setBlastFormat)
471472

472473

473474
def setBlastFormat(index):
474-
global blast_format, blast_formats
475+
global blast_format
475476
if index > -1:
476477
blast_format = blast_formats[index]
477478

478479

479-
def setBlastDatabase(index):
480-
global blast_db, blast_dbs
481-
if index > -1:
482-
blast_db = blast_dbs[index]
483-
484-
485-
def setBlastApplication(index):
486-
global blast_app, blast_apps
487-
if index > -1:
488-
blast_app = blast_apps[index]
489-
490-
491480
def validate_nt(seq):
492481
# Valid: {'G', 'T', 'U', 'C', 'A'}
493482
valid_bases = set(IUPAC.unambiguous_dna.letters +

Diff for: BioPythonUtils.sublime-settings

+5-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
2-
"email_for_eutils": "", // This email address will be used for EUtils queries
3-
"entrez_retmax": "", // Maximum number of Entrez records downloaded
4-
"remote_blast_app": "", // blastp, blastn, blastx, tblastn, tblastx
5-
"remote_blast_format": "", // HTML, Text, ASN.1, XML
6-
"remote_blast_db": "" // nr, refseq, swissprot, pat, month, pdb, env_nr
2+
"email_for_eutils": "", // This email address will be used for EUtils queries
3+
"entrez_retmax": "1000", // Maximum number of Entrez records downloaded
4+
"remote_blast_app": "blastp", // blastp, blastn, blastx, tblastn, tblastx
5+
"remote_blast_format": "Text", // HTML, Text, ASN.1, XML
6+
"remote_blast_db": "nr" // Will depend on the BLAST application
77
}

Diff for: README.md

+1-2
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ Downloads a taxon as GenBank format entries from [NCBI](http://www.ncbi.nlm.nih.
9595

9696
#### "Remote BLAST"
9797

98-
Sends the selected Fasta format or "plain" sequence(s) to the [BLAST server at NCBI](http://blast.ncbi.nlm.nih.gov/Blast.cgi) and retrieves the results. Set the application, database, and result format using the Command Palette. You can also set default values
99-
for these in your "Settings - User" file ("remote_blast_app", "remote_blast_db", "remote_blast_format").
98+
Sends the selected Fasta format or "plain" sequence(s) to the [BLAST server at NCBI](http://blast.ncbi.nlm.nih.gov/Blast.cgi) and retrieves the results. You can set the application, database, and result format using the Command Palette. You can also set some default values in your "Settings - User" file ("remote_blast_app", "remote_blast_format"). Note that the available databases change depending on the BLAST application.
10099

101100
#### "Genbank To Fasta"
102101

Diff for: config.json

+84-76
Original file line numberDiff line numberDiff line change
@@ -1,79 +1,87 @@
11
{
2-
"blastn": [
3-
"alu_repeats",
4-
"Bacteria and Archaea",
5-
"chromosome",
6-
"dbsts",
7-
"est",
8-
"gss",
9-
"HTGS",
10-
"nr/nt",
11-
"pat",
12-
"pdb",
13-
"refseq_gene",
14-
"refseq_genomic",
15-
"refseq_representative_genomes",
16-
"refseq_rna",
17-
"SRA",
18-
"TSA",
19-
"wgs"
2+
"blast_formats": [
3+
"HTML",
4+
"Text",
5+
"ASN.1",
6+
"XML"
207
],
21-
"blastp": [
22-
"env_nr",
23-
"landmark"
24-
"nr",
25-
"pat",
26-
"pdb",
27-
"refseq_protein",
28-
"swissprot",
29-
"tsa_nr",
30-
],
31-
"blastx": [
32-
"env_nr",
33-
"landmark"
34-
"nr",
35-
"pat",
36-
"pdb",
37-
"refseq_protein",
38-
"swissprot",
39-
"tsa_nr",
40-
],
41-
"tblastn": [
42-
"alu_repeats",
43-
"Bacteria and Archaea",
44-
"chromosome",
45-
"dbsts",
46-
"est",
47-
"gss",
48-
"HTGS",
49-
"nr/nt",
50-
"pat",
51-
"pdb",
52-
"refseq_gene",
53-
"refseq_genomic",
54-
"refseq_representative_genomes",
55-
"refseq_rna",
56-
"SRA",
57-
"TSA",
58-
"wgs"
59-
],
60-
"tblastx": [
61-
"alu_repeats",
62-
"Bacteria and Archaea",
63-
"chromosome",
64-
"dbsts",
65-
"est",
66-
"gss",
67-
"HTGS",
68-
"nr/nt",
69-
"pat",
70-
"pdb",
71-
"refseq_gene",
72-
"refseq_genomic",
73-
"refseq_representative_genomes",
74-
"refseq_rna",
75-
"SRA",
76-
"TSA",
77-
"wgs"
78-
]
8+
"blast_info": {
9+
"blastn": [
10+
"alu_repeats",
11+
"Bacteria and Archaea",
12+
"chromosome",
13+
"dbsts",
14+
"est",
15+
"gss",
16+
"HTGS",
17+
"nt",
18+
"pat",
19+
"pdb",
20+
"refseq_gene",
21+
"refseq_genomic",
22+
"refseq_representative_genomes",
23+
"refseq_rna",
24+
"SRA",
25+
"TSA",
26+
"wgs"
27+
],
28+
"blastp": [
29+
"env_nr",
30+
"landmark",
31+
"nr",
32+
"pat",
33+
"pdb",
34+
"refseq_protein",
35+
"swissprot",
36+
"tsa_nr"
37+
],
38+
"blastx": [
39+
"env_nr",
40+
"landmark",
41+
"nr",
42+
"pat",
43+
"pdb",
44+
"refseq_protein",
45+
"swissprot",
46+
"tsa_nr"
47+
],
48+
"tblastn": [
49+
"alu_repeats",
50+
"Bacteria and Archaea",
51+
"chromosome",
52+
"dbsts",
53+
"est",
54+
"gss",
55+
"HTGS",
56+
"nt",
57+
"pat",
58+
"pdb",
59+
"refseq_gene",
60+
"refseq_genomic",
61+
"refseq_representative_genomes",
62+
"refseq_rna",
63+
"SRA",
64+
"TSA",
65+
"wgs"
66+
],
67+
"tblastx": [
68+
"alu_repeats",
69+
"Bacteria and Archaea",
70+
"chromosome",
71+
"dbsts",
72+
"est",
73+
"gss",
74+
"HTGS",
75+
"nt",
76+
"pat",
77+
"pdb",
78+
"refseq_gene",
79+
"refseq_genomic",
80+
"refseq_representative_genomes",
81+
"refseq_rna",
82+
"SRA",
83+
"TSA",
84+
"wgs"
85+
]
86+
}
7987
}

Diff for: package-metadata.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,6 @@
77
],
88
"description": "BioPython Utilities for Sublime Text 3",
99
"url": "https://github.com/bosborne/BioPythonUtils",
10-
"version": "2017-01-19_12-21"
10+
"version": "2017-02-18_12-21"
1111
}
1212
}

Diff for: test/test.fa

+20
Original file line numberDiff line numberDiff line change
@@ -129,3 +129,23 @@ IDGWYGFRHQNAEGTGTAADLKSTQAAIDQINGKLNRLIEKTNEKYHQIEKEFEQVEGRI
129129
QDLEKYVEDTKIDLWSYNAELLVALENQHTIDVTDSEMNKLFERVRRQLRENAEDKGNGC
130130
FEIFHQCDNNCIESIRNGTYDHDIYRDEAINNRFQIQGVKLTQGYKDIILWISFSISCFL
131131
LVALLLAFILWACQNGNIRCQICI
132+
>S000320704 uncultured eubacterium TRA2-10; AF047642
133+
GACGAACGCTGGCGGCGTGCTTAACACATGCAAGTCGAACGgtttataagggc
134+
ttgcccttatagatAGTGGCGAACGGGTGCGTAACACGTGAGCAACCTGCCCCAAAGTTTGGAATAACACCGGGAAACCG
135+
ATGCTAATACCAAATATGcTCACACTATCACAAGATAGAGTGAgGAAAGTtttTCGCTTTGGGAGGGGCTCGCGGCCTAT
136+
CAGCTTGTTGGTGAGGTAACGGCTCACCAAGGCATCGACGGGTAGCTGGTCTGAGAGGACGATCAGCCACACTGGGACTG
137+
AGACACGGCCCAGACTCCTACGGGAGGCAGCAGTGGGGAATATTGCGCAATGGGCGAAAGCCTGACGCAGCAACGCCGCG
138+
TGGAGGATGAAGGCCTTAGGGTCGTAAACTCCTTTCAGCAGGAACGAaaaTGACGGTACCTGCAGAAGAAGCTCCGGCCA
139+
ACTACGTGCCAGCAGCCGCGGTAATACGTAGGGAGCAAGCGTTGTCCGGATTTATTGGGCGTAAAGAGCTCGTAGGCGGC
140+
TTGGCAAGTCGGATGTGAAACCCCCAGGCTTAACCTGGGGCCGCCATTCGATACTGCTAtGGCTTGAGTTCGGTAGGGGA
141+
TTGTGGAATTCCCGGTGTAGCGATGAAATGCGCAGATATCGGGAGGAACACCAATGGCGAAGGCAGCAATCTGGGCCGAC
142+
ACTGACGCTGAGGAGCGAAAGCGTGGGGAGCAAACAGGATTAGATACCCTGGTAGTCCACGCCCTAAACGTTGGGCACTA
143+
GGTGTGGGACctacttcgacggGTTCCGTGCCGTAGCTAACGCATTAAGTGCCCCGCCTGGGGAGTACGGCCGCAAGGCT
144+
AAAACTCAAAGGAATTGACGGGGGCCCGCACAAGCGGCGGAGCATGTGGCTTAATTCGATGCAACGCGAAGAACCTCACC
145+
TGGGCTTGACATGTTGGGaAAAGCCGTAGAGATACGGTGtccattagggCCCTT-CACAGGTGGTGCATGGCTGTCGTCA
146+
GCTCGTGTCGTGAGATGTTGAGTTAAGTCCCGCAACGAGCGCAACCCTTGTCTTATGTTACCAGCGagtaatgtCGGGGA
147+
CTCATGAGAGACTGCCGGGGTCAACTCGGAGGAAGGTGGGGATGACGTCAAGTCATCATGCCCCTTATGTCCAGGGCTGC
148+
ACACATGCTACAATGGCCGGTACAGAGGGTCGCAATCCCGCGAGGGGGAGCTAATCCCACAAAGCCGGTCTCAGTTCGGA
149+
TCGCAGTCTGCAACTCGACTGCGTGAAGCCGGAGTCGCTAGTAATCCCGAATCAGCaTTGTCGGGGTGAATACGTTCCCG
150+
GGCCTTGTACACACCGCCCGTCACACCACGAAAGTCGGCAACACCCGAAGCCGGTggcccaaccagtaatggagggaGCC
151+

0 commit comments

Comments
 (0)