Skip to content

Commit e8922a0

Browse files
Merge pull request #130 from rigdenlab/development
Development
2 parents da508e2 + a2bce40 commit e8922a0

23 files changed

+237
-87
lines changed

app.py

+1
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,7 @@ def recover_account(n_clicks, username, email, secret, password_1, password_2):
203203

204204
return app_utils.recover_account(username, email, secret, password_1, password_2, app.logger)
205205

206+
206207
@app.callback([Output('invalid-create-user-collapse', 'is_open'),
207208
Output('create-user-modal-div', 'children'),
208209
Output('create-username-input', 'value'),

components/__init__.py

+12
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ def TutorialFourModal(*args, **kwargs):
9494
return TutorialFourModal(*args, **kwargs)
9595

9696

97+
def TutorialFiveModal(*args, **kwargs):
    """Build the tutorial 5 modal by delegating to ``components.modals``.

    All positional and keyword arguments are forwarded untouched and the
    component created by ``components.modals.TutorialFiveModal`` is returned.
    """
    # The import happens at call time, not when this package is imported.
    from components.modals import TutorialFiveModal as build_modal

    return build_modal(*args, **kwargs)
101+
102+
97103
def GdprAgreementCheckbox(*args, **kwargs):
98104
from components.inputgroups import GdprAgreementCheckbox
99105

@@ -454,6 +460,12 @@ def NoAdditionalTracksCard(*args, **kwargs):
454460
return NoAdditionalTracksCard(*args, **kwargs)
455461

456462

463+
def MismatchDatasetModal(*args, **kwargs):
    """Build the dataset mismatch modal by delegating to ``components.modals``.

    All positional and keyword arguments are forwarded untouched and the
    component created by ``components.modals.MismatchDatasetModal`` is returned.
    """
    # The import happens at call time, not when this package is imported.
    from components.modals import MismatchDatasetModal as build_modal

    return build_modal(*args, **kwargs)
467+
468+
457469
def ContactDisplayControlCard(*args, **kwargs):
458470
from components.cards import DisplayControlCard
459471

components/listgrpoups.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ def TutorialList():
9393
TutorialItem(idx=1, name='Creating your first plot'),
9494
TutorialItem(idx=2, name='Compare a contact prediction with a PDB file'),
9595
TutorialItem(idx=3, name='Storing, loading and sharing a session'),
96-
TutorialItem(idx=4, name='Residue-Residue distance predictions')
96+
TutorialItem(idx=4, name='Residue-Residue distance predictions'),
97+
#TutorialItem(idx=5, name='Video tutorial')
9798
], style={'width': '75%'}
9899
), justify='center', align='center')
99100

components/modals.py

+31-2
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,23 @@ def MismatchModal(*args):
1616
], id='mismatch-modal', is_open=True)
1717

1818

19+
# Build the modal shown when an uploaded dataset cannot be matched to the sequence.
#   fname:     name of the uploaded data file; fills the SECOND placeholder of the message.
#   seq_fname: name of the sequence file; fills the FIRST placeholder of the message.
# Returns a dbc.Modal with id 'mismatch-dataset-modal' that is created already open
# (is_open=True), with a "Mismatch Detected" header and a justified explanatory paragraph.
# Note that .format() receives (seq_fname, fname), i.e. the reverse of the parameter order.
def MismatchDatasetModal(fname, seq_fname):
20+
return dbc.Modal([
21+
ModalHeader("Mismatch Detected"),
22+
dbc.ModalBody([
23+
html.P("""We were unable to match the sequence at {} with the data at the file {}. Please
24+
ensure that the file you attempt to upload corresponds with the protein sequence
25+
in the provided FASTA file.""".format(seq_fname, fname), style={'text-align': "justify"}),
26+
])
27+
], id='mismatch-dataset-modal', is_open=True)
28+
29+
1930
def MismatchSequenceModal(*args):
2031
return dbc.Modal([
2132
ModalHeader("Sequence Mismatch"),
2233
dbc.ModalBody([
23-
html.P("""We were unable to match the uploaded sequence with contact maps in the following files.
24-
Please ensure that the provided the sequence corresponds with the structure in these contact maps.""",
34+
html.P("""We were unable to match the uploaded sequence with datasets in the following files.
35+
Please ensure that the provided sequence corresponds with the structure described in these datasets.""",
2536
style={'text-align': "justify"}),
2637
html.Ul([html.Li('File: %s' % arg) for arg in args], id='mismatched-maps-div')
2738
])
@@ -541,6 +552,23 @@ def TutorialFourModal():
541552
autoFocus=True)
542553

543554

555+
def TutorialFiveModal():
    """Build the modal for tutorial 5, which embeds the video tutorial.

    The modal is created closed (``is_open=False``) under the pattern id
    ``{'type': 'tutorial-modal', 'index': 5}``. Its body holds a short
    introduction, the embedded player and a fallback link to the video.
    """
    # NOTE(review): 'frameborder' and 'allow' are passed inside ``style``; they
    # look like iframe attributes rather than CSS properties, so they may have
    # no effect there -- confirm against the Dash version in use before moving them.
    video_frame = html.Iframe(
        width=1100,
        height=630,
        src=UrlIndex.YOUTUBE_EMBED.value,
        style={'frameborder': 0, 'allow': "accelerometer; autoplay; clipboard-write; "
                                          "encrypted-media; gyroscope; picture-in-picture"},
    )
    fallback_link = html.A(html.U('here'), href=UrlIndex.YOUTUBE_LINK.value)

    header = dbc.ModalHeader('Tutorial 5: Video Tutorial')
    body = dbc.ModalBody([
        'Below there is a short video tutorial with an overview of the main ConPlot features. ',
        html.Br(),
        html.Br(),
        video_frame,
        html.Br(),
        'If you cannot see the video click ', fallback_link, '.'
    ])

    return dbc.Modal(
        [header, body],
        id={'type': 'tutorial-modal', 'index': 5},
        is_open=False,
        size='xl',
        scrollable=True,
        centered=True,
        autoFocus=True,
    )
570+
571+
544572
def RedisConnectionErrorModal():
545573
return dbc.Modal([
546574
ModalHeader("Redis connection error"),
@@ -679,6 +707,7 @@ def FailureRecoverAccount():
679707
),
680708
], id='fail-recovery-modal', is_open=True)
681709

710+
682711
def InvalidPasswordRecoverAccount():
683712
return dbc.Modal([
684713
dbc.ModalHeader(

layouts/help.py

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ def Body(cache):
2121
components.TutorialTwoModal(),
2222
components.TutorialThreeModal(),
2323
components.TutorialFourModal(),
24+
#components.TutorialFiveModal(),
2425
components.CustomFormatDescriptionModal(),
2526
dbc.Row([
2627
dbc.Col([

layouts/home.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ def Body():
9090
html.P('If you found ConPlot useful for your work and you would like to cite us '
9191
'please use the following reference:',
9292
style={"font-size": "120%", "text-align": "justify"}),
93-
html.I('Sánchez Rodríguez, et al., ConPlot: Web-based application for the visualisation '
93+
html.I('Sánchez Rodríguez F. et al., ConPlot: Web-based application for the visualisation '
9494
'of protein contact maps integrated with other data, Bioinformatics, 2021.',
9595
style={"font-size": "120%", "text-align": "justify"}),
9696
html.Br(),

loaders/loader.py

+4-5
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,14 @@ def Loader(raw_file, input_format):
1010

1111
if raw_file is not None:
1212
try:
13-
decoded = decode_raw_file(raw_file)
14-
if input_format in ContactInformationFormats.__members__ and input_format != ContactInformationFormats.CCMPRED.name \
15-
and input_format != ContactInformationFormats.PDB.name:
13+
if input_format != ContactInformationFormats.trROSETTA_NPZ.name:
14+
decoded = decode_raw_file(raw_file)
1615
data_raw = ParserFormats.__dict__[input_format](decoded, input_format)
1716
else:
18-
data_raw = ParserFormats.__dict__[input_format](decoded)
17+
data_raw = ParserFormats.__dict__[input_format](raw_file, input_format)
1918
data = compress_data(data_raw)
2019
except (InvalidFormat, UnicodeDecodeError) as e:
2120
data = None
2221
invalid = True
2322

24-
return data, invalid
23+
return data, invalid

parsers/__init__.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,10 @@ def guess_psipred_format(*args, **kwargs):
2525
return guess_psipred_format(*args, **kwargs)
2626

2727

28-
def DistogramParser(*args, **kwargs):
29-
from parsers.distogramparser import DistogramParser
28+
def CASPRR2Parser(*args, **kwargs):
29+
from parsers.casprr2parser import CASPRR2Parser
3030

31-
return DistogramParser(*args, **kwargs)
31+
return CASPRR2Parser(*args, **kwargs)
3232

3333

3434
def TopconsParser(*args, **kwargs):
@@ -73,6 +73,12 @@ def CustomParser(*args, **kwargs):
7373
return CustomParser(*args, **kwargs)
7474

7575

76+
def NpzParser(*args, **kwargs):
    """Parse an NPZ upload by delegating to ``parsers.npzparser.NpzParser``.

    All positional and keyword arguments are forwarded untouched and the
    delegate's return value is handed back unchanged.
    """
    # The import happens at call time, not when this package is imported.
    from parsers.npzparser import NpzParser as parse_npz

    return parse_npz(*args, **kwargs)
80+
81+
7682
class ParserFormats(Enum):
7783
TOPCONS = TopconsParser
7884
CONSURF = ConsurfParser
@@ -97,7 +103,8 @@ class ParserFormats(Enum):
97103
CCMPRED = CCMpredParser
98104
COLSTATS = CCMpredParser
99105
PDB = PDBParser
100-
CASPRR_MODE_2 = DistogramParser
106+
CASPRR_MODE_2 = CASPRR2Parser
107+
trROSETTA_NPZ = NpzParser
101108

102109

103110
class ContactInformationFormats(Enum):
@@ -122,6 +129,7 @@ class ContactInformationFormats(Enum):
122129
MAPALIGN = 18
123130
ALEIGEN = 19
124131
PDB = 20
132+
trROSETTA_NPZ = 21
125133

126134

127135
class ContactMapFormats(Enum):
@@ -152,6 +160,7 @@ class StructuralInformationFormats(Enum):
152160

153161
class DistanceInformationFormats(Enum):
154162
CASPRR_MODE_2 = 1
163+
trROSETTA_NPZ = 2
155164

156165

157166
class MembraneStates(Enum):

parsers/distogramparser.py renamed to parsers/casprr2parser.py

+5-9
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
from operator import itemgetter
2-
from utils import unique_by_key
1+
from utils import get_unique_distances
32
from utils.exceptions import InvalidFormat
43

54

6-
def DistogramParser(input, input_format=None):
5+
def CASPRR2Parser(input, input_format=None):
76
contents = input.split('\n')
87
output = []
98
res_1_idx = 0
@@ -32,10 +31,7 @@ def DistogramParser(input, input_format=None):
3231
output.append((tuple(contact[:2]), *contact[2:]))
3332

3433
if not output:
35-
raise InvalidFormat('Unable to parse contacts')
34+
raise InvalidFormat('Unable to parse CASPRR_MODE_2 file')
3635
else:
37-
unique_contacts = unique_by_key(output, key=itemgetter(0))
38-
output = [(*contact[0], *contact[1:]) for contact in unique_contacts]
39-
output = sorted(output, key=itemgetter(2), reverse=True)
40-
output.append('DISTO')
41-
return output
36+
unique_contacts = get_unique_distances(output)
37+
return unique_contacts

parsers/ccmpredparser.py

+4-7
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
from operator import itemgetter
21
from utils.exceptions import InvalidFormat
3-
from utils import unique_by_key
2+
from utils import get_unique_contacts
43

54

6-
def CCMpredParser(input):
5+
def CCMpredParser(input, input_format=None):
76
contents = input.split('\n')
87

98
output = []
@@ -31,7 +30,5 @@ def CCMpredParser(input):
3130
if not output:
3231
raise InvalidFormat('Unable to parse contacts')
3332
else:
34-
unique_contacts = unique_by_key(output, key=itemgetter(0))
35-
output = [(*contact[0], contact[1]) for contact in unique_contacts]
36-
output = sorted(output, key=itemgetter(2), reverse=True)
37-
return output
33+
unique_contacts = get_unique_contacts(output)
34+
return unique_contacts

parsers/consurfparser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from utils.exceptions import InvalidFormat
22

33

4-
def ConsurfParser(input):
4+
def ConsurfParser(input, input_format=None):
55
contents = input.split('\n')
66
output = []
77

parsers/contactparser.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from enum import Enum
2-
from operator import itemgetter
32
import re
4-
from utils import unique_by_key
3+
from utils import get_unique_contacts
54
from utils.exceptions import InvalidFormat
65

76

@@ -138,7 +137,5 @@ def ContactParser(input, input_format):
138137
if not output:
139138
raise InvalidFormat('Unable to parse contacts')
140139
else:
141-
unique_contacts = unique_by_key(output, key=itemgetter(0))
142-
output = [(*contact[0], contact[1]) for contact in unique_contacts]
143-
output = sorted(output, key=itemgetter(2), reverse=True)
144-
return output
140+
unique_contacts = get_unique_contacts(output)
141+
return unique_contacts

parsers/customparser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from parsers import CustomStates
33

44

5-
def CustomParser(input):
5+
def CustomParser(input, input_format=None):
66
contents = input.split('\n')
77

88
len_flag = []

parsers/iupredparser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from utils.exceptions import InvalidFormat
33

44

5-
def IupredParser(input):
5+
def IupredParser(input, input_format=None):
66
contents = input.split('\n')
77
output = []
88

parsers/npzparser.py

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import io
2+
import base64
3+
import numpy as np
4+
from utils.exceptions import InvalidFormat
5+
from utils import get_unique_distances
6+
7+
8+
def parse_array(array):
    """Convert a per-pair distance-bin array into a list of residue-pair records.

    Bin #0 of the last axis corresponds with d>20A and bins #1 ~ #36 correspond
    with 2A<d<20A in increments of 0.5A (as stated by the original author).

    The 36 fine bins are merged four by four into 9 coarse bins, and bin #0 is
    appended as the 10th coarse bin (index 9).

    :param array: array of shape (L, L, >=37) with one value per residue pair and bin.
    :returns: list of ``[res_1, res_2, contact_score, distance_bin, distance_score]``
        lists (1-based residue numbers) for every pair with ``res_2 - res_1 >= 5``,
        in row-major order. ``contact_score`` is the sum of bins #1 ~ #12,
        ``distance_bin`` the index of the strongest coarse bin and
        ``distance_score`` its value.
    :raises IndexError: if the array is not 3-dimensional or has fewer than 37 bins.
    """
    array = np.asarray(array)
    # Slicing past the end of the bin axis would silently yield truncated sums,
    # so reject malformed input explicitly. IndexError is what indexing an array
    # of the wrong rank already raised, hence callers handle it the same way.
    if array.ndim != 3 or array.shape[2] < 37:
        raise IndexError('Expected an array of shape (L, L, 37), got {}'.format(array.shape))

    contacts = np.sum(array[:, :, 1:13], axis=-1)

    coarse_bins = [np.sum(array[:, :, start:start + 4], axis=-1) for start in range(1, 37, 4)]
    coarse_bins.append(array[:, :, 0])
    coarse_bins = np.dstack(coarse_bins)

    dist_bins = np.nanargmax(coarse_bins, axis=2)
    # nanmax (not amax) so the reported score always belongs to the bin picked
    # by nanargmax, even when another bin of the same pair is NaN.
    dist_prob = np.nanmax(coarse_bins, axis=2)

    # Upper triangle starting 5 positions off the diagonal: j >= i + 5.
    rows, cols = np.triu_indices(contacts.shape[0], k=5)
    records = zip(
        (rows + 1).tolist(),
        (cols + 1).tolist(),
        contacts[rows, cols].astype(float).tolist(),
        dist_bins[rows, cols].tolist(),
        dist_prob[rows, cols].astype(float).tolist(),
    )
    # Callers modify the records in place, so hand back lists rather than tuples.
    return [list(record) for record in records]
20+
21+
22+
def NpzParser(input, input_format=None):
    """Parse an uploaded NPZ archive into unique distance predictions.

    :param input: string of the form ``'<content type>,<base64 payload>'`` whose
        payload decodes to an NPZ archive containing a ``'dist'`` array.
    :param input_format: unused; accepted so every parser shares one signature.
    :returns: the result of ``get_unique_distances`` applied to the parsed
        ``((res_1, res_2), raw_score, distance_bin, distance_score)`` tuples.
    :raises InvalidFormat: if the payload is missing, cannot be decoded or loaded,
        lacks a usable ``'dist'`` array, or yields no residue pairs.
    """
    import zipfile

    # Split on the first comma only; a missing separator means there is no payload.
    content_type, separator, content_string = input.partition(',')
    if not separator:
        raise InvalidFormat('Unable to parse distance NPZ file')

    try:
        decoded = base64.b64decode(content_string)
        # SECURITY: the archive comes from a user upload, so pickled objects must
        # never be deserialised. Plain numeric arrays load without pickle.
        archive = np.load(io.BytesIO(decoded), allow_pickle=False)
        array = archive['dist']
        tmp_output = parse_array(array)
    except (OSError, KeyError, IndexError, ValueError, EOFError, zipfile.BadZipFile) as e:
        # ValueError covers bad base64 and non-NPZ or pickled payloads, EOFError an
        # empty payload, BadZipFile a corrupt archive. Callers only handle InvalidFormat.
        raise InvalidFormat('Unable to parse distance NPZ file') from e

    # contact = [res_1, res_2, raw_score, distance_bin, distance_score]
    # The residue pair is stored highest-first as the tuple key of each record.
    output = [(tuple(sorted(contact[:2], reverse=True)), *contact[2:]) for contact in tmp_output]

    if not output:
        raise InvalidFormat('Unable to parse NPZ file')
    return get_unique_distances(output)

parsers/pdbparser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ def remove_atoms(chain):
7373
chain[residue.id].detach_child(atom.id)
7474

7575

76-
def PDBParser(input):
76+
def PDBParser(input, input_format=None):
7777
try:
7878
parser = BioPDBParser().get_structure('pdb', io.StringIO(input))
7979
chain = list(parser.get_chains())[0]

parsers/psipredparser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ def guess_psipred_format(contents):
1212
raise InvalidFormat('Unable to guess psipred file format')
1313

1414

15-
def PsipredParser(input):
15+
def PsipredParser(input, input_format=None):
1616
contents = input.split('\n')
1717
parser = guess_psipred_format(contents)
1818
return parser(contents)

parsers/tests/test_distogramparser.py renamed to parsers/tests/test_casprr2parser.py

+4-4
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import unittest
2-
from parsers import DistogramParser
2+
from parsers import CASPRR2Parser
33
from utils.exceptions import InvalidFormat
44

55

6-
class ContactParserTestCase(unittest.TestCase):
6+
class CASPRRMODE2ParserTestCase(unittest.TestCase):
77

88
def test_1(self):
99
dummy_prediction = """PFRMAT RR
@@ -33,7 +33,7 @@ def test_1(self):
3333
expected_bin_distance = [0, 0, 1, 0, 0, 0, 1, 2, 2, 1, 2, 3]
3434
expected_bin_score = [0.345, 0.34, 0.56, 0.34, 0.33, 0.33, 0.51, 0.305, 0.3, 0.2, 0.2, 0.3]
3535

36-
output = DistogramParser(dummy_prediction)
36+
output = CASPRR2Parser(dummy_prediction)
3737

3838
self.assertEqual('DISTO', output.pop(-1))
3939
self.assertEqual(12, len(output))
@@ -49,5 +49,5 @@ def test_2(self):
4949
100 8 5.382865
5050
"""
5151
with self.assertRaises(InvalidFormat):
52-
output = DistogramParser(dummy_prediction)
52+
output = CASPRR2Parser(dummy_prediction)
5353
self.assertListEqual(output, [])

parsers/topconsparser.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from utils.exceptions import InvalidFormat
33

44

5-
def TopconsParser(input):
5+
def TopconsParser(input, input_format=None):
66
contents = input.split('\n')
77

88
try:

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@ visdcc~=0.0.40
1919
yagmail~=0.14.245
2020
keyring~=22.0.1
2121
keyrings.cryptfile~=1.3.6
22+
numpy~=1.19.4

0 commit comments

Comments
 (0)