Skip to content

Commit 785746d

Browse files
refactor code at parsers
1 parent b4892a3 commit 785746d

File tree

5 files changed

+29
-32
lines changed

5 files changed

+29
-32
lines changed

parsers/casprr2parser.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
1-
from operator import itemgetter
2-
from utils import unique_by_key
1+
from utils import get_unique_distances
32
from utils.exceptions import InvalidFormat
43

54

@@ -34,8 +33,5 @@ def CASPRR2Parser(input, input_format=None):
3433
if not output:
3534
raise InvalidFormat('Unable to parse CASPRR_MODE_2 file')
3635
else:
37-
unique_contacts = unique_by_key(output, key=itemgetter(0))
38-
output = [(*contact[0], *contact[1:]) for contact in unique_contacts]
39-
output = sorted(output, key=itemgetter(2), reverse=True)
40-
output.append('DISTO')
41-
return output
36+
unique_contacts = get_unique_distances(output)
37+
return unique_contacts

parsers/ccmpredparser.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
1-
from operator import itemgetter
21
from utils.exceptions import InvalidFormat
3-
from utils import unique_by_key
2+
from utils import get_unique_contacts
43

54

65
def CCMpredParser(input, input_format=None):
@@ -31,7 +30,5 @@ def CCMpredParser(input, input_format=None):
3130
if not output:
3231
raise InvalidFormat('Unable to parse contacts')
3332
else:
34-
unique_contacts = unique_by_key(output, key=itemgetter(0))
35-
output = [(*contact[0], contact[1]) for contact in unique_contacts]
36-
output = sorted(output, key=itemgetter(2), reverse=True)
37-
return output
33+
unique_contacts = get_unique_contacts(output)
34+
return unique_contacts

parsers/contactparser.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from enum import Enum
2-
from operator import itemgetter
32
import re
4-
from utils import unique_by_key
3+
from utils import get_unique_contacts
54
from utils.exceptions import InvalidFormat
65

76

@@ -138,7 +137,5 @@ def ContactParser(input, input_format):
138137
if not output:
139138
raise InvalidFormat('Unable to parse contacts')
140139
else:
141-
unique_contacts = unique_by_key(output, key=itemgetter(0))
142-
output = [(*contact[0], contact[1]) for contact in unique_contacts]
143-
output = sorted(output, key=itemgetter(2), reverse=True)
144-
return output
140+
unique_contacts = get_unique_contacts(output)
141+
return unique_contacts

parsers/npzparser.py

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
11
import io
22
import base64
33
import numpy as np
4-
from operator import itemgetter
54
from utils.exceptions import InvalidFormat
6-
from utils import unique_by_key
5+
from utils import get_unique_distances
76

87

98
def parse_array(array):
@@ -31,7 +30,6 @@ def NpzParser(input, input_format=None):
3130
except (OSError, KeyError, IndexError) as e:
3231
raise InvalidFormat('Unable to parse distance NPZ file')
3332

34-
3533
for contact in tmp_output:
3634
# contact = [res_1, res_2, raw_score, distance_bin, distance_score]
3735
contact[:2] = sorted(contact[:2], reverse=True)
@@ -40,8 +38,5 @@ def NpzParser(input, input_format=None):
4038
if not output:
4139
raise InvalidFormat('Unable to parse NPZ file')
4240
else:
43-
unique_contacts = unique_by_key(output, key=itemgetter(0))
44-
output = [(*contact[0], *contact[1:]) for contact in unique_contacts]
45-
output = sorted(output, key=itemgetter(2), reverse=True)
46-
output.append('DISTO')
47-
return output
41+
unique_contacts = get_unique_distances(output)
42+
return unique_contacts

utils/__init__.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
from operator import itemgetter
23
from enum import Enum
34

45

@@ -110,6 +111,7 @@ class UrlIndex(Enum):
110111
YOUTUBE_EMBED = 'https://www.youtube.com/embed/dQw4w9WgXcQ'
111112
YOUTUBE_LINK = 'https://www.youtube.com/watch?v=dQw4w9WgXcQ'
112113

114+
113115
def create_ConPlot(*args, **kwargs):
114116
from utils.plot_utils import create_ConPlot
115117

@@ -236,9 +238,19 @@ def get_session_action(*args, **kwargs):
236238
return get_session_action(*args, **kwargs)
237239

238240

239-
def unique_by_key(elements, key=None):
241+
def get_unique_contacts(elements):
240242
# Credits to: https://stackoverflow.com/questions/31499259/making-a-sequence-of-tuples-unique-by-a-specific-element
241-
if key is None:
242-
# no key: the whole element must be unique
243-
key = lambda e: e
244-
return list({key(el): el for el in elements}.values())
243+
key = itemgetter(0)
244+
unique = list({key(el): el for el in elements}.values())
245+
output = [(*contact[0], contact[1]) for contact in unique]
246+
output = sorted(output, key=itemgetter(2), reverse=True)
247+
return output
248+
249+
250+
def get_unique_distances(elements):
251+
key = itemgetter(0)
252+
unique_contacts = list({key(el): el for el in elements}.values())
253+
output = [(*contact[0], *contact[1:]) for contact in unique_contacts]
254+
output = sorted(output, key=itemgetter(2), reverse=True)
255+
output.append('DISTO')
256+
return output

0 commit comments

Comments
 (0)