Skip to content

Commit

Permalink
Merge pull request #167 from dhuvik/netmhcstabpan
Browse files Browse the repository at this point in the history
Added support for NetMHCstabpan (GH Issue #64)
  • Loading branch information
iskandr authored Mar 13, 2024
2 parents 8c78caf + c57e9a6 commit 9837a5c
Show file tree
Hide file tree
Showing 7 changed files with 153 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ htmlcov/
.cache
nosetests.xml
coverage.xml
.hypothesis/

# Translations
*.mo
Expand Down
Binary file added .hypothesis/unicode_data/13.0.0/charmap.json.gz
Binary file not shown.
Binary file added .hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz
Binary file not shown.
2 changes: 2 additions & 0 deletions mhctools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from .netmhc_pan41 import NetMHCpan41, NetMHCpan41_BA, NetMHCpan41_EL
from .netmhcii_pan import NetMHCIIpan, NetMHCIIpan3, NetMHCIIpan4, NetMHCIIpan4_BA, NetMHCIIpan4_EL
from .random_predictor import RandomBindingPredictor
from .netmhcstabpan import NetMHCstabpan
from .unsupported_allele import UnsupportedAllele

__version__ = "1.9.0"
Expand Down Expand Up @@ -54,6 +55,7 @@
"NetMHCIIpan4",
"NetMHCIIpan4_BA",
"NetMHCIIpan4_EL",
"NetMHCstabpan",
"RandomBindingPredictor",
"UnsupportedAllele",
]
45 changes: 45 additions & 0 deletions mhctools/netmhcstabpan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .base_commandline_predictor import BaseCommandlinePredictor
from .parsing import parse_netmhcstabpan

class NetMHCstabpan(BaseCommandlinePredictor):
def __init__(
self,
alleles,
default_peptide_lengths=[],
program_name="netMHCstabpan",
process_limit=-1,
flags=[]):
"""
Wrapper for NetMHCstabpan.
"""
BaseCommandlinePredictor.__init__(
self,
program_name=program_name,
alleles=alleles,
default_peptide_lengths=default_peptide_lengths,
parse_output_fn=parse_netmhcstabpan,
supported_alleles_flag="-listMHC",
input_file_flag="-f",
length_flag="-l",
allele_flag="-a",
extra_flags=flags,
process_limit=process_limit)

def predict_peptides(self, peptides):
peptide_lengths = set(len(p) for p in peptides)
if len(peptide_lengths) > 1:
raise ValueError("All peptides must be the same length")
return super().predict_peptides(peptides)

43 changes: 43 additions & 0 deletions mhctools/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,3 +600,46 @@ def parse_netmhciipan4_stdout(
rank_index=8 if mode == "elution_score" else 12,
score_index=7 if mode == "elution_score" else 10,
transforms=transforms)

def parse_netmhcstabpan(
stdout,
prediction_method_name="netmhcstabpan",
sequence_key_mapping=None,
mode=None):
"""
# NetMHCstabpan version 1.0
# Input is in PEPTIDE format
HLA-A02:01 : Distance to traning data 0.000 (using nearest neighbor HLA-A02:01)
# Rank Threshold for Strong binding peptides 0.500
# Rank Threshold for Weak binding peptides 2.000
-----------------------------------------------------------------------------------------------------
pos HLA peptide Identity Pred Thalf(h) %Rank_Stab BindLevel
-----------------------------------------------------------------------------------------------------
0 HLA-A*02:01 AAAAAAAAAA PEPLIST 0.075 0.27 19.00
-----------------------------------------------------------------------------------------------------
Protein PEPLIST. Allele HLA-A*02:01. Number of high binders 0. Number of weak binders 0. Number of peptides 1
-----------------------------------------------------------------------------------------------------
"""

# the offset specified in "pos" (at index 0) is 1-based instead of 0-based. we adjust it to be
# 0-based, as in all the other netmhc predictors supported by this library.
transforms = {
0: lambda x: int(x) - 1,
}
return parse_stdout(
stdout=stdout,
prediction_method_name=prediction_method_name,
sequence_key_mapping=sequence_key_mapping,
key_index=3,
offset_index=0,
peptide_index=2,
allele_index=1,
score_index=5,
rank_index=6,
transforms=transforms)

62 changes: 62 additions & 0 deletions tests/test_netmhc_stabpan.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from mhctools import NetMHCstabpan


DEFAULT_ALLELE = 'HLA-A*02:01'

# Protein sequences to test with against known values of netMHCstabpan web server.
protein_sequences = [
"ALDKNLHQL",
"ALHEEVVCV",
"ALPPTVYEV",
"AVLGSFSYV",
"EMASVLFKA",
]

web_server_predictions = [
4.6393,
10.6011,
11.8201,
4.6722,
1.8404,
]

# REMINDER: a program called "netMHCstabpan" MUST be installed and working for
# this test suite to succeeed. Also all peptides must be the same length.


def test_netmhc_stabpan_accuracy():
# Check that the netMHCstabpan program is working and returning th eexpected outputs.
predictor = NetMHCstabpan(
alleles=[DEFAULT_ALLELE], program_name='netMHCstabpan'
)

binding_predictions = predictor.predict_peptides(protein_sequences)
stability_predictions = [p.score for p in binding_predictions]
rank_predictions = [p.percentile_rank for p in binding_predictions]

assert len(web_server_predictions) == len(binding_predictions)
assert len(stability_predictions) == len(binding_predictions)

for prank in rank_predictions:
# Make sure that correct mapping is done by checking percentiles aren't above 100.
assert prank < 100

for i, (expected, actual) in enumerate(zip(web_server_predictions, stability_predictions)):
# Check to make sure that the stability predictions are within 0.01 of the webserver values.
# This could be the result of different versions of dependencies or the nature of the ANN itself.
np.testing.assert_allclose(expected, actual, atol=0.01, error_msg="Peptide %d: expected %f but got %f" % (i, expected, actual))


0 comments on commit 9837a5c

Please sign in to comment.