diff --git a/.gitignore b/.gitignore index db4561e..180f92d 100644 --- a/.gitignore +++ b/.gitignore @@ -39,6 +39,7 @@ htmlcov/ .cache nosetests.xml coverage.xml +.hypothesis/ # Translations *.mo diff --git a/.hypothesis/unicode_data/13.0.0/charmap.json.gz b/.hypothesis/unicode_data/13.0.0/charmap.json.gz new file mode 100644 index 0000000..be78728 Binary files /dev/null and b/.hypothesis/unicode_data/13.0.0/charmap.json.gz differ diff --git a/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz b/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz new file mode 100644 index 0000000..6d86fda Binary files /dev/null and b/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz differ diff --git a/mhctools/__init__.py b/mhctools/__init__.py index f541f64..8e39671 100644 --- a/mhctools/__init__.py +++ b/mhctools/__init__.py @@ -21,6 +21,7 @@ from .netmhc_pan41 import NetMHCpan41, NetMHCpan41_BA, NetMHCpan41_EL from .netmhcii_pan import NetMHCIIpan, NetMHCIIpan3, NetMHCIIpan4, NetMHCIIpan4_BA, NetMHCIIpan4_EL from .random_predictor import RandomBindingPredictor +from .netmhcstabpan import NetMHCstabpan from .unsupported_allele import UnsupportedAllele __version__ = "1.9.0" @@ -54,6 +55,7 @@ "NetMHCIIpan4", "NetMHCIIpan4_BA", "NetMHCIIpan4_EL", + "NetMHCstabpan", "RandomBindingPredictor", "UnsupportedAllele", ] diff --git a/mhctools/netmhcstabpan.py b/mhctools/netmhcstabpan.py new file mode 100644 index 0000000..c2ed6b0 --- /dev/null +++ b/mhctools/netmhcstabpan.py @@ -0,0 +1,45 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .base_commandline_predictor import BaseCommandlinePredictor +from .parsing import parse_netmhcstabpan + +class NetMHCstabpan(BaseCommandlinePredictor): + def __init__( + self, + alleles, + default_peptide_lengths=[], + program_name="netMHCstabpan", + process_limit=-1, + flags=[]): + """ + Wrapper for NetMHCstabpan. + """ + BaseCommandlinePredictor.__init__( + self, + program_name=program_name, + alleles=alleles, + default_peptide_lengths=default_peptide_lengths, + parse_output_fn=parse_netmhcstabpan, + supported_alleles_flag="-listMHC", + input_file_flag="-f", + length_flag="-l", + allele_flag="-a", + extra_flags=flags, + process_limit=process_limit) + + def predict_peptides(self, peptides): + peptide_lengths = set(len(p) for p in peptides) + if len(peptide_lengths) > 1: + raise ValueError("All peptides must be the same length") + return super().predict_peptides(peptides) + diff --git a/mhctools/parsing.py b/mhctools/parsing.py index 910a642..c65fd9a 100644 --- a/mhctools/parsing.py +++ b/mhctools/parsing.py @@ -600,3 +600,46 @@ def parse_netmhciipan4_stdout( rank_index=8 if mode == "elution_score" else 12, score_index=7 if mode == "elution_score" else 10, transforms=transforms) + +def parse_netmhcstabpan( + stdout, + prediction_method_name="netmhcstabpan", + sequence_key_mapping=None, + mode=None): + """ + # NetMHCstabpan version 1.0 + + # Input is in PEPTIDE format + + HLA-A02:01 : Distance to traning data 0.000 (using nearest neighbor HLA-A02:01) + + # Rank Threshold for Strong binding peptides 0.500 + # Rank Threshold for Weak binding peptides 2.000 + ----------------------------------------------------------------------------------------------------- + pos HLA peptide Identity Pred Thalf(h) %Rank_Stab BindLevel + ----------------------------------------------------------------------------------------------------- + 0 HLA-A*02:01 AAAAAAAAAA PEPLIST 0.075 0.27 19.00 + ----------------------------------------------------------------------------------------------------- + + Protein PEPLIST. Allele HLA-A*02:01. Number of high binders 0. Number of weak binders 0. Number of peptides 1 + + ----------------------------------------------------------------------------------------------------- + """ + + # the offset specified in "pos" (at index 0) is 1-based instead of 0-based. we adjust it to be + # 0-based, as in all the other netmhc predictors supported by this library. + transforms = { + 0: lambda x: int(x) - 1, + } + return parse_stdout( + stdout=stdout, + prediction_method_name=prediction_method_name, + sequence_key_mapping=sequence_key_mapping, + key_index=3, + offset_index=0, + peptide_index=2, + allele_index=1, + score_index=5, + rank_index=6, + transforms=transforms) + diff --git a/tests/test_netmhc_stabpan.py b/tests/test_netmhc_stabpan.py new file mode 100644 index 0000000..577dfed --- /dev/null +++ b/tests/test_netmhc_stabpan.py @@ -0,0 +1,62 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np +from mhctools import NetMHCstabpan + + +DEFAULT_ALLELE = 'HLA-A*02:01' + +# Protein sequences to test with against known values of netMHCstabpan web server. +protein_sequences = [ + "ALDKNLHQL", + "ALHEEVVCV", + "ALPPTVYEV", + "AVLGSFSYV", + "EMASVLFKA", +] + +web_server_predictions = [ + 4.6393, + 10.6011, + 11.8201, + 4.6722, + 1.8404, +] + +# REMINDER: a program called "netMHCstabpan" MUST be installed and working for +# this test suite to succeeed. Also all peptides must be the same length. + + +def test_netmhc_stabpan_accuracy(): + # Check that the netMHCstabpan program is working and returning th eexpected outputs. + predictor = NetMHCstabpan( + alleles=[DEFAULT_ALLELE], program_name='netMHCstabpan' + ) + + binding_predictions = predictor.predict_peptides(protein_sequences) + stability_predictions = [p.score for p in binding_predictions] + rank_predictions = [p.percentile_rank for p in binding_predictions] + + assert len(web_server_predictions) == len(binding_predictions) + assert len(stability_predictions) == len(binding_predictions) + + for prank in rank_predictions: + # Make sure that correct mapping is done by checking percentiles aren't above 100. + assert prank < 100 + + for i, (expected, actual) in enumerate(zip(web_server_predictions, stability_predictions)): + # Check to make sure that the stability predictions are within 0.01 of the webserver values. + # This could be the result of different versions of dependencies or the nature of the ANN itself. + np.testing.assert_allclose(expected, actual, atol=0.01, error_msg="Peptide %d: expected %f but got %f" % (i, expected, actual)) + + \ No newline at end of file