Merge pull request #167 from dhuvik/netmhcstabpan

Added support for NetMHCstabpan (GH Issue #64)
openvax · Mar 13, 2024 · 9837a5c · 9837a5c
2 parents 8c78caf + c57e9a6
commit 9837a5c
Show file tree

Hide file tree

Showing 7 changed files with 153 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -39,6 +39,7 @@ htmlcov/
 .cache
 nosetests.xml
 coverage.xml
+.hypothesis/
 
 # Translations
 *.mo

diff --git a/.hypothesis/unicode_data/13.0.0/charmap.json.gz b/.hypothesis/unicode_data/13.0.0/charmap.json.gz
diff --git a/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz b/.hypothesis/unicode_data/13.0.0/codec-utf-8.json.gz
diff --git a/mhctools/__init__.py b/mhctools/__init__.py
@@ -21,6 +21,7 @@
 from .netmhc_pan41 import NetMHCpan41, NetMHCpan41_BA, NetMHCpan41_EL
 from .netmhcii_pan import NetMHCIIpan, NetMHCIIpan3, NetMHCIIpan4, NetMHCIIpan4_BA, NetMHCIIpan4_EL
 from .random_predictor import RandomBindingPredictor
+from .netmhcstabpan import NetMHCstabpan
 from .unsupported_allele import UnsupportedAllele
 
 __version__ = "1.9.0"
@@ -54,6 +55,7 @@
     "NetMHCIIpan4",
     "NetMHCIIpan4_BA",
     "NetMHCIIpan4_EL",
+    "NetMHCstabpan",
     "RandomBindingPredictor",
     "UnsupportedAllele",
 ]
diff --git a/mhctools/netmhcstabpan.py b/mhctools/netmhcstabpan.py
@@ -0,0 +1,45 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .base_commandline_predictor import BaseCommandlinePredictor
+from .parsing import parse_netmhcstabpan
+
+class NetMHCstabpan(BaseCommandlinePredictor):
+    def __init__(
+            self,
+            alleles,
+            default_peptide_lengths=[],
+            program_name="netMHCstabpan",
+            process_limit=-1,
+            flags=[]):
+        """
+        Wrapper for NetMHCstabpan.
+        """
+        BaseCommandlinePredictor.__init__(
+            self,
+            program_name=program_name,
+            alleles=alleles,
+            default_peptide_lengths=default_peptide_lengths,
+            parse_output_fn=parse_netmhcstabpan,
+            supported_alleles_flag="-listMHC",
+            input_file_flag="-f",
+            length_flag="-l",
+            allele_flag="-a",
+            extra_flags=flags,
+            process_limit=process_limit)
+
+    def predict_peptides(self, peptides):
+        peptide_lengths = set(len(p) for p in peptides)
+        if len(peptide_lengths) > 1:
+            raise ValueError("All peptides must be the same length")
+        return super().predict_peptides(peptides)
+
diff --git a/mhctools/parsing.py b/mhctools/parsing.py
@@ -600,3 +600,46 @@ def parse_netmhciipan4_stdout(
         rank_index=8 if mode == "elution_score" else 12,
         score_index=7 if mode == "elution_score" else 10,
         transforms=transforms)
+
+def parse_netmhcstabpan(
+        stdout,
+        prediction_method_name="netmhcstabpan",
+        sequence_key_mapping=None,
+        mode=None):
+    """
+    # NetMHCstabpan version 1.0
+
+    # Input is in PEPTIDE format
+
+    HLA-A02:01 : Distance to traning data  0.000 (using nearest neighbor HLA-A02:01)
+
+    # Rank Threshold for Strong binding peptides   0.500
+    # Rank Threshold for Weak binding peptides   2.000
+    -----------------------------------------------------------------------------------------------------
+     pos      HLA         peptide         Identity       Pred     Thalf(h) %Rank_Stab BindLevel
+    -----------------------------------------------------------------------------------------------------
+        0  HLA-A*02:01   AAAAAAAAAA         PEPLIST      0.075       0.27      19.00
+    -----------------------------------------------------------------------------------------------------
+
+    Protein PEPLIST. Allele HLA-A*02:01. Number of high binders 0. Number of weak binders 0. Number of peptides 1
+
+    -----------------------------------------------------------------------------------------------------
+    """
+
+    # the offset specified in "pos" (at index 0) is 1-based instead of 0-based. we adjust it to be
+    # 0-based, as in all the other netmhc predictors supported by this library.
+    transforms = {
+        0: lambda x: int(x) - 1,
+    }
+    return parse_stdout(
+        stdout=stdout,
+        prediction_method_name=prediction_method_name,
+        sequence_key_mapping=sequence_key_mapping,
+        key_index=3,
+        offset_index=0,
+        peptide_index=2,
+        allele_index=1,
+        score_index=5,
+        rank_index=6,
+        transforms=transforms)
+
diff --git a/tests/test_netmhc_stabpan.py b/tests/test_netmhc_stabpan.py
@@ -0,0 +1,62 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from mhctools import NetMHCstabpan
+
+
+DEFAULT_ALLELE = 'HLA-A*02:01'
+
+# Protein sequences to test with against known values of netMHCstabpan web server.
+protein_sequences = [
+    "ALDKNLHQL",
+    "ALHEEVVCV",
+    "ALPPTVYEV",
+    "AVLGSFSYV",
+    "EMASVLFKA",
+]
+
+web_server_predictions = [
+    4.6393,
+    10.6011,
+    11.8201,
+    4.6722,
+    1.8404,
+]
+
+# REMINDER: a program called "netMHCstabpan" MUST be installed and working for
+# this test suite to succeeed. Also all peptides must be the same length.
+
+
+def test_netmhc_stabpan_accuracy():    
+    # Check that the netMHCstabpan program is working and returning th eexpected outputs.
+    predictor = NetMHCstabpan(
+    alleles=[DEFAULT_ALLELE], program_name='netMHCstabpan'
+    )
+
+    binding_predictions = predictor.predict_peptides(protein_sequences)
+    stability_predictions = [p.score for p in binding_predictions]
+    rank_predictions = [p.percentile_rank for p in binding_predictions]
+
+    assert len(web_server_predictions) == len(binding_predictions)
+    assert len(stability_predictions) == len(binding_predictions)
+
+    for prank in rank_predictions:
+        # Make sure that correct mapping is done by checking percentiles aren't above 100.
+        assert prank < 100
+
+    for i, (expected, actual) in enumerate(zip(web_server_predictions, stability_predictions)):
+        # Check to make sure that the stability predictions are within 0.01 of the webserver values.
+        # This could be the result of different versions of dependencies or the nature of the ANN itself.
+        np.testing.assert_allclose(expected, actual, atol=0.01, error_msg="Peptide %d: expected %f but got %f" % (i, expected, actual))
+
+