From 29f54f7641410618d47c03137d9289fd4190f956 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 14 Mar 2024 11:54:15 -0400
Subject: [PATCH 1/3] Set upper limit on tensorflow version. Fixes #234

---
 mhcflurry/version.py | 2 +-
 requirements.txt     | 2 +-
 setup.py             | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/mhcflurry/version.py b/mhcflurry/version.py
index 9aa3f903..58039f50 100644
--- a/mhcflurry/version.py
+++ b/mhcflurry/version.py
@@ -1 +1 @@
-__version__ = "2.1.0"
+__version__ = "2.1.1"
diff --git a/requirements.txt b/requirements.txt
index cab7d556..bcae36f4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 six
 pandas>=0.20.3
-tensorflow>=2.12.0
+tensorflow>=2.12.0,<2.16.0
 appdirs
 scikit-learn
 mhcgnomes
diff --git a/setup.py b/setup.py
index 8a44eacf..e5ab6dce 100644
--- a/setup.py
+++ b/setup.py
@@ -41,7 +41,7 @@
         "mhcgnomes>=0.8.4",
         "pyyaml",
         "tqdm",
-        "tensorflow>=2.12.0",
+        "tensorflow>=2.12.0,<2.16.0",
     ]
 
     setup(

From 006cd57bf1c647ce09c9357e576fb525b8f0c4a8 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 14 Mar 2024 16:25:19 -0400
Subject: [PATCH 2/3] Fix tests to work with pytest 8

---
 ...test_calibrate_percentile_ranks_command.py |  9 +-
 test/test_changing_allele_representations.py  |  9 +-
 test/test_class1_neural_network.py            | 14 ++-
 test/test_class1_pan.py                       |  9 +-
 test/test_class1_presentation_predictor.py    | 97 ++++++++-----------
 test/test_class1_processing_neural_network.py |  9 +-
 test/test_custom_loss.py                      |  8 +-
 test/test_doctest.py                          | 10 +-
 test/test_download_models_class1.py           | 59 ++++++-----
 test/test_multi_output.py                     | 13 ++-
 test/test_network_merging.py                  | 34 +++----
 test/test_predict_command.py                  |  9 +-
 test/test_predict_scan_command.py             | 11 ++-
 ...test_released_predictors_on_hpv_dataset.py | 37 +++----
 ...est_released_predictors_well_correlated.py | 28 +++---
 test/test_speed.py                            | 58 ++++++-----
 test/test_train_and_related_commands.py       |  9 +-
 test/test_train_pan_allele_models_command.py  |  9 +-
 test/test_train_processing_models_command.py  | 10 +-
 19 files changed, 232 insertions(+), 210 deletions(-)

diff --git a/test/test_calibrate_percentile_ranks_command.py b/test/test_calibrate_percentile_ranks_command.py
index c29c6233..11ea3ef9 100644
--- a/test/test_calibrate_percentile_ranks_command.py
+++ b/test/test_calibrate_percentile_ranks_command.py
@@ -8,6 +8,7 @@
 import shutil
 import tempfile
 import subprocess
+import pytest
 
 from numpy.testing import assert_equal
 
@@ -17,8 +18,12 @@
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 
 def run_and_check(n_jobs=0, delete=True, additional_args=[]):
diff --git a/test/test_changing_allele_representations.py b/test/test_changing_allele_representations.py
index 3fc67479..1f89de09 100644
--- a/test/test_changing_allele_representations.py
+++ b/test/test_changing_allele_representations.py
@@ -2,14 +2,19 @@
 initialize()
 
 import pandas
+import pytest
 
 from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
 from mhcflurry.downloads import get_path
 
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 ALLELE_TO_SEQUENCE = pandas.read_csv(
     get_path(
diff --git a/test/test_class1_neural_network.py b/test/test_class1_neural_network.py
index 7f1cb14b..c948df6c 100644
--- a/test/test_class1_neural_network.py
+++ b/test/test_class1_neural_network.py
@@ -4,6 +4,8 @@
 import numpy
 from numpy import testing
 
+import pytest
+
 from nose.tools import eq_, assert_less, assert_greater, assert_almost_equal
 
 import pandas
@@ -13,11 +15,15 @@
 from mhcflurry.common import random_peptides
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(scope="module")  # not autouse: tests opt in by taking `setup_module` as an argument
+def setup_module():
+    startup()  # runs before the first requesting test in this module
+    yield
+    cleanup()  # runs when the module-scoped fixture is torn down
 
 
-def test_class1_neural_network_a0205_training_accuracy():
+def test_class1_neural_network_a0205_training_accuracy(setup_module):
     # Memorize the dataset.
     hyperparameters = dict(
         activation="tanh",
@@ -72,7 +78,7 @@ def test_class1_neural_network_a0205_training_accuracy():
     eq_(predictor.network().to_json(), predictor2.network().to_json())
 
 
-def test_inequalities():
+def test_inequalities(setup_module):
     # Memorize the dataset.
     hyperparameters = dict(
         peptide_amino_acid_encoding="one-hot",
diff --git a/test/test_class1_pan.py b/test/test_class1_pan.py
index 49732522..c5665534 100644
--- a/test/test_class1_pan.py
+++ b/test/test_class1_pan.py
@@ -6,6 +6,7 @@
 
 from sklearn.metrics import roc_auc_score
 import pandas
+import pytest
 
 from numpy.testing import assert_, assert_equal
 
@@ -14,8 +15,12 @@
 from mhcflurry.downloads import get_path
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 
 HYPERPARAMETERS = {
diff --git a/test/test_class1_presentation_predictor.py b/test/test_class1_presentation_predictor.py
index 7e1f7978..af6f318a 100644
--- a/test/test_class1_presentation_predictor.py
+++ b/test/test_class1_presentation_predictor.py
@@ -7,6 +7,7 @@
 
 from numpy.testing import assert_, assert_equal, assert_allclose, assert_array_equal
 from nose.tools import assert_greater, assert_less
+import pytest
 import numpy
 
 from sklearn.metrics import roc_auc_score
@@ -20,44 +21,28 @@
 
 from . import data_path
 
-AFFINITY_PREDICTOR = None
-CLEAVAGE_PREDICTOR = None
-CLEAVAGE_PREDICTOR_NO_FLANKING = None
-PRESENTATION_PREDICTOR = None
 
-
-def setup():
-    global AFFINITY_PREDICTOR
-    global CLEAVAGE_PREDICTOR
-    global CLEAVAGE_PREDICTOR_NO_FLANKING
-    global PRESENTATION_PREDICTOR
+@pytest.fixture(scope="module")
+def predictors():
     startup()
-    AFFINITY_PREDICTOR = Class1AffinityPredictor.load(
-        get_path("models_class1_pan", "models.combined"),
-        optimization_level=0,
-        max_models=1)
-    CLEAVAGE_PREDICTOR = Class1ProcessingPredictor.load(
-        get_path("models_class1_processing", "models.selected.with_flanks"),
-        max_models=1)
-    CLEAVAGE_PREDICTOR_NO_FLANKING = Class1ProcessingPredictor.load(
-        get_path("models_class1_processing", "models.selected.no_flank"),
-        max_models=1)
-    PRESENTATION_PREDICTOR = Class1PresentationPredictor.load()
-
-
-def teardown():
-    global AFFINITY_PREDICTOR
-    global CLEAVAGE_PREDICTOR
-    global CLEAVAGE_PREDICTOR_NO_FLANKING
-    global PRESENTATION_PREDICTOR
-    AFFINITY_PREDICTOR = None
-    CLEAVAGE_PREDICTOR = None
-    CLEAVAGE_PREDICTOR_NO_FLANKING = None
-    PRESENTATION_PREDICTOR = None
+    predictors = {
+        'affinity_predictor': Class1AffinityPredictor.load(
+            get_path("models_class1_pan", "models.combined"),
+            optimization_level=0,
+            max_models=1),
+        'cleavage_predictor': Class1ProcessingPredictor.load(
+            get_path("models_class1_processing", "models.selected.with_flanks"),
+            max_models=1),
+        'cleavage_predictor_no_flanking': Class1ProcessingPredictor.load(
+            get_path("models_class1_processing", "models.selected.no_flank"),
+            max_models=1),
+        'presentation_predictor': Class1PresentationPredictor.load()
+    }
+    yield predictors
     cleanup()
 
 
-def test_basic():
+def test_basic(predictors):
     df = pandas.read_csv(data_path("multiallelic.benchmark.small.csv.bz2"))
     train_df = df.loc[
         df.sample_id.isin(sorted(df.sample_id.unique())[:3])
@@ -71,9 +56,9 @@ def test_basic():
         df.drop_duplicates("sample_id").set_index("sample_id").hla.str.split().to_dict())
 
     predictor = Class1PresentationPredictor(
-        affinity_predictor=AFFINITY_PREDICTOR,
-        processing_predictor_without_flanks=CLEAVAGE_PREDICTOR_NO_FLANKING,
-        processing_predictor_with_flanks=CLEAVAGE_PREDICTOR)
+        affinity_predictor=predictors["affinity_predictor"],
+        processing_predictor_without_flanks=predictors['cleavage_predictor_no_flanking'],
+        processing_predictor_with_flanks=predictors['cleavage_predictor'])
 
     predictor.fit(
         targets=train_df.hit.values,
@@ -159,11 +144,11 @@ def add_prediction_cols(test_df, predictor):
             test_df["prediction2"], other_test_df["prediction2"], decimal=6)
 
 
-def test_downloaded_predictor_small():
-    global PRESENTATION_PREDICTOR
+def test_downloaded_predictor_small(predictors):
+    presentation_predictor = predictors['presentation_predictor']
 
     # Test sequence scanning
-    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results = presentation_predictor.predict_sequences(
         sequences=[
             "MESLVPGFN",
             "QPYVFIKRS",
@@ -178,7 +163,7 @@ def test_downloaded_predictor_small():
     print(scan_results)
     assert_equal(len(scan_results), 6)
 
-    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results = presentation_predictor.predict_sequences(
         sequences=[
             "MESLVPGFN",
             "QPYVFIKRS",
@@ -193,7 +178,7 @@ def test_downloaded_predictor_small():
     print(scan_results)
     assert_equal(len(scan_results), 6)
 
-    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results = presentation_predictor.predict_sequences(
         sequences=[
             "MESLVPGFN",
             "QPYVFIKRS",
@@ -208,7 +193,7 @@ def test_downloaded_predictor_small():
     print(scan_results)
     assert_equal(len(scan_results), 6)
 
-    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results = presentation_predictor.predict_sequences(
         sequences=[
             "MESLVPGFN",
             "QPYVFIKRS",
@@ -223,7 +208,7 @@ def test_downloaded_predictor_small():
     print(scan_results)
     assert_equal(len(scan_results), 18)
 
-    scan_results = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results = presentation_predictor.predict_sequences(
         sequences=[
             "MESLVPGFN",
             "QPYVFIKRS",
@@ -239,11 +224,11 @@ def test_downloaded_predictor_small():
     assert_equal(len(scan_results), 0)
 
 
-def test_downloaded_predictor():
-    global PRESENTATION_PREDICTOR
+def test_downloaded_predictor(predictors):
+    presentation_predictor = predictors['presentation_predictor']
 
     # Test sequence scanning
-    scan_results1 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results1 = presentation_predictor.predict_sequences(
         sequences=[
             "MESLVPGFNEKTHVQLSLPVLQVRDVLVRGFGDSVEEVLSEARQHLKDGTCGLVEVEKGVLPQLE",
             "QPYVFIKRSDARTAPHGHVMVELVAELEGIQYGRSGETLGVLVPHVGEIPVAYRKVLLRKNGNKG",
@@ -263,7 +248,7 @@ def test_downloaded_predictor():
     assert (scan_results1.affinity < 200).all(), str(scan_results1)
     assert (scan_results1.presentation_score > 0.7).all(), str(scan_results1)
 
-    scan_results2 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results2 = presentation_predictor.predict_sequences(
         result="filtered",
         filter_value=500,
         comparison_quantity="affinity",
@@ -285,7 +270,7 @@ def test_downloaded_predictor():
     assert len(scan_results2) > 10
     assert (scan_results2.affinity <= 500).all()
 
-    scan_results3 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results3 = presentation_predictor.predict_sequences(
         result="filtered",
         filter_value=0.9,
         comparison_quantity="presentation_score",
@@ -307,7 +292,7 @@ def test_downloaded_predictor():
     assert len(scan_results3) >= 5, len(scan_results3)
     assert (scan_results3.presentation_score >= 0.9).all()
 
-    scan_results4 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results4 = presentation_predictor.predict_sequences(
         result="all",
         comparison_quantity="affinity",
         sequences={
@@ -337,7 +322,7 @@ def test_downloaded_predictor():
             "AGGHSYGADLKSFDLGDELGTDPYEDFQENWNTKHSSGVTRELMRELNGGAYTRYVDNNFCGPDG",
     }
 
-    scan_results5 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results5 = presentation_predictor.predict_sequences(
         result="all",
         comparison_quantity="affinity",
         sequences=sequences,
@@ -363,7 +348,7 @@ def test_downloaded_predictor():
     assert_equal(len(scan_results5), len(scan_results4) * 2)
 
     # Test case-insensitive.
-    scan_results6 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results6 = presentation_predictor.predict_sequences(
         result="all",
         comparison_quantity="affinity",
         sequences=dict((k, v.lower()) for (k, v) in sequences.items()),
@@ -399,7 +384,7 @@ def test_downloaded_predictor():
         scan_results6.presentation_score.values,
         scan_results5.presentation_score.values)
 
-    scan_results7 = PRESENTATION_PREDICTOR.predict_sequences(
+    scan_results7 = presentation_predictor.predict_sequences(
         result="all",
         comparison_quantity="affinity",
         sequences={
@@ -422,8 +407,8 @@ def test_downloaded_predictor():
     assert "DNNFCGPdg" in scan_results7.peptide.values, scan_results7.peptide
 
 
-def test_downloaded_predictor_invalid_peptides():
-    global PRESENTATION_PREDICTOR
+def test_downloaded_predictor_invalid_peptides(predictors):
+    presentation_predictor = predictors['presentation_predictor']
 
     peptides = [
         "SIINFEKL",
@@ -441,11 +426,11 @@ def test_downloaded_predictor_invalid_peptides():
 
     numpy.testing.assert_raises(
         ValueError,
-        PRESENTATION_PREDICTOR.predict,
+        presentation_predictor.predict,
         peptides=peptides,
         alleles=alleles)
 
-    results1 = PRESENTATION_PREDICTOR.predict(
+    results1 = presentation_predictor.predict(
         peptides=peptides,
         alleles=alleles,
         throw=False).presentation_score.values
diff --git a/test/test_class1_processing_neural_network.py b/test/test_class1_processing_neural_network.py
index b7427ef8..b88ee272 100644
--- a/test/test_class1_processing_neural_network.py
+++ b/test/test_class1_processing_neural_network.py
@@ -10,6 +10,7 @@
 from nose.tools import eq_, assert_less, assert_greater, assert_almost_equal
 
 import pandas
+import pytest
 
 from mhcflurry.class1_processing_neural_network import Class1ProcessingNeuralNetwork
 from mhcflurry.common import random_peptides
@@ -17,8 +18,12 @@
 from mhcflurry.flanking_encoding import FlankingEncoding
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")  # autouse: applies to every test in this module
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 table = dict([
     (tuple(encoding), amino_acid)
diff --git a/test/test_custom_loss.py b/test/test_custom_loss.py
index fc0df346..4415fcfe 100644
--- a/test/test_custom_loss.py
+++ b/test/test_custom_loss.py
@@ -5,12 +5,16 @@
 
 import numpy
 import tensorflow as tf
+import pytest
 from mhcflurry.custom_loss import CUSTOM_LOSSES, MultiallelicMassSpecLoss
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
 
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 def evaluate_loss(loss, y_true, y_pred):
     y_true = tf.convert_to_tensor(y_true, dtype='float32', name='y_true')
diff --git a/test/test_doctest.py b/test/test_doctest.py
index 93a8b1ce..fa398770 100644
--- a/test/test_doctest.py
+++ b/test/test_doctest.py
@@ -8,6 +8,8 @@
 import doctest
 
 import pandas
+import pytest
+
 
 import mhcflurry
 import mhcflurry.class1_presentation_predictor
@@ -15,8 +17,12 @@
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 
 def test_doctests():
diff --git a/test/test_download_models_class1.py b/test/test_download_models_class1.py
index acb5f12f..eaff952b 100644
--- a/test/test_download_models_class1.py
+++ b/test/test_download_models_class1.py
@@ -6,99 +6,94 @@
 
 import pickle
 import tempfile
+import pytest
 
 from mhcflurry import Class1AffinityPredictor, Class1NeuralNetwork
 
 from mhcflurry.testing_utils import cleanup, startup
 
-DOWNLOADED_PREDICTOR = None
-
-
-def setup():
-    global DOWNLOADED_PREDICTOR
+# Define a fixture to initialize and clean up predictors
+@pytest.fixture(scope="module")
+def downloaded_predictor():
     startup()
-    DOWNLOADED_PREDICTOR = Class1AffinityPredictor.load()
-
-
-def teardown():
-    global DOWNLOADED_PREDICTOR
-    DOWNLOADED_PREDICTOR = None
+    yield Class1AffinityPredictor.load()
     cleanup()
 
 
 def predict_and_check(
+        downloaded_predictor,
         allele,
         peptide,
         expected_range=(0, 500)):
 
     print("\n%s" % (
-        DOWNLOADED_PREDICTOR.predict_to_dataframe(
+        downloaded_predictor.predict_to_dataframe(
             peptides=[peptide],
             allele=allele,
             include_individual_model_predictions=True)))
 
-    (prediction,) = DOWNLOADED_PREDICTOR.predict(allele=allele, peptides=[peptide])
-    assert prediction >= expected_range[0], (DOWNLOADED_PREDICTOR, prediction)
-    assert prediction <= expected_range[1], (DOWNLOADED_PREDICTOR, prediction)
+    (prediction,) = downloaded_predictor.predict(allele=allele, peptides=[peptide])
+    assert prediction >= expected_range[0], (downloaded_predictor, prediction)
+    assert prediction <= expected_range[1], (downloaded_predictor, prediction)
 
 
-def test_a1_titin_epitope_downloaded_models():
+def test_a1_titin_epitope_downloaded_models(downloaded_predictor):
     # Test the A1 Titin epitope ESDPIVAQY from
     #   Identification of a Titin-Derived HLA-A1-Presented Peptide
     #   as a Cross-Reactive Target for Engineered MAGE A3-Directed
     #   T Cells
-    predict_and_check("HLA-A*01:01", "ESDPIVAQY")
+    predict_and_check(downloaded_predictor, "HLA-A*01:01", "ESDPIVAQY")
 
 
-def test_a1_mage_epitope_downloaded_models():
+def test_a1_mage_epitope_downloaded_models(downloaded_predictor):
     # Test the A1 MAGE epitope EVDPIGHLY from
     #   Identification of a Titin-Derived HLA-A1-Presented Peptide
     #   as a Cross-Reactive Target for Engineered MAGE A3-Directed
     #   T Cells
-    predict_and_check("HLA-A*01:01", "EVDPIGHLY")
+    predict_and_check(downloaded_predictor, "HLA-A*01:01", "EVDPIGHLY")
 
 
-def test_a2_hiv_epitope_downloaded_models():
+def test_a2_hiv_epitope_downloaded_models(downloaded_predictor):
     # Test the A2 HIV epitope SLYNTVATL from
     #    The HIV-1 HLA-A2-SLYNTVATL Is a Help-Independent CTL Epitope
-    predict_and_check("HLA-A*02:01", "SLYNTVATL")
+    predict_and_check(downloaded_predictor, "HLA-A*02:01", "SLYNTVATL")
 
 
-def test_caching():
-    if not DOWNLOADED_PREDICTOR.allele_to_sequence:
+def test_caching(downloaded_predictor):
+    if not downloaded_predictor.allele_to_sequence:
         # Only run this test on allele-specific predictors.
         Class1NeuralNetwork.KERAS_MODELS_CACHE.clear()
-        DOWNLOADED_PREDICTOR.predict(
+        downloaded_predictor.predict(
             peptides=["SIINFEKL"],
             allele="HLA-A*02:01")
         num_cached = len(Class1NeuralNetwork.KERAS_MODELS_CACHE)
         assert num_cached > 0
 
 
-def test_downloaded_predictor_is_serializable():
-    predictor_copy = pickle.loads(pickle.dumps(DOWNLOADED_PREDICTOR))
+def test_downloaded_predictor_is_serializable(downloaded_predictor):
+    predictor_copy = pickle.loads(pickle.dumps(downloaded_predictor))
     numpy.testing.assert_equal(
-        DOWNLOADED_PREDICTOR.predict(
+        downloaded_predictor.predict(
             ["RSKERAVVVAW"], allele="HLA-A*01:01")[0],
         predictor_copy.predict(
             ["RSKERAVVVAW"], allele="HLA-A*01:01")[0])
 
 
-def test_downloaded_predictor_is_savable():
+def test_downloaded_predictor_is_savable(downloaded_predictor):
     models_dir = tempfile.mkdtemp("_models")
     print(models_dir)
-    DOWNLOADED_PREDICTOR.save(models_dir)
+    downloaded_predictor.save(models_dir)
     predictor_copy = Class1AffinityPredictor.load(models_dir)
 
     numpy.testing.assert_equal(
-        DOWNLOADED_PREDICTOR.predict(
+        downloaded_predictor.predict(
             ["RSKERAVVVAW"], allele="HLA-A*01:01")[0],
         predictor_copy.predict(
             ["RSKERAVVVAW"], allele="HLA-A*01:01")[0])
 
 
-def test_downloaded_predictor_gives_percentile_ranks():
-    predictions = DOWNLOADED_PREDICTOR.predict_to_dataframe(
+def test_downloaded_predictor_gives_percentile_ranks(downloaded_predictor):
+    predictions = downloaded_predictor.predict_to_dataframe(
         peptides=["SAQGQFSAV", "SAQGQFSAV"],
         alleles=["HLA-A*03:01", "HLA-C*01:02"])
 
diff --git a/test/test_multi_output.py b/test/test_multi_output.py
index f910d1c9..4ad87772 100644
--- a/test/test_multi_output.py
+++ b/test/test_multi_output.py
@@ -3,19 +3,22 @@
 
 import numpy
 import pandas
-from numpy import testing
-
+import pytest
 numpy.random.seed(0)
 
 from mhcflurry.class1_neural_network import Class1NeuralNetwork
 from mhcflurry.common import random_peptides
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(scope="module")  # not autouse: tests opt in by taking `setup_module` as an argument
+def setup_module():
+    startup()  # runs before the first requesting test in this module
+    yield
+    cleanup()  # runs when the module-scoped fixture is torn down
 
 
-def test_multi_output():
+def test_multi_output(setup_module):
     hyperparameters = dict(
         loss="custom:mse_with_inequalities_and_multiple_outputs",
         activation="tanh",
diff --git a/test/test_network_merging.py b/test/test_network_merging.py
index 3067bf65..744c15a5 100644
--- a/test/test_network_merging.py
+++ b/test/test_network_merging.py
@@ -3,31 +3,31 @@
 
 import numpy
 import pandas
+import pytest
+
 from mhcflurry import Class1AffinityPredictor, Class1NeuralNetwork
 from mhcflurry.common import random_peptides
 from mhcflurry.downloads import get_path
 
 from mhcflurry.testing_utils import cleanup, startup
 
-PAN_ALLELE_PREDICTOR = None
-
 
-def setup():
-    global PAN_ALLELE_PREDICTOR
+# Module-scoped fixture: load the pan-allele predictor once, clean up after the module's tests
+@pytest.fixture(scope="module")
+def predictors():
     startup()
-    PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
-        get_path("models_class1_pan", "models.combined"),
-        optimization_level=0,)
-
-
-def teardown():
-    global PAN_ALLELE_PREDICTOR
-    PAN_ALLELE_PREDICTOR = None
+    predictors_dict = {  # only 'pan-allele' is read by test_merge; matches what the old setup() loaded
+        'pan-allele': Class1AffinityPredictor.load(
+            get_path("models_class1_pan", "models.combined"), optimization_level=0),
+    }
+    yield predictors_dict
     cleanup()
 
 
-def test_merge():
-    assert len(PAN_ALLELE_PREDICTOR.class1_pan_allele_models) > 1
+def test_merge(predictors):
+    pan_allele_predictor = predictors['pan-allele']
+
+    assert len(pan_allele_predictor.class1_pan_allele_models) > 1
     peptides = random_peptides(100, length=9)
     peptides.extend(random_peptides(100, length=10))
     peptides = pandas.Series(peptides).sample(frac=1.0)
@@ -36,13 +36,13 @@ def test_merge():
         ["HLA-A*03:01", "HLA-B*57:01", "HLA-C*02:01"]
     ).sample(n=len(peptides), replace=True)
 
-    predictions1 = PAN_ALLELE_PREDICTOR.predict(
+    predictions1 = pan_allele_predictor.predict(
         peptides=peptides, alleles=alleles)
 
     merged = Class1NeuralNetwork.merge(
-        PAN_ALLELE_PREDICTOR.class1_pan_allele_models)
+        pan_allele_predictor.class1_pan_allele_models)
     merged_predictor = Class1AffinityPredictor(
-        allele_to_sequence=PAN_ALLELE_PREDICTOR.allele_to_sequence,
+        allele_to_sequence=pan_allele_predictor.allele_to_sequence,
         class1_pan_allele_models=[merged],
     )
     predictions2 = merged_predictor.predict(peptides=peptides, alleles=alleles)
diff --git a/test/test_predict_command.py b/test/test_predict_command.py
index f4a8ec4d..6769e20d 100644
--- a/test/test_predict_command.py
+++ b/test/test_predict_command.py
@@ -5,6 +5,7 @@
 import os
 
 import pandas
+import pytest
 from numpy.testing import assert_equal
 
 import tensorflow as tf
@@ -15,8 +16,12 @@
 from mhcflurry import predict_command
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
 
 TEST_CSV = '''
 Allele,Peptide,Experiment
diff --git a/test/test_predict_scan_command.py b/test/test_predict_scan_command.py
index 2c7e3685..0e7d8835 100644
--- a/test/test_predict_scan_command.py
+++ b/test/test_predict_scan_command.py
@@ -5,16 +5,21 @@
 import os
 
 import pandas
+import pytest
 from numpy.testing import assert_equal, assert_array_less, assert_array_equal
 
 from mhcflurry import predict_scan_command
 
+from . import data_path
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
 
-from . import data_path
+@pytest.fixture(autouse=True, scope="module")  # '@' required: a bare call never registers the fixture
+def setup_module():
+    startup()  # once, before this module's tests
+    yield
+    cleanup()  # once, after this module's tests
+
 
 
 def read_output_csv(filename):
diff --git a/test/test_released_predictors_on_hpv_dataset.py b/test/test_released_predictors_on_hpv_dataset.py
index 88899106..31438a89 100644
--- a/test/test_released_predictors_on_hpv_dataset.py
+++ b/test/test_released_predictors_on_hpv_dataset.py
@@ -10,6 +10,7 @@
 
 import os
 import pandas
+import pytest
 from sklearn.metrics import roc_auc_score
 from nose.tools import assert_greater
 
@@ -27,29 +28,23 @@ def data_path(name):
 
 
 DF = pandas.read_csv(data_path("hpv_predictions.csv"))
-PREDICTORS = None
 
 
-def setup():
-    global PREDICTORS
+# Define a fixture to initialize and clean up predictors
+@pytest.fixture(scope="module")
+def predictors():
     startup()
-    PREDICTORS = {
-        'allele-specific': Class1AffinityPredictor.load(
-            get_path("models_class1", "models")),
-        'pan-allele': Class1AffinityPredictor.load(
-            get_path("models_class1_pan", "models.combined"))
-}
-
-
-def teardown():
-    global PREDICTORS
-    PREDICTORS = None
+    predictors_dict = {
+        'allele-specific': Class1AffinityPredictor.load(get_path("models_class1", "models")),
+        'pan-allele': Class1AffinityPredictor.load(get_path("models_class1_pan", "models.combined")),
+    }
+    yield predictors_dict
     cleanup()
 
 
-def test_on_hpv(df=DF):
+def test_on_hpv(predictors, df=DF):
     scores_df = []
-    for (name, predictor) in PREDICTORS.items():
+    for (name, predictor) in predictors.items():
         print("Running", name)
         df[name] = predictor.predict(df.peptide, alleles=df.allele)
 
@@ -74,13 +69,3 @@ def test_on_hpv(df=DF):
     assert_greater(mean_scores["allele-specific"], mean_scores["netmhcpan4"])
     assert_greater(mean_scores["pan-allele"], mean_scores["netmhcpan4"])
     return scores_df
-
-
-if __name__ == '__main__':
-    # If run directly from python, leave the user in a shell to explore results.
-    setup()
-    result = test_on_hpv()
-
-    # Leave in ipython
-    import ipdb  # pylint: disable=import-error
-    ipdb.set_trace()
diff --git a/test/test_released_predictors_well_correlated.py b/test/test_released_predictors_well_correlated.py
index 00857ef5..5d36fc32 100644
--- a/test/test_released_predictors_well_correlated.py
+++ b/test/test_released_predictors_well_correlated.py
@@ -6,7 +6,7 @@
 logging.getLogger('tensorflow').disabled = True
 logging.getLogger('matplotlib').disabled = True
 
-import os
+import pytest
 import sys
 import argparse
 import pandas
@@ -20,27 +20,21 @@
 
 from mhcflurry.testing_utils import cleanup, startup
 
-PREDICTORS = None
 
-
-def setup():
-    global PREDICTORS
+# Module-scoped fixture: load both released predictors once, clean up after the module's tests
+@pytest.fixture(scope="module")
+def predictors():
     startup()
-    PREDICTORS = {
-        'allele-specific': Class1AffinityPredictor.load(
-            get_path("models_class1", "models")),
-        'pan-allele': Class1AffinityPredictor.load(
-            get_path("models_class1_pan", "models.combined"), max_models=2)
+    predictors_dict = {  # keep max_models=2 on the pan-allele load, as the replaced setup() did
+        'allele-specific': Class1AffinityPredictor.load(get_path("models_class1", "models")),
+        'pan-allele': Class1AffinityPredictor.load(get_path("models_class1_pan", "models.combined"), max_models=2),
     }
-
-
-def teardown():
-    global PREDICTORS
-    PREDICTORS = None
+    yield predictors_dict
     cleanup()
 
 
 def test_correlation(
+        predictors,
         alleles=None,
         num_peptides_per_length=1000,
         lengths=[8, 9, 10],
@@ -54,14 +48,14 @@ def test_correlation(
 
     if alleles is None:
         alleles = set.intersection(*[
-            set(predictor.supported_alleles) for predictor in PREDICTORS.values()
+            set(predictor.supported_alleles) for predictor in predictors.values()
         ])
     alleles = sorted(set(alleles))
     df = pandas.DataFrame(index=peptides.sequences)
 
     results_df = []
     for allele in alleles:
-        for (name, predictor) in PREDICTORS.items():
+        for (name, predictor) in predictors.items():
             df[name] = predictor.predict(peptides, allele=allele)
         correlation = numpy.corrcoef(
             numpy.log10(df["allele-specific"]),
diff --git a/test/test_speed.py b/test/test_speed.py
index 360cdfba..16cec723 100644
--- a/test/test_speed.py
+++ b/test/test_speed.py
@@ -2,17 +2,15 @@
 Profile prediction speed
 
 """
-from . import initialize
-initialize()
 
 import numpy
-numpy.random.seed(0)
 import time
 import cProfile
 import pstats
 import collections
 import argparse
 import sys
+import pytest
 
 import pandas
 
@@ -23,38 +21,35 @@
 
 from mhcflurry.testing_utils import cleanup, startup
 
+DEFAULT_NUM_PREDICTIONS = 10000
 
-ALLELE_SPECIFIC_PREDICTOR = None
-PAN_ALLELE_PREDICTOR = None
-
-
-def setup():
-    global ALLELE_SPECIFIC_PREDICTOR, PAN_ALLELE_PREDICTOR
-    startup()
-    ALLELE_SPECIFIC_PREDICTOR = Class1AffinityPredictor.load(
-        get_path("models_class1", "models"))
 
-    PAN_ALLELE_PREDICTOR = Class1AffinityPredictor.load(
-        get_path("models_class1_pan", "models.combined"))
+def load_predictors():
+    return {
+        'allele_specific': Class1AffinityPredictor.load(get_path("models_class1", "models")),
+        'pan_allele': Class1AffinityPredictor.load(get_path("models_class1_pan", "models.combined")),
+    }
 
 
-def teardown():
-    global ALLELE_SPECIFIC_PREDICTOR, PAN_ALLELE_PREDICTOR
-    ALLELE_SPECIFIC_PREDICTOR = None
-    PAN_ALLELE_PREDICTOR = None
+# Define a fixture to initialize and clean up predictors
+@pytest.fixture(scope="module")
+def predictors():
+    startup()
+    predictors_dict = load_predictors()
+    yield predictors_dict
     cleanup()
 
+@pytest.fixture(autouse=True)
+def init():
+    from . import initialize
+    initialize()
 
-DEFAULT_NUM_PREDICTIONS = 10000
-
-
-def test_speed_allele_specific(profile=False, num=DEFAULT_NUM_PREDICTIONS):
-    global ALLELE_SPECIFIC_PREDICTOR
+def test_speed_allele_specific(predictors, profile=False, num=DEFAULT_NUM_PREDICTIONS):
     starts = collections.OrderedDict()
     timings = collections.OrderedDict()
     profilers = collections.OrderedDict()
 
-    predictor = ALLELE_SPECIFIC_PREDICTOR
+    predictor = predictors['allele_specific']
 
     def start(name):
         starts[name] = time.time()
@@ -102,13 +97,12 @@ def end(name):
             (key, pstats.Stats(value)) for (key, value) in profilers.items())
 
 
-def test_speed_pan_allele(profile=False, num=DEFAULT_NUM_PREDICTIONS):
-    global PAN_ALLELE_PREDICTOR
+def test_speed_pan_allele(predictors, profile=False, num=DEFAULT_NUM_PREDICTIONS):
     starts = collections.OrderedDict()
     timings = collections.OrderedDict()
     profilers = collections.OrderedDict()
 
-    predictor = PAN_ALLELE_PREDICTOR
+    predictor = predictors['pan_allele']
 
     def start(name):
         starts[name] = time.time()
@@ -159,12 +153,15 @@ def end(name):
     # to explore results.
 
     args = parser.parse_args(sys.argv[1:])
-    setup()
+    # The removed setup() called startup() before loading the predictors.
+    startup()
+    predictors_dict = load_predictors()
 
     if "allele-specific" in args.predictor:
         print("Running allele-specific test")
         result = test_speed_allele_specific(
-            profile=True, num=args.num_predictions)
+            predictors=predictors_dict,
+            profile=True,
+            num=args.num_predictions)
         result[
             "pred_%d" % args.num_predictions
         ].sort_stats("cumtime").reverse_order().print_stats()
@@ -172,7 +169,9 @@ def end(name):
     if "pan-allele" in args.predictor:
         print("Running pan-allele test")
         result = test_speed_pan_allele(
-            profile=True, num=args.num_predictions)
+            predictors=predictors_dict,
+            profile=True,
+            num=args.num_predictions)
         result[
             "pred_%d" % args.num_predictions
         ].sort_stats("cumtime").reverse_order().print_stats()
diff --git a/test/test_train_and_related_commands.py b/test/test_train_and_related_commands.py
index 928da134..dc6e46ee 100644
--- a/test/test_train_and_related_commands.py
+++ b/test/test_train_and_related_commands.py
@@ -10,6 +10,7 @@
 import tempfile
 import subprocess
 from copy import deepcopy
+import pytest
 
 from numpy.testing import assert_array_less, assert_equal
 
@@ -17,8 +18,13 @@
 from mhcflurry.downloads import get_path
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")
+def setup_module():
+    """Run startup() once before this module's tests and cleanup() once after."""
+    startup()
+    yield
+    cleanup()
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
diff --git a/test/test_train_pan_allele_models_command.py b/test/test_train_pan_allele_models_command.py
index 6ab167c1..691a28a3 100644
--- a/test/test_train_pan_allele_models_command.py
+++ b/test/test_train_pan_allele_models_command.py
@@ -11,6 +11,7 @@
 import subprocess
 
 import pandas
+import pytest
 
 from numpy.testing import assert_equal, assert_array_less
 
@@ -18,8 +19,13 @@
 from mhcflurry.downloads import get_path
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+@pytest.fixture(autouse=True, scope="module")
+def setup_module():
+    """Run startup() once before this module's tests and cleanup() once after."""
+    startup()
+    yield
+    cleanup()
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
diff --git a/test/test_train_processing_models_command.py b/test/test_train_processing_models_command.py
index 692449d9..28766b45 100644
--- a/test/test_train_processing_models_command.py
+++ b/test/test_train_processing_models_command.py
@@ -10,6 +10,7 @@
 import tempfile
 import subprocess
 import re
+import pytest
 from copy import deepcopy
 
 from numpy.testing import assert_array_less, assert_equal
@@ -21,8 +22,14 @@
 from mhcflurry.common import random_peptides
 
 from mhcflurry.testing_utils import cleanup, startup
-teardown = cleanup
-setup = startup
+
+
+@pytest.fixture(autouse=True, scope="module")
+def setup_module():
+    """Run startup() once before this module's tests and cleanup() once after."""
+    startup()
+    yield
+    cleanup()
 
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 

From 1e043e246375571918b5974f697d6e7e8de50fa5 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 14 Mar 2024 16:30:33 -0400
Subject: [PATCH 3/3] Fix typo

---
 test/test_released_predictors_well_correlated.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_released_predictors_well_correlated.py b/test/test_released_predictors_well_correlated.py
index 5d36fc32..9bcf8f23 100644
--- a/test/test_released_predictors_well_correlated.py
+++ b/test/test_released_predictors_well_correlated.py
@@ -89,7 +89,7 @@ def test_correlation(
 
 if __name__ == '__main__':
     # If run directly from python, leave the user in a shell to explore results.
-    setup()
+    startup()
     args = parser.parse_args(sys.argv[1:])
     result = test_correlation(alleles=args.alleles, debug=True)