
Commit 5ae9488

fix: fix test failures with Transformers models in PRs from forks (#8809)
* trigger
* try pinning sentence transformers
* make integr tests run right away
* pin transformers instead
* older transformers version
* rm transformers pin
* try ignoring cache
* change ubuntu version
* try removing token
* try again
* more HF_API_TOKEN local deletions
* restore test priority
* rm leftover
* more deletions
* moreee
* more
* deletions
* restore jobs order
1 parent f1679f1 commit 5ae9488

9 files changed (+38, -19 lines)
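Every file below applies the same one-line fix: integration tests that download public models from the Hugging Face Hub now delete HF_API_TOKEN from the environment first. In PRs from forks the repository secret is unavailable, so a stale or empty token in CI can make otherwise-anonymous model downloads fail (tracked in https://github.com/deepset-ai/haystack/issues/8811). A minimal sketch of the pattern, using pytest's built-in monkeypatch fixture; the test name and body are hypothetical placeholders:

import pytest

@pytest.mark.integration
def test_example_downloads_public_model(monkeypatch):
    # Delete HF_API_TOKEN for the duration of this test only; monkeypatch
    # restores the original environment automatically at teardown.
    # raising=False makes the call a no-op when the variable is not set.
    monkeypatch.delenv("HF_API_TOKEN", raising=False)
    # ... load and run a public Hugging Face model anonymously ...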

test/components/classifiers/test_zero_shot_document_classifier.py

Lines changed: 2 additions & 1 deletion
@@ -137,7 +137,8 @@ def test_run_unit(self, hf_pipeline_mock):
         assert result["documents"][1].to_dict()["classification"]["label"] == "negative"

     @pytest.mark.integration
-    def test_run(self):
+    def test_run(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         component = TransformersZeroShotDocumentClassifier(
             model="cross-encoder/nli-deberta-v3-xsmall", labels=["positive", "negative"]
         )

test/components/embedders/test_sentence_transformers_text_embedder.py

Lines changed: 2 additions & 1 deletion
@@ -261,10 +261,11 @@ def test_run_wrong_input_format(self):
             embedder.run(text=list_integers_input)

     @pytest.mark.integration
-    def test_run_trunc(self):
+    def test_run_trunc(self, monkeypatch):
         """
         sentence-transformers/paraphrase-albert-small-v2 maps sentences & paragraphs to a 768 dimensional dense vector space
         """
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         checkpoint = "sentence-transformers/paraphrase-albert-small-v2"
         text = "a nice text to embed"


test/components/evaluators/test_sas_evaluator.py

Lines changed: 10 additions & 5 deletions
@@ -104,7 +104,8 @@ def test_run_not_warmed_up(self):
             evaluator.run(ground_truth_answers=ground_truths, predicted_answers=predictions)

     @pytest.mark.integration
-    def test_run_with_matching_predictions(self):
+    def test_run_with_matching_predictions(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         evaluator = SASEvaluator()
         ground_truths = [
             "A construction budget of US $2.3 billion",
@@ -124,7 +125,8 @@ def test_run_with_matching_predictions(self):
         assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0])

     @pytest.mark.integration
-    def test_run_with_single_prediction(self):
+    def test_run_with_single_prediction(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         evaluator = SASEvaluator()

         ground_truths = ["US $2.3 billion"]
@@ -137,7 +139,8 @@ def test_run_with_single_prediction(self):
         assert result["individual_scores"] == pytest.approx([0.689089], abs=1e-5)

     @pytest.mark.integration
-    def test_run_with_mismatched_predictions(self):
+    def test_run_with_mismatched_predictions(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         evaluator = SASEvaluator()
         ground_truths = [
             "US $2.3 billion",
@@ -156,7 +159,8 @@ def test_run_with_mismatched_predictions(self):
         assert result["individual_scores"] == pytest.approx([0.689089, 0.870389, 0.908679], abs=1e-5)

     @pytest.mark.integration
-    def test_run_with_bi_encoder_model(self):
+    def test_run_with_bi_encoder_model(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         evaluator = SASEvaluator(model="sentence-transformers/all-mpnet-base-v2")
         ground_truths = [
             "A construction budget of US $2.3 billion",
@@ -175,7 +179,8 @@ def test_run_with_bi_encoder_model(self):
         assert result["individual_scores"] == pytest.approx([1.0, 1.0, 1.0])

     @pytest.mark.integration
-    def test_run_with_cross_encoder_model(self):
+    def test_run_with_cross_encoder_model(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         evaluator = SASEvaluator(model="cross-encoder/ms-marco-MiniLM-L-6-v2")
         ground_truths = [
             "A construction budget of US $2.3 billion",

test/components/generators/chat/test_hugging_face_local.py

Lines changed: 2 additions & 1 deletion
@@ -293,7 +293,8 @@ def test_messages_conversion_is_called(self, mock_convert, model_info_mock):

     @pytest.mark.integration
     @pytest.mark.flaky(reruns=3, reruns_delay=10)
-    def test_live_run(self):
+    def test_live_run(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         messages = [ChatMessage.from_user("Please create a summary about the following topic: Climate change")]

         llm = HuggingFaceLocalChatGenerator(

test/components/generators/test_hugging_face_local_generator.py

Lines changed: 4 additions & 2 deletions
@@ -454,8 +454,9 @@ def test_stop_words_criteria_using_hf_tokenizer(self):
         assert criteria(generated_text_ids, scores=None) is True

     @pytest.mark.integration
-    def test_hf_pipeline_runs_with_our_criteria(self):
+    def test_hf_pipeline_runs_with_our_criteria(self, monkeypatch):
         """Test that creating our own StopWordsCriteria and passing it to a Huggingface pipeline works."""
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         generator = HuggingFaceLocalGenerator(
             model="google/flan-t5-small", task="text2text-generation", stop_words=["unambiguously"]
         )
@@ -466,7 +467,8 @@ def test_hf_pipeline_runs_with_our_criteria(self):

     @pytest.mark.integration
     @pytest.mark.flaky(reruns=3, reruns_delay=10)
-    def test_live_run(self):
+    def test_live_run(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50})
         llm.warm_up()


test/components/rankers/test_sentence_transformers_diversity.py

Lines changed: 6 additions & 3 deletions
@@ -574,10 +574,11 @@ def test_pipeline_serialise_deserialise(self):

     @pytest.mark.integration
     @pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
-    def test_run(self, similarity):
+    def test_run(self, similarity, monkeypatch):
         """
         Tests that run method returns documents in the correct order
         """
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         ranker = SentenceTransformersDiversityRanker(
             model="sentence-transformers/all-MiniLM-L6-v2", similarity=similarity
         )
@@ -601,7 +602,8 @@ def test_run(self, similarity):

     @pytest.mark.integration
     @pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
-    def test_run_real_world_use_case(self, similarity):
+    def test_run_real_world_use_case(self, similarity, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         ranker = SentenceTransformersDiversityRanker(
             model="sentence-transformers/all-MiniLM-L6-v2", similarity=similarity
         )
@@ -673,7 +675,8 @@ def test_run_real_world_use_case(self, similarity):

     @pytest.mark.integration
     @pytest.mark.parametrize("similarity", ["dot_product", "cosine"])
-    def test_run_with_maximum_margin_relevance_strategy(self, similarity):
+    def test_run_with_maximum_margin_relevance_strategy(self, similarity, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         query = "renewable energy sources"
         docs = [
             Document(content="18th-century French literature"),

test/components/readers/test_extractive.py

Lines changed: 6 additions & 3 deletions
@@ -776,7 +776,8 @@ def test_deduplicate_by_overlap(


 @pytest.mark.integration
-def test_t5():
+def test_t5(monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
     reader = ExtractiveReader("sjrhuschlee/flan-t5-base-squad2")
     reader.warm_up()
     answers = reader.run(example_queries[0], example_documents[0], top_k=2)[
@@ -800,7 +801,8 @@ def test_t5():


 @pytest.mark.integration
-def test_roberta():
+def test_roberta(monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
     reader = ExtractiveReader("deepset/tinyroberta-squad2")
     reader.warm_up()
     answers = reader.run(example_queries[0], example_documents[0], top_k=2)[
@@ -829,7 +831,8 @@ def test_roberta():


 @pytest.mark.integration
-def test_matches_hf_pipeline():
+def test_matches_hf_pipeline(monkeypatch):
+    monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
     reader = ExtractiveReader(
         "deepset/tinyroberta-squad2", device=ComponentDevice.from_str("cpu"), overlap_threshold=None
     )

test/components/routers/test_transformers_text_router.py

Lines changed: 4 additions & 2 deletions
@@ -172,7 +172,8 @@ def test_run_unit(self, hf_pipeline_mock, mock_auto_config_from_pretrained):
         assert out == {"en": "What is the color of the sky?"}

     @pytest.mark.integration
-    def test_run(self):
+    def test_run(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection")
         router.warm_up()
         out = router.run("What is the color of the sky?")
@@ -202,7 +203,8 @@ def test_run(self):
         assert out == {"en": "What is the color of the sky?"}

     @pytest.mark.integration
-    def test_wrong_labels(self):
+    def test_wrong_labels(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         router = TransformersTextRouter(model="papluca/xlm-roberta-base-language-detection", labels=["en", "de"])
         with pytest.raises(ValueError):
             router.warm_up()

test/components/routers/test_zero_shot_text_router.py

Lines changed: 2 additions & 1 deletion
@@ -106,7 +106,8 @@ def test_run_unit(self, hf_pipeline_mock):
         assert out == {"query": "What is the color of the sky?"}

     @pytest.mark.integration
-    def test_run(self):
+    def test_run(self, monkeypatch):
+        monkeypatch.delenv("HF_API_TOKEN", raising=False)  # https://github.com/deepset-ai/haystack/issues/8811
         router = TransformersZeroShotTextRouter(labels=["query", "passage"])
         router.warm_up()
         out = router.run("What is the color of the sky?")
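Because the same two-line change recurs in every affected test, an autouse fixture in a shared conftest.py could express it once. The commit does not do this; the sketch below is a hypothetical refactor. Keeping the deletion scoped per test, as the commit does, avoids stripping the token from tests that legitimately need it, such as tests that call the HF Inference API.

import pytest

# Hypothetical alternative, not part of this commit: strip HF_API_TOKEN
# from the environment of every test in the suite.
@pytest.fixture(autouse=True)
def no_hf_api_token(monkeypatch):
    # monkeypatch undoes the deletion after each test.
    monkeypatch.delenv("HF_API_TOKEN", raising=False)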
