Commit f96839e

chore: update transformers test dependency (#8752)
* update transformers test dependency
* add pad_token_id to the mock tokenizer
* fix HFLocal test + new test
1 parent 2bf6bf6 commit f96839e

File tree: 3 files changed (+19 -2 lines changed)


Diff for: pyproject.toml (+1 -1)

@@ -85,7 +85,7 @@ format-check = "ruff format --check {args}"
 extra-dependencies = [
   "numpy>=2", # Haystack is compatible both with numpy 1.x and 2.x, but we test with 2.x
 
-  "transformers[torch,sentencepiece]==4.44.2", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
+  "transformers[torch,sentencepiece]==4.47.1", # ExtractiveReader, TransformersSimilarityRanker, LocalWhisperTranscriber, HFGenerators...
   "huggingface_hub>=0.27.0", # Hugging Face API Generators and Embedders
   "sentence-transformers>=3.0.0", # SentenceTransformersTextEmbedder and SentenceTransformersDocumentEmbedder
   "langdetect", # TextLanguageRouter and DocumentLanguageClassifier

Diff for: test/components/generators/chat/test_hugging_face_local.py (+1)

@@ -42,6 +42,7 @@ def mock_pipeline_tokenizer():
     # Mocking the tokenizer
     mock_tokenizer = Mock(spec=PreTrainedTokenizer)
     mock_tokenizer.encode.return_value = ["Berlin", "is", "cool"]
+    mock_tokenizer.pad_token_id = 100
     mock_pipeline.tokenizer = mock_tokenizer
 
     return mock_pipeline
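
A plausible reason the fixture needs the new attribute: on a Mock built with spec=PreTrainedTokenizer, any attribute that is not set explicitly resolves to another Mock rather than an integer, which breaks padding-related code paths that compare token ids. A minimal sketch of the difference (the id 100 is arbitrary, as in the fixture):

from unittest.mock import Mock

from transformers import PreTrainedTokenizer

mock_tokenizer = Mock(spec=PreTrainedTokenizer)

# Unset attributes on a spec'd Mock come back as Mocks, not ints:
print(type(mock_tokenizer.pad_token_id))  # <class 'unittest.mock.Mock'>

# Pinning a concrete id, as the fixture now does, restores int semantics:
mock_tokenizer.pad_token_id = 100
print(mock_tokenizer.pad_token_id + 1)  # 101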

Diff for: test/components/generators/test_hugging_face_local_generator.py (+17 -1)

@@ -397,8 +397,12 @@ def test_stop_words_criteria_with_a_mocked_tokenizer(self):
         # "This is ambiguously, but is unrelated."
         input_ids_one = torch.LongTensor([[100, 19, 24621, 11937, 6, 68, 19, 73, 3897, 5]])
         input_ids_two = torch.LongTensor([[100, 19, 73, 24621, 11937]])  # "This is unambiguously"
-        stop_words_criteria = StopWordsCriteria(tokenizer=Mock(spec=PreTrainedTokenizerFast), stop_words=["mock data"])
+
+        mock_tokenizer = Mock(spec=PreTrainedTokenizerFast)
+        mock_tokenizer.pad_token = "<pad>"
+        stop_words_criteria = StopWordsCriteria(tokenizer=mock_tokenizer, stop_words=["mock data"])
         stop_words_criteria.stop_ids = stop_words_id
+
         assert not stop_words_criteria(input_ids_one, scores=None)
         assert stop_words_criteria(input_ids_two, scores=None)
 
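The new pad_token on this mock suggests that, under transformers 4.47, StopWordsCriteria ends up padding when it tokenizes its stop words, and transformers refuses to pad without a pad token defined. A minimal sketch of that failure mode with a real tokenizer (GPT-2 is just an example of a tokenizer that ships without a pad token; the stop words are made up):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # no pad token by default

# Padding stop words of unequal length requires a pad token:
try:
    tokenizer(["mock data", "stop"], add_special_tokens=False, padding=True, return_tensors="pt")
except ValueError as err:
    print(err)  # "Asking to pad but the tokenizer does not have a padding token..."

# Defining one, as the mocked tokenizer now does, makes the same call work:
tokenizer.pad_token = tokenizer.eos_token
batch = tokenizer(["mock data", "stop"], add_special_tokens=False, padding=True, return_tensors="pt")
print(batch.input_ids.shape)  # both stop words padded to the same length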

@@ -459,3 +463,15 @@ def test_hf_pipeline_runs_with_our_criteria(self):
         results = generator.run(prompt="something that triggers something")
         assert results["replies"] != []
         assert generator.stopping_criteria_list is not None
+
+    @pytest.mark.integration
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    def test_live_run(self):
+        llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50})
+        llm.warm_up()
+
+        result = llm.run(prompt="Please create a summary about the following topic: Climate change")
+
+        assert "replies" in result
+        assert isinstance(result["replies"][0], str)
+        assert "climate change" in result["replies"][0].lower()
