@@ -397,8 +397,12 @@ def test_stop_words_criteria_with_a_mocked_tokenizer(self):
         # "This is ambiguously, but is unrelated."
         input_ids_one = torch.LongTensor([[100, 19, 24621, 11937, 6, 68, 19, 73, 3897, 5]])
         input_ids_two = torch.LongTensor([[100, 19, 73, 24621, 11937]])  # "This is unambiguously"
-        stop_words_criteria = StopWordsCriteria(tokenizer=Mock(spec=PreTrainedTokenizerFast), stop_words=["mock data"])
+
+        mock_tokenizer = Mock(spec=PreTrainedTokenizerFast)
+        mock_tokenizer.pad_token = "<pad>"
+        stop_words_criteria = StopWordsCriteria(tokenizer=mock_tokenizer, stop_words=["mock data"])
         stop_words_criteria.stop_ids = stop_words_id
+
         assert not stop_words_criteria(input_ids_one, scores=None)
         assert stop_words_criteria(input_ids_two, scores=None)
@@ -459,3 +463,15 @@ def test_hf_pipeline_runs_with_our_criteria(self):
         results = generator.run(prompt="something that triggers something")
         assert results["replies"] != []
         assert generator.stopping_criteria_list is not None
+
+    @pytest.mark.integration
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    def test_live_run(self):
+        llm = HuggingFaceLocalGenerator(model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50})
+        llm.warm_up()
+
+        result = llm.run(prompt="Please create a summary about the following topic: Climate change")
+
+        assert "replies" in result
+        assert isinstance(result["replies"][0], str)
+        assert "climate change" in result["replies"][0].lower()
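For context on the first hunk: Mock(spec=PreTrainedTokenizerFast) only constrains which attribute names the mock accepts; reading any attribute still returns a child Mock rather than a real value, so code that expects pad_token to be a string would receive a Mock. A minimal sketch of that behavior and the workaround the added mock_tokenizer.pad_token = "<pad>" line applies (the Tokenizer class below is a hypothetical stand-in, not the real transformers PreTrainedTokenizerFast):

from unittest.mock import Mock

# Hypothetical stand-in; assumes only that the class exposes a
# pad_token attribute, as PreTrainedTokenizerFast does.
class Tokenizer:
    pad_token = "<pad>"

tok = Mock(spec=Tokenizer)
print(isinstance(tok.pad_token, Mock))  # True: plain spec'd mocks return child Mocks

# Any code treating pad_token as a string would get a Mock here,
# hence the explicit assignment in the test:
tok.pad_token = "<pad>"
print(tok.pad_token)  # "<pad>"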