@@ -135,6 +135,7 @@ def test_to_dict(self, model_info_mock):
             generation_kwargs={"n": 5},
             stop_words=["stop", "words"],
             streaming_callback=lambda x: x,
+            chat_template="irrelevant",
         )
 
         # Call the to_dict method
@@ -146,13 +147,15 @@ def test_to_dict(self, model_info_mock):
         assert init_params["huggingface_pipeline_kwargs"]["model"] == "NousResearch/Llama-2-7b-chat-hf"
         assert "token" not in init_params["huggingface_pipeline_kwargs"]
         assert init_params["generation_kwargs"] == {"max_new_tokens": 512, "n": 5, "stop_sequences": ["stop", "words"]}
+        assert init_params["chat_template"] == "irrelevant"
 
     def test_from_dict(self, model_info_mock):
         generator = HuggingFaceLocalChatGenerator(
             model="NousResearch/Llama-2-7b-chat-hf",
             generation_kwargs={"n": 5},
             stop_words=["stop", "words"],
             streaming_callback=streaming_callback_handler,
+            chat_template="irrelevant",
         )
         # Call the to_dict method
         result = generator.to_dict()
@@ -162,6 +165,7 @@ def test_from_dict(self, model_info_mock):
         assert generator_2.token == Secret.from_env_var(["HF_API_TOKEN", "HF_TOKEN"], strict=False)
         assert generator_2.generation_kwargs == {"max_new_tokens": 512, "n": 5, "stop_sequences": ["stop", "words"]}
         assert generator_2.streaming_callback is streaming_callback_handler
+        assert generator_2.chat_template == "irrelevant"
 
     @patch("haystack.components.generators.chat.hugging_face_local.pipeline")
     def test_warm_up(self, pipeline_mock, monkeypatch):
@@ -218,3 +222,36 @@ def test_run_with_custom_generation_parameters(self, model_info_mock, mock_pipel
         chat_message = results["replies"][0]
         assert chat_message.is_from(ChatRole.ASSISTANT)
         assert chat_message.text == "Berlin is cool"
+
+    @patch("haystack.components.generators.chat.hugging_face_local.convert_message_to_hf_format")
+    def test_messages_conversion_is_called(self, mock_convert, model_info_mock):
+        generator = HuggingFaceLocalChatGenerator(model="fake-model")
+
+        messages = [ChatMessage.from_user("Hello"), ChatMessage.from_assistant("Hi there")]
+
+        with patch.object(generator, "pipeline") as mock_pipeline:
+            mock_pipeline.tokenizer.apply_chat_template.return_value = "test prompt"
+            mock_pipeline.return_value = [{"generated_text": "test response"}]
+
+            generator.warm_up()
+            generator.run(messages)
+
+        assert mock_convert.call_count == 2
+        mock_convert.assert_any_call(messages[0])
+        mock_convert.assert_any_call(messages[1])
+
+    @pytest.mark.integration
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
+    def test_live_run(self):
+        messages = [ChatMessage.from_user("Please create a summary about the following topic: Climate change")]
+
+        llm = HuggingFaceLocalChatGenerator(
+            model="Qwen/Qwen2.5-0.5B-Instruct", generation_kwargs={"max_new_tokens": 50}
+        )
+        llm.warm_up()
+
+        result = llm.run(messages)
+
+        assert "replies" in result
+        assert isinstance(result["replies"][0], ChatMessage)
+        assert "climate change" in result["replies"][0].text.lower()