From 424bce2783977346ea408f54607f02dd3d8cedbe Mon Sep 17 00:00:00 2001
From: Stefano Fiorucci <stefanofiorucci@gmail.com>
Date: Fri, 17 Jan 2025 13:36:07 +0100
Subject: [PATCH] test: fix HF API flaky live test with tools (#8744)

* test: fix HF API flaky live test with tools

* remove print statement
---
 .../generators/chat/test_hugging_face_api.py        | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py
index f9e306c46e..6e46e5041b 100644
--- a/test/components/generators/chat/test_hugging_face_api.py
+++ b/test/components/generators/chat/test_hugging_face_api.py
@@ -466,6 +466,7 @@ def test_run_with_tools(self, mock_check_valid_model, tools):
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
     def test_live_run_serverless(self):
         generator = HuggingFaceAPIChatGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -489,6 +490,7 @@ def test_live_run_serverless(self):
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
     def test_live_run_serverless_streaming(self):
         generator = HuggingFaceAPIChatGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
@@ -517,19 +519,18 @@ def test_live_run_serverless_streaming(self):
         not os.environ.get("HF_API_TOKEN", None),
         reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.",
     )
-    @pytest.mark.integration
+    @pytest.mark.flaky(reruns=3, reruns_delay=10)
     def test_live_run_with_tools(self, tools):
         """
         We test the round trip: generate tool call, pass tool message, generate response.
 
-        The model used here (zephyr-7b-beta) is always available and not gated.
-        Even if it does not officially support tools, TGI+HF API make it work.
+        The model used here (Hermes-3-Llama-3.1-8B) is not gated and kept in a warm state.
         """
 
-        chat_messages = [ChatMessage.from_user("What's the weather like in Paris and Munich?")]
+        chat_messages = [ChatMessage.from_user("What's the weather like in Paris?")]
         generator = HuggingFaceAPIChatGenerator(
             api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API,
-            api_params={"model": "HuggingFaceH4/zephyr-7b-beta"},
+            api_params={"model": "NousResearch/Hermes-3-Llama-3.1-8B"},
             generation_kwargs={"temperature": 0.5},
         )
 
@@ -545,7 +546,7 @@ def test_live_run_with_tools(self, tools):
         assert "Paris" in tool_call.arguments["city"]
         assert message.meta["finish_reason"] == "stop"
 
-        new_messages = chat_messages + [message, ChatMessage.from_tool(tool_result="22°", origin=tool_call)]
+        new_messages = chat_messages + [message, ChatMessage.from_tool(tool_result="22° C", origin=tool_call)]
 
         # the model tends to make tool calls if provided with tools, so we don't pass them here
         results = generator.run(new_messages, generation_kwargs={"max_tokens": 50})