From 424bce2783977346ea408f54607f02dd3d8cedbe Mon Sep 17 00:00:00 2001 From: Stefano Fiorucci Date: Fri, 17 Jan 2025 13:36:07 +0100 Subject: [PATCH] test: fix HF API flaky live test with tools (#8744) * test: fix HF API flaky live test with tools * rm print --- .../generators/chat/test_hugging_face_api.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index f9e306c46e..6e46e5041b 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -466,6 +466,7 @@ def test_run_with_tools(self, mock_check_valid_model, tools): not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) + @pytest.mark.flaky(reruns=3, reruns_delay=10) def test_live_run_serverless(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, @@ -489,6 +490,7 @@ def test_live_run_serverless(self): not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) + @pytest.mark.flaky(reruns=3, reruns_delay=10) def test_live_run_serverless_streaming(self): generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, @@ -517,19 +519,18 @@ def test_live_run_serverless_streaming(self): not os.environ.get("HF_API_TOKEN", None), reason="Export an env var called HF_API_TOKEN containing the Hugging Face token to run this test.", ) - @pytest.mark.integration + @pytest.mark.flaky(reruns=3, reruns_delay=10) def test_live_run_with_tools(self, tools): """ We test the round trip: generate tool call, pass tool message, generate response. - The model used here (zephyr-7b-beta) is always available and not gated. - Even if it does not officially support tools, TGI+HF API make it work. + The model used here (Hermes-3-Llama-3.1-8B) is not gated and kept in a warm state. """ - chat_messages = [ChatMessage.from_user("What's the weather like in Paris and Munich?")] + chat_messages = [ChatMessage.from_user("What's the weather like in Paris?")] generator = HuggingFaceAPIChatGenerator( api_type=HFGenerationAPIType.SERVERLESS_INFERENCE_API, - api_params={"model": "HuggingFaceH4/zephyr-7b-beta"}, + api_params={"model": "NousResearch/Hermes-3-Llama-3.1-8B"}, generation_kwargs={"temperature": 0.5}, ) @@ -545,7 +546,7 @@ def test_live_run_with_tools(self, tools): assert "Paris" in tool_call.arguments["city"] assert message.meta["finish_reason"] == "stop" - new_messages = chat_messages + [message, ChatMessage.from_tool(tool_result="22°", origin=tool_call)] + new_messages = chat_messages + [message, ChatMessage.from_tool(tool_result="22° C", origin=tool_call)] # the model tends to make tool calls if provided with tools, so we don't pass them here results = generator.run(new_messages, generation_kwargs={"max_tokens": 50})