feat: Add timeout, max_retries to all generators and async support to AnthropicVertexChatGenerator (#1952)

sjrl · web-flow · commit 61d163e26cab · 2025-06-25T13:55:30.000+02:00
* Add timeout and max_retries to AnthropicChatGenerator

* Add timeout, max_retries and async client to AnthropicVertexChatGenerator

* Add async integration test for AnthropicVertexChatGenerator

* Also add timeout and max_retries to regular generator

* Update live test
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/chat_generator.py
@@ -195,6 +195,9 @@ def __init__(
         generation_kwargs: Optional[Dict[str, Any]] = None,
         ignore_tools_thinking_messages: bool = True,
         tools: Optional[Union[List[Tool], Toolset]] = None,
+        *,
+        timeout: Optional[float] = None,
+        max_retries: Optional[int] = None,
     ):
         """
         Creates an instance of AnthropicChatGenerator.
@@ -222,16 +225,32 @@ def __init__(
             use is detected. See the Anthropic [tools](https://docs.anthropic.com/en/docs/tool-use#chain-of-thought-tool-use)
             for more details.
         :param tools: A list of Tool objects or a Toolset that the model can use. Each tool should have a unique name.
-
+        :param timeout:
+            Timeout for Anthropic client calls. If not set, it defaults to the default set by the Anthropic client.
+        :param max_retries:
+            Maximum number of retries to attempt for failed requests. If not set, it defaults to the default set by
+            the Anthropic client.
         """
         _check_duplicate_tool_names(list(tools or []))  # handles Toolset as well
 
         self.api_key = api_key
         self.model = model
         self.generation_kwargs = generation_kwargs or {}
         self.streaming_callback = streaming_callback
-        self.client = Anthropic(api_key=self.api_key.resolve_value())
-        self.async_client = AsyncAnthropic(api_key=self.api_key.resolve_value())
+        self.timeout = timeout
+        self.max_retries = max_retries
+
+        client_kwargs: Dict[str, Any] = {"api_key": api_key.resolve_value()}
+        # Only forward timeout when set: an explicit timeout=None is not the same as omitting it (client default)
+        if timeout is not None:
+            client_kwargs["timeout"] = timeout
+        # Only forward max_retries when set: Anthropic requires an int, so None must be omitted (client default)
+        if max_retries is not None:
+            client_kwargs["max_retries"] = max_retries
+
+        self.client = Anthropic(**client_kwargs)
+        self.async_client = AsyncAnthropic(**client_kwargs)
+
         self.ignore_tools_thinking_messages = ignore_tools_thinking_messages
         self.tools = tools
 
@@ -257,6 +276,8 @@ def to_dict(self) -> Dict[str, Any]:
             api_key=self.api_key.to_dict(),
             ignore_tools_thinking_messages=self.ignore_tools_thinking_messages,
             tools=serialize_tools_or_toolset(self.tools),
+            timeout=self.timeout,
+            max_retries=self.max_retries,
         )
 
     @classmethod
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/vertex_chat_generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/chat/vertex_chat_generator.py
@@ -6,7 +6,7 @@
 from haystack.tools import Tool, _check_duplicate_tool_names, deserialize_tools_or_toolset_inplace
 from haystack.utils import deserialize_callable, serialize_callable
 
-from anthropic import AnthropicVertex
+from anthropic import AnthropicVertex, AsyncAnthropicVertex
 
 from .chat_generator import AnthropicChatGenerator
 
@@ -68,6 +68,9 @@ def __init__(
         generation_kwargs: Optional[Dict[str, Any]] = None,
         ignore_tools_thinking_messages: bool = True,
         tools: Optional[List[Tool]] = None,
+        *,
+        timeout: Optional[float] = None,
+        max_retries: Optional[int] = None,
     ):
         """
         Creates an instance of AnthropicVertexChatGenerator.
@@ -96,6 +99,11 @@ def __init__(
             use is detected. See the Anthropic [tools](https://docs.anthropic.com/en/docs/tool-use#chain-of-thought-tool-use)
             for more details.
         :param tools: A list of Tool objects that the model can use. Each tool should have a unique name.
+        :param timeout:
+            Timeout for Anthropic client calls. If not set, it defaults to the default set by the Anthropic client.
+        :param max_retries:
+            Maximum number of retries to attempt for failed requests. If not set, it defaults to the default set by
+            the Anthropic client.
         """
         _check_duplicate_tool_names(tools)
         self.region = region or os.environ.get("REGION")
@@ -105,9 +113,20 @@ def __init__(
         self.streaming_callback = streaming_callback
         self.ignore_tools_thinking_messages = ignore_tools_thinking_messages
         self.tools = tools
+        self.timeout = timeout
+        self.max_retries = max_retries
 
-        # mypy is not happy that we override the type of the client
-        self.client = AnthropicVertex(region=self.region, project_id=self.project_id)  # type: ignore
+        client_kwargs: Dict[str, Any] = {"region": self.region, "project_id": self.project_id}
+        # Only forward timeout when set: an explicit timeout=None is not the same as omitting it (client default)
+        if timeout is not None:
+            client_kwargs["timeout"] = timeout
+        # Only forward max_retries when set: Anthropic requires an int, so None must be omitted (client default)
+        if max_retries is not None:
+            client_kwargs["max_retries"] = max_retries
+
+        # mypy is not happy that we override the type of the clients
+        self.client = AnthropicVertex(**client_kwargs)  # type: ignore[assignment]
+        self.async_client = AsyncAnthropicVertex(**client_kwargs)  # type: ignore[assignment]
 
     def to_dict(self) -> Dict[str, Any]:
         """
@@ -128,6 +147,8 @@ def to_dict(self) -> Dict[str, Any]:
             generation_kwargs=self.generation_kwargs,
             ignore_tools_thinking_messages=self.ignore_tools_thinking_messages,
             tools=serialized_tools,
+            timeout=self.timeout,
+            max_retries=self.max_retries,
         )
 
     @classmethod
diff --git a/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/generator.py b/integrations/anthropic/src/haystack_integrations/components/generators/anthropic/generator.py
@@ -64,6 +64,9 @@ def __init__(
         streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
         system_prompt: Optional[str] = None,
         generation_kwargs: Optional[Dict[str, Any]] = None,
+        *,
+        timeout: Optional[float] = None,
+        max_retries: Optional[int] = None,
     ):
         """
         Initialize the AnthropicGenerator.
@@ -79,12 +82,24 @@ def __init__(
         self.generation_kwargs = generation_kwargs or {}
         self.streaming_callback = streaming_callback
         self.system_prompt = system_prompt
-        self.client = Anthropic(api_key=self.api_key.resolve_value())
+        self.timeout = timeout
+        self.max_retries = max_retries
+
         self.include_thinking = self.generation_kwargs.pop("include_thinking", True)
         self.thinking_tag = self.generation_kwargs.pop("thinking_tag", "thinking")
         self.thinking_tag_start = f"<{self.thinking_tag}>" if self.thinking_tag else ""
         self.thinking_tag_end = f"</{self.thinking_tag}>\n\n" if self.thinking_tag else "\n\n"
 
+        client_kwargs: Dict[str, Any] = {"api_key": api_key.resolve_value()}
+        # Only forward timeout when set: an explicit timeout=None is not the same as omitting it (client default)
+        if timeout is not None:
+            client_kwargs["timeout"] = timeout
+        # Only forward max_retries when set: Anthropic requires an int, so None must be omitted (client default)
+        if max_retries is not None:
+            client_kwargs["max_retries"] = max_retries
+
+        self.client = Anthropic(**client_kwargs)
+
     def _get_telemetry_data(self) -> Dict[str, Any]:
         """
         Get telemetry data for the component.
@@ -103,11 +118,13 @@ def to_dict(self) -> Dict[str, Any]:
         callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None
         return default_to_dict(
             self,
+            api_key=self.api_key.to_dict(),
             model=self.model,
             streaming_callback=callback_name,
             system_prompt=self.system_prompt,
             generation_kwargs=self.generation_kwargs,
-            api_key=self.api_key.to_dict(),
+            timeout=self.timeout,
+            max_retries=self.max_retries,
         )
 
     @classmethod
diff --git a/integrations/anthropic/tests/test_chat_generator.py b/integrations/anthropic/tests/test_chat_generator.py
@@ -165,6 +165,8 @@ def test_to_dict_default(self, monkeypatch):
                 "ignore_tools_thinking_messages": True,
                 "generation_kwargs": {},
                 "tools": None,
+                "timeout": None,
+                "max_retries": None,
             },
         }
 
@@ -181,6 +183,8 @@ def test_to_dict_with_parameters(self, monkeypatch):
             streaming_callback=print_streaming_chunk,
             generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
             tools=[tool],
+            timeout=10.0,
+            max_retries=1,
         )
         data = component.to_dict()
 
@@ -207,6 +211,8 @@ def test_to_dict_with_parameters(self, monkeypatch):
                         "type": "haystack.tools.tool.Tool",
                     }
                 ],
+                "timeout": 10.0,
+                "max_retries": 1,
             },
         }
 
@@ -697,6 +703,8 @@ def test_serde_in_pipeline(self):
                                 },
                             }
                         ],
+                        "timeout": None,
+                        "max_retries": None,
                     },
                 }
             },
@@ -775,7 +783,7 @@ def __call__(self, chunk: StreamingChunk) -> None:
                 self.responses += chunk.content if chunk.content else ""
 
         callback = Callback()
-        component = AnthropicChatGenerator(streaming_callback=callback)
+        component = AnthropicChatGenerator(streaming_callback=callback, timeout=30.0, max_retries=1)
         results = component.run([ChatMessage.from_user("What's the capital of France?")])
 
         assert len(results["replies"]) == 1
diff --git a/integrations/anthropic/tests/test_generator.py b/integrations/anthropic/tests/test_generator.py
@@ -47,6 +47,8 @@ def test_to_dict_default(self, monkeypatch):
                 "streaming_callback": None,
                 "system_prompt": None,
                 "generation_kwargs": {},
+                "timeout": None,
+                "max_retries": None,
             },
         }
 
@@ -57,6 +59,8 @@ def test_to_dict_with_parameters(self, monkeypatch):
             streaming_callback=print_streaming_chunk,
             system_prompt="test-prompt",
             generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
+            timeout=10.0,
+            max_retries=1,
         )
         data = component.to_dict()
         assert data == {
@@ -67,6 +71,8 @@ def test_to_dict_with_parameters(self, monkeypatch):
                 "system_prompt": "test-prompt",
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
+                "timeout": 10.0,
+                "max_retries": 1,
             },
         }
 
@@ -80,6 +86,8 @@ def test_from_dict(self, monkeypatch):
                 "system_prompt": "test-prompt",
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
+                "timeout": None,
+                "max_retries": None,
             },
         }
         component = AnthropicGenerator.from_dict(data)
@@ -88,6 +96,8 @@ def test_from_dict(self, monkeypatch):
         assert component.system_prompt == "test-prompt"
         assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
         assert component.api_key == Secret.from_env_var("ANTHROPIC_API_KEY")
+        assert component.timeout is None
+        assert component.max_retries is None
 
     def test_from_dict_fail_wo_env_var(self, monkeypatch):
         monkeypatch.delenv("ANTHROPIC_API_KEY", raising=False)
@@ -99,6 +109,8 @@ def test_from_dict_fail_wo_env_var(self, monkeypatch):
                 "system_prompt": "test-prompt",
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
+                "timeout": None,
+                "max_retries": None,
             },
         }
         with pytest.raises(ValueError, match="None of the .* environment variables are set"):
diff --git a/integrations/anthropic/tests/test_vertex_chat_generator.py b/integrations/anthropic/tests/test_vertex_chat_generator.py
@@ -58,6 +58,8 @@ def test_to_dict_default(self):
                 "generation_kwargs": {},
                 "ignore_tools_thinking_messages": True,
                 "tools": None,
+                "timeout": None,
+                "max_retries": None,
             },
         }
 
@@ -67,6 +69,9 @@ def test_to_dict_with_parameters(self):
             project_id="test-project-id",
             streaming_callback=print_streaming_chunk,
             generation_kwargs={"max_tokens": 10, "some_test_param": "test-params"},
+            ignore_tools_thinking_messages=False,
+            timeout=10.0,
+            max_retries=1,
         )
         data = component.to_dict()
         assert data == {
@@ -80,8 +85,10 @@ def test_to_dict_with_parameters(self):
                 "model": "claude-3-5-sonnet@20240620",
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
-                "ignore_tools_thinking_messages": True,
+                "ignore_tools_thinking_messages": False,
                 "tools": None,
+                "timeout": 10.0,
+                "max_retries": 1,
             },
         }
 
@@ -98,6 +105,9 @@ def test_from_dict(self):
                 "streaming_callback": "haystack.components.generators.utils.print_streaming_chunk",
                 "generation_kwargs": {"max_tokens": 10, "some_test_param": "test-params"},
                 "ignore_tools_thinking_messages": True,
+                "tools": None,
+                "timeout": None,
+                "max_retries": None,
             },
         }
         component = AnthropicVertexChatGenerator.from_dict(data)
@@ -106,6 +116,9 @@ def test_from_dict(self):
         assert component.project_id == "test-project-id"
         assert component.streaming_callback is print_streaming_chunk
         assert component.generation_kwargs == {"max_tokens": 10, "some_test_param": "test-params"}
+        assert component.ignore_tools_thinking_messages is True
+        assert component.timeout is None
+        assert component.max_retries is None
 
     def test_run(self, chat_messages, mock_chat_completion):
         component = AnthropicVertexChatGenerator(region="us-central1", project_id="test-project-id")
@@ -173,3 +186,30 @@ def test_default_inference_params(self, chat_messages):
 
     # Anthropic messages API is similar for AnthropicVertex and Anthropic endpoint,
     # remaining tests are skipped for AnthropicVertexChatGenerator as they are already tested in AnthropicChatGenerator.
+
+
+class TestAnthropicVertexChatGeneratorAsync:
+    @pytest.mark.asyncio
+    @pytest.mark.skipif(
+        not (os.environ.get("REGION") and os.environ.get("PROJECT_ID")),
+        reason="Authenticate with GCP and set env variables REGION and PROJECT_ID to run this test.",
+    )
+    @pytest.mark.integration
+    async def test_live_run_async(self):
+        """
+        Live integration test: `run_async` on AnthropicVertexChatGenerator returns one reply from the Vertex endpoint.
+        """
+        component = AnthropicVertexChatGenerator(
+            region=os.environ.get("REGION"),
+            project_id=os.environ.get("PROJECT_ID"),
+            model="claude-3-5-sonnet@20240620",
+        )
+        results = await component.run_async(messages=[ChatMessage.from_user("What's the capital of France?")])
+        assert len(results["replies"]) == 1
+        message: ChatMessage = results["replies"][0]
+        assert "Paris" in message.text
+        assert "claude-3-5-sonnet-20240620" in message.meta["model"]
+        assert message.meta["finish_reason"] == "end_turn"
+
+    # Anthropic messages API is similar for AnthropicVertex and Anthropic endpoint, so the remaining
+    # async tests are skipped for AnthropicVertexChatGenerator as they are already tested in AnthropicChatGenerator.