@@ -40,6 +40,9 @@
     parse_xml_params,
     prompt_factory,
 )
+from litellm.llms.anthropic.chat.handler import (
+    ModelResponseIterator as AnthropicModelResponseIterator,
+)
 from litellm.llms.custom_httpx.http_handler import (
     AsyncHTTPHandler,
     HTTPHandler,
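The aliased import above is the heart of the patch: rather than re-implementing Anthropic's streaming format for Bedrock, the new decoder (defined further down) reuses the chunk parser from litellm's native Anthropic chat handler. For reference, that parser consumes raw Anthropic Messages streaming events, which per Anthropic's public API look like:

```python
# Shape of one Anthropic Messages streaming event (public API format); this is
# the kind of dict AnthropicModelResponseIterator.chunk_parser() consumes.
anthropic_event = {
    "type": "content_block_delta",
    "index": 0,
    "delta": {"type": "text_delta", "text": "Hello"},
}
```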
@@ -177,6 +180,7 @@ async def make_call(
     logging_obj: Logging,
     fake_stream: bool = False,
     json_mode: Optional[bool] = False,
+    bedrock_invoke_provider: Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL] = None,
 ):
     try:
         if client is None:
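`litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL` is a `typing.Literal` enumerating the model families reachable through Bedrock's invoke API. A rough sketch of such a type follows; the member list here is an assumption for illustration, the authoritative definition lives in litellm's type module:

```python
from typing import Literal, Optional

# Assumed member list, for illustration only; consult litellm's types for the
# canonical definition.
BEDROCK_INVOKE_PROVIDERS_LITERAL = Literal[
    "anthropic", "amazon", "cohere", "meta", "mistral", "ai21"
]

# Bedrock invoke model ids are conventionally prefixed with the provider name,
# e.g. "anthropic.claude-3-5-sonnet-20240620-v1:0", which is how a caller can
# decide what to pass for bedrock_invoke_provider.
provider: Optional[BEDROCK_INVOKE_PROVIDERS_LITERAL] = "anthropic"
```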
@@ -214,6 +218,14 @@ async def make_call(
             completion_stream: Any = MockResponseIterator(
                 model_response=model_response, json_mode=json_mode
             )
+        elif bedrock_invoke_provider == "anthropic":
+            decoder = AmazonAnthropicClaudeStreamDecoder(
+                model=model,
+                sync_stream=False,
+            )
+            completion_stream = decoder.aiter_bytes(
+                response.aiter_bytes(chunk_size=1024)
+            )
 else:
             decoder = AWSEventStreamDecoder(model=model)
             completion_stream = decoder.aiter_bytes(
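In the new branch the superclass still handles AWS's binary event-stream unframing and yields one `dict` per event; only the per-event parsing is swapped out. A minimal sketch of that delegation pattern in isolation, with stand-in callables rather than litellm's real signatures:

```python
from typing import Any, AsyncIterator

async def decode_stream(
    raw_bytes: AsyncIterator[bytes],
    unframe,      # bytes -> iterable of decoded event dicts (event-stream layer)
    parse_chunk,  # dict -> provider-agnostic streaming chunk
) -> AsyncIterator[Any]:
    # Hedged sketch: in the real code, AWSEventStreamDecoder.aiter_bytes()
    # performs the unframing and invokes self._chunk_parser() (the method the
    # new subclass overrides) on every decoded event dict.
    async for block in raw_bytes:
        for event_dict in unframe(block):
            yield parse_chunk(event_dict)
```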
@@ -248,6 +260,7 @@ def make_sync_call(
     logging_obj: Logging,
     fake_stream: bool = False,
     json_mode: Optional[bool] = False,
+    bedrock_invoke_provider: Optional[litellm.BEDROCK_INVOKE_PROVIDERS_LITERAL] = None,
 ):
     try:
         if client is None:
@@ -283,6 +296,12 @@ def make_sync_call(
             completion_stream: Any = MockResponseIterator(
                 model_response=model_response, json_mode=json_mode
             )
+        elif bedrock_invoke_provider == "anthropic":
+            decoder = AmazonAnthropicClaudeStreamDecoder(
+                model=model,
+                sync_stream=True,
+            )
+            completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
 else:
             decoder = AWSEventStreamDecoder(model=model)
             completion_stream = decoder.iter_bytes(response.iter_bytes(chunk_size=1024))
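End to end, the sync and async branches mean a Claude model streamed through Bedrock's invoke API emits the same OpenAI-style chunks as every other litellm provider. A hedged usage sketch (the model id is illustrative, AWS credentials are assumed to be configured, and the `bedrock/invoke/` prefix is litellm's route for forcing the invoke API rather than converse):

```python
import litellm

# Hedged sketch: streams a Claude model over Bedrock's invoke API, the code
# path this patch touches. Model id and prompt are illustrative.
response = litellm.completion(
    model="bedrock/invoke/anthropic.claude-3-5-sonnet-20240620-v1:0",
    messages=[{"role": "user", "content": "Say hi"}],
    stream=True,
)
for chunk in response:
    print(chunk.choices[0].delta.content or "", end="")
```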
@@ -1323,7 +1342,7 @@ def _chunk_parser(self, chunk_data: dict) -> GChunk:
             text = chunk_data.get("completions")[0].get("data").get("text")  # type: ignore
             is_finished = True
             finish_reason = "stop"
-        ######## bedrock.anthropic mappings ###############
+        ######## converse bedrock.anthropic mappings ###############
         elif (
             "contentBlockIndex" in chunk_data
             or "stopReason" in chunk_data
@@ -1429,6 +1448,22 @@ def _parse_message_from_event(self, event) -> Optional[str]:
         return chunk.decode()  # type: ignore[no-any-return]
 
 
+class AmazonAnthropicClaudeStreamDecoder(AWSEventStreamDecoder):
+    def __init__(
+        self,
+        model: str,
+        sync_stream: bool,
+    ) -> None:
+        super().__init__(model=model)
+        self.anthropic_model_response_iterator = AnthropicModelResponseIterator(
+            streaming_response=None,
+            sync_stream=sync_stream,
+        )
+
+    def _chunk_parser(self, chunk_data: dict) -> GChunk:
+        return self.anthropic_model_response_iterator.chunk_parser(chunk=chunk_data)
+
+
 class MockResponseIterator:  # for returning ai21 streaming responses
     def __init__(self, model_response, json_mode: Optional[bool] = False):
         self.model_response = model_response
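The subclass itself is deliberately thin: it inherits event-stream unframing from `AWSEventStreamDecoder` and overrides only `_chunk_parser`, delegating to the shared Anthropic iterator (constructed with `streaming_response=None` since it is used purely as a parser here). A hedged sketch of exercising that parser directly with a hand-built event:

```python
# Hedged sketch: feed one hand-built Anthropic streaming event through the new
# decoder's parser. Field names follow Anthropic's public streaming format;
# the model id is illustrative.
decoder = AmazonAnthropicClaudeStreamDecoder(
    model="anthropic.claude-3-5-sonnet-20240620-v1:0",
    sync_stream=True,
)
chunk = decoder._chunk_parser(
    {"type": "content_block_delta", "index": 0,
     "delta": {"type": "text_delta", "text": "Hi"}}
)
```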