diff --git a/litellm/model_prices_and_context_window_backup.json b/litellm/model_prices_and_context_window_backup.json
index a613b29a4dfe..d9e99c3f5e32 100644
--- a/litellm/model_prices_and_context_window_backup.json
+++ b/litellm/model_prices_and_context_window_backup.json
@@ -3663,18 +3663,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.001,
-        "input_cost_per_token": 0.00015,
-        "output_cost_per_token": 0.0006,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
         "supports_audio_output": true,
         "supports_tool_choice": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+        "source": "https://ai.google.dev/pricing#2_0flash"
     },
     "gemini-2.0-flash-001": {
         "max_tokens": 8192,
@@ -3767,6 +3769,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/pricing#2_0flash"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -3803,6 +3830,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-lite-preview-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 60000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -8758,21 +8810,6 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
-
-    },
-    "databricks/databricks-meta-llama-3-3-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.00000100002,
-        "input_dbu_cost_per_token": 0.000014286,
-        "output_cost_per_token": 0.00000299999,
-        "output_dbu_cost_per_token": 0.000042857,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
-        "supports_tool_choice": true
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
@@ -8973,4 +9010,4 @@
         "output_cost_per_second": 0.00,
         "litellm_provider": "assemblyai"
     }
-}
+}
\ No newline at end of file
diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json
index a613b29a4dfe..d9e99c3f5e32 100644
--- a/model_prices_and_context_window.json
+++ b/model_prices_and_context_window.json
@@ -3663,18 +3663,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.001,
-        "input_cost_per_token": 0.00015,
-        "output_cost_per_token": 0.0006,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
         "supports_audio_output": true,
         "supports_tool_choice": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+        "source": "https://ai.google.dev/pricing#2_0flash"
     },
     "gemini-2.0-flash-001": {
         "max_tokens": 8192,
@@ -3767,6 +3769,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/pricing#2_0flash"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -3803,6 +3830,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-lite-preview-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 60000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -8758,21 +8810,6 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
-
-    },
-    "databricks/databricks-meta-llama-3-3-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.00000100002,
-        "input_dbu_cost_per_token": 0.000014286,
-        "output_cost_per_token": 0.00000299999,
-        "output_dbu_cost_per_token": 0.000042857,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
-        "supports_tool_choice": true
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
@@ -8973,4 +9010,4 @@
         "output_cost_per_second": 0.00,
         "litellm_provider": "assemblyai"
     }
-}
+}
\ No newline at end of file