
Commit 7421702

build: add gemini commercial rate limits
1 parent 25e9dd4 · commit 7421702

File tree: 2 files changed, +112 −38 lines changed

Diff for: litellm/model_prices_and_context_window_backup.json

+56 −19
@@ -3663,18 +3663,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.001,
-        "input_cost_per_token": 0.00015,
-        "output_cost_per_token": 0.0006,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
         "supports_audio_output": true,
         "supports_tool_choice": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+        "source": "https://ai.google.dev/pricing#2_0flash"
     },
     "gemini-2.0-flash-001": {
         "max_tokens": 8192,
@@ -3767,6 +3769,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/pricing#2_0flash"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -3803,6 +3830,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-lite-preview-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 60000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -8758,21 +8810,6 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
-
-    },
-    "databricks/databricks-meta-llama-3-3-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.00000100002,
-        "input_dbu_cost_per_token": 0.000014286,
-        "output_cost_per_token": 0.00000299999,
-        "output_dbu_cost_per_token": 0.000042857,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
-        "supports_tool_choice": true
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,

Diff for: model_prices_and_context_window.json

+56 −19
@@ -3663,18 +3663,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.001,
-        "input_cost_per_token": 0.00015,
-        "output_cost_per_token": 0.0006,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
         "supports_audio_output": true,
         "supports_tool_choice": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+        "source": "https://ai.google.dev/pricing#2_0flash"
     },
     "gemini-2.0-flash-001": {
         "max_tokens": 8192,
@@ -3767,6 +3769,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/pricing#2_0flash"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -3803,6 +3830,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-lite-preview-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 60000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -8758,21 +8810,6 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
-
-    },
-    "databricks/databricks-meta-llama-3-3-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.00000100002,
-        "input_dbu_cost_per_token": 0.000014286,
-        "output_cost_per_token": 0.00000299999,
-        "output_dbu_cost_per_token": 0.000042857,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
-        "supports_tool_choice": true
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
