
Commit 7421702

build: add gemini commercial rate limits
1 parent 25e9dd4 · commit 7421702

File tree: 2 files changed, +112 −38 lines changed

Diff for: litellm/model_prices_and_context_window_backup.json

+56 −19
@@ -3663,18 +3663,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.001,
-        "input_cost_per_token": 0.00015,
-        "output_cost_per_token": 0.0006,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
         "supports_audio_output": true,
         "supports_tool_choice": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+        "source": "https://ai.google.dev/pricing#2_0flash"
     },
     "gemini-2.0-flash-001": {
         "max_tokens": 8192,
@@ -3767,6 +3769,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/pricing#2_0flash"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -3803,6 +3830,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-lite-preview-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 60000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -8758,21 +8810,6 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
-
-    },
-    "databricks/databricks-meta-llama-3-3-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.00000100002,
-        "input_dbu_cost_per_token": 0.000014286,
-        "output_cost_per_token": 0.00000299999,
-        "output_dbu_cost_per_token": 0.000042857,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
-        "supports_tool_choice": true
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,

Diff for: model_prices_and_context_window.json

+56 −19
@@ -3663,18 +3663,20 @@
         "max_audio_length_hours": 8.4,
         "max_audio_per_prompt": 1,
         "max_pdf_size_mb": 30,
-        "input_cost_per_audio_token": 0.001,
-        "input_cost_per_token": 0.00015,
-        "output_cost_per_token": 0.0006,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
         "litellm_provider": "gemini",
         "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
         "supports_system_messages": true,
         "supports_function_calling": true,
         "supports_vision": true,
         "supports_response_schema": true,
         "supports_audio_output": true,
         "supports_tool_choice": true,
-        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash"
+        "source": "https://ai.google.dev/pricing#2_0flash"
     },
     "gemini-2.0-flash-001": {
         "max_tokens": 8192,
@@ -3767,6 +3769,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-001": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.0000007,
+        "input_cost_per_token": 0.0000001,
+        "output_cost_per_token": 0.0000004,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 10000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://ai.google.dev/pricing#2_0flash"
+    },
     "gemini/gemini-2.0-flash-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -3803,6 +3830,31 @@
         "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash",
         "supports_tool_choice": true
     },
+    "gemini/gemini-2.0-flash-lite-preview-02-05": {
+        "max_tokens": 8192,
+        "max_input_tokens": 1048576,
+        "max_output_tokens": 8192,
+        "max_images_per_prompt": 3000,
+        "max_videos_per_prompt": 10,
+        "max_video_length": 1,
+        "max_audio_length_hours": 8.4,
+        "max_audio_per_prompt": 1,
+        "max_pdf_size_mb": 30,
+        "input_cost_per_audio_token": 0.000000075,
+        "input_cost_per_token": 0.000000075,
+        "output_cost_per_token": 0.0000003,
+        "litellm_provider": "gemini",
+        "mode": "chat",
+        "rpm": 60000,
+        "tpm": 10000000,
+        "supports_system_messages": true,
+        "supports_function_calling": true,
+        "supports_vision": true,
+        "supports_response_schema": true,
+        "supports_audio_output": false,
+        "supports_tool_choice": true,
+        "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite"
+    },
     "gemini/gemini-2.0-flash-thinking-exp": {
         "max_tokens": 8192,
         "max_input_tokens": 1048576,
@@ -8758,21 +8810,6 @@
         "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
         "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
         "supports_tool_choice": true
-
-    },
-    "databricks/databricks-meta-llama-3-3-70b-instruct": {
-        "max_tokens": 128000,
-        "max_input_tokens": 128000,
-        "max_output_tokens": 128000,
-        "input_cost_per_token": 0.00000100002,
-        "input_dbu_cost_per_token": 0.000014286,
-        "output_cost_per_token": 0.00000299999,
-        "output_dbu_cost_per_token": 0.000042857,
-        "litellm_provider": "databricks",
-        "mode": "chat",
-        "source": "https://www.databricks.com/product/pricing/foundation-model-serving",
-        "metadata": {"notes": "Input/output cost per token is dbu cost * $0.070, based on databricks Llama 3.1 70B conversion. Number provided for reference, '*_dbu_cost_per_token' used in actual calculation."},
-        "supports_tool_choice": true
     },
     "databricks/databricks-dbrx-instruct": {
         "max_tokens": 32768,
