Add 'end_user', 'user' and 'requested_model' on more prometheus metrics (#7399)

* fix(prometheus.py): support streaming end user litellm_proxy_total_requests_metric tracking

* fix(prometheus.py): add 'requested_model' and 'end_user_id' to 'litellm_request_total_latency_metric_bucket'

enables latency tracking by end user + requested model (see the label sketch after this commit message)

* fix(prometheus.py): add end user, user and requested model metrics to 'litellm_llm_api_latency_metric'

* test: update prometheus unit tests

* test(test_prometheus.py): update tests

* test(test_prometheus.py): fix test

* test: reorder test
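
Taken together, the changes below widen the label set on the proxy's latency histograms and switch the `.labels()` calls from positional to keyword arguments. Here is a minimal sketch of that pattern with the `prometheus_client` library; the label names come from the diff further down, while the buckets and every label value shown are illustrative placeholders, not values produced by LiteLLM.

```python
from prometheus_client import Histogram

# Label set used by the latency metrics in this commit
# (see litellm/integrations/prometheus.py below).
LABELS = [
    "end_user",
    "hashed_api_key",
    "api_key_alias",
    "requested_model",
    "team",
    "team_alias",
    "user",
    "model",
]

# Illustrative histogram; the real metric uses LATENCY_BUCKETS from the repo.
request_latency = Histogram(
    "litellm_request_total_latency_metric",
    "Total latency (seconds) for a request to LiteLLM",
    labelnames=LABELS,
    buckets=(0.005, 0.05, 0.5, 5.0, float("inf")),
)

# Keyword-style labels (mirroring the new `.labels(**{...})` calls) stay correct
# even if the order of `labelnames` changes later.
request_latency.labels(
    end_user="end-user-1",        # hypothetical values, for illustration only
    hashed_api_key="abc123",
    api_key_alias="prod-key",
    requested_model="gpt-4o",     # model group the client requested
    team="team-1",
    team_alias="core",
    user="user-1",
    model="gpt-4o-2024-08-06",    # underlying model litellm actually called
).observe(1.25)
```

Worth noting: each distinct combination of label values becomes its own Prometheus time series, so adding `end_user` and `user` raises cardinality; that is the trade-off behind the per-end-user latency breakdown this commit enables.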
krrishdholakia authored Dec 24, 2024
1 parent bd4ab14 commit 78fe124
Showing 8 changed files with 114 additions and 31 deletions.
82 changes: 61 additions & 21 deletions litellm/integrations/prometheus.py
@@ -69,11 +69,14 @@ def __init__(
"litellm_request_total_latency_metric",
"Total latency (seconds) for a request to LiteLLM",
labelnames=[
"model",
"hashed_api_key",
"api_key_alias",
"team",
"team_alias",
UserAPIKeyLabelNames.END_USER.value,
UserAPIKeyLabelNames.API_KEY_HASH.value,
UserAPIKeyLabelNames.API_KEY_ALIAS.value,
REQUESTED_MODEL,
UserAPIKeyLabelNames.TEAM.value,
UserAPIKeyLabelNames.TEAM_ALIAS.value,
UserAPIKeyLabelNames.USER.value,
UserAPIKeyLabelNames.LITELLM_MODEL.value,
],
buckets=LATENCY_BUCKETS,
)
@@ -82,11 +85,14 @@ def __init__(
"litellm_llm_api_latency_metric",
"Total latency (seconds) for a models LLM API call",
labelnames=[
"model",
"hashed_api_key",
"api_key_alias",
"team",
"team_alias",
UserAPIKeyLabelNames.LITELLM_MODEL.value,
UserAPIKeyLabelNames.API_KEY_HASH.value,
UserAPIKeyLabelNames.API_KEY_ALIAS.value,
UserAPIKeyLabelNames.TEAM.value,
UserAPIKeyLabelNames.TEAM_ALIAS.value,
UserAPIKeyLabelNames.REQUESTED_MODEL.value,
UserAPIKeyLabelNames.END_USER.value,
UserAPIKeyLabelNames.USER.value,
],
buckets=LATENCY_BUCKETS,
)
@@ -447,7 +453,20 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
self.set_llm_deployment_success_metrics(
kwargs, start_time, end_time, output_tokens
)
pass

if (
standard_logging_payload["stream"] is True
): # log successful streaming requests from logging event hook.
self.litellm_proxy_total_requests_metric.labels(
end_user=end_user_id,
hashed_api_key=user_api_key,
api_key_alias=user_api_key_alias,
requested_model=model,
team=user_api_team,
team_alias=user_api_team_alias,
user=user_id,
status_code="200",
).inc()

def _increment_token_metrics(
self,
Expand Down Expand Up @@ -631,23 +650,44 @@ def _set_latency_metrics(
api_call_total_time: timedelta = end_time - api_call_start_time
api_call_total_time_seconds = api_call_total_time.total_seconds()
self.litellm_llm_api_latency_metric.labels(
model,
user_api_key,
user_api_key_alias,
user_api_team,
user_api_team_alias,
**{
UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
UserAPIKeyLabelNames.TEAM.value: user_api_team,
UserAPIKeyLabelNames.TEAM_ALIAS.value: user_api_team_alias,
UserAPIKeyLabelNames.USER.value: standard_logging_payload[
"metadata"
]["user_api_key_user_id"],
UserAPIKeyLabelNames.END_USER.value: standard_logging_payload[
"metadata"
]["user_api_key_end_user_id"],
UserAPIKeyLabelNames.REQUESTED_MODEL.value: standard_logging_payload[
"model_group"
],
}
).observe(api_call_total_time_seconds)

# total request latency
if start_time is not None and isinstance(start_time, datetime):
total_time: timedelta = end_time - start_time
total_time_seconds = total_time.total_seconds()

self.litellm_request_total_latency_metric.labels(
model,
user_api_key,
user_api_key_alias,
user_api_team,
user_api_team_alias,
**{
UserAPIKeyLabelNames.END_USER.value: standard_logging_payload[
"metadata"
]["user_api_key_end_user_id"],
UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
REQUESTED_MODEL: standard_logging_payload["model_group"],
UserAPIKeyLabelNames.TEAM.value: user_api_team,
UserAPIKeyLabelNames.TEAM_ALIAS.value: user_api_team_alias,
UserAPIKeyLabelNames.USER.value: standard_logging_payload[
"metadata"
]["user_api_key_user_id"],
UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
}
).observe(total_time_seconds)

async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
8 changes: 8 additions & 0 deletions litellm/litellm_core_utils/litellm_logging.py
@@ -2961,11 +2961,19 @@ def get_standard_logging_object_payload(
kwargs=kwargs,
)

stream: Optional[bool] = None
if (
kwargs.get("complete_streaming_response") is not None
or kwargs.get("async_complete_streaming_response") is not None
):
stream = True

payload: StandardLoggingPayload = StandardLoggingPayload(
id=str(id),
trace_id=kwargs.get("litellm_trace_id"), # type: ignore
call_type=call_type or "",
cache_hit=cache_hit,
stream=stream,
status=status,
saved_cache_cost=saved_cache_cost,
startTime=start_time_float,
14 changes: 9 additions & 5 deletions litellm/proxy/_new_secret_config.yaml
@@ -1,8 +1,12 @@
model_list:
- model_name: whisper
- model_name: openai/*
litellm_params:
model: whisper-1
model: openai/*
api_key: os.environ/OPENAI_API_KEY
model_info:
mode: audio_transcription

- model_name: fake-openai-endpoint
litellm_params:
model: openai/gpt-3.5-turbo
api_key: os.environ/OPENAI_API_KEY

litellm_settings:
callbacks: ["prometheus"]
13 changes: 13 additions & 0 deletions litellm/types/integrations/prometheus.py
@@ -1,3 +1,5 @@
from enum import Enum

REQUESTED_MODEL = "requested_model"
EXCEPTION_STATUS = "exception_status"
EXCEPTION_CLASS = "exception_class"
@@ -41,3 +43,14 @@
300.0,
float("inf"),
)


class UserAPIKeyLabelNames(Enum):
END_USER = "end_user"
USER = "user"
API_KEY_HASH = "hashed_api_key"
API_KEY_ALIAS = "api_key_alias"
TEAM = "team"
TEAM_ALIAS = "team_alias"
REQUESTED_MODEL = REQUESTED_MODEL
LITELLM_MODEL = "model"
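
The enum above centralizes the label strings; `.value` is what hands the plain string to `prometheus_client`, which expects label names as ordinary strings. A small, self-contained sketch of how the enum resolves (the list comprehension at the end is only an illustration of one way to build a `labelnames` list; the actual code in `prometheus.py` lists the members explicitly):

```python
from enum import Enum

REQUESTED_MODEL = "requested_model"


class UserAPIKeyLabelNames(Enum):
    END_USER = "end_user"
    USER = "user"
    API_KEY_HASH = "hashed_api_key"
    API_KEY_ALIAS = "api_key_alias"
    TEAM = "team"
    TEAM_ALIAS = "team_alias"
    REQUESTED_MODEL = REQUESTED_MODEL
    LITELLM_MODEL = "model"


# .value yields the raw label string; the member itself is an Enum object.
assert UserAPIKeyLabelNames.REQUESTED_MODEL.value == "requested_model"
assert UserAPIKeyLabelNames.LITELLM_MODEL.value == "model"

# One way to collect every label name without repeating the literals.
labelnames = [member.value for member in UserAPIKeyLabelNames]
print(labelnames)
# ['end_user', 'user', 'hashed_api_key', 'api_key_alias',
#  'team', 'team_alias', 'requested_model', 'model']
```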
1 change: 1 addition & 0 deletions litellm/types/utils.py
@@ -1506,6 +1506,7 @@ class StandardLoggingPayload(TypedDict):
id: str
trace_id: str # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
call_type: str
stream: Optional[bool]
response_cost: float
response_cost_failure_debug_info: Optional[
StandardLoggingModelCostFailureDebugInformation
2 changes: 1 addition & 1 deletion tests/local_testing/test_amazing_vertex_completion.py
@@ -274,7 +274,7 @@ def test_vertex_ai_anthropic_streaming():
# )
@pytest.mark.asyncio
@pytest.mark.flaky(retries=3, delay=1)
async def test_vertex_ai_anthropic_async():
async def test_aavertex_ai_anthropic_async():
# load_vertex_ai_credentials()
try:

21 changes: 19 additions & 2 deletions tests/logging_callback_tests/test_prometheus_unit_tests.py
@@ -46,6 +46,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload:
return StandardLoggingPayload(
id="test_id",
call_type="completion",
stream=False,
response_cost=0.1,
response_cost_failure_debug_info=None,
status="success",
@@ -72,6 +73,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload:
spend_logs_metadata=None,
requester_ip_address="127.0.0.1",
requester_metadata=None,
user_api_key_end_user_id="test_end_user",
),
cache_hit=False,
cache_key=None,
@@ -110,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
"user_api_key": "test_key",
"user_api_key_user_id": "test_user",
"user_api_key_team_id": "test_team",
"user_api_key_end_user_id": "test_end_user",
}
},
"start_time": datetime.now(),
@@ -299,15 +302,29 @@ def test_set_latency_metrics(prometheus_logger):

# end_time - api_call_start_time
prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once_with(
"gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
model="gpt-3.5-turbo",
hashed_api_key="key1",
api_key_alias="alias1",
team="team1",
team_alias="team_alias1",
user="test_user",
end_user="test_end_user",
requested_model="openai-gpt",
)
prometheus_logger.litellm_llm_api_latency_metric.labels().observe.assert_called_once_with(
1.5
)

# total latency for the request
prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once_with(
"gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
end_user="test_end_user",
hashed_api_key="key1",
api_key_alias="alias1",
requested_model="openai-gpt",
team="team1",
team_alias="team_alias1",
user="test_user",
model="gpt-3.5-turbo",
)
prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
2.0
4 changes: 2 additions & 2 deletions tests/otel_tests/test_prometheus.py
@@ -145,12 +145,12 @@ async def test_proxy_success_metrics():

# Check if the success metric is present and correct
assert (
'litellm_request_total_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
'litellm_request_total_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'
in metrics
)

assert (
'litellm_llm_api_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
'litellm_llm_api_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'
in metrics
)
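
The updated assertions now expect the new `end_user`, `requested_model`, and `user` labels in the scraped histogram series. A hedged sketch of running the same check by hand against a locally running proxy (the URL, port, and exact label values are assumptions; the real e2e harness may drive the proxy differently):

```python
import requests  # assumes the proxy from _new_secret_config.yaml is running locally

# 4000 is assumed as the default LiteLLM proxy port; adjust if configured otherwise.
metrics = requests.get("http://localhost:4000/metrics", timeout=10).text

# After at least one request has gone through the proxy, the latency histograms
# should carry the labels added in this commit.
for label in ('end_user="', 'requested_model="', ',user="'):
    assert label in metrics, f"expected label {label!r} in scraped metrics"

# Example of a series this might match (values depend on the request that was made):
# litellm_request_total_latency_metric_bucket{api_key_alias="None",end_user="None",
#   hashed_api_key="...",le="0.005",model="fake",requested_model="fake-openai-endpoint",
#   team="None",team_alias="None",user="default_user_id"}
```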

