
Commit 78fe124

Add 'end_user', 'user' and 'requested_model' on more prometheus metrics (#7399)
* fix(prometheus.py): support streaming end user litellm_proxy_total_requests_metric tracking
* fix(prometheus.py): add 'requested_model' and 'end_user_id' to 'litellm_request_total_latency_metric_bucket'; enables latency tracking by end user + requested model
* fix(prometheus.py): add end user, user and requested model metrics to 'litellm_llm_api_latency_metric'
* test: update prometheus unit tests
* test(test_prometheus.py): update tests
* test(test_prometheus.py): fix test
* test: reorder test
1 parent bd4ab14 · commit 78fe124

File tree

8 files changed: +114 -31 lines

litellm/integrations/prometheus.py

Lines changed: 61 additions & 21 deletions
@@ -69,11 +69,14 @@ def __init__(
             "litellm_request_total_latency_metric",
             "Total latency (seconds) for a request to LiteLLM",
             labelnames=[
-                "model",
-                "hashed_api_key",
-                "api_key_alias",
-                "team",
-                "team_alias",
+                UserAPIKeyLabelNames.END_USER.value,
+                UserAPIKeyLabelNames.API_KEY_HASH.value,
+                UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+                REQUESTED_MODEL,
+                UserAPIKeyLabelNames.TEAM.value,
+                UserAPIKeyLabelNames.TEAM_ALIAS.value,
+                UserAPIKeyLabelNames.USER.value,
+                UserAPIKeyLabelNames.LITELLM_MODEL.value,
             ],
             buckets=LATENCY_BUCKETS,
         )
@@ -82,11 +85,14 @@ def __init__(
             "litellm_llm_api_latency_metric",
             "Total latency (seconds) for a models LLM API call",
             labelnames=[
-                "model",
-                "hashed_api_key",
-                "api_key_alias",
-                "team",
-                "team_alias",
+                UserAPIKeyLabelNames.LITELLM_MODEL.value,
+                UserAPIKeyLabelNames.API_KEY_HASH.value,
+                UserAPIKeyLabelNames.API_KEY_ALIAS.value,
+                UserAPIKeyLabelNames.TEAM.value,
+                UserAPIKeyLabelNames.TEAM_ALIAS.value,
+                UserAPIKeyLabelNames.REQUESTED_MODEL.value,
+                UserAPIKeyLabelNames.END_USER.value,
+                UserAPIKeyLabelNames.USER.value,
             ],
             buckets=LATENCY_BUCKETS,
         )
@@ -447,7 +453,20 @@ async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
                 self.set_llm_deployment_success_metrics(
                     kwargs, start_time, end_time, output_tokens
                 )
-            pass
+
+            if (
+                standard_logging_payload["stream"] is True
+            ):  # log successful streaming requests from logging event hook.
+                self.litellm_proxy_total_requests_metric.labels(
+                    end_user=end_user_id,
+                    hashed_api_key=user_api_key,
+                    api_key_alias=user_api_key_alias,
+                    requested_model=model,
+                    team=user_api_team,
+                    team_alias=user_api_team_alias,
+                    user=user_id,
+                    status_code="200",
+                ).inc()
 
     def _increment_token_metrics(
         self,
@@ -631,23 +650,44 @@ def _set_latency_metrics(
             api_call_total_time: timedelta = end_time - api_call_start_time
             api_call_total_time_seconds = api_call_total_time.total_seconds()
             self.litellm_llm_api_latency_metric.labels(
-                model,
-                user_api_key,
-                user_api_key_alias,
-                user_api_team,
-                user_api_team_alias,
+                **{
+                    UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                    UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
+                    UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
+                    UserAPIKeyLabelNames.TEAM.value: user_api_team,
+                    UserAPIKeyLabelNames.TEAM_ALIAS.value: user_api_team_alias,
+                    UserAPIKeyLabelNames.USER.value: standard_logging_payload[
+                        "metadata"
+                    ]["user_api_key_user_id"],
+                    UserAPIKeyLabelNames.END_USER.value: standard_logging_payload[
+                        "metadata"
+                    ]["user_api_key_end_user_id"],
+                    UserAPIKeyLabelNames.REQUESTED_MODEL.value: standard_logging_payload[
+                        "model_group"
+                    ],
+                }
             ).observe(api_call_total_time_seconds)
 
         # total request latency
         if start_time is not None and isinstance(start_time, datetime):
             total_time: timedelta = end_time - start_time
             total_time_seconds = total_time.total_seconds()
+
             self.litellm_request_total_latency_metric.labels(
-                model,
-                user_api_key,
-                user_api_key_alias,
-                user_api_team,
-                user_api_team_alias,
+                **{
+                    UserAPIKeyLabelNames.END_USER.value: standard_logging_payload[
+                        "metadata"
+                    ]["user_api_key_end_user_id"],
+                    UserAPIKeyLabelNames.API_KEY_HASH.value: user_api_key,
+                    UserAPIKeyLabelNames.API_KEY_ALIAS.value: user_api_key_alias,
+                    REQUESTED_MODEL: standard_logging_payload["model_group"],
+                    UserAPIKeyLabelNames.TEAM.value: user_api_team,
+                    UserAPIKeyLabelNames.TEAM_ALIAS.value: user_api_team_alias,
+                    UserAPIKeyLabelNames.USER.value: standard_logging_payload[
+                        "metadata"
+                    ]["user_api_key_user_id"],
+                    UserAPIKeyLabelNames.LITELLM_MODEL.value: model,
+                }
             ).observe(total_time_seconds)
 
     async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
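The central pattern in this file is declaring each histogram's `labelnames` from shared constants and then supplying label values as keyword arguments via `**{...}` instead of positionally. A minimal standalone sketch of that pattern, built on the standard `prometheus_client` API with abbreviated, illustrative names (not LiteLLM's actual metric wrapper):

```python
# Minimal sketch of the keyword-label pattern used above, assuming the
# standard prometheus_client API. Metric and label names are illustrative.
from enum import Enum

from prometheus_client import Histogram


class LabelNames(Enum):
    END_USER = "end_user"
    USER = "user"
    API_KEY_HASH = "hashed_api_key"
    REQUESTED_MODEL = "requested_model"
    LITELLM_MODEL = "model"


request_latency = Histogram(
    "example_request_total_latency_seconds",
    "Total latency (seconds) for a request",
    labelnames=[label.value for label in LabelNames],
)

# Passing labels as a keyword dict keyed by the enum values keeps each call
# site independent of the order in which labelnames were declared.
request_latency.labels(
    **{
        LabelNames.END_USER.value: "end-user-123",
        LabelNames.USER.value: "internal-user-1",
        LabelNames.API_KEY_HASH.value: "abc123",
        LabelNames.REQUESTED_MODEL.value: "gpt-4o",
        LabelNames.LITELLM_MODEL.value: "gpt-4o-2024-08-06",
    }
).observe(1.42)
```

Because keyword labels are order-independent, the two histograms in the diff can list the same labels in different orders without affecting the call sites.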

litellm/litellm_core_utils/litellm_logging.py

Lines changed: 8 additions & 0 deletions
@@ -2961,11 +2961,19 @@ def get_standard_logging_object_payload(
             kwargs=kwargs,
         )
 
+        stream: Optional[bool] = None
+        if (
+            kwargs.get("complete_streaming_response") is not None
+            or kwargs.get("async_complete_streaming_response") is not None
+        ):
+            stream = True
+
         payload: StandardLoggingPayload = StandardLoggingPayload(
             id=str(id),
             trace_id=kwargs.get("litellm_trace_id"),  # type: ignore
             call_type=call_type or "",
             cache_hit=cache_hit,
+            stream=stream,
             status=status,
             saved_cache_cost=saved_cache_cost,
             startTime=start_time_float,
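The new block derives a `stream` flag from whether the logging layer attached a fully assembled streaming response. A small sketch of the same check in isolation (the two kwarg keys come from the diff; the function name is illustrative):

```python
from typing import Any, Dict, Optional


def detect_stream_flag(kwargs: Dict[str, Any]) -> Optional[bool]:
    """Return True if a (sync or async) complete streaming response was
    attached to the logging kwargs, otherwise None, matching the
    Optional[bool] field added to the payload."""
    if (
        kwargs.get("complete_streaming_response") is not None
        or kwargs.get("async_complete_streaming_response") is not None
    ):
        return True
    return None


# A streamed request whose chunks have been assembled by the logging layer:
assert detect_stream_flag({"complete_streaming_response": {"id": "chatcmpl-1"}}) is True
# A request with no assembled streaming response recorded:
assert detect_stream_flag({"stream": True}) is None
```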

litellm/proxy/_new_secret_config.yaml

Lines changed: 9 additions & 5 deletions
@@ -1,8 +1,12 @@
 model_list:
-  - model_name: whisper
+  - model_name: openai/*
     litellm_params:
-      model: whisper-1
+      model: openai/*
       api_key: os.environ/OPENAI_API_KEY
-    model_info:
-      mode: audio_transcription
-
+  - model_name: fake-openai-endpoint
+    litellm_params:
+      model: openai/gpt-3.5-turbo
+      api_key: os.environ/OPENAI_API_KEY
+
+litellm_settings:
+  callbacks: ["prometheus"]
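With `callbacks: ["prometheus"]` set, the proxy exposes Prometheus metrics over HTTP, which is how the otel test further down reads them. A rough sketch of inspecting the enriched latency labels by hand, assuming a locally running proxy with its metrics endpoint reachable at `http://localhost:4000/metrics` (adjust host and path to your deployment):

```python
# Hedged sketch: assumes a LiteLLM proxy running locally with the prometheus
# callback enabled and its metrics endpoint reachable at /metrics.
import requests

resp = requests.get("http://localhost:4000/metrics", timeout=10)
resp.raise_for_status()

for line in resp.text.splitlines():
    if line.startswith("litellm_request_total_latency_metric_bucket"):
        # After this change, each sample line should carry the enriched label
        # set, e.g. end_user=..., user=..., requested_model=...
        print(line)
        break
```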

litellm/types/integrations/prometheus.py

Lines changed: 13 additions & 0 deletions
@@ -1,3 +1,5 @@
+from enum import Enum
+
 REQUESTED_MODEL = "requested_model"
 EXCEPTION_STATUS = "exception_status"
 EXCEPTION_CLASS = "exception_class"
@@ -41,3 +43,14 @@
     300.0,
     float("inf"),
 )
+
+
+class UserAPIKeyLabelNames(Enum):
+    END_USER = "end_user"
+    USER = "user"
+    API_KEY_HASH = "hashed_api_key"
+    API_KEY_ALIAS = "api_key_alias"
+    TEAM = "team"
+    TEAM_ALIAS = "team_alias"
+    REQUESTED_MODEL = REQUESTED_MODEL
+    LITELLM_MODEL = "model"
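Reusing the module-level `REQUESTED_MODEL` constant as the enum member's value keeps the two spellings from drifting apart. A self-contained illustration of that property and of building a label dict from the enum (standalone copy for demonstration, not an import of LiteLLM's module):

```python
from enum import Enum

REQUESTED_MODEL = "requested_model"


class UserAPIKeyLabelNames(Enum):
    END_USER = "end_user"
    USER = "user"
    API_KEY_HASH = "hashed_api_key"
    API_KEY_ALIAS = "api_key_alias"
    TEAM = "team"
    TEAM_ALIAS = "team_alias"
    REQUESTED_MODEL = REQUESTED_MODEL
    LITELLM_MODEL = "model"


# One source of truth for the label spelling:
assert UserAPIKeyLabelNames.REQUESTED_MODEL.value == REQUESTED_MODEL == "requested_model"

# Label dicts built in different call sites cannot drift apart:
labels = {name.value: "None" for name in UserAPIKeyLabelNames}
print(sorted(labels))
# ['api_key_alias', 'end_user', 'hashed_api_key', 'model',
#  'requested_model', 'team', 'team_alias', 'user']
```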

litellm/types/utils.py

Lines changed: 1 addition & 0 deletions
@@ -1506,6 +1506,7 @@ class StandardLoggingPayload(TypedDict):
     id: str
     trace_id: str  # Trace multiple LLM calls belonging to same overall request (e.g. fallbacks/retries)
     call_type: str
+    stream: Optional[bool]
     response_cost: float
     response_cost_failure_debug_info: Optional[
         StandardLoggingModelCostFailureDebugInformation
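A toy `TypedDict` mirroring just the new field, to show why it is typed `Optional[bool]`: non-streaming calls simply leave it as `None` (illustrative only, not the real `StandardLoggingPayload`):

```python
from typing import Optional, TypedDict


class MiniLoggingPayload(TypedDict):
    """Illustrative stand-in for the real StandardLoggingPayload."""

    id: str
    call_type: str
    stream: Optional[bool]  # True for streamed requests, None when not streamed / unknown


streamed: MiniLoggingPayload = {"id": "req-1", "call_type": "completion", "stream": True}
regular: MiniLoggingPayload = {"id": "req-2", "call_type": "completion", "stream": None}
```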

tests/local_testing/test_amazing_vertex_completion.py

Lines changed: 1 addition & 1 deletion
@@ -274,7 +274,7 @@ def test_vertex_ai_anthropic_streaming():
 # )
 @pytest.mark.asyncio
 @pytest.mark.flaky(retries=3, delay=1)
-async def test_vertex_ai_anthropic_async():
+async def test_aavertex_ai_anthropic_async():
     # load_vertex_ai_credentials()
     try:

tests/logging_callback_tests/test_prometheus_unit_tests.py

Lines changed: 19 additions & 2 deletions
@@ -46,6 +46,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload:
     return StandardLoggingPayload(
         id="test_id",
         call_type="completion",
+        stream=False,
         response_cost=0.1,
         response_cost_failure_debug_info=None,
         status="success",
@@ -72,6 +73,7 @@ def create_standard_logging_payload() -> StandardLoggingPayload:
             spend_logs_metadata=None,
             requester_ip_address="127.0.0.1",
             requester_metadata=None,
+            user_api_key_end_user_id="test_end_user",
         ),
         cache_hit=False,
         cache_key=None,
@@ -110,6 +112,7 @@ async def test_async_log_success_event(prometheus_logger):
                 "user_api_key": "test_key",
                 "user_api_key_user_id": "test_user",
                 "user_api_key_team_id": "test_team",
+                "user_api_key_end_user_id": "test_end_user",
             }
         },
         "start_time": datetime.now(),
@@ -299,15 +302,29 @@ def test_set_latency_metrics(prometheus_logger):
 
     # end_time - api_call_start_time
     prometheus_logger.litellm_llm_api_latency_metric.labels.assert_called_once_with(
-        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
+        model="gpt-3.5-turbo",
+        hashed_api_key="key1",
+        api_key_alias="alias1",
+        team="team1",
+        team_alias="team_alias1",
+        user="test_user",
+        end_user="test_end_user",
+        requested_model="openai-gpt",
     )
     prometheus_logger.litellm_llm_api_latency_metric.labels().observe.assert_called_once_with(
         1.5
     )
 
     # total latency for the request
     prometheus_logger.litellm_request_total_latency_metric.labels.assert_called_once_with(
-        "gpt-3.5-turbo", "key1", "alias1", "team1", "team_alias1"
+        end_user="test_end_user",
+        hashed_api_key="key1",
+        api_key_alias="alias1",
+        requested_model="openai-gpt",
+        team="team1",
+        team_alias="team_alias1",
+        user="test_user",
+        model="gpt-3.5-turbo",
     )
     prometheus_logger.litellm_request_total_latency_metric.labels().observe.assert_called_once_with(
         2.0
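The reworked assertions check that `.labels()` is now called with keyword arguments rather than positionally. The same style of assertion can be reproduced with a bare `MagicMock` standing in for the metric (an illustrative sketch, not the actual `prometheus_logger` fixture):

```python
from unittest.mock import MagicMock

metric = MagicMock()

# What the code under test now does (keyword labels instead of positional):
metric.labels(
    model="gpt-3.5-turbo",
    hashed_api_key="key1",
    end_user="test_end_user",
    requested_model="openai-gpt",
).observe(1.5)

# Keyword assertions are order-independent, so the test does not care in
# which order the label names were declared on the metric.
metric.labels.assert_called_once_with(
    model="gpt-3.5-turbo",
    hashed_api_key="key1",
    end_user="test_end_user",
    requested_model="openai-gpt",
)
metric.labels().observe.assert_called_once_with(1.5)
```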

tests/otel_tests/test_prometheus.py

Lines changed: 2 additions & 2 deletions
@@ -145,12 +145,12 @@ async def test_proxy_success_metrics():
 
     # Check if the success metric is present and correct
     assert (
-        'litellm_request_total_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
+        'litellm_request_total_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'
         in metrics
     )
 
     assert (
-        'litellm_llm_api_latency_metric_bucket{api_key_alias="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",team="None",team_alias="None"}'
+        'litellm_llm_api_latency_metric_bucket{api_key_alias="None",end_user="None",hashed_api_key="88dc28d0f030c55ed4ab77ed8faf098196cb1c05df778539800c9f1243fe6b4b",le="0.005",model="fake",requested_model="fake-openai-endpoint",team="None",team_alias="None",user="default_user_id"}'
         in metrics
     )
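These assertions pin the exact exposition line, including the new `end_user`, `requested_model`, and `user` labels. An alternative sketch (assuming `prometheus_client` is available in the test environment) parses the scraped text and checks labels as a dict, which is less sensitive to label ordering in the output:

```python
from prometheus_client.parser import text_string_to_metric_families

# Trimmed stand-in for the text scraped from the proxy's metrics endpoint.
metrics = (
    'litellm_request_total_latency_metric_bucket{api_key_alias="None",end_user="None",'
    'hashed_api_key="abc",le="0.005",model="fake",requested_model="fake-openai-endpoint",'
    'team="None",team_alias="None",user="default_user_id"} 1.0\n'
)

samples = [
    sample
    for family in text_string_to_metric_families(metrics)
    for sample in family.samples
    if sample.name == "litellm_request_total_latency_metric_bucket"
]
assert samples, "latency bucket sample not found"
assert samples[0].labels["end_user"] == "None"
assert samples[0].labels["requested_model"] == "fake-openai-endpoint"
assert samples[0].labels["user"] == "default_user_id"
```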
