From 734b0f5d555ea353b9b8580c478e91c06d170739 Mon Sep 17 00:00:00 2001
From: JonasElburgUVA
Date: Tue, 4 Mar 2025 12:23:44 +0100
Subject: [PATCH 1/4] Add the model name to the TokenUsage object

---
 src/ragas/cost.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/ragas/cost.py b/src/ragas/cost.py
index 144f66a12..b6a96ab80 100644
--- a/src/ragas/cost.py
+++ b/src/ragas/cost.py
@@ -67,8 +67,10 @@ def get_token_usage_for_openai(
         return TokenUsage(input_tokens=0, output_tokens=0)
     output_tokens = get_from_dict(llm_output, "token_usage.completion_tokens", 0)
     input_tokens = get_from_dict(llm_output, "token_usage.prompt_tokens", 0)
+    model_name = get_from_dict(llm_output, "model_name", "")
 
-    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens)
+
+    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens, model=model_name)
 
 
 def get_token_usage_for_anthropic(

From 72e7806a1df943b4871a815970f6dda35c6f74e7 Mon Sep 17 00:00:00 2001
From: JonasElburgUVA
Date: Tue, 4 Mar 2025 13:02:51 +0100
Subject: [PATCH 2/4] Retain the model when adding TokenUsage objects together

---
 src/ragas/cost.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/ragas/cost.py b/src/ragas/cost.py
index b6a96ab80..aaa9f1767 100644
--- a/src/ragas/cost.py
+++ b/src/ragas/cost.py
@@ -22,6 +22,7 @@ def __add__(self, y: "TokenUsage") -> "TokenUsage":
             return TokenUsage(
                 input_tokens=self.input_tokens + y.input_tokens,
                 output_tokens=self.output_tokens + y.output_tokens,
+                model=self.model,
             )
         else:
             raise ValueError("Cannot add TokenUsage objects with different models")

From df779fe19837175bb9bfa1c67f5aed4afb6f6445 Mon Sep 17 00:00:00 2001
From: JonasElburgUVA
Date: Fri, 7 Mar 2025 09:21:17 +0100
Subject: [PATCH 3/4] Add model IDs to the other parsers and update tests to
 expect a model value in TokenUsage objects

---
 src/ragas/cost.py       | 14 +++++++++++---
 tests/unit/test_cost.py | 10 +++++-----
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/src/ragas/cost.py b/src/ragas/cost.py
index aaa9f1767..6decd46b7 100644
--- a/src/ragas/cost.py
+++ b/src/ragas/cost.py
@@ -70,7 +70,6 @@ def get_token_usage_for_openai(
     input_tokens = get_from_dict(llm_output, "token_usage.prompt_tokens", 0)
     model_name = get_from_dict(llm_output, "model_name", "")
 
-
     return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens, model=model_name)
 
 
@@ -95,10 +94,14 @@ def get_token_usage_for_anthropic(
                         "usage.output_tokens",
                         0,
                     ),
+                    model=get_from_dict(
+                        g.message.response_metadata,
+                        "model",
+                        "")
                 )
             )
 
-        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0))
+        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=token_usages[0].model))
     else:
         return TokenUsage(input_tokens=0, output_tokens=0)
 
@@ -123,10 +126,15 @@ def get_token_usage_for_bedrock(
                         "usage.completion_tokens",
                         0,
                     ),
+                    model=get_from_dict(
+                        g.message.response_metadata,
+                        "model_id"
+                    )
+
                 )
             )
 
-        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0))
+        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=token_usages[0].model))
 
     return TokenUsage(input_tokens=0, output_tokens=0)

diff --git a/tests/unit/test_cost.py b/tests/unit/test_cost.py
index 715f28f94..1f525dc8f 100644
--- a/tests/unit/test_cost.py
+++ b/tests/unit/test_cost.py
@@ -133,19 +133,19 @@ def test_token_usage_cost():
 def test_parse_llm_results():
     # openai
     token_usage = get_token_usage_for_openai(openai_llm_result)
-    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10)
+    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10, model="gpt-4o")
 
     # anthropic
     token_usage = get_token_usage_for_anthropic(anthropic_llm_result)
-    assert token_usage == TokenUsage(input_tokens=9, output_tokens=12)
+    assert token_usage == TokenUsage(input_tokens=9, output_tokens=12, model="claude-3-opus-20240229")
 
     # Bedrock LLaMa
     token_usage = get_token_usage_for_bedrock(bedrock_llama_result)
-    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10)
+    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10, model="us.meta.llama3-1-70b-instruct-v1:0")
 
     # Bedrock Claude
     token_usage = get_token_usage_for_bedrock(bedrock_claude_result)
-    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10)
+    assert token_usage == TokenUsage(input_tokens=10, output_tokens=10, model="us.anthropic.claude-3-5-sonnet-20240620-v1:0")
 
 
 def test_cost_callback_handler():
@@ -153,7 +153,7 @@ def test_cost_callback_handler():
     cost_cb.on_llm_end(openai_llm_result)
 
     # cost
-    assert cost_cb.total_tokens() == TokenUsage(input_tokens=10, output_tokens=10)
+    assert cost_cb.total_tokens() == TokenUsage(input_tokens=10, output_tokens=10, model="gpt-4o")
     assert cost_cb.total_cost(0.1) == 2.0
 
     assert (

From 308b53c93a228e271c6501e39fd200dda5b8fca2 Mon Sep 17 00:00:00 2001
From: JonasElburgUVA
Date: Fri, 7 Mar 2025 09:41:25 +0100
Subject: [PATCH 4/4] Fall back to an empty string as the model ID if the LLM
 results do not come with model IDs

---
 src/ragas/cost.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/ragas/cost.py b/src/ragas/cost.py
index 6decd46b7..f33f33d27 100644
--- a/src/ragas/cost.py
+++ b/src/ragas/cost.py
@@ -68,9 +68,9 @@ def get_token_usage_for_openai(
         return TokenUsage(input_tokens=0, output_tokens=0)
     output_tokens = get_from_dict(llm_output, "token_usage.completion_tokens", 0)
     input_tokens = get_from_dict(llm_output, "token_usage.prompt_tokens", 0)
-    model_name = get_from_dict(llm_output, "model_name", "")
+    model = get_from_dict(llm_output, "model_name", "")
 
-    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens, model=model_name)
+    return TokenUsage(input_tokens=input_tokens, output_tokens=output_tokens, model=model)
 
 
 def get_token_usage_for_anthropic(
@@ -100,8 +100,10 @@ def get_token_usage_for_anthropic(
                         "")
                 )
             )
-
-        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=token_usages[0].model))
+        model = next(
+            (usage.model for usage in token_usages if usage.model), ""
+        )
+        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=model))
     else:
         return TokenUsage(input_tokens=0, output_tokens=0)
 
@@ -128,13 +130,15 @@ def get_token_usage_for_bedrock(
                     ),
                     model=get_from_dict(
                         g.message.response_metadata,
-                        "model_id"
+                        "model_id",
+                        ""
                     )
-
                 )
             )
-
-        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=token_usages[0].model))
+        model = next(
+            (usage.model for usage in token_usages if usage.model), ""
+        )
+        return sum(token_usages, TokenUsage(input_tokens=0, output_tokens=0, model=model))
 
     return TokenUsage(input_tokens=0, output_tokens=0)
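
Note on the resulting behaviour: taken together, the series makes TokenUsage
model-aware. The parsers stamp each usage with the model that produced it,
and __add__ carries the model through aggregation. Below is a minimal
runnable sketch of that behaviour; the dataclass is a simplified stand-in
for the real class in src/ragas/cost.py, and the equality guard in __add__
is assumed from the context lines above, not copied from the source.

from dataclasses import dataclass


@dataclass
class TokenUsage:
    input_tokens: int
    output_tokens: int
    model: str = ""

    def __add__(self, y: "TokenUsage") -> "TokenUsage":
        # Guard assumed from the PATCH 2/4 context lines; the real check may
        # be more permissive about empty model strings.
        if self.model == y.model:
            return TokenUsage(
                input_tokens=self.input_tokens + y.input_tokens,
                output_tokens=self.output_tokens + y.output_tokens,
                model=self.model,  # retained, per PATCH 2/4
            )
        raise ValueError("Cannot add TokenUsage objects with different models")


a = TokenUsage(input_tokens=10, output_tokens=10, model="gpt-4o")
b = TokenUsage(input_tokens=5, output_tokens=7, model="gpt-4o")
assert a + b == TokenUsage(input_tokens=15, output_tokens=17, model="gpt-4o")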
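
The fallback in PATCH 4/4 is what keeps the sum() calls safe: sum() starts
from the given TokenUsage start value and adds each parsed usage onto it, so
the start value's model has to agree with the usages, and the earlier
token_usages[0].model would raise an IndexError on an empty list. A sketch of
the aggregation pattern, reusing the stand-in class above:

usages = [
    TokenUsage(input_tokens=9, output_tokens=12, model="claude-3-opus-20240229"),
    TokenUsage(input_tokens=4, output_tokens=6, model="claude-3-opus-20240229"),
]
# First non-empty model wins; "" covers results whose response_metadata
# carries no model ID at all.
model = next((u.model for u in usages if u.model), "")
total = sum(usages, TokenUsage(input_tokens=0, output_tokens=0, model=model))
assert total == TokenUsage(
    input_tokens=13, output_tokens=18, model="claude-3-opus-20240229"
)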