From ef7b1c91adb03309f628d5cb130670acd0244f18 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Fri, 8 Nov 2024 21:41:29 +0530 Subject: [PATCH 01/11] use anthropic `client.beta.messages.count_tokens` to count message tokens --- tokencost/costs.py | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/tokencost/costs.py b/tokencost/costs.py index 3677d6f..41ac88f 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -43,13 +43,23 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: model = strip_ft_model_name(model) if "claude-" in model: - """ - Note that this is only accurate for older models, e.g. `claude-2.1`. - For newer models this can only be used as a _very_ rough estimate, - instead you should rely on the `usage` property in the response for exact counts. - """ - prompt = "".join(message["content"] for message in messages) - return count_string_tokens(prompt, model) + logger.warning( + "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!" + ) + client = anthropic.Client() + + if "claude-3-sonnet" in model: + logger.warning( + f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string." + ) + prompt = "".join(message["content"] for message in messages) + return count_string_tokens(prompt, model) + + num_tokens = client.beta.messages.count_tokens( + model=model, + messages=messages, + ).input_tokens + return num_tokens try: encoding = tiktoken.encoding_for_model(model) @@ -80,7 +90,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: ) return count_message_tokens(messages, model="gpt-3.5-turbo-0613") elif "gpt-4o" in model: - print( + logger.warning( "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.") return count_message_tokens(messages, model="gpt-4o-2024-05-13") elif "gpt-4" in model: @@ -121,14 +131,13 @@ def count_string_tokens(prompt: str, model: str) -> int: model = model.split("/")[-1] if "claude-" in model: - """ - Note that this is only accurate for older models, e.g. `claude-2.1`. - For newer models this can only be used as a _very_ rough estimate, - instead you should rely on the `usage` property in the response for exact counts. - """ + logger.warning( + "Warning: This is only accurate for older models e.g. `claude-2.1` so please expect a _very_ rough estimate." + "Use the `usage` property in the response for exact counts." + ) if "claude-3" in model: logger.warning( - "Warning: Claude-3 models are not yet supported. Returning num tokens assuming claude-2.1." + "Warning: Claude-3 models are unsupported. Returning num tokens assuming claude-2.1." ) client = anthropic.Client() token_count = client.count_tokens(prompt) From 6dc90ca93cb57ac5e74df5aac455906dd833ae24 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 9 Nov 2024 16:33:20 +0530 Subject: [PATCH 02/11] add exception for claude --- tokencost/costs.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/tokencost/costs.py b/tokencost/costs.py index 41ac88f..4f0b3b8 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -12,8 +12,6 @@ logger = logging.getLogger(__name__) -# TODO: Add Claude support -# https://www-files.anthropic.com/production/images/model_pricing_july2023.pdf # Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet. # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts @@ -55,11 +53,14 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: prompt = "".join(message["content"] for message in messages) return count_string_tokens(prompt, model) - num_tokens = client.beta.messages.count_tokens( - model=model, - messages=messages, - ).input_tokens - return num_tokens + try: + num_tokens = client.beta.messages.count_tokens( + model=model, + messages=messages, + ).input_tokens + return num_tokens + except Exception as e: + raise Exception(f"An error occured - {e}") from e try: encoding = tiktoken.encoding_for_model(model) From 090177a7e2fb2349c1cc168fc82288cab6a75c23 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 9 Nov 2024 16:34:37 +0530 Subject: [PATCH 03/11] add comment --- tokencost/costs.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tokencost/costs.py b/tokencost/costs.py index 4f0b3b8..445d244 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -50,6 +50,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: logger.warning( f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string." ) + # For anthropic<0.39.0 this method is no more supported prompt = "".join(message["content"] for message in messages) return count_string_tokens(prompt, model) From 1e97c4d6071379df49acb1edcbf2cd7add8fe405 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 9 Nov 2024 16:54:39 +0530 Subject: [PATCH 04/11] add exception handling and api key loading --- tokencost/costs.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tokencost/costs.py b/tokencost/costs.py index 445d244..e9d9f89 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -3,6 +3,7 @@ Costs dictionary and utility tool for counting tokens """ +import os import tiktoken import anthropic from typing import Union, List, Dict @@ -40,12 +41,11 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: model = model.lower() model = strip_ft_model_name(model) + # Anthropic token counting requires a valid API key if "claude-" in model: logger.warning( "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!" ) - client = anthropic.Client() - if "claude-3-sonnet" in model: logger.warning( f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string." @@ -54,14 +54,19 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: prompt = "".join(message["content"] for message in messages) return count_string_tokens(prompt, model) + ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") + try: + client = anthropic.Client(api_key=ANTHROPIC_API_KEY) num_tokens = client.beta.messages.count_tokens( model=model, messages=messages, ).input_tokens return num_tokens + except TypeError as e: + raise e except Exception as e: - raise Exception(f"An error occured - {e}") from e + raise e try: encoding = tiktoken.encoding_for_model(model) From 923661f4f946622e675ca42ed609695b3396e4db Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 9 Nov 2024 16:56:28 +0530 Subject: [PATCH 05/11] black --- tests/test_costs.py | 3 +-- tokencost/__init__.py | 2 +- tokencost/constants.py | 9 ++++--- tokencost/costs.py | 4 +-- update_prices.py | 60 ++++++++++++++++++++++++++++-------------- 5 files changed, 50 insertions(+), 28 deletions(-) diff --git a/tests/test_costs.py b/tests/test_costs.py index 8b3f5f6..665da10 100644 --- a/tests/test_costs.py +++ b/tests/test_costs.py @@ -75,7 +75,6 @@ def test_count_message_tokens(model, expected_output): ("gpt-4o", 17), ("azure/gpt-4o", 17), ("claude-2.1", 4), - ], ) def test_count_message_tokens_with_name(model, expected_output): @@ -116,7 +115,7 @@ def test_count_message_tokens_invalid_model(): ("gpt-4-vision-preview", 4), ("text-embedding-ada-002", 4), ("gpt-4o", 4), - ("claude-2.1", 4) + ("claude-2.1", 4), ], ) def test_count_string_tokens(model, expected_output): diff --git a/tokencost/__init__.py b/tokencost/__init__.py index 8d67f82..c79f30c 100644 --- a/tokencost/__init__.py +++ b/tokencost/__init__.py @@ -4,6 +4,6 @@ calculate_completion_cost, calculate_prompt_cost, calculate_all_costs_and_tokens, - calculate_cost_by_tokens + calculate_cost_by_tokens, ) from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs diff --git a/tokencost/constants.py b/tokencost/constants.py index 9ca6ddb..afc1260 100644 --- a/tokencost/constants.py +++ b/tokencost/constants.py @@ -39,7 +39,9 @@ async def fetch_costs(): if response.status == 200: return await response.json(content_type=None) else: - raise Exception(f"Failed to fetch token costs, status code: {response.status}") + raise Exception( + f"Failed to fetch token costs, status code: {response.status}" + ) async def update_token_costs(): @@ -49,11 +51,12 @@ async def update_token_costs(): fetched_costs = await fetch_costs() # Safely remove 'sample_spec' if it exists TOKEN_COSTS.update(fetched_costs) - TOKEN_COSTS.pop('sample_spec', None) + TOKEN_COSTS.pop("sample_spec", None) except Exception as e: logger.error(f"Failed to update TOKEN_COSTS: {e}") raise + with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f: TOKEN_COSTS_STATIC = json.load(f) @@ -63,4 +66,4 @@ async def update_token_costs(): TOKEN_COSTS = TOKEN_COSTS_STATIC asyncio.run(update_token_costs()) except Exception: - logger.error('Failed to update token costs. Using static costs.') + logger.error("Failed to update token costs. Using static costs.") diff --git a/tokencost/costs.py b/tokencost/costs.py index e9d9f89..50af1bd 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -1,4 +1,3 @@ - """ Costs dictionary and utility tool for counting tokens """ @@ -98,7 +97,8 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: return count_message_tokens(messages, model="gpt-3.5-turbo-0613") elif "gpt-4o" in model: logger.warning( - "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.") + "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13." + ) return count_message_tokens(messages, model="gpt-4o-2024-05-13") elif "gpt-4" in model: logger.warning( diff --git a/update_prices.py b/update_prices.py index 4fa02e5..8687cf5 100644 --- a/update_prices.py +++ b/update_prices.py @@ -9,7 +9,9 @@ def diff_dicts(dict1, dict2): diff_keys = dict1.keys() ^ dict2.keys() differences = {k: (dict1.get(k), dict2.get(k)) for k in diff_keys} - differences.update({k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]}) + differences.update( + {k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]} + ) if differences: print("Differences found:") @@ -24,17 +26,21 @@ def diff_dicts(dict1, dict2): return False -with open('tokencost/model_prices.json', 'r') as f: +with open("tokencost/model_prices.json", "r") as f: model_prices = json.load(f) if diff_dicts(model_prices, tokencost.TOKEN_COSTS): - print('Updating model_prices.json') - with open('tokencost/model_prices.json', 'w') as f: + print("Updating model_prices.json") + with open("tokencost/model_prices.json", "w") as f: json.dump(tokencost.TOKEN_COSTS, f, indent=4) # Load the data df = pd.DataFrame(tokencost.TOKEN_COSTS).T -df.loc[df.index[1:], 'max_input_tokens'] = df['max_input_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x)) -df.loc[df.index[1:], 'max_tokens'] = df['max_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x)) +df.loc[df.index[1:], "max_input_tokens"] = ( + df["max_input_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x)) +) +df.loc[df.index[1:], "max_tokens"] = ( + df["max_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x)) +) # Updated function to format the cost or handle NaN @@ -42,38 +48,52 @@ def diff_dicts(dict1, dict2): def format_cost(x): if pd.isna(x): - return '--' + return "--" else: price_per_million = Decimal(str(x)) * Decimal(str(1_000_000)) # print(price_per_million) normalized = price_per_million.normalize() - formatted_price = '{:2f}'.format(normalized) + formatted_price = "{:2f}".format(normalized) - formatted_price = formatted_price.rstrip('0').rstrip('.') if '.' in formatted_price else formatted_price + '.00' + formatted_price = ( + formatted_price.rstrip("0").rstrip(".") + if "." in formatted_price + else formatted_price + ".00" + ) return f"${formatted_price}" # Apply the formatting function using DataFrame.apply and lambda -df[['input_cost_per_token', 'output_cost_per_token']] = df[[ - 'input_cost_per_token', 'output_cost_per_token']].apply(lambda x: x.map(format_cost)) +df[["input_cost_per_token", "output_cost_per_token"]] = df[ + ["input_cost_per_token", "output_cost_per_token"] +].apply(lambda x: x.map(format_cost)) column_mapping = { - 'input_cost_per_token': 'Prompt Cost (USD) per 1M tokens', - 'output_cost_per_token': 'Completion Cost (USD) per 1M tokens', - 'max_input_tokens': 'Max Prompt Tokens', - 'max_output_tokens': 'Max Output Tokens', - 'model_name': 'Model Name' + "input_cost_per_token": "Prompt Cost (USD) per 1M tokens", + "output_cost_per_token": "Completion Cost (USD) per 1M tokens", + "max_input_tokens": "Max Prompt Tokens", + "max_output_tokens": "Max Output Tokens", + "model_name": "Model Name", } # Assuming the keys of the JSON data represent the model names and have been set as the index -df['Model Name'] = df.index +df["Model Name"] = df.index # Apply the column renaming df.rename(columns=column_mapping, inplace=True) # Write the DataFrame with the correct column names as markdown to a file -with open('pricing_table.md', 'w') as f: - f.write(df[['Model Name', 'Prompt Cost (USD) per 1M tokens', 'Completion Cost (USD) per 1M tokens', - 'Max Prompt Tokens', 'Max Output Tokens']].to_markdown(index=False)) +with open("pricing_table.md", "w") as f: + f.write( + df[ + [ + "Model Name", + "Prompt Cost (USD) per 1M tokens", + "Completion Cost (USD) per 1M tokens", + "Max Prompt Tokens", + "Max Output Tokens", + ] + ].to_markdown(index=False) + ) From e871c8b1c41a557a2833dbad97f857bac62b227d Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 9 Nov 2024 16:57:13 +0530 Subject: [PATCH 06/11] ruff --- tests/test_llama_index_callbacks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_llama_index_callbacks.py b/tests/test_llama_index_callbacks.py index 6ca348a..1169974 100644 --- a/tests/test_llama_index_callbacks.py +++ b/tests/test_llama_index_callbacks.py @@ -1,8 +1,7 @@ # test_llama_index.py import pytest from tokencost.callbacks import llama_index -from llama_index.core.callbacks.schema import CBEventType, EventPayload -from unittest.mock import MagicMock +from llama_index.core.callbacks.schema import EventPayload # Mock the calculate_prompt_cost and calculate_completion_cost functions From 838ebd46c557c107d05e9f5895a7540f6cdb16b1 Mon Sep 17 00:00:00 2001 From: reibs Date: Wed, 13 Nov 2024 23:57:34 -0800 Subject: [PATCH 07/11] updated some tests, will look at rest later --- tests/test_costs.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_costs.py b/tests/test_costs.py index 665da10..10926f3 100644 --- a/tests/test_costs.py +++ b/tests/test_costs.py @@ -46,7 +46,7 @@ ("gpt-4-vision-preview", 15), ("gpt-4o", 15), ("azure/gpt-4o", 15), - ("claude-2.1", 4), + ("claude-3-opus-latest", 11), ], ) def test_count_message_tokens(model, expected_output): @@ -74,7 +74,7 @@ def test_count_message_tokens(model, expected_output): ("gpt-4-vision-preview", 17), ("gpt-4o", 17), ("azure/gpt-4o", 17), - ("claude-2.1", 4), + # ("claude-3-opus-latest", 4), # TODO: Claude is not supported yet ], ) def test_count_message_tokens_with_name(model, expected_output): @@ -115,7 +115,7 @@ def test_count_message_tokens_invalid_model(): ("gpt-4-vision-preview", 4), ("text-embedding-ada-002", 4), ("gpt-4o", 4), - ("claude-2.1", 4), + # ("claude-3-opus-latest", 4), # NOTE: Claude only supports messages ], ) def test_count_string_tokens(model, expected_output): @@ -155,7 +155,7 @@ def test_count_string_invalid_model(): (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")), (MESSAGES, "gpt-4o", Decimal("0.000075")), (MESSAGES, "azure/gpt-4o", Decimal("0.000075")), - (MESSAGES, "claude-2.1", Decimal("0.000032")), + (MESSAGES, "claude-3-opus-latest", Decimal("0.000165")), (STRING, "text-embedding-ada-002", Decimal("0.0000004")), ], ) @@ -192,7 +192,7 @@ def test_invalid_prompt_format(): (STRING, "gpt-4-vision-preview", Decimal("0.00012")), (STRING, "gpt-4o", Decimal("0.000060")), (STRING, "azure/gpt-4o", Decimal("0.000060")), - (STRING, "claude-2.1", Decimal("0.000096")), + (STRING, "claude-3-opus-latest", Decimal("0.000096")), (STRING, "text-embedding-ada-002", 0), ], ) From c22e4035a34e05e93ffcafbf0b1305cab5e6ad05 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Thu, 14 Nov 2024 15:37:21 +0530 Subject: [PATCH 08/11] update code for latest anthropic api --- tokencost/costs.py | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/tokencost/costs.py b/tokencost/costs.py index 50af1bd..e87251d 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -45,18 +45,19 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: logger.warning( "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!" ) - if "claude-3-sonnet" in model: - logger.warning( - f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string." + if not any( + supported_model in model for supported_model in [ + "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus" + ] + ): + raise ValueError( + f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts." ) - # For anthropic<0.39.0 this method is no more supported - prompt = "".join(message["content"] for message in messages) - return count_string_tokens(prompt, model) ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") try: - client = anthropic.Client(api_key=ANTHROPIC_API_KEY) + client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY) num_tokens = client.beta.messages.count_tokens( model=model, messages=messages, @@ -138,17 +139,9 @@ def count_string_tokens(prompt: str, model: str) -> int: model = model.split("/")[-1] if "claude-" in model: - logger.warning( - "Warning: This is only accurate for older models e.g. `claude-2.1` so please expect a _very_ rough estimate." - "Use the `usage` property in the response for exact counts." + raise ValueError( + "Claude models do not support this method. Use the `usage` property in the response for exact counts." ) - if "claude-3" in model: - logger.warning( - "Warning: Claude-3 models are unsupported. Returning num tokens assuming claude-2.1." - ) - client = anthropic.Client() - token_count = client.count_tokens(prompt) - return token_count try: encoding = tiktoken.encoding_for_model(model) From 856a131b7736515e696e82efcf9d6d7ac2fb6f4d Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Thu, 14 Nov 2024 15:37:37 +0530 Subject: [PATCH 09/11] comment out anthropic models in tests --- tests/test_costs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_costs.py b/tests/test_costs.py index 10926f3..6b0578f 100644 --- a/tests/test_costs.py +++ b/tests/test_costs.py @@ -74,7 +74,7 @@ def test_count_message_tokens(model, expected_output): ("gpt-4-vision-preview", 17), ("gpt-4o", 17), ("azure/gpt-4o", 17), - # ("claude-3-opus-latest", 4), # TODO: Claude is not supported yet + # ("claude-3-opus-latest", 4), # NOTE: Claude only supports messages without extra inputs ], ) def test_count_message_tokens_with_name(model, expected_output): @@ -192,7 +192,7 @@ def test_invalid_prompt_format(): (STRING, "gpt-4-vision-preview", Decimal("0.00012")), (STRING, "gpt-4o", Decimal("0.000060")), (STRING, "azure/gpt-4o", Decimal("0.000060")), - (STRING, "claude-3-opus-latest", Decimal("0.000096")), + # (STRING, "claude-3-opus-latest", Decimal("0.000096")), # NOTE: Claude only supports messages (STRING, "text-embedding-ada-002", 0), ], ) From bb99c402a9ce496cd82ad3207923fcd40ee4d58e Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Fri, 15 Nov 2024 03:58:05 +0530 Subject: [PATCH 10/11] anthropic support and addressing caveats --- tokencost/costs.py | 77 ++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 30 deletions(-) diff --git a/tokencost/costs.py b/tokencost/costs.py index e87251d..efdaffd 100644 --- a/tokencost/costs.py +++ b/tokencost/costs.py @@ -16,6 +16,26 @@ # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts +def get_anthropic_token_count(messages: List[Dict[str, str]], model: str) -> int: + if not any( + supported_model in model for supported_model in [ + "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus" + ] + ): + raise ValueError( + f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts." + ) + try: + return anthropic.Anthropic().beta.messages.count_tokens( + model=model, + messages=messages, + ).input_tokens + except TypeError as e: + raise e + except Exception as e: + raise e + + def strip_ft_model_name(model: str) -> str: """ Finetuned models format: ft:gpt-3.5-turbo:my-org:custom_suffix:id @@ -45,28 +65,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int: logger.warning( "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!" ) - if not any( - supported_model in model for supported_model in [ - "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus" - ] - ): - raise ValueError( - f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts." - ) - - ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY") - - try: - client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY) - num_tokens = client.beta.messages.count_tokens( - model=model, - messages=messages, - ).input_tokens - return num_tokens - except TypeError as e: - raise e - except Exception as e: - raise e + return get_anthropic_token_count(messages, model) try: encoding = tiktoken.encoding_for_model(model) @@ -140,7 +139,7 @@ def count_string_tokens(prompt: str, model: str) -> int: if "claude-" in model: raise ValueError( - "Claude models do not support this method. Use the `usage` property in the response for exact counts." + "Warning: Anthropic does not support this method. Please use the `count_message_tokens` function for the exact counts." ) try: @@ -209,13 +208,11 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal ) if not isinstance(prompt, (list, str)): raise TypeError( - f"""Prompt must be either a string or list of message objects. - it is {type(prompt)} instead. - """ + f"Prompt must be either a string or list of message objects but found {type(prompt)} instead." ) prompt_tokens = ( count_string_tokens(prompt, model) - if isinstance(prompt, str) + if isinstance(prompt, str) and "claude-" not in model else count_message_tokens(prompt, model) ) @@ -244,7 +241,18 @@ def calculate_completion_cost(completion: str, model: str) -> Decimal: f"""Model {model} is not implemented. Double-check your spelling, or submit an issue/PR""" ) - completion_tokens = count_string_tokens(completion, model) + + if not isinstance(completion, str): + raise TypeError( + f"Prompt must be a string but found {type(completion)} instead." + ) + + if "claude-" in model: + completion_list = [{"role": "assistant", "content": completion}] + # Anthropic appends some 13 additional tokens to the actual completion tokens + completion_tokens = count_message_tokens(completion_list, model) - 13 + else: + completion_tokens = count_string_tokens(completion, model) return calculate_cost_by_tokens(completion_tokens, model, "output") @@ -273,10 +281,19 @@ def calculate_all_costs_and_tokens( completion_cost = calculate_completion_cost(completion, model) prompt_tokens = ( count_string_tokens(prompt, model) - if isinstance(prompt, str) + if isinstance(prompt, str) and "claude-" not in model else count_message_tokens(prompt, model) ) - completion_tokens = count_string_tokens(completion, model) + + if "claude-" in model: + logger.warning( + "Warning: Token counting is estimated for " + ) + completion_list = [{"role": "assistant", "content": completion}] + # Anthropic appends some 13 additional tokens to the actual completion tokens + completion_tokens = count_message_tokens(completion_list, model) - 13 + else: + completion_tokens = count_string_tokens(completion, model) return { "prompt_cost": prompt_cost, From ca70b7cbf1e9963d956e53b72d33c2c259d557cf Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Fri, 15 Nov 2024 04:10:33 +0530 Subject: [PATCH 11/11] update costs in tests for `gpt-4o` model --- tests/test_costs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_costs.py b/tests/test_costs.py index 6b0578f..4ba9d09 100644 --- a/tests/test_costs.py +++ b/tests/test_costs.py @@ -153,7 +153,7 @@ def test_count_string_invalid_model(): (MESSAGES, "gpt-4-0613", Decimal("0.00045")), (MESSAGES, "gpt-4-1106-preview", Decimal("0.00015")), (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")), - (MESSAGES, "gpt-4o", Decimal("0.000075")), + (MESSAGES, "gpt-4o", Decimal("0.0000375")), (MESSAGES, "azure/gpt-4o", Decimal("0.000075")), (MESSAGES, "claude-3-opus-latest", Decimal("0.000165")), (STRING, "text-embedding-ada-002", Decimal("0.0000004")), @@ -190,7 +190,7 @@ def test_invalid_prompt_format(): (STRING, "gpt-4-0613", Decimal("0.00024")), (STRING, "gpt-4-1106-preview", Decimal("0.00012")), (STRING, "gpt-4-vision-preview", Decimal("0.00012")), - (STRING, "gpt-4o", Decimal("0.000060")), + (STRING, "gpt-4o", Decimal("0.00004")), (STRING, "azure/gpt-4o", Decimal("0.000060")), # (STRING, "claude-3-opus-latest", Decimal("0.000096")), # NOTE: Claude only supports messages (STRING, "text-embedding-ada-002", 0),