Commit
Merge pull request #85 from AgentOps-AI/feat/anthropic-token-count
the-praxs authored Nov 15, 2024
2 parents 095f036 + ca70b7c commit abf5e6d
Showing 6 changed files with 112 additions and 65 deletions.
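
This commit replaces the local Claude tokenizer path with Anthropic's beta token-counting API. A minimal usage sketch of the updated flow (not part of the commit; assumes tokencost at this revision is installed and a valid ANTHROPIC_API_KEY is set, since "claude-*" models now trigger a network call):

    # Sketch: prompt token counting and pricing for a Claude model after this change.
    from tokencost import calculate_prompt_cost
    from tokencost.costs import count_message_tokens

    messages = [{"role": "user", "content": "Hello, world!"}]

    # For "claude-*" models this now calls Anthropic's beta count_tokens endpoint.
    tokens = count_message_tokens(messages, model="claude-3-opus-latest")
    print(tokens)  # count comes from the API and depends on the message content

    # Prompt cost is priced from the same count.
    print(calculate_prompt_cost(messages, model="claude-3-opus-latest"))
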
15 changes: 7 additions & 8 deletions tests/test_costs.py
@@ -46,7 +46,7 @@
("gpt-4-vision-preview", 15),
("gpt-4o", 15),
("azure/gpt-4o", 15),
("claude-2.1", 4),
("claude-3-opus-latest", 11),
],
)
def test_count_message_tokens(model, expected_output):
@@ -74,8 +74,7 @@ def test_count_message_tokens(model, expected_output):
("gpt-4-vision-preview", 17),
("gpt-4o", 17),
("azure/gpt-4o", 17),
("claude-2.1", 4),
# ("claude-3-opus-latest", 4), # NOTE: Claude only supports messages without extra inputs
],
)
def test_count_message_tokens_with_name(model, expected_output):
@@ -116,7 +115,7 @@ def test_count_message_tokens_invalid_model():
("gpt-4-vision-preview", 4),
("text-embedding-ada-002", 4),
("gpt-4o", 4),
("claude-2.1", 4)
# ("claude-3-opus-latest", 4), # NOTE: Claude only supports messages
],
)
def test_count_string_tokens(model, expected_output):
@@ -154,9 +153,9 @@ def test_count_string_invalid_model():
(MESSAGES, "gpt-4-0613", Decimal("0.00045")),
(MESSAGES, "gpt-4-1106-preview", Decimal("0.00015")),
(MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")),
(MESSAGES, "gpt-4o", Decimal("0.000075")),
(MESSAGES, "gpt-4o", Decimal("0.0000375")),
(MESSAGES, "azure/gpt-4o", Decimal("0.000075")),
(MESSAGES, "claude-2.1", Decimal("0.000032")),
(MESSAGES, "claude-3-opus-latest", Decimal("0.000165")),
(STRING, "text-embedding-ada-002", Decimal("0.0000004")),
],
)
@@ -191,9 +190,9 @@ def test_invalid_prompt_format():
(STRING, "gpt-4-0613", Decimal("0.00024")),
(STRING, "gpt-4-1106-preview", Decimal("0.00012")),
(STRING, "gpt-4-vision-preview", Decimal("0.00012")),
(STRING, "gpt-4o", Decimal("0.000060")),
(STRING, "gpt-4o", Decimal("0.00004")),
(STRING, "azure/gpt-4o", Decimal("0.000060")),
(STRING, "claude-2.1", Decimal("0.000096")),
# (STRING, "claude-3-opus-latest", Decimal("0.000096")), # NOTE: Claude only supports messages
(STRING, "text-embedding-ada-002", 0),
],
)
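Because Claude token counting now goes through Anthropic's API, the claude-* cases in these tests need a real key at runtime. An illustrative way to run just those cases (assumes pytest is installed; the -k expression simply matches the parametrized model ids):

    # Illustrative: run only the Claude-related cost tests.
    # Requires ANTHROPIC_API_KEY because count_message_tokens now performs a network call.
    import os
    import pytest

    assert os.environ.get("ANTHROPIC_API_KEY"), "Anthropic key needed for claude-* cases"
    pytest.main(["tests/test_costs.py", "-k", "claude", "-q"])
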
3 changes: 1 addition & 2 deletions tests/test_llama_index_callbacks.py
@@ -1,8 +1,7 @@
# test_llama_index.py
import pytest
from tokencost.callbacks import llama_index
from llama_index.core.callbacks.schema import CBEventType, EventPayload
from unittest.mock import MagicMock
from llama_index.core.callbacks.schema import EventPayload

# Mock the calculate_prompt_cost and calculate_completion_cost functions

2 changes: 1 addition & 1 deletion tokencost/__init__.py
@@ -4,6 +4,6 @@
calculate_completion_cost,
calculate_prompt_cost,
calculate_all_costs_and_tokens,
calculate_cost_by_tokens
calculate_cost_by_tokens,
)
from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs
9 changes: 6 additions & 3 deletions tokencost/constants.py
@@ -39,7 +39,9 @@ async def fetch_costs():
if response.status == 200:
return await response.json(content_type=None)
else:
raise Exception(f"Failed to fetch token costs, status code: {response.status}")
raise Exception(
f"Failed to fetch token costs, status code: {response.status}"
)


async def update_token_costs():
@@ -49,11 +51,12 @@ async def update_token_costs():
fetched_costs = await fetch_costs()
# Safely remove 'sample_spec' if it exists
TOKEN_COSTS.update(fetched_costs)
TOKEN_COSTS.pop('sample_spec', None)
TOKEN_COSTS.pop("sample_spec", None)
except Exception as e:
logger.error(f"Failed to update TOKEN_COSTS: {e}")
raise


with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
TOKEN_COSTS_STATIC = json.load(f)

@@ -63,4 +66,4 @@ async def update_token_costs():
TOKEN_COSTS = TOKEN_COSTS_STATIC
asyncio.run(update_token_costs())
except Exception:
logger.error('Failed to update token costs. Using static costs.')
logger.error("Failed to update token costs. Using static costs.")
88 changes: 57 additions & 31 deletions tokencost/costs.py
@@ -1,8 +1,8 @@

"""
Costs dictionary and utility tool for counting tokens
"""

import os
import tiktoken
import anthropic
from typing import Union, List, Dict
@@ -12,12 +12,30 @@

logger = logging.getLogger(__name__)

# TODO: Add Claude support
# https://www-files.anthropic.com/production/images/model_pricing_july2023.pdf
# Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet.
# https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts


def get_anthropic_token_count(messages: List[Dict[str, str]], model: str) -> int:
if not any(
supported_model in model for supported_model in [
"claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus"
]
):
raise ValueError(
f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
)
try:
return anthropic.Anthropic().beta.messages.count_tokens(
model=model,
messages=messages,
).input_tokens
except TypeError as e:
raise e
except Exception as e:
raise e


def strip_ft_model_name(model: str) -> str:
"""
Finetuned models format: ft:gpt-3.5-turbo:my-org:custom_suffix:id
@@ -42,14 +60,12 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
model = model.lower()
model = strip_ft_model_name(model)

# Anthropic token counting requires a valid API key
if "claude-" in model:
"""
Note that this is only accurate for older models, e.g. `claude-2.1`.
For newer models this can only be used as a _very_ rough estimate,
instead you should rely on the `usage` property in the response for exact counts.
"""
prompt = "".join(message["content"] for message in messages)
return count_string_tokens(prompt, model)
logger.warning(
"Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
)
return get_anthropic_token_count(messages, model)

try:
encoding = tiktoken.encoding_for_model(model)
@@ -80,8 +96,9 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
)
return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
elif "gpt-4o" in model:
print(
"Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.")
logger.warning(
"Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
)
return count_message_tokens(messages, model="gpt-4o-2024-05-13")
elif "gpt-4" in model:
logger.warning(
@@ -121,18 +138,9 @@ def count_string_tokens(prompt: str, model: str) -> int:
model = model.split("/")[-1]

if "claude-" in model:
"""
Note that this is only accurate for older models, e.g. `claude-2.1`.
For newer models this can only be used as a _very_ rough estimate,
instead you should rely on the `usage` property in the response for exact counts.
"""
if "claude-3" in model:
logger.warning(
"Warning: Claude-3 models are not yet supported. Returning num tokens assuming claude-2.1."
)
client = anthropic.Client()
token_count = client.count_tokens(prompt)
return token_count
raise ValueError(
"Warning: Anthropic does not support this method. Please use the `count_message_tokens` function for the exact counts."
)

try:
encoding = tiktoken.encoding_for_model(model)
@@ -200,13 +208,11 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal:
)
if not isinstance(prompt, (list, str)):
raise TypeError(
f"""Prompt must be either a string or list of message objects.
it is {type(prompt)} instead.
"""
f"Prompt must be either a string or list of message objects but found {type(prompt)} instead."
)
prompt_tokens = (
count_string_tokens(prompt, model)
if isinstance(prompt, str)
if isinstance(prompt, str) and "claude-" not in model
else count_message_tokens(prompt, model)
)

@@ -235,7 +241,18 @@ def calculate_completion_cost(completion: str, model: str) -> Decimal:
f"""Model {model} is not implemented.
Double-check your spelling, or submit an issue/PR"""
)
completion_tokens = count_string_tokens(completion, model)

if not isinstance(completion, str):
raise TypeError(
f"Prompt must be a string but found {type(completion)} instead."
)

if "claude-" in model:
completion_list = [{"role": "assistant", "content": completion}]
# Anthropic appends some 13 additional tokens to the actual completion tokens
completion_tokens = count_message_tokens(completion_list, model) - 13
else:
completion_tokens = count_string_tokens(completion, model)

return calculate_cost_by_tokens(completion_tokens, model, "output")

@@ -264,10 +281,19 @@ def calculate_all_costs_and_tokens(
completion_cost = calculate_completion_cost(completion, model)
prompt_tokens = (
count_string_tokens(prompt, model)
if isinstance(prompt, str)
if isinstance(prompt, str) and "claude-" not in model
else count_message_tokens(prompt, model)
)
completion_tokens = count_string_tokens(completion, model)

if "claude-" in model:
logger.warning(
"Warning: Token counting is estimated for "
)
completion_list = [{"role": "assistant", "content": completion}]
# Anthropic appends some 13 additional tokens to the actual completion tokens
completion_tokens = count_message_tokens(completion_list, model) - 13
else:
completion_tokens = count_string_tokens(completion, model)

return {
"prompt_cost": prompt_cost,
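With these changes, completion costs for Claude models are computed by wrapping the completion string in an assistant message, counting it through the beta API, and subtracting the roughly 13 framing tokens Anthropic adds. A small sketch of the two Claude-specific paths (illustrative only; assumes ANTHROPIC_API_KEY is set and that calculate_all_costs_and_tokens takes prompt, completion, and model in that order):

    # Sketch of the Claude-specific cost paths introduced in costs.py.
    from tokencost import calculate_all_costs_and_tokens, calculate_completion_cost

    model = "claude-3-opus-latest"
    prompt = [{"role": "user", "content": "Summarize Hamlet in one sentence."}]
    completion = "A Danish prince feigns madness while avenging his father's murder."

    # Wraps the string as an assistant message, counts via the beta API,
    # and subtracts ~13 framing tokens before applying output pricing.
    print(calculate_completion_cost(completion, model))

    # Returns prompt/completion costs and token counts in one dict.
    print(calculate_all_costs_and_tokens(prompt, completion, model))
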
60 changes: 40 additions & 20 deletions update_prices.py
@@ -9,7 +9,9 @@
def diff_dicts(dict1, dict2):
diff_keys = dict1.keys() ^ dict2.keys()
differences = {k: (dict1.get(k), dict2.get(k)) for k in diff_keys}
differences.update({k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]})
differences.update(
{k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]}
)

if differences:
print("Differences found:")
@@ -24,56 +26,74 @@ def diff_dicts(dict1, dict2):
return False


with open('tokencost/model_prices.json', 'r') as f:
with open("tokencost/model_prices.json", "r") as f:
model_prices = json.load(f)

if diff_dicts(model_prices, tokencost.TOKEN_COSTS):
print('Updating model_prices.json')
with open('tokencost/model_prices.json', 'w') as f:
print("Updating model_prices.json")
with open("tokencost/model_prices.json", "w") as f:
json.dump(tokencost.TOKEN_COSTS, f, indent=4)
# Load the data
df = pd.DataFrame(tokencost.TOKEN_COSTS).T
df.loc[df.index[1:], 'max_input_tokens'] = df['max_input_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
df.loc[df.index[1:], 'max_tokens'] = df['max_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
df.loc[df.index[1:], "max_input_tokens"] = (
df["max_input_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
)
df.loc[df.index[1:], "max_tokens"] = (
df["max_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
)


# Updated function to format the cost or handle NaN


def format_cost(x):
if pd.isna(x):
return '--'
return "--"
else:
price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
# print(price_per_million)
normalized = price_per_million.normalize()
formatted_price = '{:2f}'.format(normalized)
formatted_price = "{:2f}".format(normalized)

formatted_price = formatted_price.rstrip('0').rstrip('.') if '.' in formatted_price else formatted_price + '.00'
formatted_price = (
formatted_price.rstrip("0").rstrip(".")
if "." in formatted_price
else formatted_price + ".00"
)

return f"${formatted_price}"


# Apply the formatting function using DataFrame.apply and lambda
df[['input_cost_per_token', 'output_cost_per_token']] = df[[
'input_cost_per_token', 'output_cost_per_token']].apply(lambda x: x.map(format_cost))
df[["input_cost_per_token", "output_cost_per_token"]] = df[
["input_cost_per_token", "output_cost_per_token"]
].apply(lambda x: x.map(format_cost))


column_mapping = {
'input_cost_per_token': 'Prompt Cost (USD) per 1M tokens',
'output_cost_per_token': 'Completion Cost (USD) per 1M tokens',
'max_input_tokens': 'Max Prompt Tokens',
'max_output_tokens': 'Max Output Tokens',
'model_name': 'Model Name'
"input_cost_per_token": "Prompt Cost (USD) per 1M tokens",
"output_cost_per_token": "Completion Cost (USD) per 1M tokens",
"max_input_tokens": "Max Prompt Tokens",
"max_output_tokens": "Max Output Tokens",
"model_name": "Model Name",
}

# Assuming the keys of the JSON data represent the model names and have been set as the index
df['Model Name'] = df.index
df["Model Name"] = df.index

# Apply the column renaming
df.rename(columns=column_mapping, inplace=True)

# Write the DataFrame with the correct column names as markdown to a file
with open('pricing_table.md', 'w') as f:
f.write(df[['Model Name', 'Prompt Cost (USD) per 1M tokens', 'Completion Cost (USD) per 1M tokens',
'Max Prompt Tokens', 'Max Output Tokens']].to_markdown(index=False))
with open("pricing_table.md", "w") as f:
f.write(
df[
[
"Model Name",
"Prompt Cost (USD) per 1M tokens",
"Completion Cost (USD) per 1M tokens",
"Max Prompt Tokens",
"Max Output Tokens",
]
].to_markdown(index=False)
)
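
The reformatted format_cost helper turns a per-token cost into a per-million-token price string for the pricing table. A quick standalone check of that conversion (not part of the commit; values are illustrative):

    # Standalone sketch of the per-token -> per-1M-token conversion used by format_cost.
    from decimal import Decimal

    cost_per_token = Decimal("0.000015")              # example output cost per token
    price_per_million = (cost_per_token * 1_000_000).normalize()
    print(f"${price_per_million}")                    # -> $15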
