From ef7b1c91adb03309f628d5cb130670acd0244f18 Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Fri, 8 Nov 2024 21:41:29 +0530
Subject: [PATCH 01/11] use anthropic `client.beta.messages.count_tokens` to
 count message tokens

---
 tokencost/costs.py | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index 3677d6f..41ac88f 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -43,13 +43,23 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
     model = strip_ft_model_name(model)
 
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`. 
-        For newer models this can only be used as a _very_ rough estimate, 
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
-        prompt = "".join(message["content"] for message in messages)
-        return count_string_tokens(prompt, model)
+        logger.warning(
+            "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
+        )
+        client = anthropic.Client()
+
+        if "claude-3-sonnet" in model:
+            logger.warning(
+                f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
+            )
+            prompt = "".join(message["content"] for message in messages)
+            return count_string_tokens(prompt, model)
+
+        num_tokens = client.beta.messages.count_tokens(
+            model=model,
+            messages=messages,
+        ).input_tokens
+        return num_tokens
 
     try:
         encoding = tiktoken.encoding_for_model(model)
@@ -80,7 +90,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         )
         return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
     elif "gpt-4o" in model:
-        print(
+        logger.warning(
             "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.")
         return count_message_tokens(messages, model="gpt-4o-2024-05-13")
     elif "gpt-4" in model:
@@ -121,14 +131,13 @@ def count_string_tokens(prompt: str, model: str) -> int:
         model = model.split("/")[-1]
 
     if "claude-" in model:
-        """
-        Note that this is only accurate for older models, e.g. `claude-2.1`. 
-        For newer models this can only be used as a _very_ rough estimate, 
-        instead you should rely on the `usage` property in the response for exact counts.
-        """
+        logger.warning(
+            "Warning: This is only accurate for older models e.g. `claude-2.1` so please expect a _very_ rough estimate."
+            "Use the `usage` property in the response for exact counts."
+        )
         if "claude-3" in model:
             logger.warning(
-                "Warning: Claude-3 models are not yet supported. Returning num tokens assuming claude-2.1."
+                "Warning: Claude-3 models are unsupported. Returning num tokens assuming claude-2.1."
             )
         client = anthropic.Client()
         token_count = client.count_tokens(prompt)

From 6dc90ca93cb57ac5e74df5aac455906dd833ae24 Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Sat, 9 Nov 2024 16:33:20 +0530
Subject: [PATCH 02/11] add exception for claude

---
 tokencost/costs.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index 41ac88f..4f0b3b8 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -12,8 +12,6 @@
 
 logger = logging.getLogger(__name__)
 
-# TODO: Add Claude support
-# https://www-files.anthropic.com/production/images/model_pricing_july2023.pdf
 # Note: cl100k is the openai base tokenizer. Nothing to do with Claude. Tiktoken doesn't have claude yet.
 # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts
 
@@ -55,11 +53,14 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
             prompt = "".join(message["content"] for message in messages)
             return count_string_tokens(prompt, model)
 
-        num_tokens = client.beta.messages.count_tokens(
-            model=model,
-            messages=messages,
-        ).input_tokens
-        return num_tokens
+        try:
+            num_tokens = client.beta.messages.count_tokens(
+                model=model,
+                messages=messages,
+            ).input_tokens
+            return num_tokens
+        except Exception as e:
+            raise Exception(f"An error occured - {e}") from e
 
     try:
         encoding = tiktoken.encoding_for_model(model)

From 090177a7e2fb2349c1cc168fc82288cab6a75c23 Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Sat, 9 Nov 2024 16:34:37 +0530
Subject: [PATCH 03/11] add comment

---
 tokencost/costs.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index 4f0b3b8..445d244 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -50,6 +50,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
             logger.warning(
                 f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
             )
+            # For anthropic<0.39.0 this method is no more supported
             prompt = "".join(message["content"] for message in messages)
             return count_string_tokens(prompt, model)
 

From 1e97c4d6071379df49acb1edcbf2cd7add8fe405 Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Sat, 9 Nov 2024 16:54:39 +0530
Subject: [PATCH 04/11] add exception handling and api key loading

---
 tokencost/costs.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index 445d244..e9d9f89 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -3,6 +3,7 @@
 Costs dictionary and utility tool for counting tokens
 """
 
+import os
 import tiktoken
 import anthropic
 from typing import Union, List, Dict
@@ -40,12 +41,11 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
     model = model.lower()
     model = strip_ft_model_name(model)
 
+    # Anthropic token counting requires a valid API key
     if "claude-" in model:
         logger.warning(
             "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
         )
-        client = anthropic.Client()
-
         if "claude-3-sonnet" in model:
             logger.warning(
                 f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
@@ -54,14 +54,19 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
             prompt = "".join(message["content"] for message in messages)
             return count_string_tokens(prompt, model)
 
+        ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
+
         try:
+            client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
             num_tokens = client.beta.messages.count_tokens(
                 model=model,
                 messages=messages,
             ).input_tokens
             return num_tokens
+        except TypeError as e:
+            raise e
         except Exception as e:
-            raise Exception(f"An error occured - {e}") from e
+            raise e
 
     try:
         encoding = tiktoken.encoding_for_model(model)

From 923661f4f946622e675ca42ed609695b3396e4db Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Sat, 9 Nov 2024 16:56:28 +0530
Subject: [PATCH 05/11] black

---
 tests/test_costs.py    |  3 +--
 tokencost/__init__.py  |  2 +-
 tokencost/constants.py |  9 ++++---
 tokencost/costs.py     |  4 +--
 update_prices.py       | 60 ++++++++++++++++++++++++++++--------------
 5 files changed, 50 insertions(+), 28 deletions(-)

diff --git a/tests/test_costs.py b/tests/test_costs.py
index 8b3f5f6..665da10 100644
--- a/tests/test_costs.py
+++ b/tests/test_costs.py
@@ -75,7 +75,6 @@ def test_count_message_tokens(model, expected_output):
         ("gpt-4o", 17),
         ("azure/gpt-4o", 17),
         ("claude-2.1", 4),
-
     ],
 )
 def test_count_message_tokens_with_name(model, expected_output):
@@ -116,7 +115,7 @@ def test_count_message_tokens_invalid_model():
         ("gpt-4-vision-preview", 4),
         ("text-embedding-ada-002", 4),
         ("gpt-4o", 4),
-        ("claude-2.1", 4)
+        ("claude-2.1", 4),
     ],
 )
 def test_count_string_tokens(model, expected_output):
diff --git a/tokencost/__init__.py b/tokencost/__init__.py
index 8d67f82..c79f30c 100644
--- a/tokencost/__init__.py
+++ b/tokencost/__init__.py
@@ -4,6 +4,6 @@
     calculate_completion_cost,
     calculate_prompt_cost,
     calculate_all_costs_and_tokens,
-    calculate_cost_by_tokens
+    calculate_cost_by_tokens,
 )
 from .constants import TOKEN_COSTS_STATIC, TOKEN_COSTS, update_token_costs
diff --git a/tokencost/constants.py b/tokencost/constants.py
index 9ca6ddb..afc1260 100644
--- a/tokencost/constants.py
+++ b/tokencost/constants.py
@@ -39,7 +39,9 @@ async def fetch_costs():
             if response.status == 200:
                 return await response.json(content_type=None)
             else:
-                raise Exception(f"Failed to fetch token costs, status code: {response.status}")
+                raise Exception(
+                    f"Failed to fetch token costs, status code: {response.status}"
+                )
 
 
 async def update_token_costs():
@@ -49,11 +51,12 @@ async def update_token_costs():
         fetched_costs = await fetch_costs()
         # Safely remove 'sample_spec' if it exists
         TOKEN_COSTS.update(fetched_costs)
-        TOKEN_COSTS.pop('sample_spec', None)
+        TOKEN_COSTS.pop("sample_spec", None)
     except Exception as e:
         logger.error(f"Failed to update TOKEN_COSTS: {e}")
         raise
 
+
 with open(os.path.join(os.path.dirname(__file__), "model_prices.json"), "r") as f:
     TOKEN_COSTS_STATIC = json.load(f)
 
@@ -63,4 +66,4 @@ async def update_token_costs():
     TOKEN_COSTS = TOKEN_COSTS_STATIC
     asyncio.run(update_token_costs())
 except Exception:
-    logger.error('Failed to update token costs. Using static costs.')
+    logger.error("Failed to update token costs. Using static costs.")
diff --git a/tokencost/costs.py b/tokencost/costs.py
index e9d9f89..50af1bd 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -1,4 +1,3 @@
-
 """
 Costs dictionary and utility tool for counting tokens
 """
@@ -98,7 +97,8 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         return count_message_tokens(messages, model="gpt-3.5-turbo-0613")
     elif "gpt-4o" in model:
         logger.warning(
-            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13.")
+            "Warning: gpt-4o may update over time. Returning num tokens assuming gpt-4o-2024-05-13."
+        )
         return count_message_tokens(messages, model="gpt-4o-2024-05-13")
     elif "gpt-4" in model:
         logger.warning(
diff --git a/update_prices.py b/update_prices.py
index 4fa02e5..8687cf5 100644
--- a/update_prices.py
+++ b/update_prices.py
@@ -9,7 +9,9 @@
 def diff_dicts(dict1, dict2):
     diff_keys = dict1.keys() ^ dict2.keys()
     differences = {k: (dict1.get(k), dict2.get(k)) for k in diff_keys}
-    differences.update({k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]})
+    differences.update(
+        {k: (dict1[k], dict2[k]) for k in dict1 if k in dict2 and dict1[k] != dict2[k]}
+    )
 
     if differences:
         print("Differences found:")
@@ -24,17 +26,21 @@ def diff_dicts(dict1, dict2):
         return False
 
 
-with open('tokencost/model_prices.json', 'r') as f:
+with open("tokencost/model_prices.json", "r") as f:
     model_prices = json.load(f)
 
 if diff_dicts(model_prices, tokencost.TOKEN_COSTS):
-    print('Updating model_prices.json')
-    with open('tokencost/model_prices.json', 'w') as f:
+    print("Updating model_prices.json")
+    with open("tokencost/model_prices.json", "w") as f:
         json.dump(tokencost.TOKEN_COSTS, f, indent=4)
 # Load the data
 df = pd.DataFrame(tokencost.TOKEN_COSTS).T
-df.loc[df.index[1:], 'max_input_tokens'] = df['max_input_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
-df.loc[df.index[1:], 'max_tokens'] = df['max_tokens'].iloc[1:].apply(lambda x: '{:,.0f}'.format(x))
+df.loc[df.index[1:], "max_input_tokens"] = (
+    df["max_input_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
+df.loc[df.index[1:], "max_tokens"] = (
+    df["max_tokens"].iloc[1:].apply(lambda x: "{:,.0f}".format(x))
+)
 
 
 # Updated function to format the cost or handle NaN
@@ -42,38 +48,52 @@ def diff_dicts(dict1, dict2):
 
 def format_cost(x):
     if pd.isna(x):
-        return '--'
+        return "--"
     else:
         price_per_million = Decimal(str(x)) * Decimal(str(1_000_000))
         # print(price_per_million)
         normalized = price_per_million.normalize()
-        formatted_price = '{:2f}'.format(normalized)
+        formatted_price = "{:2f}".format(normalized)
 
-        formatted_price = formatted_price.rstrip('0').rstrip('.') if '.' in formatted_price else formatted_price + '.00'
+        formatted_price = (
+            formatted_price.rstrip("0").rstrip(".")
+            if "." in formatted_price
+            else formatted_price + ".00"
+        )
 
         return f"${formatted_price}"
 
 
 # Apply the formatting function using DataFrame.apply and lambda
-df[['input_cost_per_token', 'output_cost_per_token']] = df[[
-    'input_cost_per_token', 'output_cost_per_token']].apply(lambda x: x.map(format_cost))
+df[["input_cost_per_token", "output_cost_per_token"]] = df[
+    ["input_cost_per_token", "output_cost_per_token"]
+].apply(lambda x: x.map(format_cost))
 
 
 column_mapping = {
-    'input_cost_per_token': 'Prompt Cost (USD) per 1M tokens',
-    'output_cost_per_token': 'Completion Cost (USD) per 1M tokens',
-    'max_input_tokens': 'Max Prompt Tokens',
-    'max_output_tokens': 'Max Output Tokens',
-    'model_name': 'Model Name'
+    "input_cost_per_token": "Prompt Cost (USD) per 1M tokens",
+    "output_cost_per_token": "Completion Cost (USD) per 1M tokens",
+    "max_input_tokens": "Max Prompt Tokens",
+    "max_output_tokens": "Max Output Tokens",
+    "model_name": "Model Name",
 }
 
 # Assuming the keys of the JSON data represent the model names and have been set as the index
-df['Model Name'] = df.index
+df["Model Name"] = df.index
 
 # Apply the column renaming
 df.rename(columns=column_mapping, inplace=True)
 
 # Write the DataFrame with the correct column names as markdown to a file
-with open('pricing_table.md', 'w') as f:
-    f.write(df[['Model Name', 'Prompt Cost (USD) per 1M tokens', 'Completion Cost (USD) per 1M tokens',
-            'Max Prompt Tokens', 'Max Output Tokens']].to_markdown(index=False))
+with open("pricing_table.md", "w") as f:
+    f.write(
+        df[
+            [
+                "Model Name",
+                "Prompt Cost (USD) per 1M tokens",
+                "Completion Cost (USD) per 1M tokens",
+                "Max Prompt Tokens",
+                "Max Output Tokens",
+            ]
+        ].to_markdown(index=False)
+    )

From e871c8b1c41a557a2833dbad97f857bac62b227d Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Sat, 9 Nov 2024 16:57:13 +0530
Subject: [PATCH 06/11] ruff

---
 tests/test_llama_index_callbacks.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_llama_index_callbacks.py b/tests/test_llama_index_callbacks.py
index 6ca348a..1169974 100644
--- a/tests/test_llama_index_callbacks.py
+++ b/tests/test_llama_index_callbacks.py
@@ -1,8 +1,7 @@
 # test_llama_index.py
 import pytest
 from tokencost.callbacks import llama_index
-from llama_index.core.callbacks.schema import CBEventType, EventPayload
-from unittest.mock import MagicMock
+from llama_index.core.callbacks.schema import EventPayload
 
 # Mock the calculate_prompt_cost and calculate_completion_cost functions
 

From 838ebd46c557c107d05e9f5895a7540f6cdb16b1 Mon Sep 17 00:00:00 2001
From: reibs <areibman@gmail.com>
Date: Wed, 13 Nov 2024 23:57:34 -0800
Subject: [PATCH 07/11] updated some tests, will look at rest later

---
 tests/test_costs.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_costs.py b/tests/test_costs.py
index 665da10..10926f3 100644
--- a/tests/test_costs.py
+++ b/tests/test_costs.py
@@ -46,7 +46,7 @@
         ("gpt-4-vision-preview", 15),
         ("gpt-4o", 15),
         ("azure/gpt-4o", 15),
-        ("claude-2.1", 4),
+        ("claude-3-opus-latest", 11),
     ],
 )
 def test_count_message_tokens(model, expected_output):
@@ -74,7 +74,7 @@ def test_count_message_tokens(model, expected_output):
         ("gpt-4-vision-preview", 17),
         ("gpt-4o", 17),
         ("azure/gpt-4o", 17),
-        ("claude-2.1", 4),
+        # ("claude-3-opus-latest", 4), # TODO: Claude is not supported yet
     ],
 )
 def test_count_message_tokens_with_name(model, expected_output):
@@ -115,7 +115,7 @@ def test_count_message_tokens_invalid_model():
         ("gpt-4-vision-preview", 4),
         ("text-embedding-ada-002", 4),
         ("gpt-4o", 4),
-        ("claude-2.1", 4),
+        # ("claude-3-opus-latest", 4), # NOTE: Claude only supports messages
     ],
 )
 def test_count_string_tokens(model, expected_output):
@@ -155,7 +155,7 @@ def test_count_string_invalid_model():
         (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")),
         (MESSAGES, "gpt-4o", Decimal("0.000075")),
         (MESSAGES, "azure/gpt-4o", Decimal("0.000075")),
-        (MESSAGES, "claude-2.1", Decimal("0.000032")),
+        (MESSAGES, "claude-3-opus-latest", Decimal("0.000165")),
         (STRING, "text-embedding-ada-002", Decimal("0.0000004")),
     ],
 )
@@ -192,7 +192,7 @@ def test_invalid_prompt_format():
         (STRING, "gpt-4-vision-preview", Decimal("0.00012")),
         (STRING, "gpt-4o", Decimal("0.000060")),
         (STRING, "azure/gpt-4o", Decimal("0.000060")),
-        (STRING, "claude-2.1", Decimal("0.000096")),
+        (STRING, "claude-3-opus-latest", Decimal("0.000096")),
         (STRING, "text-embedding-ada-002", 0),
     ],
 )

From c22e4035a34e05e93ffcafbf0b1305cab5e6ad05 Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Thu, 14 Nov 2024 15:37:21 +0530
Subject: [PATCH 08/11] update code for latest anthropic api

---
 tokencost/costs.py | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index 50af1bd..e87251d 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -45,18 +45,19 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         logger.warning(
             "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
         )
-        if "claude-3-sonnet" in model:
-            logger.warning(
-                f"Token counting (beta) is not supported for {model}. Returning num tokens using count from the string."
+        if not any(
+            supported_model in model for supported_model in [
+                "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus"
+            ]
+        ):
+            raise ValueError(
+                f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
             )
-            # For anthropic<0.39.0 this method is no more supported
-            prompt = "".join(message["content"] for message in messages)
-            return count_string_tokens(prompt, model)
 
         ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
 
         try:
-            client = anthropic.Client(api_key=ANTHROPIC_API_KEY)
+            client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
             num_tokens = client.beta.messages.count_tokens(
                 model=model,
                 messages=messages,
@@ -138,17 +139,9 @@ def count_string_tokens(prompt: str, model: str) -> int:
         model = model.split("/")[-1]
 
     if "claude-" in model:
-        logger.warning(
-            "Warning: This is only accurate for older models e.g. `claude-2.1` so please expect a _very_ rough estimate."
-            "Use the `usage` property in the response for exact counts."
+        raise ValueError(
+            "Claude models do not support this method. Use the `usage` property in the response for exact counts."
         )
-        if "claude-3" in model:
-            logger.warning(
-                "Warning: Claude-3 models are unsupported. Returning num tokens assuming claude-2.1."
-            )
-        client = anthropic.Client()
-        token_count = client.count_tokens(prompt)
-        return token_count
 
     try:
         encoding = tiktoken.encoding_for_model(model)

From 856a131b7736515e696e82efcf9d6d7ac2fb6f4d Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Thu, 14 Nov 2024 15:37:37 +0530
Subject: [PATCH 09/11] comment out anthropic models in tests

---
 tests/test_costs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_costs.py b/tests/test_costs.py
index 10926f3..6b0578f 100644
--- a/tests/test_costs.py
+++ b/tests/test_costs.py
@@ -74,7 +74,7 @@ def test_count_message_tokens(model, expected_output):
         ("gpt-4-vision-preview", 17),
         ("gpt-4o", 17),
         ("azure/gpt-4o", 17),
-        # ("claude-3-opus-latest", 4), # TODO: Claude is not supported yet
+        # ("claude-3-opus-latest", 4), # NOTE: Claude only supports messages without extra inputs
     ],
 )
 def test_count_message_tokens_with_name(model, expected_output):
@@ -192,7 +192,7 @@ def test_invalid_prompt_format():
         (STRING, "gpt-4-vision-preview", Decimal("0.00012")),
         (STRING, "gpt-4o", Decimal("0.000060")),
         (STRING, "azure/gpt-4o", Decimal("0.000060")),
-        (STRING, "claude-3-opus-latest", Decimal("0.000096")),
+        # (STRING, "claude-3-opus-latest", Decimal("0.000096")), # NOTE: Claude only supports messages
         (STRING, "text-embedding-ada-002", 0),
     ],
 )

From bb99c402a9ce496cd82ad3207923fcd40ee4d58e Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Fri, 15 Nov 2024 03:58:05 +0530
Subject: [PATCH 10/11] anthropic support and addressing caveats

---
 tokencost/costs.py | 77 ++++++++++++++++++++++++++++------------------
 1 file changed, 47 insertions(+), 30 deletions(-)

diff --git a/tokencost/costs.py b/tokencost/costs.py
index e87251d..efdaffd 100644
--- a/tokencost/costs.py
+++ b/tokencost/costs.py
@@ -16,6 +16,26 @@
 # https://github.com/anthropics/anthropic-tokenizer-typescript/blob/main/index.ts
 
 
+def get_anthropic_token_count(messages: List[Dict[str, str]], model: str) -> int:
+    """Count input tokens for Claude `messages` via Anthropic's beta token counting API.
+
+    Raises ValueError when `model` is not supported by the token counting API;
+    errors raised by the Anthropic client propagate to the caller unchanged.
+    """
+    if not any(
+        supported_model in model for supported_model in [
+            "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus"
+        ]
+    ):
+        raise ValueError(
+            f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
+        )
+    return anthropic.Anthropic().beta.messages.count_tokens(
+        model=model,
+        messages=messages,
+    ).input_tokens
+
+
 def strip_ft_model_name(model: str) -> str:
     """
     Finetuned models format: ft:gpt-3.5-turbo:my-org:custom_suffix:id
@@ -45,28 +65,7 @@ def count_message_tokens(messages: List[Dict[str, str]], model: str) -> int:
         logger.warning(
             "Warning: Anthropic token counting API is currently in beta. Please expect differences in costs!"
         )
-        if not any(
-            supported_model in model for supported_model in [
-                "claude-3-5-sonnet", "claude-3-5-haiku", "claude-3-haiku", "claude-3-opus"
-            ]
-        ):
-            raise ValueError(
-                f"{model} is not supported in token counting (beta) API. Use the `usage` property in the response for exact counts."
-            )
-
-        ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
-
-        try:
-            client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
-            num_tokens = client.beta.messages.count_tokens(
-                model=model,
-                messages=messages,
-            ).input_tokens
-            return num_tokens
-        except TypeError as e:
-            raise e
-        except Exception as e:
-            raise e
+        return get_anthropic_token_count(messages, model)
 
     try:
         encoding = tiktoken.encoding_for_model(model)
@@ -140,7 +139,7 @@ def count_string_tokens(prompt: str, model: str) -> int:
 
     if "claude-" in model:
         raise ValueError(
-            "Claude models do not support this method. Use the `usage` property in the response for exact counts."
+            "Warning: Anthropic does not support this method. Please use the `count_message_tokens` function for the exact counts."
         )
 
     try:
@@ -209,13 +208,11 @@ def calculate_prompt_cost(prompt: Union[List[dict], str], model: str) -> Decimal
         )
     if not isinstance(prompt, (list, str)):
         raise TypeError(
-            f"""Prompt must be either a string or list of message objects.
-            it is {type(prompt)} instead.
-            """
+            f"Prompt must be either a string or list of message objects but found {type(prompt)} instead."
         )
     prompt_tokens = (
         count_string_tokens(prompt, model)
-        if isinstance(prompt, str)
+        if isinstance(prompt, str) and "claude-" not in model
         else count_message_tokens(prompt, model)
     )
 
@@ -244,7 +241,18 @@ def calculate_completion_cost(completion: str, model: str) -> Decimal:
             f"""Model {model} is not implemented.
             Double-check your spelling, or submit an issue/PR"""
         )
-    completion_tokens = count_string_tokens(completion, model)
+
+    if not isinstance(completion, str):
+        raise TypeError(
+            f"Completion must be a string but found {type(completion)} instead."
+        )
+
+    if "claude-" in model:
+        completion_list = [{"role": "assistant", "content": completion}]
+        # Anthropic appends some 13 additional tokens to the actual completion tokens
+        completion_tokens = count_message_tokens(completion_list, model) - 13
+    else:
+        completion_tokens = count_string_tokens(completion, model)
 
     return calculate_cost_by_tokens(completion_tokens, model, "output")
 
@@ -273,10 +281,19 @@ def calculate_all_costs_and_tokens(
     completion_cost = calculate_completion_cost(completion, model)
     prompt_tokens = (
         count_string_tokens(prompt, model)
-        if isinstance(prompt, str)
+        if isinstance(prompt, str) and "claude-" not in model
         else count_message_tokens(prompt, model)
     )
-    completion_tokens = count_string_tokens(completion, model)
+
+    if "claude-" in model:
+        logger.warning(
+            "Warning: Token counting is estimated for Claude completions."
+        )
+        completion_list = [{"role": "assistant", "content": completion}]
+        # Anthropic appends some 13 additional tokens to the actual completion tokens
+        completion_tokens = count_message_tokens(completion_list, model) - 13
+    else:
+        completion_tokens = count_string_tokens(completion, model)
 
     return {
         "prompt_cost": prompt_cost,

From ca70b7cbf1e9963d956e53b72d33c2c259d557cf Mon Sep 17 00:00:00 2001
From: Pratyush Shukla <ps4534@nyu.edu>
Date: Fri, 15 Nov 2024 04:10:33 +0530
Subject: [PATCH 11/11] update costs in tests for `gpt-4o` model

---
 tests/test_costs.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_costs.py b/tests/test_costs.py
index 6b0578f..4ba9d09 100644
--- a/tests/test_costs.py
+++ b/tests/test_costs.py
@@ -153,7 +153,7 @@ def test_count_string_invalid_model():
         (MESSAGES, "gpt-4-0613", Decimal("0.00045")),
         (MESSAGES, "gpt-4-1106-preview", Decimal("0.00015")),
         (MESSAGES, "gpt-4-vision-preview", Decimal("0.00015")),
-        (MESSAGES, "gpt-4o", Decimal("0.000075")),
+        (MESSAGES, "gpt-4o", Decimal("0.0000375")),
         (MESSAGES, "azure/gpt-4o", Decimal("0.000075")),
         (MESSAGES, "claude-3-opus-latest", Decimal("0.000165")),
         (STRING, "text-embedding-ada-002", Decimal("0.0000004")),
@@ -190,7 +190,7 @@ def test_invalid_prompt_format():
         (STRING, "gpt-4-0613", Decimal("0.00024")),
         (STRING, "gpt-4-1106-preview", Decimal("0.00012")),
         (STRING, "gpt-4-vision-preview", Decimal("0.00012")),
-        (STRING, "gpt-4o", Decimal("0.000060")),
+        (STRING, "gpt-4o", Decimal("0.00004")),
         (STRING, "azure/gpt-4o", Decimal("0.000060")),
         # (STRING, "claude-3-opus-latest", Decimal("0.000096")), # NOTE: Claude only supports messages
         (STRING, "text-embedding-ada-002", 0),