From f3a3cd4e5bd5c0acd5707f1e67b2a465ef51e366 Mon Sep 17 00:00:00 2001
From: Jeff Tang <jeff.x.tang@gmail.com>
Date: Tue, 6 Aug 2024 22:12:13 -0700
Subject: [PATCH] record execution time for each llm; display results in a
 pandas table for easy comparison

---
 examples/llm_reasoning.ipynb | 1000 +++++++++++++++++++++++++++-------
 1 file changed, 818 insertions(+), 182 deletions(-)

diff --git a/examples/llm_reasoning.ipynb b/examples/llm_reasoning.ipynb
index ebbe2ad4..c3869cc2 100644
--- a/examples/llm_reasoning.ipynb
+++ b/examples/llm_reasoning.ipynb
@@ -114,6 +114,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
+    "import time\n",
+    "\n",
     "llms = [\"aws:meta.llama3-8b-instruct-v1:0\",\n",
     "        \"groq:llama3-8b-8192\",\n",
     "        \"fireworks:accounts/fireworks/models/llama-v3-8b-instruct\",\n",
@@ -127,13 +129,20 @@
     "        \"fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct\",\n",
     "        \"together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\",\n",
     "        \"octo:meta-llama-3.1-8b-instruct\",\n",
-    "        \n",
     "       ]\n",
     "\n",
     "def compare_llm(messages):\n",
+    "    \"\"\"Query each model in llms; return (responses, execution_times) as parallel lists.\"\"\"\n",
+    "    execution_times = []\n",
+    "    responses = []\n",
     "    for llm in llms:\n",
+    "        start_time = time.perf_counter()  # monotonic clock, unaffected by system clock changes\n",
     "        response = client.chat.completions.create(model=llm, messages=messages)\n",
-    "        print(f\"{llm} - {response.choices[0].message.content.strip()}\\n==========\")"
+    "        execution_time = time.perf_counter() - start_time\n",
+    "        answer = response.choices[0].message.content.strip()\n",
+    "        responses.append(answer)\n",
+    "        execution_times.append(execution_time)\n",
+    "        print(f\"{llm} - {execution_time:.2f} seconds: {answer}\")\n",
+    "    return responses, execution_times"
    ]
   },
   {
@@ -149,57 +158,181 @@
    "execution_count": 5,
    "id": "f3c4a8ef-e23b-4d4a-8561-3e5a2a866bd1",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "aws:meta.llama3-8b-instruct-v1:0 - 2.38 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
+      "groq:llama3-8b-8192 - 2.24 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
+      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.92 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a social worker.\n",
+      "octo:meta-llama-3-8b-instruct - 1.82 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a homemaker.\n",
+      "together:meta-llama/Llama-3-8b-chat-hf - 0.61 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
+      "openai:gpt-3.5-turbo - 1.00 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n",
+      "replicate:meta/meta-llama-3-8b-instruct - 1.36 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
+      "aws:meta.llama3-1-8b-instruct-v1:0 - 0.45 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n",
+      "groq:llama-3.1-8b-instant - 0.84 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n",
+      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.37 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n",
+      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.26 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n",
+      "octo:meta-llama-3.1-8b-instruct - 0.32 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n"
+     ]
+    }
+   ],
    "source": [
     "messages = [\n",
     "    {\"role\": \"user\", \"content\": \"Who is Tom Cruise's mother?\"},\n",
-    "]"
+    "]\n",
+    "\n",
+    "responses, execution_times = compare_llm(messages)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 6,
-   "id": "3e901285-6ca7-4e82-8829-12b24fb9ff5d",
+   "id": "769f7f42-2adb-4903-ab17-3143a5d950ce",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "def display(llms, execution_times, responses):\n",
+    "    \"\"\"Return a center-aligned, 1-indexed pandas Styler of model, execution time and response.\"\"\"\n",
+    "    centered = [('text-align', 'center')]\n",
+    "    table = pd.DataFrame({\n",
+    "        'Provider:Model Name': llms,\n",
+    "        'Execution Time': execution_times,\n",
+    "        'Model Response ': responses,\n",
+    "    })\n",
+    "    table.index = table.index + 1  # number rows from 1 instead of 0\n",
+    "\n",
+    "    header_and_cell_styles = [\n",
+    "        {'selector': 'th', 'props': centered},\n",
+    "        {'selector': 'td', 'props': centered},\n",
+    "    ]\n",
+    "    return table.style.set_table_styles(header_and_cell_styles).set_properties(**{'text-align': 'center'})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "d2359ad5-9f0b-4bd6-9838-54df91de0fb3",
    "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "aws:meta.llama3-8b-instruct-v1:0 - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
-      "==========\n",
-      "groq:llama3-8b-8192 - Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a homemaker.\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
-      "==========\n",
-      "octo:meta-llama-3-8b-instruct - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
-      "==========\n",
-      "together:meta-llama/Llama-3-8b-chat-hf - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
-      "==========\n",
-      "openai:gpt-3.5-turbo - Tom Cruise's mother is Mary Lee Pfeiffer.\n",
-      "==========\n",
-      "replicate:meta/meta-llama-3-8b-instruct - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n",
-      "==========\n",
-      "aws:meta.llama3-1-8b-instruct-v1:0 - Tom Cruise's mother is Mary Lee Pfeiffer.\n",
-      "==========\n",
-      "groq:llama-3.1-8b-instant - Tom Cruise's mother is Mary Lee Pfeiffer.\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - Tom Cruise's mother is Mary Lee Pfeiffer.\n",
-      "==========\n",
-      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - Tom Cruise's mother is Mary Lee Pfeiffer.\n",
-      "==========\n",
-      "octo:meta-llama-3.1-8b-instruct - Tom Cruise's mother is Mary Lee Pfeiffer.\n",
-      "==========\n"
-     ]
+     "data": {
+      "text/html": [
+       "<style type=\"text/css\">\n",
+       "#T_0fd73 th {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_0fd73 td {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_0fd73_row0_col0, #T_0fd73_row0_col1, #T_0fd73_row0_col2, #T_0fd73_row1_col0, #T_0fd73_row1_col1, #T_0fd73_row1_col2, #T_0fd73_row2_col0, #T_0fd73_row2_col1, #T_0fd73_row2_col2, #T_0fd73_row3_col0, #T_0fd73_row3_col1, #T_0fd73_row3_col2, #T_0fd73_row4_col0, #T_0fd73_row4_col1, #T_0fd73_row4_col2, #T_0fd73_row5_col0, #T_0fd73_row5_col1, #T_0fd73_row5_col2, #T_0fd73_row6_col0, #T_0fd73_row6_col1, #T_0fd73_row6_col2, #T_0fd73_row7_col0, #T_0fd73_row7_col1, #T_0fd73_row7_col2, #T_0fd73_row8_col0, #T_0fd73_row8_col1, #T_0fd73_row8_col2, #T_0fd73_row9_col0, #T_0fd73_row9_col1, #T_0fd73_row9_col2, #T_0fd73_row10_col0, #T_0fd73_row10_col1, #T_0fd73_row10_col2, #T_0fd73_row11_col0, #T_0fd73_row11_col1, #T_0fd73_row11_col2 {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "</style>\n",
+       "<table id=\"T_0fd73\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th class=\"blank level0\" >&nbsp;</th>\n",
+       "      <th id=\"T_0fd73_level0_col0\" class=\"col_heading level0 col0\" >Provider:Model Name</th>\n",
+       "      <th id=\"T_0fd73_level0_col1\" class=\"col_heading level0 col1\" >Execution Time</th>\n",
+       "      <th id=\"T_0fd73_level0_col2\" class=\"col_heading level0 col2\" >Model Response </th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
+       "      <td id=\"T_0fd73_row0_col0\" class=\"data row0 col0\" >aws:meta.llama3-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_0fd73_row0_col1\" class=\"data row0 col1\" >2.383425</td>\n",
+       "      <td id=\"T_0fd73_row0_col2\" class=\"data row0 col2\" >Tom Cruise's mother is Mary Lee South (née Pfeiffer).</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
+       "      <td id=\"T_0fd73_row1_col0\" class=\"data row1 col0\" >groq:llama3-8b-8192</td>\n",
+       "      <td id=\"T_0fd73_row1_col1\" class=\"data row1 col1\" >2.241169</td>\n",
+       "      <td id=\"T_0fd73_row1_col2\" class=\"data row1 col2\" >Tom Cruise's mother is Mary Lee South (née Pfeiffer).</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
+       "      <td id=\"T_0fd73_row2_col0\" class=\"data row2 col0\" >fireworks:accounts/fireworks/models/llama-v3-8b-instruct</td>\n",
+       "      <td id=\"T_0fd73_row2_col1\" class=\"data row2 col1\" >0.916995</td>\n",
+       "      <td id=\"T_0fd73_row2_col2\" class=\"data row2 col2\" >Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a social worker.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n",
+       "      <td id=\"T_0fd73_row3_col0\" class=\"data row3 col0\" >octo:meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_0fd73_row3_col1\" class=\"data row3 col1\" >1.822360</td>\n",
+       "      <td id=\"T_0fd73_row3_col2\" class=\"data row3 col2\" >Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a homemaker.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n",
+       "      <td id=\"T_0fd73_row4_col0\" class=\"data row4 col0\" >together:meta-llama/Llama-3-8b-chat-hf</td>\n",
+       "      <td id=\"T_0fd73_row4_col1\" class=\"data row4 col1\" >0.607085</td>\n",
+       "      <td id=\"T_0fd73_row4_col2\" class=\"data row4 col2\" >Tom Cruise's mother is Mary Lee South (née Pfeiffer).</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row5\" class=\"row_heading level0 row5\" >6</th>\n",
+       "      <td id=\"T_0fd73_row5_col0\" class=\"data row5 col0\" >openai:gpt-3.5-turbo</td>\n",
+       "      <td id=\"T_0fd73_row5_col1\" class=\"data row5 col1\" >1.002106</td>\n",
+       "      <td id=\"T_0fd73_row5_col2\" class=\"data row5 col2\" >Tom Cruise's mother is Mary Lee Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row6\" class=\"row_heading level0 row6\" >7</th>\n",
+       "      <td id=\"T_0fd73_row6_col0\" class=\"data row6 col0\" >replicate:meta/meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_0fd73_row6_col1\" class=\"data row6 col1\" >1.362718</td>\n",
+       "      <td id=\"T_0fd73_row6_col2\" class=\"data row6 col2\" >Tom Cruise's mother is Mary Lee South (née Pfeiffer).</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row7\" class=\"row_heading level0 row7\" >8</th>\n",
+       "      <td id=\"T_0fd73_row7_col0\" class=\"data row7 col0\" >aws:meta.llama3-1-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_0fd73_row7_col1\" class=\"data row7 col1\" >0.454378</td>\n",
+       "      <td id=\"T_0fd73_row7_col2\" class=\"data row7 col2\" >Tom Cruise's mother is Mary Lee Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row8\" class=\"row_heading level0 row8\" >9</th>\n",
+       "      <td id=\"T_0fd73_row8_col0\" class=\"data row8 col0\" >groq:llama-3.1-8b-instant</td>\n",
+       "      <td id=\"T_0fd73_row8_col1\" class=\"data row8 col1\" >0.835516</td>\n",
+       "      <td id=\"T_0fd73_row8_col2\" class=\"data row8 col2\" >Tom Cruise's mother is Mary Lee Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row9\" class=\"row_heading level0 row9\" >10</th>\n",
+       "      <td id=\"T_0fd73_row9_col0\" class=\"data row9 col0\" >fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct</td>\n",
+       "      <td id=\"T_0fd73_row9_col1\" class=\"data row9 col1\" >0.371963</td>\n",
+       "      <td id=\"T_0fd73_row9_col2\" class=\"data row9 col2\" >Tom Cruise's mother is Mary Lee Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row10\" class=\"row_heading level0 row10\" >11</th>\n",
+       "      <td id=\"T_0fd73_row10_col0\" class=\"data row10 col0\" >together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo</td>\n",
+       "      <td id=\"T_0fd73_row10_col1\" class=\"data row10 col1\" >0.264464</td>\n",
+       "      <td id=\"T_0fd73_row10_col2\" class=\"data row10 col2\" >Tom Cruise's mother is Mary Lee Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_0fd73_level0_row11\" class=\"row_heading level0 row11\" >12</th>\n",
+       "      <td id=\"T_0fd73_row11_col0\" class=\"data row11 col0\" >octo:meta-llama-3.1-8b-instruct</td>\n",
+       "      <td id=\"T_0fd73_row11_col1\" class=\"data row11 col1\" >0.315450</td>\n",
+       "      <td id=\"T_0fd73_row11_col2\" class=\"data row11 col2\" >Tom Cruise's mother is Mary Lee Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n"
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x111c24be0>"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "compare_llm(messages)"
+    "display(llms, execution_times, responses)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 8,
    "id": "399f6cca-7f34-4a91-aab0-070560640033",
    "metadata": {},
    "outputs": [
@@ -207,30 +340,18 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "aws:meta.llama3-8b-instruct-v1:0 - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. Can you provide more context or details about who Mary Lee Pfeiffer is or why you are looking for information about her son?\n",
-      "==========\n",
-      "groq:llama3-8b-8192 - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - Mary Lee Pfeiffer is a well-known American artist, and her son is none other than the famous artist and sculptor, John Pfeiffer!\n",
-      "==========\n",
-      "octo:meta-llama-3-8b-instruct - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n",
-      "==========\n",
-      "together:meta-llama/Llama-3-8b-chat-hf - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n",
-      "==========\n",
-      "openai:gpt-3.5-turbo - Mary Lee Pfeiffer's son is actor and filmmaker Joaquin Phoenix.\n",
-      "==========\n",
-      "replicate:meta/meta-llama-3-8b-instruct - According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.\n",
-      "==========\n",
-      "aws:meta.llama3-1-8b-instruct-v1:0 - I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n",
-      "==========\n",
-      "groq:llama-3.1-8b-instant - I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - I don't have information on Mary Lee Pfeiffer's son.\n",
-      "==========\n",
-      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - I am unable to verify who Mary Lee Pfeiffer's son is.\n",
-      "==========\n",
-      "octo:meta-llama-3.1-8b-instruct - I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n",
-      "==========\n"
+      "aws:meta.llama3-8b-instruct-v1:0 - 1.23 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. Can you provide more context or details about who Mary Lee Pfeiffer is or why you are looking for information about her son?\n",
+      "groq:llama3-8b-8192 - 0.39 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n",
+      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.44 seconds: According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.\n",
+      "octo:meta-llama-3-8b-instruct - 1.25 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n",
+      "together:meta-llama/Llama-3-8b-chat-hf - 0.92 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n",
+      "openai:gpt-3.5-turbo - 0.64 seconds: Mary Lee Pfeiffer's son is actor and filmmaker Joaquin Phoenix.\n",
+      "replicate:meta/meta-llama-3-8b-instruct - 1.38 seconds: According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.\n",
+      "aws:meta.llama3-1-8b-instruct-v1:0 - 0.64 seconds: I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n",
+      "groq:llama-3.1-8b-instant - 1.06 seconds: I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n",
+      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.39 seconds: I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n",
+      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.79 seconds: I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. If you could provide more context or clarify who Mary Lee Pfeiffer is, I may be able to help you better.\n",
+      "octo:meta-llama-3.1-8b-instruct - 0.76 seconds: I am unable to verify who Mary Lee Pfeiffer's son is.\n"
      ]
     }
    ],
@@ -238,7 +359,126 @@
     "messages = [\n",
     "    {\"role\": \"user\", \"content\": \"Who is Mary Lee Pfeiffer's son?\"},\n",
     "]\n",
-    "compare_llm(messages)"
+    "\n",
+    "responses, execution_times = compare_llm(messages)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "eee7704d-a187-41bc-b119-c94461d0ee74",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style type=\"text/css\">\n",
+       "#T_a0447 th {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_a0447 td {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_a0447_row0_col0, #T_a0447_row0_col1, #T_a0447_row0_col2, #T_a0447_row1_col0, #T_a0447_row1_col1, #T_a0447_row1_col2, #T_a0447_row2_col0, #T_a0447_row2_col1, #T_a0447_row2_col2, #T_a0447_row3_col0, #T_a0447_row3_col1, #T_a0447_row3_col2, #T_a0447_row4_col0, #T_a0447_row4_col1, #T_a0447_row4_col2, #T_a0447_row5_col0, #T_a0447_row5_col1, #T_a0447_row5_col2, #T_a0447_row6_col0, #T_a0447_row6_col1, #T_a0447_row6_col2, #T_a0447_row7_col0, #T_a0447_row7_col1, #T_a0447_row7_col2, #T_a0447_row8_col0, #T_a0447_row8_col1, #T_a0447_row8_col2, #T_a0447_row9_col0, #T_a0447_row9_col1, #T_a0447_row9_col2, #T_a0447_row10_col0, #T_a0447_row10_col1, #T_a0447_row10_col2, #T_a0447_row11_col0, #T_a0447_row11_col1, #T_a0447_row11_col2 {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "</style>\n",
+       "<table id=\"T_a0447\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th class=\"blank level0\" >&nbsp;</th>\n",
+       "      <th id=\"T_a0447_level0_col0\" class=\"col_heading level0 col0\" >Provider:Model Name</th>\n",
+       "      <th id=\"T_a0447_level0_col1\" class=\"col_heading level0 col1\" >Execution Time</th>\n",
+       "      <th id=\"T_a0447_level0_col2\" class=\"col_heading level0 col2\" >Model Response </th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
+       "      <td id=\"T_a0447_row0_col0\" class=\"data row0 col0\" >aws:meta.llama3-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_a0447_row0_col1\" class=\"data row0 col1\" >1.225959</td>\n",
+       "      <td id=\"T_a0447_row0_col2\" class=\"data row0 col2\" >I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. Can you provide more context or details about who Mary Lee Pfeiffer is or why you are looking for information about her son?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
+       "      <td id=\"T_a0447_row1_col0\" class=\"data row1 col0\" >groq:llama3-8b-8192</td>\n",
+       "      <td id=\"T_a0447_row1_col1\" class=\"data row1 col1\" >0.391800</td>\n",
+       "      <td id=\"T_a0447_row1_col2\" class=\"data row1 col2\" >I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
+       "      <td id=\"T_a0447_row2_col0\" class=\"data row2 col0\" >fireworks:accounts/fireworks/models/llama-v3-8b-instruct</td>\n",
+       "      <td id=\"T_a0447_row2_col1\" class=\"data row2 col1\" >0.438607</td>\n",
+       "      <td id=\"T_a0447_row2_col2\" class=\"data row2 col2\" >According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n",
+       "      <td id=\"T_a0447_row3_col0\" class=\"data row3 col0\" >octo:meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_a0447_row3_col1\" class=\"data row3 col1\" >1.250298</td>\n",
+       "      <td id=\"T_a0447_row3_col2\" class=\"data row3 col2\" >I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n",
+       "      <td id=\"T_a0447_row4_col0\" class=\"data row4 col0\" >together:meta-llama/Llama-3-8b-chat-hf</td>\n",
+       "      <td id=\"T_a0447_row4_col1\" class=\"data row4 col1\" >0.924522</td>\n",
+       "      <td id=\"T_a0447_row4_col2\" class=\"data row4 col2\" >I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row5\" class=\"row_heading level0 row5\" >6</th>\n",
+       "      <td id=\"T_a0447_row5_col0\" class=\"data row5 col0\" >openai:gpt-3.5-turbo</td>\n",
+       "      <td id=\"T_a0447_row5_col1\" class=\"data row5 col1\" >0.637278</td>\n",
+       "      <td id=\"T_a0447_row5_col2\" class=\"data row5 col2\" >Mary Lee Pfeiffer's son is actor and filmmaker Joaquin Phoenix.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row6\" class=\"row_heading level0 row6\" >7</th>\n",
+       "      <td id=\"T_a0447_row6_col0\" class=\"data row6 col0\" >replicate:meta/meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_a0447_row6_col1\" class=\"data row6 col1\" >1.375630</td>\n",
+       "      <td id=\"T_a0447_row6_col2\" class=\"data row6 col2\" >According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row7\" class=\"row_heading level0 row7\" >8</th>\n",
+       "      <td id=\"T_a0447_row7_col0\" class=\"data row7 col0\" >aws:meta.llama3-1-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_a0447_row7_col1\" class=\"data row7 col1\" >0.639018</td>\n",
+       "      <td id=\"T_a0447_row7_col2\" class=\"data row7 col2\" >I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row8\" class=\"row_heading level0 row8\" >9</th>\n",
+       "      <td id=\"T_a0447_row8_col0\" class=\"data row8 col0\" >groq:llama-3.1-8b-instant</td>\n",
+       "      <td id=\"T_a0447_row8_col1\" class=\"data row8 col1\" >1.059837</td>\n",
+       "      <td id=\"T_a0447_row8_col2\" class=\"data row8 col2\" >I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row9\" class=\"row_heading level0 row9\" >10</th>\n",
+       "      <td id=\"T_a0447_row9_col0\" class=\"data row9 col0\" >fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct</td>\n",
+       "      <td id=\"T_a0447_row9_col1\" class=\"data row9 col1\" >0.387835</td>\n",
+       "      <td id=\"T_a0447_row9_col2\" class=\"data row9 col2\" >I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row10\" class=\"row_heading level0 row10\" >11</th>\n",
+       "      <td id=\"T_a0447_row10_col0\" class=\"data row10 col0\" >together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo</td>\n",
+       "      <td id=\"T_a0447_row10_col1\" class=\"data row10 col1\" >0.788913</td>\n",
+       "      <td id=\"T_a0447_row10_col2\" class=\"data row10 col2\" >I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. If you could provide more context or clarify who Mary Lee Pfeiffer is, I may be able to help you better.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_a0447_level0_row11\" class=\"row_heading level0 row11\" >12</th>\n",
+       "      <td id=\"T_a0447_row11_col0\" class=\"data row11 col0\" >octo:meta-llama-3.1-8b-instruct</td>\n",
+       "      <td id=\"T_a0447_row11_col1\" class=\"data row11 col1\" >0.763321</td>\n",
+       "      <td id=\"T_a0447_row11_col2\" class=\"data row11 col2\" >I am unable to verify who Mary Lee Pfeiffer's son is.</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n"
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x126389b40>"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "display(llms, execution_times, responses)"
    ]
   },
   {
@@ -251,7 +491,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 10,
    "id": "e537871e-68b6-44c3-886a-d3ebe7a692c1",
    "metadata": {},
    "outputs": [
@@ -259,32 +499,20 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "aws:meta.llama3-8b-instruct-v1:0 - There are 2 R's in the word \"strawberry\".\n",
-      "==========\n",
-      "groq:llama3-8b-8192 - There are 2 R's in the word \"strawberry\".\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - There are 2 R's in the word \"strawberry\".\n",
-      "==========\n",
-      "octo:meta-llama-3-8b-instruct - There are 2 R's in the word \"strawberry\".\n",
-      "==========\n",
-      "together:meta-llama/Llama-3-8b-chat-hf - There are 2 R's in the word \"strawberry\".\n",
-      "==========\n",
-      "openai:gpt-3.5-turbo - There are three r's in the word \"strawberry.\"\n",
-      "==========\n",
-      "replicate:meta/meta-llama-3-8b-instruct - Let me count them for you!\n",
+      "aws:meta.llama3-8b-instruct-v1:0 - 0.48 seconds: There are 2 R's in the word \"strawberry\".\n",
+      "groq:llama3-8b-8192 - 0.16 seconds: There are 2 R's in the word \"strawberry\".\n",
+      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.25 seconds: There are 2 R's in the word \"strawberry\".\n",
+      "octo:meta-llama-3-8b-instruct - 0.31 seconds: There are 2 R's in the word \"strawberry\".\n",
+      "together:meta-llama/Llama-3-8b-chat-hf - 0.25 seconds: There are 2 R's in the word \"strawberry\".\n",
+      "openai:gpt-3.5-turbo - 0.90 seconds: There are three r's in the word \"strawberry.\"\n",
+      "replicate:meta/meta-llama-3-8b-instruct - 1.33 seconds: Let me count them for you!\n",
       "\n",
       "There are 2 R's in the word \"strawberry\".\n",
-      "==========\n",
-      "aws:meta.llama3-1-8b-instruct-v1:0 - There are 3 r's in the word \"strawberry\".\n",
-      "==========\n",
-      "groq:llama-3.1-8b-instant - There are 3 r's in the word \"strawberry\".\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - There are 2 r's in the word strawberry.\n",
-      "==========\n",
-      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - There are 2 r's in the word strawberry.\n",
-      "==========\n",
-      "octo:meta-llama-3.1-8b-instruct - There are 3 r's in the word \"strawberry\".\n",
-      "==========\n"
+      "aws:meta.llama3-1-8b-instruct-v1:0 - 0.49 seconds: There are 3 r's in the word \"strawberry\".\n",
+      "groq:llama-3.1-8b-instant - 2.36 seconds: There are 3 r's in the word \"strawberry\".\n",
+      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.43 seconds: There are 3 r's in the word \"strawberry\".\n",
+      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.33 seconds: There are 3 r's in the word \"strawberry\".\n",
+      "octo:meta-llama-3.1-8b-instruct - 0.59 seconds: There are 2 r's in the word \"strawberry\".\n"
      ]
     }
    ],
@@ -292,107 +520,320 @@
     "messages = [\n",
     "    {\"role\": \"user\", \"content\": \"How many r's in the word strawberry?\"},\n",
     "]\n",
-    "compare_llm(messages)"
+    "\n",
+    "responses, execution_times = compare_llm(messages)"
    ]
   },
   {
-   "cell_type": "markdown",
-   "id": "cae3fb5f-a173-4a33-b843-65df6d1086f9",
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "5678e393-4967-49f1-9e0f-251471dc92b7",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style type=\"text/css\">\n",
+       "#T_347ad th {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_347ad td {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_347ad_row0_col0, #T_347ad_row0_col1, #T_347ad_row0_col2, #T_347ad_row1_col0, #T_347ad_row1_col1, #T_347ad_row1_col2, #T_347ad_row2_col0, #T_347ad_row2_col1, #T_347ad_row2_col2, #T_347ad_row3_col0, #T_347ad_row3_col1, #T_347ad_row3_col2, #T_347ad_row4_col0, #T_347ad_row4_col1, #T_347ad_row4_col2, #T_347ad_row5_col0, #T_347ad_row5_col1, #T_347ad_row5_col2, #T_347ad_row6_col0, #T_347ad_row6_col1, #T_347ad_row6_col2, #T_347ad_row7_col0, #T_347ad_row7_col1, #T_347ad_row7_col2, #T_347ad_row8_col0, #T_347ad_row8_col1, #T_347ad_row8_col2, #T_347ad_row9_col0, #T_347ad_row9_col1, #T_347ad_row9_col2, #T_347ad_row10_col0, #T_347ad_row10_col1, #T_347ad_row10_col2, #T_347ad_row11_col0, #T_347ad_row11_col1, #T_347ad_row11_col2 {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "</style>\n",
+       "<table id=\"T_347ad\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th class=\"blank level0\" >&nbsp;</th>\n",
+       "      <th id=\"T_347ad_level0_col0\" class=\"col_heading level0 col0\" >Provider:Model Name</th>\n",
+       "      <th id=\"T_347ad_level0_col1\" class=\"col_heading level0 col1\" >Execution Time</th>\n",
+       "      <th id=\"T_347ad_level0_col2\" class=\"col_heading level0 col2\" >Model Response </th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
+       "      <td id=\"T_347ad_row0_col0\" class=\"data row0 col0\" >aws:meta.llama3-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_347ad_row0_col1\" class=\"data row0 col1\" >0.480391</td>\n",
+       "      <td id=\"T_347ad_row0_col2\" class=\"data row0 col2\" >There are 2 R's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
+       "      <td id=\"T_347ad_row1_col0\" class=\"data row1 col0\" >groq:llama3-8b-8192</td>\n",
+       "      <td id=\"T_347ad_row1_col1\" class=\"data row1 col1\" >0.159436</td>\n",
+       "      <td id=\"T_347ad_row1_col2\" class=\"data row1 col2\" >There are 2 R's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
+       "      <td id=\"T_347ad_row2_col0\" class=\"data row2 col0\" >fireworks:accounts/fireworks/models/llama-v3-8b-instruct</td>\n",
+       "      <td id=\"T_347ad_row2_col1\" class=\"data row2 col1\" >0.254061</td>\n",
+       "      <td id=\"T_347ad_row2_col2\" class=\"data row2 col2\" >There are 2 R's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n",
+       "      <td id=\"T_347ad_row3_col0\" class=\"data row3 col0\" >octo:meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_347ad_row3_col1\" class=\"data row3 col1\" >0.314966</td>\n",
+       "      <td id=\"T_347ad_row3_col2\" class=\"data row3 col2\" >There are 2 R's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n",
+       "      <td id=\"T_347ad_row4_col0\" class=\"data row4 col0\" >together:meta-llama/Llama-3-8b-chat-hf</td>\n",
+       "      <td id=\"T_347ad_row4_col1\" class=\"data row4 col1\" >0.248981</td>\n",
+       "      <td id=\"T_347ad_row4_col2\" class=\"data row4 col2\" >There are 2 R's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row5\" class=\"row_heading level0 row5\" >6</th>\n",
+       "      <td id=\"T_347ad_row5_col0\" class=\"data row5 col0\" >openai:gpt-3.5-turbo</td>\n",
+       "      <td id=\"T_347ad_row5_col1\" class=\"data row5 col1\" >0.899374</td>\n",
+       "      <td id=\"T_347ad_row5_col2\" class=\"data row5 col2\" >There are three r's in the word \"strawberry.\"</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row6\" class=\"row_heading level0 row6\" >7</th>\n",
+       "      <td id=\"T_347ad_row6_col0\" class=\"data row6 col0\" >replicate:meta/meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_347ad_row6_col1\" class=\"data row6 col1\" >1.328329</td>\n",
+       "      <td id=\"T_347ad_row6_col2\" class=\"data row6 col2\" >Let me count them for you!\n",
+       "\n",
+       "There are 2 R's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row7\" class=\"row_heading level0 row7\" >8</th>\n",
+       "      <td id=\"T_347ad_row7_col0\" class=\"data row7 col0\" >aws:meta.llama3-1-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_347ad_row7_col1\" class=\"data row7 col1\" >0.494379</td>\n",
+       "      <td id=\"T_347ad_row7_col2\" class=\"data row7 col2\" >There are 3 r's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row8\" class=\"row_heading level0 row8\" >9</th>\n",
+       "      <td id=\"T_347ad_row8_col0\" class=\"data row8 col0\" >groq:llama-3.1-8b-instant</td>\n",
+       "      <td id=\"T_347ad_row8_col1\" class=\"data row8 col1\" >2.364020</td>\n",
+       "      <td id=\"T_347ad_row8_col2\" class=\"data row8 col2\" >There are 3 r's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row9\" class=\"row_heading level0 row9\" >10</th>\n",
+       "      <td id=\"T_347ad_row9_col0\" class=\"data row9 col0\" >fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct</td>\n",
+       "      <td id=\"T_347ad_row9_col1\" class=\"data row9 col1\" >0.434086</td>\n",
+       "      <td id=\"T_347ad_row9_col2\" class=\"data row9 col2\" >There are 3 r's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row10\" class=\"row_heading level0 row10\" >11</th>\n",
+       "      <td id=\"T_347ad_row10_col0\" class=\"data row10 col0\" >together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo</td>\n",
+       "      <td id=\"T_347ad_row10_col1\" class=\"data row10 col1\" >0.327454</td>\n",
+       "      <td id=\"T_347ad_row10_col2\" class=\"data row10 col2\" >There are 3 r's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_347ad_level0_row11\" class=\"row_heading level0 row11\" >12</th>\n",
+       "      <td id=\"T_347ad_row11_col0\" class=\"data row11 col0\" >octo:meta-llama-3.1-8b-instruct</td>\n",
+       "      <td id=\"T_347ad_row11_col1\" class=\"data row11 col1\" >0.592822</td>\n",
+       "      <td id=\"T_347ad_row11_col2\" class=\"data row11 col2\" >There are 2 r's in the word \"strawberry\".</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n"
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x1263a4400>"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "## Which number is bigger?"
+    "display(llms, execution_times, responses)"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "efdf2fd6-f63a-4f9b-af15-1df25590e4fc",
+   "cell_type": "markdown",
+   "id": "cae3fb5f-a173-4a33-b843-65df6d1086f9",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "messages = [\n",
-    "    {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9?\"},\n",
-    "]"
+    "## Which number is bigger?"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
-   "id": "eaa14ed1-c83b-4c8f-bb14-d318bf0c9a60",
+   "execution_count": 12,
+   "id": "efdf2fd6-f63a-4f9b-af15-1df25590e4fc",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "aws:meta.llama3-8b-instruct-v1:0 - 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "groq:llama3-8b-8192 - 9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "octo:meta-llama-3-8b-instruct - 9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "together:meta-llama/Llama-3-8b-chat-hf - 9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "openai:gpt-3.5-turbo - 9.9\n",
-      "==========\n",
-      "replicate:meta/meta-llama-3-8b-instruct - Let me help you with that!\n",
+      "aws:meta.llama3-8b-instruct-v1:0 - 0.49 seconds: 9.9 is bigger than 9.11.\n",
+      "groq:llama3-8b-8192 - 0.20 seconds: 9.11 is bigger than 9.9.\n",
+      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.27 seconds: 9.9 is bigger than 9.11.\n",
+      "octo:meta-llama-3-8b-instruct - 0.29 seconds: 9.11 is bigger than 9.9.\n",
+      "together:meta-llama/Llama-3-8b-chat-hf - 0.70 seconds: 9.11 is bigger than 9.9.\n",
+      "openai:gpt-3.5-turbo - 1.05 seconds: 9.9\n",
+      "replicate:meta/meta-llama-3-8b-instruct - 1.58 seconds: Let me help you with that!\n",
       "\n",
       "9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "aws:meta.llama3-1-8b-instruct-v1:0 - The number 9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "groq:llama-3.1-8b-instant - 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - To compare the two numbers, we need to look at the decimal part. \n",
+      "aws:meta.llama3-1-8b-instruct-v1:0 - 0.83 seconds: To compare these two numbers, we need to look at the decimal part. Since 9.11 has a larger decimal part (0.11) than 9.9 (0.9), 9.11 is bigger.\n",
+      "groq:llama-3.1-8b-instant - 0.23 seconds: 9.9 is bigger than 9.11.\n",
+      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.19 seconds: 9.9 is bigger than 9.11.\n",
+      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.36 seconds: 9.9 is bigger than 9.11.\n",
+      "octo:meta-llama-3.1-8b-instruct - 0.91 seconds: To compare these two numbers, we need to look at the decimal part. \n",
       "\n",
       "9.11 has a decimal part of 0.11, and 9.9 has a decimal part of 0.9. \n",
       "\n",
-      "Since 0.11 is greater than 0.9, 9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "octo:meta-llama-3.1-8b-instruct - 9.9 is bigger than 9.11.\n",
-      "==========\n"
+      "Since 0.11 is greater than 0.9, 9.11 is bigger than 9.9.\n"
      ]
     }
    ],
    "source": [
-    "compare_llm(messages)"
+    "messages = [\n",
+    "    {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9?\"},\n",
+    "]\n",
+    "\n",
+    "responses, execution_times = compare_llm(messages)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
-   "id": "198b213a-b7bf-4cce-8c30-a8408454370b",
+   "execution_count": 13,
+   "id": "eaa14ed1-c83b-4c8f-bb14-d318bf0c9a60",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style type=\"text/css\">\n",
+       "#T_9c77d th {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_9c77d td {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_9c77d_row0_col0, #T_9c77d_row0_col1, #T_9c77d_row0_col2, #T_9c77d_row1_col0, #T_9c77d_row1_col1, #T_9c77d_row1_col2, #T_9c77d_row2_col0, #T_9c77d_row2_col1, #T_9c77d_row2_col2, #T_9c77d_row3_col0, #T_9c77d_row3_col1, #T_9c77d_row3_col2, #T_9c77d_row4_col0, #T_9c77d_row4_col1, #T_9c77d_row4_col2, #T_9c77d_row5_col0, #T_9c77d_row5_col1, #T_9c77d_row5_col2, #T_9c77d_row6_col0, #T_9c77d_row6_col1, #T_9c77d_row6_col2, #T_9c77d_row7_col0, #T_9c77d_row7_col1, #T_9c77d_row7_col2, #T_9c77d_row8_col0, #T_9c77d_row8_col1, #T_9c77d_row8_col2, #T_9c77d_row9_col0, #T_9c77d_row9_col1, #T_9c77d_row9_col2, #T_9c77d_row10_col0, #T_9c77d_row10_col1, #T_9c77d_row10_col2, #T_9c77d_row11_col0, #T_9c77d_row11_col1, #T_9c77d_row11_col2 {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "</style>\n",
+       "<table id=\"T_9c77d\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th class=\"blank level0\" >&nbsp;</th>\n",
+       "      <th id=\"T_9c77d_level0_col0\" class=\"col_heading level0 col0\" >Provider:Model Name</th>\n",
+       "      <th id=\"T_9c77d_level0_col1\" class=\"col_heading level0 col1\" >Execution Time</th>\n",
+       "      <th id=\"T_9c77d_level0_col2\" class=\"col_heading level0 col2\" >Model Response </th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
+       "      <td id=\"T_9c77d_row0_col0\" class=\"data row0 col0\" >aws:meta.llama3-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_9c77d_row0_col1\" class=\"data row0 col1\" >0.489279</td>\n",
+       "      <td id=\"T_9c77d_row0_col2\" class=\"data row0 col2\" >9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
+       "      <td id=\"T_9c77d_row1_col0\" class=\"data row1 col0\" >groq:llama3-8b-8192</td>\n",
+       "      <td id=\"T_9c77d_row1_col1\" class=\"data row1 col1\" >0.200864</td>\n",
+       "      <td id=\"T_9c77d_row1_col2\" class=\"data row1 col2\" >9.11 is bigger than 9.9.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
+       "      <td id=\"T_9c77d_row2_col0\" class=\"data row2 col0\" >fireworks:accounts/fireworks/models/llama-v3-8b-instruct</td>\n",
+       "      <td id=\"T_9c77d_row2_col1\" class=\"data row2 col1\" >0.271625</td>\n",
+       "      <td id=\"T_9c77d_row2_col2\" class=\"data row2 col2\" >9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n",
+       "      <td id=\"T_9c77d_row3_col0\" class=\"data row3 col0\" >octo:meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_9c77d_row3_col1\" class=\"data row3 col1\" >0.294958</td>\n",
+       "      <td id=\"T_9c77d_row3_col2\" class=\"data row3 col2\" >9.11 is bigger than 9.9.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n",
+       "      <td id=\"T_9c77d_row4_col0\" class=\"data row4 col0\" >together:meta-llama/Llama-3-8b-chat-hf</td>\n",
+       "      <td id=\"T_9c77d_row4_col1\" class=\"data row4 col1\" >0.695657</td>\n",
+       "      <td id=\"T_9c77d_row4_col2\" class=\"data row4 col2\" >9.11 is bigger than 9.9.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row5\" class=\"row_heading level0 row5\" >6</th>\n",
+       "      <td id=\"T_9c77d_row5_col0\" class=\"data row5 col0\" >openai:gpt-3.5-turbo</td>\n",
+       "      <td id=\"T_9c77d_row5_col1\" class=\"data row5 col1\" >1.051595</td>\n",
+       "      <td id=\"T_9c77d_row5_col2\" class=\"data row5 col2\" >9.9</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row6\" class=\"row_heading level0 row6\" >7</th>\n",
+       "      <td id=\"T_9c77d_row6_col0\" class=\"data row6 col0\" >replicate:meta/meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_9c77d_row6_col1\" class=\"data row6 col1\" >1.580146</td>\n",
+       "      <td id=\"T_9c77d_row6_col2\" class=\"data row6 col2\" >Let me help you with that!\n",
+       "\n",
+       "9.11 is bigger than 9.9.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row7\" class=\"row_heading level0 row7\" >8</th>\n",
+       "      <td id=\"T_9c77d_row7_col0\" class=\"data row7 col0\" >aws:meta.llama3-1-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_9c77d_row7_col1\" class=\"data row7 col1\" >0.828657</td>\n",
+       "      <td id=\"T_9c77d_row7_col2\" class=\"data row7 col2\" >To compare these two numbers, we need to look at the decimal part. Since 9.11 has a larger decimal part (0.11) than 9.9 (0.9), 9.11 is bigger.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row8\" class=\"row_heading level0 row8\" >9</th>\n",
+       "      <td id=\"T_9c77d_row8_col0\" class=\"data row8 col0\" >groq:llama-3.1-8b-instant</td>\n",
+       "      <td id=\"T_9c77d_row8_col1\" class=\"data row8 col1\" >0.232353</td>\n",
+       "      <td id=\"T_9c77d_row8_col2\" class=\"data row8 col2\" >9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row9\" class=\"row_heading level0 row9\" >10</th>\n",
+       "      <td id=\"T_9c77d_row9_col0\" class=\"data row9 col0\" >fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct</td>\n",
+       "      <td id=\"T_9c77d_row9_col1\" class=\"data row9 col1\" >0.192978</td>\n",
+       "      <td id=\"T_9c77d_row9_col2\" class=\"data row9 col2\" >9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row10\" class=\"row_heading level0 row10\" >11</th>\n",
+       "      <td id=\"T_9c77d_row10_col0\" class=\"data row10 col0\" >together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo</td>\n",
+       "      <td id=\"T_9c77d_row10_col1\" class=\"data row10 col1\" >0.364679</td>\n",
+       "      <td id=\"T_9c77d_row10_col2\" class=\"data row10 col2\" >9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_9c77d_level0_row11\" class=\"row_heading level0 row11\" >12</th>\n",
+       "      <td id=\"T_9c77d_row11_col0\" class=\"data row11 col0\" >octo:meta-llama-3.1-8b-instruct</td>\n",
+       "      <td id=\"T_9c77d_row11_col1\" class=\"data row11 col1\" >0.911637</td>\n",
+       "      <td id=\"T_9c77d_row11_col2\" class=\"data row11 col2\" >To compare these two numbers, we need to look at the decimal part. \n",
+       "\n",
+       "9.11 has a decimal part of 0.11, and 9.9 has a decimal part of 0.9. \n",
+       "\n",
+       "Since 0.11 is greater than 0.9, 9.11 is bigger than 9.9.</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n"
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x12639c160>"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "messages = [\n",
-    "    {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9? Think step by step.\"},\n",
-    "]"
+    "display(llms, execution_times, responses)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
-   "id": "4a3fb8fc-a7a2-47d3-9db2-792f03cc47c2",
+   "execution_count": 14,
+   "id": "198b213a-b7bf-4cce-8c30-a8408454370b",
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "aws:meta.llama3-8b-instruct-v1:0 - Let's break it down step by step!\n",
+      "aws:meta.llama3-8b-instruct-v1:0 - 1.83 seconds: Let's break it down step by step:\n",
       "\n",
-      "1. Compare the whole numbers: Both numbers have the same whole number part, which is 9.\n",
-      "2. Compare the decimal parts: 9.11 has a decimal part of 0.11, while 9.9 has a decimal part of 0.9.\n",
-      "3. Since 0.11 is smaller than 0.9, 9.11 is smaller than 9.9.\n",
+      "1. Both numbers have the same first digit, which is 9.\n",
+      "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+      "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+      "4. Therefore, 9.9 is greater than 9.11.\n",
       "\n",
-      "So, the correct answer is: 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "groq:llama3-8b-8192 - Let's break it down step by step:\n",
+      "So, the answer is: 9.9 is bigger than 9.11.\n",
+      "groq:llama3-8b-8192 - 0.31 seconds: Let's break it down step by step:\n",
       "\n",
       "1. Both numbers have the same first digit, which is 9.\n",
       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
@@ -400,15 +841,15 @@
       "4. Therefore, 9.9 is greater than 9.11.\n",
       "\n",
       "So, the answer is: 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - Let's break it down step by step!\n",
+      "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.72 seconds: Let's break it down step by step!\n",
       "\n",
-      "1. Compare the whole numbers: Both numbers have the same whole part, which is 9.\n",
-      "2. Compare the decimal parts: 0.11 is less than 0.9.\n",
+      "1. Both numbers have the same first digit, which is 9.\n",
+      "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+      "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+      "4. Therefore, 9.9 is greater than 9.11.\n",
       "\n",
-      "So, 9.11 is less than 9.9.\n",
-      "==========\n",
-      "octo:meta-llama-3-8b-instruct - Let's break it down step by step:\n",
+      "So, the correct answer is: 9.9 is bigger than 9.11!\n",
+      "octo:meta-llama-3-8b-instruct - 1.40 seconds: Let's break it down step by step:\n",
       "\n",
       "1. Both numbers have the same first digit, which is 9.\n",
       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
@@ -416,8 +857,7 @@
       "4. Therefore, 9.9 is greater than 9.11.\n",
       "\n",
       "So, the answer is: 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "together:meta-llama/Llama-3-8b-chat-hf - Let's break it down step by step:\n",
+      "together:meta-llama/Llama-3-8b-chat-hf - 0.76 seconds: Let's break it down step by step:\n",
       "\n",
       "1. Both numbers have the same first digit, which is 9.\n",
       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
@@ -425,14 +865,10 @@
       "4. Therefore, 9.9 is greater than 9.11.\n",
       "\n",
       "So, the answer is: 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "openai:gpt-3.5-turbo - To determine which number is bigger, we can compare the whole numbers first. In this case, both numbers have a whole number of 9.\n",
-      "\n",
-      "Next, we can compare the decimal parts. In 9.11, the decimal part is 0.11, and in 9.9, the decimal part is 0.9.\n",
+      "openai:gpt-3.5-turbo - 1.26 seconds: To determine which number is bigger, we can compare the whole numbers first. In this case, both numbers have a whole number of 9. Since they are the same, we need to look at the decimal portion.\n",
       "\n",
-      "Since 0.11 is greater than 0.9, we can conclude that 9.11 is bigger than 9.9.\n",
-      "==========\n",
-      "replicate:meta/meta-llama-3-8b-instruct - Let's break it down step by step!\n",
+      "9.11 is bigger than 9.9 because 0.11 is greater than 0.9.\n",
+      "replicate:meta/meta-llama-3-8b-instruct - 2.31 seconds: Let's break it down step by step!\n",
       "\n",
       "1. Both numbers start with the same digits: 9.1\n",
       "2. The next digit is also the same: 1\n",
@@ -441,16 +877,14 @@
       "Since 9 is greater than 1, the correct answer is:\n",
       "\n",
       "9.9 is bigger than 9.11\n",
-      "==========\n",
-      "aws:meta.llama3-1-8b-instruct-v1:0 - Let's compare the two numbers step by step.\n",
+      "aws:meta.llama3-1-8b-instruct-v1:0 - 1.37 seconds: Let's compare the two numbers step by step.\n",
       "\n",
       "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
       "2. We're left with 0.11 and 0.9. Which one is bigger?\n",
       "3. Since 0.9 is bigger than 0.11, the whole number 9.9 is bigger than 9.11.\n",
       "\n",
       "So, the answer is: 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "groq:llama-3.1-8b-instant - Let's compare the two numbers step by step:\n",
+      "groq:llama-3.1-8b-instant - 0.46 seconds: Let's compare the two numbers step by step:\n",
       "\n",
       "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
       "2. We're left with 0.11 and 0.9.\n",
@@ -458,38 +892,232 @@
       "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n",
       "\n",
       "Therefore, 9.9 is bigger than 9.11.\n",
-      "==========\n",
-      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - Let's compare the two numbers step by step.\n",
-      "\n",
-      "1. Both numbers have the same whole number part, which is 9.\n",
-      "2. Now, let's look at the decimal part. 9.11 has a decimal part of 0.11, while 9.9 has a decimal part of 0.9.\n",
-      "3. Since 0.11 is greater than 0.9, the number 9.11 is greater than 9.9.\n",
-      "\n",
-      "So, the answer is: 9.11 is bigger.\n",
-      "==========\n",
-      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - To determine which number is bigger, let's break it down step by step:\n",
+      "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.58 seconds: Let's compare the two numbers step by step:\n",
       "\n",
       "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
       "2. We're left with 0.11 and 0.9.\n",
-      "3. 0.9 is greater than 0.11 because 9 is greater than 11 when comparing the same number of decimal places.\n",
-      "4. Since 0.9 is greater than 0.11, and both numbers start with 9, the original number 9.9 is greater than 9.11.\n",
+      "3. 0.9 is bigger than 0.11.\n",
+      "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n",
+      "\n",
+      "Therefore, 9.9 is bigger than 9.11.\n",
+      "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.99 seconds: Let's compare the two numbers step by step:\n",
       "\n",
-      "Therefore, 9.9 is the bigger number.\n",
-      "==========\n",
-      "octo:meta-llama-3.1-8b-instruct - Let's compare the two numbers step by step:\n",
+      "1. Both numbers start with 9, so they are equal in the first digit.\n",
+      "2. The next digit is 1 in 9.11 and 9 in 9.9. Since 9 is greater than 1, 9.9 is bigger than 9.11 in the second digit.\n",
+      "3. Since the first two digits are the same in both numbers (9.1), we need to look at the third digit to break the tie. In 9.11, the third digit is 1, and in 9.9, the third digit is 9. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n",
       "\n",
-      "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
-      "2. We're left with 0.11 and 0.9.\n",
-      "3. 0.9 is bigger than 0.11 because 9 is bigger than 11.\n",
-      "4. Since 0.9 is bigger than 0.11, and both numbers started with 9, 9.9 is bigger than 9.11.\n",
+      "Therefore, 9.9 is bigger than 9.11.\n",
+      "octo:meta-llama-3.1-8b-instruct - 1.55 seconds: To determine which number is bigger, let's compare them step by step:\n",
       "\n",
-      "So, the answer is: 9.9 is bigger than 9.11.\n",
-      "==========\n"
+      "1. Both numbers start with 9, so they are equal in the first digit.\n",
+      "2. Now, let's look at the second digit. 9.11 has a 1, while 9.9 has a 9.\n",
+      "3. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n",
+      "\n",
+      "So, the number 9.9 is bigger than 9.11.\n"
      ]
     }
    ],
    "source": [
-    "compare_llm(messages)"
+    "messages = [\n",
+    "    {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9? Think step by step.\"},\n",
+    "]\n",
+    "\n",
+    "responses, execution_times = compare_llm(messages)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "4a3fb8fc-a7a2-47d3-9db2-792f03cc47c2",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style type=\"text/css\">\n",
+       "#T_cac71 th {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_cac71 td {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "#T_cac71_row0_col0, #T_cac71_row0_col1, #T_cac71_row0_col2, #T_cac71_row1_col0, #T_cac71_row1_col1, #T_cac71_row1_col2, #T_cac71_row2_col0, #T_cac71_row2_col1, #T_cac71_row2_col2, #T_cac71_row3_col0, #T_cac71_row3_col1, #T_cac71_row3_col2, #T_cac71_row4_col0, #T_cac71_row4_col1, #T_cac71_row4_col2, #T_cac71_row5_col0, #T_cac71_row5_col1, #T_cac71_row5_col2, #T_cac71_row6_col0, #T_cac71_row6_col1, #T_cac71_row6_col2, #T_cac71_row7_col0, #T_cac71_row7_col1, #T_cac71_row7_col2, #T_cac71_row8_col0, #T_cac71_row8_col1, #T_cac71_row8_col2, #T_cac71_row9_col0, #T_cac71_row9_col1, #T_cac71_row9_col2, #T_cac71_row10_col0, #T_cac71_row10_col1, #T_cac71_row10_col2, #T_cac71_row11_col0, #T_cac71_row11_col1, #T_cac71_row11_col2 {\n",
+       "  text-align: center;\n",
+       "}\n",
+       "</style>\n",
+       "<table id=\"T_cac71\">\n",
+       "  <thead>\n",
+       "    <tr>\n",
+       "      <th class=\"blank level0\" >&nbsp;</th>\n",
+       "      <th id=\"T_cac71_level0_col0\" class=\"col_heading level0 col0\" >Provider:Model Name</th>\n",
+       "      <th id=\"T_cac71_level0_col1\" class=\"col_heading level0 col1\" >Execution Time</th>\n",
+       "      <th id=\"T_cac71_level0_col2\" class=\"col_heading level0 col2\" >Model Response </th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row0\" class=\"row_heading level0 row0\" >1</th>\n",
+       "      <td id=\"T_cac71_row0_col0\" class=\"data row0 col0\" >aws:meta.llama3-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_cac71_row0_col1\" class=\"data row0 col1\" >1.830247</td>\n",
+       "      <td id=\"T_cac71_row0_col2\" class=\"data row0 col2\" >Let's break it down step by step:\n",
+       "\n",
+       "1. Both numbers have the same first digit, which is 9.\n",
+       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+       "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+       "4. Therefore, 9.9 is greater than 9.11.\n",
+       "\n",
+       "So, the answer is: 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row1\" class=\"row_heading level0 row1\" >2</th>\n",
+       "      <td id=\"T_cac71_row1_col0\" class=\"data row1 col0\" >groq:llama3-8b-8192</td>\n",
+       "      <td id=\"T_cac71_row1_col1\" class=\"data row1 col1\" >0.307869</td>\n",
+       "      <td id=\"T_cac71_row1_col2\" class=\"data row1 col2\" >Let's break it down step by step:\n",
+       "\n",
+       "1. Both numbers have the same first digit, which is 9.\n",
+       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+       "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+       "4. Therefore, 9.9 is greater than 9.11.\n",
+       "\n",
+       "So, the answer is: 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row2\" class=\"row_heading level0 row2\" >3</th>\n",
+       "      <td id=\"T_cac71_row2_col0\" class=\"data row2 col0\" >fireworks:accounts/fireworks/models/llama-v3-8b-instruct</td>\n",
+       "      <td id=\"T_cac71_row2_col1\" class=\"data row2 col1\" >0.716259</td>\n",
+       "      <td id=\"T_cac71_row2_col2\" class=\"data row2 col2\" >Let's break it down step by step!\n",
+       "\n",
+       "1. Both numbers have the same first digit, which is 9.\n",
+       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+       "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+       "4. Therefore, 9.9 is greater than 9.11.\n",
+       "\n",
+       "So, the correct answer is: 9.9 is bigger than 9.11!</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row3\" class=\"row_heading level0 row3\" >4</th>\n",
+       "      <td id=\"T_cac71_row3_col0\" class=\"data row3 col0\" >octo:meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_cac71_row3_col1\" class=\"data row3 col1\" >1.397936</td>\n",
+       "      <td id=\"T_cac71_row3_col2\" class=\"data row3 col2\" >Let's break it down step by step:\n",
+       "\n",
+       "1. Both numbers have the same first digit, which is 9.\n",
+       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+       "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+       "4. Therefore, 9.9 is greater than 9.11.\n",
+       "\n",
+       "So, the answer is: 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row4\" class=\"row_heading level0 row4\" >5</th>\n",
+       "      <td id=\"T_cac71_row4_col0\" class=\"data row4 col0\" >together:meta-llama/Llama-3-8b-chat-hf</td>\n",
+       "      <td id=\"T_cac71_row4_col1\" class=\"data row4 col1\" >0.764065</td>\n",
+       "      <td id=\"T_cac71_row4_col2\" class=\"data row4 col2\" >Let's break it down step by step:\n",
+       "\n",
+       "1. Both numbers have the same first digit, which is 9.\n",
+       "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n",
+       "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n",
+       "4. Therefore, 9.9 is greater than 9.11.\n",
+       "\n",
+       "So, the answer is: 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row5\" class=\"row_heading level0 row5\" >6</th>\n",
+       "      <td id=\"T_cac71_row5_col0\" class=\"data row5 col0\" >openai:gpt-3.5-turbo</td>\n",
+       "      <td id=\"T_cac71_row5_col1\" class=\"data row5 col1\" >1.262903</td>\n",
+       "      <td id=\"T_cac71_row5_col2\" class=\"data row5 col2\" >To determine which number is bigger, we can compare the whole numbers first. In this case, both numbers have a whole number of 9. Since they are the same, we need to look at the decimal portion.\n",
+       "\n",
+       "9.11 is bigger than 9.9 because 0.11 is greater than 0.9.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row6\" class=\"row_heading level0 row6\" >7</th>\n",
+       "      <td id=\"T_cac71_row6_col0\" class=\"data row6 col0\" >replicate:meta/meta-llama-3-8b-instruct</td>\n",
+       "      <td id=\"T_cac71_row6_col1\" class=\"data row6 col1\" >2.307123</td>\n",
+       "      <td id=\"T_cac71_row6_col2\" class=\"data row6 col2\" >Let's break it down step by step!\n",
+       "\n",
+       "1. Both numbers start with the same digits: 9.1\n",
+       "2. The next digit is also the same: 1\n",
+       "3. The last digit is different: 9.11 has a 1, while 9.9 has a 9\n",
+       "\n",
+       "Since 9 is greater than 1, the correct answer is:\n",
+       "\n",
+       "9.9 is bigger than 9.11</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row7\" class=\"row_heading level0 row7\" >8</th>\n",
+       "      <td id=\"T_cac71_row7_col0\" class=\"data row7 col0\" >aws:meta.llama3-1-8b-instruct-v1:0</td>\n",
+       "      <td id=\"T_cac71_row7_col1\" class=\"data row7 col1\" >1.374292</td>\n",
+       "      <td id=\"T_cac71_row7_col2\" class=\"data row7 col2\" >Let's compare the two numbers step by step.\n",
+       "\n",
+       "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
+       "2. We're left with 0.11 and 0.9. Which one is bigger?\n",
+       "3. Since 0.9 is bigger than 0.11, the whole number 9.9 is bigger than 9.11.\n",
+       "\n",
+       "So, the answer is: 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row8\" class=\"row_heading level0 row8\" >9</th>\n",
+       "      <td id=\"T_cac71_row8_col0\" class=\"data row8 col0\" >groq:llama-3.1-8b-instant</td>\n",
+       "      <td id=\"T_cac71_row8_col1\" class=\"data row8 col1\" >0.463357</td>\n",
+       "      <td id=\"T_cac71_row8_col2\" class=\"data row8 col2\" >Let's compare the two numbers step by step:\n",
+       "\n",
+       "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
+       "2. We're left with 0.11 and 0.9.\n",
+       "3. 0.9 is bigger than 0.11.\n",
+       "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n",
+       "\n",
+       "Therefore, 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row9\" class=\"row_heading level0 row9\" >10</th>\n",
+       "      <td id=\"T_cac71_row9_col0\" class=\"data row9 col0\" >fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct</td>\n",
+       "      <td id=\"T_cac71_row9_col1\" class=\"data row9 col1\" >0.584036</td>\n",
+       "      <td id=\"T_cac71_row9_col2\" class=\"data row9 col2\" >Let's compare the two numbers step by step:\n",
+       "\n",
+       "1. Both numbers start with 9, so we can ignore the 9 for now.\n",
+       "2. We're left with 0.11 and 0.9.\n",
+       "3. 0.9 is bigger than 0.11.\n",
+       "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n",
+       "\n",
+       "Therefore, 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row10\" class=\"row_heading level0 row10\" >11</th>\n",
+       "      <td id=\"T_cac71_row10_col0\" class=\"data row10 col0\" >together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo</td>\n",
+       "      <td id=\"T_cac71_row10_col1\" class=\"data row10 col1\" >0.994566</td>\n",
+       "      <td id=\"T_cac71_row10_col2\" class=\"data row10 col2\" >Let's compare the two numbers step by step:\n",
+       "\n",
+       "1. Both numbers start with 9, so they are equal in the first digit.\n",
+       "2. The next digit is 1 in 9.11 and 9 in 9.9. Since 9 is greater than 1, 9.9 is bigger than 9.11 in the second digit.\n",
+       "3. Since the first two digits are the same in both numbers (9.1), we need to look at the third digit to break the tie. In 9.11, the third digit is 1, and in 9.9, the third digit is 9. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n",
+       "\n",
+       "Therefore, 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th id=\"T_cac71_level0_row11\" class=\"row_heading level0 row11\" >12</th>\n",
+       "      <td id=\"T_cac71_row11_col0\" class=\"data row11 col0\" >octo:meta-llama-3.1-8b-instruct</td>\n",
+       "      <td id=\"T_cac71_row11_col1\" class=\"data row11 col1\" >1.551550</td>\n",
+       "      <td id=\"T_cac71_row11_col2\" class=\"data row11 col2\" >To determine which number is bigger, let's compare them step by step:\n",
+       "\n",
+       "1. Both numbers start with 9, so they are equal in the first digit.\n",
+       "2. Now, let's look at the second digit. 9.11 has a 1, while 9.9 has a 9.\n",
+       "3. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n",
+       "\n",
+       "So, the number 9.9 is bigger than 9.11.</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n"
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x1263c25c0>"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "display(llms, execution_times, responses)"
    ]
   },
   {
@@ -500,9 +1128,17 @@
     "## Takeaways\n",
     "1. Not all LLMs are created equal - not even all Llama 3 (or 3.1) are created equal (by different providers).\n",
     "2. Ask LLM to think step by step may help improve its reasoning.\n",
-    "3. The way LLM was trained and tokenized could lead it to some weird reasoning.\n",
+    "3. The way tokenization works in LLMs can lead to a lot of weird model behavior (see Andrej Karpathy's awesome [video](https://www.youtube.com/watch?v=zduSFxRajkE) for a deep dive).\n",
     "4. A more comprehensive benchmark would be desired, but a quick LLM comparison like shown here can be the first step."
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "04e13c90-3680-4f1d-8f65-768a78b7adb2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {