From f3a3cd4e5bd5c0acd5707f1e67b2a465ef51e366 Mon Sep 17 00:00:00 2001 From: Jeff Tang Date: Tue, 6 Aug 2024 22:12:13 -0700 Subject: [PATCH] record execution time for each llm; display results in a pandas table for easy comparison --- examples/llm_reasoning.ipynb | 1000 +++++++++++++++++++++++++++------- 1 file changed, 818 insertions(+), 182 deletions(-) diff --git a/examples/llm_reasoning.ipynb b/examples/llm_reasoning.ipynb index ebbe2ad4..c3869cc2 100644 --- a/examples/llm_reasoning.ipynb +++ b/examples/llm_reasoning.ipynb @@ -114,6 +114,8 @@ "metadata": {}, "outputs": [], "source": [ + "import time\n", + "\n", "llms = [\"aws:meta.llama3-8b-instruct-v1:0\",\n", " \"groq:llama3-8b-8192\",\n", " \"fireworks:accounts/fireworks/models/llama-v3-8b-instruct\",\n", @@ -127,13 +129,20 @@ " \"fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct\",\n", " \"together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\",\n", " \"octo:meta-llama-3.1-8b-instruct\",\n", - " \n", " ]\n", "\n", "def compare_llm(messages):\n", + " execution_times = []\n", + " responses = []\n", " for llm in llms:\n", + " start_time = time.time()\n", " response = client.chat.completions.create(model=llm, messages=messages)\n", - " print(f\"{llm} - {response.choices[0].message.content.strip()}\\n==========\")" + " end_time = time.time()\n", + " execution_time = end_time - start_time\n", + " responses.append(response.choices[0].message.content.strip())\n", + " execution_times.append(execution_time)\n", + " print(f\"{llm} - {execution_time:.2f} seconds: {response.choices[0].message.content.strip()}\")\n", + " return responses, execution_times" ] }, { @@ -149,57 +158,181 @@ "execution_count": 5, "id": "f3c4a8ef-e23b-4d4a-8561-3e5a2a866bd1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "aws:meta.llama3-8b-instruct-v1:0 - 2.38 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", + "groq:llama3-8b-8192 - 2.24 seconds: Tom 
Cruise's mother is Mary Lee South (née Pfeiffer).\n", + "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.92 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a social worker.\n", + "octo:meta-llama-3-8b-instruct - 1.82 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a homemaker.\n", + "together:meta-llama/Llama-3-8b-chat-hf - 0.61 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", + "openai:gpt-3.5-turbo - 1.00 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n", + "replicate:meta/meta-llama-3-8b-instruct - 1.36 seconds: Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", + "aws:meta.llama3-1-8b-instruct-v1:0 - 0.45 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n", + "groq:llama-3.1-8b-instant - 0.84 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n", + "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.37 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n", + "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.26 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n", + "octo:meta-llama-3.1-8b-instruct - 0.32 seconds: Tom Cruise's mother is Mary Lee Pfeiffer.\n" + ] + } + ], "source": [ "messages = [\n", " {\"role\": \"user\", \"content\": \"Who is Tom Cruise's mother?\"},\n", - "]" + "]\n", + "\n", + "responses, execution_times = compare_llm(messages)" ] }, { "cell_type": "code", "execution_count": 6, - "id": "3e901285-6ca7-4e82-8829-12b24fb9ff5d", + "id": "769f7f42-2adb-4903-ab17-3143a5d950ce", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "def display(llms, execution_times, responses):\n", + " data = {\n", + " 'Provider:Model Name': llms,\n", + " 'Execution Time': execution_times,\n", + " 'Model Response ': responses\n", + " }\n", + " \n", + " df = pd.DataFrame(data)\n", + " df.index = df.index + 1\n", + " styled_df = df.style.set_table_styles(\n", + " 
[{'selector': 'th', 'props': [('text-align', 'center')]}, \n", + " {'selector': 'td', 'props': [('text-align', 'center')]}]\n", + " ).set_properties(**{'text-align': 'center'})\n", + " \n", + " return styled_df " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "d2359ad5-9f0b-4bd6-9838-54df91de0fb3", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "aws:meta.llama3-8b-instruct-v1:0 - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", - "==========\n", - "groq:llama3-8b-8192 - Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a homemaker.\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", - "==========\n", - "octo:meta-llama-3-8b-instruct - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", - "==========\n", - "together:meta-llama/Llama-3-8b-chat-hf - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", - "==========\n", - "openai:gpt-3.5-turbo - Tom Cruise's mother is Mary Lee Pfeiffer.\n", - "==========\n", - "replicate:meta/meta-llama-3-8b-instruct - Tom Cruise's mother is Mary Lee South (née Pfeiffer).\n", - "==========\n", - "aws:meta.llama3-1-8b-instruct-v1:0 - Tom Cruise's mother is Mary Lee Pfeiffer.\n", - "==========\n", - "groq:llama-3.1-8b-instant - Tom Cruise's mother is Mary Lee Pfeiffer.\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - Tom Cruise's mother is Mary Lee Pfeiffer.\n", - "==========\n", - "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - Tom Cruise's mother is Mary Lee Pfeiffer.\n", - "==========\n", - "octo:meta-llama-3.1-8b-instruct - Tom Cruise's mother is Mary Lee Pfeiffer.\n", - "==========\n" - ] + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Provider:Model NameExecution TimeModel Response
1aws:meta.llama3-8b-instruct-v1:02.383425Tom Cruise's mother is Mary Lee South (née Pfeiffer).
2groq:llama3-8b-81922.241169Tom Cruise's mother is Mary Lee South (née Pfeiffer).
3fireworks:accounts/fireworks/models/llama-v3-8b-instruct0.916995Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a social worker.
4octo:meta-llama-3-8b-instruct1.822360Tom Cruise's mother is Mary Lee South (née Pfeiffer). She was a special education teacher and a homemaker.
5together:meta-llama/Llama-3-8b-chat-hf0.607085Tom Cruise's mother is Mary Lee South (née Pfeiffer).
6openai:gpt-3.5-turbo1.002106Tom Cruise's mother is Mary Lee Pfeiffer.
7replicate:meta/meta-llama-3-8b-instruct1.362718Tom Cruise's mother is Mary Lee South (née Pfeiffer).
8aws:meta.llama3-1-8b-instruct-v1:00.454378Tom Cruise's mother is Mary Lee Pfeiffer.
9groq:llama-3.1-8b-instant0.835516Tom Cruise's mother is Mary Lee Pfeiffer.
10fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct0.371963Tom Cruise's mother is Mary Lee Pfeiffer.
11together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo0.264464Tom Cruise's mother is Mary Lee Pfeiffer.
12octo:meta-llama-3.1-8b-instruct0.315450Tom Cruise's mother is Mary Lee Pfeiffer.
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "compare_llm(messages)" + "display(llms, execution_times, responses)" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "id": "399f6cca-7f34-4a91-aab0-070560640033", "metadata": {}, "outputs": [ @@ -207,30 +340,18 @@ "name": "stdout", "output_type": "stream", "text": [ - "aws:meta.llama3-8b-instruct-v1:0 - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. Can you provide more context or details about who Mary Lee Pfeiffer is or why you are looking for information about her son?\n", - "==========\n", - "groq:llama3-8b-8192 - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - Mary Lee Pfeiffer is a well-known American artist, and her son is none other than the famous artist and sculptor, John Pfeiffer!\n", - "==========\n", - "octo:meta-llama-3-8b-instruct - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n", - "==========\n", - "together:meta-llama/Llama-3-8b-chat-hf - I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. 
Can you provide more context or clarify who Mary Lee Pfeiffer is?\n", - "==========\n", - "openai:gpt-3.5-turbo - Mary Lee Pfeiffer's son is actor and filmmaker Joaquin Phoenix.\n", - "==========\n", - "replicate:meta/meta-llama-3-8b-instruct - According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.\n", - "==========\n", - "aws:meta.llama3-1-8b-instruct-v1:0 - I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n", - "==========\n", - "groq:llama-3.1-8b-instant - I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - I don't have information on Mary Lee Pfeiffer's son.\n", - "==========\n", - "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - I am unable to verify who Mary Lee Pfeiffer's son is.\n", - "==========\n", - "octo:meta-llama-3.1-8b-instruct - I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n", - "==========\n" + "aws:meta.llama3-8b-instruct-v1:0 - 1.23 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. Can you provide more context or details about who Mary Lee Pfeiffer is or why you are looking for information about her son?\n", + "groq:llama3-8b-8192 - 0.39 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. 
Can you provide more context or clarify who Mary Lee Pfeiffer is?\n", + "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.44 seconds: According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.\n", + "octo:meta-llama-3-8b-instruct - 1.25 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n", + "together:meta-llama/Llama-3-8b-chat-hf - 0.92 seconds: I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?\n", + "openai:gpt-3.5-turbo - 0.64 seconds: Mary Lee Pfeiffer's son is actor and filmmaker Joaquin Phoenix.\n", + "replicate:meta/meta-llama-3-8b-instruct - 1.38 seconds: According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.\n", + "aws:meta.llama3-1-8b-instruct-v1:0 - 0.64 seconds: I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n", + "groq:llama-3.1-8b-instant - 1.06 seconds: I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n", + "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.39 seconds: I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?\n", + "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.79 seconds: I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. 
If you could provide more context or clarify who Mary Lee Pfeiffer is, I may be able to help you better.\n", + "octo:meta-llama-3.1-8b-instruct - 0.76 seconds: I am unable to verify who Mary Lee Pfeiffer's son is.\n" ] } ], @@ -238,7 +359,126 @@ "messages = [\n", " {\"role\": \"user\", \"content\": \"Who is Mary Lee Pfeiffer's son?\"},\n", "]\n", - "compare_llm(messages)" + "\n", + "responses, execution_times = compare_llm(messages)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "eee7704d-a187-41bc-b119-c94461d0ee74", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Provider:Model NameExecution TimeModel Response
1aws:meta.llama3-8b-instruct-v1:01.225959I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. Can you provide more context or details about who Mary Lee Pfeiffer is or why you are looking for information about her son?
2groq:llama3-8b-81920.391800I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?
3fireworks:accounts/fireworks/models/llama-v3-8b-instruct0.438607According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.
4octo:meta-llama-3-8b-instruct1.250298I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?
5together:meta-llama/Llama-3-8b-chat-hf0.924522I apologize, but I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private or personal matter, or that the person is not a public figure. Can you provide more context or clarify who Mary Lee Pfeiffer is?
6openai:gpt-3.5-turbo0.637278Mary Lee Pfeiffer's son is actor and filmmaker Joaquin Phoenix.
7replicate:meta/meta-llama-3-8b-instruct1.375630According to my knowledge, Mary Lee Pfeiffer's son is John Pfeiffer.
8aws:meta.llama3-1-8b-instruct-v1:00.639018I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?
9groq:llama-3.1-8b-instant1.059837I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?
10fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct0.387835I don't have information on Mary Lee Pfeiffer's son. Is there something else I can help you with?
11together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo0.788913I couldn't find any information on a person named Mary Lee Pfeiffer or her son. It's possible that this is a private individual and not a public figure, or that the name is not well-known. If you could provide more context or clarify who Mary Lee Pfeiffer is, I may be able to help you better.
12octo:meta-llama-3.1-8b-instruct0.763321I am unable to verify who Mary Lee Pfeiffer's son is.
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(llms, execution_times, responses)" ] }, { @@ -251,7 +491,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "e537871e-68b6-44c3-886a-d3ebe7a692c1", "metadata": {}, "outputs": [ @@ -259,32 +499,20 @@ "name": "stdout", "output_type": "stream", "text": [ - "aws:meta.llama3-8b-instruct-v1:0 - There are 2 R's in the word \"strawberry\".\n", - "==========\n", - "groq:llama3-8b-8192 - There are 2 R's in the word \"strawberry\".\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - There are 2 R's in the word \"strawberry\".\n", - "==========\n", - "octo:meta-llama-3-8b-instruct - There are 2 R's in the word \"strawberry\".\n", - "==========\n", - "together:meta-llama/Llama-3-8b-chat-hf - There are 2 R's in the word \"strawberry\".\n", - "==========\n", - "openai:gpt-3.5-turbo - There are three r's in the word \"strawberry.\"\n", - "==========\n", - "replicate:meta/meta-llama-3-8b-instruct - Let me count them for you!\n", + "aws:meta.llama3-8b-instruct-v1:0 - 0.48 seconds: There are 2 R's in the word \"strawberry\".\n", + "groq:llama3-8b-8192 - 0.16 seconds: There are 2 R's in the word \"strawberry\".\n", + "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.25 seconds: There are 2 R's in the word \"strawberry\".\n", + "octo:meta-llama-3-8b-instruct - 0.31 seconds: There are 2 R's in the word \"strawberry\".\n", + "together:meta-llama/Llama-3-8b-chat-hf - 0.25 seconds: There are 2 R's in the word \"strawberry\".\n", + "openai:gpt-3.5-turbo - 0.90 seconds: There are three r's in the word \"strawberry.\"\n", + "replicate:meta/meta-llama-3-8b-instruct - 1.33 seconds: Let me count them for you!\n", "\n", "There are 2 R's in the word \"strawberry\".\n", - "==========\n", - "aws:meta.llama3-1-8b-instruct-v1:0 - There are 3 r's in the word 
\"strawberry\".\n", - "==========\n", - "groq:llama-3.1-8b-instant - There are 3 r's in the word \"strawberry\".\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - There are 2 r's in the word strawberry.\n", - "==========\n", - "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - There are 2 r's in the word strawberry.\n", - "==========\n", - "octo:meta-llama-3.1-8b-instruct - There are 3 r's in the word \"strawberry\".\n", - "==========\n" + "aws:meta.llama3-1-8b-instruct-v1:0 - 0.49 seconds: There are 3 r's in the word \"strawberry\".\n", + "groq:llama-3.1-8b-instant - 2.36 seconds: There are 3 r's in the word \"strawberry\".\n", + "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.43 seconds: There are 3 r's in the word \"strawberry\".\n", + "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.33 seconds: There are 3 r's in the word \"strawberry\".\n", + "octo:meta-llama-3.1-8b-instruct - 0.59 seconds: There are 2 r's in the word \"strawberry\".\n" ] } ], @@ -292,107 +520,320 @@ "messages = [\n", " {\"role\": \"user\", \"content\": \"How many r's in the word strawberry?\"},\n", "]\n", - "compare_llm(messages)" + "\n", + "responses, execution_times = compare_llm(messages)" ] }, { - "cell_type": "markdown", - "id": "cae3fb5f-a173-4a33-b843-65df6d1086f9", + "cell_type": "code", + "execution_count": 11, + "id": "5678e393-4967-49f1-9e0f-251471dc92b7", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Provider:Model NameExecution TimeModel Response
1aws:meta.llama3-8b-instruct-v1:00.480391There are 2 R's in the word \"strawberry\".
2groq:llama3-8b-81920.159436There are 2 R's in the word \"strawberry\".
3fireworks:accounts/fireworks/models/llama-v3-8b-instruct0.254061There are 2 R's in the word \"strawberry\".
4octo:meta-llama-3-8b-instruct0.314966There are 2 R's in the word \"strawberry\".
5together:meta-llama/Llama-3-8b-chat-hf0.248981There are 2 R's in the word \"strawberry\".
6openai:gpt-3.5-turbo0.899374There are three r's in the word \"strawberry.\"
7replicate:meta/meta-llama-3-8b-instruct1.328329Let me count them for you!\n", + "\n", + "There are 2 R's in the word \"strawberry\".
8aws:meta.llama3-1-8b-instruct-v1:00.494379There are 3 r's in the word \"strawberry\".
9groq:llama-3.1-8b-instant2.364020There are 3 r's in the word \"strawberry\".
10fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct0.434086There are 3 r's in the word \"strawberry\".
11together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo0.327454There are 3 r's in the word \"strawberry\".
12octo:meta-llama-3.1-8b-instruct0.592822There are 2 r's in the word \"strawberry\".
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "## Which number is bigger?" + "display(llms, execution_times, responses)" ] }, { - "cell_type": "code", - "execution_count": 9, - "id": "efdf2fd6-f63a-4f9b-af15-1df25590e4fc", + "cell_type": "markdown", + "id": "cae3fb5f-a173-4a33-b843-65df6d1086f9", "metadata": {}, - "outputs": [], "source": [ - "messages = [\n", - " {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9?\"},\n", - "]" + "## Which number is bigger?" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "eaa14ed1-c83b-4c8f-bb14-d318bf0c9a60", + "execution_count": 12, + "id": "efdf2fd6-f63a-4f9b-af15-1df25590e4fc", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "aws:meta.llama3-8b-instruct-v1:0 - 9.9 is bigger than 9.11.\n", - "==========\n", - "groq:llama3-8b-8192 - 9.11 is bigger than 9.9.\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 9.9 is bigger than 9.11.\n", - "==========\n", - "octo:meta-llama-3-8b-instruct - 9.11 is bigger than 9.9.\n", - "==========\n", - "together:meta-llama/Llama-3-8b-chat-hf - 9.11 is bigger than 9.9.\n", - "==========\n", - "openai:gpt-3.5-turbo - 9.9\n", - "==========\n", - "replicate:meta/meta-llama-3-8b-instruct - Let me help you with that!\n", + "aws:meta.llama3-8b-instruct-v1:0 - 0.49 seconds: 9.9 is bigger than 9.11.\n", + "groq:llama3-8b-8192 - 0.20 seconds: 9.11 is bigger than 9.9.\n", + "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.27 seconds: 9.9 is bigger than 9.11.\n", + "octo:meta-llama-3-8b-instruct - 0.29 seconds: 9.11 is bigger than 9.9.\n", + "together:meta-llama/Llama-3-8b-chat-hf - 0.70 seconds: 9.11 is bigger than 9.9.\n", + "openai:gpt-3.5-turbo - 1.05 seconds: 9.9\n", + "replicate:meta/meta-llama-3-8b-instruct - 1.58 seconds: Let me help you with that!\n", "\n", "9.11 is bigger than 9.9.\n", - 
"==========\n", - "aws:meta.llama3-1-8b-instruct-v1:0 - The number 9.11 is bigger than 9.9.\n", - "==========\n", - "groq:llama-3.1-8b-instant - 9.9 is bigger than 9.11.\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 9.9 is bigger than 9.11.\n", - "==========\n", - "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - To compare the two numbers, we need to look at the decimal part. \n", + "aws:meta.llama3-1-8b-instruct-v1:0 - 0.83 seconds: To compare these two numbers, we need to look at the decimal part. Since 9.11 has a larger decimal part (0.11) than 9.9 (0.9), 9.11 is bigger.\n", + "groq:llama-3.1-8b-instant - 0.23 seconds: 9.9 is bigger than 9.11.\n", + "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.19 seconds: 9.9 is bigger than 9.11.\n", + "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.36 seconds: 9.9 is bigger than 9.11.\n", + "octo:meta-llama-3.1-8b-instruct - 0.91 seconds: To compare these two numbers, we need to look at the decimal part. \n", "\n", "9.11 has a decimal part of 0.11, and 9.9 has a decimal part of 0.9. 
\n", "\n", - "Since 0.11 is greater than 0.9, 9.11 is bigger than 9.9.\n", - "==========\n", - "octo:meta-llama-3.1-8b-instruct - 9.9 is bigger than 9.11.\n", - "==========\n" + "Since 0.11 is greater than 0.9, 9.11 is bigger than 9.9.\n" ] } ], "source": [ - "compare_llm(messages)" + "messages = [\n", + " {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9?\"},\n", + "]\n", + "\n", + "responses, execution_times = compare_llm(messages)" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "198b213a-b7bf-4cce-8c30-a8408454370b", + "execution_count": 13, + "id": "eaa14ed1-c83b-4c8f-bb14-d318bf0c9a60", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Provider:Model NameExecution TimeModel Response
1aws:meta.llama3-8b-instruct-v1:00.4892799.9 is bigger than 9.11.
2groq:llama3-8b-81920.2008649.11 is bigger than 9.9.
3fireworks:accounts/fireworks/models/llama-v3-8b-instruct0.2716259.9 is bigger than 9.11.
4octo:meta-llama-3-8b-instruct0.2949589.11 is bigger than 9.9.
5together:meta-llama/Llama-3-8b-chat-hf0.6956579.11 is bigger than 9.9.
6openai:gpt-3.5-turbo1.0515959.9
7replicate:meta/meta-llama-3-8b-instruct1.580146Let me help you with that!\n", + "\n", + "9.11 is bigger than 9.9.
8aws:meta.llama3-1-8b-instruct-v1:00.828657To compare these two numbers, we need to look at the decimal part. Since 9.11 has a larger decimal part (0.11) than 9.9 (0.9), 9.11 is bigger.
9groq:llama-3.1-8b-instant0.2323539.9 is bigger than 9.11.
10fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct0.1929789.9 is bigger than 9.11.
11together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo0.3646799.9 is bigger than 9.11.
12octo:meta-llama-3.1-8b-instruct0.911637To compare these two numbers, we need to look at the decimal part. \n", + "\n", + "9.11 has a decimal part of 0.11, and 9.9 has a decimal part of 0.9. \n", + "\n", + "Since 0.11 is greater than 0.9, 9.11 is bigger than 9.9.
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "messages = [\n", - " {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9? Think step by step.\"},\n", - "]" + "display(llms, execution_times, responses)" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "4a3fb8fc-a7a2-47d3-9db2-792f03cc47c2", + "execution_count": 14, + "id": "198b213a-b7bf-4cce-8c30-a8408454370b", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "aws:meta.llama3-8b-instruct-v1:0 - Let's break it down step by step!\n", + "aws:meta.llama3-8b-instruct-v1:0 - 1.83 seconds: Let's break it down step by step:\n", "\n", - "1. Compare the whole numbers: Both numbers have the same whole number part, which is 9.\n", - "2. Compare the decimal parts: 9.11 has a decimal part of 0.11, while 9.9 has a decimal part of 0.9.\n", - "3. Since 0.11 is smaller than 0.9, 9.11 is smaller than 9.9.\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", "\n", - "So, the correct answer is: 9.9 is bigger than 9.11.\n", - "==========\n", - "groq:llama3-8b-8192 - Let's break it down step by step:\n", + "So, the answer is: 9.9 is bigger than 9.11.\n", + "groq:llama3-8b-8192 - 0.31 seconds: Let's break it down step by step:\n", "\n", "1. Both numbers have the same first digit, which is 9.\n", "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", @@ -400,15 +841,15 @@ "4. 
Therefore, 9.9 is greater than 9.11.\n", "\n", "So, the answer is: 9.9 is bigger than 9.11.\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - Let's break it down step by step!\n", + "fireworks:accounts/fireworks/models/llama-v3-8b-instruct - 0.72 seconds: Let's break it down step by step!\n", "\n", - "1. Compare the whole numbers: Both numbers have the same whole part, which is 9.\n", - "2. Compare the decimal parts: 0.11 is less than 0.9.\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", "\n", - "So, 9.11 is less than 9.9.\n", - "==========\n", - "octo:meta-llama-3-8b-instruct - Let's break it down step by step:\n", + "So, the correct answer is: 9.9 is bigger than 9.11!\n", + "octo:meta-llama-3-8b-instruct - 1.40 seconds: Let's break it down step by step:\n", "\n", "1. Both numbers have the same first digit, which is 9.\n", "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", @@ -416,8 +857,7 @@ "4. Therefore, 9.9 is greater than 9.11.\n", "\n", "So, the answer is: 9.9 is bigger than 9.11.\n", - "==========\n", - "together:meta-llama/Llama-3-8b-chat-hf - Let's break it down step by step:\n", + "together:meta-llama/Llama-3-8b-chat-hf - 0.76 seconds: Let's break it down step by step:\n", "\n", "1. Both numbers have the same first digit, which is 9.\n", "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", @@ -425,14 +865,10 @@ "4. Therefore, 9.9 is greater than 9.11.\n", "\n", "So, the answer is: 9.9 is bigger than 9.11.\n", - "==========\n", - "openai:gpt-3.5-turbo - To determine which number is bigger, we can compare the whole numbers first. In this case, both numbers have a whole number of 9.\n", - "\n", - "Next, we can compare the decimal parts. 
In 9.11, the decimal part is 0.11, and in 9.9, the decimal part is 0.9.\n", + "openai:gpt-3.5-turbo - 1.26 seconds: To determine which number is bigger, we can compare the whole numbers first. In this case, both numbers have a whole number of 9. Since they are the same, we need to look at the decimal portion.\n", "\n", - "Since 0.11 is greater than 0.9, we can conclude that 9.11 is bigger than 9.9.\n", - "==========\n", - "replicate:meta/meta-llama-3-8b-instruct - Let's break it down step by step!\n", + "9.11 is bigger than 9.9 because 0.11 is greater than 0.9.\n", + "replicate:meta/meta-llama-3-8b-instruct - 2.31 seconds: Let's break it down step by step!\n", "\n", "1. Both numbers start with the same digits: 9.1\n", "2. The next digit is also the same: 1\n", @@ -441,16 +877,14 @@ "Since 9 is greater than 1, the correct answer is:\n", "\n", "9.9 is bigger than 9.11\n", - "==========\n", - "aws:meta.llama3-1-8b-instruct-v1:0 - Let's compare the two numbers step by step.\n", + "aws:meta.llama3-1-8b-instruct-v1:0 - 1.37 seconds: Let's compare the two numbers step by step.\n", "\n", "1. Both numbers start with 9, so we can ignore the 9 for now.\n", "2. We're left with 0.11 and 0.9. Which one is bigger?\n", "3. Since 0.9 is bigger than 0.11, the whole number 9.9 is bigger than 9.11.\n", "\n", "So, the answer is: 9.9 is bigger than 9.11.\n", - "==========\n", - "groq:llama-3.1-8b-instant - Let's compare the two numbers step by step:\n", + "groq:llama-3.1-8b-instant - 0.46 seconds: Let's compare the two numbers step by step:\n", "\n", "1. Both numbers start with 9, so we can ignore the 9 for now.\n", "2. We're left with 0.11 and 0.9.\n", @@ -458,38 +892,232 @@ "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n", "\n", "Therefore, 9.9 is bigger than 9.11.\n", - "==========\n", - "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - Let's compare the two numbers step by step.\n", - "\n", - "1. 
Both numbers have the same whole number part, which is 9.\n", - "2. Now, let's look at the decimal part. 9.11 has a decimal part of 0.11, while 9.9 has a decimal part of 0.9.\n", - "3. Since 0.11 is greater than 0.9, the number 9.11 is greater than 9.9.\n", - "\n", - "So, the answer is: 9.11 is bigger.\n", - "==========\n", - "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - To determine which number is bigger, let's break it down step by step:\n", + "fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct - 0.58 seconds: Let's compare the two numbers step by step:\n", "\n", "1. Both numbers start with 9, so we can ignore the 9 for now.\n", "2. We're left with 0.11 and 0.9.\n", - "3. 0.9 is greater than 0.11 because 9 is greater than 11 when comparing the same number of decimal places.\n", - "4. Since 0.9 is greater than 0.11, and both numbers start with 9, the original number 9.9 is greater than 9.11.\n", + "3. 0.9 is bigger than 0.11.\n", + "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n", + "\n", + "Therefore, 9.9 is bigger than 9.11.\n", + "together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo - 0.99 seconds: Let's compare the two numbers step by step:\n", "\n", - "Therefore, 9.9 is the bigger number.\n", - "==========\n", - "octo:meta-llama-3.1-8b-instruct - Let's compare the two numbers step by step:\n", + "1. Both numbers start with 9, so they are equal in the first digit.\n", + "2. The next digit is 1 in 9.11 and 9 in 9.9. Since 9 is greater than 1, 9.9 is bigger than 9.11 in the second digit.\n", + "3. Since the first two digits are the same in both numbers (9.1), we need to look at the third digit to break the tie. In 9.11, the third digit is 1, and in 9.9, the third digit is 9. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n", "\n", - "1. Both numbers start with 9, so we can ignore the 9 for now.\n", - "2. We're left with 0.11 and 0.9.\n", - "3. 
0.9 is bigger than 0.11 because 9 is bigger than 11.\n", - "4. Since 0.9 is bigger than 0.11, and both numbers started with 9, 9.9 is bigger than 9.11.\n", + "Therefore, 9.9 is bigger than 9.11.\n", + "octo:meta-llama-3.1-8b-instruct - 1.55 seconds: To determine which number is bigger, let's compare them step by step:\n", "\n", - "So, the answer is: 9.9 is bigger than 9.11.\n", - "==========\n" + "1. Both numbers start with 9, so they are equal in the first digit.\n", + "2. Now, let's look at the second digit. 9.11 has a 1, while 9.9 has a 9.\n", + "3. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n", + "\n", + "So, the number 9.9 is bigger than 9.11.\n" ] } ], "source": [ - "compare_llm(messages)" + "messages = [\n", + " {\"role\": \"user\", \"content\": \"Which number is bigger, 9.11 or 9.9? Think step by step.\"},\n", + "]\n", + "\n", + "responses, execution_times = compare_llm(messages)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "4a3fb8fc-a7a2-47d3-9db2-792f03cc47c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Provider:Model NameExecution TimeModel Response
1aws:meta.llama3-8b-instruct-v1:01.830247Let's break it down step by step:\n", + "\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", + "\n", + "So, the answer is: 9.9 is bigger than 9.11.
2groq:llama3-8b-81920.307869Let's break it down step by step:\n", + "\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", + "\n", + "So, the answer is: 9.9 is bigger than 9.11.
3fireworks:accounts/fireworks/models/llama-v3-8b-instruct0.716259Let's break it down step by step!\n", + "\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", + "\n", + "So, the correct answer is: 9.9 is bigger than 9.11!
4octo:meta-llama-3-8b-instruct1.397936Let's break it down step by step:\n", + "\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", + "\n", + "So, the answer is: 9.9 is bigger than 9.11.
5together:meta-llama/Llama-3-8b-chat-hf0.764065Let's break it down step by step:\n", + "\n", + "1. Both numbers have the same first digit, which is 9.\n", + "2. The second digit of 9.11 is 1, and the second digit of 9.9 is 9.\n", + "3. Since 9 is greater than 1, the second digit of 9.9 is larger than the second digit of 9.11.\n", + "4. Therefore, 9.9 is greater than 9.11.\n", + "\n", + "So, the answer is: 9.9 is bigger than 9.11.
6openai:gpt-3.5-turbo1.262903To determine which number is bigger, we can compare the whole numbers first. In this case, both numbers have a whole number of 9. Since they are the same, we need to look at the decimal portion.\n", + "\n", + "9.11 is bigger than 9.9 because 0.11 is greater than 0.9.
7replicate:meta/meta-llama-3-8b-instruct2.307123Let's break it down step by step!\n", + "\n", + "1. Both numbers start with the same digits: 9.1\n", + "2. The next digit is also the same: 1\n", + "3. The last digit is different: 9.11 has a 1, while 9.9 has a 9\n", + "\n", + "Since 9 is greater than 1, the correct answer is:\n", + "\n", + "9.9 is bigger than 9.11
8aws:meta.llama3-1-8b-instruct-v1:01.374292Let's compare the two numbers step by step.\n", + "\n", + "1. Both numbers start with 9, so we can ignore the 9 for now.\n", + "2. We're left with 0.11 and 0.9. Which one is bigger?\n", + "3. Since 0.9 is bigger than 0.11, the whole number 9.9 is bigger than 9.11.\n", + "\n", + "So, the answer is: 9.9 is bigger than 9.11.
9groq:llama-3.1-8b-instant0.463357Let's compare the two numbers step by step:\n", + "\n", + "1. Both numbers start with 9, so we can ignore the 9 for now.\n", + "2. We're left with 0.11 and 0.9.\n", + "3. 0.9 is bigger than 0.11.\n", + "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n", + "\n", + "Therefore, 9.9 is bigger than 9.11.
10fireworks:accounts/fireworks/models/llama-v3p1-8b-instruct0.584036Let's compare the two numbers step by step:\n", + "\n", + "1. Both numbers start with 9, so we can ignore the 9 for now.\n", + "2. We're left with 0.11 and 0.9.\n", + "3. 0.9 is bigger than 0.11.\n", + "4. Since 9 is the same in both numbers, the overall number with 0.9 is bigger than the number with 0.11.\n", + "\n", + "Therefore, 9.9 is bigger than 9.11.
11together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo0.994566Let's compare the two numbers step by step:\n", + "\n", + "1. Both numbers start with 9, so they are equal in the first digit.\n", + "2. The next digit is 1 in 9.11 and 9 in 9.9. Since 9 is greater than 1, 9.9 is bigger than 9.11 in the second digit.\n", + "3. Since the first two digits are the same in both numbers (9.1), we need to look at the third digit to break the tie. In 9.11, the third digit is 1, and in 9.9, the third digit is 9. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n", + "\n", + "Therefore, 9.9 is bigger than 9.11.
12octo:meta-llama-3.1-8b-instruct1.551550To determine which number is bigger, let's compare them step by step:\n", + "\n", + "1. Both numbers start with 9, so they are equal in the first digit.\n", + "2. Now, let's look at the second digit. 9.11 has a 1, while 9.9 has a 9.\n", + "3. Since 9 is greater than 1, 9.9 is bigger than 9.11.\n", + "\n", + "So, the number 9.9 is bigger than 9.11.
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "display(llms, execution_times, responses)" ] }, { @@ -500,9 +1128,17 @@ "## Takeaways\n", "1. Not all LLMs are created equal - not even all Llama 3 (or 3.1) are created equal (by different providers).\n", "2. Ask LLM to think step by step may help improve its reasoning.\n", - "3. The way LLM was trained and tokenized could lead it to some weird reasoning.\n", + "3. The way tokenization works in LLMs can lead to a lot of weird behavior (see Andrej Karpathy's awesome [video](https://www.youtube.com/watch?v=zduSFxRajkE) for a deep dive).\n", "4. A more comprehensive benchmark would be desired, but a quick LLM comparison like shown here can be the first step." ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "04e13c90-3680-4f1d-8f65-768a78b7adb2", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {