From ba87dd6d0846ac806e0be804bfa1d6ba1822bfe5 Mon Sep 17 00:00:00 2001 From: fairydreaming <166155368+fairydreaming@users.noreply.github.com> Date: Tue, 19 Nov 2024 12:23:13 +0100 Subject: [PATCH] Update README.md --- README.md | 179 +++++++++++++++++++++++++++--------------------------- 1 file changed, 90 insertions(+), 89 deletions(-) diff --git a/README.md b/README.md index 2df1718..644d2ed 100644 --- a/README.md +++ b/README.md @@ -17,96 +17,97 @@ The benchmark result is the macro-averaged accuracy value over all the family re | 7 | llama-3.1-405b-instruct-sys | 87.78 | 100.00 | 100.00 | 100.00 | 96.00 | 96.00 | 92.00 | 50.00 | 56.00 | 100.00 | | 7 | deepseek-v2-chat-0628-Q8_0 | 87.78 | 100.00 | 100.00 | 98.00 | 86.00 | 94.00 | 94.00 | 60.00 | 60.00 | 98.00 | | 9 | gemini-pro-1.5-002 | 87.11 | 100.00 | 100.00 | 74.00 | 88.00 | 100.00 | 84.00 | 70.00 | 72.00 | 96.00 | -| 10 | claude-3.5-sonnet | 86.89 | 100.00 | 100.00 | 98.00 | 80.00 | 98.00 | 94.00 | 60.00 | 56.00 | 96.00 | | 10 | mistral-large-2 | 86.89 | 100.00 | 100.00 | 70.00 | 92.00 | 100.00 | 94.00 | 60.00 | 66.00 | 100.00 | -| 12 | gpt-4-turbo-sys | 86.67 | 100.00 | 100.00 | 94.00 | 80.00 | 94.00 | 94.00 | 54.00 | 68.00 | 96.00 | -| 13 | gpt-4-turbo | 86.22 | 100.00 | 100.00 | 92.00 | 84.00 | 96.00 | 90.00 | 56.00 | 60.00 | 98.00 | -| 14 | llama-3.1-405b-instruct | 85.78 | 100.00 | 100.00 | 88.00 | 92.00 | 98.00 | 88.00 | 54.00 | 52.00 | 100.00 | -| 15 | mistral-large-2-sys | 85.11 | 100.00 | 100.00 | 84.00 | 86.00 | 100.00 | 94.00 | 56.00 | 50.00 | 96.00 | -| 16 | gpt-4o | 83.11 | 100.00 | 100.00 | 84.00 | 82.00 | 98.00 | 74.00 | 62.00 | 52.00 | 96.00 | -| 17 | claude-3-opus-sys | 82.67 | 100.00 | 100.00 | 88.00 | 72.00 | 96.00 | 92.00 | 48.00 | 50.00 | 98.00 | -| 18 | grok-2-sys | 81.78 | 100.00 | 100.00 | 80.00 | 92.00 | 100.00 | 90.00 | 34.00 | 42.00 | 98.00 | -| 19 | grok-2 | 80.67 | 100.00 | 100.00 | 66.00 | 96.00 | 100.00 | 84.00 | 48.00 | 40.00 | 92.00 | -| 20 | grok-beta | 80.44 | 100.00 | 100.00 | 64.00 | 94.00 | 100.00 | 86.00 | 50.00 | 32.00 | 98.00 | -| 21 | llama-3.1-nemotron-70b-instruct | 80.00 | 100.00 | 100.00 | 92.00 | 80.00 | 96.00 | 82.00 | 46.00 | 32.00 | 92.00 | -| 22 | nemotron-4-340b-instruct-Q8_0-sys | 79.78 | 100.00 | 100.00 | 86.00 | 70.00 | 98.00 | 74.00 | 46.00 | 48.00 | 96.00 | -| 23 | claude-3-opus | 78.89 | 100.00 | 100.00 | 86.00 | 72.00 | 94.00 | 90.00 | 40.00 | 32.00 | 96.00 | -| 24 | qwen-2.5-72b-instruct | 78.67 | 100.00 | 100.00 | 60.00 | 78.00 | 96.00 | 84.00 | 42.00 | 54.00 | 94.00 | -| 24 | nemotron-4-340b-instruct-Q8_0 | 78.67 | 100.00 | 100.00 | 76.00 | 60.00 | 96.00 | 76.00 | 46.00 | 58.00 | 96.00 | -| 26 | mistral-large-sys | 77.33 | 100.00 | 100.00 | 88.00 | 72.00 | 96.00 | 62.00 | 46.00 | 42.00 | 90.00 | -| 27 | gemini-flash-1.5-002 | 77.11 | 100.00 | 100.00 | 86.00 | 58.00 | 100.00 | 64.00 | 48.00 | 42.00 | 96.00 | -| 28 | llama-3.1-70b-instruct | 76.89 | 100.00 | 100.00 | 72.00 | 66.00 | 96.00 | 78.00 | 52.00 | 34.00 | 94.00 | -| 29 | llama-3.1-70b-instruct-sys | 75.11 | 100.00 | 98.00 | 76.00 | 70.00 | 90.00 | 76.00 | 44.00 | 30.00 | 92.00 | -| 29 | Meta-Llama-3-70B-Instruct.Q8_0-sys | 75.11 | 100.00 | 100.00 | 78.00 | 68.00 | 100.00 | 74.00 | 34.00 | 26.00 | 96.00 | -| 31 | gpt-4-sys | 74.44 | 100.00 | 100.00 | 90.00 | 66.00 | 96.00 | 60.00 | 46.00 | 46.00 | 66.00 | -| 32 | gemini-pro-1.5 | 74.00 | 100.00 | 100.00 | 94.00 | 74.00 | 96.00 | 58.00 | 28.00 | 28.00 | 88.00 | -| 33 | gemma-2-27b-Q5_K_M-sys | 72.44 | 100.00 | 84.00 | 86.00 | 68.00 | 90.00 | 58.00 | 50.00 | 38.00 | 78.00 | -| 34 | mistral-large | 71.33 | 100.00 | 100.00 | 100.00 | 54.00 | 92.00 | 58.00 | 48.00 | 10.00 | 80.00 | -| 34 | gemini-flash-1.5 | 71.33 | 100.00 | 100.00 | 94.00 | 56.00 | 98.00 | 62.00 | 30.00 | 18.00 | 84.00 | -| 36 | mistral-nemo-sys | 69.33 | 96.00 | 100.00 | 52.00 | 76.00 | 96.00 | 54.00 | 36.00 | 26.00 | 88.00 | -| 36 | gemma-2-27b-Q5_K_M | 69.33 | 100.00 | 100.00 | 80.00 | 54.00 | 92.00 | 58.00 | 20.00 | 32.00 | 88.00 | -| 38 | gemma-2-9b-Q8_0 | 67.33 | 100.00 | 100.00 | 82.00 | 42.00 | 92.00 | 64.00 | 20.00 | 16.00 | 90.00 | -| 39 | gemma-2-9b-Q8_0-sys | 66.67 | 100.00 | 100.00 | 84.00 | 36.00 | 92.00 | 64.00 | 16.00 | 20.00 | 88.00 | -| 40 | gpt-4 | 65.78 | 100.00 | 100.00 | 98.00 | 28.00 | 86.00 | 76.00 | 12.00 | 14.00 | 78.00 | -| 41 | Qwen2-72B-Instruct-Q8_0 | 65.11 | 100.00 | 100.00 | 86.00 | 44.00 | 88.00 | 68.00 | 22.00 | 16.00 | 62.00 | -| 41 | mixtral-8x22b-instruct-v0.1-Q8_0 | 65.11 | 100.00 | 100.00 | 100.00 | 22.00 | 92.00 | 50.00 | 24.00 | 16.00 | 82.00 | -| 43 | mixtral-8x22b-instruct-v0.1.Q8_0-sys | 64.89 | 100.00 | 100.00 | 100.00 | 22.00 | 94.00 | 44.00 | 30.00 | 16.00 | 78.00 | -| 43 | Mistral-Nemo-Instruct-2407-Q8_0-sys | 64.89 | 98.00 | 94.00 | 34.00 | 58.00 | 88.00 | 52.00 | 40.00 | 30.00 | 90.00 | -| 45 | Meta-Llama-3-70B-Instruct.Q8_0 | 64.67 | 100.00 | 100.00 | 96.00 | 34.00 | 90.00 | 44.00 | 48.00 | 16.00 | 54.00 | -| 46 | claude-3-haiku-sys | 64.00 | 100.00 | 100.00 | 80.00 | 32.00 | 94.00 | 66.00 | 16.00 | 18.00 | 70.00 | -| 47 | WizardLM-2-8x22B.Q8_0 | 63.56 | 100.00 | 98.00 | 86.00 | 24.00 | 82.00 | 54.00 | 28.00 | 20.00 | 80.00 | -| 48 | Bielik-11B-v2.3-Instruct-Q8_0-sys | 63.33 | 96.00 | 96.00 | 48.00 | 54.00 | 94.00 | 52.00 | 38.00 | 18.00 | 74.00 | -| 49 | c4ai-command-r-plus-v01.Q8_0-sys | 63.11 | 100.00 | 100.00 | 96.00 | 22.00 | 74.00 | 48.00 | 40.00 | 22.00 | 66.00 | -| 49 | c4ai-command-r-plus-v01.Q8_0 | 63.11 | 100.00 | 100.00 | 96.00 | 22.00 | 72.00 | 46.00 | 46.00 | 18.00 | 68.00 | -| 51 | phi-3-medium-4k-instruct-Q8_0 | 62.44 | 100.00 | 100.00 | 86.00 | 18.00 | 96.00 | 58.00 | 20.00 | 18.00 | 66.00 | -| 52 | mixtral-8x7b-instruct-v0.1.Q8_0 | 62.00 | 98.00 | 96.00 | 78.00 | 24.00 | 96.00 | 50.00 | 34.00 | 8.00 | 74.00 | -| 53 | internlm2_5-20b-chat-Q8_0 | 61.78 | 100.00 | 100.00 | 100.00 | 0.00 | 96.00 | 32.00 | 50.00 | 30.00 | 48.00 | -| 53 | deepseek-v2-chat-Q8_0 | 61.78 | 100.00 | 100.00 | 98.00 | 24.00 | 90.00 | 56.00 | 22.00 | 20.00 | 46.00 | -| 55 | qwen1_5-110b-chat-q8_0 | 61.56 | 100.00 | 100.00 | 68.00 | 26.00 | 94.00 | 40.00 | 30.00 | 18.00 | 78.00 | -| 55 | Karasu-Mixtral-8x22B-v0.1.Q8_0 | 61.56 | 100.00 | 100.00 | 94.00 | 20.00 | 88.00 | 40.00 | 26.00 | 18.00 | 68.00 | -| 55 | deepseek-v2-chat-Q8_0-sys | 61.56 | 100.00 | 100.00 | 100.00 | 16.00 | 90.00 | 74.00 | 20.00 | 12.00 | 42.00 | -| 58 | qwen1_5-110b-chat-q8_0-sys | 61.33 | 100.00 | 100.00 | 62.00 | 54.00 | 96.00 | 36.00 | 22.00 | 14.00 | 68.00 | -| 59 | gpt-3.5-turbo-sys | 60.89 | 100.00 | 78.00 | 76.00 | 32.00 | 90.00 | 56.00 | 18.00 | 18.00 | 80.00 | -| 59 | Mistral-Nemo-Instruct-2407-Q8_0 | 60.89 | 96.00 | 100.00 | 90.00 | 20.00 | 98.00 | 28.00 | 50.00 | 18.00 | 48.00 | -| 59 | mixtral-8x7b-instruct-v0.1.Q8_0-sys | 60.89 | 98.00 | 86.00 | 50.00 | 50.00 | 88.00 | 68.00 | 34.00 | 10.00 | 64.00 | -| 62 | mistral-nemo | 60.44 | 100.00 | 100.00 | 90.00 | 12.00 | 96.00 | 28.00 | 52.00 | 18.00 | 48.00 | -| 63 | c4ai-command-r-plus-08-2024-Q8_0 | 59.78 | 100.00 | 98.00 | 66.00 | 46.00 | 74.00 | 6.00 | 54.00 | 16.00 | 78.00 | -| 64 | mistral-medium-sys | 59.11 | 100.00 | 100.00 | 60.00 | 42.00 | 82.00 | 32.00 | 24.00 | 28.00 | 64.00 | -| 64 | internlm2_5-20b-chat-Q8_0-sys | 59.11 | 100.00 | 100.00 | 88.00 | 4.00 | 96.00 | 34.00 | 36.00 | 16.00 | 58.00 | -| 66 | c4ai-command-r-08-2024-Q8_0 | 58.44 | 100.00 | 100.00 | 84.00 | 10.00 | 100.00 | 22.00 | 58.00 | 14.00 | 38.00 | -| 67 | mistral-small | 58.00 | 98.00 | 98.00 | 80.00 | 22.00 | 82.00 | 14.00 | 66.00 | 8.00 | 54.00 | -| 68 | qwen1_5-72b-chat-q8_0 | 57.56 | 100.00 | 100.00 | 90.00 | 14.00 | 76.00 | 46.00 | 28.00 | 32.00 | 32.00 | -| 69 | mistral-small-sys | 57.11 | 92.00 | 100.00 | 76.00 | 26.00 | 84.00 | 6.00 | 68.00 | 18.00 | 44.00 | -| 69 | Smaug-2-72B.Q8_0 | 57.11 | 100.00 | 100.00 | 90.00 | 6.00 | 84.00 | 48.00 | 14.00 | 24.00 | 48.00 | -| 71 | qwen1_5-32b-chat-q8_0 | 56.67 | 100.00 | 94.00 | 82.00 | 16.00 | 94.00 | 18.00 | 46.00 | 12.00 | 48.00 | -| 72 | Bielik-11B-v2.3-Instruct-Q8_0 | 56.00 | 100.00 | 100.00 | 66.00 | 28.00 | 80.00 | 4.00 | 62.00 | 30.00 | 34.00 | -| 73 | c4ai-command-r-v01-Q8_0 | 55.78 | 100.00 | 100.00 | 76.00 | 4.00 | 92.00 | 18.00 | 20.00 | 46.00 | 46.00 | -| 73 | llama-2-70b-chat.Q8_0 | 55.78 | 100.00 | 92.00 | 72.00 | 14.00 | 80.00 | 52.00 | 28.00 | 10.00 | 54.00 | -| 73 | mistral-medium | 55.78 | 100.00 | 100.00 | 54.00 | 64.00 | 66.00 | 24.00 | 40.00 | 24.00 | 30.00 | -| 73 | claude-3-haiku | 55.78 | 100.00 | 100.00 | 92.00 | 10.00 | 84.00 | 14.00 | 58.00 | 22.00 | 22.00 | -| 77 | aya-23-35b-Q8_0 | 55.33 | 100.00 | 100.00 | 92.00 | 6.00 | 98.00 | 12.00 | 24.00 | 64.00 | 2.00 | -| 78 | Meta-Llama-3-8B-Instruct.Q8_0 | 55.11 | 96.00 | 94.00 | 46.00 | 38.00 | 96.00 | 36.00 | 8.00 | 28.00 | 54.00 | -| 79 | miqu-1-70b.q5_K_M | 54.89 | 100.00 | 100.00 | 50.00 | 66.00 | 64.00 | 16.00 | 40.00 | 30.00 | 28.00 | -| 79 | c4ai-command-r-v01-Q8_0-sys | 54.89 | 94.00 | 100.00 | 72.00 | 16.00 | 88.00 | 10.00 | 18.00 | 58.00 | 38.00 | -| 81 | ggml-dbrx-instruct-16x12b-q8_0 | 54.44 | 100.00 | 100.00 | 58.00 | 34.00 | 70.00 | 12.00 | 46.00 | 20.00 | 50.00 | -| 82 | snowflake-arctic-instruct-Q5_K_M-sys | 53.56 | 86.00 | 100.00 | 56.00 | 14.00 | 86.00 | 38.00 | 28.00 | 20.00 | 54.00 | -| 83 | Phi-3-mini-4k-instruct-Q8_0 | 53.33 | 98.00 | 96.00 | 98.00 | 4.00 | 90.00 | 20.00 | 26.00 | 36.00 | 12.00 | -| 84 | Meta-Llama-3-8B-Instruct.Q8_0-sys | 51.56 | 80.00 | 88.00 | 50.00 | 32.00 | 90.00 | 42.00 | 14.00 | 18.00 | 50.00 | -| 85 | gpt-3.5-turbo | 50.22 | 96.00 | 54.00 | 78.00 | 18.00 | 80.00 | 22.00 | 52.00 | 18.00 | 34.00 | -| 86 | deepseek-v2-lite-chat-Q8_0 | 49.56 | 88.00 | 100.00 | 60.00 | 14.00 | 88.00 | 8.00 | 62.00 | 6.00 | 20.00 | -| 87 | llama-3.1-8b-instruct | 48.67 | 82.00 | 92.00 | 44.00 | 32.00 | 96.00 | 24.00 | 10.00 | 10.00 | 48.00 | -| 88 | mistral-7b-instruct-v0.2.Q8_0 | 46.89 | 98.00 | 86.00 | 42.00 | 24.00 | 70.00 | 12.00 | 56.00 | 28.00 | 6.00 | -| 89 | aya-23-8b-Q8_0 | 45.78 | 72.00 | 100.00 | 32.00 | 46.00 | 56.00 | 2.00 | 52.00 | 48.00 | 4.00 | -| 89 | llama-3.1-8b-instruct-sys | 45.78 | 82.00 | 78.00 | 34.00 | 30.00 | 84.00 | 30.00 | 8.00 | 8.00 | 58.00 | -| 91 | deepseek-v2-lite-chat-Q8_0-sys | 45.56 | 54.00 | 100.00 | 62.00 | 8.00 | 90.00 | 8.00 | 70.00 | 6.00 | 12.00 | -| 92 | snowflake-arctic-instruct-Q5_K_M | 44.89 | 54.00 | 82.00 | 70.00 | 8.00 | 60.00 | 30.00 | 44.00 | 34.00 | 22.00 | -| 93 | gemma-7b-it-Q8_0 | 43.56 | 100.00 | 54.00 | 62.00 | 32.00 | 36.00 | 28.00 | 50.00 | 18.00 | 12.00 | -| 94 | llama-2-13b-chat.Q8_0 | 43.33 | 88.00 | 82.00 | 32.00 | 22.00 | 76.00 | 6.00 | 42.00 | 30.00 | 12.00 | -| 95 | mistral-7b-instruct-v0.2.Q8_0-sys | 33.33 | 72.00 | 90.00 | 20.00 | 16.00 | 52.00 | 12.00 | 20.00 | 10.00 | 8.00 | -| 96 | llama-2-7b-chat.Q8_0 | 31.56 | 36.00 | 72.00 | 34.00 | 24.00 | 28.00 | 22.00 | 22.00 | 30.00 | 16.00 | -| 97 | WizardLM-2-7B-Q8_0 | 20.00 | 36.00 | 16.00 | 8.00 | 14.00 | 18.00 | 22.00 | 36.00 | 12.00 | 18.00 | -| 98 | gemma-2b-it-Q8_0 | 5.56 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 12.00 | 24.00 | 14.00 | 0.00 | -| 99 | qwen1_5-7b-chat-q8_0 | 2.89 | 6.00 | 2.00 | 4.00 | 0.00 | 2.00 | 0.00 | 8.00 | 2.00 | 2.00 | +| 10 | mistral-large-2411 | 86.89 | 100.00 | 100.00 | 68.00 | 88.00 | 98.00 | 96.00 | 64.00 | 68.00 | 100.00 | +| 10 | claude-3.5-sonnet | 86.89 | 100.00 | 100.00 | 98.00 | 80.00 | 98.00 | 94.00 | 60.00 | 56.00 | 96.00 | +| 13 | gpt-4-turbo-sys | 86.67 | 100.00 | 100.00 | 94.00 | 80.00 | 94.00 | 94.00 | 54.00 | 68.00 | 96.00 | +| 14 | gpt-4-turbo | 86.22 | 100.00 | 100.00 | 92.00 | 84.00 | 96.00 | 90.00 | 56.00 | 60.00 | 98.00 | +| 15 | llama-3.1-405b-instruct | 85.78 | 100.00 | 100.00 | 88.00 | 92.00 | 98.00 | 88.00 | 54.00 | 52.00 | 100.00 | +| 16 | mistral-large-2-sys | 85.11 | 100.00 | 100.00 | 84.00 | 86.00 | 100.00 | 94.00 | 56.00 | 50.00 | 96.00 | +| 17 | gpt-4o | 83.11 | 100.00 | 100.00 | 84.00 | 82.00 | 98.00 | 74.00 | 62.00 | 52.00 | 96.00 | +| 18 | claude-3-opus-sys | 82.67 | 100.00 | 100.00 | 88.00 | 72.00 | 96.00 | 92.00 | 48.00 | 50.00 | 98.00 | +| 19 | grok-2-sys | 81.78 | 100.00 | 100.00 | 80.00 | 92.00 | 100.00 | 90.00 | 34.00 | 42.00 | 98.00 | +| 20 | grok-2 | 80.67 | 100.00 | 100.00 | 66.00 | 96.00 | 100.00 | 84.00 | 48.00 | 40.00 | 92.00 | +| 21 | grok-beta | 80.44 | 100.00 | 100.00 | 64.00 | 94.00 | 100.00 | 86.00 | 50.00 | 32.00 | 98.00 | +| 22 | llama-3.1-nemotron-70b-instruct | 80.00 | 100.00 | 100.00 | 92.00 | 80.00 | 96.00 | 82.00 | 46.00 | 32.00 | 92.00 | +| 23 | nemotron-4-340b-instruct-Q8_0-sys | 79.78 | 100.00 | 100.00 | 86.00 | 70.00 | 98.00 | 74.00 | 46.00 | 48.00 | 96.00 | +| 24 | claude-3-opus | 78.89 | 100.00 | 100.00 | 86.00 | 72.00 | 94.00 | 90.00 | 40.00 | 32.00 | 96.00 | +| 25 | nemotron-4-340b-instruct-Q8_0 | 78.67 | 100.00 | 100.00 | 76.00 | 60.00 | 96.00 | 76.00 | 46.00 | 58.00 | 96.00 | +| 25 | qwen-2.5-72b-instruct | 78.67 | 100.00 | 100.00 | 60.00 | 78.00 | 96.00 | 84.00 | 42.00 | 54.00 | 94.00 | +| 27 | mistral-large-sys | 77.33 | 100.00 | 100.00 | 88.00 | 72.00 | 96.00 | 62.00 | 46.00 | 42.00 | 90.00 | +| 28 | gemini-flash-1.5-002 | 77.11 | 100.00 | 100.00 | 86.00 | 58.00 | 100.00 | 64.00 | 48.00 | 42.00 | 96.00 | +| 29 | llama-3.1-70b-instruct | 76.89 | 100.00 | 100.00 | 72.00 | 66.00 | 96.00 | 78.00 | 52.00 | 34.00 | 94.00 | +| 30 | llama-3.1-70b-instruct-sys | 75.11 | 100.00 | 98.00 | 76.00 | 70.00 | 90.00 | 76.00 | 44.00 | 30.00 | 92.00 | +| 30 | Meta-Llama-3-70B-Instruct.Q8_0-sys | 75.11 | 100.00 | 100.00 | 78.00 | 68.00 | 100.00 | 74.00 | 34.00 | 26.00 | 96.00 | +| 32 | gpt-4-sys | 74.44 | 100.00 | 100.00 | 90.00 | 66.00 | 96.00 | 60.00 | 46.00 | 46.00 | 66.00 | +| 33 | gemini-pro-1.5 | 74.00 | 100.00 | 100.00 | 94.00 | 74.00 | 96.00 | 58.00 | 28.00 | 28.00 | 88.00 | +| 34 | gemma-2-27b-Q5_K_M-sys | 72.44 | 100.00 | 84.00 | 86.00 | 68.00 | 90.00 | 58.00 | 50.00 | 38.00 | 78.00 | +| 35 | mistral-large | 71.33 | 100.00 | 100.00 | 100.00 | 54.00 | 92.00 | 58.00 | 48.00 | 10.00 | 80.00 | +| 35 | gemini-flash-1.5 | 71.33 | 100.00 | 100.00 | 94.00 | 56.00 | 98.00 | 62.00 | 30.00 | 18.00 | 84.00 | +| 37 | mistral-nemo-sys | 69.33 | 96.00 | 100.00 | 52.00 | 76.00 | 96.00 | 54.00 | 36.00 | 26.00 | 88.00 | +| 37 | gemma-2-27b-Q5_K_M | 69.33 | 100.00 | 100.00 | 80.00 | 54.00 | 92.00 | 58.00 | 20.00 | 32.00 | 88.00 | +| 39 | gemma-2-9b-Q8_0 | 67.33 | 100.00 | 100.00 | 82.00 | 42.00 | 92.00 | 64.00 | 20.00 | 16.00 | 90.00 | +| 40 | gemma-2-9b-Q8_0-sys | 66.67 | 100.00 | 100.00 | 84.00 | 36.00 | 92.00 | 64.00 | 16.00 | 20.00 | 88.00 | +| 41 | gpt-4 | 65.78 | 100.00 | 100.00 | 98.00 | 28.00 | 86.00 | 76.00 | 12.00 | 14.00 | 78.00 | +| 42 | Qwen2-72B-Instruct-Q8_0 | 65.11 | 100.00 | 100.00 | 86.00 | 44.00 | 88.00 | 68.00 | 22.00 | 16.00 | 62.00 | +| 42 | mixtral-8x22b-instruct-v0.1-Q8_0 | 65.11 | 100.00 | 100.00 | 100.00 | 22.00 | 92.00 | 50.00 | 24.00 | 16.00 | 82.00 | +| 44 | mixtral-8x22b-instruct-v0.1.Q8_0-sys | 64.89 | 100.00 | 100.00 | 100.00 | 22.00 | 94.00 | 44.00 | 30.00 | 16.00 | 78.00 | +| 44 | Mistral-Nemo-Instruct-2407-Q8_0-sys | 64.89 | 98.00 | 94.00 | 34.00 | 58.00 | 88.00 | 52.00 | 40.00 | 30.00 | 90.00 | +| 46 | Meta-Llama-3-70B-Instruct.Q8_0 | 64.67 | 100.00 | 100.00 | 96.00 | 34.00 | 90.00 | 44.00 | 48.00 | 16.00 | 54.00 | +| 47 | claude-3-haiku-sys | 64.00 | 100.00 | 100.00 | 80.00 | 32.00 | 94.00 | 66.00 | 16.00 | 18.00 | 70.00 | +| 48 | WizardLM-2-8x22B.Q8_0 | 63.56 | 100.00 | 98.00 | 86.00 | 24.00 | 82.00 | 54.00 | 28.00 | 20.00 | 80.00 | +| 49 | Bielik-11B-v2.3-Instruct-Q8_0-sys | 63.33 | 96.00 | 96.00 | 48.00 | 54.00 | 94.00 | 52.00 | 38.00 | 18.00 | 74.00 | +| 50 | c4ai-command-r-plus-v01.Q8_0-sys | 63.11 | 100.00 | 100.00 | 96.00 | 22.00 | 74.00 | 48.00 | 40.00 | 22.00 | 66.00 | +| 50 | c4ai-command-r-plus-v01.Q8_0 | 63.11 | 100.00 | 100.00 | 96.00 | 22.00 | 72.00 | 46.00 | 46.00 | 18.00 | 68.00 | +| 52 | phi-3-medium-4k-instruct-Q8_0 | 62.44 | 100.00 | 100.00 | 86.00 | 18.00 | 96.00 | 58.00 | 20.00 | 18.00 | 66.00 | +| 53 | mixtral-8x7b-instruct-v0.1.Q8_0 | 62.00 | 98.00 | 96.00 | 78.00 | 24.00 | 96.00 | 50.00 | 34.00 | 8.00 | 74.00 | +| 54 | internlm2_5-20b-chat-Q8_0 | 61.78 | 100.00 | 100.00 | 100.00 | 0.00 | 96.00 | 32.00 | 50.00 | 30.00 | 48.00 | +| 54 | deepseek-v2-chat-Q8_0 | 61.78 | 100.00 | 100.00 | 98.00 | 24.00 | 90.00 | 56.00 | 22.00 | 20.00 | 46.00 | +| 56 | qwen1_5-110b-chat-q8_0 | 61.56 | 100.00 | 100.00 | 68.00 | 26.00 | 94.00 | 40.00 | 30.00 | 18.00 | 78.00 | +| 56 | Karasu-Mixtral-8x22B-v0.1.Q8_0 | 61.56 | 100.00 | 100.00 | 94.00 | 20.00 | 88.00 | 40.00 | 26.00 | 18.00 | 68.00 | +| 56 | deepseek-v2-chat-Q8_0-sys | 61.56 | 100.00 | 100.00 | 100.00 | 16.00 | 90.00 | 74.00 | 20.00 | 12.00 | 42.00 | +| 59 | qwen1_5-110b-chat-q8_0-sys | 61.33 | 100.00 | 100.00 | 62.00 | 54.00 | 96.00 | 36.00 | 22.00 | 14.00 | 68.00 | +| 60 | gpt-3.5-turbo-sys | 60.89 | 100.00 | 78.00 | 76.00 | 32.00 | 90.00 | 56.00 | 18.00 | 18.00 | 80.00 | +| 60 | Mistral-Nemo-Instruct-2407-Q8_0 | 60.89 | 96.00 | 100.00 | 90.00 | 20.00 | 98.00 | 28.00 | 50.00 | 18.00 | 48.00 | +| 60 | mixtral-8x7b-instruct-v0.1.Q8_0-sys | 60.89 | 98.00 | 86.00 | 50.00 | 50.00 | 88.00 | 68.00 | 34.00 | 10.00 | 64.00 | +| 63 | mistral-nemo | 60.44 | 100.00 | 100.00 | 90.00 | 12.00 | 96.00 | 28.00 | 52.00 | 18.00 | 48.00 | +| 64 | c4ai-command-r-plus-08-2024-Q8_0 | 59.78 | 100.00 | 98.00 | 66.00 | 46.00 | 74.00 | 6.00 | 54.00 | 16.00 | 78.00 | +| 65 | mistral-medium-sys | 59.11 | 100.00 | 100.00 | 60.00 | 42.00 | 82.00 | 32.00 | 24.00 | 28.00 | 64.00 | +| 65 | internlm2_5-20b-chat-Q8_0-sys | 59.11 | 100.00 | 100.00 | 88.00 | 4.00 | 96.00 | 34.00 | 36.00 | 16.00 | 58.00 | +| 67 | c4ai-command-r-08-2024-Q8_0 | 58.44 | 100.00 | 100.00 | 84.00 | 10.00 | 100.00 | 22.00 | 58.00 | 14.00 | 38.00 | +| 68 | mistral-small | 58.00 | 98.00 | 98.00 | 80.00 | 22.00 | 82.00 | 14.00 | 66.00 | 8.00 | 54.00 | +| 69 | qwen1_5-72b-chat-q8_0 | 57.56 | 100.00 | 100.00 | 90.00 | 14.00 | 76.00 | 46.00 | 28.00 | 32.00 | 32.00 | +| 70 | mistral-small-sys | 57.11 | 92.00 | 100.00 | 76.00 | 26.00 | 84.00 | 6.00 | 68.00 | 18.00 | 44.00 | +| 70 | Smaug-2-72B.Q8_0 | 57.11 | 100.00 | 100.00 | 90.00 | 6.00 | 84.00 | 48.00 | 14.00 | 24.00 | 48.00 | +| 72 | qwen1_5-32b-chat-q8_0 | 56.67 | 100.00 | 94.00 | 82.00 | 16.00 | 94.00 | 18.00 | 46.00 | 12.00 | 48.00 | +| 73 | Bielik-11B-v2.3-Instruct-Q8_0 | 56.00 | 100.00 | 100.00 | 66.00 | 28.00 | 80.00 | 4.00 | 62.00 | 30.00 | 34.00 | +| 74 | c4ai-command-r-v01-Q8_0 | 55.78 | 100.00 | 100.00 | 76.00 | 4.00 | 92.00 | 18.00 | 20.00 | 46.00 | 46.00 | +| 74 | llama-2-70b-chat.Q8_0 | 55.78 | 100.00 | 92.00 | 72.00 | 14.00 | 80.00 | 52.00 | 28.00 | 10.00 | 54.00 | +| 74 | mistral-medium | 55.78 | 100.00 | 100.00 | 54.00 | 64.00 | 66.00 | 24.00 | 40.00 | 24.00 | 30.00 | +| 74 | claude-3-haiku | 55.78 | 100.00 | 100.00 | 92.00 | 10.00 | 84.00 | 14.00 | 58.00 | 22.00 | 22.00 | +| 78 | aya-23-35b-Q8_0 | 55.33 | 100.00 | 100.00 | 92.00 | 6.00 | 98.00 | 12.00 | 24.00 | 64.00 | 2.00 | +| 79 | Meta-Llama-3-8B-Instruct.Q8_0 | 55.11 | 96.00 | 94.00 | 46.00 | 38.00 | 96.00 | 36.00 | 8.00 | 28.00 | 54.00 | +| 80 | miqu-1-70b.q5_K_M | 54.89 | 100.00 | 100.00 | 50.00 | 66.00 | 64.00 | 16.00 | 40.00 | 30.00 | 28.00 | +| 80 | c4ai-command-r-v01-Q8_0-sys | 54.89 | 94.00 | 100.00 | 72.00 | 16.00 | 88.00 | 10.00 | 18.00 | 58.00 | 38.00 | +| 82 | ggml-dbrx-instruct-16x12b-q8_0 | 54.44 | 100.00 | 100.00 | 58.00 | 34.00 | 70.00 | 12.00 | 46.00 | 20.00 | 50.00 | +| 83 | snowflake-arctic-instruct-Q5_K_M-sys | 53.56 | 86.00 | 100.00 | 56.00 | 14.00 | 86.00 | 38.00 | 28.00 | 20.00 | 54.00 | +| 84 | Phi-3-mini-4k-instruct-Q8_0 | 53.33 | 98.00 | 96.00 | 98.00 | 4.00 | 90.00 | 20.00 | 26.00 | 36.00 | 12.00 | +| 85 | Meta-Llama-3-8B-Instruct.Q8_0-sys | 51.56 | 80.00 | 88.00 | 50.00 | 32.00 | 90.00 | 42.00 | 14.00 | 18.00 | 50.00 | +| 86 | gpt-3.5-turbo | 50.22 | 96.00 | 54.00 | 78.00 | 18.00 | 80.00 | 22.00 | 52.00 | 18.00 | 34.00 | +| 87 | deepseek-v2-lite-chat-Q8_0 | 49.56 | 88.00 | 100.00 | 60.00 | 14.00 | 88.00 | 8.00 | 62.00 | 6.00 | 20.00 | +| 88 | llama-3.1-8b-instruct | 48.67 | 82.00 | 92.00 | 44.00 | 32.00 | 96.00 | 24.00 | 10.00 | 10.00 | 48.00 | +| 89 | mistral-7b-instruct-v0.2.Q8_0 | 46.89 | 98.00 | 86.00 | 42.00 | 24.00 | 70.00 | 12.00 | 56.00 | 28.00 | 6.00 | +| 90 | aya-23-8b-Q8_0 | 45.78 | 72.00 | 100.00 | 32.00 | 46.00 | 56.00 | 2.00 | 52.00 | 48.00 | 4.00 | +| 90 | llama-3.1-8b-instruct-sys | 45.78 | 82.00 | 78.00 | 34.00 | 30.00 | 84.00 | 30.00 | 8.00 | 8.00 | 58.00 | +| 92 | deepseek-v2-lite-chat-Q8_0-sys | 45.56 | 54.00 | 100.00 | 62.00 | 8.00 | 90.00 | 8.00 | 70.00 | 6.00 | 12.00 | +| 93 | snowflake-arctic-instruct-Q5_K_M | 44.89 | 54.00 | 82.00 | 70.00 | 8.00 | 60.00 | 30.00 | 44.00 | 34.00 | 22.00 | +| 94 | gemma-7b-it-Q8_0 | 43.56 | 100.00 | 54.00 | 62.00 | 32.00 | 36.00 | 28.00 | 50.00 | 18.00 | 12.00 | +| 95 | llama-2-13b-chat.Q8_0 | 43.33 | 88.00 | 82.00 | 32.00 | 22.00 | 76.00 | 6.00 | 42.00 | 30.00 | 12.00 | +| 96 | mistral-7b-instruct-v0.2.Q8_0-sys | 33.33 | 72.00 | 90.00 | 20.00 | 16.00 | 52.00 | 12.00 | 20.00 | 10.00 | 8.00 | +| 97 | llama-2-7b-chat.Q8_0 | 31.56 | 36.00 | 72.00 | 34.00 | 24.00 | 28.00 | 22.00 | 22.00 | 30.00 | 16.00 | +| 98 | WizardLM-2-7B-Q8_0 | 20.00 | 36.00 | 16.00 | 8.00 | 14.00 | 18.00 | 22.00 | 36.00 | 12.00 | 18.00 | +| 99 | gemma-2b-it-Q8_0 | 5.56 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 12.00 | 24.00 | 14.00 | 0.00 | +| 100 | qwen1_5-7b-chat-q8_0 | 2.89 | 6.00 | 2.00 | 4.00 | 0.00 | 2.00 | 0.00 | 8.00 | 2.00 | 2.00 | Notes: - Hand-curating the result lists by hand was getting tedious, so I merged the closed-weights and open-weights results lists into a single automatically generated list.