|
1 | 1 | {
|
2 | 2 | "cells": [
|
3 |
| - { |
4 |
| - "cell_type": "code", |
5 |
| - "execution_count": 1, |
6 |
| - "metadata": {}, |
7 |
| - "outputs": [], |
8 |
| - "source": [ |
9 |
| - "%load_ext autoreload\n", |
10 |
| - "%autoreload 2" |
11 |
| - ] |
12 |
| - }, |
13 | 3 | {
|
14 | 4 | "cell_type": "code",
|
15 | 5 | "execution_count": 4,
|
|
46 | 36 | " <tbody>\n",
|
47 | 37 | " <tr>\n",
|
48 | 38 | " <th>0</th>\n",
|
49 |
| - " <td>BAAI/bge-base-en</td>\n", |
50 |
| - " <td>768</td>\n", |
51 |
| - " <td>Base English model</td>\n", |
52 |
| - " <td>0.50</td>\n", |
53 |
| - " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'}</td>\n", |
54 |
| - " </tr>\n", |
55 |
| - " <tr>\n", |
56 |
| - " <th>1</th>\n", |
57 | 39 | " <td>BAAI/bge-base-en-v1.5</td>\n",
|
58 | 40 | " <td>768</td>\n",
|
59 | 41 | " <td>Base English model, v1.5</td>\n",
|
60 | 42 | " <td>0.44</td>\n",
|
61 | 43 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'}</td>\n",
|
62 | 44 | " </tr>\n",
|
63 | 45 | " <tr>\n",
|
64 |
| - " <th>2</th>\n", |
65 |
| - " <td>BAAI/bge-large-en-v1.5-quantized</td>\n", |
66 |
| - " <td>1024</td>\n", |
67 |
| - " <td>Large English model, v1.5</td>\n", |
68 |
| - " <td>1.34</td>\n", |
69 |
| - " <td>{'hf': 'qdrant/bge-large-en-v1.5-onnx-q'}</td>\n", |
70 |
| - " </tr>\n", |
71 |
| - " <tr>\n", |
72 |
| - " <th>3</th>\n", |
| 46 | + " <th>1</th>\n", |
73 | 47 | " <td>BAAI/bge-large-en-v1.5</td>\n",
|
74 | 48 | " <td>1024</td>\n",
|
75 | 49 | " <td>Large English model, v1.5</td>\n",
|
76 | 50 | " <td>1.34</td>\n",
|
77 |
| - " <td>{'hf': 'qdrant/bge-large-en-v1.5-onnx'}</td>\n", |
78 |
| - " </tr>\n", |
79 |
| - " <tr>\n", |
80 |
| - " <th>4</th>\n", |
81 |
| - " <td>BAAI/bge-small-en</td>\n", |
82 |
| - " <td>384</td>\n", |
83 |
| - " <td>Fast English model</td>\n", |
84 |
| - " <td>0.20</td>\n", |
85 |
| - " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz'}</td>\n", |
| 51 | + " <td>{'hf': 'qdrant/bge-large-en-v1.5-onnx-q'}</td>\n", |
86 | 52 | " </tr>\n",
|
87 | 53 | " <tr>\n",
|
88 |
| - " <th>5</th>\n", |
| 54 | + " <th>2</th>\n", |
89 | 55 | " <td>BAAI/bge-small-en-v1.5</td>\n",
|
90 | 56 | " <td>384</td>\n",
|
91 | 57 | " <td>Fast and Default English model</td>\n",
|
92 | 58 | " <td>0.13</td>\n",
|
93 | 59 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'}</td>\n",
|
94 | 60 | " </tr>\n",
|
95 | 61 | " <tr>\n",
|
96 |
| - " <th>6</th>\n", |
| 62 | + " <th>3</th>\n", |
97 | 63 | " <td>BAAI/bge-small-zh-v1.5</td>\n",
|
98 | 64 | " <td>512</td>\n",
|
99 | 65 | " <td>Fast and recommended Chinese model</td>\n",
|
100 | 66 | " <td>0.10</td>\n",
|
101 | 67 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'}</td>\n",
|
102 | 68 | " </tr>\n",
|
103 | 69 | " <tr>\n",
|
104 |
| - " <th>7</th>\n", |
| 70 | + " <th>4</th>\n", |
105 | 71 | " <td>sentence-transformers/all-MiniLM-L6-v2</td>\n",
|
106 | 72 | " <td>384</td>\n",
|
107 | 73 | " <td>Sentence Transformer model, MiniLM-L6-v2</td>\n",
|
108 | 74 | " <td>0.09</td>\n",
|
109 | 75 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'}</td>\n",
|
110 | 76 | " </tr>\n",
|
111 | 77 | " <tr>\n",
|
112 |
| - " <th>8</th>\n", |
| 78 | + " <th>5</th>\n", |
113 | 79 | " <td>intfloat/multilingual-e5-large</td>\n",
|
114 | 80 | " <td>1024</td>\n",
|
115 | 81 | " <td>Multilingual model, e5-large. Recommend using this model for non-English languages</td>\n",
|
116 | 82 | " <td>2.24</td>\n",
|
117 | 83 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'}</td>\n",
|
118 | 84 | " </tr>\n",
|
119 | 85 | " <tr>\n",
|
120 |
| - " <th>9</th>\n", |
| 86 | + " <th>6</th>\n", |
121 | 87 | " <td>sentence-transformers/paraphrase-multilingual-mpnet-base-v2</td>\n",
|
122 | 88 | " <td>768</td>\n",
|
123 | 89 | " <td>Sentence-transformers model for tasks like clustering or semantic search</td>\n",
|
124 | 90 | " <td>1.11</td>\n",
|
125 | 91 | " <td>{'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'}</td>\n",
|
126 | 92 | " </tr>\n",
|
127 | 93 | " <tr>\n",
|
128 |
| - " <th>10</th>\n", |
| 94 | + " <th>7</th>\n", |
129 | 95 | " <td>jinaai/jina-embeddings-v2-base-en</td>\n",
|
130 | 96 | " <td>768</td>\n",
|
131 | 97 | " <td>English embedding model supporting 8192 sequence length</td>\n",
|
132 | 98 | " <td>0.55</td>\n",
|
133 | 99 | " <td>{'hf': 'xenova/jina-embeddings-v2-base-en'}</td>\n",
|
134 | 100 | " </tr>\n",
|
135 | 101 | " <tr>\n",
|
136 |
| - " <th>11</th>\n", |
| 102 | + " <th>8</th>\n", |
137 | 103 | " <td>jinaai/jina-embeddings-v2-small-en</td>\n",
|
138 | 104 | " <td>512</td>\n",
|
139 | 105 | " <td>English embedding model supporting 8192 sequence length</td>\n",
|
|
145 | 111 | "</div>"
|
146 | 112 | ],
|
147 | 113 | "text/plain": [
|
148 |
| - " model dim \\\n", |
149 |
| - "0 BAAI/bge-base-en 768 \n", |
150 |
| - "1 BAAI/bge-base-en-v1.5 768 \n", |
151 |
| - "2 BAAI/bge-large-en-v1.5-quantized 1024 \n", |
152 |
| - "3 BAAI/bge-large-en-v1.5 1024 \n", |
153 |
| - "4 BAAI/bge-small-en 384 \n", |
154 |
| - "5 BAAI/bge-small-en-v1.5 384 \n", |
155 |
| - "6 BAAI/bge-small-zh-v1.5 512 \n", |
156 |
| - "7 sentence-transformers/all-MiniLM-L6-v2 384 \n", |
157 |
| - "8 intfloat/multilingual-e5-large 1024 \n", |
158 |
| - "9 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n", |
159 |
| - "10 jinaai/jina-embeddings-v2-base-en 768 \n", |
160 |
| - "11 jinaai/jina-embeddings-v2-small-en 512 \n", |
| 114 | + " model dim \\\n", |
| 115 | + "0 BAAI/bge-base-en-v1.5 768 \n", |
| 116 | + "1 BAAI/bge-large-en-v1.5 1024 \n", |
| 117 | + "2 BAAI/bge-small-en-v1.5 384 \n", |
| 118 | + "3 BAAI/bge-small-zh-v1.5 512 \n", |
| 119 | + "4 sentence-transformers/all-MiniLM-L6-v2 384 \n", |
| 120 | + "5 intfloat/multilingual-e5-large 1024 \n", |
| 121 | + "6 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n", |
| 122 | + "7 jinaai/jina-embeddings-v2-base-en 768 \n", |
| 123 | + "8 jinaai/jina-embeddings-v2-small-en 512 \n", |
161 | 124 | "\n",
|
162 |
| - " description \\\n", |
163 |
| - "0 Base English model \n", |
164 |
| - "1 Base English model, v1.5 \n", |
165 |
| - "2 Large English model, v1.5 \n", |
166 |
| - "3 Large English model, v1.5 \n", |
167 |
| - "4 Fast English model \n", |
168 |
| - "5 Fast and Default English model \n", |
169 |
| - "6 Fast and recommended Chinese model \n", |
170 |
| - "7 Sentence Transformer model, MiniLM-L6-v2 \n", |
171 |
| - "8 Multilingual model, e5-large. Recommend using this model for non-English languages \n", |
172 |
| - "9 Sentence-transformers model for tasks like clustering or semantic search \n", |
173 |
| - "10 English embedding model supporting 8192 sequence length \n", |
174 |
| - "11 English embedding model supporting 8192 sequence length \n", |
| 125 | + " description \\\n", |
| 126 | + "0 Base English model, v1.5 \n", |
| 127 | + "1 Large English model, v1.5 \n", |
| 128 | + "2 Fast and Default English model \n", |
| 129 | + "3 Fast and recommended Chinese model \n", |
| 130 | + "4 Sentence Transformer model, MiniLM-L6-v2 \n", |
| 131 | + "5 Multilingual model, e5-large. Recommend using this model for non-English languages \n", |
| 132 | + "6 Sentence-transformers model for tasks like clustering or semantic search \n", |
| 133 | + "7 English embedding model supporting 8192 sequence length \n", |
| 134 | + "8 English embedding model supporting 8192 sequence length \n", |
175 | 135 | "\n",
|
176 |
| - " size_in_GB \\\n", |
177 |
| - "0 0.50 \n", |
178 |
| - "1 0.44 \n", |
179 |
| - "2 1.34 \n", |
180 |
| - "3 1.34 \n", |
181 |
| - "4 0.20 \n", |
182 |
| - "5 0.13 \n", |
183 |
| - "6 0.10 \n", |
184 |
| - "7 0.09 \n", |
185 |
| - "8 2.24 \n", |
186 |
| - "9 1.11 \n", |
187 |
| - "10 0.55 \n", |
188 |
| - "11 0.13 \n", |
| 136 | + " size_in_GB \\\n", |
| 137 | + "0 0.44 \n", |
| 138 | + "1 1.34 \n", |
| 139 | + "2 0.13 \n", |
| 140 | + "3 0.10 \n", |
| 141 | + "4 0.09 \n", |
| 142 | + "5 2.24 \n", |
| 143 | + "6 1.11 \n", |
| 144 | + "7 0.55 \n", |
| 145 | + "8 0.13 \n", |
189 | 146 | "\n",
|
190 |
| - " sources \n", |
191 |
| - "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} \n", |
192 |
| - "1 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n", |
193 |
| - "2 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n", |
194 |
| - "3 {'hf': 'qdrant/bge-large-en-v1.5-onnx'} \n", |
195 |
| - "4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz'} \n", |
196 |
| - "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n", |
197 |
| - "6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n", |
198 |
| - "7 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n", |
199 |
| - "8 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n", |
200 |
| - "9 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n", |
201 |
| - "10 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n", |
202 |
| - "11 {'hf': 'xenova/jina-embeddings-v2-small-en'} " |
| 147 | + " sources \n", |
| 148 | + "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz', 'hf': 'qdrant/bge-base-en-v1.5-onnx-q'} \n", |
| 149 | + "1 {'hf': 'qdrant/bge-large-en-v1.5-onnx-q'} \n", |
| 150 | + "2 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n", |
| 151 | + "3 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n", |
| 152 | + "4 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n", |
| 153 | + "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n", |
| 154 | + "6 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n", |
| 155 | + "7 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n", |
| 156 | + "8 {'hf': 'xenova/jina-embeddings-v2-small-en'} " |
203 | 157 | ]
|
204 | 158 | },
|
205 | 159 | "execution_count": 4,
|
|
232 | 186 | "name": "python",
|
233 | 187 | "nbconvert_exporter": "python",
|
234 | 188 | "pygments_lexer": "ipython3",
|
235 |
| - "version": "3.11.5" |
| 189 | + "version": "3.10.13" |
236 | 190 | },
|
237 | 191 | "orig_nbformat": 4
|
238 | 192 | },
|
|
0 commit comments