54 | 54 | },
55 | 55 | {
56 | 56 | "cell_type": "code",
57 | | - "execution_count": 6,
| 57 | + "execution_count": 3,
58 | 58 | "metadata": {
59 | 59 | "ExecuteTime": {
60 | 60 | "end_time": "2024-05-31T18:13:25.863008Z",

127 | 127 | " </tr>\n",
128 | 128 | " <tr>\n",
129 | 129 | " <th>5</th>\n",
130 | | - " <td>snowflake/snowflake-arctic-embed-s</td>\n",
| 130 | + " <td>BAAI/bge-small-en</td>\n",
131 | 131 | " <td>384</td>\n",
132 | | - " <td>Based on infloat/e5-small-unsupervised, does n...</td>\n",
| 132 | + " <td>Fast English model</td>\n",
133 | 133 | " <td>0.130</td>\n",
134 | 134 | " </tr>\n",
135 | 135 | " <tr>\n",
136 | 136 | " <th>6</th>\n",
137 | | - " <td>BAAI/bge-small-en</td>\n",
| 137 | + " <td>snowflake/snowflake-arctic-embed-s</td>\n",
138 | 138 | " <td>384</td>\n",
139 | | - " <td>Fast English model</td>\n",
| 139 | + " <td>Based on infloat/e5-small-unsupervised, does n...</td>\n",
140 | 140 | " <td>0.130</td>\n",
141 | 141 | " </tr>\n",
142 | 142 | " <tr>\n",

169 | 169 | " </tr>\n",
170 | 170 | " <tr>\n",
171 | 171 | " <th>11</th>\n",
| 172 | + " <td>jinaai/jina-embeddings-v2-base-de</td>\n",
| 173 | + " <td>768</td>\n",
| 174 | + " <td>German embedding model supporting 8192 sequenc...</td>\n",
| 175 | + " <td>0.320</td>\n",
| 176 | + " </tr>\n",
| 177 | + " <tr>\n",
| 178 | + " <th>12</th>\n",
172 | 179 | " <td>BAAI/bge-base-en</td>\n",
173 | 180 | " <td>768</td>\n",
174 | 181 | " <td>Base English model</td>\n",
175 | 182 | " <td>0.420</td>\n",
176 | 183 | " </tr>\n",
177 | 184 | " <tr>\n",
178 | | - " <th>12</th>\n",
| 185 | + " <th>13</th>\n",
179 | 186 | " <td>snowflake/snowflake-arctic-embed-m</td>\n",
180 | 187 | " <td>768</td>\n",
181 | 188 | " <td>Based on intfloat/e5-base-unsupervised model, ...</td>\n",
182 | 189 | " <td>0.430</td>\n",
183 | 190 | " </tr>\n",
184 | 191 | " <tr>\n",
185 | | - " <th>13</th>\n",
186 | | - " <td>nomic-ai/nomic-embed-text-v1</td>\n",
187 | | - " <td>768</td>\n",
188 | | - " <td>8192 context length english model</td>\n",
189 | | - " <td>0.520</td>\n",
190 | | - " </tr>\n",
191 | | - " <tr>\n",
192 | 192 | " <th>14</th>\n",
193 | 193 | " <td>jinaai/jina-embeddings-v2-base-en</td>\n",
194 | 194 | " <td>768</td>\n",

204 | 204 | " </tr>\n",
205 | 205 | " <tr>\n",
206 | 206 | " <th>16</th>\n",
| 207 | + " <td>nomic-ai/nomic-embed-text-v1</td>\n",
| 208 | + " <td>768</td>\n",
| 209 | + " <td>8192 context length english model</td>\n",
| 210 | + " <td>0.520</td>\n",
| 211 | + " </tr>\n",
| 212 | + " <tr>\n",
| 213 | + " <th>17</th>\n",
207 | 214 | " <td>snowflake/snowflake-arctic-embed-m-long</td>\n",
208 | 215 | " <td>768</td>\n",
209 | 216 | " <td>Based on nomic-ai/nomic-embed-text-v1-unsuperv...</td>\n",
210 | 217 | " <td>0.540</td>\n",
211 | 218 | " </tr>\n",
212 | 219 | " <tr>\n",
213 | | - " <th>17</th>\n",
| 220 | + " <th>18</th>\n",
214 | 221 | " <td>mixedbread-ai/mxbai-embed-large-v1</td>\n",
215 | 222 | " <td>1024</td>\n",
216 | 223 | " <td>MixedBread Base sentence embedding model, does...</td>\n",
217 | 224 | " <td>0.640</td>\n",
218 | 225 | " </tr>\n",
219 | 226 | " <tr>\n",
220 | | - " <th>18</th>\n",
| 227 | + " <th>19</th>\n",
221 | 228 | " <td>sentence-transformers/paraphrase-multilingual-...</td>\n",
222 | 229 | " <td>768</td>\n",
223 | 230 | " <td>Sentence-transformers model for tasks like clu...</td>\n",
224 | 231 | " <td>1.000</td>\n",
225 | 232 | " </tr>\n",
226 | 233 | " <tr>\n",
227 | | - " <th>19</th>\n",
| 234 | + " <th>20</th>\n",
228 | 235 | " <td>snowflake/snowflake-arctic-embed-l</td>\n",
229 | 236 | " <td>1024</td>\n",
230 | 237 | " <td>Based on intfloat/e5-large-unsupervised, large...</td>\n",
231 | 238 | " <td>1.020</td>\n",
232 | 239 | " </tr>\n",
233 | 240 | " <tr>\n",
234 | | - " <th>20</th>\n",
235 | | - " <td>BAAI/bge-large-en-v1.5</td>\n",
236 | | - " <td>1024</td>\n",
237 | | - " <td>Large English model, v1.5</td>\n",
238 | | - " <td>1.200</td>\n",
239 | | - " </tr>\n",
240 | | - " <tr>\n",
241 | 241 | " <th>21</th>\n",
242 | 242 | " <td>thenlper/gte-large</td>\n",
243 | 243 | " <td>1024</td>\n",

246 | 246 | " </tr>\n",
247 | 247 | " <tr>\n",
248 | 248 | " <th>22</th>\n",
| 249 | + " <td>BAAI/bge-large-en-v1.5</td>\n",
| 250 | + " <td>1024</td>\n",
| 251 | + " <td>Large English model, v1.5</td>\n",
| 252 | + " <td>1.200</td>\n",
| 253 | + " </tr>\n",
| 254 | + " <tr>\n",
| 255 | + " <th>23</th>\n",
249 | 256 | " <td>intfloat/multilingual-e5-large</td>\n",
250 | 257 | " <td>1024</td>\n",
251 | 258 | " <td>Multilingual model, e5-large. Recommend using ...</td>\n",

262 | 269 | "2 sentence-transformers/all-MiniLM-L6-v2 384 \n",
263 | 270 | "3 snowflake/snowflake-arctic-embed-xs 384 \n",
264 | 271 | "4 jinaai/jina-embeddings-v2-small-en 512 \n",
265 | | - "5 snowflake/snowflake-arctic-embed-s 384 \n",
266 | | - "6 BAAI/bge-small-en 384 \n",
| 272 | + "5 BAAI/bge-small-en 384 \n",
| 273 | + "6 snowflake/snowflake-arctic-embed-s 384 \n",
267 | 274 | "7 nomic-ai/nomic-embed-text-v1.5-Q 768 \n",
268 | 275 | "8 BAAI/bge-base-en-v1.5 768 \n",
269 | 276 | "9 sentence-transformers/paraphrase-multilingual-... 384 \n",
270 | 277 | "10 Qdrant/clip-ViT-B-32-text 512 \n",
271 | | - "11 BAAI/bge-base-en 768 \n",
272 | | - "12 snowflake/snowflake-arctic-embed-m 768 \n",
273 | | - "13 nomic-ai/nomic-embed-text-v1 768 \n",
| 278 | + "11 jinaai/jina-embeddings-v2-base-de 768 \n",
| 279 | + "12 BAAI/bge-base-en 768 \n",
| 280 | + "13 snowflake/snowflake-arctic-embed-m 768 \n",
274 | 281 | "14 jinaai/jina-embeddings-v2-base-en 768 \n",
275 | 282 | "15 nomic-ai/nomic-embed-text-v1.5 768 \n",
276 | | - "16 snowflake/snowflake-arctic-embed-m-long 768 \n",
277 | | - "17 mixedbread-ai/mxbai-embed-large-v1 1024 \n",
278 | | - "18 sentence-transformers/paraphrase-multilingual-... 768 \n",
279 | | - "19 snowflake/snowflake-arctic-embed-l 1024 \n",
280 | | - "20 BAAI/bge-large-en-v1.5 1024 \n",
| 283 | + "16 nomic-ai/nomic-embed-text-v1 768 \n",
| 284 | + "17 snowflake/snowflake-arctic-embed-m-long 768 \n",
| 285 | + "18 mixedbread-ai/mxbai-embed-large-v1 1024 \n",
| 286 | + "19 sentence-transformers/paraphrase-multilingual-... 768 \n",
| 287 | + "20 snowflake/snowflake-arctic-embed-l 1024 \n",
281 | 288 | "21 thenlper/gte-large 1024 \n",
282 | | - "22 intfloat/multilingual-e5-large 1024 \n",
| 289 | + "22 BAAI/bge-large-en-v1.5 1024 \n",
| 290 | + "23 intfloat/multilingual-e5-large 1024 \n",
283 | 291 | "\n",
284 | 292 | " description size_in_GB \n",
285 | 293 | "0 Fast and Default English model 0.067 \n",
286 | 294 | "1 Fast and recommended Chinese model 0.090 \n",
287 | 295 | "2 Sentence Transformer model, MiniLM-L6-v2 0.090 \n",
288 | 296 | "3 Based on all-MiniLM-L6-v2 model with only 22m ... 0.090 \n",
289 | 297 | "4 English embedding model supporting 8192 sequen... 0.120 \n",
290 | | - "5 Based on infloat/e5-small-unsupervised, does n... 0.130 \n",
291 | | - "6 Fast English model 0.130 \n",
| 298 | + "5 Fast English model 0.130 \n",
| 299 | + "6 Based on infloat/e5-small-unsupervised, does n... 0.130 \n",
292 | 300 | "7 Quantized 8192 context length english model 0.130 \n",
293 | 301 | "8 Base English model, v1.5 0.210 \n",
294 | 302 | "9 Sentence Transformer model, paraphrase-multili... 0.220 \n",
295 | 303 | "10 CLIP text encoder 0.250 \n",
296 | | - "11 Base English model 0.420 \n",
297 | | - "12 Based on intfloat/e5-base-unsupervised model, ... 0.430 \n",
298 | | - "13 8192 context length english model 0.520 \n",
| 304 | + "11 German embedding model supporting 8192 sequenc... 0.320 \n",
| 305 | + "12 Base English model 0.420 \n",
| 306 | + "13 Based on intfloat/e5-base-unsupervised model, ... 0.430 \n",
299 | 307 | "14 English embedding model supporting 8192 sequen... 0.520 \n",
300 | 308 | "15 8192 context length english model 0.520 \n",
301 | | - "16 Based on nomic-ai/nomic-embed-text-v1-unsuperv... 0.540 \n",
302 | | - "17 MixedBread Base sentence embedding model, does... 0.640 \n",
303 | | - "18 Sentence-transformers model for tasks like clu... 1.000 \n",
304 | | - "19 Based on intfloat/e5-large-unsupervised, large... 1.020 \n",
305 | | - "20 Large English model, v1.5 1.200 \n",
| 309 | + "16 8192 context length english model 0.520 \n",
| 310 | + "17 Based on nomic-ai/nomic-embed-text-v1-unsuperv... 0.540 \n",
| 311 | + "18 MixedBread Base sentence embedding model, does... 0.640 \n",
| 312 | + "19 Sentence-transformers model for tasks like clu... 1.000 \n",
| 313 | + "20 Based on intfloat/e5-large-unsupervised, large... 1.020 \n",
306 | 314 | "21 Large general text embeddings model 1.200 \n",
307 | | - "22 Multilingual model, e5-large. Recommend using ... 2.240 "
| 315 | + "22 Large English model, v1.5 1.200 \n",
| 316 | + "23 Multilingual model, e5-large. Recommend using ... 2.240 "
308 | 317 | ]
309 | 318 | },
310 | | - "execution_count": 6,
| 319 | + "execution_count": 3,
311 | 320 | "metadata": {},
312 | 321 | "output_type": "execute_result"
313 | 322 | }

331 | 340 | },
332 | 341 | {
333 | 342 | "cell_type": "code",
334 | | - "execution_count": 8,
| 343 | + "execution_count": 4,
335 | 344 | "metadata": {
336 | 345 | "ExecuteTime": {
337 | 346 | "end_time": "2024-05-31T18:13:27.124747Z",

364 | 373 | " <th>vocab_size</th>\n",
365 | 374 | " <th>description</th>\n",
366 | 375 | " <th>size_in_GB</th>\n",
| 376 | + " <th>requires_idf</th>\n",
367 | 377 | " </tr>\n",
368 | 378 | " </thead>\n",
369 | 379 | " <tbody>\n",
370 | 380 | " <tr>\n",
371 | 381 | " <th>0</th>\n",
| 382 | + " <td>Qdrant/bm25</td>\n",
| 383 | + " <td>NaN</td>\n",
| 384 | + " <td>BM25 as sparse embeddings meant to be used wit...</td>\n",
| 385 | + " <td>0.010</td>\n",
| 386 | + " <td>True</td>\n",
| 387 | + " </tr>\n",
| 388 | + " <tr>\n",
| 389 | + " <th>1</th>\n",
372 | 390 | " <td>Qdrant/bm42-all-minilm-l6-v2-attentions</td>\n",
373 | | - " <td>30522</td>\n",
| 391 | + " <td>30522.0</td>\n",
374 | 392 | " <td>Light sparse embedding model, which assigns an...</td>\n",
375 | 393 | " <td>0.090</td>\n",
| 394 | + " <td>True</td>\n",
376 | 395 | " </tr>\n",
377 | 396 | " <tr>\n",
378 | | - " <th>1</th>\n",
| 397 | + " <th>2</th>\n",
379 | 398 | " <td>prithvida/Splade_PP_en_v1</td>\n",
380 | | - " <td>30522</td>\n",
| 399 | + " <td>30522.0</td>\n",
381 | 400 | " <td>Misspelled version of the model. Retained for ...</td>\n",
382 | 401 | " <td>0.532</td>\n",
| 402 | + " <td>NaN</td>\n",
383 | 403 | " </tr>\n",
384 | 404 | " <tr>\n",
385 | | - " <th>2</th>\n",
| 405 | + " <th>3</th>\n",
386 | 406 | " <td>prithivida/Splade_PP_en_v1</td>\n",
387 | | - " <td>30522</td>\n",
| 407 | + " <td>30522.0</td>\n",
388 | 408 | " <td>Independent Implementation of SPLADE++ Model f...</td>\n",
389 | 409 | " <td>0.532</td>\n",
| 410 | + " <td>NaN</td>\n",
390 | 411 | " </tr>\n",
391 | 412 | " </tbody>\n",
392 | 413 | "</table>\n",
393 | 414 | "</div>"
394 | 415 | ],
395 | 416 | "text/plain": [
396 | 417 | " model vocab_size \\\n",
397 | | - "0 Qdrant/bm42-all-minilm-l6-v2-attentions 30522 \n",
398 | | - "1 prithvida/Splade_PP_en_v1 30522 \n",
399 | | - "2 prithivida/Splade_PP_en_v1 30522 \n",
| 418 | + "0 Qdrant/bm25 NaN \n",
| 419 | + "1 Qdrant/bm42-all-minilm-l6-v2-attentions 30522.0 \n",
| 420 | + "2 prithvida/Splade_PP_en_v1 30522.0 \n",
| 421 | + "3 prithivida/Splade_PP_en_v1 30522.0 \n",
400 | 422 | "\n",
401 | | - " description size_in_GB \n",
402 | | - "0 Light sparse embedding model, which assigns an... 0.090 \n",
403 | | - "1 Misspelled version of the model. Retained for ... 0.532 \n",
404 | | - "2 Independent Implementation of SPLADE++ Model f... 0.532 "
| 423 | + " description size_in_GB requires_idf \n",
| 424 | + "0 BM25 as sparse embeddings meant to be used wit... 0.010 True \n",
| 425 | + "1 Light sparse embedding model, which assigns an... 0.090 True \n",
| 426 | + "2 Misspelled version of the model. Retained for ... 0.532 NaN \n",
| 427 | + "3 Independent Implementation of SPLADE++ Model f... 0.532 NaN "
405 | 428 | ]
406 | 429 | },
407 | | - "execution_count": 8,
| 430 | + "execution_count": 4,
408 | 431 | "metadata": {},
409 | 432 | "output_type": "execute_result"
410 | 433 | }

429 | 452 | },
430 | 453 | {
431 | 454 | "cell_type": "code",
432 | | - "execution_count": 10,
| 455 | + "execution_count": 5,
433 | 456 | "metadata": {
434 | 457 | "ExecuteTime": {
435 | 458 | "end_time": "2024-05-31T18:14:34.370252Z",

482 | 505 | "0 colbert-ir/colbertv2.0 128 Late interaction model 0.44"
483 | 506 | ]
484 | 507 | },
485 | | - "execution_count": 10,
| 508 | + "execution_count": 5,
486 | 509 | "metadata": {},
487 | 510 | "output_type": "execute_result"
488 | 511 | }

507 | 530 | },
508 | 531 | {
509 | 532 | "cell_type": "code",
510 | | - "execution_count": 12,
| 533 | + "execution_count": 6,
511 | 534 | "metadata": {
512 | 535 | "ExecuteTime": {
513 | 536 | "end_time": "2024-05-31T18:14:42.501881Z",

558 | 581 | " <td>CLIP vision encoder based on ViT-B/32</td>\n",
559 | 582 | " <td>0.34</td>\n",
560 | 583 | " </tr>\n",
| 584 | + " <tr>\n",
| 585 | + " <th>2</th>\n",
| 586 | + " <td>Qdrant/Unicom-ViT-B-32</td>\n",
| 587 | + " <td>512</td>\n",
| 588 | + " <td>Unicom Unicom-ViT-B-32 from open-metric-learning</td>\n",
| 589 | + " <td>0.48</td>\n",
| 590 | + " </tr>\n",
| 591 | + " <tr>\n",
| 592 | + " <th>3</th>\n",
| 593 | + " <td>Qdrant/Unicom-ViT-B-16</td>\n",
| 594 | + " <td>768</td>\n",
| 595 | + " <td>Unicom Unicom-ViT-B-16 from open-metric-learning</td>\n",
| 596 | + " <td>0.82</td>\n",
| 597 | + " </tr>\n",
561 | 598 | " </tbody>\n",
562 | 599 | "</table>\n",
563 | 600 | "</div>"

566 | 603 | " model dim \\\n",
567 | 604 | "0 Qdrant/resnet50-onnx 2048 \n",
568 | 605 | "1 Qdrant/clip-ViT-B-32-vision 512 \n",
| 606 | + "2 Qdrant/Unicom-ViT-B-32 512 \n",
| 607 | + "3 Qdrant/Unicom-ViT-B-16 768 \n",
569 | 608 | "\n",
570 | 609 | " description size_in_GB \n",
571 | 610 | "0 ResNet-50 from `Deep Residual Learning for Ima... 0.10 \n",
572 | | - "1 CLIP vision encoder based on ViT-B/32 0.34 "
| 611 | + "1 CLIP vision encoder based on ViT-B/32 0.34 \n",
| 612 | + "2 Unicom Unicom-ViT-B-32 from open-metric-learning 0.48 \n",
| 613 | + "3 Unicom Unicom-ViT-B-16 from open-metric-learning 0.82 "
573 | 614 | ]
574 | 615 | },
575 | | - "execution_count": 12,
| 616 | + "execution_count": 6,
576 | 617 | "metadata": {},
577 | 618 | "output_type": "execute_result"
578 | 619 | }

602 | 643 | "name": "python",
603 | 644 | "nbconvert_exporter": "python",
604 | 645 | "pygments_lexer": "ipython3",
605 | | - "version": "3.11.4"
| 646 | + "version": "3.11.8"
606 | 647 | },
607 | 648 | "orig_nbformat": 4,
608 | 649 | "vscode": {
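Note: these hunks only update rendered DataFrame outputs and notebook metadata; the code cells that produce the tables sit outside the diff. For reference, the tables are generated from fastembed's model listings, roughly as in the sketch below. This is a minimal, assumed reconstruction using fastembed's `list_supported_models()` classmethods and pandas, not the notebook's exact cells.

```python
# Sketch (assumed, not the notebook's exact code) of how the supported-model
# tables shown in the diff above are typically produced with fastembed.
import pandas as pd

from fastembed import (
    ImageEmbedding,
    LateInteractionTextEmbedding,
    SparseTextEmbedding,
    TextEmbedding,
)

# Each class exposes list_supported_models(), which returns a list of dicts
# (model name, dim or vocab_size, description, size_in_GB, ...); wrapping the
# result in a DataFrame yields the HTML/text tables whose rows changed above.
dense_models = pd.DataFrame(TextEmbedding.list_supported_models())
sparse_models = pd.DataFrame(SparseTextEmbedding.list_supported_models())
late_interaction_models = pd.DataFrame(LateInteractionTextEmbedding.list_supported_models())
image_models = pd.DataFrame(ImageEmbedding.list_supported_models())

print(dense_models)   # e.g. BAAI/bge-small-en, snowflake/snowflake-arctic-embed-s, ...
print(sparse_models)  # e.g. Qdrant/bm25, Qdrant/bm42-all-minilm-l6-v2-attentions, ...
```

The new rows and the `requires_idf` column in the updated outputs (for example `Qdrant/bm25`, `jinaai/jina-embeddings-v2-base-de`, and the `Qdrant/Unicom-ViT-B-*` image models) presumably come from re-running these listings against a newer fastembed release, so the exact tables depend on the installed version.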