Skip to content

Commit 70566df

Browse files
authored
docs: Updated supported models (#302)
1 parent fd116dd commit 70566df

File tree

1 file changed

+107
-66
lines changed

1 file changed

+107
-66
lines changed

docs/examples/Supported_Models.ipynb

+107-66
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
},
5555
{
5656
"cell_type": "code",
57-
"execution_count": 6,
57+
"execution_count": 3,
5858
"metadata": {
5959
"ExecuteTime": {
6060
"end_time": "2024-05-31T18:13:25.863008Z",
@@ -127,16 +127,16 @@
127127
" </tr>\n",
128128
" <tr>\n",
129129
" <th>5</th>\n",
130-
" <td>snowflake/snowflake-arctic-embed-s</td>\n",
130+
" <td>BAAI/bge-small-en</td>\n",
131131
" <td>384</td>\n",
132-
" <td>Based on infloat/e5-small-unsupervised, does n...</td>\n",
132+
" <td>Fast English model</td>\n",
133133
" <td>0.130</td>\n",
134134
" </tr>\n",
135135
" <tr>\n",
136136
" <th>6</th>\n",
137-
" <td>BAAI/bge-small-en</td>\n",
137+
" <td>snowflake/snowflake-arctic-embed-s</td>\n",
138138
" <td>384</td>\n",
139-
" <td>Fast English model</td>\n",
139+
" <td>Based on infloat/e5-small-unsupervised, does n...</td>\n",
140140
" <td>0.130</td>\n",
141141
" </tr>\n",
142142
" <tr>\n",
@@ -169,26 +169,26 @@
169169
" </tr>\n",
170170
" <tr>\n",
171171
" <th>11</th>\n",
172+
" <td>jinaai/jina-embeddings-v2-base-de</td>\n",
173+
" <td>768</td>\n",
174+
" <td>German embedding model supporting 8192 sequenc...</td>\n",
175+
" <td>0.320</td>\n",
176+
" </tr>\n",
177+
" <tr>\n",
178+
" <th>12</th>\n",
172179
" <td>BAAI/bge-base-en</td>\n",
173180
" <td>768</td>\n",
174181
" <td>Base English model</td>\n",
175182
" <td>0.420</td>\n",
176183
" </tr>\n",
177184
" <tr>\n",
178-
" <th>12</th>\n",
185+
" <th>13</th>\n",
179186
" <td>snowflake/snowflake-arctic-embed-m</td>\n",
180187
" <td>768</td>\n",
181188
" <td>Based on intfloat/e5-base-unsupervised model, ...</td>\n",
182189
" <td>0.430</td>\n",
183190
" </tr>\n",
184191
" <tr>\n",
185-
" <th>13</th>\n",
186-
" <td>nomic-ai/nomic-embed-text-v1</td>\n",
187-
" <td>768</td>\n",
188-
" <td>8192 context length english model</td>\n",
189-
" <td>0.520</td>\n",
190-
" </tr>\n",
191-
" <tr>\n",
192192
" <th>14</th>\n",
193193
" <td>jinaai/jina-embeddings-v2-base-en</td>\n",
194194
" <td>768</td>\n",
@@ -204,40 +204,40 @@
204204
" </tr>\n",
205205
" <tr>\n",
206206
" <th>16</th>\n",
207+
" <td>nomic-ai/nomic-embed-text-v1</td>\n",
208+
" <td>768</td>\n",
209+
" <td>8192 context length english model</td>\n",
210+
" <td>0.520</td>\n",
211+
" </tr>\n",
212+
" <tr>\n",
213+
" <th>17</th>\n",
207214
" <td>snowflake/snowflake-arctic-embed-m-long</td>\n",
208215
" <td>768</td>\n",
209216
" <td>Based on nomic-ai/nomic-embed-text-v1-unsuperv...</td>\n",
210217
" <td>0.540</td>\n",
211218
" </tr>\n",
212219
" <tr>\n",
213-
" <th>17</th>\n",
220+
" <th>18</th>\n",
214221
" <td>mixedbread-ai/mxbai-embed-large-v1</td>\n",
215222
" <td>1024</td>\n",
216223
" <td>MixedBread Base sentence embedding model, does...</td>\n",
217224
" <td>0.640</td>\n",
218225
" </tr>\n",
219226
" <tr>\n",
220-
" <th>18</th>\n",
227+
" <th>19</th>\n",
221228
" <td>sentence-transformers/paraphrase-multilingual-...</td>\n",
222229
" <td>768</td>\n",
223230
" <td>Sentence-transformers model for tasks like clu...</td>\n",
224231
" <td>1.000</td>\n",
225232
" </tr>\n",
226233
" <tr>\n",
227-
" <th>19</th>\n",
234+
" <th>20</th>\n",
228235
" <td>snowflake/snowflake-arctic-embed-l</td>\n",
229236
" <td>1024</td>\n",
230237
" <td>Based on intfloat/e5-large-unsupervised, large...</td>\n",
231238
" <td>1.020</td>\n",
232239
" </tr>\n",
233240
" <tr>\n",
234-
" <th>20</th>\n",
235-
" <td>BAAI/bge-large-en-v1.5</td>\n",
236-
" <td>1024</td>\n",
237-
" <td>Large English model, v1.5</td>\n",
238-
" <td>1.200</td>\n",
239-
" </tr>\n",
240-
" <tr>\n",
241241
" <th>21</th>\n",
242242
" <td>thenlper/gte-large</td>\n",
243243
" <td>1024</td>\n",
@@ -246,6 +246,13 @@
246246
" </tr>\n",
247247
" <tr>\n",
248248
" <th>22</th>\n",
249+
" <td>BAAI/bge-large-en-v1.5</td>\n",
250+
" <td>1024</td>\n",
251+
" <td>Large English model, v1.5</td>\n",
252+
" <td>1.200</td>\n",
253+
" </tr>\n",
254+
" <tr>\n",
255+
" <th>23</th>\n",
249256
" <td>intfloat/multilingual-e5-large</td>\n",
250257
" <td>1024</td>\n",
251258
" <td>Multilingual model, e5-large. Recommend using ...</td>\n",
@@ -262,52 +269,54 @@
262269
"2 sentence-transformers/all-MiniLM-L6-v2 384 \n",
263270
"3 snowflake/snowflake-arctic-embed-xs 384 \n",
264271
"4 jinaai/jina-embeddings-v2-small-en 512 \n",
265-
"5 snowflake/snowflake-arctic-embed-s 384 \n",
266-
"6 BAAI/bge-small-en 384 \n",
272+
"5 BAAI/bge-small-en 384 \n",
273+
"6 snowflake/snowflake-arctic-embed-s 384 \n",
267274
"7 nomic-ai/nomic-embed-text-v1.5-Q 768 \n",
268275
"8 BAAI/bge-base-en-v1.5 768 \n",
269276
"9 sentence-transformers/paraphrase-multilingual-... 384 \n",
270277
"10 Qdrant/clip-ViT-B-32-text 512 \n",
271-
"11 BAAI/bge-base-en 768 \n",
272-
"12 snowflake/snowflake-arctic-embed-m 768 \n",
273-
"13 nomic-ai/nomic-embed-text-v1 768 \n",
278+
"11 jinaai/jina-embeddings-v2-base-de 768 \n",
279+
"12 BAAI/bge-base-en 768 \n",
280+
"13 snowflake/snowflake-arctic-embed-m 768 \n",
274281
"14 jinaai/jina-embeddings-v2-base-en 768 \n",
275282
"15 nomic-ai/nomic-embed-text-v1.5 768 \n",
276-
"16 snowflake/snowflake-arctic-embed-m-long 768 \n",
277-
"17 mixedbread-ai/mxbai-embed-large-v1 1024 \n",
278-
"18 sentence-transformers/paraphrase-multilingual-... 768 \n",
279-
"19 snowflake/snowflake-arctic-embed-l 1024 \n",
280-
"20 BAAI/bge-large-en-v1.5 1024 \n",
283+
"16 nomic-ai/nomic-embed-text-v1 768 \n",
284+
"17 snowflake/snowflake-arctic-embed-m-long 768 \n",
285+
"18 mixedbread-ai/mxbai-embed-large-v1 1024 \n",
286+
"19 sentence-transformers/paraphrase-multilingual-... 768 \n",
287+
"20 snowflake/snowflake-arctic-embed-l 1024 \n",
281288
"21 thenlper/gte-large 1024 \n",
282-
"22 intfloat/multilingual-e5-large 1024 \n",
289+
"22 BAAI/bge-large-en-v1.5 1024 \n",
290+
"23 intfloat/multilingual-e5-large 1024 \n",
283291
"\n",
284292
" description size_in_GB \n",
285293
"0 Fast and Default English model 0.067 \n",
286294
"1 Fast and recommended Chinese model 0.090 \n",
287295
"2 Sentence Transformer model, MiniLM-L6-v2 0.090 \n",
288296
"3 Based on all-MiniLM-L6-v2 model with only 22m ... 0.090 \n",
289297
"4 English embedding model supporting 8192 sequen... 0.120 \n",
290-
"5 Based on infloat/e5-small-unsupervised, does n... 0.130 \n",
291-
"6 Fast English model 0.130 \n",
298+
"5 Fast English model 0.130 \n",
299+
"6 Based on infloat/e5-small-unsupervised, does n... 0.130 \n",
292300
"7 Quantized 8192 context length english model 0.130 \n",
293301
"8 Base English model, v1.5 0.210 \n",
294302
"9 Sentence Transformer model, paraphrase-multili... 0.220 \n",
295303
"10 CLIP text encoder 0.250 \n",
296-
"11 Base English model 0.420 \n",
297-
"12 Based on intfloat/e5-base-unsupervised model, ... 0.430 \n",
298-
"13 8192 context length english model 0.520 \n",
304+
"11 German embedding model supporting 8192 sequenc... 0.320 \n",
305+
"12 Base English model 0.420 \n",
306+
"13 Based on intfloat/e5-base-unsupervised model, ... 0.430 \n",
299307
"14 English embedding model supporting 8192 sequen... 0.520 \n",
300308
"15 8192 context length english model 0.520 \n",
301-
"16 Based on nomic-ai/nomic-embed-text-v1-unsuperv... 0.540 \n",
302-
"17 MixedBread Base sentence embedding model, does... 0.640 \n",
303-
"18 Sentence-transformers model for tasks like clu... 1.000 \n",
304-
"19 Based on intfloat/e5-large-unsupervised, large... 1.020 \n",
305-
"20 Large English model, v1.5 1.200 \n",
309+
"16 8192 context length english model 0.520 \n",
310+
"17 Based on nomic-ai/nomic-embed-text-v1-unsuperv... 0.540 \n",
311+
"18 MixedBread Base sentence embedding model, does... 0.640 \n",
312+
"19 Sentence-transformers model for tasks like clu... 1.000 \n",
313+
"20 Based on intfloat/e5-large-unsupervised, large... 1.020 \n",
306314
"21 Large general text embeddings model 1.200 \n",
307-
"22 Multilingual model, e5-large. Recommend using ... 2.240 "
315+
"22 Large English model, v1.5 1.200 \n",
316+
"23 Multilingual model, e5-large. Recommend using ... 2.240 "
308317
]
309318
},
310-
"execution_count": 6,
319+
"execution_count": 3,
311320
"metadata": {},
312321
"output_type": "execute_result"
313322
}
@@ -331,7 +340,7 @@
331340
},
332341
{
333342
"cell_type": "code",
334-
"execution_count": 8,
343+
"execution_count": 4,
335344
"metadata": {
336345
"ExecuteTime": {
337346
"end_time": "2024-05-31T18:13:27.124747Z",
@@ -364,47 +373,61 @@
364373
" <th>vocab_size</th>\n",
365374
" <th>description</th>\n",
366375
" <th>size_in_GB</th>\n",
376+
" <th>requires_idf</th>\n",
367377
" </tr>\n",
368378
" </thead>\n",
369379
" <tbody>\n",
370380
" <tr>\n",
371381
" <th>0</th>\n",
382+
" <td>Qdrant/bm25</td>\n",
383+
" <td>NaN</td>\n",
384+
" <td>BM25 as sparse embeddings meant to be used wit...</td>\n",
385+
" <td>0.010</td>\n",
386+
" <td>True</td>\n",
387+
" </tr>\n",
388+
" <tr>\n",
389+
" <th>1</th>\n",
372390
" <td>Qdrant/bm42-all-minilm-l6-v2-attentions</td>\n",
373-
" <td>30522</td>\n",
391+
" <td>30522.0</td>\n",
374392
" <td>Light sparse embedding model, which assigns an...</td>\n",
375393
" <td>0.090</td>\n",
394+
" <td>True</td>\n",
376395
" </tr>\n",
377396
" <tr>\n",
378-
" <th>1</th>\n",
397+
" <th>2</th>\n",
379398
" <td>prithvida/Splade_PP_en_v1</td>\n",
380-
" <td>30522</td>\n",
399+
" <td>30522.0</td>\n",
381400
" <td>Misspelled version of the model. Retained for ...</td>\n",
382401
" <td>0.532</td>\n",
402+
" <td>NaN</td>\n",
383403
" </tr>\n",
384404
" <tr>\n",
385-
" <th>2</th>\n",
405+
" <th>3</th>\n",
386406
" <td>prithivida/Splade_PP_en_v1</td>\n",
387-
" <td>30522</td>\n",
407+
" <td>30522.0</td>\n",
388408
" <td>Independent Implementation of SPLADE++ Model f...</td>\n",
389409
" <td>0.532</td>\n",
410+
" <td>NaN</td>\n",
390411
" </tr>\n",
391412
" </tbody>\n",
392413
"</table>\n",
393414
"</div>"
394415
],
395416
"text/plain": [
396417
" model vocab_size \\\n",
397-
"0 Qdrant/bm42-all-minilm-l6-v2-attentions 30522 \n",
398-
"1 prithvida/Splade_PP_en_v1 30522 \n",
399-
"2 prithivida/Splade_PP_en_v1 30522 \n",
418+
"0 Qdrant/bm25 NaN \n",
419+
"1 Qdrant/bm42-all-minilm-l6-v2-attentions 30522.0 \n",
420+
"2 prithvida/Splade_PP_en_v1 30522.0 \n",
421+
"3 prithivida/Splade_PP_en_v1 30522.0 \n",
400422
"\n",
401-
" description size_in_GB \n",
402-
"0 Light sparse embedding model, which assigns an... 0.090 \n",
403-
"1 Misspelled version of the model. Retained for ... 0.532 \n",
404-
"2 Independent Implementation of SPLADE++ Model f... 0.532 "
423+
" description size_in_GB requires_idf \n",
424+
"0 BM25 as sparse embeddings meant to be used wit... 0.010 True \n",
425+
"1 Light sparse embedding model, which assigns an... 0.090 True \n",
426+
"2 Misspelled version of the model. Retained for ... 0.532 NaN \n",
427+
"3 Independent Implementation of SPLADE++ Model f... 0.532 NaN "
405428
]
406429
},
407-
"execution_count": 8,
430+
"execution_count": 4,
408431
"metadata": {},
409432
"output_type": "execute_result"
410433
}
@@ -429,7 +452,7 @@
429452
},
430453
{
431454
"cell_type": "code",
432-
"execution_count": 10,
455+
"execution_count": 5,
433456
"metadata": {
434457
"ExecuteTime": {
435458
"end_time": "2024-05-31T18:14:34.370252Z",
@@ -482,7 +505,7 @@
482505
"0 colbert-ir/colbertv2.0 128 Late interaction model 0.44"
483506
]
484507
},
485-
"execution_count": 10,
508+
"execution_count": 5,
486509
"metadata": {},
487510
"output_type": "execute_result"
488511
}
@@ -507,7 +530,7 @@
507530
},
508531
{
509532
"cell_type": "code",
510-
"execution_count": 12,
533+
"execution_count": 6,
511534
"metadata": {
512535
"ExecuteTime": {
513536
"end_time": "2024-05-31T18:14:42.501881Z",
@@ -558,6 +581,20 @@
558581
" <td>CLIP vision encoder based on ViT-B/32</td>\n",
559582
" <td>0.34</td>\n",
560583
" </tr>\n",
584+
" <tr>\n",
585+
" <th>2</th>\n",
586+
" <td>Qdrant/Unicom-ViT-B-32</td>\n",
587+
" <td>512</td>\n",
588+
" <td>Unicom Unicom-ViT-B-32 from open-metric-learning</td>\n",
589+
" <td>0.48</td>\n",
590+
" </tr>\n",
591+
" <tr>\n",
592+
" <th>3</th>\n",
593+
" <td>Qdrant/Unicom-ViT-B-16</td>\n",
594+
" <td>768</td>\n",
595+
" <td>Unicom Unicom-ViT-B-16 from open-metric-learning</td>\n",
596+
" <td>0.82</td>\n",
597+
" </tr>\n",
561598
" </tbody>\n",
562599
"</table>\n",
563600
"</div>"
@@ -566,13 +603,17 @@
566603
" model dim \\\n",
567604
"0 Qdrant/resnet50-onnx 2048 \n",
568605
"1 Qdrant/clip-ViT-B-32-vision 512 \n",
606+
"2 Qdrant/Unicom-ViT-B-32 512 \n",
607+
"3 Qdrant/Unicom-ViT-B-16 768 \n",
569608
"\n",
570609
" description size_in_GB \n",
571610
"0 ResNet-50 from `Deep Residual Learning for Ima... 0.10 \n",
572-
"1 CLIP vision encoder based on ViT-B/32 0.34 "
611+
"1 CLIP vision encoder based on ViT-B/32 0.34 \n",
612+
"2 Unicom Unicom-ViT-B-32 from open-metric-learning 0.48 \n",
613+
"3 Unicom Unicom-ViT-B-16 from open-metric-learning 0.82 "
573614
]
574615
},
575-
"execution_count": 12,
616+
"execution_count": 6,
576617
"metadata": {},
577618
"output_type": "execute_result"
578619
}
@@ -602,7 +643,7 @@
602643
"name": "python",
603644
"nbconvert_exporter": "python",
604645
"pygments_lexer": "ipython3",
605-
"version": "3.11.4"
646+
"version": "3.11.8"
606647
},
607648
"orig_nbformat": 4,
608649
"vscode": {

0 commit comments

Comments
 (0)