54 | 54 | },
55 | 55 | {
56 | 56 | "cell_type": "code",
57 | | - "execution_count": 6,
| 57 | + "execution_count": 3,
58 | 58 | "metadata": {
59 | 59 | "ExecuteTime": {
60 | 60 | "end_time": "2024-05-31T18:13:25.863008Z",

127 | 127 | " </tr>\n",
128 | 128 | " <tr>\n",
129 | 129 | " <th>5</th>\n",
130 | | - " <td>snowflake/snowflake-arctic-embed-s</td>\n",
| 130 | + " <td>BAAI/bge-small-en</td>\n",
131 | 131 | " <td>384</td>\n",
132 | | - " <td>Based on infloat/e5-small-unsupervised, does n...</td>\n",
| 132 | + " <td>Fast English model</td>\n",
133 | 133 | " <td>0.130</td>\n",
134 | 134 | " </tr>\n",
135 | 135 | " <tr>\n",
136 | 136 | " <th>6</th>\n",
137 | | - " <td>BAAI/bge-small-en</td>\n",
| 137 | + " <td>snowflake/snowflake-arctic-embed-s</td>\n",
138 | 138 | " <td>384</td>\n",
139 | | - " <td>Fast English model</td>\n",
| 139 | + " <td>Based on infloat/e5-small-unsupervised, does n...</td>\n",
140 | 140 | " <td>0.130</td>\n",
141 | 141 | " </tr>\n",
142 | 142 | " <tr>\n",

169 | 169 | " </tr>\n",
170 | 170 | " <tr>\n",
171 | 171 | " <th>11</th>\n",
| 172 | + " <td>jinaai/jina-embeddings-v2-base-de</td>\n",
| 173 | + " <td>768</td>\n",
| 174 | + " <td>German embedding model supporting 8192 sequenc...</td>\n",
| 175 | + " <td>0.320</td>\n",
| 176 | + " </tr>\n",
| 177 | + " <tr>\n",
| 178 | + " <th>12</th>\n",
172 | 179 | " <td>BAAI/bge-base-en</td>\n",
173 | 180 | " <td>768</td>\n",
174 | 181 | " <td>Base English model</td>\n",
175 | 182 | " <td>0.420</td>\n",
176 | 183 | " </tr>\n",
177 | 184 | " <tr>\n",
178 | | - " <th>12</th>\n",
| 185 | + " <th>13</th>\n",
179 | 186 | " <td>snowflake/snowflake-arctic-embed-m</td>\n",
180 | 187 | " <td>768</td>\n",
181 | 188 | " <td>Based on intfloat/e5-base-unsupervised model, ...</td>\n",
182 | 189 | " <td>0.430</td>\n",
183 | 190 | " </tr>\n",
184 | 191 | " <tr>\n",
185 | | - " <th>13</th>\n",
186 | | - " <td>nomic-ai/nomic-embed-text-v1</td>\n",
187 | | - " <td>768</td>\n",
188 | | - " <td>8192 context length english model</td>\n",
189 | | - " <td>0.520</td>\n",
190 | | - " </tr>\n",
191 | | - " <tr>\n",
192 | 192 | " <th>14</th>\n",
193 | 193 | " <td>jinaai/jina-embeddings-v2-base-en</td>\n",
194 | 194 | " <td>768</td>\n",

204 | 204 | " </tr>\n",
205 | 205 | " <tr>\n",
206 | 206 | " <th>16</th>\n",
| 207 | + " <td>nomic-ai/nomic-embed-text-v1</td>\n",
| 208 | + " <td>768</td>\n",
| 209 | + " <td>8192 context length english model</td>\n",
| 210 | + " <td>0.520</td>\n",
| 211 | + " </tr>\n",
| 212 | + " <tr>\n",
| 213 | + " <th>17</th>\n",
207 | 214 | " <td>snowflake/snowflake-arctic-embed-m-long</td>\n",
208 | 215 | " <td>768</td>\n",
209 | 216 | " <td>Based on nomic-ai/nomic-embed-text-v1-unsuperv...</td>\n",
210 | 217 | " <td>0.540</td>\n",
211 | 218 | " </tr>\n",
212 | 219 | " <tr>\n",
213 | | - " <th>17</th>\n",
| 220 | + " <th>18</th>\n",
214 | 221 | " <td>mixedbread-ai/mxbai-embed-large-v1</td>\n",
215 | 222 | " <td>1024</td>\n",
216 | 223 | " <td>MixedBread Base sentence embedding model, does...</td>\n",
217 | 224 | " <td>0.640</td>\n",
218 | 225 | " </tr>\n",
219 | 226 | " <tr>\n",
220 | | - " <th>18</th>\n",
| 227 | + " <th>19</th>\n",
221 | 228 | " <td>sentence-transformers/paraphrase-multilingual-...</td>\n",
222 | 229 | " <td>768</td>\n",
223 | 230 | " <td>Sentence-transformers model for tasks like clu...</td>\n",
224 | 231 | " <td>1.000</td>\n",
225 | 232 | " </tr>\n",
226 | 233 | " <tr>\n",
227 | | - " <th>19</th>\n",
| 234 | + " <th>20</th>\n",
228 | 235 | " <td>snowflake/snowflake-arctic-embed-l</td>\n",
229 | 236 | " <td>1024</td>\n",
230 | 237 | " <td>Based on intfloat/e5-large-unsupervised, large...</td>\n",
231 | 238 | " <td>1.020</td>\n",
232 | 239 | " </tr>\n",
233 | 240 | " <tr>\n",
234 | | - " <th>20</th>\n",
235 | | - " <td>BAAI/bge-large-en-v1.5</td>\n",
236 | | - " <td>1024</td>\n",
237 | | - " <td>Large English model, v1.5</td>\n",
238 | | - " <td>1.200</td>\n",
239 | | - " </tr>\n",
240 | | - " <tr>\n",
241 | 241 | " <th>21</th>\n",
242 | 242 | " <td>thenlper/gte-large</td>\n",
243 | 243 | " <td>1024</td>\n",

246 | 246 | " </tr>\n",
247 | 247 | " <tr>\n",
248 | 248 | " <th>22</th>\n",
| 249 | + " <td>BAAI/bge-large-en-v1.5</td>\n",
| 250 | + " <td>1024</td>\n",
| 251 | + " <td>Large English model, v1.5</td>\n",
| 252 | + " <td>1.200</td>\n",
| 253 | + " </tr>\n",
| 254 | + " <tr>\n",
| 255 | + " <th>23</th>\n",
249 | 256 | " <td>intfloat/multilingual-e5-large</td>\n",
250 | 257 | " <td>1024</td>\n",
251 | 258 | " <td>Multilingual model, e5-large. Recommend using ...</td>\n",

262 | 269 | "2 sentence-transformers/all-MiniLM-L6-v2 384 \n",
263 | 270 | "3 snowflake/snowflake-arctic-embed-xs 384 \n",
264 | 271 | "4 jinaai/jina-embeddings-v2-small-en 512 \n",
265 | | - "5 snowflake/snowflake-arctic-embed-s 384 \n",
266 | | - "6 BAAI/bge-small-en 384 \n",
| 272 | + "5 BAAI/bge-small-en 384 \n",
| 273 | + "6 snowflake/snowflake-arctic-embed-s 384 \n",
267 | 274 | "7 nomic-ai/nomic-embed-text-v1.5-Q 768 \n",
268 | 275 | "8 BAAI/bge-base-en-v1.5 768 \n",
269 | 276 | "9 sentence-transformers/paraphrase-multilingual-... 384 \n",
270 | 277 | "10 Qdrant/clip-ViT-B-32-text 512 \n",
271 | | - "11 BAAI/bge-base-en 768 \n",
272 | | - "12 snowflake/snowflake-arctic-embed-m 768 \n",
273 | | - "13 nomic-ai/nomic-embed-text-v1 768 \n",
| 278 | + "11 jinaai/jina-embeddings-v2-base-de 768 \n",
| 279 | + "12 BAAI/bge-base-en 768 \n",
| 280 | + "13 snowflake/snowflake-arctic-embed-m 768 \n",
274 | 281 | "14 jinaai/jina-embeddings-v2-base-en 768 \n",
275 | 282 | "15 nomic-ai/nomic-embed-text-v1.5 768 \n",
276 | | - "16 snowflake/snowflake-arctic-embed-m-long 768 \n",
277 | | - "17 mixedbread-ai/mxbai-embed-large-v1 1024 \n",
278 | | - "18 sentence-transformers/paraphrase-multilingual-... 768 \n",
279 | | - "19 snowflake/snowflake-arctic-embed-l 1024 \n",
280 | | - "20 BAAI/bge-large-en-v1.5 1024 \n",
| 283 | + "16 nomic-ai/nomic-embed-text-v1 768 \n",
| 284 | + "17 snowflake/snowflake-arctic-embed-m-long 768 \n",
| 285 | + "18 mixedbread-ai/mxbai-embed-large-v1 1024 \n",
| 286 | + "19 sentence-transformers/paraphrase-multilingual-... 768 \n",
| 287 | + "20 snowflake/snowflake-arctic-embed-l 1024 \n",
281 | 288 | "21 thenlper/gte-large 1024 \n",
282 | | - "22 intfloat/multilingual-e5-large 1024 \n",
| 289 | + "22 BAAI/bge-large-en-v1.5 1024 \n",
| 290 | + "23 intfloat/multilingual-e5-large 1024 \n",
283 | 291 | "\n",
284 | 292 | " description size_in_GB \n",
285 | 293 | "0 Fast and Default English model 0.067 \n",
286 | 294 | "1 Fast and recommended Chinese model 0.090 \n",
287 | 295 | "2 Sentence Transformer model, MiniLM-L6-v2 0.090 \n",
288 | 296 | "3 Based on all-MiniLM-L6-v2 model with only 22m ... 0.090 \n",
289 | 297 | "4 English embedding model supporting 8192 sequen... 0.120 \n",
290 | | - "5 Based on infloat/e5-small-unsupervised, does n... 0.130 \n",
291 | | - "6 Fast English model 0.130 \n",
| 298 | + "5 Fast English model 0.130 \n",
| 299 | + "6 Based on infloat/e5-small-unsupervised, does n... 0.130 \n",
292 | 300 | "7 Quantized 8192 context length english model 0.130 \n",
293 | 301 | "8 Base English model, v1.5 0.210 \n",
294 | 302 | "9 Sentence Transformer model, paraphrase-multili... 0.220 \n",
295 | 303 | "10 CLIP text encoder 0.250 \n",
296 | | - "11 Base English model 0.420 \n",
297 | | - "12 Based on intfloat/e5-base-unsupervised model, ... 0.430 \n",
298 | | - "13 8192 context length english model 0.520 \n",
| 304 | + "11 German embedding model supporting 8192 sequenc... 0.320 \n",
| 305 | + "12 Base English model 0.420 \n",
| 306 | + "13 Based on intfloat/e5-base-unsupervised model, ... 0.430 \n",
299 | 307 | "14 English embedding model supporting 8192 sequen... 0.520 \n",
300 | 308 | "15 8192 context length english model 0.520 \n",
301 | | - "16 Based on nomic-ai/nomic-embed-text-v1-unsuperv... 0.540 \n",
302 | | - "17 MixedBread Base sentence embedding model, does... 0.640 \n",
303 | | - "18 Sentence-transformers model for tasks like clu... 1.000 \n",
304 | | - "19 Based on intfloat/e5-large-unsupervised, large... 1.020 \n",
305 | | - "20 Large English model, v1.5 1.200 \n",
| 309 | + "16 8192 context length english model 0.520 \n",
| 310 | + "17 Based on nomic-ai/nomic-embed-text-v1-unsuperv... 0.540 \n",
| 311 | + "18 MixedBread Base sentence embedding model, does... 0.640 \n",
| 312 | + "19 Sentence-transformers model for tasks like clu... 1.000 \n",
| 313 | + "20 Based on intfloat/e5-large-unsupervised, large... 1.020 \n",
306 | 314 | "21 Large general text embeddings model 1.200 \n",
307 | | - "22 Multilingual model, e5-large. Recommend using ... 2.240 "
| 315 | + "22 Large English model, v1.5 1.200 \n",
| 316 | + "23 Multilingual model, e5-large. Recommend using ... 2.240 "
308 | 317 | ]
309 | 318 | },
310 | | - "execution_count": 6,
| 319 | + "execution_count": 3,
311 | 320 | "metadata": {},
312 | 321 | "output_type": "execute_result"
313 | 322 | }

331 | 340 | },
332 | 341 | {
333 | 342 | "cell_type": "code",
334 | | - "execution_count": 8,
| 343 | + "execution_count": 4,
335 | 344 | "metadata": {
336 | 345 | "ExecuteTime": {
337 | 346 | "end_time": "2024-05-31T18:13:27.124747Z",

364 | 373 | " <th>vocab_size</th>\n",
365 | 374 | " <th>description</th>\n",
366 | 375 | " <th>size_in_GB</th>\n",
| 376 | + " <th>requires_idf</th>\n",
367 | 377 | " </tr>\n",
368 | 378 | " </thead>\n",
369 | 379 | " <tbody>\n",
370 | 380 | " <tr>\n",
371 | 381 | " <th>0</th>\n",
| 382 | + " <td>Qdrant/bm25</td>\n",
| 383 | + " <td>NaN</td>\n",
| 384 | + " <td>BM25 as sparse embeddings meant to be used wit...</td>\n",
| 385 | + " <td>0.010</td>\n",
| 386 | + " <td>True</td>\n",
| 387 | + " </tr>\n",
| 388 | + " <tr>\n",
| 389 | + " <th>1</th>\n",
372 | 390 | " <td>Qdrant/bm42-all-minilm-l6-v2-attentions</td>\n",
373 | | - " <td>30522</td>\n",
| 391 | + " <td>30522.0</td>\n",
374 | 392 | " <td>Light sparse embedding model, which assigns an...</td>\n",
375 | 393 | " <td>0.090</td>\n",
| 394 | + " <td>True</td>\n",
376 | 395 | " </tr>\n",
377 | 396 | " <tr>\n",
378 | | - " <th>1</th>\n",
| 397 | + " <th>2</th>\n",
379 | 398 | " <td>prithvida/Splade_PP_en_v1</td>\n",
380 | | - " <td>30522</td>\n",
| 399 | + " <td>30522.0</td>\n",
381 | 400 | " <td>Misspelled version of the model. Retained for ...</td>\n",
382 | 401 | " <td>0.532</td>\n",
| 402 | + " <td>NaN</td>\n",
383 | 403 | " </tr>\n",
384 | 404 | " <tr>\n",
385 | | - " <th>2</th>\n",
| 405 | + " <th>3</th>\n",
386 | 406 | " <td>prithivida/Splade_PP_en_v1</td>\n",
387 | | - " <td>30522</td>\n",
| 407 | + " <td>30522.0</td>\n",
388 | 408 | " <td>Independent Implementation of SPLADE++ Model f...</td>\n",
389 | 409 | " <td>0.532</td>\n",
| 410 | + " <td>NaN</td>\n",
390 | 411 | " </tr>\n",
391 | 412 | " </tbody>\n",
392 | 413 | "</table>\n",
393 | 414 | "</div>"
394 | 415 | ],
395 | 416 | "text/plain": [
396 | 417 | " model vocab_size \\\n",
397 | | - "0 Qdrant/bm42-all-minilm-l6-v2-attentions 30522 \n",
398 | | - "1 prithvida/Splade_PP_en_v1 30522 \n",
399 | | - "2 prithivida/Splade_PP_en_v1 30522 \n",
| 418 | + "0 Qdrant/bm25 NaN \n",
| 419 | + "1 Qdrant/bm42-all-minilm-l6-v2-attentions 30522.0 \n",
| 420 | + "2 prithvida/Splade_PP_en_v1 30522.0 \n",
| 421 | + "3 prithivida/Splade_PP_en_v1 30522.0 \n",
400 | 422 | "\n",
401 | | - " description size_in_GB \n",
402 | | - "0 Light sparse embedding model, which assigns an... 0.090 \n",
403 | | - "1 Misspelled version of the model. Retained for ... 0.532 \n",
404 | | - "2 Independent Implementation of SPLADE++ Model f... 0.532 "
| 423 | + " description size_in_GB requires_idf \n",
| 424 | + "0 BM25 as sparse embeddings meant to be used wit... 0.010 True \n",
| 425 | + "1 Light sparse embedding model, which assigns an... 0.090 True \n",
| 426 | + "2 Misspelled version of the model. Retained for ... 0.532 NaN \n",
| 427 | + "3 Independent Implementation of SPLADE++ Model f... 0.532 NaN "
405 | 428 | ]
406 | 429 | },
407 | | - "execution_count": 8,
| 430 | + "execution_count": 4,
408 | 431 | "metadata": {},
409 | 432 | "output_type": "execute_result"
410 | 433 | }

429 | 452 | },
430 | 453 | {
431 | 454 | "cell_type": "code",
432 | | - "execution_count": 10,
| 455 | + "execution_count": 5,
433 | 456 | "metadata": {
434 | 457 | "ExecuteTime": {
435 | 458 | "end_time": "2024-05-31T18:14:34.370252Z",

482 | 505 | "0 colbert-ir/colbertv2.0 128 Late interaction model 0.44"
483 | 506 | ]
484 | 507 | },
485 | | - "execution_count": 10,
| 508 | + "execution_count": 5,
486 | 509 | "metadata": {},
487 | 510 | "output_type": "execute_result"
488 | 511 | }

507 | 530 | },
508 | 531 | {
509 | 532 | "cell_type": "code",
510 | | - "execution_count": 12,
| 533 | + "execution_count": 6,
511 | 534 | "metadata": {
512 | 535 | "ExecuteTime": {
513 | 536 | "end_time": "2024-05-31T18:14:42.501881Z",

558 | 581 | " <td>CLIP vision encoder based on ViT-B/32</td>\n",
559 | 582 | " <td>0.34</td>\n",
560 | 583 | " </tr>\n",
| 584 | + " <tr>\n",
| 585 | + " <th>2</th>\n",
| 586 | + " <td>Qdrant/Unicom-ViT-B-32</td>\n",
| 587 | + " <td>512</td>\n",
| 588 | + " <td>Unicom Unicom-ViT-B-32 from open-metric-learning</td>\n",
| 589 | + " <td>0.48</td>\n",
| 590 | + " </tr>\n",
| 591 | + " <tr>\n",
| 592 | + " <th>3</th>\n",
| 593 | + " <td>Qdrant/Unicom-ViT-B-16</td>\n",
| 594 | + " <td>768</td>\n",
| 595 | + " <td>Unicom Unicom-ViT-B-16 from open-metric-learning</td>\n",
| 596 | + " <td>0.82</td>\n",
| 597 | + " </tr>\n",
561 | 598 | " </tbody>\n",
562 | 599 | "</table>\n",
563 | 600 | "</div>"

566 | 603 | " model dim \\\n",
567 | 604 | "0 Qdrant/resnet50-onnx 2048 \n",
568 | 605 | "1 Qdrant/clip-ViT-B-32-vision 512 \n",
| 606 | + "2 Qdrant/Unicom-ViT-B-32 512 \n",
| 607 | + "3 Qdrant/Unicom-ViT-B-16 768 \n",
569 | 608 | "\n",
570 | 609 | " description size_in_GB \n",
571 | 610 | "0 ResNet-50 from `Deep Residual Learning for Ima... 0.10 \n",
572 | | - "1 CLIP vision encoder based on ViT-B/32 0.34 "
| 611 | + "1 CLIP vision encoder based on ViT-B/32 0.34 \n",
| 612 | + "2 Unicom Unicom-ViT-B-32 from open-metric-learning 0.48 \n",
| 613 | + "3 Unicom Unicom-ViT-B-16 from open-metric-learning 0.82 "
573 | 614 | ]
574 | 615 | },
575 | | - "execution_count": 12,
| 616 | + "execution_count": 6,
576 | 617 | "metadata": {},
577 | 618 | "output_type": "execute_result"
578 | 619 | }

602 | 643 | "name": "python",
603 | 644 | "nbconvert_exporter": "python",
604 | 645 | "pygments_lexer": "ipython3",
605 | | - "version": "3.11.4"
| 646 | + "version": "3.11.8"
606 | 647 | },
607 | 648 | "orig_nbformat": 4,
608 | 649 | "vscode": {
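Note: these hunks only update rendered DataFrame outputs and notebook metadata; the code cells that produce the tables sit outside the diff. For reference, the tables are generated from fastembed's model listings, roughly as in the sketch below. This is a minimal, assumed reconstruction using fastembed's `list_supported_models()` classmethods and pandas, not the notebook's exact cells.

```python
# Sketch (assumed, not the notebook's exact code) of how the supported-model
# tables shown in the diff above are typically produced with fastembed.
import pandas as pd

from fastembed import (
    ImageEmbedding,
    LateInteractionTextEmbedding,
    SparseTextEmbedding,
    TextEmbedding,
)

# Each class exposes list_supported_models(), which returns a list of dicts
# (model name, dim or vocab_size, description, size_in_GB, ...); wrapping the
# result in a DataFrame yields the HTML/text tables whose rows changed above.
dense_models = pd.DataFrame(TextEmbedding.list_supported_models())
sparse_models = pd.DataFrame(SparseTextEmbedding.list_supported_models())
late_interaction_models = pd.DataFrame(LateInteractionTextEmbedding.list_supported_models())
image_models = pd.DataFrame(ImageEmbedding.list_supported_models())

print(dense_models)   # e.g. BAAI/bge-small-en, snowflake/snowflake-arctic-embed-s, ...
print(sparse_models)  # e.g. Qdrant/bm25, Qdrant/bm42-all-minilm-l6-v2-attentions, ...
```

The new rows and the `requires_idf` column in the updated outputs (for example `Qdrant/bm25`, `jinaai/jina-embeddings-v2-base-de`, and the `Qdrant/Unicom-ViT-B-*` image models) presumably come from re-running these listings against a newer fastembed release, so the exact tables depend on the installed version.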