 12 |  12 | },
 13 |  13 | {
 14 |  14 | "cell_type": "code",
 15 |     | - "execution_count": 4,
    |  15 | + "execution_count": 2,
 16 |  16 | "metadata": {},
 17 |  17 | "outputs": [
 18 |  18 | {

110 | 110 | " </tr>\n",
111 | 111 | " <tr>\n",
112 | 112 | " <th>8</th>\n",
    | 113 | + " <td>sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2</td>\n",
    | 114 | + " <td>384</td>\n",
    | 115 | + " <td>Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2</td>\n",
    | 116 | + " <td>0.46</td>\n",
    | 117 | + " <td>{'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'}</td>\n",
    | 118 | + " </tr>\n",
    | 119 | + " <tr>\n",
    | 120 | + " <th>9</th>\n",
113 | 121 | " <td>nomic-ai/nomic-embed-text-v1</td>\n",
114 | 122 | " <td>768</td>\n",
115 | 123 | " <td>8192 context length english model</td>\n",
116 | 124 | " <td>0.54</td>\n",
117 |     | - " <td>{'hf': 'xenova/nomic-embed-text-v1'}</td>\n",
    | 125 | + " <td>{'hf': 'nomic-ai/nomic-embed-text-v1'}</td>\n",
118 | 126 | " </tr>\n",
119 | 127 | " <tr>\n",
120 |     | - " <th>9</th>\n",
    | 128 | + " <th>10</th>\n",
121 | 129 | " <td>intfloat/multilingual-e5-large</td>\n",
122 | 130 | " <td>1024</td>\n",
123 | 131 | " <td>Multilingual model, e5-large. Recommend using this model for non-English languages</td>\n",
124 | 132 | " <td>2.24</td>\n",
125 | 133 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'}</td>\n",
126 | 134 | " </tr>\n",
127 | 135 | " <tr>\n",
128 |     | - " <th>10</th>\n",
    | 136 | + " <th>11</th>\n",
129 | 137 | " <td>sentence-transformers/paraphrase-multilingual-mpnet-base-v2</td>\n",
130 | 138 | " <td>768</td>\n",
131 | 139 | " <td>Sentence-transformers model for tasks like clustering or semantic search</td>\n",
132 | 140 | " <td>1.11</td>\n",
133 | 141 | " <td>{'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'}</td>\n",
134 | 142 | " </tr>\n",
135 | 143 | " <tr>\n",
136 |     | - " <th>11</th>\n",
    | 144 | + " <th>12</th>\n",
137 | 145 | " <td>jinaai/jina-embeddings-v2-base-en</td>\n",
138 | 146 | " <td>768</td>\n",
139 | 147 | " <td>English embedding model supporting 8192 sequence length</td>\n",
140 | 148 | " <td>0.55</td>\n",
141 | 149 | " <td>{'hf': 'xenova/jina-embeddings-v2-base-en'}</td>\n",
142 | 150 | " </tr>\n",
143 | 151 | " <tr>\n",
144 |     | - " <th>12</th>\n",
    | 152 | + " <th>13</th>\n",
145 | 153 | " <td>jinaai/jina-embeddings-v2-small-en</td>\n",
146 | 154 | " <td>512</td>\n",
147 | 155 | " <td>English embedding model supporting 8192 sequence length</td>\n",

162 | 170 | "5 BAAI/bge-small-en-v1.5 384 \n",
163 | 171 | "6 BAAI/bge-small-zh-v1.5 512 \n",
164 | 172 | "7 sentence-transformers/all-MiniLM-L6-v2 384 \n",
165 |     | - "8 nomic-ai/nomic-embed-text-v1 768 \n",
166 |     | - "9 intfloat/multilingual-e5-large 1024 \n",
167 |     | - "10 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
168 |     | - "11 jinaai/jina-embeddings-v2-base-en 768 \n",
169 |     | - "12 jinaai/jina-embeddings-v2-small-en 512 \n",
    | 173 | + "8 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 384 \n",
    | 174 | + "9 nomic-ai/nomic-embed-text-v1 768 \n",
    | 175 | + "10 intfloat/multilingual-e5-large 1024 \n",
    | 176 | + "11 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
    | 177 | + "12 jinaai/jina-embeddings-v2-base-en 768 \n",
    | 178 | + "13 jinaai/jina-embeddings-v2-small-en 512 \n",
170 | 179 | "\n",
171 | 180 | " description \\\n",
172 | 181 | "0 Base English model \n",

177 | 186 | "5 Fast and Default English model \n",
178 | 187 | "6 Fast and recommended Chinese model \n",
179 | 188 | "7 Sentence Transformer model, MiniLM-L6-v2 \n",
180 |     | - "8 8192 context length english model \n",
181 |     | - "9 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
182 |     | - "10 Sentence-transformers model for tasks like clustering or semantic search \n",
183 |     | - "11 English embedding model supporting 8192 sequence length \n",
    | 189 | + "8 Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2 \n",
    | 190 | + "9 8192 context length english model \n",
    | 191 | + "10 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
    | 192 | + "11 Sentence-transformers model for tasks like clustering or semantic search \n",
184 | 193 | "12 English embedding model supporting 8192 sequence length \n",
    | 194 | + "13 English embedding model supporting 8192 sequence length \n",
185 | 195 | "\n",
186 | 196 | " size_in_GB \\\n",
187 | 197 | "0 0.50 \n",

192 | 202 | "5 0.13 \n",
193 | 203 | "6 0.10 \n",
194 | 204 | "7 0.09 \n",
195 |     | - "8 0.54 \n",
196 |     | - "9 2.24 \n",
197 |     | - "10 1.11 \n",
198 |     | - "11 0.55 \n",
199 |     | - "12 0.13 \n",
    | 205 | + "8 0.46 \n",
    | 206 | + "9 0.54 \n",
    | 207 | + "10 2.24 \n",
    | 208 | + "11 1.11 \n",
    | 209 | + "12 0.55 \n",
    | 210 | + "13 0.13 \n",
200 | 211 | "\n",
201 | 212 | " sources \n",
202 | 213 | "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} \n",

207 | 218 | "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n",
208 | 219 | "6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n",
209 | 220 | "7 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n",
210 |     | - "8 {'hf': 'xenova/nomic-embed-text-v1'} \n",
211 |     | - "9 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
212 |     | - "10 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
213 |     | - "11 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
214 |     | - "12 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
    | 221 | + "8 {'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'} \n",
    | 222 | + "9 {'hf': 'nomic-ai/nomic-embed-text-v1'} \n",
    | 223 | + "10 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
    | 224 | + "11 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
    | 225 | + "12 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
    | 226 | + "13 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
215 | 227 | ]
216 | 228 | },
217 |     | - "execution_count": 4,
    | 229 | + "execution_count": 2,
218 | 230 | "metadata": {},
219 | 231 | "output_type": "execute_result"
220 | 232 | }

244 | 256 | "name": "python",
245 | 257 | "nbconvert_exporter": "python",
246 | 258 | "pygments_lexer": "ipython3",
247 |     | - "version": "3.11.7"
    | 259 | + "version": "3.11.4"
248 | 260 | },
249 | 261 | "orig_nbformat": 4
250 | 262 | },
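The diff above is the notebook's rendering of fastembed's supported-model registry, now including sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 (384-dimensional, 0.46 GB, sourced from qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q) and pointing nomic-embed-text-v1 at the nomic-ai Hugging Face repo. A minimal sketch of how such a table is produced and how the newly listed model could be used, assuming the fastembed TextEmbedding API and pandas are available (exact field names and model list vary by fastembed release):

```python
import pandas as pd
from fastembed import TextEmbedding

# List the models the installed fastembed release supports; each entry is a dict
# with fields such as "model", "dim", "description", "size_in_GB", and "sources".
models = TextEmbedding.list_supported_models()
pd.set_option("display.max_colwidth", None)
print(pd.DataFrame(models))

# Try the newly listed multilingual model (384-dimensional vectors) on a couple
# of non-English sentences; embed() yields one numpy array per input document.
embedder = TextEmbedding(
    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
)
vectors = list(embedder.embed(["Hola mundo", "Bonjour le monde"]))
print(len(vectors), vectors[0].shape)  # expected: 2 (384,)
```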