12  | 12  | },
13  | 13  | {
14  | 14  | "cell_type": "code",
15  |     | - "execution_count": 2,
    | 15  | + "execution_count": 6,
16  | 16  | "metadata": {},
17  | 17  | "outputs": [
18  | 18  | {

110 | 110 | " </tr>\n",
111 | 111 | " <tr>\n",
112 | 112 | " <th>8</th>\n",
113 |     | - " <td>sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2</td>\n",
114 |     | - " <td>384</td>\n",
115 |     | - " <td>Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2</td>\n",
116 |     | - " <td>0.46</td>\n",
117 |     | - " <td>{'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'}</td>\n",
    | 113 | + " <td>nomic-ai/nomic-embed-text-v1</td>\n",
    | 114 | + " <td>768</td>\n",
    | 115 | + " <td>8192 context length english model</td>\n",
    | 116 | + " <td>0.54</td>\n",
    | 117 | + " <td>{'hf': 'nomic-ai/nomic-embed-text-v1'}</td>\n",
118 | 118 | " </tr>\n",
119 | 119 | " <tr>\n",
120 | 120 | " <th>9</th>\n",
121 |     | - " <td>nomic-ai/nomic-embed-text-v1</td>\n",
    | 121 | + " <td>nomic-ai/nomic-embed-text-v1.5</td>\n",
122 | 122 | " <td>768</td>\n",
123 | 123 | " <td>8192 context length english model</td>\n",
124 | 124 | " <td>0.54</td>\n",
125 |     | - " <td>{'hf': 'nomic-ai/nomic-embed-text-v1'}</td>\n",
    | 125 | + " <td>{'hf': 'nomic-ai/nomic-embed-text-v1.5'}</td>\n",
126 | 126 | " </tr>\n",
127 | 127 | " <tr>\n",
128 | 128 | " <th>10</th>\n",
    | 129 | + " <td>thenlper/gte-large</td>\n",
    | 130 | + " <td>1024</td>\n",
    | 131 | + " <td>Large general text embeddings model</td>\n",
    | 132 | + " <td>1.34</td>\n",
    | 133 | + " <td>{'hf': 'qdrant/gte-large-onnx'}</td>\n",
    | 134 | + " </tr>\n",
    | 135 | + " <tr>\n",
    | 136 | + " <th>11</th>\n",
129 | 137 | " <td>intfloat/multilingual-e5-large</td>\n",
130 | 138 | " <td>1024</td>\n",
131 | 139 | " <td>Multilingual model, e5-large. Recommend using this model for non-English languages</td>\n",
132 | 140 | " <td>2.24</td>\n",
133 | 141 | " <td>{'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'}</td>\n",
134 | 142 | " </tr>\n",
135 | 143 | " <tr>\n",
136 |     | - " <th>11</th>\n",
    | 144 | + " <th>12</th>\n",
137 | 145 | " <td>sentence-transformers/paraphrase-multilingual-mpnet-base-v2</td>\n",
138 | 146 | " <td>768</td>\n",
139 | 147 | " <td>Sentence-transformers model for tasks like clustering or semantic search</td>\n",
140 | 148 | " <td>1.11</td>\n",
141 | 149 | " <td>{'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'}</td>\n",
142 | 150 | " </tr>\n",
143 | 151 | " <tr>\n",
144 |     | - " <th>12</th>\n",
    | 152 | + " <th>13</th>\n",
    | 153 | + " <td>sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2</td>\n",
    | 154 | + " <td>384</td>\n",
    | 155 | + " <td>Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2</td>\n",
    | 156 | + " <td>0.46</td>\n",
    | 157 | + " <td>{'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'}</td>\n",
    | 158 | + " </tr>\n",
    | 159 | + " <tr>\n",
    | 160 | + " <th>14</th>\n",
145 | 161 | " <td>jinaai/jina-embeddings-v2-base-en</td>\n",
146 | 162 | " <td>768</td>\n",
147 | 163 | " <td>English embedding model supporting 8192 sequence length</td>\n",
148 | 164 | " <td>0.55</td>\n",
149 | 165 | " <td>{'hf': 'xenova/jina-embeddings-v2-base-en'}</td>\n",
150 | 166 | " </tr>\n",
151 | 167 | " <tr>\n",
152 |     | - " <th>13</th>\n",
    | 168 | + " <th>15</th>\n",
153 | 169 | " <td>jinaai/jina-embeddings-v2-small-en</td>\n",
154 | 170 | " <td>512</td>\n",
155 | 171 | " <td>English embedding model supporting 8192 sequence length</td>\n",

170 | 186 | "5 BAAI/bge-small-en-v1.5 384 \n",
171 | 187 | "6 BAAI/bge-small-zh-v1.5 512 \n",
172 | 188 | "7 sentence-transformers/all-MiniLM-L6-v2 384 \n",
173 |     | - "8 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 384 \n",
174 |     | - "9 nomic-ai/nomic-embed-text-v1 768 \n",
175 |     | - "10 intfloat/multilingual-e5-large 1024 \n",
176 |     | - "11 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
177 |     | - "12 jinaai/jina-embeddings-v2-base-en 768 \n",
178 |     | - "13 jinaai/jina-embeddings-v2-small-en 512 \n",
    | 189 | + "8 nomic-ai/nomic-embed-text-v1 768 \n",
    | 190 | + "9 nomic-ai/nomic-embed-text-v1.5 768 \n",
    | 191 | + "10 thenlper/gte-large 1024 \n",
    | 192 | + "11 intfloat/multilingual-e5-large 1024 \n",
    | 193 | + "12 sentence-transformers/paraphrase-multilingual-mpnet-base-v2 768 \n",
    | 194 | + "13 sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 384 \n",
    | 195 | + "14 jinaai/jina-embeddings-v2-base-en 768 \n",
    | 196 | + "15 jinaai/jina-embeddings-v2-small-en 512 \n",
179 | 197 | "\n",
180 | 198 | " description \\\n",
181 | 199 | "0 Base English model \n",

186 | 204 | "5 Fast and Default English model \n",
187 | 205 | "6 Fast and recommended Chinese model \n",
188 | 206 | "7 Sentence Transformer model, MiniLM-L6-v2 \n",
189 |     | - "8 Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2 \n",
    | 207 | + "8 8192 context length english model \n",
190 | 208 | "9 8192 context length english model \n",
191 |     | - "10 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
192 |     | - "11 Sentence-transformers model for tasks like clustering or semantic search \n",
193 |     | - "12 English embedding model supporting 8192 sequence length \n",
194 |     | - "13 English embedding model supporting 8192 sequence length \n",
    | 209 | + "10 Large general text embeddings model \n",
    | 210 | + "11 Multilingual model, e5-large. Recommend using this model for non-English languages \n",
    | 211 | + "12 Sentence-transformers model for tasks like clustering or semantic search \n",
    | 212 | + "13 Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2 \n",
    | 213 | + "14 English embedding model supporting 8192 sequence length \n",
    | 214 | + "15 English embedding model supporting 8192 sequence length \n",
195 | 215 | "\n",
196 | 216 | " size_in_GB \\\n",
197 | 217 | "0 0.50 \n",

202 | 222 | "5 0.13 \n",
203 | 223 | "6 0.10 \n",
204 | 224 | "7 0.09 \n",
205 |     | - "8 0.46 \n",
    | 225 | + "8 0.54 \n",
206 | 226 | "9 0.54 \n",
207 |     | - "10 2.24 \n",
208 |     | - "11 1.11 \n",
209 |     | - "12 0.55 \n",
210 |     | - "13 0.13 \n",
    | 227 | + "10 1.34 \n",
    | 228 | + "11 2.24 \n",
    | 229 | + "12 1.11 \n",
    | 230 | + "13 0.46 \n",
    | 231 | + "14 0.55 \n",
    | 232 | + "15 0.13 \n",
211 | 233 | "\n",
212 | 234 | " sources \n",
213 | 235 | "0 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz'} \n",

218 | 240 | "5 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-en-v1.5.tar.gz', 'hf': 'qdrant/bge-small-en-v1.5-onnx-q'} \n",
219 | 241 | "6 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz'} \n",
220 | 242 | "7 {'url': 'https://storage.googleapis.com/qdrant-fastembed/sentence-transformers-all-MiniLM-L6-v2.tar.gz', 'hf': 'qdrant/all-MiniLM-L6-v2-onnx'} \n",
221 |     | - "8 {'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'} \n",
222 |     | - "9 {'hf': 'nomic-ai/nomic-embed-text-v1'} \n",
223 |     | - "10 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
224 |     | - "11 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
225 |     | - "12 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
226 |     | - "13 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
    | 243 | + "8 {'hf': 'nomic-ai/nomic-embed-text-v1'} \n",
    | 244 | + "9 {'hf': 'nomic-ai/nomic-embed-text-v1.5'} \n",
    | 245 | + "10 {'hf': 'qdrant/gte-large-onnx'} \n",
    | 246 | + "11 {'url': 'https://storage.googleapis.com/qdrant-fastembed/fast-multilingual-e5-large.tar.gz', 'hf': 'qdrant/multilingual-e5-large-onnx'} \n",
    | 247 | + "12 {'hf': 'xenova/paraphrase-multilingual-mpnet-base-v2'} \n",
    | 248 | + "13 {'hf': 'qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q'} \n",
    | 249 | + "14 {'hf': 'xenova/jina-embeddings-v2-base-en'} \n",
    | 250 | + "15 {'hf': 'xenova/jina-embeddings-v2-small-en'} "
227 | 251 | ]
228 | 252 | },
229 |     | - "execution_count": 2,
    | 253 | + "execution_count": 6,
230 | 254 | "metadata": {},
231 | 255 | "output_type": "execute_result"
232 | 256 | }

256 | 280 | "name": "python",
257 | 281 | "nbconvert_exporter": "python",
258 | 282 | "pygments_lexer": "ipython3",
259 |     | - "version": "3.11.4"
    | 283 | + "version": "3.10.13"
260 | 284 | },
261 | 285 | "orig_nbformat": 4
262 | 286 | },
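The diff above touches only the re-executed cell output and notebook metadata: the supported-model table gains nomic-ai/nomic-embed-text-v1.5 and thenlper/gte-large, which shifts the index of every row after 8, and the total row count grows from 14 to 16. The cell's source is not part of this diff; the following is a minimal sketch of how such a table is typically produced, assuming the cell calls fastembed's TextEmbedding.list_supported_models() and renders the result with pandas (neither call is shown in this commit).

# Hedged sketch: rebuild the supported-model table shown in the diffed output.
# Assumption: the notebook uses fastembed's TextEmbedding.list_supported_models();
# this is not confirmed by the diff itself.
import pandas as pd
from fastembed import TextEmbedding

# list_supported_models() returns a list of dicts, one per downloadable ONNX
# model, with fields such as the model name, dimension, description,
# size_in_GB, and sources.
supported_models = TextEmbedding.list_supported_models()

# Widen column display so the long source URLs are not truncated, then show
# the table as a DataFrame, matching the output diffed above.
pd.set_option("display.max_colwidth", None)
pd.DataFrame(supported_models)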