|
12 | 12 | {
|
13 | 13 | "model": "BAAI/bge-base-en",
|
14 | 14 | "dim": 768,
|
15 |
| - "description": "Base English model", |
| 15 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2023 year", |
16 | 16 | "size_in_GB": 0.42,
|
17 | 17 | "sources": {
|
18 | 18 | "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en.tar.gz",
|
|
22 | 22 | {
|
23 | 23 | "model": "BAAI/bge-base-en-v1.5",
|
24 | 24 | "dim": 768,
|
25 |
| - "description": "Base English model, v1.5", |
| 25 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year", |
26 | 26 | "size_in_GB": 0.21,
|
27 | 27 | "sources": {
|
28 | 28 | "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-base-en-v1.5.tar.gz",
|
|
33 | 33 | {
|
34 | 34 | "model": "BAAI/bge-large-en-v1.5",
|
35 | 35 | "dim": 1024,
|
36 |
| - "description": "Large English model, v1.5", |
| 36 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year", |
37 | 37 | "size_in_GB": 1.20,
|
38 | 38 | "sources": {
|
39 | 39 | "hf": "qdrant/bge-large-en-v1.5-onnx",
|
|
43 | 43 | {
|
44 | 44 | "model": "BAAI/bge-small-en",
|
45 | 45 | "dim": 384,
|
46 |
| - "description": "Fast English model", |
| 46 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2023 year", |
47 | 47 | "size_in_GB": 0.13,
|
48 | 48 | "sources": {
|
49 | 49 | "url": "https://storage.googleapis.com/qdrant-fastembed/BAAI-bge-small-en.tar.gz",
|
|
53 | 53 | {
|
54 | 54 | "model": "BAAI/bge-small-en-v1.5",
|
55 | 55 | "dim": 384,
|
56 |
| - "description": "Fast and Default English model", |
| 56 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year", |
57 | 57 | "size_in_GB": 0.067,
|
58 | 58 | "sources": {
|
59 | 59 | "hf": "qdrant/bge-small-en-v1.5-onnx-q",
|
|
63 | 63 | {
|
64 | 64 | "model": "BAAI/bge-small-zh-v1.5",
|
65 | 65 | "dim": 512,
|
66 |
| - "description": "Fast and recommended Chinese model", |
| 66 | + "description": "Text embeddings, Unimodal (text), Chinese, 512 input tokens truncation, Prefixes for queries/documents: not so necessary, 2023 year", |
67 | 67 | "size_in_GB": 0.09,
|
68 | 68 | "sources": {
|
69 | 69 | "url": "https://storage.googleapis.com/qdrant-fastembed/fast-bge-small-zh-v1.5.tar.gz",
|
|
73 | 73 | {
|
74 | 74 | "model": "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
|
75 | 75 | "dim": 384,
|
76 |
| - "description": "Sentence Transformer model, paraphrase-multilingual-MiniLM-L12-v2", |
| 76 | + "description": "Text embeddings, Unimodal (text), Multilingual (~50 languages), 512 input tokens truncation, Prefixes for queries/documents: not necessary, 2019 year", |
77 | 77 | "size_in_GB": 0.22,
|
78 | 78 | "sources": {
|
79 | 79 | "hf": "qdrant/paraphrase-multilingual-MiniLM-L12-v2-onnx-Q",
|
|
83 | 83 | {
|
84 | 84 | "model": "thenlper/gte-large",
|
85 | 85 | "dim": 1024,
|
86 |
| - "description": "Large general text embeddings model", |
| 86 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: not necessary, 2023 year", |
87 | 87 | "size_in_GB": 1.20,
|
88 | 88 | "sources": {
|
89 | 89 | "hf": "qdrant/gte-large-onnx",
|
|
93 | 93 | {
|
94 | 94 | "model": "mixedbread-ai/mxbai-embed-large-v1",
|
95 | 95 | "dim": 1024,
|
96 |
| - "description": "MixedBread Base sentence embedding model, does well on MTEB", |
| 96 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year", |
97 | 97 | "size_in_GB": 0.64,
|
98 | 98 | "sources": {
|
99 | 99 | "hf": "mixedbread-ai/mxbai-embed-large-v1",
|
|
103 | 103 | {
|
104 | 104 | "model": "snowflake/snowflake-arctic-embed-xs",
|
105 | 105 | "dim": 384,
|
106 |
| - "description": "Based on all-MiniLM-L6-v2 model with only 22m parameters, ideal for latency/TCO budgets.", |
| 106 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year", |
107 | 107 | "size_in_GB": 0.09,
|
108 | 108 | "sources": {
|
109 | 109 | "hf": "snowflake/snowflake-arctic-embed-xs",
|
|
113 | 113 | {
|
114 | 114 | "model": "snowflake/snowflake-arctic-embed-s",
|
115 | 115 | "dim": 384,
|
116 |
| - "description": "Based on infloat/e5-small-unsupervised, does not trade off retrieval accuracy for its small size.", |
| 116 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year", |
117 | 117 | "size_in_GB": 0.13,
|
118 | 118 | "sources": {
|
119 | 119 | "hf": "snowflake/snowflake-arctic-embed-s",
|
|
123 | 123 | {
|
124 | 124 | "model": "snowflake/snowflake-arctic-embed-m",
|
125 | 125 | "dim": 768,
|
126 |
| - "description": "Based on intfloat/e5-base-unsupervised model, provides the best retrieval without slowing down inference.", |
| 126 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year", |
127 | 127 | "size_in_GB": 0.43,
|
128 | 128 | "sources": {
|
129 | 129 | "hf": "Snowflake/snowflake-arctic-embed-m",
|
|
133 | 133 | {
|
134 | 134 | "model": "snowflake/snowflake-arctic-embed-m-long",
|
135 | 135 | "dim": 768,
|
136 |
| - "description": "Based on nomic-ai/nomic-embed-text-v1-unsupervised model, 8192 context-length model", |
| 136 | + "description": "Text embeddings, Unimodal (text), English, 2048 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year", |
137 | 137 | "size_in_GB": 0.54,
|
138 | 138 | "sources": {
|
139 | 139 | "hf": "snowflake/snowflake-arctic-embed-m-long",
|
|
143 | 143 | {
|
144 | 144 | "model": "snowflake/snowflake-arctic-embed-l",
|
145 | 145 | "dim": 1024,
|
146 |
| - "description": "Based on intfloat/e5-large-unsupervised, large model for most accurate retrieval.", |
| 146 | + "description": "Text embeddings, Unimodal (text), English, 512 input tokens truncation, Prefixes for queries/documents: necessary, 2024 year", |
147 | 147 | "size_in_GB": 1.02,
|
148 | 148 | "sources": {
|
149 | 149 | "hf": "snowflake/snowflake-arctic-embed-l",
|
|
0 commit comments