
Commit d725974

new: update docs (#257)
1 parent 14c067e commit d725974

File tree

2 files changed: +21 −22 lines


docs/Getting Started.ipynb (+10 −10)

@@ -11,7 +11,7 @@
 "\n",
 "## Quick Start\n",
 "\n",
-"The fastembed package is designed to be easy to use. We'll be using `TextEmbedding` class. It takes a list of strings as input and returns an generator of vectors. If you're seeing generators for the first time, don't worry, you can convert it to a list using `list()`.\n",
+"The fastembed package is designed to be easy to use. We'll be using `TextEmbedding` class. It takes a list of strings as input and returns a generator of vectors.\n",
 "\n",
 "> 💡 You can learn more about generators from [Python Wiki](https://wiki.python.org/moin/Generators)"
 ]
@@ -23,7 +23,7 @@
 "metadata": {},
 "outputs": [],
 "source": [
-"!pip install -Uqq fastembed # Install fastembed"
+"!pip install -Uqq fastembed"
 ]
 },
 {
@@ -65,9 +65,12 @@
 }
 ],
 "source": [
+"from typing import List\n",
+"\n",
 "import numpy as np\n",
+"\n",
 "from fastembed import TextEmbedding\n",
-"from typing import List\n",
+"\n",
 "\n",
 "# Example list of documents\n",
 "documents: List[str] = [\n",
@@ -79,9 +82,8 @@
 "embedding_model = TextEmbedding()\n",
 "print(\"The model BAAI/bge-small-en-v1.5 is ready to use.\")\n",
 "\n",
-"embeddings_generator = embedding_model.embed(documents) # reminder this is a generator\n",
+"embeddings_generator = embedding_model.embed(documents)\n",
 "embeddings_list = list(embeddings_generator)\n",
-"# you can also convert the generator to a list, and that to a numpy array\n",
 "len(embeddings_list[0]) # Vector of 384 dimensions"
 ]
 },
@@ -113,7 +115,7 @@
 }
 ],
 "source": [
-"embeddings_generator = embedding_model.embed(documents) # reminder this is a generator\n",
+"embeddings_generator = embedding_model.embed(documents)\n",
 "\n",
 "for doc, vector in zip(documents, embeddings_generator):\n",
 "    print(\"Document:\", doc)\n",
@@ -138,9 +140,7 @@
 }
 ],
 "source": [
-"embeddings_list = np.array(\n",
-"    list(embedding_model.embed(documents))\n",
-") # you can also convert the generator to a list, and that to a numpy array\n",
+"embeddings_list = np.array(list(embedding_model.embed(documents)))\n",
 "embeddings_list.shape"
 ]
 },
@@ -185,7 +185,7 @@
 }
 ],
 "source": [
-"multilingual_large_model = TextEmbedding(\"intfloat/multilingual-e5-large\") # This can take a few minutes to download"
+"multilingual_large_model = TextEmbedding(\"intfloat/multilingual-e5-large\")"
 ]
 },
 {
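The pattern this notebook diff converges on — `embed` returns a generator, which you materialize with `list()` — can be sketched without downloading the real model. Here `fake_embed` is a hypothetical stand-in for `TextEmbedding.embed`, used only to illustrate the generator behavior:

```python
from typing import Iterator, List

def fake_embed(documents: List[str]) -> Iterator[List[float]]:
    # Hypothetical stand-in for TextEmbedding.embed: yields one vector
    # per document, lazily, instead of returning a list up front.
    for doc in documents:
        yield [float(len(doc)), 0.0, 1.0]  # dummy 3-dimensional "vector"

documents: List[str] = [
    "Hello, World!",
    "This is an example passage.",
]

embeddings_generator = fake_embed(documents)  # no embeddings computed yet
embeddings_list = list(embeddings_generator)  # materializes every vector

print(len(embeddings_list))     # one vector per document
print(len(embeddings_list[0]))  # dimensionality of each vector
```

Note that a generator can only be consumed once; re-create it (or keep the `list()` result) if you need to iterate a second time.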

docs/index.md (+11 −12)

@@ -2,17 +2,17 @@
 
 FastEmbed is a lightweight, fast, Python library built for embedding generation. We [support popular text models](https://qdrant.github.io/fastembed/examples/Supported_Models/). Please [open a Github issue](https://github.com/qdrant/fastembed/issues/new) if you want us to add a new model.
 
-The default embedding supports "query" and "passage" prefixes for the input text. The default model is Flag Embedding, which is top of the [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard. Here is an example for [Retrieval Embedding Generation](https://qdrant.github.io/fastembed/examples/Retrieval%20with%20FastEmbed/) and how to use [FastEmbed with Qdrant](https://qdrant.github.io/fastembed/examples/Usage_With_Qdrant/).
-
 1. Light & Fast
    - Quantized model weights
-   - ONNX Runtime for inference via [Optimum](https://github.com/huggingface/optimum)
+   - ONNX Runtime for inference
 
 2. Accuracy/Recall
    - Better than OpenAI Ada-002
-   - Default is Flag Embedding, which is top of the [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard
+   - Default is Flag Embedding, which has shown good results on the [MTEB](https://huggingface.co/spaces/mteb/leaderboard) leaderboard
   - List of [supported models](https://qdrant.github.io/fastembed/examples/Supported_Models/) - including multilingual models
 
+Here is an example for [Retrieval Embedding Generation](https://qdrant.github.io/fastembed/examples/Retrieval%20with%20FastEmbed/) and how to use [FastEmbed with Qdrant](https://qdrant.github.io/fastembed/examples/Usage_With_Qdrant/).
+
 ## 🚀 Installation
 
 To install the FastEmbed library, pip works:
@@ -24,16 +24,16 @@ pip install fastembed
 ## 📖 Usage
 
 ```python
-from fastembed.embedding import FlagEmbedding as Embedding
+from fastembed import TextEmbedding
 
 documents: List[str] = [
     "passage: Hello, World!",
-    "query: Hello, World!", # these are two different embedding
+    "query: Hello, World!",
     "passage: This is an example passage.",
-    "fastembed is supported by and maintained by Qdrant." # You can leave out the prefix but it's recommended
+    "fastembed is supported by and maintained by Qdrant."
 ]
-embedding_model = Embedding(model_name="BAAI/bge-base-en", max_length=512)
-embeddings: List[np.ndarray] = embedding_model.embed(documents) # If you use
+embedding_model = TextEmbedding()
+embeddings: List[np.ndarray] = embedding_model.embed(documents)
 ```
 
 ## Usage with Qdrant
@@ -50,17 +50,16 @@ Might have to use ```pip install 'qdrant-client[fastembed]'``` on zsh.
 from qdrant_client import QdrantClient
 
 # Initialize the client
-client = QdrantClient(":memory:") # or QdrantClient(path="path/to/db")
+client = QdrantClient(":memory:") # Using an in-process Qdrant
 
 # Prepare your documents, metadata, and IDs
 docs = ["Qdrant has Langchain integrations", "Qdrant also has Llama Index integrations"]
 metadata = [
     {"source": "Langchain-docs"},
-    {"source": "Linkedin-docs"},
+    {"source": "Llama-index-docs"},
 ]
 ids = [42, 2]
 
-# Use the new add method
 client.add(
     collection_name="demo_collection",
     documents=docs,
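The `client.add` call in this diff relies on `docs`, `metadata`, and `ids` lining up by position. A minimal pure-Python sketch of that pairing — the `point` dicts here are a hypothetical illustration of the correspondence, not qdrant-client's internal record format:

```python
docs = ["Qdrant has Langchain integrations", "Qdrant also has Llama Index integrations"]
metadata = [
    {"source": "Langchain-docs"},
    {"source": "Llama-index-docs"},
]
ids = [42, 2]

# zip pairs the i-th id, document, and payload together, which is the
# positional correspondence the parallel lists passed to client.add assume.
points = [
    {"id": point_id, "document": doc, "payload": meta}
    for point_id, doc, meta in zip(ids, docs, metadata)
]

for point in points:
    print(point["id"], "->", point["payload"]["source"])
```

If the three lists ever have different lengths, `zip` silently truncates to the shortest, so keeping them the same length is the caller's responsibility.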
