diff --git a/.github/workflows/data-fetch-push.yml b/.github/workflows/data-fetch-push.yml index c8a689a1e..de245a242 100644 --- a/.github/workflows/data-fetch-push.yml +++ b/.github/workflows/data-fetch-push.yml @@ -11,7 +11,7 @@ jobs: strategy: matrix: - python-version: [3.8] + python-version: [3.10.13] steps: - name: Checkout repository @@ -22,9 +22,10 @@ jobs: - name: Add dependencies run: | - python -m pip install requests - python -m pip install tqdm - python -m pip install pypistats + python -m pip install --upgrade pip + pip install requests + pip install tqdm + pip install pypistats - name: Set pepy.tech API key run: | diff --git a/docs/articles/semantic_chunking.md b/docs/articles/semantic_chunking.md new file mode 100644 index 000000000..8bb788aad --- /dev/null +++ b/docs/articles/semantic_chunking.md @@ -0,0 +1,546 @@
+# Semantic Chunking
+
+
+
+Chunking in Natural Language Processing is simply dividing large bodies of text into smaller pieces that computers can manage more easily. Splitting large datasets into chunks enables your Retrieval Augmented Generation (RAG) system to embed, index, and store even very large datasets optimally. But *how* you chunk your data is crucial in determining whether you can efficiently return only the most relevant results to your user queries.
+
+To get your RAG system to handle user queries better, you need a chunking method that's a good fit for your data. Some widely used chunking algorithms are **rule-based** - e.g., fixed character splitter, recursive character splitter, document-specific splitter, among others. But in some real-world applications, rule-based methods fall short. If, for example, your dataset has multi-topic documents, rule-based splitting algorithms can result in incomplete contexts or noise-filled chunks. **Semantic chunking**, on the other hand - because it divides text on the basis of meaning rather than rules - creates chunks that are semantically independent and cohesive, and therefore results in more effective text processing and information retrieval.
+
+**But exactly which semantic chunking method should you use?** In this article, we'll describe and implement three popular semantic chunking methods - embedding-similarity-based, hierarchical-clustering-based, and LLM-based. Then we'll evaluate them, applying an embedding model and a reranker to two different datasets - one that tests our chunking methods' ability to handle complex, multi-hop reasoning tasks, and another that checks if they can identify and extract only the most relevant results.
+
+Let's get started.
+
+## What is semantic chunking?
+
+In semantic chunking, a splitter adaptively picks the breakpoint between sentences by comparing embedding similarity. This ensures that each chunk contains semantically cohesive sentences. Typically, a semantic splitting algorithm uses a sliding window approach, calculating the cosine similarity between the embeddings of consecutive sentences, and establishing a threshold for assigning chunk boundaries. When sentence similarity drops below this threshold, it signals a shift in semantic content, and the splitter marks a breakpoint.
+
+
+The workflow of a semantic splitter has three basic steps:
+
+![Full text](../assets/use_cases/semantic_chunking/sample1.png)
+
+1. Split the text into sentences.
+
+   ![Text split](../assets/use_cases/semantic_chunking/sample2.png)
+
+2. Generate embeddings for the sentences.
+
+   ![Embedding split](../assets/use_cases/semantic_chunking/sample3.png)
+
+3. 
Group sentences based on their embedding similarity.
+
+   ![Chunks](../assets/use_cases/semantic_chunking/sample4.png)
+
+## Types of semantic chunking
+
+Which method of semantic chunking will produce optimal outcomes depends on your use case. To get a sense of which splitting algorithms fit which scenarios, let's take an in-depth look at, implement, and evaluate three popular methods: embedding-similarity-based, hierarchical-clustering-based, and LLM-based.
+
+### Embedding-similarity-based chunking
+
+In embedding-similarity-based chunking, we create chunks by comparing semantic similarity between sentences, which we calculate by measuring the cosine distance between consecutive sentence embeddings.
+
+Let's walk through how to implement it.
+
+First, we import the required libraries.
+
+```python
+import re
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.metrics.pairwise import cosine_distances
+from transformers import AutoTokenizer, AutoModel
+import torch
+```
+
+We define a helper function to split the text into sentences - based on end-of-sentence punctuation followed by whitespace.
+
+```python
+def _split_sentences(text):
+    sentences = re.split(r'(?<=[.?!])\s+', text)
+    return sentences
+```
+
+To provide a context window to better understand each sentence, we define a function to combine it with its preceding and following sentences.
+
+```python
+def _combine_sentences(sentences):
+    combined_sentences = []
+    for i in range(len(sentences)):
+        combined_sentence = sentences[i]
+        if i > 0:
+            combined_sentence = sentences[i-1] + ' ' + combined_sentence
+        if i < len(sentences) - 1:
+            combined_sentence += ' ' + sentences[i+1]
+        combined_sentences.append(combined_sentence)
+    return combined_sentences
+```
+
+Next, we define a cosine distance calculation function and an embedding function.
+
+```python
+def _calculate_cosine_distances(embeddings):
+    distances = []
+    for i in range(len(embeddings) - 1):
+        similarity = cosine_similarity([embeddings[i]], [embeddings[i + 1]])[0][0]
+        distance = 1 - similarity
+        distances.append(distance)
+    return distances
+
+def get_embeddings(texts, model_name="BAAI/bge-small-en-v1.5"):
+    # Note: for simplicity, the tokenizer and model are reloaded on every call; cache them if you call this repeatedly.
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModel.from_pretrained(model_name)
+
+    encoded_input = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
+
+    with torch.no_grad():
+        model_output = model(**encoded_input)
+
+    embeddings = mean_pooling(model_output, encoded_input['attention_mask'])
+
+    return embeddings.numpy()
+
+def mean_pooling(model_output, attention_mask):
+    token_embeddings = model_output[0]
+    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
+    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
+```
+
+Now that we've defined functions for splitting the text into sentences, generating embeddings, and calculating cosine distances, we turn to **chunking**.
+
+Our chunking function determines a `breakpoint_distance_threshold` for identifying breakpoints. We use `breakpoint_percentile_threshold` to identify indices where cosine distances exceed the 80th percentile. Sentences with distances exceeding the threshold are considered chunk boundaries. `chunk_text` then creates chunks by joining sentences between breakpoints. Any sentences in the text remaining after the last identified breakpoint are grouped into a final chunk.
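+
+To make the thresholding step concrete before we look at the full function, here's a minimal, self-contained sketch of just the percentile logic. The distance values are made up for illustration - they're not output from our embedding model:
+
+```python
+import numpy as np
+
+# Hypothetical cosine distances between consecutive sentences (illustrative values only).
+distances = [0.05, 0.12, 0.61, 0.08, 0.07, 0.55, 0.10]
+
+# The 80th percentile of the distances becomes the breakpoint threshold.
+threshold = np.percentile(distances, 80)
+
+# Indices whose distance exceeds the threshold mark the ends of chunks.
+breakpoints = [i for i, d in enumerate(distances) if d > threshold]
+print(threshold, breakpoints)  # here: two breakpoints, so the text would be split into three chunks
+```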
+ +```python +def chunk_text(text): + single_sentences_list = _split_sentences(text) + print(single_sentences_list) + combined_sentences = _combine_sentences(single_sentences_list) + print(combined_sentences) + embeddings = get_embeddings(combined_sentences) + distances = _calculate_cosine_distances(embeddings) + + # Determine the threshold distance for identifying breakpoints based on the 80th percentile of all distances. + breakpoint_percentile_threshold = 80 + breakpoint_distance_threshold = np.percentile(distances, breakpoint_percentile_threshold) + # Find all indices where the distance exceeds the calculated threshold, indicating a potential chunk breakpoint. + indices_above_thresh = [i for i, distance in enumerate(distances) if distance > breakpoint_distance_threshold] + + chunks = [] + start_index = 0 + for index in indices_above_thresh: + chunk = ' '.join(single_sentences_list[start_index:index+1]) + chunks.append(chunk) + start_index = index + 1 + + # If there are any sentences left after the last breakpoint, add them as the final chunk. + if start_index < len(single_sentences_list): + chunk = ' '.join(single_sentences_list[start_index:]) + chunks.append(chunk) + + return chunks +``` + +Depending on the needs of your use case - e.g., more fine-grained analysis, improved context, enhanced readability, alignment with user queries, etc. - you may also want to reduce your breakpoint distance threshold, generating more chunks. + +Let's run a sample text input using our 80% `breakpoint_percentile_threshold` to see what results we get. + +```python +text = """ +Regular exercise is essential for maintaining overall health and well-being. It helps in controlling weight, +improving cardiovascular health, and boosting mental health. +Engaging in physical activity regularly can also enhance the immune system, reduce the risk of chronic diseases, +and increase energy levels. Regular workouts are known to improve muscle strength and flexibility, which can prevent injuries and enhance mobility. +Moreover, exercise contributes to better sleep and improved mood, which are crucial for daily functioning. +Physical activity can also help reduce symptoms of anxiety and depression, leading to a more balanced emotional state. +Activities like walking, jogging, or swimming can be easily incorporated into a daily routine, making it accessible for everyone. +By setting realistic goals and staying consistent, individuals can enjoy these benefits and lead a healthier lifestyle. +Group fitness classes or sports teams can provide motivation and social support, making exercise more enjoyable and sustainable. +""" +chunks = chunk_text(text) +for i, chunk in enumerate(chunks, 1): + print(f"Chunk {i}:") + print(chunk) + print("----------------------------------------------------------------------------") + +print(f"\nTotal number of chunks: {len(chunks)}") +``` + +Here's the resulting output: + +```markdown +Chunk 1: +Regular exercise is essential for maintaining overall health and well-being. It helps in controlling weight, +improving cardiovascular health, and boosting mental health. Engaging in physical activity regularly can also enhance the immune system, reduce the risk of chronic diseases, +and increase energy levels. Regular workouts are known to improve muscle strength and flexibility, which can prevent injuries and enhance mobility. 
+---------------------------------------------------------------------------- +Chunk 2: +Moreover, exercise contributes to better sleep and improved mood, which are crucial for daily functioning. Physical activity can also help reduce symptoms of anxiety and depression, leading to a more balanced emotional state. Activities like walking, jogging, or swimming can be easily incorporated into a daily routine, making it accessible for everyone. +---------------------------------------------------------------------------- +Chunk 3: +By setting realistic goals and staying consistent, individuals can enjoy these benefits and lead a healthier lifestyle. Group fitness classes or sports teams can provide motivation and social support, making exercise more enjoyable and sustainable. +---------------------------------------------------------------------------- +``` + +![results-embedding-similarity-based](../assets/use_cases/semantic_chunking/graph1.png) + +Our results seem very relevant and discrete. So far so good. Now, let's take a look at our second semantic chunking method. + +### Hierarchical-clustering-based chunking + +In this approach, we again calculate semantic similarity in terms of cosine distances between embeddings of consecutive sentences. But this time we hierarchically cluster our sentences. What exactly does this look like? + +First, let's import the required libraries. Our utility functions are the same as in embedding-similarity-based chunking, but here we also add utilities for clustering and cluster evaluation. + +```python +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity +from sklearn.metrics.pairwise import cosine_distances +from sklearn.metrics import silhouette_score +from scipy.cluster.hierarchy import linkage, fcluster +from transformers import AutoTokenizer, AutoModel +import torch +``` + +The **chunk_text** function in hierarchical clustering calculates a distance matrix based on cosine distances between embeddings - the `linkage` method builds a hierarchical cluster tree. To determine your chunk boundaries and how closely your sentences will be grouped, you use either a specified number of clusters (`num_clusters`) or a distance threshold (`distance_threshold`). The `chunk_text` function then assigns each sentence to a cluster and returns the resulting clusters as chunks of text. + +```python +def chunk_text(text, num_clusters=4, distance_threshold=None): + + single_sentences_list = _split_sentences(text) + print(single_sentences_list) + + combined_sentences = _combine_sentences(single_sentences_list) + print(combined_sentences) + + + embeddings = get_embeddings(combined_sentences) + + + distance_matrix = cosine_distances(embeddings) + + + Z = linkage(distance_matrix, method='average') # 'average' is for average linkage; you can also try 'ward', 'complete', etc. 
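+    # Note: SciPy's linkage() expects either raw observation vectors or a condensed
+    # (1-D) distance matrix; passing the square matrix above treats each row of
+    # pairwise distances as a feature vector. To cluster on the precomputed cosine
+    # distances directly, you could instead pass
+    # scipy.spatial.distance.squareform(distance_matrix) to linkage().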
+ + + if num_clusters: + cluster_labels = fcluster(Z, t=num_clusters, criterion='maxclust') + elif distance_threshold: + cluster_labels = fcluster(Z, t=distance_threshold, criterion='distance') + else: + raise ValueError("Either num_clusters or distance_threshold must be specified.") + + + chunks = [] + current_chunk = [] + current_label = cluster_labels[0] + + for i, sentence in enumerate(single_sentences_list): + if cluster_labels[i] == current_label: + current_chunk.append(sentence) + else: + # Start a new chunk when the cluster label changes + chunks.append(' '.join(current_chunk)) + current_chunk = [sentence] + current_label = cluster_labels[i] + + # Append the last chunk + if current_chunk: + chunks.append(' '.join(current_chunk)) + + + return chunks +``` + +Let's use the same text (as we used with embedding-similarity-based chunking) and, this time, apply hierarchical clustering to it. + +```python +text = """ +Regular exercise is essential for maintaining overall health and well-being. It helps in controlling weight, +improving cardiovascular health, and boosting mental health. +Engaging in physical activity regularly can also enhance the immune system, reduce the risk of chronic diseases, +and increase energy levels. Regular workouts are known to improve muscle strength and flexibility, which can prevent injuries and enhance mobility. +Moreover, exercise contributes to better sleep and improved mood, which are crucial for daily functioning. +Physical activity can also help reduce symptoms of anxiety and depression, leading to a more balanced emotional state. +Activities like walking, jogging, or swimming can be easily incorporated into a daily routine, making it accessible for everyone. +By setting realistic goals and staying consistent, individuals can enjoy these benefits and lead a healthier lifestyle. +Group fitness classes or sports teams can provide motivation and social support, making exercise more enjoyable and sustainable. +""" +chunks = chunk_text(text) +for chunk in chunks: + print(chunk,"\n----------------------------------------------------------------------------\n") +print(f"\n{len(chunks)} chunks") +``` + +Here's our output: + +```markdown +Chunk 1: +Regular exercise is essential for maintaining overall health and well-being. It helps in controlling weight, +improving cardiovascular health, and boosting mental health. Engaging in physical activity regularly can also enhance the immune system, reduce the risk of chronic diseases, +and increase energy levels. +---------------------------------------------------------------------------- +Chunk 2: +Regular workouts are known to improve muscle strength and flexibility, which can prevent injuries and enhance mobility. +Moreover, exercise contributes to better sleep and improved mood, which are crucial for daily functioning. Physical activity can also help reduce symptoms of anxiety and depression, leading to a more balanced emotional state. +---------------------------------------------------------------------------- +Chunk 3: +Activities like walking, jogging, or swimming can be easily incorporated into a daily routine, making it accessible for everyone. By setting realistic goals and staying consistent, individuals can enjoy these benefits and lead a healthier lifestyle. Group fitness classes or sports teams can provide motivation and social support, making exercise more enjoyable and sustainable. 
+---------------------------------------------------------------------------- +``` + +![results-hierarchical based](../assets/use_cases/semantic_chunking/graph2.png) + +These results look pretty decent. But to make our clusters even more optimized (meaningfully tight and internally cohesive), we can incorporate **Within-Cluster Sum of Squares (WCSS)** - a measure of the cluster compactness. WCSS calculates the sum of the squared distances between each point in a cluster and the cluster's centroid. The lower the WCSS, the more compact and tightly-knit are the clusters - i.e., sentences within each chunk are more semantically similar. The **elbow point** is a heuristic method for determining the optimal number of clusters - the point at which adding more clusters doesn't significantly reduce the WCSS. + +Let's try adding WCSS and the elbow point method to see how it affects our results. + +```python +from sklearn.metrics import silhouette_score +def determine_optimal_clusters(embeddings, max_clusters=10): + distance_matrix = cosine_distances(embeddings) + + Z = linkage(distance_matrix, method='average') + + wcss = [] + for n_clusters in range(2, max_clusters + 1): + cluster_labels = fcluster(Z, t=n_clusters, criterion='maxclust') + wcss.append(calculate_wcss(embeddings, cluster_labels)) + + total_variance = np.sum((embeddings - np.mean(embeddings, axis=0))**2) + explained_variance = [1 - (w / total_variance) for w in wcss] + + optimal_clusters = find_elbow_point(range(2, max_clusters + 1), explained_variance) + + return optimal_clusters + +def calculate_wcss(data, labels): + n_clusters = len(set(labels)) + wcss = 0 + for i in range(n_clusters): + cluster_points = data[labels == i+1] + cluster_mean = np.mean(cluster_points, axis=0) + wcss += np.sum((cluster_points - cluster_mean)**2) + return wcss + +def find_elbow_point(x, y): + diffs = np.diff(y, 2) + return x[np.argmax(diffs) + 1] + +def chunk_text_with_clusters(text, num_clusters): + single_sentences_list = _split_sentences(text) + combined_sentences = _combine_sentences(single_sentences_list) + embeddings = get_embeddings(combined_sentences) + + distance_matrix = cosine_distances(embeddings) + Z = linkage(distance_matrix, method='average') + cluster_labels = fcluster(Z, t=num_clusters, criterion='maxclust') + + chunks = [] + current_chunk = [] + current_label = cluster_labels[0] + + for i, sentence in enumerate(single_sentences_list): + if cluster_labels[i] == current_label: + current_chunk.append(sentence) + else: + chunks.append(' '.join(current_chunk)) + current_chunk = [sentence] + current_label = cluster_labels[i] + + if current_chunk: + chunks.append(' '.join(current_chunk)) + + return chunks + + +def chunk_text(text, max_clusters=10): + single_sentences_list = _split_sentences(text) + combined_sentences = _combine_sentences(single_sentences_list) + embeddings = get_embeddings(combined_sentences) + + optimal_clusters = determine_optimal_clusters(embeddings, max_clusters) + + return chunk_text_with_clusters(text, num_clusters=optimal_clusters) +``` + +You can adjust the value of max_clusters - more for longer text input, less for shorter. + +What insights does applying the WCSS elbow method give us? Let's take a look. + +![Optimal clusters calculation](../assets/use_cases/semantic_chunking/graph3.png) + +The elbow point (vertical red line, at 5 clusters) is where the rate of decrease sharply shifts. 
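+
+For reference, here's a rough sketch of how the two curves in this plot can be computed, assuming the helper functions defined above are in scope (the silhouette score comes from scikit-learn's `silhouette_score`, called with the precomputed cosine distance matrix):
+
+```python
+combined = _combine_sentences(_split_sentences(text))
+embeddings = get_embeddings(combined)
+distance_matrix = cosine_distances(embeddings)
+Z = linkage(distance_matrix, method='average')
+
+total_variance = np.sum((embeddings - np.mean(embeddings, axis=0)) ** 2)
+max_k = min(8, len(embeddings) - 1)  # keep the cluster range valid for short texts
+
+for n_clusters in range(2, max_k + 1):
+    labels = fcluster(Z, t=n_clusters, criterion='maxclust')
+    explained = 1 - calculate_wcss(embeddings, labels) / total_variance
+    sil = silhouette_score(distance_matrix, labels, metric='precomputed')
+    print(f"{n_clusters} clusters: explained variance {explained:.2f}, silhouette {sil:.2f}")
+```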
+ +The silhouette score (right axis) is a measure of both how similar an embedding is to its cluster and how different it is from other clusters' embeddings. A high silhouette score indicates that our clustering is optimal; a low score may suggest that we should reconsider the number of clusters. + +What are our WCSS outputs? +We have 5 chunks that, based on our elbow point, should give us the most internally similar and externally distinct clusters. + +```markdown +Chunk 1: +Regular exercise is essential for maintaining overall health and well-being. It helps in controlling weight, +improving cardiovascular health, and boosting mental health. Engaging in physical activity regularly can also enhance the immune system, reduce the risk of chronic diseases, +and increase energy levels. Regular workouts are known to improve muscle strength and flexibility, which can prevent injuries and enhance mobility. +---------------------------------------------------------------------------- +Chunk 2: +Moreover, exercise contributes to better sleep and improved mood, which are crucial for daily functioning. +---------------------------------------------------------------------------- +Chunk 3: +Physical activity can also help reduce symptoms of anxiety and depression, leading to a more balanced emotional state. Activities like walking, jogging, or swimming can be easily incorporated into a daily routine, making it accessible for everyone. +---------------------------------------------------------------------------- +Chunk 4: +By setting realistic goals and staying consistent, individuals can enjoy these benefits and lead a healthier lifestyle. +---------------------------------------------------------------------------- +Chunk 5: +Group fitness classes or sports teams can provide motivation and social support, making exercise more enjoyable and sustainable. +``` + +![results-hierarchical-clustering-based with WCSS](../assets/use_cases/semantic_chunking/graph4.png) + +Now that we've looked at our first two semantic chunking methods (semantic embedding and hierarchical clustering), let's turn to consider our third semantic chunking method. + +### LLM-based chunking + +In LLM-based chunking, you prompt a large language model to process your text, convert it into semantic embeddings, evaluate them, and determine logical breakpoints for creating chunks. Here, our prompting aims to identify [propositions](https://arxiv.org/pdf/2312.06648.pdf) - highly refined chunks that are semantically rich and self-contained, preserving context and meaning - thereby improving retrieval in downstream applications. 
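+
+As a quick illustration of what a proposition is (a made-up example, not output from the code below), a single compound sentence gets decomposed into standalone, self-contained statements:
+
+```python
+# Illustrative only: the kind of decomposition the proposition prompt aims to produce.
+sentence = "Marie Curie won the Nobel Prize in Physics in 1903 and in Chemistry in 1911."
+propositions = [
+    "Marie Curie won the Nobel Prize in Physics in 1903.",
+    "Marie Curie won the Nobel Prize in Chemistry in 1911.",
+]
+```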
+ +Here is the complete code, implemented with a base class from LangChain: + +```python +from langchain.text_splitter import TextSplitter +from typing import List +import uuid +from langchain_huggingface import HuggingFacePipeline +from langchain import PromptTemplate +from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig, AutoModel +from langchain_community.llms import HuggingFaceHub +from langchain.chains import create_extraction_chain_pydantic +from pydantic import BaseModel +from langchain_core.prompts import ChatPromptTemplate +from typing import List +from langchain.docstore.document import Document +from langchain.output_parsers import PydanticOutputParser +import json +from pydantic import ValidationError +import re +import dspy +import time + +class ExtractSentences(dspy.Signature): + """Extract meaningful propositions (semantic chunks) from the given text.""" + text = dspy.InputField() + sentences = dspy.OutputField(desc="List of extracted sentences") + +class ExtractSentencesProgram(dspy.Program): + def run(self, text): + extract = dspy.Predict(ExtractSentences) + result = extract(text=text) + return result.sentences + + +class LlmSemanticChunker(TextSplitter): + def __init__(self, llm, chunk_size: int = 1000): + super().__init__(chunk_size=chunk_size) + self.llm = llm + self.chunk_size = chunk_size # Explicitly set chunk_size as an instance attribute + dspy.settings.configure(lm=llm) + self.extractor = ExtractSentencesProgram() + + def get_propositions(self, text): + sentences = self.extractor.run(text) + + if isinstance(sentences, list): + return sentences + + # Fallback: extract sentences heuristically + return [s.strip() for s in text.split('.') if s.strip()] + + def split_text(self, text: str) -> List[str]: + """Extract propositions and chunk them accordingly.""" + propositions = self.get_propositions(text) + return self._chunk_propositions(propositions) + + def split_documents(self, documents: List[Document]) -> List[Document]: + """Split documents into chunks.""" + split_docs = [] + for doc in documents: + chunks = self.split_text(doc.page_content) + for i, chunk in enumerate(chunks): + metadata = doc.metadata.copy() + metadata.update({"chunk_index": i}) + split_docs.append(Document(page_content=chunk, metadata=metadata)) + return split_docs + + def _chunk_propositions(self, propositions: List[str]) -> List[str]: + chunks = [] + current_chunk = [] + current_size = 0 + + for prop in propositions: + prop_size = len(prop) + if current_size + prop_size > self.chunk_size and current_chunk: + chunks.append(" ".join(current_chunk)) + current_chunk = [] + current_size = 0 + current_chunk.append(prop) + current_size += prop_size + + if current_chunk: + chunks.append(" ".join(current_chunk)) + + return chunks +``` + +Let's see what output we get from LLM-based chunking using the same input as we used in our two methods above. + +```markdown +Chunk 1: +Regular exercise is essential for maintaining overall health and well-being It helps in controlling weight, +improving cardiovascular health, and boosting mental health Engaging in physical activity regularly can also enhance the immune system, reduce the risk of chronic diseases, +and increase energy levels Regular workouts are known to improve muscle strength and flexibility, which can prevent injuries and enhance mobility. 
+Moreover, exercise contributes to better sleep and improved mood, which are crucial for daily functioning
+--------------------------------------------------
+Chunk 2:
+Physical activity can also help
+reduce symptoms of anxiety and depression, leading to a more balanced emotional state Activities like walking, jogging, or swimming can be easily
+incorporated into a daily routine, making it accessible for everyone By setting realistic goals and staying consistent, individuals can enjoy these benefits and lead a healthier lifestyle
+--------------------------------------------------
+Chunk 3:
+Group fitness classes or sports teams can provide motivation and social support, making exercise more enjoyable and sustainable
+--------------------------------------------------
+```
+
+## Evaluating our semantic chunking methods
+
+Now that we've gone through an implementation of our three semantic chunking methods, we'll run some experiments so we can compare them more systematically - using two popular benchmark datasets, a specific embedding model, and a reranker.
+
+***Datasets***
+
+1. [HotpotQA](https://huggingface.co/datasets/hotpotqa/hotpot_qa?row=16)
+2. [SQuAD](https://huggingface.co/datasets/squad?row=0)
+
+HotpotQA tests our methods' ability to handle complex, multi-hop reasoning tasks, while SQuAD evaluates their ability to identify and extract the exact span of text that answers a given question.
+
+***Embedding models***
+
+1. [BAAI/bge-small-en-v1.5](https://huggingface.co/BAAI/bge-small-en-v1.5)
+2. BAAI/bge-small-en-v1.5 with reranker
+
+After running our experiments, we evaluated our three methods - embedding-similarity-based, hierarchical-clustering-based, and LLM-based chunking - in terms of latency, context precision, context recall, faithfulness, and relevancy.
+
+![Evaluation results](../assets/use_cases/semantic_chunking/table.png)
+
+Let's visualize these results side by side on a graph.
+
+![Results summary](../assets/use_cases/semantic_chunking/graph5.png)
+
+## What we found
+
+- **LLM-based** semantic chunking performed **best overall**, achieving the highest scores across nearly all metrics (only embedding-similarity-based chunking had marginally more relevant results). LLM-based chunking excelled especially in faithfulness and in handling complex, multi-hop reasoning tasks (i.e., the hotpot_qa dataset). This suggests that LLMs can **better understand and preserve complex semantic relationships when chunking text**. But LLM inference takes time, so this method's **latency was higher** than our other methods (e.g., 6.88s on hotpot_qa vs. 5.24s for semantic chunking).
+- **Embedding-similarity-based** semantic chunking achieved the **highest relevancy scores** across both datasets (very slightly better than LLM-based), and well-balanced (closely matching) precision and recall scores.
+- **Hierarchical-clustering-based** semantic chunking performed respectably but **lagged on nearly all metrics except latency** (where it was basically even with embedding-similarity-based chunking, and had significantly lower latency than LLM-based chunking). Hierarchical clustering is viable but may struggle to capture fine-grained semantic relationships (e.g., a 0.63 faithfulness score on hotpot_qa).
+- The reranker improves scores but also adds latency for all chunking methods on both datasets, except hierarchical clustering on hotpot_qa.
+- In general, our semantic chunking evaluation suggests a not surprising trade-off between performance and speed. + +## Contributors + +- [Ashish Abraham, author](https://www.linkedin.com/in/ashish-abraham/) +- [Robert Turner, editor](https://www.linkedin.com/in/robertdhayanturner/) diff --git a/docs/assets/use_cases/semantic_chunking/graph1.png b/docs/assets/use_cases/semantic_chunking/graph1.png new file mode 100644 index 000000000..2576c297f Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/graph1.png differ diff --git a/docs/assets/use_cases/semantic_chunking/graph2.png b/docs/assets/use_cases/semantic_chunking/graph2.png new file mode 100644 index 000000000..aecdd4a4c Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/graph2.png differ diff --git a/docs/assets/use_cases/semantic_chunking/graph3.png b/docs/assets/use_cases/semantic_chunking/graph3.png new file mode 100644 index 000000000..e045b501b Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/graph3.png differ diff --git a/docs/assets/use_cases/semantic_chunking/graph4.png b/docs/assets/use_cases/semantic_chunking/graph4.png new file mode 100644 index 000000000..6ae2e43a1 Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/graph4.png differ diff --git a/docs/assets/use_cases/semantic_chunking/graph5.png b/docs/assets/use_cases/semantic_chunking/graph5.png new file mode 100644 index 000000000..202f452b8 Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/graph5.png differ diff --git a/docs/assets/use_cases/semantic_chunking/sample1.png b/docs/assets/use_cases/semantic_chunking/sample1.png new file mode 100644 index 000000000..82e34749e Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/sample1.png differ diff --git a/docs/assets/use_cases/semantic_chunking/sample2.png b/docs/assets/use_cases/semantic_chunking/sample2.png new file mode 100644 index 000000000..8e5cbe9a7 Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/sample2.png differ diff --git a/docs/assets/use_cases/semantic_chunking/sample3.png b/docs/assets/use_cases/semantic_chunking/sample3.png new file mode 100644 index 000000000..ca454efab Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/sample3.png differ diff --git a/docs/assets/use_cases/semantic_chunking/sample4.png b/docs/assets/use_cases/semantic_chunking/sample4.png new file mode 100644 index 000000000..e92749d83 Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/sample4.png differ diff --git a/docs/assets/use_cases/semantic_chunking/table.png b/docs/assets/use_cases/semantic_chunking/table.png new file mode 100644 index 000000000..7baec520f Binary files /dev/null and b/docs/assets/use_cases/semantic_chunking/table.png differ diff --git a/docs/tools/vdb_table/data/activeloop.json b/docs/tools/vdb_table/data/activeloop.json index fa7a18091..0208e27fc 100644 --- a/docs/tools/vdb_table/data/activeloop.json +++ b/docs/tools/vdb_table/data/activeloop.json @@ -157,7 +157,7 @@ "comment": "" }, "github_stars": { - "value": 8081, + "value": 8101, "source_url": "https://github.com/activeloopai/deeplake", "comment": "", "value_90_days": 0 @@ -169,10 +169,10 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 917329, + "value": 949405, "source_url": "https://pypi.org/project/deeplake/", "comment": "", - "value_90_days": 174429 + "value_90_days": 167920 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/aerospike.json 
b/docs/tools/vdb_table/data/aerospike.json index 1e653987b..63cb96103 100644 --- a/docs/tools/vdb_table/data/aerospike.json +++ b/docs/tools/vdb_table/data/aerospike.json @@ -178,10 +178,10 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 1191, + "value": 1671, "source_url": "https://pypi.org/project/aerospike-vector/", "comment": "", - "value_90_days": 524 + "value_90_days": 926 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/anariai.json b/docs/tools/vdb_table/data/anariai.json index 863c6f710..15e9aab89 100644 --- a/docs/tools/vdb_table/data/anariai.json +++ b/docs/tools/vdb_table/data/anariai.json @@ -174,10 +174,10 @@ "value_90_days": 0 }, "npm_downloads": { - "value": 4909, + "value": 5096, "source_url": "https://www.npmjs.com/package/epsillajs", "comment": "", - "value_90_days": 521 + "value_90_days": 608 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/apachecassandra.json b/docs/tools/vdb_table/data/apachecassandra.json index d7f9c12c7..f138f5ab1 100644 --- a/docs/tools/vdb_table/data/apachecassandra.json +++ b/docs/tools/vdb_table/data/apachecassandra.json @@ -157,33 +157,33 @@ "comment": "via Lucene" }, "github_stars": { - "value": 8755, + "value": 8793, "source_url": "https://github.com/apache/cassandra", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 215709756, + "value": 216043667, "source_url": "https://hub.docker.com/_/cassandra", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 83982033, + "value": 85375406, "source_url": "https://pypi.org/project/cassandra-driver/", "comment": "", - "value_90_days": 8977518 + "value_90_days": 9192091 }, "npm_downloads": { - "value": 4571141, + "value": 4598405, "source_url": "https://www.npmjs.com/package/cassandra-driver", "comment": "", - "value_90_days": 884795 + "value_90_days": 868693 }, "crates_io_downloads": { - "value": 88518, + "value": 89762, "source_url": "https://crates.io/crates/cassandra", "comment": "", - "value_90_days": 974 + "value_90_days": 1044 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/apachesolr.json b/docs/tools/vdb_table/data/apachesolr.json index 5c9cce18e..a52b21473 100644 --- a/docs/tools/vdb_table/data/apachesolr.json +++ b/docs/tools/vdb_table/data/apachesolr.json @@ -157,33 +157,33 @@ "comment": "via Lucene" }, "github_stars": { - "value": 1178, + "value": 1197, "source_url": "https://github.com/apache/solr", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 343830137, + "value": 344024528, "source_url": "https://hub.docker.com/_/solr", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 11847305, + "value": 12001490, "source_url": "https://pypi.org/project/pysolr/", "comment": "", - "value_90_days": 924379 + "value_90_days": 919478 }, "npm_downloads": { - "value": 599287, + "value": 595209, "source_url": "https://www.npmjs.com/package/solr-client", "comment": "", - "value_90_days": 109339 + "value_90_days": 110632 }, "crates_io_downloads": { - "value": 1483, + "value": 1515, "source_url": "https://crates.io/crates/solr", "comment": "", - "value_90_days": 215 + "value_90_days": 221 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/aperturedb.json b/docs/tools/vdb_table/data/aperturedb.json index 8821b336f..d1fb8aaab 100644 --- a/docs/tools/vdb_table/data/aperturedb.json +++ b/docs/tools/vdb_table/data/aperturedb.json @@ -168,16 +168,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 153360, + "value": 170148, "source_url": 
"https://pypi.org/project/aperturedb/", "comment": "", - "value_90_days": 19781 + "value_90_days": 33621 }, "npm_downloads": { - "value": 18936, + "value": 18587, "source_url": "https://www.npmjs.com/package/aperture", "comment": "", - "value_90_days": 1697 + "value_90_days": 1644 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/azureai.json b/docs/tools/vdb_table/data/azureai.json index 7cbd12345..711a97cd6 100644 --- a/docs/tools/vdb_table/data/azureai.json +++ b/docs/tools/vdb_table/data/azureai.json @@ -179,16 +179,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 30230401, + "value": 31788852, "source_url": "https://pypi.org/project/azure-ai-ml/", "comment": "", - "value_90_days": 9291085 + "value_90_days": 9778906 }, "npm_downloads": { - "value": 6479858, + "value": 6865795, "source_url": "https://www.npmjs.com/package/@azure/openai", "comment": "", - "value_90_days": 2155449 + "value_90_days": 2258047 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/chroma.json b/docs/tools/vdb_table/data/chroma.json index 96ffb45f4..ac6e6dc36 100644 --- a/docs/tools/vdb_table/data/chroma.json +++ b/docs/tools/vdb_table/data/chroma.json @@ -157,7 +157,7 @@ "comment": "" }, "github_stars": { - "value": 14811, + "value": 15014, "source_url": "https://github.com/chroma-core/chroma", "comment": "", "value_90_days": 0 @@ -169,21 +169,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 18897962, + "value": 19924126, "source_url": "https://pypi.org/project/chromadb/", "comment": "", - "value_90_days": 5238534 + "value_90_days": 5528579 }, "npm_downloads": { - "value": 1726093, + "value": 1805790, "source_url": "https://www.npmjs.com/package/chromadb", "comment": "", - "value_90_days": 475082 + "value_90_days": 494462 }, "crates_io_downloads": { - "value": 13492, + "value": 14791, "source_url": "https://crates.io/crates/chromadb", "comment": "", - "value_90_days": 1368 + "value_90_days": 1597 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/clickhouse.json b/docs/tools/vdb_table/data/clickhouse.json index 1aa833fed..b9c071620 100644 --- a/docs/tools/vdb_table/data/clickhouse.json +++ b/docs/tools/vdb_table/data/clickhouse.json @@ -156,7 +156,7 @@ "comment": "HNSW via USearch" }, "github_stars": { - "value": 37016, + "value": 37224, "source_url": "https://github.com/ClickHouse/ClickHouse", "comment": "", "value_90_days": 0 @@ -168,21 +168,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 200794, + "value": 202127, "source_url": "https://pypi.org/project/clickhouse/", "comment": "", - "value_90_days": 8612 + "value_90_days": 8169 }, "npm_downloads": { - "value": 10197127, + "value": 11576731, "source_url": "https://www.npmjs.com/package/@clickhouse/client", "comment": "", - "value_90_days": 4165890 + "value_90_days": 5018953 }, "crates_io_downloads": { - "value": 439372, + "value": 457291, "source_url": "https://crates.io/crates/clickhouse", "comment": "", - "value_90_days": 92556 + "value_90_days": 95418 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/couchbase.json b/docs/tools/vdb_table/data/couchbase.json index a137f6cfb..e7aad2eef 100644 --- a/docs/tools/vdb_table/data/couchbase.json +++ b/docs/tools/vdb_table/data/couchbase.json @@ -135,33 +135,33 @@ "comment": "Automatic algorithm selection between: IdMap2,Flat, IVF,Flat, IVF,SQ8" }, "github_stars": { - "value": 1620, + "value": 1625, "source_url": "https://github.com/couchbase/couchbase-lite-ios", "comment": "", 
"value_90_days": 0 }, "docker_pulls": { - "value": 87116407, + "value": 87146631, "source_url": "https://hub.docker.com/_/couchbase", "comment": "50M+", "value_90_days": 0 }, "pypi_downloads": { - "value": 13405354, + "value": 13592323, "source_url": "https://pypi.org/project/couchbase/", "comment": "", - "value_90_days": 1148597 + "value_90_days": 1098901 }, "npm_downloads": { - "value": 1129807, + "value": 1134244, "source_url": "https://www.npmjs.com/package/couchbase", "comment": "", - "value_90_days": 207614 + "value_90_days": 207212 }, "crates_io_downloads": { - "value": 37828, + "value": 38241, "source_url": "https://crates.io/crates/couchbase", "comment": "", - "value_90_days": 1018 + "value_90_days": 1141 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/cratedb.json b/docs/tools/vdb_table/data/cratedb.json index 79604b6e0..fc6a6cba3 100644 --- a/docs/tools/vdb_table/data/cratedb.json +++ b/docs/tools/vdb_table/data/cratedb.json @@ -157,33 +157,33 @@ "comment": "via Lucene" }, "github_stars": { - "value": 4065, + "value": 4083, "source_url": "https://github.com/crate/crate", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 18703647, + "value": 18711061, "source_url": "https://hub.docker.com/_/crate", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 1342001, + "value": 1356972, "source_url": "https://pypi.org/project/crate/", "comment": "", - "value_90_days": 67143 + "value_90_days": 70259 }, "npm_downloads": { - "value": 25992, + "value": 25865, "source_url": "https://www.npmjs.com/package/node-crate", "comment": "", - "value_90_days": 4597 + "value_90_days": 4438 }, "crates_io_downloads": { - "value": 7059, + "value": 7436, "source_url": "https://crates.io/crates/cratedb", "comment": "", - "value_90_days": 866 + "value_90_days": 1031 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/databricks.json b/docs/tools/vdb_table/data/databricks.json new file mode 100644 index 000000000..2957bb1b1 --- /dev/null +++ b/docs/tools/vdb_table/data/databricks.json @@ -0,0 +1,188 @@ +{ + "name": "Databricks Vector Search", + "links": { + "docs": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "github": "", + "website": "https://www.databricks.com/product/machine-learning/vector-search", + "vendor_discussion": "https://github.com/superlinked/VectorHub/discussions/514", + "poc_github": "kobindra", + "slug": "databricks" + }, + "oss": { + "support": "none", + "source_url": "", + "comment": "" + }, + "license": { + "value": "Proprietary", + "source_url": "https://www.databricks.com/legal/mcsa", + "comment": "" + }, + "dev_languages": { + "value": [ + "" + ], + "source_url": "", + "comment": "" + }, + "vector_launch_year": 2024, + "metadata_filter": { + "support": "full", + "source_url": "https://www.databricks.com/blog/announcing-mosaic-ai-vector-search-general-availability-databricks", + "comment": "" + }, + "hybrid_search": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "comment": "" + }, + "facets": { + "support": "none", + "source_url": "", + "comment": "" + }, + "geo_search": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_vec": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sparse_vectors": { + "support": "none", + "source_url": "", + "comment": "" + }, + "bm25": { + "support": "full", + "source_url": 
"https://docs.databricks.com/en/generative-ai/vector-search.html#how-does-mosaic-ai-vector-search-work", + "comment": "Okapi BM25" + }, + "full_text": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "comment": "" + }, + "embeddings_text": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/external-models/index.html", + "comment": "" + }, + "embeddings_image": { + "support": "none", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html#options-for-providing-vector-embeddings", + "comment": "" + }, + "embeddings_structured": { + "support": "none", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html#options-for-providing-vector-embeddings", + "comment": "" + }, + "rag": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/retrieval-augmented-generation.html#databricks-vector-search", + "comment": "" + }, + "recsys": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html#options-for-providing-vector-embeddings", + "comment": "" + }, + "langchain": { + "support": "full", + "source_url": "https://python.langchain.com/v0.2/docs/integrations/vectorstores/databricks_vector_search/", + "comment": "" + }, + "llamaindex": { + "support": "full", + "source_url": "https://docs.llamaindex.ai/en/stable/examples/vector_stores/DatabricksVectorSearchDemo/", + "comment": "" + }, + "managed_cloud": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "comment": "" + }, + "pricing": { + "value": "full", + "source_url": "https://www.databricks.com/product/pricing/vector-search", + "comment": "Includes free tier" + }, + "in_process": { + "support": "none", + "source_url": "", + "comment": "" + }, + "multi_tenancy": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "comment": "via namespaces" + }, + "disk_index": { + "support": "partial", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "comment": "" + }, + "ephemeral": { + "support": "none", + "source_url": "", + "comment": "" + }, + "sharding": { + "support": "full", + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html", + "comment": "" + }, + "doc_size": { + "bytes": 32768, + "unlimited": false, + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html#resource-and-data-size-limits", + "comment": "" + }, + "vector_dims": { + "value": 4096, + "unlimited": false, + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html#resource-and-data-size-limits", + "comment": "" + }, + "index_types": { + "value": [ + "HNSW" + ], + "source_url": "https://docs.databricks.com/en/generative-ai/vector-search.html#how-does-mosaic-ai-vector-search-work", + "comment": "" + }, + "github_stars": { + "value": 0, + "source_url": "", + "comment": "", + "value_90_days": 0 + }, + "docker_pulls": { + "value": 0, + "source_url": "", + "comment": "", + "value_90_days": 0 + }, + "pypi_downloads": { + "value": 0, + "source_url": "", + "comment": "", + "value_90_days": 0 + }, + "npm_downloads": { + "value": 0, + "source_url": "", + "comment": "", + "value_90_days": 0 + }, + "crates_io_downloads": { + "value": 0, + "source_url": "", + "comment": "", + "value_90_days": 0 + } + } \ No newline at end of file diff --git 
a/docs/tools/vdb_table/data/datastaxastra.json b/docs/tools/vdb_table/data/datastaxastra.json index 956c730ab..a0a3d9af2 100644 --- a/docs/tools/vdb_table/data/datastaxastra.json +++ b/docs/tools/vdb_table/data/datastaxastra.json @@ -171,16 +171,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 420263, + "value": 464250, "source_url": "https://pypi.org/project/astrapy/", "comment": "", - "value_90_days": 193483 + "value_90_days": 207983 }, "npm_downloads": { - "value": 718016, + "value": 774296, "source_url": "https://www.npmjs.com/package/@datastax/astra-db-ts", "comment": "", - "value_90_days": 313652 + "value_90_days": 324577 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/elasticsearch.json b/docs/tools/vdb_table/data/elasticsearch.json index 135815d2e..d21d98b2d 100644 --- a/docs/tools/vdb_table/data/elasticsearch.json +++ b/docs/tools/vdb_table/data/elasticsearch.json @@ -163,33 +163,33 @@ "comment": "via Lucene" }, "github_stars": { - "value": 69736, + "value": 69924, "source_url": "https://github.com/elastic/elasticsearch", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 884286576, + "value": 885838781, "source_url": "https://hub.docker.com/_/elasticsearch", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 857218788, + "value": 873978222, "source_url": "https://pypi.org/project/elasticsearch/", "comment": "", - "value_90_days": 96497586 + "value_90_days": 99832130 }, "npm_downloads": { - "value": 75225197, + "value": 75631450, "source_url": "https://www.npmjs.com/package/@elastic/elasticsearch", "comment": "", - "value_90_days": 13061790 + "value_90_days": 12881861 }, "crates_io_downloads": { - "value": 3404057, + "value": 3729362, "source_url": "https://crates.io/crates/elasticsearch", "comment": "", - "value_90_days": 1408585 + "value_90_days": 1556913 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/epsilla.json b/docs/tools/vdb_table/data/epsilla.json index bdd886b1f..072c04c75 100644 --- a/docs/tools/vdb_table/data/epsilla.json +++ b/docs/tools/vdb_table/data/epsilla.json @@ -156,7 +156,7 @@ "comment": "Parallel graph traversal algorithm" }, "github_stars": { - "value": 900, + "value": 902, "source_url": "https://github.com/epsilla-cloud/vectordb", "comment": "", "value_90_days": 0 @@ -168,10 +168,10 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 51640, + "value": 53934, "source_url": "https://pypi.org/project/pyepsilla/", "comment": "", - "value_90_days": 10937 + "value_90_days": 10941 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/kdbai.json b/docs/tools/vdb_table/data/kdbai.json index 6b983b790..9a0ab1a00 100644 --- a/docs/tools/vdb_table/data/kdbai.json +++ b/docs/tools/vdb_table/data/kdbai.json @@ -171,10 +171,10 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 13371, + "value": 14834, "source_url": "https://pypi.org/project/kdbai-client/", "comment": "", - "value_90_days": 5912 + "value_90_days": 6885 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/lancedb.json b/docs/tools/vdb_table/data/lancedb.json index 08b924093..990b5e511 100644 --- a/docs/tools/vdb_table/data/lancedb.json +++ b/docs/tools/vdb_table/data/lancedb.json @@ -157,7 +157,7 @@ "comment": "Disk-based IVF_PQ index" }, "github_stars": { - "value": 4270, + "value": 4405, "source_url": "https://github.com/lancedb/lancedb", "comment": "", "value_90_days": 0 @@ -169,21 +169,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 2744541, + 
"value": 2995418, "source_url": "https://pypi.org/project/lancedb/", "comment": "", - "value_90_days": 1402697 + "value_90_days": 1491507 }, "npm_downloads": { - "value": 1344278, + "value": 1389239, "source_url": "https://www.npmjs.com/package/@lancedb/vectordb-linux-x64-gnu", "comment": "", - "value_90_days": 291625 + "value_90_days": 281224 }, "crates_io_downloads": { - "value": 16356, + "value": 18310, "source_url": "https://crates.io/crates/lancedb", "comment": "", - "value_90_days": 4229 + "value_90_days": 4707 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/marqo.json b/docs/tools/vdb_table/data/marqo.json index 20c6aac4f..b1dc82396 100644 --- a/docs/tools/vdb_table/data/marqo.json +++ b/docs/tools/vdb_table/data/marqo.json @@ -156,7 +156,7 @@ "comment": "" }, "github_stars": { - "value": 4524, + "value": 4565, "source_url": "https://github.com/marqo-ai/marqo", "comment": "", "value_90_days": 0 @@ -168,16 +168,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 216590, + "value": 225728, "source_url": "https://pypi.org/project/marqo/", "comment": "", - "value_90_days": 43675 + "value_90_days": 45758 }, "npm_downloads": { - "value": 146, + "value": 144, "source_url": "https://www.npmjs.com/package/marqo", "comment": "", - "value_90_days": 39 + "value_90_days": 31 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/meilisearch.json b/docs/tools/vdb_table/data/meilisearch.json index 833257d41..9fae4fcae 100644 --- a/docs/tools/vdb_table/data/meilisearch.json +++ b/docs/tools/vdb_table/data/meilisearch.json @@ -149,7 +149,7 @@ "comment": "" }, "github_stars": { - "value": 46791, + "value": 47002, "source_url": "https://github.com/meilisearch/meilisearch", "comment": "", "value_90_days": 0 @@ -161,16 +161,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 1699620, + "value": 1785853, "source_url": "https://pypi.org/project/meilisearch/", "comment": "", - "value_90_days": 461445 + "value_90_days": 482739 }, "npm_downloads": { - "value": 5308333, + "value": 5241321, "source_url": "https://www.npmjs.com/package/meilisearch", "comment": "", - "value_90_days": 1135890 + "value_90_days": 1185591 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/milvus.json b/docs/tools/vdb_table/data/milvus.json index 8c0a6d7c8..8946302e2 100644 --- a/docs/tools/vdb_table/data/milvus.json +++ b/docs/tools/vdb_table/data/milvus.json @@ -176,7 +176,7 @@ "comment": "" }, "github_stars": { - "value": 29782, + "value": 30037, "source_url": "https://github.com/milvus-io/milvus", "comment": "", "value_90_days": 0 @@ -188,21 +188,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 12681699, + "value": 13190562, "source_url": "https://pypi.org/project/pymilvus/", "comment": "", - "value_90_days": 2515476 + "value_90_days": 2694911 }, "npm_downloads": { - "value": 939976, + "value": 992242, "source_url": "https://www.npmjs.com/package/@zilliz/milvus2-sdk-node", "comment": "", - "value_90_days": 331815 + "value_90_days": 341769 }, "crates_io_downloads": { - "value": 2448, + "value": 2591, "source_url": "https://crates.io/crates/milvus", "comment": "", - "value_90_days": 712 + "value_90_days": 777 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/mongo.json b/docs/tools/vdb_table/data/mongo.json index cf7a547aa..92d4154ec 100644 --- a/docs/tools/vdb_table/data/mongo.json +++ b/docs/tools/vdb_table/data/mongo.json @@ -164,22 +164,22 @@ "value_90_days": 0 }, "docker_pulls": { - "value": 4482718983, + 
"value": 4489475119, "source_url": "https://hub.docker.com/_/mongo", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 43059, + "value": 43587, "source_url": "https://pypi.org/project/mongodbatlas/", "comment": "", - "value_90_days": 2739 + "value_90_days": 2824 }, "npm_downloads": { - "value": 486589, + "value": 499631, "source_url": "https://www.npmjs.com/package/mongodb-atlas-api-client", "comment": "", - "value_90_days": 110086 + "value_90_days": 112202 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/myscale.json b/docs/tools/vdb_table/data/myscale.json index 78215b635..e7f153560 100644 --- a/docs/tools/vdb_table/data/myscale.json +++ b/docs/tools/vdb_table/data/myscale.json @@ -162,22 +162,22 @@ "comment": "MSTG is a proprietary index type developed by MyScale, available in MyScale Cloud only" }, "github_stars": { - "value": 847, + "value": 854, "source_url": "https://github.com/myscale/MyscaleDB", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 9344, + "value": 10414, "source_url": "https://hub.docker.com/r/myscale/myscaledb", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 22979, + "value": 23244, "source_url": "https://pypi.org/project/myscaledb-client/", "comment": "", - "value_90_days": 1093 + "value_90_days": 1154 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/neo4j.json b/docs/tools/vdb_table/data/neo4j.json index 360919cc6..1f44402c6 100644 --- a/docs/tools/vdb_table/data/neo4j.json +++ b/docs/tools/vdb_table/data/neo4j.json @@ -163,27 +163,27 @@ "value_90_days": 0 }, "docker_pulls": { - "value": 288074965, + "value": 288474273, "source_url": "https://hub.docker.com/_/neo4j", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 34071841, + "value": 34967196, "source_url": "https://pypi.org/project/neo4j/", "comment": "", - "value_90_days": 5003417 + "value_90_days": 5299611 }, "npm_downloads": { - "value": 45495, + "value": 45766, "source_url": "https://www.npmjs.com/package/neo4j", "comment": "", - "value_90_days": 8909 + "value_90_days": 8975 }, "crates_io_downloads": { - "value": 101708, + "value": 105803, "source_url": "https://crates.io/crates/neo4rs", "comment": "", - "value_90_days": 15543 + "value_90_days": 15821 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/nucliadb.json b/docs/tools/vdb_table/data/nucliadb.json index cc550f1dc..05996aecb 100644 --- a/docs/tools/vdb_table/data/nucliadb.json +++ b/docs/tools/vdb_table/data/nucliadb.json @@ -157,7 +157,7 @@ "comment": "" }, "github_stars": { - "value": 639, + "value": 650, "source_url": "https://github.com/nuclia/nucliadb", "comment": "", "value_90_days": 0 @@ -169,16 +169,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 660381, + "value": 693106, "source_url": "https://pypi.org/project/nucliadb/", "comment": "", - "value_90_days": 143559 + "value_90_days": 153210 }, "npm_downloads": { - "value": 10596, + "value": 10848, "source_url": "https://www.npmjs.com/package/@nuclia/core", "comment": "", - "value_90_days": 2355 + "value_90_days": 2391 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/opensearch.json b/docs/tools/vdb_table/data/opensearch.json index e9278bdbb..b465a8153 100644 --- a/docs/tools/vdb_table/data/opensearch.json +++ b/docs/tools/vdb_table/data/opensearch.json @@ -167,33 +167,33 @@ "comment": "via Lucene" }, "github_stars": { - "value": 9625, + "value": 9683, "source_url": 
"https://github.com/opensearch-project/OpenSearch", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 88979761, + "value": 90141980, "source_url": "https://hub.docker.com/r/opensearchproject/opensearch", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 359493, + "value": 362376, "source_url": "https://pypi.org/project/opensearch/", "comment": "", - "value_90_days": 22148 + "value_90_days": 21461 }, "npm_downloads": { - "value": 25231632, + "value": 25789509, "source_url": "https://www.npmjs.com/package/@opensearch-project/opensearch", "comment": "", - "value_90_days": 5612838 + "value_90_days": 5666381 }, "crates_io_downloads": { - "value": 619462, + "value": 650865, "source_url": "https://crates.io/crates/opensearch", "comment": "", - "value_90_days": 158992 + "value_90_days": 168058 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/oramasearch.json b/docs/tools/vdb_table/data/oramasearch.json index ccf99157f..adeb454a9 100644 --- a/docs/tools/vdb_table/data/oramasearch.json +++ b/docs/tools/vdb_table/data/oramasearch.json @@ -149,7 +149,7 @@ "comment": "" }, "github_stars": { - "value": 8559, + "value": 8613, "source_url": "https://github.com/oramasearch/orama", "comment": "", "value_90_days": 0 @@ -167,10 +167,10 @@ "value_90_days": 0 }, "npm_downloads": { - "value": 1122250, + "value": 1171095, "source_url": "https://www.npmjs.com/package/@orama/orama", "comment": "", - "value_90_days": 336356 + "value_90_days": 330912 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/paradedb.json b/docs/tools/vdb_table/data/paradedb.json index bd10e22ba..471772902 100644 --- a/docs/tools/vdb_table/data/paradedb.json +++ b/docs/tools/vdb_table/data/paradedb.json @@ -158,13 +158,13 @@ "comment": "" }, "github_stars": { - "value": 5934, + "value": 6084, "source_url": "https://github.com/paradedb/paradedb", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 44117, + "value": 46188, "source_url": "https://hub.docker.com/r/paradedb/paradedb", "comment": "", "value_90_days": 0 diff --git a/docs/tools/vdb_table/data/pgvecto_rs.json b/docs/tools/vdb_table/data/pgvecto_rs.json index ec2d42625..90dbf06bc 100644 --- a/docs/tools/vdb_table/data/pgvecto_rs.json +++ b/docs/tools/vdb_table/data/pgvecto_rs.json @@ -168,22 +168,22 @@ "comment": "" }, "github_stars": { - "value": 1700, + "value": 1725, "source_url": "https://github.com/tensorchord/pgvecto.rs", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 6782657, + "value": 7183792, "source_url": "https://hub.docker.com/r/tensorchord/pgvecto-rs", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 63244, + "value": 69287, "source_url": "https://pypi.org/project/pgvecto-rs/", "comment": "", - "value_90_days": 29066 + "value_90_days": 30974 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/pgvector.json b/docs/tools/vdb_table/data/pgvector.json index 55007d53d..a4f0daaad 100644 --- a/docs/tools/vdb_table/data/pgvector.json +++ b/docs/tools/vdb_table/data/pgvector.json @@ -158,7 +158,7 @@ "comment": "" }, "github_stars": { - "value": 12019, + "value": 12209, "source_url": "https://github.com/pgvector/pgvector", "comment": "", "value_90_days": 0 @@ -170,21 +170,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 13533984, + "value": 14703865, "source_url": "https://pypi.org/project/pgvector/", "comment": "", - "value_90_days": 6637648 + "value_90_days": 7052509 }, "npm_downloads": { - "value": 1321954, + 
"value": 1407918, "source_url": "https://www.npmjs.com/package/pgvector", "comment": "", - "value_90_days": 539977 + "value_90_days": 550523 }, "crates_io_downloads": { - "value": 101544, + "value": 107757, "source_url": "https://crates.io/crates/pgvector", "comment": "", - "value_90_days": 28962 + "value_90_days": 31245 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/pinecone.json b/docs/tools/vdb_table/data/pinecone.json index ddee87c42..9b5ac6e7f 100644 --- a/docs/tools/vdb_table/data/pinecone.json +++ b/docs/tools/vdb_table/data/pinecone.json @@ -168,21 +168,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 26019845, + "value": 27195136, "source_url": "https://pypi.org/project/pinecone-client/", "comment": "", - "value_90_days": 9336763 + "value_90_days": 9311322 }, "npm_downloads": { - "value": 9978024, + "value": 10241067, "source_url": "https://www.npmjs.com/package/@pinecone-database/pinecone", "comment": "", - "value_90_days": 1915303 + "value_90_days": 1948994 }, "crates_io_downloads": { - "value": 1855, + "value": 1946, "source_url": "https://crates.io/crates/pinecone-rs", "comment": "", - "value_90_days": 447 + "value_90_days": 467 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/qdrant.json b/docs/tools/vdb_table/data/qdrant.json index 23ec70824..b5836b1ac 100644 --- a/docs/tools/vdb_table/data/qdrant.json +++ b/docs/tools/vdb_table/data/qdrant.json @@ -170,22 +170,22 @@ "comment": "" }, "github_stars": { - "value": 20044, + "value": 20203, "source_url": "https://github.com/qdrant/qdrant", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 9009780, + "value": 9251651, "source_url": "https://hub.docker.com/r/qdrant/qdrant", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 15269456, + "value": 16476573, "source_url": "https://pypi.org/project/qdrant-client/", "comment": "", - "value_90_days": 7879282 + "value_90_days": 8129183 }, "npm_downloads": { "value": 1315891, @@ -194,9 +194,9 @@ "value_90_days": 551308 }, "crates_io_downloads": { - "value": 219495, + "value": 248222, "source_url": "https://crates.io/crates/qdrant-client", "comment": "", - "value_90_days": 46734 + "value_90_days": 58620 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/redis.json b/docs/tools/vdb_table/data/redis.json index 6a366f522..51c8b2c04 100644 --- a/docs/tools/vdb_table/data/redis.json +++ b/docs/tools/vdb_table/data/redis.json @@ -157,28 +157,28 @@ "comment": "" }, "github_stars": { - "value": 5467, + "value": 5495, "source_url": "https://github.com/RediSearch/RediSearch", "comment": "Vector Search library is part of RediSearch https://github.com/RediSearch/RediSearch/tree/master/deps", "value_90_days": 0 }, "docker_pulls": { - "value": 8330195, + "value": 8494734, "source_url": "https://hub.docker.com/r/redis/redis-stack", "comment": "Vector Search is part of the Redis Stack", "value_90_days": 0 }, "pypi_downloads": { - "value": 895163, + "value": 902093, "source_url": "https://pypi.org/project/redisearch/", "comment": "", - "value_90_days": 64474 + "value_90_days": 65113 }, "npm_downloads": { - "value": 116583751, + "value": 119121918, "source_url": "https://www.npmjs.com/package/@redis/search", "comment": "", - "value_90_days": 26421775 + "value_90_days": 26751081 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/rockset.json b/docs/tools/vdb_table/data/rockset.json index d85aa397b..646d0c036 100644 --- a/docs/tools/vdb_table/data/rockset.json +++ 
b/docs/tools/vdb_table/data/rockset.json @@ -168,16 +168,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 3466216, + "value": 3559832, "source_url": "https://pypi.org/project/rockset/", "comment": "", - "value_90_days": 792087 + "value_90_days": 782706 }, "npm_downloads": { - "value": 6818, + "value": 6919, "source_url": "https://www.npmjs.com/package/rockset", "comment": "", - "value_90_days": 1076 + "value_90_days": 1278 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/singlestoredb.json b/docs/tools/vdb_table/data/singlestoredb.json index 22bac44a8..ab5e68aff 100644 --- a/docs/tools/vdb_table/data/singlestoredb.json +++ b/docs/tools/vdb_table/data/singlestoredb.json @@ -165,22 +165,22 @@ "value_90_days": 0 }, "docker_pulls": { - "value": 522656, + "value": 524554, "source_url": "https://hub.docker.com/r/singlestore/cluster-in-a-box", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 1550319, + "value": 1625828, "source_url": "https://pypi.org/project/singlestoredb/", "comment": "", - "value_90_days": 401442 + "value_90_days": 419181 }, "npm_downloads": { - "value": 374, + "value": 334, "source_url": "https://www.npmjs.com/package/@singlestore/http-client", "comment": "", - "value_90_days": 26 + "value_90_days": 27 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/turbopuffer.json b/docs/tools/vdb_table/data/turbopuffer.json index 36500e2b3..c1e4a0bd0 100644 --- a/docs/tools/vdb_table/data/turbopuffer.json +++ b/docs/tools/vdb_table/data/turbopuffer.json @@ -168,16 +168,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 1437391, + "value": 1634464, "source_url": "https://pypi.org/project/turbopuffer/", "comment": "", - "value_90_days": 722171 + "value_90_days": 795479 }, "npm_downloads": { - "value": 314508, + "value": 338896, "source_url": "https://www.npmjs.com/package/@turbopuffer/turbopuffer", "comment": "", - "value_90_days": 157227 + "value_90_days": 164786 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/txtai.json b/docs/tools/vdb_table/data/txtai.json index 6ef0f4dd6..c8480efe3 100644 --- a/docs/tools/vdb_table/data/txtai.json +++ b/docs/tools/vdb_table/data/txtai.json @@ -159,7 +159,7 @@ "comment": "" }, "github_stars": { - "value": 8826, + "value": 8981, "source_url": "https://github.com/neuml/txtai", "comment": "", "value_90_days": 0 @@ -171,21 +171,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 694700, + "value": 709732, "source_url": "https://pypi.org/project/txtai/", "comment": "", - "value_90_days": 360425 + "value_90_days": 335871 }, "npm_downloads": { - "value": 2594, + "value": 2552, "source_url": "https://www.npmjs.com/package/txtai", "comment": "", - "value_90_days": 473 + "value_90_days": 477 }, "crates_io_downloads": { - "value": 17086, + "value": 18230, "source_url": "https://crates.io/crates/txtai", "comment": "", - "value_90_days": 1175 + "value_90_days": 1146 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/typesense.json b/docs/tools/vdb_table/data/typesense.json index bb450d82f..ca813dcff 100644 --- a/docs/tools/vdb_table/data/typesense.json +++ b/docs/tools/vdb_table/data/typesense.json @@ -157,7 +157,7 @@ "comment": "" }, "github_stars": { - "value": 20753, + "value": 20906, "source_url": "https://github.com/typesense/typesense", "comment": "", "value_90_days": 0 @@ -169,16 +169,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 2939888, + "value": 3002772, "source_url": 
"https://pypi.org/project/typesense/", "comment": "", - "value_90_days": 397745 + "value_90_days": 396990 }, "npm_downloads": { - "value": 6096384, + "value": 6281054, "source_url": "https://www.npmjs.com/package/typesense", "comment": "", - "value_90_days": 1492278 + "value_90_days": 1561799 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/usearch.json b/docs/tools/vdb_table/data/usearch.json index 6daafa55a..2214bac19 100644 --- a/docs/tools/vdb_table/data/usearch.json +++ b/docs/tools/vdb_table/data/usearch.json @@ -168,7 +168,7 @@ "comment": "" }, "github_stars": { - "value": 2151, + "value": 2172, "source_url": "https://github.com/unum-cloud/usearch", "comment": "", "value_90_days": 0 @@ -180,21 +180,21 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 1478316, + "value": 1604249, "source_url": "https://pypi.org/project/usearch/", "comment": "", - "value_90_days": 475683 + "value_90_days": 554576 }, "npm_downloads": { - "value": 102206, + "value": 105483, "source_url": "https://www.npmjs.com/package/usearch", "comment": "", - "value_90_days": 24377 + "value_90_days": 23763 }, "crates_io_downloads": { - "value": 83392, + "value": 87928, "source_url": "https://crates.io/crates/usearch", "comment": "", - "value_90_days": 1660 + "value_90_days": 1716 } } \ No newline at end of file diff --git a/docs/tools/vdb_table/data/vald.json b/docs/tools/vdb_table/data/vald.json index 0f4800ffe..4e184166b 100644 --- a/docs/tools/vdb_table/data/vald.json +++ b/docs/tools/vdb_table/data/vald.json @@ -157,7 +157,7 @@ "comment": "" }, "github_stars": { - "value": 1515, + "value": 1524, "source_url": "https://github.com/vdaas/vald", "comment": "", "value_90_days": 0 @@ -169,16 +169,16 @@ "value_90_days": 0 }, "pypi_downloads": { - "value": 239809, + "value": 244200, "source_url": "https://pypi.org/project/vald-client-python/", "comment": "", - "value_90_days": 12093 + "value_90_days": 15360 }, "npm_downloads": { - "value": 6611, + "value": 6583, "source_url": "https://www.npmjs.com/package/vald-client-node", "comment": "", - "value_90_days": 830 + "value_90_days": 839 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/vectara.json b/docs/tools/vdb_table/data/vectara.json index b5ebb52c6..0b6edacb6 100644 --- a/docs/tools/vdb_table/data/vectara.json +++ b/docs/tools/vdb_table/data/vectara.json @@ -155,7 +155,7 @@ "value_90_days": 0 }, "docker_pulls": { - "value": 7899, + "value": 7991, "source_url": "https://hub.docker.com/r/vectara/vectara-answer", "comment": "", "value_90_days": 0 @@ -167,10 +167,10 @@ "value_90_days": 0 }, "npm_downloads": { - "value": 4590, + "value": 4784, "source_url": "https://www.npmjs.com/package/@vectara/react-search", "comment": "", - "value_90_days": 1774 + "value_90_days": 1765 }, "crates_io_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/vespa.json b/docs/tools/vdb_table/data/vespa.json index c417c25b9..9c2c2c7a6 100644 --- a/docs/tools/vdb_table/data/vespa.json +++ b/docs/tools/vdb_table/data/vespa.json @@ -170,22 +170,22 @@ "comment": "" }, "github_stars": { - "value": 5685, + "value": 5733, "source_url": "https://github.com/vespa-engine/vespa", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 12033456, + "value": 12044425, "source_url": "https://hub.docker.com/r/vespaengine/vespa", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 646652, + "value": 679482, "source_url": "https://pypi.org/project/pyvespa/", "comment": "", - "value_90_days": 164044 + 
"value_90_days": 176274 }, "npm_downloads": { "value": 0, diff --git a/docs/tools/vdb_table/data/weaviate.json b/docs/tools/vdb_table/data/weaviate.json index d390d9624..9318427f9 100644 --- a/docs/tools/vdb_table/data/weaviate.json +++ b/docs/tools/vdb_table/data/weaviate.json @@ -165,33 +165,33 @@ "comment": "Option to dynamically switch from Flat to HNSW" }, "github_stars": { - "value": 11024, + "value": 11168, "source_url": "https://github.com/weaviate/weaviate", "comment": "", "value_90_days": 0 }, "docker_pulls": { - "value": 6415947, + "value": 6520325, "source_url": "https://hub.docker.com/r/semitechnologies/weaviate", "comment": "", "value_90_days": 0 }, "pypi_downloads": { - "value": 30345780, + "value": 33146138, "source_url": "https://pypi.org/project/weaviate-client/", "comment": "", - "value_90_days": 15176733 + "value_90_days": 16612071 }, "npm_downloads": { - "value": 344907, + "value": 365681, "source_url": "https://www.npmjs.com/package/weaviate-client", "comment": "", - "value_90_days": 101271 + "value_90_days": 124935 }, "crates_io_downloads": { - "value": 666, + "value": 699, "source_url": "https://crates.io/crates/weaviate-client", "comment": "", - "value_90_days": 207 + "value_90_days": 216 } } \ No newline at end of file