Skip to content

Commit 9977e78

Browse files
authored
Add MongoDB vector store (#42)
1 parent 04b4abb commit 9977e78

File tree

4 files changed

+162
-0
lines changed

4 files changed

+162
-0
lines changed

.github/workflows/pipeline.yml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
name: pipeline
22
on: pull_request
33

4+
env:
5+
REQUIRED_PHP_EXTENSIONS: 'mongodb'
6+
47
jobs:
58
tests:
69
runs-on: ubuntu-latest
@@ -16,6 +19,8 @@ jobs:
1619
uses: shivammathur/setup-php@v2
1720
with:
1821
php-version: ${{ matrix.php }}
22+
coverage: "none"
23+
extensions: "${{ env.REQUIRED_PHP_EXTENSIONS }}"
1924

2025
- name: Install Composer
2126
uses: "ramsey/composer-install@v3"
@@ -41,6 +46,8 @@ jobs:
4146
uses: shivammathur/setup-php@v2
4247
with:
4348
php-version: '8.2'
49+
coverage: "none"
50+
extensions: "${{ env.REQUIRED_PHP_EXTENSIONS }}"
4451

4552
- name: Install Composer
4653
uses: "ramsey/composer-install@v3"

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Supported Stores
4545

4646
* [x] [ChromaDB](https://trychroma.com)
4747
* [x] [Azure AI Search](https://azure.microsoft.com/en-us/products/ai-services/ai-search)
48+
* [x] [MongoDB Atlas Search](https://mongodb.com/products/platform/atlas-vector-search)
4849
* [ ] [Pinecone](https://pinecone.io)
4950

5051
Provided Tools

composer.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
},
2525
"require-dev": {
2626
"codewithkyrian/chromadb-php": "^0.2.1",
27+
"mongodb/mongodb": "^1.19",
2728
"php-cs-fixer/shim": "^3.64",
2829
"phpstan/phpstan": "^1.12",
2930
"phpunit/phpunit": "^11.3",
@@ -36,6 +37,7 @@
3637
"symfony/var-dumper": "^6.4 || ^7.1"
3738
},
3839
"suggest": {
40+
"mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.",
3941
"codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.",
4042
"symfony/clock": "For using the clock tool.",
4143
"symfony/css-selector": "For using the YouTube transcription tool.",

src/Store/MongoDB/Store.php

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpLlm\LlmChain\Store\MongoDB;
6+
7+
use MongoDB\BSON\Binary;
8+
use MongoDB\Client;
9+
use MongoDB\Collection;
10+
use PhpLlm\LlmChain\Document\Document;
11+
use PhpLlm\LlmChain\Document\Metadata;
12+
use PhpLlm\LlmChain\Document\Vector;
13+
use PhpLlm\LlmChain\Store\VectorStoreInterface;
14+
use Psr\Log\LoggerInterface;
15+
use Symfony\Component\Uid\Uuid;
16+
17+
/**
18+
* @see https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-overview/
19+
*
20+
* For this store you need to create a separate MongoDB Atlas Search index.
21+
* The index needs to be created with the following settings:
22+
* {
23+
* "fields": [
24+
* {
25+
* "numDimensions": 1536,
26+
* "path": "vector",
27+
* "similarity": "euclidean",
28+
* "type": "vector"
29+
* }
30+
* ]
31+
* }
32+
*
33+
* Note that the `path` key must match the $vectorFieldName constructor argument.
34+
*
35+
* For the `similarity` key you can choose between `euclidean`, `cosine` and `dotProduct`.
36+
* {@see https://www.mongodb.com/docs/atlas/atlas-search/field-types/knn-vector/#define-the-index-for-the-fts-field-type-type}
37+
*
38+
* @author Oskar Stark <[email protected]>
39+
*/
40+
final readonly class Store implements VectorStoreInterface
{
    /**
     * @param Client          $client          MongoDB client used to select the target collection
     * @param LoggerInterface $logger          Receives a warning for every document that is skipped because it has no vector
     * @param string          $databaseName    The name of the database
     * @param string          $collectionName  The name of the collection
     * @param string          $indexName       The name of the Atlas Search index
     * @param string          $vectorFieldName The name of the field in the index that contains the vector
     * @param bool            $bulkWrite       Use bulk write operations
     */
    public function __construct(
        private Client $client,
        private LoggerInterface $logger,
        private string $databaseName,
        private string $collectionName,
        private string $indexName,
        private string $vectorFieldName = 'vector',
        private bool $bulkWrite = false,
    ) {
    }

    /**
     * Stores a single document; shorthand for addDocuments() with a one-element list.
     */
    public function addDocument(Document $document): void
    {
        $this->addDocuments([$document]);
    }

    /**
     * Upserts the given documents into the collection, keyed by their UUID.
     *
     * Documents without a vector are logged and skipped. Depending on the
     * $bulkWrite constructor flag, the remaining documents are written either
     * one by one via replaceOne() or in a single bulkWrite() call.
     *
     * @param Document[] $documents
     */
    public function addDocuments(array $documents): void
    {
        $collection = $this->getCollection();
        $operations = [];

        foreach ($documents as $document) {
            if (!$document->hasVector()) {
                $this->logger->warning('Document {id} does not have a vector', ['id' => $document->id]);

                // Nothing to index: reading the vector below would fail on a missing value.
                continue;
            }

            $operation = [
                ['_id' => $this->toBinary($document->id)], // we use binary for the id, because of storage efficiency
                // array_filter() drops falsy entries so they are not persisted.
                array_filter([
                    'metadata' => $document->metadata,
                    $this->vectorFieldName => $document->vector->getData(),
                    'text' => $document->text,
                ]),
                ['upsert' => true], // insert if not exists
            ];

            if ($this->bulkWrite) {
                $operations[] = ['replaceOne' => $operation];

                continue;
            }

            $collection->replaceOne(...$operation);
        }

        // bulkWrite() rejects an empty operation list, so only call it when there is work to do.
        if ($this->bulkWrite && [] !== $operations) {
            $collection->bulkWrite($operations);
        }
    }

    /**
     * Runs a `$vectorSearch` aggregation against the configured index and
     * converts every hit back into a Document.
     *
     * Entries in $options are merged over the defaults (numCandidates: 200,
     * limit: 5) and passed straight into the `$vectorSearch` stage.
     *
     * NOTE(review): results are rebuilt with Vector::create1536(), so this
     * presumably only works for 1536-dimensional vectors - confirm. The stored
     * `text` field and the computed `score` are not copied onto the returned
     * documents.
     *
     * @param array{
     *     limit?: positive-int,
     *     numCandidates?: positive-int,
     *     filter?: array<mixed>
     * } $options
     *
     * @return Document[]
     */
    public function query(Vector $vector, array $options = []): array
    {
        $results = $this->getCollection()->aggregate([
            [
                '$vectorSearch' => array_merge([
                    'index' => $this->indexName,
                    'path' => $this->vectorFieldName,
                    'queryVector' => $vector->getData(),
                    'numCandidates' => 200,
                    'limit' => 5,
                ], $options),
            ],
            [
                '$addFields' => [
                    'score' => ['$meta' => 'vectorSearchScore'],
                ],
            ],
        ], ['typeMap' => ['root' => 'array', 'document' => 'array', 'array' => 'array']]);

        $documents = [];

        foreach ($results as $result) {
            $documents[] = Document::fromVector(
                Vector::create1536($result[$this->vectorFieldName]),
                $this->toUuid($result['_id']),
                new Metadata($result['metadata'] ?? []),
            );
        }

        return $documents;
    }

    /**
     * Selects the configured collection on the injected client.
     */
    private function getCollection(): Collection
    {
        return $this->client->selectCollection($this->databaseName, $this->collectionName);
    }

    /**
     * Wraps the UUID's binary form in a BSON Binary of subtype UUID for use as `_id`.
     */
    private function toBinary(Uuid $uuid): Binary
    {
        return new Binary($uuid->toBinary(), Binary::TYPE_UUID);
    }

    /**
     * Inverse of toBinary(): rebuilds the Uuid from the raw bytes stored in `_id`.
     */
    private function toUuid(Binary $binary): Uuid
    {
        return Uuid::fromString($binary->getData());
    }
}

0 commit comments

Comments
 (0)