Skip to content

Commit 27418f2

Browse files
authored
feat: Initialize MongoDB store (#51)
* Initialize store * - * - * - * - * - * - * - * Update .env * fix * -
1 parent 70ff90b commit 27418f2

File tree

5 files changed

+133
-3
lines changed

5 files changed

+133
-3
lines changed

.env

+3
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,6 @@ AZURE_OPENAI_KEY=
1717

1818
# For using SerpApi (tool)
1919
SERP_API_KEY=
20+
21+
# For using MongoDB Atlas (store)
22+
MONGODB_URI=

composer.json

+4-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
},
2525
"require-dev": {
2626
"codewithkyrian/chromadb-php": "^0.2.1",
27-
"mongodb/mongodb": "^1.19",
27+
"mongodb/mongodb": "^1.20",
2828
"php-cs-fixer/shim": "^3.64",
2929
"phpstan/phpstan": "^1.12",
3030
"phpunit/phpunit": "^11.3",
@@ -37,6 +37,9 @@
3737
"symfony/dotenv": "^6.4 || ^7.1",
3838
"symfony/var-dumper": "^6.4 || ^7.1"
3939
},
40+
"conflict": {
41+
"mongodb/mongodb": "<1.20"
42+
},
4043
"suggest": {
4144
"codewithkyrian/chromadb-php": "For using the ChromaDB as retrieval vector store.",
4245
"mongodb/mongodb": "For using MongoDB Atlas as retrieval vector store.",
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
<?php
2+
3+
use MongoDB\Client as MongoDBClient;
4+
use PhpLlm\LlmChain\Chain;
5+
use PhpLlm\LlmChain\Document\Document;
6+
use PhpLlm\LlmChain\Document\Metadata;
7+
use PhpLlm\LlmChain\DocumentEmbedder;
8+
use PhpLlm\LlmChain\Message\Message;
9+
use PhpLlm\LlmChain\Message\MessageBag;
10+
use PhpLlm\LlmChain\OpenAI\Model\Embeddings;
11+
use PhpLlm\LlmChain\OpenAI\Model\Gpt;
12+
use PhpLlm\LlmChain\OpenAI\Model\Gpt\Version;
13+
use PhpLlm\LlmChain\OpenAI\Platform\OpenAI;
14+
use PhpLlm\LlmChain\Store\MongoDB\Store;
15+
use PhpLlm\LlmChain\ToolBox\ChainProcessor;
16+
use PhpLlm\LlmChain\ToolBox\Tool\SimilaritySearch;
17+
use PhpLlm\LlmChain\ToolBox\ToolAnalyzer;
18+
use PhpLlm\LlmChain\ToolBox\ToolBox;
19+
use Symfony\Component\Dotenv\Dotenv;
20+
use Symfony\Component\HttpClient\HttpClient;
21+
use Symfony\Component\Uid\Uuid;
22+
23+
require_once dirname(__DIR__).'/vendor/autoload.php';
(new Dotenv())->loadEnv(dirname(__DIR__).'/.env');

// Both the OpenAI key and the MongoDB connection string are needed; bail out early otherwise.
if (empty($_ENV['OPENAI_API_KEY']) || empty($_ENV['MONGODB_URI'])) {
    echo 'Please set OPENAI_API_KEY and MONGODB_URI environment variables.'.PHP_EOL;
    exit(1);
}

// initialize the store
$store = new Store(
    client: new MongoDBClient($_ENV['MONGODB_URI']),
    databaseName: 'my-database',
    collectionName: 'my-collection',
    indexName: 'my-index',
    vectorFieldName: 'vector',
);

// our data
$movies = [
    ['title' => 'Inception', 'description' => 'A skilled thief is given a chance at redemption if he can successfully perform inception, the act of planting an idea in someone\'s subconscious.', 'regisseur' => 'Christopher Nolan'],
    ['title' => 'The Matrix', 'description' => 'A hacker discovers the world he lives in is a simulated reality and joins a rebellion to overthrow its controllers.', 'regisseur' => 'The Wachowskis'],
    ['title' => 'The Godfather', 'description' => 'The aging patriarch of an organized crime dynasty transfers control of his empire to his reluctant son.', 'regisseur' => 'Francis Ford Coppola'],
];

// create one document per movie; the list is initialised up front so that
// embed() always receives an array, even if $movies is empty
$documents = [];
foreach ($movies as $movie) {
    $documents[] = Document::fromText(
        id: Uuid::v4(),
        text: $movie['title'].' '.$movie['description'],
        metadata: new Metadata($movie),
    );
}

// create embeddings for documents (the embedder is handed the store to work with)
$platform = new OpenAI(HttpClient::create(), $_ENV['OPENAI_API_KEY']);
$embedder = new DocumentEmbedder($embeddings = new Embeddings($platform), $store);
$embedder->embed($documents);

// initialize the index (creates the search index named above on the collection)
$store->initialize();

$llm = new Gpt($platform, Version::gpt4oMini());

// expose the similarity search as a tool the chain can call
$similaritySearch = new SimilaritySearch($embeddings, $store);
$toolBox = new ToolBox(new ToolAnalyzer(), [$similaritySearch]);
$processor = new ChainProcessor($toolBox);
$chain = new Chain($llm, [$processor], [$processor]);

$messages = new MessageBag(
    Message::forSystem('Please answer all user questions only using SimilaritySearch function.'),
    Message::ofUser('Which movie fits the theme of the mafia?')
);
$response = $chain->call($messages);

echo $response.PHP_EOL;
+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpLlm\LlmChain\Store;
6+
7+
/**
 * A store that exposes an explicit initialization step in addition to the
 * regular StoreInterface contract.
 */
interface InitializableStoreInterface extends StoreInterface
{
    /**
     * Runs the store's initialization routine.
     *
     * @param array<mixed> $options Optional settings; which keys are accepted is up to the implementing store
     */
    public function initialize(array $options = []): void;
}

src/Store/MongoDB/Store.php

+36-2
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,14 @@
77
use MongoDB\BSON\Binary;
88
use MongoDB\Client;
99
use MongoDB\Collection;
10+
use MongoDB\Driver\Exception\CommandException;
1011
use PhpLlm\LlmChain\Document\Document;
1112
use PhpLlm\LlmChain\Document\Metadata;
1213
use PhpLlm\LlmChain\Document\Vector;
14+
use PhpLlm\LlmChain\Store\InitializableStoreInterface;
1315
use PhpLlm\LlmChain\Store\VectorStoreInterface;
1416
use Psr\Log\LoggerInterface;
17+
use Psr\Log\NullLogger;
1518
use Symfony\Component\Uid\Uuid;
1619

1720
/**
@@ -37,7 +40,7 @@
3740
*
3841
* @author Oskar Stark <[email protected]>
3942
*/
40-
final readonly class Store implements VectorStoreInterface
43+
final readonly class Store implements VectorStoreInterface, InitializableStoreInterface
4144
{
4245
/**
4346
* @param string $databaseName The name of the database
@@ -48,12 +51,12 @@
4851
*/
4952
public function __construct(
5053
private Client $client,
51-
private LoggerInterface $logger,
5254
private string $databaseName,
5355
private string $collectionName,
5456
private string $indexName,
5557
private string $vectorFieldName = 'vector',
5658
private bool $bulkWrite = false,
59+
private LoggerInterface $logger = new NullLogger(),
5760
) {
5861
}
5962

@@ -135,6 +138,37 @@ public function query(Vector $vector, array $options = []): array
135138
return $documents;
136139
}
137140

141+
/**
 * Creates the vector search index for this store's collection.
 *
 * The index is created under the configured index name with type
 * "vectorSearch". It always contains one vector field definition for the
 * configured vector field (1536 dimensions, euclidean similarity); any
 * definitions passed via the "fields" option are appended after it.
 *
 * A CommandException raised while creating the index is logged as a warning
 * and not rethrown, so this call is best-effort.
 *
 * @param array{fields?: array<mixed>} $options
 *
 * @throws \InvalidArgumentException When any option other than "fields" is passed
 */
public function initialize(array $options = []): void
{
    // Validate before touching the database: every key except "fields" is
    // rejected, including unknown keys passed alongside "fields".
    if ([] !== array_diff_key($options, ['fields' => true])) {
        throw new \InvalidArgumentException('The only supported option is "fields"');
    }

    // The mandatory vector field definition always comes first in the index.
    $vectorField = [
        'numDimensions' => 1536,
        'path' => $this->vectorFieldName,
        'similarity' => 'euclidean',
        'type' => 'vector',
    ];

    try {
        $this->getCollection()->createSearchIndex(
            [
                'fields' => array_merge([$vectorField], $options['fields'] ?? []),
            ],
            [
                'name' => $this->indexName,
                'type' => 'vectorSearch',
            ],
        );
    } catch (CommandException $e) {
        // Keep the original exception in the log context so the failure can be traced.
        $this->logger->warning($e->getMessage(), ['exception' => $e]);
    }
}
171+
138172
private function getCollection(): Collection
139173
{
140174
return $this->client->selectCollection($this->databaseName, $this->collectionName);

0 commit comments

Comments
 (0)