Skip to content

Commit 4911c5f

Browse files
authored
refactor: extract logic from blog embed command (#5)
1 parent 8de854c commit 4911c5f

File tree

9 files changed

+1377
-105
lines changed

9 files changed

+1377
-105
lines changed

Diff for: src/Blog/Embedder.php

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace App\Blog;
6+
7+
use Codewithkyrian\ChromaDB\Client;
8+
use PhpLlm\LlmChain\Bridge\OpenAI\Embeddings;
9+
use PhpLlm\LlmChain\Document\Vector;
10+
use PhpLlm\LlmChain\Model\Response\AsyncResponse;
11+
use PhpLlm\LlmChain\Model\Response\VectorResponse;
12+
use PhpLlm\LlmChain\PlatformInterface;
13+
14+
/**
 * Loads the blog posts, creates embeddings for their text representation
 * and upserts the result into the "symfony_blog" ChromaDB collection.
 */
final readonly class Embedder
{
    public function __construct(
        private Loader $loader,
        private PlatformInterface $platform,
        private Client $chromaClient,
    ) {
    }

    /**
     * Runs the whole pipeline: load posts, create embeddings, push to ChromaDB.
     *
     * @throws \UnexpectedValueException when the platform does not deliver a vector response
     */
    public function embedBlog(): void
    {
        $posts = $this->loader->load();
        $vectors = $this->createEmbeddings($posts);
        $this->pushToChromaDB($posts, $vectors);
    }

    /**
     * Requests embeddings for the string representation of the given posts.
     *
     * @param Post[] $posts
     *
     * @return Vector[]
     *
     * @throws \UnexpectedValueException when the platform does not deliver a vector response
     */
    private function createEmbeddings(array $posts): array
    {
        $texts = array_map(static fn (Post $post): string => $post->toString(), $posts);
        $response = $this->platform->request(new Embeddings(), $texts);

        // Explicit checks instead of assert(): assertions can be disabled at runtime,
        // which would let an unexpected response type slip through unchecked.
        if ($response instanceof AsyncResponse) {
            $response = $response->unwrap();
        }

        if (!$response instanceof VectorResponse) {
            throw new \UnexpectedValueException(sprintf('Expected the platform to return a "%s", got "%s".', VectorResponse::class, get_debug_type($response)));
        }

        return $response->getContent();
    }

    /**
     * Upserts the posts together with their vectors into the "symfony_blog" collection.
     *
     * @param Post[]   $posts
     * @param Vector[] $vectors
     */
    private function pushToChromaDB(array $posts, array $vectors): void
    {
        $collection = $this->chromaClient->getOrCreateCollection('symfony_blog');

        $ids = array_map(static fn (Post $post) => $post->id, $posts);
        $vectors = array_map(static fn (Vector $vector) => $vector->getData(), $vectors);

        // NOTE(review): the Post objects are handed over as-is for the third argument, while
        // Post::toArray() exists and is not used here - confirm the client serializes them as intended.
        $collection->upsert($ids, $vectors, $posts);
    }
}

Diff for: src/Blog/Loader.php

+42
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace App\Blog;
6+
7+
use Symfony\Component\DomCrawler\Crawler;
8+
use Symfony\Component\Uid\Uuid;
9+
use Symfony\Contracts\HttpClient\HttpClientInterface;
10+
11+
/**
 * Fetches the Symfony blog RSS feed and turns every feed item into a Post.
 */
class Loader
{
    public function __construct(
        private HttpClientInterface $httpClient,
    ) {
    }

    /**
     * Downloads the feed and maps each <item> element to a Post, in feed order.
     *
     * @return Post[]
     */
    public function load(): array
    {
        $feed = $this->httpClient
            ->request('GET', 'https://feeds.feedburner.com/symfony/blog')
            ->getContent();

        // Crawler::each() collects the closure's return values, so the list of posts
        // can be built directly from it.
        return (new Crawler($feed))->filter('item')->each(static function (Crawler $item): Post {
            $title = $item->filter('title')->text();

            return new Post(
                // Name-based (v5) UUID derived from the title; the namespace is the RFC 4122 DNS namespace UUID.
                Uuid::v5(Uuid::fromString('6ba7b810-9dad-11d1-80b4-00c04fd430c8'), $title),
                $title,
                $item->filter('link')->text(),
                $item->filter('description')->text(),
                // The encoded content is parsed once more so that only its text is kept.
                (new Crawler($item->filter('content\:encoded')->text()))->text(),
                $item->filter('dc\:creator')->text(),
                new \DateTimeImmutable($item->filter('pubDate')->text()),
            );
        });
    }
}

Diff for: src/Blog/Post.php

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace App\Blog;
6+
7+
use Symfony\Component\Uid\Uuid;
8+
9+
/**
 * Immutable value object describing a single blog post.
 */
final readonly class Post
{
    /** Date format shared by the text and the array representation. */
    private const DATE_FORMAT = 'Y-m-d';

    public function __construct(
        public Uuid $id,
        public string $title,
        public string $link,
        public string $description,
        public string $content,
        public string $author,
        public \DateTimeImmutable $date,
    ) {
    }

    /**
     * Renders title, author, date, description and content as one multi-line text.
     */
    public function toString(): string
    {
        $publishedOn = $this->date->format(self::DATE_FORMAT);

        return <<<TEXT
            Title: {$this->title}
            From: {$this->author} on {$publishedOn}
            Description: {$this->description}
            {$this->content}
            TEXT;
    }

    /**
     * Flattens the post into scalar values: the id as RFC 4122 string, the date as Y-m-d.
     *
     * @return array{
     *     id: string,
     *     title: string,
     *     link: string,
     *     description: string,
     *     content: string,
     *     author: string,
     *     date: string,
     * }
     */
    public function toArray(): array
    {
        $uuid = $this->id->toRfc4122();
        $publishedOn = $this->date->format(self::DATE_FORMAT);

        return [
            'id' => $uuid,
            'title' => $this->title,
            'link' => $this->link,
            'description' => $this->description,
            'content' => $this->content,
            'author' => $this->author,
            'date' => $publishedOn,
        ];
    }
}

Diff for: src/Command/BlogEmbedCommand.php

+3-99
Original file line numberDiff line numberDiff line change
@@ -4,39 +4,18 @@
44

55
namespace App\Command;
66

7-
use Codewithkyrian\ChromaDB\Client;
8-
use PhpLlm\LlmChain\Bridge\OpenAI\Embeddings;
9-
use PhpLlm\LlmChain\Document\Vector;
10-
use PhpLlm\LlmChain\Model\Response\AsyncResponse;
11-
use PhpLlm\LlmChain\Model\Response\VectorResponse;
12-
use PhpLlm\LlmChain\PlatformInterface;
7+
use App\Blog\Embedder;
138
use Symfony\Component\Console\Attribute\AsCommand;
149
use Symfony\Component\Console\Command\Command;
1510
use Symfony\Component\Console\Input\InputInterface;
1611
use Symfony\Component\Console\Output\OutputInterface;
1712
use Symfony\Component\Console\Style\SymfonyStyle;
18-
use Symfony\Component\DomCrawler\Crawler;
19-
use Symfony\Component\Uid\Uuid;
20-
use Symfony\Contracts\HttpClient\HttpClientInterface;
2113

22-
/**
23-
* @phpstan-type Post array{
24-
* id: Uuid,
25-
* title: string,
26-
* link: string,
27-
* description: string,
28-
* content: string,
29-
* author: string,
30-
* date: \DateTimeImmutable,
31-
* }
32-
*/
3314
#[AsCommand('app:blog:embed', description: 'Create embeddings for Symfony blog and push to ChromaDB.')]
3415
final class BlogEmbedCommand extends Command
3516
{
3617
public function __construct(
37-
private readonly HttpClientInterface $httpClient,
38-
private readonly PlatformInterface $platform,
39-
private readonly Client $chromaClient,
18+
private readonly Embedder $embedder,
4019
) {
4120
parent::__construct();
4221
}
@@ -46,85 +25,10 @@ protected function execute(InputInterface $input, OutputInterface $output): int
4625
$io = new SymfonyStyle($input, $output);
4726
$io->title('Loading RSS of Symfony blog as embeddings into ChromaDB');
4827

49-
$posts = $this->loadBlogPosts();
50-
$vectors = $this->createEmbeddings($posts);
51-
$this->pushToChromaDB($posts, $vectors);
28+
$this->embedder->embedBlog();
5229

5330
$io->success('Symfony Blog Successfully Embedded!');
5431

5532
return Command::SUCCESS;
5633
}
57-
58-
/**
59-
* @return list<array{
60-
* id: Uuid,
61-
* title: string,
62-
* link: string,
63-
* description: string,
64-
* content: string,
65-
* author: string,
66-
* date: \DateTimeImmutable,
67-
* }>
68-
*/
69-
private function loadBlogPosts(): array
70-
{
71-
$response = $this->httpClient->request('GET', 'https://feeds.feedburner.com/symfony/blog');
72-
73-
$posts = [];
74-
$crawler = new Crawler($response->getContent());
75-
$crawler->filter('item')->each(function (Crawler $node) use (&$posts) {
76-
$title = $node->filter('title')->text();
77-
$posts[] = [
78-
'id' => Uuid::v5(Uuid::fromString('6ba7b810-9dad-11d1-80b4-00c04fd430c8'), $title),
79-
'title' => $title,
80-
'link' => $node->filter('link')->text(),
81-
'description' => $node->filter('description')->text(),
82-
'content' => (new Crawler($node->filter('content\:encoded')->text()))->text(),
83-
'author' => $node->filter('dc\:creator')->text(),
84-
'date' => new \DateTimeImmutable($node->filter('pubDate')->text()),
85-
];
86-
});
87-
88-
return $posts;
89-
}
90-
91-
/**
92-
* @param Post[] $posts
93-
*
94-
* @return Vector[]
95-
*/
96-
private function createEmbeddings(array $posts): array
97-
{
98-
$texts = [];
99-
foreach ($posts as $post) {
100-
$texts[] = <<<TEXT
101-
Title: {$post['title']}
102-
From: {$post['author']} on {$post['date']->format('Y-m-d')}
103-
Description: {$post['description']}
104-
{$post['content']}
105-
TEXT;
106-
}
107-
108-
$response = $this->platform->request(new Embeddings(), $texts);
109-
110-
assert($response instanceof AsyncResponse);
111-
$response = $response->unwrap();
112-
assert($response instanceof VectorResponse);
113-
114-
return $response->getContent();
115-
}
116-
117-
/**
118-
* @param Post[] $posts
119-
* @param Vector[] $vectors
120-
*/
121-
private function pushToChromaDB(array $posts, array $vectors): void
122-
{
123-
$collection = $this->chromaClient->getOrCreateCollection('symfony_blog');
124-
125-
$ids = array_column($posts, 'id');
126-
$vectors = array_map(fn (Vector $vector) => $vector->getData(), $vectors);
127-
128-
$collection->upsert($ids, $vectors, $posts);
129-
}
13034
}

Diff for: tests/Blog/EmbedderTest.php

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace App\Tests\Blog;
6+
7+
use App\Blog\Embedder;
8+
use App\Blog\Loader;
9+
use App\Blog\Post;
10+
use Codewithkyrian\ChromaDB\Client;
11+
use Codewithkyrian\ChromaDB\Resources\CollectionResource;
12+
use PhpLlm\LlmChain\Document\Vector;
13+
use PhpLlm\LlmChain\Model\Model;
14+
use PhpLlm\LlmChain\Model\Response\AsyncResponse;
15+
use PhpLlm\LlmChain\Model\Response\ResponseInterface as LlmResponse;
16+
use PhpLlm\LlmChain\Model\Response\VectorResponse;
17+
use PhpLlm\LlmChain\Platform\ResponseConverter;
18+
use PhpLlm\LlmChain\PlatformInterface;
19+
use PHPUnit\Framework\Attributes\CoversClass;
20+
use PHPUnit\Framework\Attributes\UsesClass;
21+
use PHPUnit\Framework\TestCase;
22+
use Symfony\Component\HttpClient\MockHttpClient;
23+
use Symfony\Component\HttpClient\Response\MockResponse;
24+
use Symfony\Contracts\HttpClient\ResponseInterface as HttpResponse;
25+
26+
#[CoversClass(Embedder::class)]
27+
#[UsesClass(Loader::class)]
28+
#[UsesClass(Post::class)]
29+
final class EmbedderTest extends TestCase
{
    /**
     * Verifies that embedBlog() upserts ids, raw vector data and posts into the "symfony_blog" collection.
     */
    public function testEmbedBlog(): void
    {
        // Two responses are queued: the loader is called once here to compute the
        // expected arguments and once more by the embedder itself.
        $feed = MockResponse::fromFile(__DIR__.'/fixtures/blog.rss');
        $loader = new Loader(new MockHttpClient([$feed, $feed]));
        $expectedPosts = $loader->load();

        $vectors = [
            new Vector([0.1, 0.2, 0.3]),
            new Vector([0.4, 0.5, 0.6]),
            new Vector([0.7, 0.8, 0.9]),
            new Vector([1.0, 1.1, 1.2]),
            new Vector([1.3, 1.4, 1.5]),
            new Vector([1.6, 1.7, 1.8]),
            new Vector([1.9, 2.0, 2.1]),
            new Vector([2.2, 2.3, 2.4]),
            new Vector([2.5, 2.6, 2.7]),
            new Vector([2.8, 2.9, 3.0]),
        ];

        $platform = $this->createMock(PlatformInterface::class);
        $platform->method('request')->willReturn($this->createAsyncResponse($vectors));

        $collection = $this->createMock(CollectionResource::class);
        $collection
            ->expects(self::once())
            ->method('upsert')
            ->with(
                array_map(static fn (Post $post) => $post->id, $expectedPosts),
                array_map(static fn (Vector $vector) => $vector->getData(), $vectors),
                $expectedPosts,
            );

        $chromaClient = $this->createMock(Client::class);
        $chromaClient
            ->expects(self::once())
            ->method('getOrCreateCollection')
            ->with('symfony_blog')
            ->willReturn($collection);

        (new Embedder($loader, $platform, $chromaClient))->embedBlog();
    }

    /**
     * Wraps the given vectors in an AsyncResponse whose converter always yields them as a VectorResponse.
     *
     * @param Vector[] $vectors
     */
    private function createAsyncResponse(array $vectors): AsyncResponse
    {
        return new AsyncResponse(
            new class($vectors) implements ResponseConverter {
                /**
                 * @param Vector[] $vectors
                 */
                public function __construct(private readonly array $vectors)
                {
                }

                public function supports(Model $model, object|array|string $input): bool
                {
                    return true;
                }

                public function convert(HttpResponse $response, array $options = []): LlmResponse
                {
                    return new VectorResponse(...$this->vectors);
                }
            },
            new MockResponse(),
        );
    }
}

0 commit comments

Comments
 (0)