Skip to content

feat: extend Claude support for images and pdf input #298

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -821,3 +821,4 @@ For testing multi-modal features, the repository contains binary media content,

* `tests/Fixture/image.jpg`: Chris F., Creative Commons, see [pexels.com](https://www.pexels.com/photo/blauer-und-gruner-elefant-mit-licht-1680755/)
* `tests/Fixture/audio.mp3`: davidbain, Creative Commons, see [freesound.org](https://freesound.org/people/davidbain/sounds/136777/)
* `tests/Fixture/document.pdf`: Chem8240ja, Public Domain, see [Wikipedia](https://en.wikipedia.org/wiki/File:Re_example.pdf)
32 changes: 32 additions & 0 deletions examples/anthropic/image-input-binary.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

use PhpLlm\LlmChain\Bridge\Anthropic\Claude;
use PhpLlm\LlmChain\Bridge\Anthropic\PlatformFactory;
use PhpLlm\LlmChain\Chain;
use PhpLlm\LlmChain\Model\Message\Content\Image;
use PhpLlm\LlmChain\Model\Message\Message;
use PhpLlm\LlmChain\Model\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

// The project root anchors the autoloader, the .env file and the image fixture.
$projectRoot = dirname(__DIR__, 2);

require_once $projectRoot.'/vendor/autoload.php';
(new Dotenv())->loadEnv($projectRoot.'/.env');

// Bail out early when no API key is configured.
$apiKey = $_ENV['ANTHROPIC_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the ANTHROPIC_API_KEY environment variable.'.PHP_EOL;
    exit(1);
}

$chain = new Chain(
    PlatformFactory::create($apiKey),
    new Claude(Claude::SONNET_37),
);

// One system instruction followed by a user turn that carries the local image and the question.
$conversation = new MessageBag(
    Message::forSystem('You are an image analyzer bot that helps identify the content of images.'),
    Message::ofUser(
        Image::fromFile($projectRoot.'/tests/Fixture/image.jpg'),
        'Describe this image.',
    ),
);

$answer = $chain->call($conversation);

echo $answer->getContent().PHP_EOL;
32 changes: 32 additions & 0 deletions examples/anthropic/image-input-url.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<?php

use PhpLlm\LlmChain\Bridge\Anthropic\Claude;
use PhpLlm\LlmChain\Bridge\Anthropic\PlatformFactory;
use PhpLlm\LlmChain\Chain;
use PhpLlm\LlmChain\Model\Message\Content\ImageUrl;
use PhpLlm\LlmChain\Model\Message\Message;
use PhpLlm\LlmChain\Model\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

// The project root anchors both the autoloader and the .env file.
$projectRoot = dirname(__DIR__, 2);

require_once $projectRoot.'/vendor/autoload.php';
(new Dotenv())->loadEnv($projectRoot.'/.env');

// Bail out early when no API key is configured.
$apiKey = $_ENV['ANTHROPIC_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the ANTHROPIC_API_KEY environment variable.'.PHP_EOL;
    exit(1);
}

$chain = new Chain(
    PlatformFactory::create($apiKey),
    new Claude(Claude::SONNET_37),
);

// One system instruction followed by a user turn that references the image by URL.
$conversation = new MessageBag(
    Message::forSystem('You are an image analyzer bot that helps identify the content of images.'),
    Message::ofUser(
        new ImageUrl('https://upload.wikimedia.org/wikipedia/commons/a/a7/Camponotus_flavomarginatus_ant.jpg'),
        'Describe this image.',
    ),
);

$answer = $chain->call($conversation);

echo $answer->getContent().PHP_EOL;
31 changes: 31 additions & 0 deletions examples/anthropic/pdf-input-binary.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

use PhpLlm\LlmChain\Bridge\Anthropic\Claude;
use PhpLlm\LlmChain\Bridge\Anthropic\PlatformFactory;
use PhpLlm\LlmChain\Chain;
use PhpLlm\LlmChain\Model\Message\Content\Document;
use PhpLlm\LlmChain\Model\Message\Message;
use PhpLlm\LlmChain\Model\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

// The project root anchors the autoloader, the .env file and the PDF fixture.
$projectRoot = dirname(__DIR__, 2);

require_once $projectRoot.'/vendor/autoload.php';
(new Dotenv())->loadEnv($projectRoot.'/.env');

// Bail out early when no API key is configured.
$apiKey = $_ENV['ANTHROPIC_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the ANTHROPIC_API_KEY environment variable.'.PHP_EOL;
    exit(1);
}

$chain = new Chain(
    PlatformFactory::create($apiKey),
    new Claude(Claude::SONNET_37),
);

// A single user turn: the local PDF plus the question about it.
$conversation = new MessageBag(
    Message::ofUser(
        Document::fromFile($projectRoot.'/tests/Fixture/document.pdf'),
        'What is this document about?',
    ),
);

$answer = $chain->call($conversation);

echo $answer->getContent().PHP_EOL;
31 changes: 31 additions & 0 deletions examples/anthropic/pdf-input-url.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
<?php

use PhpLlm\LlmChain\Bridge\Anthropic\Claude;
use PhpLlm\LlmChain\Bridge\Anthropic\PlatformFactory;
use PhpLlm\LlmChain\Chain;
use PhpLlm\LlmChain\Model\Message\Content\DocumentUrl;
use PhpLlm\LlmChain\Model\Message\Message;
use PhpLlm\LlmChain\Model\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;

// The project root anchors both the autoloader and the .env file.
$projectRoot = dirname(__DIR__, 2);

require_once $projectRoot.'/vendor/autoload.php';
(new Dotenv())->loadEnv($projectRoot.'/.env');

// Bail out early when no API key is configured.
$apiKey = $_ENV['ANTHROPIC_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the ANTHROPIC_API_KEY environment variable.'.PHP_EOL;
    exit(1);
}

$chain = new Chain(
    PlatformFactory::create($apiKey),
    new Claude(Claude::SONNET_37),
);

// A single user turn: the PDF referenced by URL plus the question about it.
$conversation = new MessageBag(
    Message::ofUser(
        new DocumentUrl('https://upload.wikimedia.org/wikipedia/commons/2/20/Re_example.pdf'),
        'What is this document about?',
    ),
);

$answer = $chain->call($conversation);

echo $answer->getContent().PHP_EOL;
38 changes: 38 additions & 0 deletions examples/anthropic/toolbox-stream.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
<?php

use PhpLlm\LlmChain\Bridge\Anthropic\Claude;
use PhpLlm\LlmChain\Bridge\Anthropic\PlatformFactory;
use PhpLlm\LlmChain\Chain;
use PhpLlm\LlmChain\Chain\Toolbox\ChainProcessor;
use PhpLlm\LlmChain\Chain\Toolbox\Tool\YouTubeTranscriber;
use PhpLlm\LlmChain\Chain\Toolbox\Toolbox;
use PhpLlm\LlmChain\Model\Message\Message;
use PhpLlm\LlmChain\Model\Message\MessageBag;
use Symfony\Component\Dotenv\Dotenv;
use Symfony\Component\HttpClient\HttpClient;

// The project root anchors both the autoloader and the .env file.
$projectRoot = dirname(__DIR__, 2);

require_once $projectRoot.'/vendor/autoload.php';
(new Dotenv())->loadEnv($projectRoot.'/.env');

// Bail out early when no API key is configured.
$apiKey = $_ENV['ANTHROPIC_API_KEY'] ?? null;
if (empty($apiKey)) {
    echo 'Please set the ANTHROPIC_API_KEY environment variable.'.PHP_EOL;
    exit(1);
}

$platform = PlatformFactory::create($apiKey);
$model = new Claude();

// A toolbox holding only the YouTube transcriber; the same processor instance is
// passed as both the third and the fourth constructor argument of the chain.
$toolProcessor = new ChainProcessor(
    Toolbox::create(new YouTubeTranscriber(HttpClient::create())),
);
$chain = new Chain($platform, $model, [$toolProcessor], [$toolProcessor]);

$conversation = new MessageBag(
    Message::ofUser('Please summarize this video for me: https://www.youtube.com/watch?v=6uXW-ulpj0s'),
);

// Enable streaming of the response text.
$stream = $chain->call($conversation, ['stream' => true]);

// Print each streamed piece as soon as it is yielded, then end the line.
foreach ($stream->getContent() as $piece) {
    echo $piece;
}
echo PHP_EOL;
2 changes: 1 addition & 1 deletion src/Bridge/Anthropic/Claude.php
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public function supportsAudioInput(): bool

/**
 * Image-input capability flag of the Claude model.
 *
 * NOTE(review): this scraped diff view has lost its +/- markers. The file header
 * reports one addition and one deletion, so the first return below is presumably
 * the removed line and the second its replacement — confirm against the PR. Read
 * literally, the second return is unreachable.
 */
public function supportsImageInput(): bool
{
return false; // it does, but implementation here is still open.
return true;
}

public function supportsStreaming(): bool
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,27 +5,19 @@
namespace PhpLlm\LlmChain\Bridge\Anthropic;

use PhpLlm\LlmChain\Chain\Toolbox\Metadata;
use PhpLlm\LlmChain\Exception\RuntimeException;
use PhpLlm\LlmChain\Model\Message\AssistantMessage;
use PhpLlm\LlmChain\Model\Message\MessageBagInterface;
use PhpLlm\LlmChain\Model\Message\MessageInterface;
use PhpLlm\LlmChain\Model\Message\ToolCallMessage;
use PhpLlm\LlmChain\Model\Model;
use PhpLlm\LlmChain\Model\Response\ResponseInterface as LlmResponse;
use PhpLlm\LlmChain\Model\Response\StreamResponse;
use PhpLlm\LlmChain\Model\Response\TextResponse;
use PhpLlm\LlmChain\Model\Response\ToolCall;
use PhpLlm\LlmChain\Model\Response\ToolCallResponse;
use PhpLlm\LlmChain\Platform\ModelClient;
use PhpLlm\LlmChain\Platform\ResponseConverter;
use Symfony\Component\HttpClient\Chunk\ServerSentEvent;
use PhpLlm\LlmChain\Platform\ModelClient as PlatformModelClient;
use Symfony\Component\HttpClient\EventSourceHttpClient;
use Symfony\Component\HttpClient\Exception\JsonException;
use Symfony\Contracts\HttpClient\HttpClientInterface;
use Symfony\Contracts\HttpClient\ResponseInterface;
use Webmozart\Assert\Assert;

final readonly class ModelHandler implements ModelClient, ResponseConverter
final readonly class ModelClient implements PlatformModelClient
{
private EventSourceHttpClient $httpClient;

Expand Down Expand Up @@ -108,55 +100,4 @@ public function request(Model $model, object|array|string $input, array $options
'json' => array_merge($options, $body),
]);
}

/**
 * Turns the raw HTTP response into a response object.
 *
 * Returns a StreamResponse when the 'stream' option is truthy, a ToolCallResponse
 * when the decoded content holds at least one 'tool_use' block, and a TextResponse
 * built from the first content block otherwise.
 *
 * NOTE(review): the hunk header (-108,55 +100,4) suggests these lines are the
 * removed side of the diff — confirm against the PR.
 *
 * @throws RuntimeException when the payload has no content, or when the first content block has no text
 */
public function convert(ResponseInterface $response, array $options = []): LlmResponse
{
// Streaming responses are wrapped lazily; nothing is decoded here.
if ($options['stream'] ?? false) {
return new StreamResponse($this->convertStream($response));
}

$data = $response->toArray();

if (!isset($data['content']) || 0 === count($data['content'])) {
throw new RuntimeException('Response does not contain any content');
}

// NOTE(review): this check runs before the tool_use scan below, so a response whose
// first content block has no 'text' key throws even if it contains tool calls.
if (!isset($data['content'][0]['text'])) {
throw new RuntimeException('Response content does not contain any text');
}

// Collect every tool_use block; any hit takes precedence over the text result.
$toolCalls = [];
foreach ($data['content'] as $content) {
if ('tool_use' === $content['type']) {
$toolCalls[] = new ToolCall($content['id'], $content['name'], $content['input']);
}
}
if (!empty($toolCalls)) {
return new ToolCallResponse(...$toolCalls);
}

return new TextResponse($data['content'][0]['text']);
}

/**
 * Lazily yields the text fragments of a streamed response.
 *
 * Only server-sent events of type 'content_block_delta' that carry a
 * ['delta']['text'] value are yielded; every other chunk is skipped.
 *
 * @return \Generator yields the 'text' value of each matching delta event
 */
private function convertStream(ResponseInterface $response): \Generator
{
foreach ((new EventSourceHttpClient())->stream($response) as $chunk) {
// Skip chunks that are not server-sent events, and the '[DONE]' marker.
if (!$chunk instanceof ServerSentEvent || '[DONE]' === $chunk->getData()) {
continue;
}

try {
$data = $chunk->getArrayData();
} catch (JsonException) {
// try catch only needed for Symfony 6.4
continue;
}

// NOTE(review): loose comparison on an unguarded 'type' key; an event without
// 'type' would raise an undefined-key warning before being skipped.
if ('content_block_delta' != $data['type'] || !isset($data['delta']['text'])) {
continue;
}

yield $data['delta']['text'];
}
}
}
3 changes: 1 addition & 2 deletions src/Bridge/Anthropic/PlatformFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ public static function create(
?HttpClientInterface $httpClient = null,
): Platform {
$httpClient = $httpClient instanceof EventSourceHttpClient ? $httpClient : new EventSourceHttpClient($httpClient);
$responseHandler = new ModelHandler($httpClient, $apiKey, $version);

return new Platform([$responseHandler], [$responseHandler]);
return new Platform([new ModelClient($httpClient, $apiKey, $version)], [new ResponseConverter()]);
}
}
78 changes: 78 additions & 0 deletions src/Bridge/Anthropic/ResponseConverter.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Bridge\Anthropic;

use PhpLlm\LlmChain\Exception\RuntimeException;
use PhpLlm\LlmChain\Model\Message\MessageBagInterface;
use PhpLlm\LlmChain\Model\Model;
use PhpLlm\LlmChain\Model\Response\ResponseInterface as LlmResponse;
use PhpLlm\LlmChain\Model\Response\StreamResponse;
use PhpLlm\LlmChain\Model\Response\TextResponse;
use PhpLlm\LlmChain\Model\Response\ToolCall;
use PhpLlm\LlmChain\Model\Response\ToolCallResponse;
use PhpLlm\LlmChain\Platform\ResponseConverter as PlatformResponseConverter;
use Symfony\Component\HttpClient\Chunk\ServerSentEvent;
use Symfony\Component\HttpClient\EventSourceHttpClient;
use Symfony\Component\HttpClient\Exception\JsonException;
use Symfony\Contracts\HttpClient\ResponseInterface;

/**
 * Converts raw HTTP responses into response objects.
 *
 * Produces a StreamResponse when streaming is requested, a ToolCallResponse when the
 * decoded content contains 'tool_use' blocks, and a TextResponse otherwise.
 */
final readonly class ResponseConverter implements PlatformResponseConverter
{
    /**
     * Accepts only a Claude model combined with a message bag as input.
     */
    public function supports(Model $model, array|string|object $input): bool
    {
        return $model instanceof Claude && $input instanceof MessageBagInterface;
    }

    /**
     * Turns the raw HTTP response into a response object.
     *
     * @param array<string, mixed> $options a truthy 'stream' entry selects the streaming path
     *
     * @throws RuntimeException when the payload has no content, or has neither tool calls nor text in its first block
     */
    public function convert(ResponseInterface $response, array $options = []): LlmResponse
    {
        // Streaming responses are wrapped lazily; nothing is decoded here.
        if ($options['stream'] ?? false) {
            return new StreamResponse($this->convertStream($response));
        }

        $data = $response->toArray();

        if (!isset($data['content']) || 0 === count($data['content'])) {
            throw new RuntimeException('Response does not contain any content');
        }

        // Scan for tool calls BEFORE insisting on text: a response may consist of
        // tool_use blocks only, in which case the first block has no 'text' key.
        $toolCalls = [];
        foreach ($data['content'] as $content) {
            if ('tool_use' === ($content['type'] ?? null)) {
                $toolCalls[] = new ToolCall($content['id'], $content['name'], $content['input']);
            }
        }

        if ([] !== $toolCalls) {
            return new ToolCallResponse(...$toolCalls);
        }

        if (!isset($data['content'][0]['text'])) {
            throw new RuntimeException('Response content does not contain any text');
        }

        return new TextResponse($data['content'][0]['text']);
    }

    /**
     * Lazily yields the text fragments of a streamed response.
     *
     * Only server-sent events of type 'content_block_delta' that carry a
     * ['delta']['text'] value are yielded; every other chunk is skipped.
     *
     * @return \Generator yields the 'text' value of each matching delta event
     */
    private function convertStream(ResponseInterface $response): \Generator
    {
        foreach ((new EventSourceHttpClient())->stream($response) as $chunk) {
            // Skip chunks that are not server-sent events, and the '[DONE]' marker.
            if (!$chunk instanceof ServerSentEvent || '[DONE]' === $chunk->getData()) {
                continue;
            }

            try {
                $data = $chunk->getArrayData();
            } catch (JsonException) {
                // try catch only needed for Symfony 6.4
                continue;
            }

            // Strict, null-safe type check: events without a 'type' key are skipped
            // silently instead of raising an undefined-key warning.
            if ('content_block_delta' !== ($data['type'] ?? null) || !isset($data['delta']['text'])) {
                continue;
            }

            yield $data['delta']['text'];
        }
    }
}
23 changes: 23 additions & 0 deletions src/Model/Message/Content/Document.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

declare(strict_types=1);

namespace PhpLlm\LlmChain\Model\Message\Content;

/**
 * File-based message content that serializes itself as a base64 "document" block.
 */
final readonly class Document extends File implements Content
{
    /**
     * Builds the JSON payload: a 'document' entry whose source carries the
     * file's format as media type and its base64-encoded data.
     *
     * @return array{type: 'document', source: array{type: 'base64', media_type: string, data: string}}
     */
    public function jsonSerialize(): array
    {
        $source = [
            'type' => 'base64',
            'media_type' => $this->getFormat(),
            'data' => $this->asBase64(),
        ];

        return ['type' => 'document', 'source' => $source];
    }
}
Loading
Loading