From d66738aa9e932ab99222baab01e1bdf7df2b20fb Mon Sep 17 00:00:00 2001 From: Chris <101201552+ChrisB-TL@users.noreply.github.com> Date: Mon, 20 Jan 2025 20:58:40 +0000 Subject: [PATCH] Anthropic PDF Handling (#142) --- docs/providers/anthropic.md | 30 ++++++++- src/Providers/Anthropic/Maps/MessageMap.php | 22 ++++++- .../Messages/Support/Document.php | 51 ++++++++++++++ src/ValueObjects/Messages/UserMessage.php | 15 ++++- tests/Fixtures/test-pdf.pdf | Bin 0 -> 9706 bytes tests/Providers/Anthropic/MessageMapTest.php | 62 ++++++++++++++++++ 6 files changed, 176 insertions(+), 4 deletions(-) create mode 100644 src/ValueObjects/Messages/Support/Document.php create mode 100644 tests/Fixtures/test-pdf.pdf diff --git a/docs/providers/anthropic.md b/docs/providers/anthropic.md index df6e86a3c..5d860d7b9 100644 --- a/docs/providers/anthropic.md +++ b/docs/providers/anthropic.md @@ -14,10 +14,11 @@ Anthropic's prompt caching feature allows you to drastically reduce latency and We support Anthropic prompt caching on: - System Messages (text only) -- User Messages (Text and Image) +- User Messages (text, images and documents (PDF only)) +- Assistant Messages (text only) - Tools -The API for enable prompt caching is the same for all, enabled via the `withProviderMeta()` method. Where a UserMessage contains both text and an image, both will be cached. +The API for enabling prompt caching is the same for all, enabled via the `withProviderMeta()` method. Where a UserMessage contains text together with an image or document, all of its parts will be cached. ```php use EchoLabs\Enums\Provider; @@ -55,6 +56,31 @@ Note that you must use the `withMessages()` method in order to enable prompt cac Please ensure you read Anthropic's [prompt caching documentation](https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching), which covers some important information on e.g. minimum cacheable tokens and message order consistency. 
+### PDF Support + +Prism supports Anthropic PDF processing on UserMessages by passing `Document` objects (`EchoLabs\Prism\ValueObjects\Messages\Support\Document`) via the `$additionalContent` parameter: + +```php +use EchoLabs\Prism\Enums\Provider; +use EchoLabs\Prism\Prism; +use EchoLabs\Prism\ValueObjects\Messages\UserMessage; + +Prism::text() + ->using(Provider::Anthropic, 'claude-3-5-sonnet-20241022') + ->withMessages([ + new UserMessage('Here is the document from base64', [ + Document::fromBase64(base64_encode(file_get_contents('tests/Fixtures/test-pdf.pdf')), 'application/pdf'), + ]), + new UserMessage('Here is the document from a local path', [ + Document::fromPath('tests/Fixtures/test-pdf.pdf', 'application/pdf'), + ]), + ]) + ->generate(); + +``` +Anthropic uses vision to process PDFs, and consequently there are some limitations detailed in their [feature documentation](https://docs.anthropic.com/en/docs/build-with-claude/pdf-support). + + ## Considerations ### Message Order diff --git a/src/Providers/Anthropic/Maps/MessageMap.php b/src/Providers/Anthropic/Maps/MessageMap.php index 6aecbb4f6..d450ce8f8 100644 --- a/src/Providers/Anthropic/Maps/MessageMap.php +++ b/src/Providers/Anthropic/Maps/MessageMap.php @@ -8,6 +8,7 @@ use EchoLabs\Prism\Contracts\Message; use EchoLabs\Prism\Enums\Provider; use EchoLabs\Prism\ValueObjects\Messages\AssistantMessage; +use EchoLabs\Prism\ValueObjects\Messages\Support\Document; use EchoLabs\Prism\ValueObjects\Messages\Support\Image; use EchoLabs\Prism\ValueObjects\Messages\SystemMessage; use EchoLabs\Prism\ValueObjects\Messages\ToolResultMessage; @@ -106,6 +107,7 @@ protected static function mapUserMessage(UserMessage $message): array 'cache_control' => $cache_control, ]), ...self::mapImageParts($message->images(), $cache_control), + ...self::mapDocumentParts($message->documents(), $cache_control), ], ]; } @@ -145,7 +147,7 @@ protected static function mapAssistantMessage(AssistantMessage $message): array /** * @param Image[] $parts * @param array|null $cache_control - * @return array + * @return array */ protected 
static function mapImageParts(array $parts, ?array $cache_control = null): array { @@ -165,4 +167,22 @@ protected static function mapImageParts(array $parts, ?array $cache_control = nu ]); }, $parts); } + + /** + * @param Document[] $parts + * @param array|null $cache_control + * @return array + */ + protected static function mapDocumentParts(array $parts, ?array $cache_control = null): array + { + return array_map(fn (Document $document): array => array_filter([ + 'type' => 'document', + 'source' => [ + 'type' => 'base64', + 'media_type' => $document->mimeType, + 'data' => $document->document, + ], + 'cache_control' => $cache_control, + ]), $parts); + } } diff --git a/src/ValueObjects/Messages/Support/Document.php b/src/ValueObjects/Messages/Support/Document.php new file mode 100644 index 000000000..d2337cc30 --- /dev/null +++ b/src/ValueObjects/Messages/Support/Document.php @@ -0,0 +1,51 @@ + $additionalContent + * @param array $additionalContent */ public function __construct( protected readonly string $content, @@ -43,4 +44,16 @@ public function images(): array ->where(fn ($part): bool => $part instanceof Image) ->toArray(); } + + /** + * Note: Prism currently only supports Documents with Anthropic. 
+ * + * @return Document[] + */ + public function documents(): array + { + return collect($this->additionalContent) + ->where(fn ($part): bool => $part instanceof Document) + ->toArray(); + } } diff --git a/tests/Fixtures/test-pdf.pdf b/tests/Fixtures/test-pdf.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cc01b9775133b9966d763c4235665aa46e049c8b GIT binary patch literal 9706 zcmeHN2~-o=(vBiBAmF$wBDAP%BXky$K-eUNAjl?(2H7+u4TL3$Ss($F5fD^V#AV!Y zfpJGx6&F-=5En#IP!vTR1wjV{9YtJ5eYcxMwt4T(JOBUw^Pe+0f!waHs$2Els@wP8 z>RDb4rWwJUs--dO1o&ua5Fk7x3JTNGu(5&JN-+XEa%J2IQ3&kC4MC(35#08M-MGRK zV?<~I!7Q0X&Xchv2;wag$sht5yYJ4Ggu4iXML-AIMEDS3L+tEecSOeJ1BDQgiZLz} zAkm1#OM(O=5=6*DJFrA=*x6}mAVNN72-;;oi!e)IN0D3zI)vv5_)hc_Y>$t9p$U?@{0l%YQasLKZ?5h7sT0<)bIGG~^ID?&fJX27aqRkWL5&1B@C97DNZ|)W@etXHY)&FV zmKFqaD`EfyTocNGky6N_s~3q#E(Seg!{Llxz$}<8;R>Z<)ES-Y&^xTV9C*JykZvVypjtA|y?8w9y zTLNd=9`X=1tJZU_DE7>`+o0TDP&_-RwR8Gj&oS*8>HEgk=%~_Ld>4h^XSy7Gz4S@z z2klWIpD!(vdx@*$+j27|kQ^?Cx|~@d$T@0uz^`bXk<&K&H(?b!*^ecSt!10mC(8;m zuRXh^%5|lDNQnu*>tI~&WJ;OjTe3dsP4?`c#wR{^wc9MH?O5|-#j)w}A1>9}AAY;o zbbr9*k1i()BSu)a#_uW>uYEtfsleqK7I8hk_5>dq3q3$#_@iM0v*bZCG&fKR9;S1p z2u7wmGMt>8e((~B#R8!eb`kNCaY;NCH`54sakMmNk71N7;Lf^OW!r34p@5 z2MRECcq1VK0JqZE0pvM?&LwiOI0A`8)3aZ&qFwYT>K%h`CSVqMa3c|T0RAANL!=Oi z2)lq8Ou%yxhD0C`9(F)S0L&bCGQN8_WRMvNTsU&Ya}hyEX!kvbs1S~T57NMrOaRlE zqU(Z$x3a`~pBm&k00Et4yItEFOq7PCJAwonf z&HF-!O)vdi?wx^Px;yM4l0+AD^phGicMm1y=rBNP(EN(UMNinU!NiS$jv>q> z_KSArP|?m22pxn{L2vY<>b{K@M8F~(W(p)ynPVtdf)0=y_gfo-89WED%;@Ldpb5Rd z=6-R|*Rub|anN_3c96?LMG|AjP>Dba(Io;tcpihSaRieyNJQf=Uvcg@0gi{0;6ykn z_zT9#aJe`L#{q2#jt^SFSPL4055kdf6dbunoj@dHfTw{mgGR)Ipe7K=1TvXQ#{YoF z8-Xuia~JXd1qXN<^5s0_f6B1Fkw6?2vYtU6shHN$ zveC3|+H}id%qf~lq&=PWJARt~u&_`NkpB9s{9T!ATtZQHsikGsec!YKGeMr{>HN0_ zFMZ}63E~iESKl8uDgN#Tz2(lHUO8`*Pps`o%JdJHDp#nmD&MV8->-LaAKbXZCuVuW zXx7B!jsB^QF0(a%2wm(`9n_o(-?%vX&L zfZLz%U%2NRBYIp3AFUhmp@}`jdZw>G9|oeOMgL(~Hs3uw3IF}MW=`WN+u30eoQVvd zJF25U?=7;241d>PJZs&g(@5*4+-cvA*V0o~5tvSf?i^R{CV 
zw!%rg^V|_HL*3RSxIX{vSXp=W)tJzvqGdB|^XkrY?p|DWCG1DyC6`d9+t}bzRi9in zNZFdb_d_kmVa|2y-Kv~>sjzVVt{A!Qu~V;BZJCmF(D`j3``TUUx+N3JedLUeCr7m7 zVkTWYLcF?u^88&Yrep3-w@dvx`snD%J}K`C-+KP~?t0{ulVjGD`P$poOJl*G>7ds8JrRhM7u5LA3>a>a?fM3*2?8e-y;OQ&})B z)hm8Q&3bRs6?YPD-N_UjUu(N>OM->m{(u+b(92!z?+>{8>hV)PY>Phs=F80w zxi4dpB{;9|t~ew(tEKq2Wq#H~){ge9Tz6%Cp#Ldc zeva0X>I-hIKHiF#nUws(!cat9z%&whg)1pV#;T|iQR~;IU@M7+{ zUTN1mo!7wIurFS~YEw!r~Fe*RRP zK)K!a+#e0kkmcqL%|_B%`$oy4n}rNq%I!MSv!#_;@LO*Kld6Tql@FgT-MW2VM|q1= zX4XrJU)kq15_|hpJr2>fXpP3s$)VY=#*Ex(`DEmAva$B)_4~7OZ^U1oz95kC{oyt3 z(bujn{>AOJIm>PGh&;W^GkJ|Mohj)A!+PBL&FV`7nkK?mnCmt@N6bBUi z*3nc|NK7lx(-cDC8gJsEqyVan2D*Nle0bg$TPI3u*} zw^&Ii=lE%p2etD2(;t=`)X7ev{z7S;^<(A^|Du>Xa|1O3gQFyxp@*7my`%O*x9{17 z>i!TL7SY}?>}$iPcX};Wjk*yJ#*y;d3!Y9J?c}>Fv0;V&y0yBG8y0K68#(Rg*rku3 z+6q-lR1GuUJbbt6{I;B!W3{C^-s$!A&If2aSgm0-7js8WyPduvyKH*cwLw{qX9F@@&QFxgRBQ2W4|l9=!4J_PJQjsk_r6(`L%Y9KL?< zEbH~@2WoK@(*#>vO51$%SOq4AJb7B4x!2T3oc5e!LQX~5Wi{HPb<0jn){oCCgzU9u zWd-j?^wM)5Ih(!NX4@pKIrHd~V`hE*RnL6Cyc%|j%BTBnzh2wqaP;u|o62=1S@n*J z@XgyMJg&`Ja{H2I{4Bq#!r(v;!}(WoGm~|~4n5hLrOzyxU~^`eXPoNplz767%uV@@ zWy(wATE`XxmansN>qk{dlho_Scet(v%uAb4EzHqEO zc)C-Ive72FaB1Q56{;CV=~U0(b$;K>Oda1kV$#zQ4>Z=PPF6AUy>U@fqS}tH8ugsI zzG&Xnhm*4>ek7#gH0ht38w>PF+vcJ^(w2=kO5y76g!r@*8aSF4Q=Jy*)Qel zqJ!t|m^b`A}Z5G2lCS z^`)G1*{-Y>qHFeth@+LhFBu^R4vieP#_$Y2=?-__M3brMNFYzAd8vP@&dgAKWxV>G zLNCr({-dSKYZ4RZoq2TJ>F}6y{Ku!{Nb2}hePBx7YG-@J1+@zAuLdDr9ebGNwk13S;O zWrmLm&`m6*oM**1u(z{O&=is4NN^SjDIz0;QkufnfQueN z&_Ej{!v+w>A`7=QAej@)sCa^boraBsA58ORFuR4om90UjOeUtmaCCIEc{IsfBng2D zR#sLpo(L0(W`M&?suaq&3NxW}MvsP`y)f8hL8wM^@{uG6-v!9!_0Z2 zNEqCJ3FagiRRr3!o*kB*yMQMVNkzf3UveRiNWzDL!NE)~5sU`e1{MA$U<0Ea_c5f~ zxjvL3P2=-=Jr>I)5tzSx9*lII+DL(Q1k5@iPpV)W{X4-{}zk`Wo&v02)ZA(L-)&Oc!C*$ z%qEa&WGW5YG}G{S8+ZUGCJDCDf?(ypBiV=ZKOH?_fV=xYhb1zy2N`#F8bidBqo;8$ z3_H18z^Bn2=_Dtr1JjJ^fOjynppvL&4n(S>nWKdxgF$g3Ix)#i%+3C?sH1@5B9zLw z;E4Y}(bh+H2ucvDH91!`FNFg;6JCM&}KQuL8Wlp``)#iFok8`vYDDp^(WWGKAjgp@D)m 
z`hkSKGy(;b^#;(01TX{xXk`3gK1-l!AfFX!2pSoTe}8=hP}3SrvmDH4F<2kL!V(Ph z0DWZAU|A}DkiP^g@?iV$1hCB>0;|C^%OUuz2GhWneUL1XFa(V_m7L8>TzvLCF7KmXK=l*6KSZGpu-J->rX*(Z FKL9yb0lWYJ literal 0 HcmV?d00001 diff --git a/tests/Providers/Anthropic/MessageMapTest.php b/tests/Providers/Anthropic/MessageMapTest.php index 1c5642fda..d16b3a9a3 100644 --- a/tests/Providers/Anthropic/MessageMapTest.php +++ b/tests/Providers/Anthropic/MessageMapTest.php @@ -8,6 +8,7 @@ use EchoLabs\Prism\Providers\Anthropic\Enums\AnthropicCacheType; use EchoLabs\Prism\Providers\Anthropic\Maps\MessageMap; use EchoLabs\Prism\ValueObjects\Messages\AssistantMessage; +use EchoLabs\Prism\ValueObjects\Messages\Support\Document; use EchoLabs\Prism\ValueObjects\Messages\Support\Image; use EchoLabs\Prism\ValueObjects\Messages\SystemMessage; use EchoLabs\Prism\ValueObjects\Messages\ToolResultMessage; @@ -73,6 +74,40 @@ ->toBe('image/png'); }); +it('maps user messages with documents from path', function (): void { + $mappedMessage = MessageMap::map([ + new UserMessage('Here is the document', [ + Document::fromPath('tests/Fixtures/test-pdf.pdf'), + ]), + ]); + + expect(data_get($mappedMessage, '0.content.1.type')) + ->toBe('document'); + expect(data_get($mappedMessage, '0.content.1.source.type')) + ->toBe('base64'); + expect(data_get($mappedMessage, '0.content.1.source.data')) + ->toContain(base64_encode(file_get_contents('tests/Fixtures/test-pdf.pdf'))); + expect(data_get($mappedMessage, '0.content.1.source.media_type')) + ->toBe('application/pdf'); +}); + +it('maps user messages with documents from base64', function (): void { + $mappedMessage = MessageMap::map([ + new UserMessage('Here is the document', [ + Document::fromBase64(base64_encode(file_get_contents('tests/Fixtures/test-pdf.pdf')), 'application/pdf'), + ]), + ]); + + expect(data_get($mappedMessage, '0.content.1.type')) + ->toBe('document'); + expect(data_get($mappedMessage, '0.content.1.source.type')) + ->toBe('base64'); + expect(data_get($mappedMessage, 
'0.content.1.source.data')) + ->toContain(base64_encode(file_get_contents('tests/Fixtures/test-pdf.pdf'))); + expect(data_get($mappedMessage, '0.content.1.source.media_type')) + ->toBe('application/pdf'); +}); + it('does not maps user messages with images from url', function (): void { $this->expectException(InvalidArgumentException::class); MessageMap::map([ @@ -215,6 +250,33 @@ ]]); }); +it('sets the cache type on a UserMessage document if cacheType providerMeta is set on message', function (): void { + expect(MessageMap::map([ + (new UserMessage( + content: 'Who are you?', + additionalContent: [Document::fromPath('tests/Fixtures/test-pdf.pdf')] + ))->withProviderMeta(Provider::Anthropic, ['cacheType' => 'ephemeral']), + ]))->toBe([[ + 'role' => 'user', + 'content' => [ + [ + 'type' => 'text', + 'text' => 'Who are you?', + 'cache_control' => ['type' => 'ephemeral'], + ], + [ + 'type' => 'document', + 'source' => [ + 'type' => 'base64', + 'media_type' => 'application/pdf', + 'data' => base64_encode(file_get_contents('tests/Fixtures/test-pdf.pdf')), + ], + 'cache_control' => ['type' => 'ephemeral'], + ], + ], + ]]); +}); + it('sets the cache type on an AssistantMessage if cacheType providerMeta is set on message', function (mixed $cacheType): void { expect(MessageMap::map([ (new AssistantMessage(content: 'Who are you?'))->withProviderMeta(Provider::Anthropic, ['cacheType' => $cacheType]),