Skip to content

Commit 19b5415

Browse files
authored
feat(server): add multimodal support to PrismServer API endpoints (#403)
1 parent d9bb489 commit 19b5415

File tree

2 files changed

+315
-3
lines changed

2 files changed

+315
-3
lines changed

src/Http/Controllers/PrismChatController.php

Lines changed: 91 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
use Prism\Prism\Text\PendingRequest;
99
use Prism\Prism\Text\Response as TextResponse;
1010
use Prism\Prism\ValueObjects\Messages\AssistantMessage;
11+
use Prism\Prism\ValueObjects\Messages\Support\Image;
1112
use Prism\Prism\ValueObjects\Messages\SystemMessage;
1213
use Prism\Prism\ValueObjects\Messages\UserMessage;
1314
use Symfony\Component\HttpFoundation\Response;
@@ -130,14 +131,101 @@ protected function mapMessages(array $messages): array
130131
{
131132
return collect($messages)
132133
->map(fn ($message): UserMessage|AssistantMessage|SystemMessage => match ($message['role']) {
133-
'user' => new UserMessage($message['content']),
134-
'assistant' => new AssistantMessage($message['content']),
135-
'system' => new SystemMessage($message['content']),
134+
'user' => $this->mapUserMessage($message),
135+
'assistant' => new AssistantMessage($this->extractTextContent($message['content'])),
136+
'system' => new SystemMessage($this->extractTextContent($message['content'])),
136137
default => throw new PrismServerException("Couldn't map messages to Prism messages")
137138
})
138139
->toArray();
139140
}
140141

142+
/**
 * Build a Prism UserMessage from an OpenAI-style user message.
 *
 * Plain string content is passed through unchanged. Array content (the
 * OpenAI multimodal format) is flattened: every `text` part is appended to
 * the message text in order, and every `image_url` part is converted with
 * mapImageUrl(). Parts that are not arrays, that lack a string `type`, or
 * whose type is not recognised are skipped.
 *
 * @param array{role: string, content: mixed} $message
 *
 * @throws PrismServerException when the content is neither a string nor an array
 */
protected function mapUserMessage(array $message): UserMessage
{
    $body = $message['content'];

    // Simple case: the content is already plain text.
    if (is_string($body)) {
        return new UserMessage($body);
    }

    // Anything that is not a list of parts (null, number, boolean, ...) is rejected.
    if (! is_array($body)) {
        throw new PrismServerException('Invalid message content type');
    }

    $text = '';
    $attachments = [];

    foreach ($body as $segment) {
        $hasUsableType = is_array($segment)
            && isset($segment['type'])
            && is_string($segment['type']);

        if (! $hasUsableType) {
            continue;
        }

        if ($segment['type'] === 'text') {
            $text .= $segment['text'] ?? '';
        } elseif ($segment['type'] === 'image_url') {
            $attachments[] = $this->mapImageUrl($segment);
        }
        // Any other part type is ignored.
    }

    return new UserMessage($text, $attachments);
}
182+
183+
/**
 * Convert an OpenAI-style `image_url` content part into a Prism Image.
 *
 * The part is expected to look like `['image_url' => ['url' => '...']]`.
 * A `data:` URI containing a comma is split into its metadata and payload
 * and turned into a base64 image; the MIME type is read from the metadata
 * and defaults to `image/jpeg` when none is present. Every other value is
 * treated as a remote URL.
 *
 * @param array<string, mixed> $imagePart
 *
 * @throws PrismServerException when `image_url.url` is missing, empty or not a string
 */
protected function mapImageUrl(array $imagePart): Image
{
    $imageUrl = $imagePart['image_url'] ?? null;
    $url = is_array($imageUrl) ? ($imageUrl['url'] ?? null) : null;

    // Reject malformed parts up front instead of building an Image with an
    // empty URL or handing a non-string value to Image::fromUrl().
    if (! is_string($url) || $url === '') {
        throw new PrismServerException('Image content part is missing a valid "image_url.url" value');
    }

    if (str_starts_with($url, 'data:')) {
        // Expected shape: data:<mime-type>;base64,<payload>
        $pieces = explode(',', $url, 2);

        if (count($pieces) === 2) {
            [$metadata, $base64Data] = $pieces;

            // Pull the MIME type out of the metadata, e.g. "data:image/png;base64".
            preg_match('/data:([^;]+)/', $metadata, $matches);
            $mimeType = $matches[1] ?? 'image/jpeg';

            return Image::fromBase64($base64Data, $mimeType);
        }
    }

    // Regular URL (a data: URI without a comma also ends up here, as before).
    return Image::fromUrl($url);
}
210+
211+
/**
 * Reduce message content to its plain text.
 *
 * A string is returned as-is. For a list of content parts, the `text`
 * value of every part whose `type` is exactly 'text' is concatenated in
 * order; all other parts are ignored.
 *
 * @param string|array<int, mixed> $content
 */
protected function extractTextContent(string|array $content): string
{
    if (! is_array($content)) {
        return $content;
    }

    return array_reduce(
        $content,
        static function (string $carry, mixed $part): string {
            $isTextPart = is_array($part) && ($part['type'] ?? null) === 'text';

            return $isTextPart ? $carry.($part['text'] ?? '') : $carry;
        },
        '',
    );
}
228+
141229
protected function resolvePrism(string $model): PendingRequest
142230
{
143231
try {

tests/Http/PrismChatControllerTest.php

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,3 +171,227 @@
171171
expect($response->json('error.message'))
172172
->toBe('Prism "nyx" is not registered with PrismServer');
173173
});
174+
175+
it('handles multimodal messages with image URL', function (): void {
    freezeTime();

    // OpenAI multimodal request: one text part followed by one remote image.
    $payload = [
        'model' => 'vision-model',
        'messages' => [[
            'role' => 'user',
            'content' => [
                ['type' => 'text', 'text' => 'What is in this image?'],
                ['type' => 'image_url', 'image_url' => ['url' => 'https://example.com/test.jpg']],
            ],
        ]],
    ];

    $pendingRequest = Mockery::mock(PendingRequest::class);

    // The controller must hand Prism a UserMessage carrying the text and a URL-backed image.
    $pendingRequest->expects('withMessages')
        ->withArgs(function ($messages): bool {
            $first = $messages[0];

            if (! $first instanceof UserMessage) {
                return false;
            }

            if ($first->text() !== 'What is in this image?') {
                return false;
            }

            if (count($first->images()) !== 1) {
                return false;
            }

            $image = $first->images()[0];

            return $image->url() === 'https://example.com/test.jpg'
                && $image->isUrl();
        })
        ->andReturnSelf();

    $pendingRequest->expects('asText')
        ->andReturn(new Response(
            steps: collect(),
            text: 'I can see a test image.',
            finishReason: FinishReason::Stop,
            toolCalls: [],
            toolResults: [],
            usage: new Usage(15, 12),
            meta: new Meta('cmp_image123', 'gpt-4-vision'),
            responseMessages: collect([
                new AssistantMessage('I can see a test image.'),
            ]),
            messages: collect(),
        ));

    PrismServer::register('vision-model', fn () => $pendingRequest);

    /** @var TestResponse */
    $response = $this->postJson('prism/openai/v1/chat/completions', $payload);

    $response->assertOk();
    expect($response->json('choices.0.message.content'))->toBe('I can see a test image.');
});
228+
229+
it('handles multimodal messages with base64 image', function (): void {
    freezeTime();

    $base64Image = base64_encode('fake-image-data');

    // OpenAI multimodal request whose image is an inline data: URI rather than a remote URL.
    $payload = [
        'model' => 'vision-model',
        'messages' => [[
            'role' => 'user',
            'content' => [
                ['type' => 'text', 'text' => 'Analyze this screenshot'],
                ['type' => 'image_url', 'image_url' => ['url' => "data:image/png;base64,{$base64Image}"]],
            ],
        ]],
    ];

    $pendingRequest = Mockery::mock(PendingRequest::class);

    // The data: URI must be split into its base64 payload and MIME type.
    $pendingRequest->expects('withMessages')
        ->withArgs(function ($messages) use ($base64Image): bool {
            $first = $messages[0];

            if (! $first instanceof UserMessage) {
                return false;
            }

            if ($first->text() !== 'Analyze this screenshot') {
                return false;
            }

            if (count($first->images()) !== 1) {
                return false;
            }

            $image = $first->images()[0];

            return $image->base64() === $base64Image
                && $image->mimeType() === 'image/png'
                && ! $image->isUrl();
        })
        ->andReturnSelf();

    $pendingRequest->expects('asText')
        ->andReturn(new Response(
            steps: collect(),
            text: 'This appears to be a screenshot.',
            finishReason: FinishReason::Stop,
            toolCalls: [],
            toolResults: [],
            usage: new Usage(20, 15),
            meta: new Meta('cmp_base64_123', 'gpt-4-vision'),
            responseMessages: collect([
                new AssistantMessage('This appears to be a screenshot.'),
            ]),
            messages: collect(),
        ));

    PrismServer::register('vision-model', fn () => $pendingRequest);

    /** @var TestResponse */
    $response = $this->postJson('prism/openai/v1/chat/completions', $payload);

    $response->assertOk();
    expect($response->json('choices.0.message.content'))->toBe('This appears to be a screenshot.');
});
285+
286+
it('handles multimodal messages with multiple images', function (): void {
    freezeTime();

    // OpenAI multimodal request: one text part followed by two remote images.
    $payload = [
        'model' => 'vision-model',
        'messages' => [[
            'role' => 'user',
            'content' => [
                ['type' => 'text', 'text' => 'Compare these two images'],
                ['type' => 'image_url', 'image_url' => ['url' => 'https://example.com/image1.jpg']],
                ['type' => 'image_url', 'image_url' => ['url' => 'https://example.com/image2.jpg']],
            ],
        ]],
    ];

    $pendingRequest = Mockery::mock(PendingRequest::class);

    // Both images must arrive on the single UserMessage, in request order.
    $pendingRequest->expects('withMessages')
        ->withArgs(function ($messages): bool {
            $first = $messages[0];

            if (! $first instanceof UserMessage) {
                return false;
            }

            if ($first->text() !== 'Compare these two images') {
                return false;
            }

            if (count($first->images()) !== 2) {
                return false;
            }

            return $first->images()[0]->url() === 'https://example.com/image1.jpg'
                && $first->images()[1]->url() === 'https://example.com/image2.jpg';
        })
        ->andReturnSelf();

    $pendingRequest->expects('asText')
        ->andReturn(new Response(
            steps: collect(),
            text: 'Both images show different scenes.',
            finishReason: FinishReason::Stop,
            toolCalls: [],
            toolResults: [],
            usage: new Usage(25, 18),
            meta: new Meta('cmp_multi123', 'gpt-4-vision'),
            responseMessages: collect([
                new AssistantMessage('Both images show different scenes.'),
            ]),
            messages: collect(),
        ));

    PrismServer::register('vision-model', fn () => $pendingRequest);

    /** @var TestResponse */
    $response = $this->postJson('prism/openai/v1/chat/completions', $payload);

    $response->assertOk();
    expect($response->json('choices.0.message.content'))->toBe('Both images show different scenes.');
});
340+
341+
it('handles mixed simple and multimodal messages', function (): void {
    freezeTime();

    // Two user messages: the first is plain text, the second uses the multimodal part list.
    $payload = [
        'model' => 'vision-model',
        'messages' => [
            [
                'role' => 'user',
                'content' => 'Hello!',
            ],
            [
                'role' => 'user',
                'content' => [
                    ['type' => 'text', 'text' => 'What about this image?'],
                    ['type' => 'image_url', 'image_url' => ['url' => 'https://example.com/test.jpg']],
                ],
            ],
        ],
    ];

    $pendingRequest = Mockery::mock(PendingRequest::class);

    // The plain message must carry no images; the multimodal one must carry exactly one.
    $pendingRequest->expects('withMessages')
        ->withArgs(function ($messages): bool {
            if (count($messages) !== 2) {
                return false;
            }

            [$plain, $withImage] = [$messages[0], $messages[1]];

            $plainIsCorrect = $plain instanceof UserMessage
                && $plain->text() === 'Hello!'
                && $plain->images() === [];

            if (! $plainIsCorrect) {
                return false;
            }

            return $withImage instanceof UserMessage
                && $withImage->text() === 'What about this image?'
                && count($withImage->images()) === 1;
        })
        ->andReturnSelf();

    $pendingRequest->expects('asText')
        ->andReturn(new Response(
            steps: collect(),
            text: 'Hello! I can see the image you shared.',
            finishReason: FinishReason::Stop,
            toolCalls: [],
            toolResults: [],
            usage: new Usage(20, 15),
            meta: new Meta('cmp_mixed123', 'gpt-4-vision'),
            responseMessages: collect([
                new AssistantMessage('Hello! I can see the image you shared.'),
            ]),
            messages: collect(),
        ));

    PrismServer::register('vision-model', fn () => $pendingRequest);

    /** @var TestResponse */
    $response = $this->postJson('prism/openai/v1/chat/completions', $payload);

    $response->assertOk();
    expect($response->json('choices.0.message.content'))->toBe('Hello! I can see the image you shared.');
});

0 commit comments

Comments
 (0)