feat(openai): streaming usage (#424)

sixlive · web-flow · commit 776c37f16f12 · 2025-06-19T13:14:37.000-04:00
diff --git a/docs/core-concepts/streaming-output.md b/docs/core-concepts/streaming-output.md
@@ -32,8 +32,13 @@ foreach ($response as $chunk) {
     // The text fragment in this chunk
     echo $chunk->text;
 
+    if ($chunk->usage) {
+        echo "Prompt tokens: " . $chunk->usage->promptTokens;
+        echo "Completion tokens: " . $chunk->usage->completionTokens;
+    }
+
     // Check if this is the final chunk
-    if ($chunk->finishReason) {
+    if ($chunk->finishReason === FinishReason::Stop) {
         echo "Generation complete: " . $chunk->finishReason->name;
     }
 }
diff --git a/src/Providers/OpenAI/Handlers/Stream.php b/src/Providers/OpenAI/Handlers/Stream.php
@@ -27,6 +27,7 @@
 use Prism\Prism\ValueObjects\Messages\ToolResultMessage;
 use Prism\Prism\ValueObjects\Meta;
 use Prism\Prism\ValueObjects\ToolCall;
+use Prism\Prism\ValueObjects\Usage;
 use Psr\Http\Message\StreamInterface;
 use Throwable;
 
@@ -98,6 +99,19 @@ protected function processStream(Response $response, Request $request, int $dept
                 text: $content,
                 finishReason: $finishReason !== FinishReason::Unknown ? $finishReason : null
             );
+
+            if (data_get($data, 'type') === 'response.completed') {
+                yield new Chunk(
+                    text: '',
+                    usage: new Usage(
+                        promptTokens: data_get($data, 'response.usage.input_tokens'),
+                        completionTokens: data_get($data, 'response.usage.output_tokens'),
+                        cacheReadInputTokens: data_get($data, 'response.usage.input_tokens_details.cached_tokens'),
+                        thoughtTokens: data_get($data, 'response.usage.output_tokens_details.reasoning_tokens')
+                    ),
+                    chunkType: ChunkType::Meta,
+                );
+            }
         }
 
         if ($toolCalls !== []) {
diff --git a/src/Text/Chunk.php b/src/Text/Chunk.php
@@ -9,6 +9,7 @@
 use Prism\Prism\ValueObjects\Meta;
 use Prism\Prism\ValueObjects\ToolCall;
 use Prism\Prism\ValueObjects\ToolResult;
+use Prism\Prism\ValueObjects\Usage;
 
 readonly class Chunk
 {
@@ -23,6 +24,7 @@ public function __construct(
         public array $toolResults = [],
         public ?FinishReason $finishReason = null,
         public ?Meta $meta = null,
+        public ?Usage $usage = null,
         public array $additionalContent = [],
         public ChunkType $chunkType = ChunkType::Text
     ) {}
diff --git a/tests/Providers/OpenAI/StreamTest.php b/tests/Providers/OpenAI/StreamTest.php
@@ -10,6 +10,7 @@
 use Prism\Prism\Exceptions\PrismRateLimitedException;
 use Prism\Prism\Facades\Tool;
 use Prism\Prism\Prism;
+use Prism\Prism\ValueObjects\Usage;
 use Tests\Fixtures\FixtureResponse;
 
 beforeEach(function (): void {
@@ -31,7 +32,7 @@
     $model = null;
 
     foreach ($response as $chunk) {
-        if ($chunk->chunkType === ChunkType::Meta) {
+        if ($chunk->meta) {
             $responseId = $chunk->meta?->id;
             $model = $chunk->meta?->model;
         }
@@ -217,21 +218,46 @@
 
     $fullResponse = '';
     $toolCallCount = 0;
+    /** @var Usage[] $usage */
+    $usage = [];
 
     foreach ($response as $chunk) {
         if ($chunk->toolCalls !== []) {
             $toolCallCount += count($chunk->toolCalls);
         }
         $fullResponse .= $chunk->text;
+
+        if ($chunk->usage) {
+            $usage[] = $chunk->usage;
+        }
     }
 
     expect($toolCallCount)->toBe(2);
     expect($fullResponse)->not->toBeEmpty();
 
+    // Verify reasoning usage
+    expect($usage[0]->thoughtTokens)->toBeGreaterThan(0);
+
     // Verify we made multiple requests for a conversation with tool calls
     Http::assertSentCount(3);
 });
 
+it('emits usage information', function (): void {
+    FixtureResponse::fakeResponseSequence('v1/responses', 'openai/stream-basic-text-responses');
+
+    $response = Prism::text()->using('openai', 'gpt-4')->withPrompt('Who are you?')->asStream();
+    // Collect usage before asserting: checks placed inside the loop pass vacuously when no chunk has usage.
+    $usage = [];
+    foreach ($response as $chunk) {
+        if ($chunk->usage) {
+            $usage[] = $chunk->usage;
+        }
+    }
+    expect($usage)->not->toBeEmpty();
+    expect($usage[0]->promptTokens)->toBeGreaterThan(0);
+    expect($usage[0]->completionTokens)->toBeGreaterThan(0);
+});
+
 it('throws a PrismRateLimitedException with a 429 response code', function (): void {
     Http::fake([
         '*' => Http::response(

Original file line number	Diff line number	Diff line change
`@@ -32,8 +32,13 @@ foreach ($response as $chunk) {`
`32`	`32`	`// The text fragment in this chunk`
`33`	`33`	`echo $chunk->text;`
`34`	`34`
	`35`	`+ if ($chunk->usage) {`
	`36`	`+ echo "Prompt tokens: " . $chunk->usage->promptTokens;`
	`37`	`+ echo "Completion tokens: " . $chunk->usage->completionTokens;`
	`38`	`+ }`
	`39`	`+`
`35`	`40`	`// Check if this is the final chunk`
`36`		`- if ($chunk->finishReason) {`
	`41`	`+ if ($chunk->finishReason === FinishReason::Stop) {`
`37`	`42`	`echo "Generation complete: " . $chunk->finishReason->name;`
`38`	`43`	`}`
`39`	`44`	`}`