// Copyright (c) Microsoft. All rights reserved.
22
3+ using  System ; 
34using  System . Collections . Generic ; 
45using  System . Diagnostics . CodeAnalysis ; 
56using  System . Net . Http ; 
1213using  Microsoft . KernelMemory . Diagnostics ; 
1314using  Microsoft . SemanticKernel ; 
1415using  Microsoft . SemanticKernel . Connectors . AzureOpenAI ; 
16+ using  OpenAI . Chat ; 
1517
1618namespace  Microsoft . KernelMemory . AI . AzureOpenAI ; 
1719
@@ -28,6 +30,8 @@ public sealed class AzureOpenAITextGenerator : ITextGenerator
2830    private  readonly  ITextTokenizer  _textTokenizer ; 
2931    private  readonly  ILogger < AzureOpenAITextGenerator >  _log ; 
3032
33+     private  readonly  string  _deployment ; 
34+ 
3135    /// <inheritdoc/> 
3236    public  int  MaxTokenTotal  {  get ;  } 
3337
@@ -87,6 +91,7 @@ public AzureOpenAITextGenerator(
8791    { 
8892        this . _client  =  skClient ; 
8993        this . _log  =  ( loggerFactory  ??  DefaultLogger . Factory ) . CreateLogger < AzureOpenAITextGenerator > ( ) ; 
94+         this . _deployment  =  config . Deployment ; 
9095        this . MaxTokenTotal  =  config . MaxTokenTotal ; 
9196
9297        textTokenizer  ??=  TokenizerFactory . GetTokenizerForEncoding ( config . Tokenizer ) ; 
@@ -114,7 +119,7 @@ public IReadOnlyList<string> GetTokens(string text)
114119    } 
115120
116121    /// <inheritdoc/> 
117-     public  async  IAsyncEnumerable < string >  GenerateTextAsync ( 
122+     public  async  IAsyncEnumerable < GeneratedTextContent >  GenerateTextAsync ( 
118123        string  prompt , 
119124        TextGenerationOptions  options , 
120125        [ EnumeratorCancellation ]  CancellationToken  cancellationToken  =  default ) 
@@ -153,9 +158,33 @@ public async IAsyncEnumerable<string> GenerateTextAsync(
153158
154159        await  foreach  ( StreamingTextContent  x  in  result . WithCancellation ( cancellationToken ) ) 
155160        { 
156-             if  ( x . Text  ==  null )  {  continue ;  } 
157- 
158-             yield  return  x . Text ; 
161+             TokenUsage ?  tokenUsage  =  null ; 
162+ 
163+             // The last message includes tokens usage metadata. 
164+             // https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options 
165+             if  ( x . Metadata ? [ "Usage" ]  is  ChatTokenUsage  usage ) 
166+             { 
167+                 this . _log . LogTrace ( "Usage report: input tokens: {InputTokenCount}, output tokens: {OutputTokenCount}, output reasoning tokens: {ReasoningTokenCount}" , 
168+                     usage . InputTokenCount ,  usage . OutputTokenCount ,  usage . OutputTokenDetails ? . ReasoningTokenCount  ??  0 ) ; 
169+ 
170+                 tokenUsage  =  new  TokenUsage 
171+                 { 
172+                     Timestamp  =  ( DateTimeOffset ? ) x . Metadata [ "CreatedAt" ]  ??  DateTimeOffset . UtcNow , 
173+                     ServiceType  =  "Azure OpenAI" , 
174+                     ModelType  =  Constants . ModelType . TextGeneration , 
175+                     ModelName  =  this . _deployment , 
176+                     ServiceTokensIn  =  usage . InputTokenCount , 
177+                     ServiceTokensOut  =  usage . OutputTokenCount , 
178+                     ServiceReasoningTokens  =  usage . OutputTokenDetails ? . ReasoningTokenCount 
179+                 } ; 
180+             } 
181+ 
182+             // NOTE: as stated at https://platform.openai.com/docs/api-reference/chat/streaming#chat/streaming-choices, 
183+             // the Choice can also be empty for the last chunk if we set stream_options: { "include_usage": true} to get token counts, so it is possible that 
184+             // x.Text is null, but tokenUsage is not (token usage statistics for the entire request are included in the last chunk). 
185+             if  ( x . Text  is  null  &&  tokenUsage  is  null )  {  continue ;  } 
186+ 
187+             yield  return  new ( x . Text  ??  string . Empty ,  tokenUsage ) ; 
159188        } 
160189    } 
161190} 
0 commit comments