Skip to content

Commit 4e40f39

Browse files
.Net: Fix Gemini Auto Invoke when returned function is non-first part (#12174)
### Motivation and Context <!-- Thank you for your contribution to the semantic-kernel repo! Please help reviewers and future users, providing the following information: 1. Why is this change required? 2. What problem does it solve? 3. What scenario does it contribute to? 4. If it fixes an open issue, please link to the issue here. --> Fixes #11651 - Currently, the Gemini connectors do not auto-invoke functions when tool calls are returned in the same message as text parts, which is common with the 2.5 Flash and Pro models: ```json "content": { "parts": [ { "text": "Running the TimePlugin.Now function..." }, { "functionCall": { "name": "TimePlugin.Now", "args": { "param1": "hello" } } } ], "role": "model" } ``` This change makes auto-invocation of kernel functions work for such responses and adds unit tests that use this data. ### Description <!-- Describe your changes, the overall approach, the underlying design. These notes will help understanding how your code works. Thanks! --> - Added unit test data for streaming responses with text & tool parts - Changed `GetChatMessageContentFromCandidate` to return tools from all candidate parts instead of only the first part - Added an extra check in `GenerateChatMessageAsync` for an empty `ToolCalls` list - Fixed streaming responses in auto-invoke mode that contain both text and tool parts not being returned to the caller, and added a unit test ### Contribution Checklist <!-- Before submitting this PR, please make sure: --> - [x] The code builds clean without any errors or warnings - [x] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations - [x] All unit tests pass, and I have added new tests where possible - [x] I didn't break anyone 😄 --------- Co-authored-by: Roger Barreto <[email protected]>
1 parent e514120 commit 4e40f39

File tree

3 files changed

+49
-7
lines changed

3 files changed

+49
-7
lines changed

dotnet/src/Connectors/Connectors.Google.UnitTests/Core/Gemini/Clients/GeminiChatStreamingFunctionCallingTests.cs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -272,6 +272,35 @@ await client.StreamGenerateChatMessageAsync(chatHistory, executionSettings: exec
272272
Assert.False(string.IsNullOrWhiteSpace(item.Content)));
273273
}
274274

275+
[Fact]
276+
public async Task IfAutoInvokeShouldReturnAssistantToolCallMessagesWithTextAsync()
277+
{
278+
// Arrange
279+
using var handlerStub = new MultipleHttpMessageHandlerStub();
280+
handlerStub.AddJsonResponse(this._responseContentWithFunction);
281+
handlerStub.AddJsonResponse(this._responseContent);
282+
#pragma warning disable CA2000
283+
var client = this.CreateChatCompletionClient(httpClient: handlerStub.CreateHttpClient());
284+
#pragma warning restore CA2000
285+
var chatHistory = CreateSampleChatHistory();
286+
var executionSettings = new GeminiPromptExecutionSettings
287+
{
288+
ToolCallBehavior = GeminiToolCallBehavior.AutoInvokeKernelFunctions
289+
};
290+
291+
// Act
292+
var messages =
293+
await client.StreamGenerateChatMessageAsync(chatHistory, executionSettings: executionSettings, kernel: this._kernelWithFunctions)
294+
.ToListAsync();
295+
296+
// Assert
297+
var firstMessage = (GeminiStreamingChatMessageContent?)messages.FirstOrDefault();
298+
Assert.NotNull(firstMessage?.ToolCalls);
299+
Assert.Single(firstMessage.ToolCalls,
300+
item => item.FullyQualifiedName == this._timePluginNow.FullyQualifiedName);
301+
Assert.False(string.IsNullOrWhiteSpace(firstMessage.Content));
302+
}
303+
275304
[Fact]
276305
public async Task IfAutoInvokeShouldPassToolsToEachRequestAsync()
277306
{

dotnet/src/Connectors/Connectors.Google.UnitTests/TestData/chat_one_function_response.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
{
44
"content": {
55
"parts": [
6+
{
7+
"text": "Running the TimePlugin.Now function..."
8+
},
69
{
710
"functionCall": {
811
"name": "TimePlugin%nameSeparator%Now",

dotnet/src/Connectors/Connectors.Google/Core/Gemini/Clients/GeminiChatCompletionClient.cs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ public async Task<IReadOnlyList<ChatMessageContent>> GenerateChatMessageAsync(
197197
}
198198

199199
state.LastMessage = chatResponses[0];
200-
if (state.LastMessage.ToolCalls is null)
200+
if (state.LastMessage.ToolCalls is null || state.LastMessage.ToolCalls.Count == 0)
201201
{
202202
return chatResponses;
203203
}
@@ -356,12 +356,16 @@ private async IAsyncEnumerable<StreamingChatMessageContent> GetStreamingChatMess
356356

357357
// If function call was returned there is no more data in stream
358358
state.LastMessage = messageContent;
359+
360+
// Yield the message also if it contains text
361+
if (!string.IsNullOrWhiteSpace(messageContent.Content))
362+
{
363+
yield return this.GetStreamingChatContentFromChatContent(messageContent);
364+
}
365+
359366
yield break;
360367
}
361368

362-
// We disable auto-invoke because the first message in the stream doesn't contain ToolCalls or auto-invoke is already false
363-
state.AutoInvoke = false;
364-
365369
// If we don't want to attempt to invoke any functions, just return the result.
366370
yield return this.GetStreamingChatContentFromChatContent(messageContent);
367371
}
@@ -604,11 +608,17 @@ [new GeminiChatMessageContent(role: AuthorRole.Assistant, content: string.Empty,
604608

605609
private GeminiChatMessageContent GetChatMessageContentFromCandidate(GeminiResponse geminiResponse, GeminiResponseCandidate candidate)
606610
{
607-
GeminiPart? part = candidate.Content?.Parts?[0];
608-
GeminiPart.FunctionCallPart[]? toolCalls = part?.FunctionCall is { } function ? [function] : null;
611+
// Join text parts
612+
string text = string.Concat(candidate.Content?.Parts?.Select(part => part.Text) ?? []);
613+
614+
// Gemini sometimes returns function calls with text parts, so collect them
615+
var toolCalls = candidate.Content?.Parts?
616+
.Select(part => part.FunctionCall!)
617+
.Where(toolCall => toolCall is not null).ToArray();
618+
609619
return new GeminiChatMessageContent(
610620
role: candidate.Content?.Role ?? AuthorRole.Assistant,
611-
content: part?.Text ?? string.Empty,
621+
content: text,
612622
modelId: this._modelId,
613623
functionsToolCalls: toolCalls,
614624
metadata: GetResponseMetadata(geminiResponse, candidate));

0 commit comments

Comments
 (0)