From da84641216074d4e6489bb4a4dd0fda3f7adf568 Mon Sep 17 00:00:00 2001
From: Eric Dong
Date: Tue, 21 Jan 2025 16:54:45 +0000
Subject: [PATCH] feat: add function calling and code execution examples to multimodal live api websockets notebook

---
 .../intro_multimodal_live_api.ipynb | 189 ++++++++++++++++++
 1 file changed, 189 insertions(+)

diff --git a/gemini/multimodal-live-api/intro_multimodal_live_api.ipynb b/gemini/multimodal-live-api/intro_multimodal_live_api.ipynb
index b205d9daf0f..c62a6e2c6a9 100644
--- a/gemini/multimodal-live-api/intro_multimodal_live_api.ipynb
+++ b/gemini/multimodal-live-api/intro_multimodal_live_api.ipynb
@@ -104,6 +104,8 @@
     "- Text-to-text generation\n",
     "- Text-to-audio generation\n",
     "- Text-to-audio conversation\n",
+    "- Function calling\n",
+    "- Code execution\n",
     "\n",
     "See the [Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live) page for more details."
    ]
   },
@@ -590,6 +592,193 @@
     "await main()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "f214d0c3bee0"
+   },
+   "source": [
+    "### **Example 4**: Function calling\n",
+    "\n",
+    "You can use function calling to create a description of a function, then pass that description to the model in a request. The response from the model includes the name of a function that matches the description and the arguments to call it with.\n",
+    "\n",
+    "**Notes**:\n",
+    "\n",
+    "- All functions must be declared at the start of the session by sending tool definitions as part of the `setup` message.\n",
+    "- Currently, only one tool is supported in the API."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "8a7595aee24a"
+   },
+   "outputs": [],
+   "source": [
+    "# Set model generation_config\n",
+    "CONFIG = {\"response_modalities\": [\"TEXT\"]}\n",
+    "\n",
+    "# Define function declarations\n",
+    "TOOLS = {\n",
+    "    \"function_declarations\": {\n",
+    "        \"name\": \"get_current_weather\",\n",
+    "        \"description\": \"Get the current weather in the given location\",\n",
+    "        \"parameters\": {\n",
+    "            \"type\": \"OBJECT\",\n",
+    "            \"properties\": {\"location\": {\"type\": \"STRING\"}},\n",
+    "        },\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "headers = {\n",
+    "    \"Content-Type\": \"application/json\",\n",
+    "    \"Authorization\": f\"Bearer {bearer_token[0]}\",\n",
+    "}\n",
+    "\n",
+    "# Connect to the server\n",
+    "async with connect(SERVICE_URL, additional_headers=headers) as ws:\n",
+    "    # Setup the session\n",
+    "    await ws.send(\n",
+    "        json.dumps(\n",
+    "            {\n",
+    "                \"setup\": {\n",
+    "                    \"model\": MODEL,\n",
+    "                    \"generation_config\": CONFIG,\n",
+    "                    \"tools\": TOOLS,\n",
+    "                }\n",
+    "            }\n",
+    "        )\n",
+    "    )\n",
+    "\n",
+    "    # Receive setup response\n",
+    "    raw_response = await ws.recv(decode=False)\n",
+    "    setup_response = json.loads(raw_response.decode())\n",
+    "\n",
+    "    # Send text message\n",
+    "    text_input = \"Get the current weather in Santa Clara, San Jose and Mountain View\"\n",
+    "    display(Markdown(f\"**Input:** {text_input}\"))\n",
+    "\n",
+    "    msg = {\n",
+    "        \"client_content\": {\n",
+    "            \"turns\": [{\"role\": \"user\", \"parts\": [{\"text\": text_input}]}],\n",
+    "            \"turn_complete\": True,\n",
+    "        }\n",
+    "    }\n",
+    "\n",
+    "    await ws.send(json.dumps(msg))\n",
+    "\n",
+    "    responses = []\n",
+    "\n",
+    "    # Receive chunks of server response\n",
+    "    async for raw_response in ws:\n",
+    "        response = json.loads(raw_response.decode(\"UTF-8\"))\n",
+    "\n",
+    "        if (tool_call := response.get(\"toolCall\")) is not None:\n",
+    "            for function_call in tool_call[\"functionCalls\"]:\n",
+    "                responses.append(f\"FunctionCall: {str(function_call)}\\n\")\n",
+    "\n",
+    "        if (server_content := response.get(\"serverContent\")) is not None:\n",
+    "            if server_content.get(\"turnComplete\", True):\n",
+    "                break\n",
+    "\n",
+    "    # Print the server response\n",
+    "    display(Markdown(\"**Response >** {}\".format(\"\\n\".join(responses))))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "ad6b585deadb"
+   },
+   "source": [
+    "### **Example 5**: Code execution\n",
+    "\n",
+    "You can use the code execution capability to generate and execute Python code directly within the API.\n",
+    "\n",
+    "In this example, you initialize the code execution tool by passing `code_execution` in the `tools` configuration, and register this tool with the model at the start of the session by sending the tool definition as part of the `setup` message."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "acbbd8c0155e"
+   },
+   "outputs": [],
+   "source": [
+    "# Set model generation_config\n",
+    "CONFIG = {\"response_modalities\": [\"TEXT\"]}\n",
+    "\n",
+    "# Set code execution\n",
+    "TOOLS = {\"code_execution\": {}}\n",
+    "\n",
+    "headers = {\n",
+    "    \"Content-Type\": \"application/json\",\n",
+    "    \"Authorization\": f\"Bearer {bearer_token[0]}\",\n",
+    "}\n",
+    "\n",
+    "# Connect to the server\n",
+    "async with connect(SERVICE_URL, additional_headers=headers) as ws:\n",
+    "    # Setup the session\n",
+    "    await ws.send(\n",
+    "        json.dumps(\n",
+    "            {\n",
+    "                \"setup\": {\n",
+    "                    \"model\": MODEL,\n",
+    "                    \"generation_config\": CONFIG,\n",
+    "                    \"tools\": TOOLS,\n",
+    "                }\n",
+    "            }\n",
+    "        )\n",
+    "    )\n",
+    "\n",
+    "    # Receive setup response\n",
+    "    raw_response = await ws.recv(decode=False)\n",
+    "    setup_response = json.loads(raw_response.decode())\n",
+    "\n",
+    "    # Send text message\n",
+    "    text_input = \"Write code to calculate the 15th fibonacci number then find the nearest palindrome to it\"\n",
+    "    display(Markdown(f\"**Input:** {text_input}\"))\n",
+    "\n",
+    "    msg = {\n",
+    "        \"client_content\": {\n",
+    "            \"turns\": [{\"role\": \"user\", \"parts\": [{\"text\": text_input}]}],\n",
+    "            \"turn_complete\": True,\n",
+    "        }\n",
+    "    }\n",
+    "\n",
+    "    await ws.send(json.dumps(msg))\n",
+    "\n",
+    "    responses = []\n",
+    "\n",
+    "    # Receive chunks of server response\n",
+    "    async for raw_response in ws:\n",
+    "        response = json.loads(raw_response.decode(\"UTF-8\"))\n",
+    "\n",
+    "        if (server_content := response.get(\"serverContent\")) is not None:\n",
+    "            model_turn = server_content.get(\"modelTurn\")\n",
+    "            if model_turn is not None and (parts := model_turn.get(\"parts\")) is not None:\n",
+    "                if parts[0].get(\"text\"):\n",
+    "                    responses.append(parts[0][\"text\"])\n",
+    "                for part in parts:\n",
+    "                    if (executable_code := part.get(\"executableCode\")) is not None:\n",
+    "                        display(\n",
+    "                            Markdown(\n",
+    "                                f\"\"\"**Executable code:**\n",
+    "```py\n",
+    "{executable_code.get(\"code\")}\n",
+    "```\n",
+    " \"\"\"\n",
+    "                            )\n",
+    "                        )\n",
+    "            if server_content.get(\"turnComplete\", False):\n",
+    "                break\n",
+    "\n",
+    "    # Print the server response\n",
+    "    display(Markdown(f\"**Response >** {''.join(responses)}\"))"
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {