Skip to content

Commit da84641

Browse files
committed
feat: add function calling and code execution examples to multimodal live api websockets notebook
1 parent 93443fd commit da84641

File tree

1 file changed

+189
-0
lines changed

1 file changed

+189
-0
lines changed

gemini/multimodal-live-api/intro_multimodal_live_api.ipynb

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,8 @@
104104
"- Text-to-text generation\n",
105105
"- Text-to-audio generation\n",
106106
"- Text-to-audio conversation\n",
107+
"- Function calling\n",
108+
"- Code execution\n",
107109
"\n",
108110
"See the [Multimodal Live API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live) page for more details."
109111
]
@@ -590,6 +592,193 @@
590592
"await main()"
591593
]
592594
},
595+
{
596+
"cell_type": "markdown",
597+
"metadata": {
598+
"id": "f214d0c3bee0"
599+
},
600+
"source": [
601+
"### **Example 4**: Function calling\n",
602+
"\n",
603+
"You can use function calling to create a description of a function, then pass that description to the model in a request. The response from the model includes the name of a function that matches the description and the arguments to call it with.\n",
604+
"\n",
605+
"**Notes**:\n",
606+
"\n",
607+
"- All functions must be declared at the start of the session by sending tool definitions as part of the `setup` message.\n",
608+
"- Currently only one tool is supported in the API."
609+
]
610+
},
611+
{
612+
"cell_type": "code",
613+
"execution_count": null,
614+
"metadata": {
615+
"id": "8a7595aee24a"
616+
},
617+
"outputs": [],
618+
"source": [
619+
"# Set model generation_config\n",
620+
"CONFIG = {\"response_modalities\": [\"TEXT\"]}\n",
621+
"\n",
622+
"# Define function declarations\n",
623+
"TOOLS = {\n",
624+
" \"function_declarations\": {\n",
625+
" \"name\": \"get_current_weather\",\n",
626+
" \"description\": \"Get the current weather in the given location\",\n",
627+
" \"parameters\": {\n",
628+
" \"type\": \"OBJECT\",\n",
629+
" \"properties\": {\"location\": {\"type\": \"STRING\"}},\n",
630+
" },\n",
631+
" }\n",
632+
"}\n",
633+
"\n",
634+
"headers = {\n",
635+
" \"Content-Type\": \"application/json\",\n",
636+
" \"Authorization\": f\"Bearer {bearer_token[0]}\",\n",
637+
"}\n",
638+
"\n",
639+
"# Connect to the server\n",
640+
"async with connect(SERVICE_URL, additional_headers=headers) as ws:\n",
641+
" # Setup the session\n",
642+
" await ws.send(\n",
643+
" json.dumps(\n",
644+
" {\n",
645+
" \"setup\": {\n",
646+
" \"model\": MODEL,\n",
647+
" \"generation_config\": CONFIG,\n",
648+
" \"tools\": TOOLS,\n",
649+
" }\n",
650+
" }\n",
651+
" )\n",
652+
" )\n",
653+
"\n",
654+
" # Receive setup response\n",
655+
" raw_response = await ws.recv(decode=False)\n",
656+
" setup_response = json.loads(raw_response.decode())\n",
657+
"\n",
658+
" # Send text message\n",
659+
" text_input = \"Get the current weather in Santa Clara, San Jose and Mountain View\"\n",
660+
" display(Markdown(f\"**Input:** {text_input}\"))\n",
661+
"\n",
662+
" msg = {\n",
663+
" \"client_content\": {\n",
664+
" \"turns\": [{\"role\": \"user\", \"parts\": [{\"text\": text_input}]}],\n",
665+
" \"turn_complete\": True,\n",
666+
" }\n",
667+
" }\n",
668+
"\n",
669+
" await ws.send(json.dumps(msg))\n",
670+
"\n",
671+
" responses = []\n",
672+
"\n",
673+
"    # Receive chunks of server response\n",
674+
" async for raw_response in ws:\n",
675+
" response = json.loads(raw_response.decode(\"UTF-8\"))\n",
676+
"\n",
677+
" if (tool_call := response.get(\"toolCall\")) is not None:\n",
678+
" for function_call in tool_call[\"functionCalls\"]:\n",
679+
" responses.append(f\"FunctionCall: {str(function_call)}\\n\")\n",
680+
"\n",
681+
" if (server_content := response.get(\"serverContent\")) is not None:\n",
682+
" if server_content.get(\"turnComplete\", True):\n",
683+
" break\n",
684+
"\n",
685+
" # Print the server response\n",
686+
" display(Markdown(\"**Response >** {}\".format(\"\\n\".join(responses))))"
687+
]
688+
},
689+
{
690+
"cell_type": "markdown",
691+
"metadata": {
692+
"id": "ad6b585deadb"
693+
},
694+
"source": [
695+
"### **Example 5**: Code execution\n",
696+
"\n",
697+
"You can use the code execution capability to generate and execute Python code directly within the API.\n",
698+
"\n",
699+
"In this example, you initialize the code execution tool by passing `code_execution` in the `Tools` configuration, and register this tool with the model at the start of the session by sending tool definitions as part of the `setup` message."
700+
]
701+
},
702+
{
703+
"cell_type": "code",
704+
"execution_count": null,
705+
"metadata": {
706+
"id": "acbbd8c0155e"
707+
},
708+
"outputs": [],
709+
"source": [
710+
"# Set model generation_config\n",
711+
"CONFIG = {\"response_modalities\": [\"TEXT\"]}\n",
712+
"\n",
713+
"# Set code execution\n",
714+
"TOOLS = {\"code_execution\": {}}\n",
715+
"\n",
716+
"headers = {\n",
717+
" \"Content-Type\": \"application/json\",\n",
718+
" \"Authorization\": f\"Bearer {bearer_token[0]}\",\n",
719+
"}\n",
720+
"\n",
721+
"# Connect to the server\n",
722+
"async with connect(SERVICE_URL, additional_headers=headers) as ws:\n",
723+
" # Setup the session\n",
724+
" await ws.send(\n",
725+
" json.dumps(\n",
726+
" {\n",
727+
" \"setup\": {\n",
728+
" \"model\": MODEL,\n",
729+
" \"generation_config\": CONFIG,\n",
730+
" \"tools\": TOOLS,\n",
731+
" }\n",
732+
" }\n",
733+
" )\n",
734+
" )\n",
735+
"\n",
736+
" # Receive setup response\n",
737+
" raw_response = await ws.recv(decode=False)\n",
738+
" setup_response = json.loads(raw_response.decode())\n",
739+
"\n",
740+
" # Send text message\n",
741+
"    text_input = \"Write code to calculate the 15th Fibonacci number then find the nearest palindrome to it\"\n",
742+
" display(Markdown(f\"**Input:** {text_input}\"))\n",
743+
"\n",
744+
" msg = {\n",
745+
" \"client_content\": {\n",
746+
" \"turns\": [{\"role\": \"user\", \"parts\": [{\"text\": text_input}]}],\n",
747+
" \"turn_complete\": True,\n",
748+
" }\n",
749+
" }\n",
750+
"\n",
751+
" await ws.send(json.dumps(msg))\n",
752+
"\n",
753+
" responses = []\n",
754+
"\n",
755+
"    # Receive chunks of server response\n",
756+
" async for raw_response in ws:\n",
757+
" response = json.loads(raw_response.decode(\"UTF-8\"))\n",
758+
"\n",
759+
" if (server_content := response.get(\"serverContent\")) is not None:\n",
760+
" model_turn = server_content.get(\"modelTurn\")\n",
761+
" if (parts := model_turn.get(\"parts\")) is not None:\n",
762+
" if parts[0].get(\"text\"):\n",
763+
" responses.append(parts[0][\"text\"])\n",
764+
" for part in parts:\n",
765+
" if (executable_code := part.get(\"executableCode\")) is not None:\n",
766+
" display(\n",
767+
" Markdown(\n",
768+
" f\"\"\"**Executable code:**\n",
769+
"```py\n",
770+
"{executable_code.get(\"code\")}\n",
771+
"```\n",
772+
" \"\"\"\n",
773+
" )\n",
774+
" )\n",
775+
" if server_content.get(\"turnComplete\", False):\n",
776+
" break\n",
777+
"\n",
778+
" # Print the server response\n",
779+
" display(Markdown(f\"**Response >** {''.join(responses)}\"))"
780+
]
781+
},
593782
{
594783
"cell_type": "markdown",
595784
"metadata": {

0 commit comments

Comments
 (0)