From 745c78aff91d8c06dfbde8bce4bb629dcba5e026 Mon Sep 17 00:00:00 2001
From: Matthew Zhou <mattzh1314@gmail.com>
Date: Fri, 14 Feb 2025 11:59:32 -0800
Subject: [PATCH 1/3] feat: Fix VLLM tool_choice (#1001)

---
 letta/llm_api/llm_api_tools.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/letta/llm_api/llm_api_tools.py b/letta/llm_api/llm_api_tools.py
index 65bdc1f12a..e4bc63f632 100644
--- a/letta/llm_api/llm_api_tools.py
+++ b/letta/llm_api/llm_api_tools.py
@@ -151,7 +151,8 @@ def create(
         if function_call is None and functions is not None and len(functions) > 0:
             # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
             # TODO(matt) move into LLMConfig
-            if llm_config.model_endpoint == "https://inference.memgpt.ai":
+            # TODO: This vLLM check is very brittle and is a stopgap at best
+            if llm_config.model_endpoint == "https://inference.memgpt.ai" or (llm_config.handle and "vllm" in llm_config.handle):
                 function_call = "auto"  # TODO change to "required" once proxy supports it
             else:
                 function_call = "required"

From 1c489245ef6e9809197f56c863e9b421e903db2d Mon Sep 17 00:00:00 2001
From: Matthew Zhou <mattzh1314@gmail.com>
Date: Fri, 14 Feb 2025 14:21:11 -0800
Subject: [PATCH 2/3] chore: Feb 14th Sync (#1007)

---
 letta/__init__.py | 2 +-
 pyproject.toml    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/letta/__init__.py b/letta/__init__.py
index 98bc9f067c..c566a339f0 100644
--- a/letta/__init__.py
+++ b/letta/__init__.py
@@ -1,4 +1,4 @@
-__version__ = "0.6.23"
+__version__ = "0.6.26"
 
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client
diff --git a/pyproject.toml b/pyproject.toml
index 7bba7df6c6..70d1bcc2e6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "letta"
-version = "0.6.23"
+version = "0.6.26"
 packages = [
     {include = "letta"},
 ]

From 1bf26d4ae523bf772ea81a32e3c769c1f2f86e70 Mon Sep 17 00:00:00 2001
From: Sarah Wooders <sarahwooders@gmail.com>
Date: Fri, 14 Feb 2025 18:14:01 -0800
Subject: [PATCH 3/3] feat: use function calling mode `ANY` for vertex (#1008)

Co-authored-by: cpacker <packercharles@gmail.com>
---
 letta/llm_api/google_vertex.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/letta/llm_api/google_vertex.py b/letta/llm_api/google_vertex.py
index a8bedcf5ec..c7c5772900 100644
--- a/letta/llm_api/google_vertex.py
+++ b/letta/llm_api/google_vertex.py
@@ -307,15 +307,27 @@ def google_vertex_chat_completions_request(
     """
 
     from google import genai
+    from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
 
     client = genai.Client(vertexai=True, project=project_id, location=region, http_options={"api_version": "v1"})
     # add dummy model messages to the end of the input
     if add_postfunc_model_messages:
         contents = add_dummy_model_messages(contents)
 
+    tool_config = ToolConfig(
+        function_calling_config=FunctionCallingConfig(
+            # ANY mode forces the model to predict only function calls
+            mode=FunctionCallingConfigMode.ANY,
+        )
+    )
+    config["tool_config"] = tool_config.model_dump()
+
     # make request to client
-    response = client.models.generate_content(model=model, contents=contents, config=config)
-    print(response)
+    response = client.models.generate_content(
+        model=model,
+        contents=contents,
+        config=config,
+    )
 
     # convert back response
     try: