From ed1d23b6a9a0222ae85db486d4a25e2b6596a20d Mon Sep 17 00:00:00 2001
From: Shawn Lewis <shlewis@gmail.com>
Date: Thu, 12 Sep 2024 15:42:06 -0700
Subject: [PATCH 1/2] Add support for o1 agents.

---
 README.md                               |  14 +-
 programmer-ui/ui.py                     |   6 +-
 programmer/agent_texteditor_o1.py       | 223 ++++++++++++++++++++++++
 programmer/config.py                    |  69 ++++++++
 programmer/console.py                   |  39 +----
 programmer/evals/eval_repeated_edits.py |   8 +-
 programmer/programmer.py                |  50 +++++-
 programmer/settings_manager.py          |   8 +-
 8 files changed, 368 insertions(+), 49 deletions(-)
 create mode 100644 programmer/agent_texteditor_o1.py

diff --git a/README.md b/README.md
index 55069b5..bc7fd78 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 programmer is a command-line based AI programmer, that will help you get stuff done.
 
-programmer has direct access to your machine, it can run commands, and read and write files, without safety checks. You have been warned!
+**WARNING**: programmer has direct access to your machine, it can run commands, and read and write files, without safety checks. You have been warned!
 
 ![Demo](./assets/programmer-demo-1080.gif)
 
@@ -13,6 +13,18 @@ pip install programmer
 programmer 
 ```
 
+## Switching to o1-based agents
+
+Programmer works with OpenAI's just-released [o1 models](https://openai.com/index/introducing-openai-o1-preview/).
+
+**GIANT WARNING**: Remember, programmer directly runs agent commands on your machine, without prompting you first. The o1 models are brand new and should not be trusted to do this! You have been **GIANT WARNING** warned.
+
+```
+programmer settings set agent o1-preview-2024-09-12_o1harness
+# OR
+programmer settings set agent o1-mini-2024-09-12_o1harness
+```
+
 ## Examples
 
 - "What processes are listening on port 4512?" ... "ok, please kill them"
diff --git a/programmer-ui/ui.py b/programmer-ui/ui.py
index 4dcb55b..9459b43 100644
--- a/programmer-ui/ui.py
+++ b/programmer-ui/ui.py
@@ -429,8 +429,10 @@ def on_change_parallel_tool_calls():
     if st.button("Generate"):
         chat_inputs = {**editable_call["inputs"]}
         # st.json(chat_inputs, expanded=False)
-        del chat_inputs["stream"]
-        del chat_inputs["self"]
+        if "stream" in chat_inputs:
+            del chat_inputs["stream"]
+        if "self" in chat_inputs:
+            del chat_inputs["self"]
         chat_inputs["n"] = n_choices
         call_resp = openai.chat.completions.create(**chat_inputs).model_dump()
 
diff --git a/programmer/agent_texteditor_o1.py b/programmer/agent_texteditor_o1.py
new file mode 100644
index 0000000..27c832b
--- /dev/null
+++ b/programmer/agent_texteditor_o1.py
@@ -0,0 +1,223 @@
+from typing import Any, Union
+from dataclasses import dataclass
+from pydantic import Field
+import openai
+from openai.types.chat import ChatCompletionMessageParam
+import json
+import re
+import time
+import uuid
+from openai.types.chat import (
+    ChatCompletionMessageToolCall,
+)
+
+import weave
+from weave.trace.vals import WeaveList
+
+from .console import Console
+from .tool_calling import (
+    chat_call_tool_params,
+    perform_tool_calls,
+    generate_json_schema,
+)
+from .text_editor import (
+    TextEditor,
+    TextEditorState,
+    TextEditorStateful,
+    open_file,
+    replace_file_lines,
+    text_editor,
+)
+from .agent import AgentState, Agent
+
+
+def weavelist_add(self: Union[list, WeaveList], other: list) -> Union[list, WeaveList]:
+    if isinstance(self, list):
+        return self + other  # plain list: ordinary concatenation into a new list
+    if not isinstance(other, list):
+        return NotImplemented  # on the non-list path only list operands are accepted
+    return WeaveList(list(self) + other, server=self.server)  # keeps self's server
+
+
+@dataclass
+class ToolCallFunction:
+    name: str  # tool name, taken from the tag nested inside <tool_call>
+    arguments: str  # raw text captured between the parentheses
+
+
+@dataclass
+class ToolCall:
+    function: ToolCallFunction  # which tool to call and its argument string
+    id: str  # value of the id='...' attribute on the <tool_call> tag
+
+
+class AgentStateTextEditor(AgentState):  # AgentState extended with text editor state
+    text_editor_state: TextEditorState = Field(default_factory=TextEditorState)
+
+    def with_history(self, history: list[Any]) -> "AgentStateTextEditor":
+        next_state = super().with_history(history)  # let the base class build the new state
+        return AgentStateTextEditor(  # rebuild as this subclass, keeping the editor state
+            history=next_state.history,
+            env_snapshot_key=next_state.env_snapshot_key,
+            text_editor_state=self.text_editor_state,
+        )
+
+    def with_texteditor_state(
+        self, text_editor_state: TextEditorState
+    ) -> "AgentStateTextEditor":
+        return AgentStateTextEditor(  # same history and snapshot key, new editor state
+            history=self.history,
+            env_snapshot_key=self.env_snapshot_key,
+            text_editor_state=text_editor_state,
+        )
+
+
+class AgentTextEditorO1(Agent):
+    parallel_tool_calls: bool = True
+    text_editor: TextEditor
+
+    def initial_state(self, history: list[Any]) -> AgentStateTextEditor:
+        return AgentStateTextEditor(history=history)  # editor state comes from its default factory
+
+    @weave.op()
+    def step(self, state: AgentStateTextEditor) -> AgentStateTextEditor:
+        """Run one model turn and execute any tool calls found in its reply.
+
+        Args:
+            state: Agent state holding the history and the text editor state.
+
+        Returns:
+            A new state with the reply (and tool results, if any) appended.
+        """
+        Console.step_start("agent", "green")
+
+        # Prepare messages
+        messages: list[ChatCompletionMessageParam] = []
+
+        # Combine system message and open_file_info into a user message
+        open_file_info = state.text_editor_state.get_open_file_info()
+        initial_content = (
+            f"{self.system_message}\n\n{open_file_info.format_for_messages()}"
+        )
+
+        # Copy the agent's tools (tolerating None) so the += below never mutates them
+        self_tools = [*(self.tools or [])]
+        text_editor_stateful = TextEditorStateful(
+            self.text_editor, state.text_editor_state
+        )
+
+        self_tools += [open_file, replace_file_lines]
+
+        # Generate tool descriptions
+        tools_descriptions = ""
+        for tool in self_tools:
+            tool_schema = generate_json_schema(tool)
+            tool_name = tool.__name__
+            tool_description = tool_schema.get("function", {}).get("description", "")
+            tool_parameters = tool_schema.get("function", {}).get("parameters", {})
+            tools_descriptions += f"\n- {tool_name}: {tool_description}\nParameters: {json.dumps(tool_parameters)}\n"
+
+        initial_content += f"\n\nAvailable tools:{tools_descriptions}\n"
+
+        # Describe the exact tag format that parse_tool_calls() matches
+        initial_content += (
+            "When you want to use a tool, please output the tool call in the following format:\n"
+            "<tool_call id='unique_id'><tool_name>(<json_arguments>)</tool_name></tool_call>\n"
+            'For example: <tool_call id=\'123\'><open_file>({"file_name": "example.txt"})</open_file></tool_call>\n'
+            "Please include the tool call in your response where appropriate.\n"
+            "If you have achieved your goal, or you're stuck, don't call a tool!"
+        )
+
+        # Add the initial user message
+        messages.append(
+            {
+                "role": "user",
+                "content": f"<user_instructions>{initial_content}</user_instructions>",
+            }
+        )
+
+        # Add conversation history, ensuring only 'assistant' and 'user' roles
+        messages += [
+            msg for msg in state.history if msg.get("role") in ["assistant", "user"]
+        ]
+
+        Console.chat_response_start()
+
+        # Call the OpenAI API
+        response = openai.chat.completions.create(
+            model=self.model_name,
+            temperature=self.temperature,
+            messages=messages,
+            timeout=600,
+        )
+
+        # Get the assistant's response
+        response_message = response.choices[0].message
+
+        if response_message.content:
+            print(response_message.content)
+            Console.chat_response_complete(response_message.content)
+
+        new_messages = []
+        # Store the assistant's response
+        new_messages.append(
+            {
+                "role": response_message.role,
+                "content": response_message.content,
+            }
+        )
+
+        # Parse any tool calls from the assistant's response
+        tool_calls = self.parse_tool_calls(response_message.content or "")
+
+        if tool_calls:
+            with text_editor(text_editor_stateful):
+                tool_messages = perform_tool_calls(self_tools, tool_calls)
+
+                # Combine tool call responses into a single user message
+                tool_responses = "<tool_call_responses>\n"
+                for msg in tool_messages:
+                    tool_responses += f"<tool_call_response id='{msg['tool_call_id']}'>{msg['content']}</tool_call_response>\n"
+                tool_responses += "</tool_call_responses>"
+
+                new_messages.append({"role": "user", "content": tool_responses})
+
+        new_history = weavelist_add(state.history, new_messages)
+
+        next_state = state.with_history(new_history)
+        next_state = next_state.with_texteditor_state(text_editor_stateful.state)
+        return next_state
+
+    def parse_tool_calls(self, content: str) -> list:
+        """Extract tool calls embedded in an assistant message.
+
+        Looks for `<tool_call id='ID'><NAME>(ARGS)</NAME></tool_call>` spans;
+        ARGS is kept as the raw string found between the parentheses.
+
+        Returns:
+            ToolCall objects in order of appearance; empty if nothing matches.
+        """
+        pattern = r"<tool_call id='(.*?)'><(.*?)>\((.*?)\)</\2></tool_call>"
+        return [
+            ToolCall(
+                function=ToolCallFunction(name=m.group(2), arguments=m.group(3)),
+                id=m.group(1),
+            )
+            for m in re.finditer(pattern, content, re.DOTALL)
+        ]
+
+    @weave.op()
+    def run(self, state: AgentState, max_runtime_seconds: int = -1):
+        """Step until the assistant makes no tool call or the time limit passes."""
+        start_time = time.time()
+        while True:
+            last_message = state.history[-1]
+            if last_message["role"] == "assistant":
+                # `content` may be present but None (step() stores it verbatim),
+                # so a `.get` default alone does not guarantee a str here.
+                if not self.parse_tool_calls(last_message.get("content") or ""):
+                    return {"state": state, "stop_reason": "done"}
+            state = self.step(state)
+            elapsed = time.time() - start_time
+            if max_runtime_seconds > 0 and elapsed > max_runtime_seconds:
+                return {"state": state, "stop_reason": "time_limit_exceeded"}
diff --git a/programmer/config.py b/programmer/config.py
index fb81ea8..c2e90a6 100644
--- a/programmer/config.py
+++ b/programmer/config.py
@@ -17,6 +17,8 @@
 from .agent import Agent
 from .agent_texteditor import AgentTextEditor
 from .text_editor import TextEditor
+from .agent_texteditor_o1 import AgentTextEditorO1
+from typing import Optional, Any
 
 agent_4o_basic = Agent(
     name="gpt-4o-2024-08-06_basic",
@@ -127,3 +129,70 @@
     tools=[list_files, run_command, view_image],
     parallel_tool_calls=False,
 )
+
+agent_texteditor_o1_gpt4o = AgentTextEditorO1(  # o1-style harness driven by gpt-4o
+    name="gpt4o_o1harness",
+    model_name="gpt-4o-2024-08-06",
+    temperature=0.7,
+    system_message=SYSTEM_MESSAGE,
+    text_editor=text_editor,
+    tools=[list_files, run_command, view_image],
+)
+
+agent_texteditor_o1_o1preview = AgentTextEditorO1(
+    name="o1-preview-2024-09-12_o1harness",
+    model_name="o1-preview-2024-09-12",
+    temperature=1,  # NOTE(review): presumably the only value the o1 API accepts — confirm
+    system_message=SYSTEM_MESSAGE,
+    text_editor=text_editor,
+    tools=[list_files, run_command, view_image],
+)
+
+agent_texteditor_o1_o1mini = AgentTextEditorO1(
+    name="o1-mini-2024-09-12_o1harness",
+    model_name="o1-mini-2024-09-12",
+    temperature=1,  # NOTE(review): presumably the only value the o1 API accepts — confirm
+    system_message=SYSTEM_MESSAGE,
+    text_editor=text_editor,
+    tools=[list_files, run_command, view_image],
+)
+
+
+def get_config_by_name(name: str) -> Optional[Any]:
+    """
+    Look up an agent configuration defined in this module by its `name`.
+
+    Every module-level global that is an `Agent` instance is a candidate;
+    the first one whose `.name` equals `name` wins.
+
+    Args:
+        name (str): The agent name to search for.
+
+    Returns:
+        Optional[Any]: The matching agent, or None when nothing matches.
+    """
+    candidates = (
+        value
+        for value in globals().values()
+        if isinstance(value, Agent) and value.name == name
+    )
+    # next() with a default yields the first match, or None if the
+    # generator is exhausted without finding one.
+    return next(candidates, None)
+
+
+def get_all_config_names() -> list[str]:
+    """
+    List the names of every agent configuration defined in this module.
+
+    Returns:
+        list[str]: The `.name` of each module-level agent instance, sorted.
+    """
+    agent_types = (Agent, AgentTextEditor, AgentTextEditorO1)
+    # Scan module globals; anything that is an agent instance contributes
+    # its name, and the result is returned in sorted order.
+    return sorted(
+        candidate.name
+        for candidate in globals().values()
+        if isinstance(candidate, agent_types)
+    )
diff --git a/programmer/console.py b/programmer/console.py
index 11a1758..65ceab7 100644
--- a/programmer/console.py
+++ b/programmer/console.py
@@ -1,18 +1,20 @@
 import sys
+from typing import Optional
 from rich.console import Console as RichConsole
 from rich.padding import Padding
 
-# Adjusting import to absolute path
-from .settings_manager import SettingsManager
 
 console = RichConsole()
 
 
 class Console:
     @staticmethod
-    def welcome() -> None:
+    def welcome(agent_name: Optional[str] = None) -> None:
         console.rule("[bold blue]Programmer")
-        console.print("Welcome to programmer.\n")
+        console.print("Welcome to programmer.")
+        if agent_name:
+            console.print(f"Using agent: [bold]{agent_name}[/bold]")
+        console.print()
 
     @staticmethod
     def step_start(name: str, color: str) -> None:
@@ -50,32 +52,3 @@ def tool_call_complete(tool_response: str) -> None:
     @staticmethod
     def user_input_complete(user_input: str) -> None:
         console.print()
-
-    @staticmethod
-    def settings_command(command_args):
-        if len(command_args) < 2:
-            console.print("[red]Invalid settings command[/red]")
-            return
-        action = command_args[0]
-        key = command_args[1]
-        if action == "get":
-            value = SettingsManager.get_setting(key)
-            if value is not None:
-                console.print(f"{key} = {value}")
-            else:
-                console.print(f"[red]Setting '{key}' not found[/red]")
-        elif action == "set" and len(command_args) == 3:
-            value = command_args[2]
-            SettingsManager.set_setting(key, value)
-            console.print(f"[green]Setting '{key}' updated to '{value}'[/green]")
-        else:
-            console.print("[red]Invalid settings command[/red]")
-
-
-# Example of integrating a basic command line argument parsing
-if __name__ == "__main__":
-    SettingsManager.initialize_settings()
-    if len(sys.argv) > 1 and sys.argv[1] == "settings":
-        Console.settings_command(sys.argv[2:])
-    else:
-        Console.welcome()
diff --git a/programmer/evals/eval_repeated_edits.py b/programmer/evals/eval_repeated_edits.py
index 0797713..2dda96f 100644
--- a/programmer/evals/eval_repeated_edits.py
+++ b/programmer/evals/eval_repeated_edits.py
@@ -336,11 +336,13 @@ def run_single_trial(trial_idx: int):
         # agent_claude_splice,
         # agent_texteditor_4o_basic,
         # agent_texteditor_4o_basic_temp0,
-        agent_texteditor_4o_basic_noparalleltc,
+        # agent_texteditor_4o_basic_noparalleltc,
+        # agent_texteditor_o1_o1preview,
+        agent_texteditor_o1_o1mini,
     ]
 
-    config = EvalEditMemoryConfig(n_lines=1000, run_timeout_seconds=60)
-    n_trials = 10
+    config = EvalEditMemoryConfig(n_lines=100, run_timeout_seconds=600)
+    n_trials = 1
     config_s = f'{config["n_lines"]}lines_{config["run_timeout_seconds"]}timeout'
     results = {}
     for agent in agents:
diff --git a/programmer/programmer.py b/programmer/programmer.py
index 403d1d6..fdf4d6a 100644
--- a/programmer/programmer.py
+++ b/programmer/programmer.py
@@ -11,10 +11,13 @@
 import weave
 
 from .agent import Agent, AgentState, get_commit_message
-from .console import Console
+from .console import Console, console
 from .config import (
-    agent_4o_replace,
-    agent_texteditor_4o_basic,
+    # agent_4o_replace,
+    # agent_texteditor_4o_basic,
+    # agent_texteditor_o1_gpt4o,
+    # agent_texteditor_o1_o1preview,
+    get_config_by_name,
 )
 from .environment import (
     environment_session,
@@ -28,8 +31,6 @@
 
 from .git import GitRepo
 
-agent = agent_texteditor_4o_basic
-
 
 @weave.op
 def get_user_input():
@@ -53,6 +54,26 @@ def user_input_step(state: AgentState) -> AgentState:
     return state.with_history(history)
 
 
+def settings_command(command_args):
+    """Handle `settings get <key>` and `settings set <key> <value>` arguments."""
+    if len(command_args) < 2:
+        console.print("[red]Invalid settings command[/red]")
+        return
+    action, key = command_args[0], command_args[1]
+    if action == "set" and len(command_args) == 3:
+        SettingsManager.set_setting(key, command_args[2])
+        console.print(f"[green]Setting '{key}' updated to '{command_args[2]}'[/green]")
+        return
+    if action != "get":
+        console.print("[red]Invalid settings command[/red]")
+        return
+    current = SettingsManager.get_setting(key)
+    if current is None:
+        console.print(f"[red]Setting '{key}' not found[/red]")
+    else:
+        console.print(f"{key} = {current}")
+
+
 def make_environment():
     git_repo = GitRepo.from_current_dir()
     git_tracking_enabled = SettingsManager.get_setting("git_tracking") == "on"
@@ -119,7 +140,7 @@ def programmer():
     args = parser.parse_args()
 
     if args.command == "settings":
-        Console.settings_command(
+        settings_command(
             [args.action, args.key, args.value]
             if args.value
             else [args.action, args.key]
@@ -137,13 +158,24 @@ def programmer():
 
     # log to local sqlite db for now
 
-    Console.welcome()
-
     if args.state:
         state = weave.ref(args.state).get()
         if state.env_snapshot_key:
             environment = restore_environment(state.env_snapshot_key)
 
+    agent_name = SettingsManager.get_setting("agent")
+    if not agent_name:
+        raise ValueError(
+            "No agent name set. Please set the agent name in the settings."
+        )
+    agent = get_config_by_name(agent_name)
+    if not agent:
+        raise ValueError(
+            f"Agent {agent_name} not found. Please set a valid agent name in the settings."
+        )
+
+    Console.welcome(agent_name=agent.name)
+
     if args.command == "prompt":
         initial_prompt = " ".join(args.prompt_args)
         print("Initial prompt:", initial_prompt)
@@ -159,7 +191,7 @@ def programmer():
         ]
     )
 
-    session(agent_texteditor_4o_basic, state)
+    session(agent, state)
 
 
 def main():
diff --git a/programmer/settings_manager.py b/programmer/settings_manager.py
index a8f848f..552fe63 100644
--- a/programmer/settings_manager.py
+++ b/programmer/settings_manager.py
@@ -1,4 +1,5 @@
 import os
+from .config import get_all_config_names
 
 
 class SettingsError(Exception):
@@ -8,8 +9,13 @@ class SettingsError(Exception):
 class SettingsManager:
     PROGRAMMER_DIR = ".programmer"
     SETTINGS_FILE = "settings"
-    DEFAULT_SETTINGS = {"weave_logging": "local", "git_tracking": "off"}
+    DEFAULT_SETTINGS = {
+        "agent": "gpt-4o-2024-08-06_basic",
+        "weave_logging": "local",
+        "git_tracking": "off",
+    }
     ALLOWED_VALUES = {
+        "agent": get_all_config_names(),
         "weave_logging": ["off", "local", "cloud"],
         "git_tracking": ["off", "on"],
     }

From 227429064a84c33477d8d1d263690ff9b15a8180 Mon Sep 17 00:00:00 2001
From: Shawn Lewis <shlewis@gmail.com>
Date: Thu, 12 Sep 2024 15:43:04 -0700
Subject: [PATCH 2/2] Add more to readme

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index bc7fd78..17b1d04 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,8 @@ programmer settings set agent o1-preview-2024-09-12_o1harness
 programmer settings set agent o1-mini-2024-09-12_o1harness
 ```
 
+The o1 agents currently don't work very well, and might do dangerous things. But they do work!
+
 ## Examples
 
 - "What processes are listening on port 4512?" ... "ok, please kill them"