Skip to content

Commit 6c91917

Browse files
authored
Merge pull request #172 from mjspeck/upstream-main
feat: added ability to use ollama running remotely
2 parents 0a22173 + 991ebe9 commit 6c91917

File tree

2 files changed

+31
-16
lines changed

2 files changed

+31
-16
lines changed

operate/config.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import os
22
import sys
3+
4+
import google.generativeai as genai
35
from dotenv import load_dotenv
6+
from ollama import Client
47
from openai import OpenAI
58
import anthropic
69
from prompt_toolkit.shortcuts import input_dialog
7-
import google.generativeai as genai
810

911

1012
class Config:
@@ -15,6 +17,7 @@ class Config:
1517
verbose (bool): Flag indicating whether verbose mode is enabled.
1618
openai_api_key (str): API key for OpenAI.
1719
google_api_key (str): API key for Google.
20+
ollama_host (str): URL of a remotely running ollama instance.
1821
"""
1922

2023
_instance = None
@@ -34,6 +37,9 @@ def __init__(self):
3437
self.google_api_key = (
3538
None # instance variables are backups in case saving to a `.env` fails
3639
)
40+
self.ollama_host = (
41+
None # instance variables are backups in case saving to a `.env` fails
42+
)
3743
self.anthropic_api_key = (
3844
None # instance variables are backups in case saving to a `.env` fails
3945
)
@@ -76,6 +82,19 @@ def initialize_google(self):
7682

7783
return model
7884

85+
def initialize_ollama(self):
86+
if self.ollama_host:
87+
if self.verbose:
88+
print("[Config][initialize_ollama] using cached ollama host")
89+
else:
90+
if self.verbose:
91+
print(
92+
"[Config][initialize_ollama] no cached ollama host. Assuming ollama running locally."
93+
)
94+
self.ollama_host = os.getenv("OLLAMA_HOST", None)
95+
model = Client(host=self.ollama_host)
96+
return model
97+
7998
def initialize_anthropic(self):
8099
if self.anthropic_api_key:
81100
api_key = self.anthropic_api_key

operate/models/apis.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,36 +1,31 @@
1+
import base64
2+
import io
3+
import json
14
import os
25
import time
3-
import json
4-
import base64
56
import traceback
6-
import io
7+
78
import easyocr
89
import ollama
9-
10+
import pkg_resources
1011
from PIL import Image
1112
from ultralytics import YOLO
1213

1314
from operate.config import Config
1415
from operate.exceptions import ModelNotRecognizedException
15-
from operate.utils.screenshot import (
16-
capture_screen_with_cursor,
17-
)
1816
from operate.models.prompts import (
17+
get_system_prompt,
1918
get_user_first_message_prompt,
2019
get_user_prompt,
21-
get_system_prompt,
2220
)
23-
from operate.utils.ocr import get_text_element, get_text_coordinates
24-
25-
2621
from operate.utils.label import (
2722
add_labels,
2823
get_click_position_in_percent,
2924
get_label_coordinates,
3025
)
31-
from operate.utils.style import ANSI_GREEN, ANSI_RED, ANSI_RESET, ANSI_BRIGHT_MAGENTA
32-
import pkg_resources
33-
26+
from operate.utils.ocr import get_text_coordinates, get_text_element
27+
from operate.utils.screenshot import capture_screen_with_cursor
28+
from operate.utils.style import ANSI_BRIGHT_MAGENTA, ANSI_GREEN, ANSI_RED, ANSI_RESET
3429

3530
# Load configuration
3631
config = Config()
@@ -568,6 +563,7 @@ def call_ollama_llava(messages):
568563
print("[call_ollama_llava]")
569564
time.sleep(1)
570565
try:
566+
model = config.initialize_ollama()
571567
screenshots_dir = "screenshots"
572568
if not os.path.exists(screenshots_dir):
573569
os.makedirs(screenshots_dir)
@@ -594,7 +590,7 @@ def call_ollama_llava(messages):
594590
}
595591
messages.append(vision_message)
596592

597-
response = ollama.chat(
593+
response = model.chat(
598594
model="llava",
599595
messages=messages,
600596
)

0 commit comments

Comments
 (0)