Commit c5a57d6

Merge branch 'main' into rp_AzureDocs
2 parents: 3a6ac56 + 2e36536

File tree

7 files changed (+113, -126 lines)


aisuite/provider.py (+2)

@@ -24,6 +24,7 @@ class ProviderNames(str, Enum):
     GROQ = "groq"
     GOOGLE = "google"
     MISTRAL = "mistral"
+    OLLAMA = "ollama"
     OPENAI = "openai"

@@ -46,6 +47,7 @@ class ProviderFactory:
             "aisuite.providers.mistral_provider",
             "MistralProvider",
         ),
+        ProviderNames.OLLAMA: ("aisuite.providers.ollama_provider", "OllamaProvider"),
         ProviderNames.OPENAI: ("aisuite.providers.openai_provider", "OpenAIProvider"),
     }
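
Not part of the diff above: a minimal sketch of how this registry entry is exercised from the client side, following the "provider:model" string convention used in examples/client.ipynb. The ai.Client() constructor and the "llama3" model name are assumptions and do not appear in this commit.

import aisuite as ai

client = ai.Client()  # assumed aisuite entry point; not shown in this diff

messages = [{"role": "user", "content": "Howdy!"}]

# The "ollama:" prefix resolves via ProviderNames.OLLAMA to OllamaProvider.
# "llama3" is illustrative; the model must already be pulled into a running Ollama server.
response = client.chat.completions.create(model="ollama:llama3", messages=messages)
print(response.choices[0].message.content)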

aisuite/providers/__init__.py (-1)

@@ -2,6 +2,5 @@

 from .fireworks_interface import FireworksInterface
 from .octo_interface import OctoInterface
-from .ollama_interface import OllamaInterface
 from .replicate_interface import ReplicateInterface
 from .together_interface import TogetherInterface

aisuite/providers/ollama_interface.py (-54)

This file was deleted.

aisuite/providers/ollama_provider.py (+65)

@@ -0,0 +1,65 @@
+import os
+import httpx
+from aisuite.provider import Provider, LLMError
+from aisuite.framework import ChatCompletionResponse
+
+
+class OllamaProvider(Provider):
+    """
+    Ollama Provider that makes HTTP calls instead of using SDK.
+    It uses the /api/chat endpoint.
+    Read more here - https://github.com/ollama/ollama/blob/main/docs/api.md#generate-a-chat-completion
+    If OLLAMA_API_URL is not set and not passed in config, then it will default to "http://localhost:11434"
+    """
+
+    _CHAT_COMPLETION_ENDPOINT = "/api/chat"
+    _CONNECT_ERROR_MESSAGE = "Ollama is likely not running. Start Ollama by running `ollama serve` on your host."
+
+    def __init__(self, **config):
+        """
+        Initialize the Ollama provider with the given configuration.
+        """
+        self.url = config.get("api_url") or os.getenv(
+            "OLLAMA_API_URL", "http://localhost:11434"
+        )
+
+        # Optionally set a custom timeout (default to 30s)
+        self.timeout = config.get("timeout", 30)
+
+    def chat_completions_create(self, model, messages, **kwargs):
+        """
+        Makes a request to the chat completions endpoint using httpx.
+        """
+        kwargs["stream"] = False
+        data = {
+            "model": model,
+            "messages": messages,
+            **kwargs,  # Pass any additional arguments to the API
+        }
+
+        try:
+            response = httpx.post(
+                self.url.rstrip("/") + self._CHAT_COMPLETION_ENDPOINT,
+                json=data,
+                timeout=self.timeout,
+            )
+            response.raise_for_status()
+        except httpx.ConnectError:  # Handle connection errors
+            raise LLMError(f"Connection failed: {self._CONNECT_ERROR_MESSAGE}")
+        except httpx.HTTPStatusError as http_err:
+            raise LLMError(f"Ollama request failed: {http_err}")
+        except Exception as e:
+            raise LLMError(f"An error occurred: {e}")
+
+        # Return the normalized response
+        return self._normalize_response(response.json())
+
+    def _normalize_response(self, response_data):
+        """
+        Normalize the API response to a common format (ChatCompletionResponse).
+        """
+        normalized_response = ChatCompletionResponse()
+        normalized_response.choices[0].message.content = response_data["message"][
+            "content"
+        ]
+        return normalized_response
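
Not part of the commit: a minimal sketch of calling the new provider directly, assuming a local Ollama server started with `ollama serve` and an already-pulled model (the "llama3" name below is illustrative).

from aisuite.providers.ollama_provider import OllamaProvider

# Config keys match the constructor above; both fall back to defaults if omitted.
provider = OllamaProvider(api_url="http://localhost:11434", timeout=10)

response = provider.chat_completions_create(
    model="llama3",  # illustrative; must already exist on the Ollama server
    messages=[{"role": "user", "content": "Howdy!"}],
    temperature=0.77,
)
print(response.choices[0].message.content)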

examples/client.ipynb (+1, -16)

@@ -135,21 +135,6 @@
     "print(response.choices[0].message.content)"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4b3e6c41-070d-4041-9ed9-c8977790fe18",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "together_llama3_8b = \"together:meta-llama/Llama-3-8b-chat-hf\"\n",
-    "#together_llama3_70b = \"together:meta-llama/Llama-3-70b-chat-hf\"\n",
-    "\n",
-    "response = client.chat.completions.create(model=together_llama3_8b, messages=messages)\n",
-    "\n",
-    "print(response.choices[0].message.content)"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -168,7 +153,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "19cdb1ab",
+   "id": "6819ac17",
    "metadata": {},
    "outputs": [],
    "source": [

tests/providers/test_ollama_interface.py (-55)

This file was deleted.

New test file (+45)

@@ -0,0 +1,45 @@
+import pytest
+from unittest.mock import patch, MagicMock
+from aisuite.providers.ollama_provider import OllamaProvider
+
+
+@pytest.fixture(autouse=True)
+def set_api_url_var(monkeypatch):
+    """Fixture to set environment variables for tests."""
+    monkeypatch.setenv("OLLAMA_API_URL", "http://localhost:11434")
+
+
+def test_completion():
+    """Test that completions request successfully."""
+
+    user_greeting = "Howdy!"
+    message_history = [{"role": "user", "content": user_greeting}]
+    selected_model = "best-model-ever"
+    chosen_temperature = 0.77
+    response_text_content = "mocked-text-response-from-ollama-model"
+
+    ollama = OllamaProvider()
+    mock_response = {"message": {"content": response_text_content}}
+
+    with patch(
+        "httpx.post",
+        return_value=MagicMock(status_code=200, json=lambda: mock_response),
+    ) as mock_post:
+        response = ollama.chat_completions_create(
+            messages=message_history,
+            model=selected_model,
+            temperature=chosen_temperature,
+        )
+
+        mock_post.assert_called_once_with(
+            "http://localhost:11434/api/chat",
+            json={
+                "model": selected_model,
+                "messages": message_history,
+                "stream": False,
+                "temperature": chosen_temperature,
+            },
+            timeout=30,
+        )
+
+        assert response.choices[0].message.content == response_text_content
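
Not part of the commit: a hedged sketch of a companion test for the connection-error path, reusing the same mocking style; it assumes `httpx` is imported in the test module and that `LLMError` is importable from `aisuite.provider`, as in the provider code above.

import httpx
from aisuite.provider import LLMError


def test_connection_error_raises_llm_error():
    """A refused connection should surface as aisuite's LLMError."""
    ollama = OllamaProvider()

    # Simulate Ollama not running by making httpx.post raise ConnectError.
    with patch("httpx.post", side_effect=httpx.ConnectError("connection refused")):
        with pytest.raises(LLMError):
            ollama.chat_completions_create(
                messages=[{"role": "user", "content": "Howdy!"}],
                model="best-model-ever",
            )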
