This repository was archived by the owner on Jun 5, 2025. It is now read-only.

Commit df06cf1

Changed default values for base urls to just hostnames.
Also, moar fixes.

1 parent 17eae23 · commit df06cf1

9 files changed (+40, -219 lines)

src/codegate/config.py (3 additions, 3 deletions)

@@ -16,9 +16,9 @@

 # Default provider URLs
 DEFAULT_PROVIDER_URLS = {
-    "openai": "https://api.openai.com/v1",
-    "openrouter": "https://openrouter.ai/api/v1",
-    "anthropic": "https://api.anthropic.com/v1",
+    "openai": "https://api.openai.com",
+    "openrouter": "https://openrouter.ai/api",
+    "anthropic": "https://api.anthropic.com",
     "vllm": "http://localhost:8000",  # Base URL without /v1 path
     "ollama": "http://localhost:11434",  # Default Ollama server URL
     "lm_studio": "http://localhost:1234",

src/codegate/providers/anthropic/provider.py (1 addition, 1 deletion)

@@ -28,7 +28,7 @@ def __init__(
         if self._get_base_url() != "":
             self.base_url = self._get_base_url()
         else:
-            self.base_url = "https://api.anthropic.com"
+            self.base_url = "https://api.anthropic.com/v1"

         completion_handler = AnthropicCompletion(stream_generator=stream_generator)
         super().__init__(

src/codegate/providers/ollama/adapter.py (1 addition, 69 deletions)

@@ -5,46 +5,12 @@

 from codegate.providers.normalizer.base import ModelInputNormalizer, ModelOutputNormalizer
 from codegate.types.common import (
-    ChatCompletionRequest,
     Delta,
     ModelResponse,
     StreamingChoices,
 )


-class OllamaInputNormalizer(ModelInputNormalizer):
-
-    def normalize(self, data: Dict) -> ChatCompletionRequest:
-        """
-        Normalize the input data to the format expected by Ollama.
-        """
-        # Make a copy of the data to avoid modifying the original and normalize the message content
-        normalized_data = self._normalize_content_messages(data)
-        normalized_data["model"] = data.get("model", "").strip()
-        normalized_data["options"] = data.get("options", {})
-
-        if "prompt" in normalized_data:
-            normalized_data["messages"] = [
-                {"content": normalized_data.pop("prompt"), "role": "user"}
-            ]
-
-        # if we have the stream flag in data we set it, otherwise defaults to true
-        normalized_data["stream"] = data.get("stream", True)
-
-        # This would normally be the required to get the token usage.
-        # However Ollama python client doesn't support it. We would be able to get the response
-        # with a direct HTTP request. Since Ollama is local this is not critical.
-        # if normalized_data.get("stream", False):
-        #     normalized_data["stream_options"] = {"include_usage": True}
-        return ChatCompletionRequest(**normalized_data)
-
-    def denormalize(self, data: ChatCompletionRequest) -> Dict:
-        """
-        Convert back to raw format for the API request
-        """
-        return data
-
-
 class OLlamaToModel(AsyncIterator[ModelResponse]):
     def __init__(self, ollama_response: AsyncIterator[ChatResponse]):
         self.ollama_response = ollama_response

@@ -106,7 +72,7 @@ def normalize_chat_chunk(cls, chunk: ChatResponse) -> ModelResponse:
         return model_response

     @classmethod
-    def normalize_fim_chunk(cls, chunk: GenerateResponse) -> Dict:
+    def normalize_fim_chunk(cls, chunk) -> Dict:
         """
         Transform an ollama generation chunk to an OpenAI one
         """

@@ -174,37 +140,3 @@ async def __anext__(self) -> Union[ChatResponse]:
             return ollama_response
         except StopAsyncIteration:
             raise StopAsyncIteration
-
-
-class OllamaOutputNormalizer(ModelOutputNormalizer):
-    def __init__(self):
-        super().__init__()
-
-    def normalize_streaming(
-        self,
-        model_reply: AsyncIterator[ChatResponse],
-    ) -> AsyncIterator[ModelResponse]:
-        """
-        Pass through Ollama response
-        """
-        return OLlamaToModel(model_reply)
-
-    def normalize(self, model_reply: Any) -> Any:
-        """
-        Pass through Ollama response
-        """
-        return model_reply
-
-    def denormalize(self, normalized_reply: Any) -> Any:
-        """
-        Pass through Ollama response
-        """
-        return normalized_reply
-
-    def denormalize_streaming(
-        self, normalized_reply: AsyncIterator[ModelResponse]
-    ) -> AsyncIterator[ChatResponse]:
-        """
-        Pass through Ollama response
-        """
-        return ModelToOllama(normalized_reply)
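With the normalizer classes gone, OLlamaToModel is what remains of this module's adaptation layer: it wraps an AsyncIterator[ChatResponse] from the Ollama client and yields OpenAI-style ModelResponse chunks. A hedged usage sketch, assuming the ollama Python client's AsyncClient streaming API and an illustrative model name:

```python
import asyncio

from ollama import AsyncClient  # assumption: the official ollama Python client

from codegate.providers.ollama.adapter import OLlamaToModel


async def main():
    # stream=True makes chat() return an AsyncIterator[ChatResponse].
    chat_stream = await AsyncClient(host="http://localhost:11434").chat(
        model="llama3",  # illustrative model name
        messages=[{"role": "user", "content": "hello"}],
        stream=True,
    )

    # Adapt each Ollama chunk into an OpenAI-style ModelResponse.
    async for model_chunk in OLlamaToModel(chat_stream):
        print(model_chunk)


asyncio.run(main())
```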

src/codegate/providers/ollama/completion_handler.py (3 additions, 3 deletions)

@@ -79,10 +79,10 @@ async def execute_completion(
     ) -> Union[ChatResponse, GenerateResponse]:
         """Stream response directly from Ollama API."""
         if isinstance(request, ChatCompletionRequest):  # case for OpenAI-style requests
-            return await completions_streaming(request, api_key, base_url)
+            return completions_streaming(request, api_key, base_url)
         if is_fim_request:
-            return await generate_streaming(request, api_key, base_url)
-        return await chat_streaming(request, api_key, base_url)
+            return generate_streaming(request, api_key, base_url)
+        return chat_streaming(request, api_key, base_url)

     def _create_streaming_response(
         self,
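Dropping the `await` here matches the change to _generators.py below: once chat_streaming, generate_streaming, and completions_streaming contain `yield`, calling them returns an async generator rather than an awaitable coroutine, so the handler hands the generator straight back for the caller to iterate. A standalone illustration of the distinction (not codegate code):

```python
import asyncio


async def numbers():
    # An async generator function: calling it returns an async generator
    # object immediately; there is nothing to await at call time.
    for i in range(3):
        yield i


async def main():
    gen = numbers()       # no await: this is already the generator
    async for n in gen:   # the caller iterates it instead
        print(n)
    # await numbers()     # TypeError: object async_generator can't be used
                          # in 'await' expression


asyncio.run(main())
```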

src/codegate/providers/ollama/provider.py (0 additions, 1 deletion)

@@ -11,7 +11,6 @@
 from codegate.pipeline.factory import PipelineFactory
 from codegate.providers.base import BaseProvider, ModelFetchError
 from codegate.providers.fim_analyzer import FIMAnalyzer
-from codegate.providers.ollama.adapter import OllamaInputNormalizer, OllamaOutputNormalizer
 from codegate.providers.ollama.completion_handler import OllamaShim
 from codegate.types.openai import ChatCompletionRequest
 from codegate.types.ollama import ChatRequest, GenerateRequest

src/codegate/providers/openrouter/provider.py (1 addition, 1 deletion)

@@ -19,7 +19,7 @@ def __init__(self, pipeline_factory: PipelineFactory):
         if self._get_base_url() != "":
             self.base_url = self._get_base_url()
         else:
-            self.base_url = "https://openrouter.ai/api/v1"
+            self.base_url = "https://openrouter.ai/api"

     @property
     def provider_route_name(self) -> str:

src/codegate/types/ollama/_generators.py (19 additions, 6 deletions)

@@ -42,13 +42,15 @@ async def stream_generator(
 async def chat_streaming(request, api_key, base_url):
     if base_url is None:
         base_url = "http://localhost:11434"
-    return streaming(request, api_key, f"{base_url}/api/chat", StreamingChatCompletion)
+    async for item in streaming(request, api_key, f"{base_url}/api/chat", StreamingChatCompletion):
+        yield item


 async def generate_streaming(request, api_key, base_url):
     if base_url is None:
         base_url = "http://localhost:11434"
-    return streaming(request, api_key, f"{base_url}/api/generate", StreamingGenerateCompletion)
+    async for item in streaming(request, api_key, f"{base_url}/api/generate", StreamingGenerateCompletion):
+        yield item


 async def streaming(request, api_key, url, cls):

@@ -90,7 +92,18 @@ async def get_data_lines(lines):
 async def message_wrapper(cls, lines):
     messages = get_data_lines(lines)
     async for payload in messages:
-        item = cls.model_validate_json(payload)
-        yield item
-        if item.done:
-            break
+        try:
+            item = cls.model_validate_json(payload)
+            yield item
+            if item.done:
+                break
+        except Exception as e:
+            logger.warn("HTTP error while consuming SSE stream", payload=payload, exc_info=e)
+            err = MessageError(
+                error=ErrorDetails(
+                    message=str(e),
+                    code=500,
+                ),
+            )
+            item = MessageError.model_validate_json(payload)
+            yield item
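The explicit `async for item in ...: yield item` loop is how delegation has to be written here, because `yield from` is a syntax error inside an async generator. A small self-contained example of the pattern:

```python
import asyncio
from typing import AsyncIterator


async def inner() -> AsyncIterator[str]:
    yield "a"
    yield "b"


async def outer() -> AsyncIterator[str]:
    # `yield from inner()` would be a SyntaxError in an async generator,
    # so the items are re-yielded one by one.
    async for item in inner():
        yield item


async def main():
    async for item in outer():
        print(item)


asyncio.run(main())
```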

src/codegate/types/openai/_generators.py (12 additions, 7 deletions)

@@ -50,7 +50,7 @@ async def stream_generator(stream: AsyncIterator[StreamingChatCompletion]) -> As
 async def completions_streaming(request, api_key, base_url):
     if base_url is None:
         base_url = "https://api.openai.com"
-    async for item in streaming(request, api_key, f"{base_url}/chat/completions"):
+    async for item in streaming(request, api_key, f"{base_url}/v1/chat/completions"):
         yield item


@@ -91,6 +91,8 @@ async def get_data_lines(lines):
     while True:
         # Get the `data: <type>` line.
         data_line = await anext(lines)
+        # Get the empty line.
+        _ = await anext(lines)

         # As per standard, we ignore comment lines
         # https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation

@@ -103,9 +105,6 @@
             break

         yield data_line[6:]
-
-        # Get the empty line.
-        _ = await anext(lines)
     logger.debug(f"Consumed {count} messages", provider="openai", count=count)


@@ -116,6 +115,12 @@ async def message_wrapper(lines):
             item = StreamingChatCompletion.model_validate_json(payload)
             yield item
         except Exception as e:
-            logger.warn("HTTP error while consuming SSE stream", exc_info=e)
-            item = MessageError.model_validate_json(payload)
-            yield item
+            print(f"WAAAGH {payload}")
+            logger.warn("HTTP error while consuming SSE stream", payload=payload, exc_info=e)
+            err = MessageError(
+                error=ErrorDetails(
+                    message=str(e),
+                    code=500,
+                ),
+            )
+            yield err
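Moving `_ = await anext(lines)` up reflects SSE framing: each event arrives as a `data: ...` line followed by an empty separator line, so the separator is now consumed right after the data line rather than at the bottom of the loop. The error path, like the Ollama one above, now yields a synthesized MessageError instead of re-validating the failing payload. A simplified, loop-based sketch of that framing assumption (not the codegate implementation):

```python
from typing import AsyncIterator


async def sse_data_payloads(lines: AsyncIterator[str]) -> AsyncIterator[str]:
    """Yield the payload of each `data: ...` line in a server-sent-events stream.

    Simplified sketch: assumes every event is a single `data:` line followed by
    one empty separator line; real SSE also allows multi-line data fields.
    """
    async for line in lines:
        if not line or line.startswith(":"):
            continue  # separator or comment line, per the SSE spec
        if line.startswith("data: "):
            payload = line[6:]  # strip the "data: " prefix, like data_line[6:] above
            if payload == "[DONE]":
                break  # OpenAI-style streams end with this sentinel
            yield payload
```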

tests/providers/ollama/test_ollama_adapter.py (0 additions, 128 deletions)

This file was deleted.

0 commit comments