Add Gemini LLM and Summarizer #1176

Merged · 10 commits · Feb 17, 2025

Changes from 4 commits
94 changes: 94 additions & 0 deletions lib/sycamore/poetry.lock

Some generated files are not rendered by default.

1 change: 1 addition & 0 deletions lib/sycamore/pyproject.toml
@@ -88,6 +88,7 @@ nltk = { version = "^3.9.0", optional = true }

# LLM Dependencies
anthropic = { version = "^0.42.0", optional = true }
google-genai = { version = "^1.0", optional = true }
tiktoken = "^0.8.0"

[tool.poetry.group.test.dependencies]
4 changes: 4 additions & 0 deletions lib/sycamore/sycamore/llms/__init__.py
@@ -4,6 +4,7 @@
from sycamore.llms.openai import OpenAI, OpenAIClientType, OpenAIModels, OpenAIClientParameters, OpenAIClientWrapper
from sycamore.llms.bedrock import Bedrock, BedrockModels
from sycamore.llms.anthropic import Anthropic, AnthropicModels
from sycamore.llms.gemini import Gemini, GeminiModels

# Register the model constructors.
MODELS: Dict[str, Callable[..., LLM]] = {}
@@ -16,6 +17,7 @@
MODELS.update(
{f"anthropic.{model.value}": lambda **kwargs: Anthropic(model.value, **kwargs) for model in AnthropicModels}
)
MODELS.update({f"gemini.{model.value}": lambda **kwargs: Gemini(model.value.name, **kwargs) for model in GeminiModels})


def get_llm(model_name: str) -> Callable[..., LLM]:
@@ -38,4 +40,6 @@ def get_llm(model_name: str) -> Callable[..., LLM]:
"BedrockModels",
"Anthropic",
"AnthropicModels",
"Gemini",
"GeminiModels",
]
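For context, a minimal usage sketch of the registry wiring above (not part of the diff): it assumes the keys follow the "gemini.<model-name>" pattern built in the update call and that default credentials for the google-genai Client are available in the environment.

from sycamore.llms import GeminiModels, get_llm

# Look up the registered constructor by its "gemini.<model-name>" key and build the LLM.
name = GeminiModels.GEMINI_2_FLASH.value.name
gemini_llm = get_llm(f"gemini.{name}")()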
133 changes: 133 additions & 0 deletions lib/sycamore/sycamore/llms/gemini.py
@@ -0,0 +1,133 @@
from dataclasses import dataclass
import datetime
from enum import Enum
import io
from typing import Any, Optional, Union

from google.genai import Client, types

from sycamore.llms.llms import LLM
from sycamore.llms.prompts.prompts import RenderedPrompt
from sycamore.utils.cache import Cache
from sycamore.utils.import_utils import requires_modules

DEFAULT_MAX_TOKENS = 1024


@dataclass
class GeminiModel:
    name: str
    is_chat: bool = False


class GeminiModels(Enum):
    """Represents available Gemini models. More info: https://googleapis.github.io/python-genai/"""

    # Note that the models available on a given Gemini account may vary.
    GEMINI_2_FLASH = GeminiModel(name="gemini-2.0-flash-exp", is_chat=True)
    GEMINI_2_FLASH_LITE = GeminiModel(name="gemini-2.0-flash-lite-preview-02-05", is_chat=True)
    GEMINI_2_FLASH_THINKING = GeminiModel(name="gemini-2.0-flash-thinking-exp", is_chat=True)
    GEMINI_2_PRO = GeminiModel(name="gemini-2.0-pro-exp", is_chat=True)

    @classmethod
    def from_name(cls, name: str):
        for m in iter(cls):
            if m.value.name == name:
                return m
        return None


class Gemini(LLM):
    """This is an LLM implementation that uses the Google Gemini API to generate text.

    Args:
        model_name: The name of the Gemini model to use.
        cache: A cache object to use for caching results.
    """

    @requires_modules("google-genai")
    def __init__(
        self,
        model_name: Union[GeminiModels, str],
        cache: Optional[Cache] = None,
        api_key: Optional[str] = None,
    ):
        self.model_name = model_name

        if isinstance(model_name, GeminiModels):
            self.model = model_name.value
        elif isinstance(model_name, str):
            self.model = GeminiModel(name=model_name)
        if api_key is not None:
            self._client = Client(api_key=api_key)
        else:
            self._client = Client()
        super().__init__(self.model.name, cache)

    def __reduce__(self):
        def deserializer(kwargs):
            return Gemini(**kwargs)

        kwargs = {"model_name": self.model_name, "cache": self._cache}
        return deserializer, (kwargs,)

    def is_chat_mode(self) -> bool:
        """Returns True if the LLM is in chat mode, False otherwise."""
        return True

    def get_generate_kwargs(self, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
        kwargs: dict[str, Any] = {}
        config = {
            "temperature": 0,
            "candidate_count": 1,
            **(llm_kwargs or {}),
        }
        config["max_output_tokens"] = config.get("max_output_tokens", DEFAULT_MAX_TOKENS)
        content_list = []
        for message in prompt.messages:
            if message.role == "system":
                config["system_instruction"] = message.content
                continue
            role = "model" if message.role == "assistant" else "user"
            content = types.Content(parts=[types.Part.from_text(text=message.content)], role=role)
            if message.images:
                for image in message.images:
                    # Serialize the PIL image to PNG bytes; raw tobytes() is not a valid image payload.
                    buffered = io.BytesIO()
                    image.convert("RGB").save(buffered, format="PNG")
                    content.parts.append(types.Part.from_bytes(data=buffered.getvalue(), mime_type="image/png"))
            content_list.append(content)
        kwargs["config"] = types.GenerateContentConfig(**config) if config else None
        # Pass the full message list, not just the last Content built in the loop.
        kwargs["content"] = content_list
        return kwargs

    def generate_metadata(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> dict:
        ret = self._llm_cache_get(prompt, llm_kwargs)
        if isinstance(ret, dict):
            print(f"cache return {ret}")
            return ret
        assert ret is None

        kwargs = self.get_generate_kwargs(prompt, llm_kwargs)

        start = datetime.datetime.now()
        response: types.GenerateContentResponse = self._client.models.generate_content(
            model=self.model.name, contents=kwargs["content"], config=kwargs["config"]
        )
        wall_latency = datetime.datetime.now() - start
        md = response.usage_metadata
        in_tokens = int(md.prompt_token_count) if md else 0
        out_tokens = int(md.candidates_token_count) if md else 0
        # Join the text parts of the first candidate so callers of generate() receive a plain string.
        output = "".join(part.text or "" for part in response.candidates[0].content.parts)
        ret = {
            "output": output,
            "wall_latency": wall_latency,
            "in_tokens": in_tokens,
            "out_tokens": out_tokens,
        }
        self.add_llm_metadata(kwargs, output, wall_latency, in_tokens, out_tokens)
        self._llm_cache_set(prompt, llm_kwargs, ret)
        return ret

    def generate(self, *, prompt: RenderedPrompt, llm_kwargs: Optional[dict] = None) -> str:
        d = self.generate_metadata(prompt=prompt, llm_kwargs=llm_kwargs)
        return d["output"]