chore: Fix Typing Issues #27

Merged · 10 commits · Aug 23, 2024 · changes shown from 9 commits
Makefile (2 changes: 1 addition & 1 deletion)

```diff
@@ -22,7 +22,7 @@ format:
 # Lint code
 lint:
 	$(RUFF) check
-	$(MYPY) . --show-column-numbers --show-error-codes --pretty
+	$(MYPY) ./core --show-column-numbers --show-error-codes --pretty

 # Run tests
 test:
```
core/graphiti.py (44 changes: 20 additions & 24 deletions)

```diff
@@ -56,7 +56,7 @@ def __init__(self, uri: str, user: str, password: str, llm_client: LLMClient | N
         else:
             self.llm_client = OpenAIClient(
                 LLMConfig(
-                    api_key=os.getenv('OPENAI_API_KEY'),
+                    api_key=os.getenv('OPENAI_API_KEY', default=''),
                     model='gpt-4o-mini',
                     base_url='https://api.openai.com/v1',
                 )
```
```diff
@@ -72,28 +72,16 @@ async def retrieve_episodes(
         self,
         reference_time: datetime,
         last_n: int = EPISODE_WINDOW_LEN,
-        sources: list[str] | None = 'messages',
     ) -> list[EpisodicNode]:
         """Retrieve the last n episodic nodes from the graph"""
-        return await retrieve_episodes(self.driver, reference_time, last_n, sources)
-
-    # Invalidate edges that are no longer valid
-    async def invalidate_edges(
-        self,
-        episode: EpisodicNode,
-        new_nodes: list[EntityNode],
-        new_edges: list[EntityEdge],
-        relevant_schema: dict[str, any],
-        previous_episodes: list[EpisodicNode],
-    ): ...
+        return await retrieve_episodes(self.driver, reference_time, last_n)

     async def add_episode(
         self,
         name: str,
         episode_body: str,
         source_description: str,
-        reference_time: datetime | None = None,
-        episode_type: str | None = 'string',  # TODO: this field isn't used yet?
+        reference_time: datetime,
         success_callback: Callable | None = None,
         error_callback: Callable | None = None,
     ):
```
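The deleted `sources` parameter was itself one of the typing errors this PR targets: its default value `'messages'` is a `str`, which is not a member of the annotated type `list[str] | None`. Rather than correct the default, the PR drops the unused parameter, along with the unimplemented `invalidate_edges` stub, whose `relevant_schema: dict[str, any]` had the same lowercase-`any` problem fixed elsewhere. A minimal repro of the default-value error (not project code):

```python
def retrieve(sources: list[str] | None = 'messages') -> None:  # default has the wrong type
    ...

# mypy: Incompatible default for argument "sources" (default has type "str",
#       argument has type "list[str] | None")  [assignment]
```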
```diff
@@ -104,7 +92,7 @@ async def add_episode(
         nodes: list[EntityNode] = []
         entity_edges: list[EntityEdge] = []
         episodic_edges: list[EpisodicEdge] = []
-        embedder = self.llm_client.client.embeddings
+        embedder = self.llm_client.get_embedder()
         now = datetime.now()

         previous_episodes = await self.retrieve_episodes(reference_time)
@@ -234,7 +222,7 @@ async def add_episode_bulk(
     ):
         try:
             start = time()
-            embedder = self.llm_client.client.embeddings
+            embedder = self.llm_client.get_embedder()
             now = datetime.now()

             episodes = [
@@ -276,14 +264,22 @@ async def add_episode_bulk(
             await asyncio.gather(*[node.save(self.driver) for node in nodes])

             # re-map edge pointers so that they don't point to discard dupe nodes
-            extracted_edges: list[EntityEdge] = resolve_edge_pointers(extracted_edges, uuid_map)
-            episodic_edges: list[EpisodicEdge] = resolve_edge_pointers(episodic_edges, uuid_map)
+            extracted_edges_with_resolved_pointers: list[EntityEdge] = resolve_edge_pointers(
+                extracted_edges, uuid_map
+            )
+            episodic_edges_with_resolved_pointers: list[EpisodicEdge] = resolve_edge_pointers(
+                episodic_edges, uuid_map
+            )

             # save episodic edges to KG
-            await asyncio.gather(*[edge.save(self.driver) for edge in episodic_edges])
+            await asyncio.gather(
+                *[edge.save(self.driver) for edge in episodic_edges_with_resolved_pointers]
+            )

             # Dedupe extracted edges
-            edges = await dedupe_edges_bulk(self.driver, self.llm_client, extracted_edges)
+            edges = await dedupe_edges_bulk(
+                self.driver, self.llm_client, extracted_edges_with_resolved_pointers
+            )
             logger.info(f'extracted edge length: {len(edges)}')

             # invalidate edges
```
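The verbose names are not purely cosmetic: the old code re-annotated existing locals (`extracted_edges: list[EntityEdge] = resolve_edge_pointers(extracted_edges, uuid_map)`), and mypy typically rejects a second annotation of a name that already carries one in the same scope. A minimal repro of the pitfall (names are illustrative):

```python
from typing import Any


def resolve(items: list[Any]) -> list[int]:
    return [int(x) for x in items]


items: list[Any] = ['1', '2']
items: list[int] = resolve(items)           # mypy typically flags this re-annotation  [no-redef]
resolved_items: list[int] = resolve(items)  # a fresh name avoids it and makes the flow explicit
```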
```diff
@@ -302,18 +298,18 @@ async def search(self, query: str, num_results=10):
         edges = (
             await hybrid_search(
                 self.driver,
-                self.llm_client.client.embeddings,
+                self.llm_client.get_embedder(),
                 query,
                 datetime.now(),
                 search_config,
             )
-        )['edges']
+        ).edges

         facts = [edge.fact for edge in edges]

         return facts

     async def _search(self, query: str, timestamp: datetime, config: SearchConfig):
         return await hybrid_search(
-            self.driver, self.llm_client.client.embeddings, query, timestamp, config
+            self.driver, self.llm_client.get_embedder(), query, timestamp, config
         )
```
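The switch from `['edges']` to `.edges` implies that `hybrid_search` now returns a structured result object rather than an untyped dict, which is what lets mypy see an element type for `edges` at all. A sketch of the idea (the real result class lives alongside `hybrid_search`; this shape is an assumption):

```python
from dataclasses import dataclass, field


@dataclass
class Edge:  # stand-in for the project's EntityEdge
    fact: str = ''


@dataclass
class SearchResults:  # assumed shape of hybrid_search's return value, for illustration
    edges: list[Edge] = field(default_factory=list)


results = SearchResults(edges=[Edge(fact='Alice works at Acme')])
facts = [edge.fact for edge in results.edges]  # attribute access is fully type-checked
# With a dict[str, Any], results['edges'] gives mypy nothing to check.
```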
core/llm_client/client.py (8 changes: 7 additions & 1 deletion)

```diff
@@ -1,5 +1,7 @@
+import typing
 from abc import ABC, abstractmethod

+from ..prompts.models import Message
 from .config import LLMConfig


@@ -9,5 +11,9 @@ def __init__(self, config: LLMConfig):
         pass

     @abstractmethod
-    async def generate_response(self, messages: list[dict[str, str]]) -> dict[str, any]:
+    def get_embedder(self) -> typing.Any:
+        pass
+
+    @abstractmethod
+    async def generate_response(self, messages: list[Message]) -> dict[str, typing.Any]:
         pass
```
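Declaring `get_embedder()` on the abstract base is what frees `graphiti.py` from reaching into `self.llm_client.client.embeddings`, an attribute only the OpenAI client has. Each backend now decides what its embedder is. A toy sketch of a second implementation (hypothetical, not part of the PR):

```python
import typing
from abc import ABC, abstractmethod


class Message:  # stand-in for the pydantic Message in core/prompts/models.py
    def __init__(self, role: str, content: str):
        self.role, self.content = role, content


class LLMClient(ABC):  # contract restated from core/llm_client/client.py
    @abstractmethod
    def get_embedder(self) -> typing.Any: ...

    @abstractmethod
    async def generate_response(self, messages: list[Message]) -> dict[str, typing.Any]: ...


class EchoClient(LLMClient):
    """Hypothetical non-OpenAI backend showing what implementers must provide."""

    def get_embedder(self) -> typing.Any:
        raise NotImplementedError('this toy backend exposes no embedding endpoint')

    async def generate_response(self, messages: list[Message]) -> dict[str, typing.Any]:
        return {'echo': [m.content for m in messages]}
```

The `typing.Any` return type is a pragmatic compromise: the embedder's concrete type differs per SDK, and pinning it would couple the interface to one vendor.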
core/llm_client/openai_client.py (19 changes: 16 additions & 3 deletions)

```diff
@@ -1,8 +1,11 @@
 import json
 import logging
+import typing

 from openai import AsyncOpenAI
+from openai.types.chat import ChatCompletionMessageParam

+from ..prompts.models import Message
 from .client import LLMClient
 from .config import LLMConfig

@@ -14,16 +17,26 @@ def __init__(self, config: LLMConfig):
         self.client = AsyncOpenAI(api_key=config.api_key, base_url=config.base_url)
         self.model = config.model

-    async def generate_response(self, messages: list[dict[str, str]]) -> dict[str, any]:
+    def get_embedder(self) -> typing.Any:
+        return self.client.embeddings
+
+    async def generate_response(self, messages: list[Message]) -> dict[str, typing.Any]:
+        openai_messages: list[ChatCompletionMessageParam] = []
+        for m in messages:
+            if m.role == 'user':
+                openai_messages.append({'role': 'user', 'content': m.content})
+            elif m.role == 'system':
+                openai_messages.append({'role': 'system', 'content': m.content})
         try:
             response = await self.client.chat.completions.create(
                 model=self.model,
-                messages=messages,
+                messages=openai_messages,
                 temperature=0.1,
                 max_tokens=3000,
                 response_format={'type': 'json_object'},
             )
-            return json.loads(response.choices[0].message.content)
+            result = response.choices[0].message.content or ''
+            return json.loads(result)
         except Exception as e:
             logger.error(f'Error in generating LLM response: {e}')
             raise
```
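Two typing fixes land here. The explicit `openai_messages` loop converts the project's `Message` models into the SDK's `ChatCompletionMessageParam` typed dicts, and the `or ''` guard handles the SDK typing `message.content` as `Optional[str]`. The guard's behavior is worth spelling out (sketch, not project code):

```python
import json


def parse_llm_json(content: str | None) -> dict:
    # json.loads(None) would raise TypeError and fail type checking;
    # falling back to '' keeps mypy satisfied and turns an empty response
    # into a clear json.JSONDecodeError inside the existing try/except.
    return json.loads(content or '')
```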
core/prompts/dedupe_edges.py (9 changes: 5 additions & 4 deletions)

```diff
@@ -1,12 +1,13 @@
 import json
-from typing import Protocol, TypedDict
+from typing import Any, Protocol, TypedDict

 from .models import Message, PromptFunction, PromptVersion


 class Prompt(Protocol):
     v1: PromptVersion
     v2: PromptVersion
+    edge_list: PromptVersion


 class Versions(TypedDict):
@@ -15,7 +16,7 @@ class Versions(TypedDict):
     edge_list: PromptFunction


-def v1(context: dict[str, any]) -> list[Message]:
+def v1(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -55,7 +56,7 @@ def v1(context: dict[str, any]) -> list[Message]:
     ]


-def v2(context: dict[str, any]) -> list[Message]:
+def v2(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -97,7 +98,7 @@ def v2(context: dict[str, any]) -> list[Message]:
     ]


-def edge_list(context: dict[str, any]) -> list[Message]:
+def edge_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
```
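This is the fix repeated across every prompt module: lowercase `any` is the builtin function `any(iterable) -> bool`, not a type, so mypy rejects `dict[str, any]` annotations even though Python accepts them at runtime (annotations are not evaluated as types). `typing.Any` is the intended dynamic type:

```python
from typing import Any


def v_bad(context: dict[str, any]) -> None:   # mypy: Function "builtins.any" is not valid as a type  [valid-type]
    ...


def v_good(context: dict[str, Any]) -> None:  # checks cleanly
    ...
```

The same one-character fix recurs in dedupe_nodes.py, extract_edges.py, extract_nodes.py, invalidate_edges.py, lib.py, and models.py below.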
core/prompts/dedupe_nodes.py (8 changes: 4 additions & 4 deletions)

```diff
@@ -1,5 +1,5 @@
 import json
-from typing import Protocol, TypedDict
+from typing import Any, Protocol, TypedDict

 from .models import Message, PromptFunction, PromptVersion

@@ -16,7 +16,7 @@ class Versions(TypedDict):
     node_list: PromptVersion


-def v1(context: dict[str, any]) -> list[Message]:
+def v1(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -56,7 +56,7 @@ def v1(context: dict[str, any]) -> list[Message]:
     ]


-def v2(context: dict[str, any]) -> list[Message]:
+def v2(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -96,7 +96,7 @@ def v2(context: dict[str, any]) -> list[Message]:
     ]


-def node_list(context: dict[str, any]) -> list[Message]:
+def node_list(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
```
core/prompts/extract_edges.py (6 changes: 3 additions & 3 deletions)

```diff
@@ -1,5 +1,5 @@
 import json
-from typing import Protocol, TypedDict
+from typing import Any, Protocol, TypedDict

 from .models import Message, PromptFunction, PromptVersion

@@ -14,7 +14,7 @@ class Versions(TypedDict):
     v2: PromptFunction


-def v1(context: dict[str, any]) -> list[Message]:
+def v1(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -70,7 +70,7 @@ def v1(context: dict[str, any]) -> list[Message]:
     ]


-def v2(context: dict[str, any]) -> list[Message]:
+def v2(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
```
core/prompts/extract_nodes.py (8 changes: 4 additions & 4 deletions)

```diff
@@ -1,5 +1,5 @@
 import json
-from typing import Protocol, TypedDict
+from typing import Any, Protocol, TypedDict

 from .models import Message, PromptFunction, PromptVersion

@@ -16,7 +16,7 @@ class Versions(TypedDict):
     v3: PromptFunction


-def v1(context: dict[str, any]) -> list[Message]:
+def v1(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -64,7 +64,7 @@ def v1(context: dict[str, any]) -> list[Message]:
     ]


-def v2(context: dict[str, any]) -> list[Message]:
+def v2(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
@@ -105,7 +105,7 @@ def v2(context: dict[str, any]) -> list[Message]:
     ]


-def v3(context: dict[str, any]) -> list[Message]:
+def v3(context: dict[str, Any]) -> list[Message]:
     sys_prompt = """You are an AI assistant that extracts entity nodes from conversational text. Your primary task is to identify and extract the speaker and other significant entities mentioned in the conversation."""

     user_prompt = f"""
```
core/prompts/invalidate_edges.py (4 changes: 2 additions & 2 deletions)

```diff
@@ -1,4 +1,4 @@
-from typing import Protocol, TypedDict
+from typing import Any, Protocol, TypedDict

 from .models import Message, PromptFunction, PromptVersion

@@ -11,7 +11,7 @@ class Versions(TypedDict):
     v1: PromptFunction


-def v1(context: dict[str, any]) -> list[Message]:
+def v1(context: dict[str, Any]) -> list[Message]:
     return [
         Message(
             role='system',
```
core/prompts/lib.py (9 changes: 4 additions & 5 deletions)

```diff
@@ -1,4 +1,4 @@
-from typing import Protocol, TypedDict
+from typing import Any, Protocol, TypedDict

 from .dedupe_edges import (
     Prompt as DedupeEdgesPrompt,
@@ -68,7 +68,7 @@ class VersionWrapper:
     def __init__(self, func: PromptFunction):
         self.func = func

-    def __call__(self, context: dict[str, any]) -> list[Message]:
+    def __call__(self, context: dict[str, Any]) -> list[Message]:
         return self.func(context)


@@ -81,7 +81,7 @@ def __init__(self, versions: dict[str, PromptFunction]):
 class PromptLibraryWrapper:
     def __init__(self, library: PromptLibraryImpl):
         for prompt_type, versions in library.items():
-            setattr(self, prompt_type, PromptTypeWrapper(versions))
+            setattr(self, prompt_type, PromptTypeWrapper(versions))  # type: ignore[arg-type]


 PROMPT_LIBRARY_IMPL: PromptLibraryImpl = {
@@ -91,5 +91,4 @@ def __init__(self, library: PromptLibraryImpl):
     'dedupe_edges': dedupe_edges_versions,
     'invalidate_edges': invalidate_edges_versions,
 }
-
-prompt_library: PromptLibrary = PromptLibraryWrapper(PROMPT_LIBRARY_IMPL)
+prompt_library: PromptLibrary = PromptLibraryWrapper(PROMPT_LIBRARY_IMPL)  # type: ignore[assignment]
```
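Both suppressions are scoped to a single error code rather than a bare `# type: ignore`, which pairs with the `--show-error-codes` flag the Makefile already passes: the bracketed code in mypy's output is exactly what goes in the comment. A sketch of the difference (illustrative names):

```python
from typing import Any


def takes_int(x: int) -> int:
    return x


takes_int('5')                            # mypy: ... incompatible type "str"  [arg-type]
takes_int('5')  # type: ignore[arg-type]  # silences only arg-type errors on this line
takes_int('5')  # type: ignore            # bare ignore: also hides any future error here
```

Here the ignores paper over the dynamic `setattr`-based wrapper construction, which mypy cannot relate back to the PromptLibrary protocol on its own.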
core/prompts/models.py (6 changes: 3 additions & 3 deletions)

```diff
@@ -1,4 +1,4 @@
-from typing import Callable, Protocol
+from typing import Any, Callable, Protocol

 from pydantic import BaseModel

@@ -9,7 +9,7 @@ class Message(BaseModel):


 class PromptVersion(Protocol):
-    def __call__(self, context: dict[str, any]) -> list[Message]: ...
+    def __call__(self, context: dict[str, Any]) -> list[Message]: ...


-PromptFunction = Callable[[dict[str, any]], list[Message]]
+PromptFunction = Callable[[dict[str, Any]], list[Message]]
```
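Because `PromptVersion` is a `Protocol`, the plain functions in the prompt modules satisfy it structurally: no inheritance or registration, only a matching signature, which is why the `dict[str, Any]` spelling has to agree everywhere. A compact illustration (stand-ins for the real classes):

```python
from typing import Any, Callable, Protocol


class Message:  # stand-in for the pydantic Message model
    def __init__(self, role: str, content: str):
        self.role, self.content = role, content


class PromptVersion(Protocol):
    def __call__(self, context: dict[str, Any]) -> list[Message]: ...


PromptFunction = Callable[[dict[str, Any]], list[Message]]


def v1(context: dict[str, Any]) -> list[Message]:
    return [Message('system', 'You extract entity nodes.')]


version: PromptVersion = v1  # accepted: the signature matches structurally
```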