feat: Retrieval augmented generation for chat (#2886)

mergify[bot] · web-flow · commit f5a655f66396 · 2025-01-22T03:27:34.000Z
Resolves #2876 Depends on #2832 Dev doc: instructlab/dev-docs#161 **Checklist:** - [X] **Commit Message Formatting**: Commit titles and messages follow guidelines in the [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/#summary). - [x] [Changelog](https://github.com/instructlab/instructlab/blob/main/CHANGELOG.md) updated with breaking and/or notable changes for the next minor release. - [x] Documentation has been updated, if necessary. - [x] Unit tests have been added, if necessary. - [x] Functional tests have been added, if necessary. - [x] E2E Workflow tests have been added, if necessary. Approved-by: cdoern Approved-by: nathan-weinberg
diff --git a/src/instructlab/cli/model/chat.py b/src/instructlab/cli/model/chat.py
@@ -10,6 +10,7 @@
 # First Party
 from instructlab import clickext
 from instructlab import configuration as cfg
+from instructlab.defaults import DEFAULTS
 from instructlab.model.chat import chat_model
 
 logger = logging.getLogger(__name__)
@@ -101,6 +102,45 @@
     "--temperature",
     cls=clickext.ConfigOption,
 )
+@click.option(
+    "--rag",
+    "rag_enabled",
+    default=False,
+    is_flag=True,
+)
+@click.option(
+    "--document-store-uri",
+    "uri",
+    type=click.STRING,
+    cls=clickext.ConfigOption,
+    config_class="rag",
+    config_sections="document_store",
+)
+@click.option(
+    "--document-store-collection-name",
+    "collection_name",
+    type=click.STRING,
+    cls=clickext.ConfigOption,
+    config_class="rag",
+    config_sections="document_store",
+)
+@click.option(
+    "--retriever-embedding-model-name",
+    "embedding_model_name",
+    type=click.STRING,
+    cls=clickext.ConfigOption,
+    config_class="rag",
+    config_sections="embedding_model",
+)
+@click.option(
+    "--retriever-top-k",
+    "top_k",
+    type=click.INT,
+    default=DEFAULTS.RETRIEVER_TOP_K,
+    cls=clickext.ConfigOption,
+    config_class="rag",
+    config_sections="retriever",
+)
 @click.pass_context
 @clickext.display_params
 def chat(
@@ -120,6 +160,11 @@ def chat(
     model_family,
     serving_log_file,
     temperature,
+    rag_enabled,
+    uri,
+    collection_name,
+    embedding_model_name,
+    top_k,
 ):
     """Runs a chat using the modified model"""
     chat_model(
@@ -138,6 +183,11 @@ def chat(
         model_family,
         serving_log_file,
         temperature,
+        rag_enabled,
+        uri,
+        collection_name,
+        embedding_model_name,
+        top_k,
         backend_type=ctx.obj.config.serve.server.backend_type,
         host=ctx.obj.config.serve.server.host,
         port=ctx.obj.config.serve.server.port,
diff --git a/src/instructlab/configuration.py b/src/instructlab/configuration.py
@@ -127,6 +127,31 @@ def after_debug_level(self):
         return self
 
 
+class _document_store(BaseModel):
+    """Class describing configuration of document store backend for RAG."""
+
+    uri: str = Field(
+        default_factory=lambda: DEFAULTS.DEFAULT_DOCUMENT_STORE_PATH,
+        description="Document store service URI.",
+    )
+    collection_name: str = Field(
+        default=DEFAULTS.DOCUMENT_STORE_COLLECTION_NAME,
+        description="Document store collection name.",
+    )
+
+
+class _embedding_model(BaseModel):
+    """Class describing configuration of embedding parameters for RAG."""
+
+    # pydantic config: ignore unknown keys; no protected "model_" namespace
+    model_config = ConfigDict(extra="ignore", protected_namespaces=())
+
+    embedding_model_name: StrictStr = Field(
+        default_factory=lambda: DEFAULTS.DEFAULT_EMBEDDING_MODEL,
+        description="Embedding model to use for RAG.",
+    )
+
+
 class _chat(BaseModel):
     """Class describing configuration of the 'chat' sub-command."""
 
@@ -285,9 +310,33 @@ class _convert(BaseModel):
     )
 
 
+class _retriever(BaseModel):
+    """Class describing configuration of retrieval parameters for RAG."""
+
+    top_k: int = Field(
+        default=DEFAULTS.RETRIEVER_TOP_K,
+        description="The maximum number of documents to retrieve.",
+    )
+
+
 class _rag(BaseModel):
     """Class describing configuration of the 'ilab rag' command."""
 
+    enabled: bool = Field(
+        default=False, description="Flag for enabling RAG functionality."
+    )
+    document_store: _document_store = Field(
+        default_factory=_document_store,
+        description="Document store configuration for RAG.",
+    )
+    embedding_model: _embedding_model = Field(
+        default_factory=_embedding_model,
+        description="Embedding model configuration for RAG",
+    )
+    retriever: _retriever = Field(
+        default_factory=_retriever,
+        description="Retrieval configuration parameters for RAG",
+    )
     convert: _convert = Field(
         default_factory=_convert, description="RAG convert configuration section."
     )
@@ -597,54 +646,6 @@ class _train(BaseModel):
     )
 
 
-class _document_store(BaseModel):
-    """Class describing configuration of document store backend for RAG."""
-
-    uri: str = Field(default="embeddings.db", description="Document store service URI.")
-    collection_name: str = Field(
-        default="ilab", description="Document store collection name."
-    )
-
-
-class _embedding_model(BaseModel):
-    """Class describing configuration of embedding parameters for RAG."""
-
-    # model configuration
-    model_config = ConfigDict(extra="ignore", protected_namespaces=())
-
-    model_dir: str = Field(
-        default=DEFAULTS.MODELS_DIR,
-        description="The default system model location store, located in the data directory.",
-    )
-    model_name: str = Field(
-        default_factory=lambda: DEFAULTS.DEFAULT_EMBEDDING_MODEL,
-        description="Embedding model to use for RAG.",
-    )
-
-    def local_model_path(self) -> str:
-        if self.model_dir is None:
-            click.secho(f"Missing value for field model_dir in {vars(self)}")
-            raise click.exceptions.Exit(1)
-
-        if self.model_name is None:
-            click.secho(f"Missing value for field model_name in {vars(self)}")
-            raise click.exceptions.Exit(1)
-
-        return os.path.join(self.model_dir, self.model_name)
-
-
-class _retriever(BaseModel):
-    """Class describing configuration of retrieval parameters for RAG."""
-
-    top_k: int = Field(
-        default=20, description="The maximum number of documents to retrieve."
-    )
-    embedding_model: _embedding_model = Field(
-        default=_embedding_model(),
-        description="Embedding parameters for retrieval.",
-    )
-
-
 class _metadata(BaseModel):
     # model configuration
     model_config = ConfigDict(extra="ignore")
diff --git a/src/instructlab/defaults.py b/src/instructlab/defaults.py
@@ -86,6 +86,9 @@ class _InstructlabDefaults:
     MISTRAL_GGUF_REPO = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"
     GRANITE_GGUF_MODEL_NAME = "granite-7b-lab-Q4_K_M.gguf"
     GRANITE_EMBEDDING_MODEL_NAME = "ibm-granite/granite-embedding-125m-english"
+    DOCUMENT_STORE_NAME = "embeddings.db"
+    DOCUMENT_STORE_COLLECTION_NAME = "ilab"
+    RETRIEVER_TOP_K = 20
     MERLINITE_GGUF_MODEL_NAME = "merlinite-7b-lab-Q4_K_M.gguf"
     MISTRAL_GGUF_MODEL_NAME = "mistral-7b-instruct-v0.2.Q4_K_M.gguf"
     MODEL_REPO = "instructlab/granite-7b-lab"
@@ -174,6 +177,10 @@ def DEFAULT_CHAT_MODEL(self) -> str:
     def DEFAULT_EMBEDDING_MODEL(self) -> str:
         return path.join(self.MODELS_DIR, self.GRANITE_EMBEDDING_MODEL_NAME)
 
+    @property
+    def DEFAULT_DOCUMENT_STORE_PATH(self) -> str:
+        return path.join(self._data_dir, self.DOCUMENT_STORE_NAME)
+
     @property
     def DEFAULT_TEACHER_MODEL(self) -> str:
         return path.join(self.MODELS_DIR, self.MISTRAL_GGUF_MODEL_NAME)
diff --git a/src/instructlab/model/chat.py b/src/instructlab/model/chat.py
@@ -33,6 +33,8 @@
 
 # Local
 from ..client_utils import http_client
+from ..rag.document_store import DocumentStoreRetriever
+from ..rag.document_store_factory import create_document_retriever
 from ..utils import get_cli_helper_sysprompt, get_model_arch, get_sysprompt
 from .backends import backends
 
@@ -87,6 +89,7 @@ def __init__(
         self,
         model,
         client,
+        retriever=None,
         vi_mode=False,
         prompt=True,
         vertical_overflow="ellipsis",
@@ -98,6 +101,7 @@ def __init__(
         backend_type="",
     ):
         self.client = client
+        self.retriever: DocumentStoreRetriever | None = retriever
         self.model = model
         self.vi_mode = vi_mode
         self.vertical_overflow = vertical_overflow
@@ -395,6 +399,13 @@ def start_prompt(
 
         self.log_message(PROMPT_PREFIX + content + "\n\n")
 
+        # if RAG is enabled, fetch context and insert into session
+        # TODO: what if context is already too long? note that current retriever implementation concatenates all docs
+        # TODO: better way to check whether we should perform retrieval?
+        if self.retriever is not None:
+            context = self.retriever.augmented_context(user_query=content)
+            self._update_conversation(context, "assistant")
+
         # Update message history and token counters
         self._update_conversation(content, "user")
 
@@ -552,6 +563,11 @@ def chat_model(
     model_family,
     serving_log_file,
     temperature,
+    rag_enabled,
+    document_store_uri,
+    collection_name,
+    embedding_model,
+    top_k,
     backend_type,
     host,
     port,
@@ -693,6 +709,11 @@ def chat_model(
             max_tokens=max_tokens,
             max_ctx_size=max_ctx_size,
             temperature=temperature,
+            rag_enabled=rag_enabled,
+            document_store_uri=document_store_uri,
+            collection_name=collection_name,
+            embedding_model=embedding_model,
+            top_k=top_k,
             backend_type=backend_type,
             params=params,
         )
@@ -715,6 +736,11 @@ def chat_cli(
     max_ctx_size,
     temperature,
     backend_type,
+    rag_enabled,
+    document_store_uri,
+    collection_name,
+    embedding_model,
+    top_k,
     logs_dir,
     vi_mode,
     visible_overflow,
@@ -756,6 +782,19 @@ def chat_cli(
     sys_prompt = CONTEXTS.get(context, "default")(get_model_arch(pathlib.Path(model)))
     loaded["messages"] = [{"role": "system", "content": sys_prompt}]
 
+    # Instantiate retriever only if RAG is enabled; otherwise it stays None
+    retriever: DocumentStoreRetriever | None = None
+    if rag_enabled:
+        logger.debug("RAG enabled for chat; initializing retriever")
+        retriever = create_document_retriever(
+            document_store_uri=document_store_uri,
+            document_store_collection_name=collection_name,
+            top_k=top_k,
+            embedding_model_path=embedding_model,
+        )
+    else:
+        logger.debug("RAG not enabled for chat; skipping retrieval setup")
+
     # Session from CLI
     if session is not None:
         loaded["name"] = os.path.basename(session.name).strip(".json")
@@ -778,6 +817,7 @@ def chat_cli(
     ccb = ConsoleChatBot(
         model if model is None else model,
         client=client,
+        retriever=retriever,
         vi_mode=vi_mode,
         log_file=log_file,
         prompt=not qq,
diff --git a/tests/test_lab.py b/tests/test_lab.py
@@ -115,6 +115,7 @@ def has_debug_params(self) -> bool:
     Command(("config", "show")),
     Command(("model",), needs_config=False, should_fail=False),
     Command(("model", "chat")),
+    Command(("model", "chat"), ("--rag",)),
     Command(("model", "convert"), ("--model-dir", "test")),
     Command(("model", "download")),
     Command(("model", "evaluate"), ("--benchmark", "mmlu")),
diff --git a/tests/test_model_chat.py b/tests/test_model_chat.py
@@ -1,14 +1,17 @@
 # Standard
 from unittest.mock import MagicMock
 import contextlib
+import logging
 import re
 
 # Third Party
 from rich.console import Console
 import pytest
 
 # First Party
-from instructlab.model.chat import ConsoleChatBot
+from instructlab.model.chat import ChatException, ConsoleChatBot
+
+logger = logging.getLogger(__name__)
 
 
 @pytest.mark.parametrize(
@@ -24,6 +27,19 @@ def test_model_name(model_path, expected_name):
     assert chatbot.model_name == expected_name
 
 
+def test_retriever_is_called_when_present():
+    retriever = MagicMock()
+    chatbot = ConsoleChatBot(
+        model="/var/model/file", client=None, retriever=retriever, loaded={}
+    )
+    assert chatbot.retriever == retriever
+    user_query = "test"
+    with pytest.raises(ChatException):
+        chatbot.start_prompt(content=user_query, logger=logger)
+    # Assert outside the `raises` block; code after the raising call never runs.
+    retriever.augmented_context.assert_called_with(user_query=user_query)
+
+
 def handle_output(output):
     return re.sub(r"\s+", " ", output).strip()
 
diff --git a/tests/testdata/default_config.yaml b/tests/testdata/default_config.yaml
@@ -239,6 +239,27 @@ rag:
     # Directory where taxonomy is stored and accessed from.
     # Default: /data/instructlab/taxonomy
     taxonomy_path: /data/instructlab/taxonomy
+  # Document store configuration for RAG.
+  document_store:
+    # Document store collection name.
+    # Default: ilab
+    collection_name: ilab
+    # Document store service URI.
+    # Default: /data/instructlab/embeddings.db
+    uri: /data/instructlab/embeddings.db
+  # Embedding model configuration for RAG
+  embedding_model:
+    # Embedding model to use for RAG.
+    # Default: /cache/instructlab/models/ibm-granite/granite-embedding-125m-english
+    embedding_model_name: /cache/instructlab/models/ibm-granite/granite-embedding-125m-english
+  # Flag for enabling RAG functionality.
+  # Default: False
+  enabled: false
+  # Retrieval configuration parameters for RAG
+  retriever:
+    # The maximum number of documents to retrieve.
+    # Default: 20
+    top_k: 20
 # Serve configuration section.
 serve:
   # Serving backend to use to host the model.