
base of icl rag
Enea_Gore committed Jan 16, 2025
1 parent 9f9e97c commit 5e74e8b
Showing 12 changed files with 910 additions and 537 deletions.
774 changes: 390 additions & 384 deletions athena/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions athena/pyproject.toml
@@ -13,6 +13,7 @@ httpx = "^0.24.1"
gitpython = "^3.1.41"
sqlalchemy = {extras = ["mypy"], version = "^2.0.21"}
psycopg2 = "^2.9.9"
faiss-cpu = "1.9.0.post1"

[tool.poetry.group.dev.dependencies]
types-requests = "^2.31.0.8"
12 changes: 10 additions & 2 deletions modules/text/module_text_llm/module_text_llm/__main__.py
@@ -11,7 +11,9 @@
from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics
from module_text_llm.generate_evaluation import generate_evaluation
from module_text_llm.approach_controller import generate_suggestions

from module_text_llm.storage_embeddings import save_embedding
from module_text_llm.generate_embeddings import embed_text
from module_text_llm.index_storage import store_embedding_index
@submissions_consumer
def receive_submissions(exercise: Exercise, submissions: List[Submission]):
logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id)
@@ -26,7 +28,13 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission:
@feedback_consumer
def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id)

    # Save the submission embedding and record which (exercise, submission) it belongs to.
submission_id = submission.id
exercise_id = exercise.id
embedded_submission = embed_text(submission.text)
store_embedding_index(exercise_id, submission_id)
save_embedding(embedded_submission)

@feedback_provider
async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]:
logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested",
3 changes: 2 additions & 1 deletion modules/text/module_text_llm/module_text_llm/config.py
@@ -1,11 +1,12 @@
from module_text_llm.icl_rag_approach import IclRagApproachConfig
from pydantic import BaseModel, Field
from typing import Union
from athena import config_schema_provider

from module_text_llm.chain_of_thought_approach import ChainOfThoughtConfig
from module_text_llm.basic_approach import BasicApproachConfig

-ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig]
+ApproachConfigUnion = Union[IclRagApproachConfig, BasicApproachConfig, ChainOfThoughtConfig]

@config_schema_provider
class Configuration(BaseModel):
7 changes: 7 additions & 0 deletions modules/text/module_text_llm/module_text_llm/generate_embeddings.py
@@ -0,0 +1,7 @@
from langchain_openai import OpenAIEmbeddings
import numpy as np

def embed_text(text):
    # OpenAIEmbeddings reads the OpenAI API key from the OPENAI_API_KEY environment variable
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    query_result = embeddings.embed_query(text)
    return np.array(query_result, dtype=np.float32)
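
A minimal usage sketch for embed_text (hypothetical input; OpenAIEmbeddings picks the API key up from the environment as noted above):

from module_text_llm.generate_embeddings import embed_text

vector = embed_text("Photosynthesis converts light energy into chemical energy.")
print(vector.shape)  # (1536,): text-embedding-3-small produces 1536-dimensional vectors
print(vector.dtype)  # float32, the dtype FAISS expects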
13 changes: 13 additions & 0 deletions modules/text/module_text_llm/module_text_llm/icl_rag_approach/__init__.py
@@ -0,0 +1,13 @@
from module_text_llm.approach_config import ApproachConfig
from pydantic import Field
from typing import Literal
from athena.text import Exercise, Submission
from module_text_llm.icl_rag_approach.generate_suggestions import generate_suggestions
from module_text_llm.icl_rag_approach.prompt_generate_suggestions import GenerateSuggestionsPrompt

class IclRagApproachConfig(ApproachConfig):
type: Literal['icl_rag_approach'] = 'icl_rag_approach'
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, debug: bool):
return await generate_suggestions(exercise, submission, config, debug)
145 changes: 145 additions & 0 deletions modules/text/module_text_llm/module_text_llm/icl_rag_approach/generate_suggestions.py
@@ -0,0 +1,145 @@
from typing import List
from module_text_llm.approach_config import ApproachConfig
from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
check_prompt_length_and_omit_features_if_necessary,
num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.icl_rag_approach.prompt_generate_suggestions import AssessmentModel
from module_text_llm.index_storage import retrieve_embedding_index
from module_text_llm.storage_embeddings import query_embedding
from module_text_llm.generate_embeddings import embed_text
from athena.text import get_stored_feedback, get_stored_submissions

def format_rag_context(rag_context):
formatted_string = ""

for context_item in rag_context:
submission_text = context_item["submission"].text
feedback_list = context_item["feedback"]

# Format submission text
formatted_string += f"**Submission:**\n{submission_text}\n\n"

# Format feedback list
formatted_string += "**Feedback:**\n"
for idx, feedback in enumerate(feedback_list, start=1):
formatted_string += f"{idx}. {feedback}\n"

# Add a separator between submissions
formatted_string += "\n" + "-"*40 + "\n"

return formatted_string

async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool) -> List[Feedback]:
model = config.model.get_model() # type: ignore[attr-defined]

    # Retrieve stored submissions similar to the incoming one and feed them to the prompt.
    query_submission = embed_text(submission.text)

rag_context = []

    list_of_indices = query_embedding(query_submission) # positions of the nearest neighbours in the index storage
    if list_of_indices is not None:
        logger.info(f"List of indices: {list_of_indices}")
        for index in list_of_indices[0]:
            if index != -1:
                # Map the FAISS position back to the stored (exercise_id, submission_id) pair
                exercise_id, submission_id = retrieve_embedding_index(index)
                stored_feedback = list(get_stored_feedback(exercise_id, submission_id))
                stored_submission = list(get_stored_submissions(exercise_id, only_ids=[submission_id]))[0]
                logger.info("Stored feedback:")
                for feedback_item in stored_feedback:
                    logger.info(f"- {feedback_item}")
                logger.info("Stored submission:")
                logger.info(f"{stored_submission.text}")
                # format_rag_context expects the submission object (it reads .text itself)
                rag_context.append({"submission": stored_submission, "feedback": stored_feedback})

        formatted_rag_context = format_rag_context(rag_context)
        logger.info("Formatted RAG context: %s", formatted_rag_context)
    else:
        formatted_rag_context = "There are no submissions at the moment."
prompt_input = {
"max_points": exercise.max_points,
"bonus_points": exercise.bonus_points,
"grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
"problem_statement": exercise.problem_statement or "No problem statement.",
"example_solution": exercise.example_solution,
"rag_context": formatted_rag_context,
"submission": add_sentence_numbers(submission.text)
}

chat_prompt = get_chat_prompt_with_formatting_instructions(
model=model,
system_message=config.generate_suggestions_prompt.system_message,
human_message=config.generate_suggestions_prompt.human_message,
pydantic_object=AssessmentModel
)

# Check if the prompt is too long and omit features if necessary (in order of importance)
omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
prompt=chat_prompt,
        prompt_input=prompt_input,
max_input_tokens=config.max_input_tokens,
omittable_features=omittable_features,
debug=debug
)

# Skip if the prompt is too long
if not should_run:
logger.warning("Input too long. Skipping.")
if debug:
emit_meta("prompt", chat_prompt.format(**prompt_input))
emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
return []

result = await predict_and_parse(
model=model,
chat_prompt=chat_prompt,
prompt_input=prompt_input,
pydantic_object=AssessmentModel,
tags=[
f"exercise-{exercise.id}",
f"submission-{submission.id}",
],
use_function_calling=True
)

if debug:
emit_meta("generate_suggestions", {
"prompt": chat_prompt.format(**prompt_input),
"result": result.dict() if result is not None else None
})

if result is None:
return []

grading_instruction_ids = set(
grading_instruction.id
for criterion in exercise.grading_criteria or []
for grading_instruction in criterion.structured_grading_instructions
)

feedbacks = []
for feedback in result.feedbacks:
index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
feedbacks.append(Feedback(
exercise_id=exercise.id,
submission_id=submission.id,
title=feedback.title,
description=feedback.description,
index_start=index_start,
index_end=index_end,
credits=feedback.credits,
structured_grading_instruction_id=grading_instruction_id,
meta={}
))

return feedbacks
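
To illustrate what format_rag_context feeds into the prompt, a small sketch with stand-in data (SimpleNamespace mimics a stored submission object; the feedback strings are hypothetical):

from types import SimpleNamespace

stored_submission = SimpleNamespace(text="Photosynthesis converts light energy into chemical energy.")
rag_context = [{"submission": stored_submission,
                "feedback": ["Correct definition (+1.0)", "Chlorophyll is not mentioned (-0.5)"]}]
print(format_rag_context(rag_context))
# **Submission:**
# Photosynthesis converts light energy into chemical energy.
#
# **Feedback:**
# 1. Correct definition (+1.0)
# 2. Chlorophyll is not mentioned (-0.5)
# ----------------------------------------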
56 changes: 56 additions & 0 deletions modules/text/module_text_llm/module_text_llm/icl_rag_approach/prompt_generate_suggestions.py
@@ -0,0 +1,56 @@
from pydantic import Field, BaseModel
from typing import List, Optional

system_message = """You are an AI tutor for text assessment at a prestigious university.
# Task
Create graded feedback suggestions for a student's text submission that a human tutor would accept, meaning the feedback you provide should be applicable to the submission with little to no modification.
# Style
1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
# Problem statement
{problem_statement}
# Example solution
{example_solution}
# Grading instructions
{grading_instructions}
Max points: {max_points}, bonus points: {bonus_points}
# Tutor selected feedback to guide you
{rag_context}
Respond in json.
"""

human_message = """Student's submission to grade (with sentence numbers <number>: <sentence>):
{submission}
"""

# Input Prompt
class GenerateSuggestionsPrompt(BaseModel):
"""Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._"""
system_message: str = Field(default=system_message,
description="Message for priming AI behavior and instructing it what to do.")
human_message: str = Field(default=human_message,
description="Message from a human. The input on which the AI is supposed to act.")

# Output Object
class FeedbackModel(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
grading_instruction_id: Optional[int] = Field(
description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
)

class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""
feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
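
For reference, a hypothetical assessment of the shape predict_and_parse produces from these models (all values invented for illustration):

example = AssessmentModel(feedbacks=[
    FeedbackModel(
        title="Missing example",
        description="The definition is correct but no concrete example is given.",
        line_start=2,
        line_end=2,
        credits=-0.5,
        grading_instruction_id=None,
    )
])
print(example.dict())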
40 changes: 40 additions & 0 deletions modules/text/module_text_llm/module_text_llm/index_storage.py
@@ -0,0 +1,40 @@
import json
import os

INDEX_FILE = "indices.json"

def load_indices():
""" Load the indices from the file or return an empty dictionary if the file does not exist. """
if os.path.exists(INDEX_FILE):
with open(INDEX_FILE, 'r') as f:
return json.load(f)
else:
return {}

def store_embedding_index(exercise_id, submission_id):
""" Store a new submission and exercise ID with an auto-incrementing index. """
indices = load_indices() # Load existing indices from the file
next_index = len(indices) # Calculate the next available index

# Add the new entry to the dictionary with next_index as the key
indices[next_index] = {
"exercise_id": exercise_id,
"submission_id": submission_id
}

# Write the updated dictionary back to the file
with open(INDEX_FILE, 'w') as f:
json.dump(indices, f, indent=4)

print(f"Stored new entry: Exercise ID {exercise_id}, Submission ID {submission_id} at index {next_index}")

def retrieve_embedding_index(index):
    """ Retrieve the exercise_id and submission_id by index. """
    index = str(index) # Convert index to string for dictionary lookup (JSON object keys are strings)
    indices = load_indices()

# Find the entry by index (direct dictionary lookup)
if index in indices:
return indices[index]["exercise_id"], indices[index]["submission_id"]

return None, None # Return None if index is not found
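
A quick round-trip through the mapping (hypothetical IDs, assuming a fresh indices.json). The scheme works because indices.json and the FAISS index are both append-only and grow in the same order, so FAISS position k corresponds to key "k" here:

store_embedding_index(exercise_id=42, submission_id=7)    # stored at index 0
exercise_id, submission_id = retrieve_embedding_index(0)  # returns (42, 7)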
55 changes: 55 additions & 0 deletions modules/text/module_text_llm/module_text_llm/storage_embeddings.py
@@ -0,0 +1,55 @@
import faiss
import numpy as np
import os

def save_embedding(embedding):
# Check if the index file already exists
if os.path.exists("embeddings.index"):
# If it exists, load the existing index
index = faiss.read_index("embeddings.index")
else:
# If not, create a new index
index = faiss.IndexFlatL2(embedding.shape[0]) # Use the correct dimension (embedding size)

# Add the embedding to the index
    index.add(np.array([embedding], dtype=np.float32)) # FAISS expects a 2-D float32 array of shape (n, d)

# Save the updated index to the disk
faiss.write_index(index, "embeddings.index")

def query_embedding(query_embedding):
# Check if the index file exists
index_file = 'embeddings.index'

if not os.path.exists(index_file):
print(f"Error: The index file '{index_file}' does not exist.")
return None

try:
# Read the FAISS index
index = faiss.read_index(index_file)
except Exception as e:
print(f"Error while reading the FAISS index: {e}")
return None

# Ensure the query_embedding is in the correct shape
try:
query_embedding = np.array([query_embedding], dtype=np.float32) # Convert to np array
if query_embedding.ndim != 2 or query_embedding.shape[1] != index.d: # Check the dimension compatibility
print(f"Error: Query embedding shape does not match the index dimensions.")
return None
except Exception as e:
print(f"Error with embedding format: {e}")
return None

# Set the number of nearest neighbors to retrieve
k = 3 # Number of nearest neighbors

try:
# Search the index for nearest neighbors
distances, indices = index.search(query_embedding, k)
print(f"Indices of similar texts: {indices}")
print(f"Distances: {distances}")
return indices
except Exception as e:
print(f"Error during FAISS search: {e}")
return None
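
A round-trip sketch for the two helpers (synthetic float32 vectors; 1536 matches the size of the text-embedding-3-small vectors stored above):

import numpy as np

rng = np.random.default_rng(0)
for _ in range(3):
    save_embedding(rng.random(1536, dtype=np.float32))

neighbours = query_embedding(rng.random(1536, dtype=np.float32))
# neighbours has shape (1, 3): the FAISS positions of the k=3 nearest vectors,
# padded with -1 whenever the index holds fewer than k entries.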