
base of icl rag
Enea_Gore committed Jan 16, 2025
1 parent 9f9e97c commit 5e74e8b
Showing 12 changed files with 910 additions and 537 deletions.
774 changes: 390 additions & 384 deletions athena/poetry.lock

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions athena/pyproject.toml
@@ -13,6 +13,7 @@ httpx = "^0.24.1"
gitpython = "^3.1.41"
sqlalchemy = {extras = ["mypy"], version = "^2.0.21"}
psycopg2 = "^2.9.9"
faiss-cpu = "1.9.0.post1"

[tool.poetry.group.dev.dependencies]
types-requests = "^2.31.0.8"
12 changes: 10 additions & 2 deletions modules/text/module_text_llm/module_text_llm/__main__.py
@@ -11,7 +11,9 @@
from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics
from module_text_llm.generate_evaluation import generate_evaluation
from module_text_llm.approach_controller import generate_suggestions

from module_text_llm.storage_embeddings import save_embedding
from module_text_llm.generate_embeddings import embed_text
from module_text_llm.index_storage import store_embedding_index
@submissions_consumer
def receive_submissions(exercise: Exercise, submissions: List[Submission]):
logger.info("receive_submissions: Received %d submissions for exercise %d", len(submissions), exercise.id)
@@ -26,7 +28,13 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Submission:
@feedback_consumer
def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id)

    # Save the submission embedding and record which (exercise, submission) it belongs to.
submission_id = submission.id
exercise_id = exercise.id
embedded_submission = embed_text(submission.text)
store_embedding_index(exercise_id, submission_id)
save_embedding(embedded_submission)

@feedback_provider
async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]:
logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested",
3 changes: 2 additions & 1 deletion modules/text/module_text_llm/module_text_llm/config.py
@@ -1,11 +1,12 @@
from module_text_llm.icl_rag_approach import IclRagApproachConfig
from pydantic import BaseModel, Field
from typing import Union
from athena import config_schema_provider

from module_text_llm.chain_of_thought_approach import ChainOfThoughtConfig
from module_text_llm.basic_approach import BasicApproachConfig

-ApproachConfigUnion = Union[BasicApproachConfig, ChainOfThoughtConfig]
+ApproachConfigUnion = Union[IclRagApproachConfig, BasicApproachConfig, ChainOfThoughtConfig]

@config_schema_provider
class Configuration(BaseModel):
7 changes: 7 additions & 0 deletions modules/text/module_text_llm/module_text_llm/generate_embeddings.py
@@ -0,0 +1,7 @@
from langchain_openai import OpenAIEmbeddings
import numpy as np

def embed_text(text):
    # OpenAIEmbeddings reads the OpenAI API key from the OPENAI_API_KEY environment variable
    embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
    query_result = embeddings.embed_query(text)
    return np.array(query_result, dtype=np.float32)
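
A minimal usage sketch for embed_text (hypothetical input; OpenAIEmbeddings picks the API key up from the environment as noted above):

from module_text_llm.generate_embeddings import embed_text

vector = embed_text("Photosynthesis converts light energy into chemical energy.")
print(vector.shape)  # (1536,): text-embedding-3-small produces 1536-dimensional vectors
print(vector.dtype)  # float32, the dtype FAISS expects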
13 changes: 13 additions & 0 deletions modules/text/module_text_llm/module_text_llm/icl_rag_approach/__init__.py
@@ -0,0 +1,13 @@
from module_text_llm.approach_config import ApproachConfig
from pydantic import Field
from typing import Literal
from athena.text import Exercise, Submission
from module_text_llm.icl_rag_approach.generate_suggestions import generate_suggestions
from module_text_llm.icl_rag_approach.prompt_generate_suggestions import GenerateSuggestionsPrompt

class IclRagApproachConfig(ApproachConfig):
type: Literal['icl_rag_approach'] = 'icl_rag_approach'
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, debug: bool):
return await generate_suggestions(exercise, submission, config, debug)
145 changes: 145 additions & 0 deletions modules/text/module_text_llm/module_text_llm/icl_rag_approach/generate_suggestions.py
@@ -0,0 +1,145 @@
from typing import List
from module_text_llm.approach_config import ApproachConfig
from athena import emit_meta
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import (
get_chat_prompt_with_formatting_instructions,
check_prompt_length_and_omit_features_if_necessary,
num_tokens_from_prompt,
)
from llm_core.utils.predict_and_parse import predict_and_parse
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.icl_rag_approach.prompt_generate_suggestions import AssessmentModel
from module_text_llm.index_storage import retrieve_embedding_index
from module_text_llm.storage_embeddings import query_embedding
from module_text_llm.generate_embeddings import embed_text
from athena.text import get_stored_feedback, get_stored_submissions

def format_rag_context(rag_context):
formatted_string = ""

for context_item in rag_context:
submission_text = context_item["submission"].text
feedback_list = context_item["feedback"]

# Format submission text
formatted_string += f"**Submission:**\n{submission_text}\n\n"

# Format feedback list
formatted_string += "**Feedback:**\n"
for idx, feedback in enumerate(feedback_list, start=1):
formatted_string += f"{idx}. {feedback}\n"

# Add a separator between submissions
formatted_string += "\n" + "-"*40 + "\n"

return formatted_string

async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool) -> List[Feedback]:
model = config.model.get_model() # type: ignore[attr-defined]

    # Retrieve stored submissions similar to the incoming one and feed them to the prompt.
    query_submission = embed_text(submission.text)

rag_context = []

    list_of_indices = query_embedding(query_submission) # positions of the nearest neighbours in the index storage
    if list_of_indices is not None:
        logger.info(f"List of indices: {list_of_indices}")
        for index in list_of_indices[0]:
            if index != -1:
                # Map the FAISS position back to the stored (exercise_id, submission_id) pair
                exercise_id, submission_id = retrieve_embedding_index(index)
                stored_feedback = list(get_stored_feedback(exercise_id, submission_id))
                stored_submission = list(get_stored_submissions(exercise_id, only_ids=[submission_id]))[0]
                logger.info("Stored feedback:")
                for feedback_item in stored_feedback:
                    logger.info(f"- {feedback_item}")
                logger.info("Stored submission:")
                logger.info(f"{stored_submission.text}")
                # format_rag_context expects the submission object (it reads .text itself)
                rag_context.append({"submission": stored_submission, "feedback": stored_feedback})

        formatted_rag_context = format_rag_context(rag_context)
        logger.info("Formatted RAG context: %s", formatted_rag_context)
    else:
        formatted_rag_context = "There are no submissions at the moment."
prompt_input = {
"max_points": exercise.max_points,
"bonus_points": exercise.bonus_points,
"grading_instructions": format_grading_instructions(exercise.grading_instructions, exercise.grading_criteria),
"problem_statement": exercise.problem_statement or "No problem statement.",
"example_solution": exercise.example_solution,
"rag_context": formatted_rag_context,
"submission": add_sentence_numbers(submission.text)
}

chat_prompt = get_chat_prompt_with_formatting_instructions(
model=model,
system_message=config.generate_suggestions_prompt.system_message,
human_message=config.generate_suggestions_prompt.human_message,
pydantic_object=AssessmentModel
)

# Check if the prompt is too long and omit features if necessary (in order of importance)
omittable_features = ["example_solution", "problem_statement", "grading_instructions"]
prompt_input, should_run = check_prompt_length_and_omit_features_if_necessary(
prompt=chat_prompt,
        prompt_input=prompt_input,
max_input_tokens=config.max_input_tokens,
omittable_features=omittable_features,
debug=debug
)

# Skip if the prompt is too long
if not should_run:
logger.warning("Input too long. Skipping.")
if debug:
emit_meta("prompt", chat_prompt.format(**prompt_input))
emit_meta("error", f"Input too long {num_tokens_from_prompt(chat_prompt, prompt_input)} > {config.max_input_tokens}")
return []

result = await predict_and_parse(
model=model,
chat_prompt=chat_prompt,
prompt_input=prompt_input,
pydantic_object=AssessmentModel,
tags=[
f"exercise-{exercise.id}",
f"submission-{submission.id}",
],
use_function_calling=True
)

if debug:
emit_meta("generate_suggestions", {
"prompt": chat_prompt.format(**prompt_input),
"result": result.dict() if result is not None else None
})

if result is None:
return []

grading_instruction_ids = set(
grading_instruction.id
for criterion in exercise.grading_criteria or []
for grading_instruction in criterion.structured_grading_instructions
)

feedbacks = []
for feedback in result.feedbacks:
index_start, index_end = get_index_range_from_line_range(feedback.line_start, feedback.line_end, submission.text)
grading_instruction_id = feedback.grading_instruction_id if feedback.grading_instruction_id in grading_instruction_ids else None
feedbacks.append(Feedback(
exercise_id=exercise.id,
submission_id=submission.id,
title=feedback.title,
description=feedback.description,
index_start=index_start,
index_end=index_end,
credits=feedback.credits,
structured_grading_instruction_id=grading_instruction_id,
meta={}
))

return feedbacks
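
To illustrate what format_rag_context feeds into the prompt, a small sketch with stand-in data (SimpleNamespace mimics a stored submission object; the feedback strings are hypothetical):

from types import SimpleNamespace

stored_submission = SimpleNamespace(text="Photosynthesis converts light energy into chemical energy.")
rag_context = [{"submission": stored_submission,
                "feedback": ["Correct definition (+1.0)", "Chlorophyll is not mentioned (-0.5)"]}]
print(format_rag_context(rag_context))
# **Submission:**
# Photosynthesis converts light energy into chemical energy.
#
# **Feedback:**
# 1. Correct definition (+1.0)
# 2. Chlorophyll is not mentioned (-0.5)
# ----------------------------------------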
56 changes: 56 additions & 0 deletions modules/text/module_text_llm/module_text_llm/icl_rag_approach/prompt_generate_suggestions.py
@@ -0,0 +1,56 @@
from pydantic import Field, BaseModel
from typing import List, Optional

system_message = """You are an AI tutor for text assessment at a prestigious university.
# Task
Create graded feedback suggestions for a student's text submission that a human tutor would accept, meaning the feedback you provide should be applicable to the submission with little to no modification.
# Style
1. Constructive, 2. Specific, 3. Balanced, 4. Clear and Concise, 5. Actionable, 6. Educational, 7. Contextual
# Problem statement
{problem_statement}
# Example solution
{example_solution}
# Grading instructions
{grading_instructions}
Max points: {max_points}, bonus points: {bonus_points}
# Tutor selected feedback to guide you
{rag_context}
Respond in json.
"""

human_message = """Student's submission to grade (with sentence numbers <number>: <sentence>):
{submission}
"""

# Input Prompt
class GenerateSuggestionsPrompt(BaseModel):
"""Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._"""
system_message: str = Field(default=system_message,
description="Message for priming AI behavior and instructing it what to do.")
human_message: str = Field(default=human_message,
description="Message from a human. The input on which the AI is supposed to act.")

# Output Object
class FeedbackModel(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
grading_instruction_id: Optional[int] = Field(
description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
)

class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""
feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
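
For reference, a hypothetical assessment of the shape predict_and_parse produces from these models (all values invented for illustration):

example = AssessmentModel(feedbacks=[
    FeedbackModel(
        title="Missing example",
        description="The definition is correct but no concrete example is given.",
        line_start=2,
        line_end=2,
        credits=-0.5,
        grading_instruction_id=None,
    )
])
print(example.dict())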
40 changes: 40 additions & 0 deletions modules/text/module_text_llm/module_text_llm/index_storage.py
@@ -0,0 +1,40 @@
import json
import os

INDEX_FILE = "indices.json"

def load_indices():
""" Load the indices from the file or return an empty dictionary if the file does not exist. """
if os.path.exists(INDEX_FILE):
with open(INDEX_FILE, 'r') as f:
return json.load(f)
else:
return {}

def store_embedding_index(exercise_id, submission_id):
""" Store a new submission and exercise ID with an auto-incrementing index. """
indices = load_indices() # Load existing indices from the file
next_index = len(indices) # Calculate the next available index

# Add the new entry to the dictionary with next_index as the key
indices[next_index] = {
"exercise_id": exercise_id,
"submission_id": submission_id
}

# Write the updated dictionary back to the file
with open(INDEX_FILE, 'w') as f:
json.dump(indices, f, indent=4)

print(f"Stored new entry: Exercise ID {exercise_id}, Submission ID {submission_id} at index {next_index}")

def retrieve_embedding_index(index):
    """ Retrieve the exercise_id and submission_id by index. """
    index = str(index) # Convert index to string for dictionary lookup (JSON object keys are strings)
    indices = load_indices()

# Find the entry by index (direct dictionary lookup)
if index in indices:
return indices[index]["exercise_id"], indices[index]["submission_id"]

return None, None # Return None if index is not found
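
A quick round-trip through the mapping (hypothetical IDs, assuming a fresh indices.json). The scheme works because indices.json and the FAISS index are both append-only and grow in the same order, so FAISS position k corresponds to key "k" here:

store_embedding_index(exercise_id=42, submission_id=7)    # stored at index 0
exercise_id, submission_id = retrieve_embedding_index(0)  # returns (42, 7)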
55 changes: 55 additions & 0 deletions modules/text/module_text_llm/module_text_llm/storage_embeddings.py
@@ -0,0 +1,55 @@
import faiss
import numpy as np
import os

def save_embedding(embedding):
# Check if the index file already exists
if os.path.exists("embeddings.index"):
# If it exists, load the existing index
index = faiss.read_index("embeddings.index")
else:
# If not, create a new index
index = faiss.IndexFlatL2(embedding.shape[0]) # Use the correct dimension (embedding size)

# Add the embedding to the index
    index.add(np.array([embedding], dtype=np.float32)) # FAISS expects a 2-D float32 array of shape (n, d)

# Save the updated index to the disk
faiss.write_index(index, "embeddings.index")

def query_embedding(query_embedding):
# Check if the index file exists
index_file = 'embeddings.index'

if not os.path.exists(index_file):
print(f"Error: The index file '{index_file}' does not exist.")
return None

try:
# Read the FAISS index
index = faiss.read_index(index_file)
except Exception as e:
print(f"Error while reading the FAISS index: {e}")
return None

# Ensure the query_embedding is in the correct shape
try:
query_embedding = np.array([query_embedding], dtype=np.float32) # Convert to np array
if query_embedding.ndim != 2 or query_embedding.shape[1] != index.d: # Check the dimension compatibility
print(f"Error: Query embedding shape does not match the index dimensions.")
return None
except Exception as e:
print(f"Error with embedding format: {e}")
return None

# Set the number of nearest neighbors to retrieve
k = 3 # Number of nearest neighbors

try:
# Search the index for nearest neighbors
distances, indices = index.search(query_embedding, k)
print(f"Indices of similar texts: {indices}")
print(f"Distances: {distances}")
return indices
except Exception as e:
print(f"Error during FAISS search: {e}")
return None
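
A round-trip sketch for the two helpers (synthetic float32 vectors; 1536 matches the size of the text-embedding-3-small vectors stored above):

import numpy as np

rng = np.random.default_rng(0)
for _ in range(3):
    save_embedding(rng.random(1536, dtype=np.float32))

neighbours = query_embedding(rng.random(1536, dtype=np.float32))
# neighbours has shape (1, 3): the FAISS positions of the k=3 nearest vectors,
# padded with -1 whenever the index holds fewer than k entries.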