Commit

DC for student, RAG ICL for tutor
Enea_Gore committed Jan 28, 2025
1 parent 63ea116 commit e93b942
Showing 9 changed files with 235 additions and 264 deletions.
27 changes: 15 additions & 12 deletions modules/text/module_text_llm/module_text_llm/__main__.py
@@ -13,7 +13,8 @@
from module_text_llm.approach_controller import generate_suggestions
from module_text_llm.helpers.detect_suspicios_submission import hybrid_suspicion_score, llm_check
from module_text_llm.helpers.feedback_icl.store_feedback_icl import store_feedback_icl
from module_text_llm.few_shot_chain_of_thought_approach import FewShotChainOfThoughtConfig
from module_text_llm.divide_and_conquer import DivideAndConquerConfig
from module_text_llm.icl_rag import ICLRAGConfig
#Test Demo
from module_text_llm.analytics.compile import compile

@@ -51,17 +52,19 @@ async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded
logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested, with approach: %s and model: %s",
"Graded" if is_graded else "Non-graded", submission.id, exercise.id, module_config.approach.__class__.__name__, module_config.approach.model.model_name)

# if not is_graded:
# is_sus, score = hybrid_suspicion_score(submission.text, threshold=0.8)
# if is_sus:
# logger.info("Suspicious submission detected with score %f", score)
# is_suspicious,suspicios_text = await llm_check(submission.text)
# if is_suspicious:
# logger.info("Suspicious submission detected by LLM with text %s", suspicios_text)
# return [Feedback(title="Instructors need to review this submission", description="This Submission potentially violates the content policy!", credits=-1.0, exercise_id=exercise.id, submission_id=submission.id, is_graded=is_graded)]
# module_config.approach = FewShotChainOfThoughtConfig()
# return await generate_suggestions(exercise, submission, module_config.approach, module_config.debug, is_graded)
# module_config.approach = FewShotChainOfThoughtConfig()
# STUDENT
if not is_graded:
is_sus, score = hybrid_suspicion_score(submission.text, threshold=0.8)
if is_sus:
logger.info("Suspicious submission detected with score %f", score)
            is_suspicious, suspicious_text = await llm_check(submission.text)
            if is_suspicious:
                logger.info("Suspicious submission detected by LLM with text %s", suspicious_text)
                return [Feedback(title="Instructors need to review this submission", description="This submission potentially violates the content policy!", credits=-1.0, exercise_id=exercise.id, submission_id=submission.id, is_graded=is_graded)]
module_config.approach = FewShotChainOfThoughtConfig()
return await generate_suggestions(exercise, submission, module_config.approach, module_config.debug, is_graded)
# TUTOR
module_config.approach = ICLRAGConfig()
return await generate_suggestions(exercise, submission, module_config.approach, module_config.debug, is_graded)


3 changes: 2 additions & 1 deletion modules/text/module_text_llm/module_text_llm/config.py
@@ -10,8 +10,9 @@
from module_text_llm.basic_COT import BasicCOTApproachConfig
from module_text_llm.icl_rag import ICLRAGConfig
from module_text_llm.few_shot_COT import FewShotCOT
from module_text_llm.divide_and_conquer import DivideAndConquerConfig

ApproachConfigUnion = Union[FewShotCOT,ICLRAGConfig, BasicApproachConfig, FewShotChainOfThoughtConfig,BasicCOTApproachConfig, BestApproachConfig]
ApproachConfigUnion = Union[DivideAndConquerConfig,FewShotCOT,ICLRAGConfig, BasicApproachConfig, FewShotChainOfThoughtConfig,BasicCOTApproachConfig, BestApproachConfig]

@config_schema_provider
class Configuration(BaseModel):
@@ -0,0 +1,11 @@
from module_text_llm.approach_config import ApproachConfig
from typing import Literal
from athena.text import Exercise, Submission
from module_text_llm.divide_and_conquer.generate_suggestions import generate_suggestions

class DivideAndConquerConfig(ApproachConfig):
type: Literal['divide_and_conquer'] = 'divide_and_conquer'
# Prompts are generated at run time.
async def generate_suggestions(self, exercise: Exercise, submission: Submission, config,*, debug: bool, is_graded: bool):
return await generate_suggestions(exercise, submission, config, debug, is_graded)
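
A minimal dispatch sketch (illustrative, not part of this commit), assuming the same call pattern that suggest_feedback in __main__.py uses for the other approach configs:

# Hypothetical usage inside an async endpoint; exercise and submission are Athena Exercise/Submission objects.
config = DivideAndConquerConfig()
feedbacks = await config.generate_suggestions(exercise, submission, config, debug=False, is_graded=True)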

@@ -0,0 +1,114 @@
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import get_chat_prompt_with_formatting_instructions
from llm_core.utils.predict_and_parse import predict_and_parse
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel, double_curly_braces, get_system_prompt, get_human_message
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range
import asyncio

# Generates feedback suggestions by assessing the submission against each grading criterion in parallel.
async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool, is_graded: bool):
submission_text = double_curly_braces(submission.text)
model = config.model.get_model() # type: ignore[attr-defined]
prompt_input = {
"submission": add_sentence_numbers(submission_text)
}

grading_criteria = exercise.grading_criteria
feedbacks = []
grading_instruction_ids = set(
grading_instruction.id
for criterion in exercise.grading_criteria or []
for grading_instruction in criterion.structured_grading_instructions
)
tasks = []

for idx, criteria in enumerate(grading_criteria):
processing_inputs = {
"model": model,
"prompt_input": prompt_input,
"exercise": exercise,
"submission": submission,
"grading_instruction_ids": grading_instruction_ids,
"is_graded": is_graded,
"criteria_title": criteria.title
}
if ("plagiarism" in criteria.title.lower()): # Exclude plagarism because the model cannot know and it hallucinates
continue
        usage_count, system_prompt = get_system_prompt(idx, exercise, criteria)
        if usage_count == 1:
            chat_prompt = get_chat_prompt_with_formatting_instructions(model=model, system_message=system_prompt, human_message=get_human_message(), pydantic_object=FeedbackModel)
            processing_inputs["pydantic_object"] = FeedbackModel
        else:
            chat_prompt = get_chat_prompt_with_formatting_instructions(model=model, system_message=system_prompt, human_message=get_human_message(), pydantic_object=AssessmentModel)
            processing_inputs["pydantic_object"] = AssessmentModel
        processing_inputs["chat_prompt"] = chat_prompt
tasks.append(process_criteria(processing_inputs))

results = await asyncio.gather(*tasks)

# Flatten the list of feedbacks
for feedback_list in results:
feedbacks += feedback_list
return feedbacks

async def process_criteria(processing_inputs):

# Call the predict_and_parse method
result = await predict_and_parse(
model=processing_inputs["model"],
chat_prompt=processing_inputs["chat_prompt"],
prompt_input=processing_inputs["prompt_input"],
pydantic_object=processing_inputs["pydantic_object"],
tags=[
f"exercise-{processing_inputs['exercise'].id}",
f"submission-{processing_inputs['submission'].id}",
],
use_function_calling=True
)

if processing_inputs["pydantic_object"] is AssessmentModel:
try:
return parse_assessment_result(result, processing_inputs['exercise'], processing_inputs['submission'], processing_inputs["grading_instruction_ids"], processing_inputs["is_graded"])
except Exception as e:
logger.info("Failed to parse assessment result")
return []
else:
try:
return parse_feedback_result(result, processing_inputs['exercise'], processing_inputs['submission'], processing_inputs["grading_instruction_ids"], processing_inputs["is_graded"])
except Exception as e:
logger.info("Failed to parse feedback result")
return []

def parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded):
result_feedbacks = []
for feedback in result.assessment:
result_feedbacks += parse_feedback_result(feedback, exercise, submission, grading_instruction_ids, is_graded)
return result_feedbacks

def parse_feedback_result(feedback, exercise, submission, grading_instruction_ids, is_graded):
result_feedbacks = []

index_start, index_end = get_index_range_from_line_range(
feedback.line_start, feedback.line_end, submission.text
)
assessment_instruction_id = (
feedback.assessment_instruction_id
if feedback.assessment_instruction_id in grading_instruction_ids
else None
)
result_feedbacks.append(Feedback(
exercise_id=exercise.id,
submission_id=submission.id,
title=feedback.criteria,
description=feedback.feedback,
index_start=index_start,
index_end=index_end,
credits=feedback.credits,
is_graded=is_graded,
structured_grading_instruction_id=assessment_instruction_id,
meta={}
))
return result_feedbacks
@@ -0,0 +1,93 @@
from pydantic import Field, BaseModel
from typing import List, Optional
from athena.schemas.grading_criterion import GradingCriterion

def get_human_message():
return """
Now you must assess the following student submission and respond in JSON. The student submission to assess (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""

def double_curly_braces(input_str):
    # Curly braces are used as placeholders in the prompt, so strip them out if they appear in the submission text
    return input_str.replace("{", " ").replace("}", " ")
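
# Illustrative example of the behaviour as implemented (not part of the diff):
#   double_curly_braces("Return {result} as JSON")  ->  "Return  result  as JSON"
# Despite the name, braces are replaced with spaces rather than doubled, which is enough
# to keep stray braces in a submission from being read as prompt placeholders.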

# Prompts are generated at run time.
def get_system_prompt(index, exercise, criteria: GradingCriterion):
    system_prompt = """You are an AI assistant TUTOR at a prestigious university, tasked with assessing a text submission from a student. The problem statement is:"""
    usage_count, formatted_criterion = format_divide_and_conquer_criteria(index, exercise, criteria)
    return usage_count, system_prompt + formatted_criterion

def format_divide_and_conquer_criteria(index, exercise, criteria: GradingCriterion):
criteria_explanation_prompt = ""
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to the lecture materials, the exercise sheet, or any other materials, so do not make assumptions.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
    # Handle criteria that can be applied arbitrarily often (denoted by a usage count of 0). Careful with this one.

criteria_explanation_prompt += f"""
You have to assess the submission based on the criterion with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criterion.
"""
    usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
    use_same_usage_count = len(set(usage_counts)) == 1
    if use_same_usage_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit; however, stay focused only on this criterion in your feedback. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0] ,criteria_explanation_prompt

def get_criteria_application(usage_counts):
    usage_count_prompt = ""
    if usage_counts[0] == 0:
        usage_count_prompt = "You may apply this criterion as many times as needed if it fits the submission."
    elif usage_counts[0] == 1:
        usage_count_prompt = "You may only apply this criterion ONCE. You must pick the instruction that best fits the submission. "
    else:
        usage_count_prompt = f"You may apply this criterion {usage_counts[0]} times. Each time, you must pick the instruction that best fits the submission."

    usage_count_prompt += """ For this criterion you have different levels of assessment to give, based on the structured assessment instructions."""
    usage_count_prompt += """For different segments of the submission you may apply a different assessment instruction that fits that segment and give it its respective deserved credits.
    Identify all segments of the submission that relate to this criterion and its instructions and apply the correct feedback as described by the instructions.
    Keep in mind that the student might spread their answers throughout the whole submission.
    """ if usage_counts[0] != 1 else "You may apply this criterion only once and choose only a SINGLE assessment instruction that best fits the submission!"
    return usage_count_prompt

# Output Object
# Names have been redefined here, to be consistent with the prompt
# Local LLMs do better with these names. GoatPT does not care and does everything!
class FeedbackModel(BaseModel):
""" A Feedback object consisting of the criteria title, the feedback text, a line_start and line_end to depict
a reference to the text, creidts to depcit the credit amount given and an assessment_instruction_id to depict the assessment instruction ID used"""
criteria: str = Field(description="Short Criteria title!")
feedback: str = Field(description="The feedback in text form.")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of credits received/deducted")
assessment_instruction_id: Optional[int] = Field(
description="ID of the assessment instruction that was used to generate this feedback, or empty if no assessment instruction was used"
)

class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""
assessment: List[FeedbackModel] = Field(description="Assessment feedbacks")
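
# Rough illustration (not part of the diff) of the object shape the model is asked to return;
# the values below are made up for demonstration only.
#
#   AssessmentModel(assessment=[
#       FeedbackModel(criteria="Structure", feedback="The introduction states the thesis clearly.",
#                     line_start=1, line_end=2, credits=1.0, assessment_instruction_id=101),
#   ])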

This file was deleted.

