Commit e93b942
Enea_Gore committed on Jan 28, 2025
1 parent: 63ea116
Showing 9 changed files with 235 additions and 264 deletions.
11 changes: 11 additions & 0 deletions
modules/text/module_text_llm/module_text_llm/divide_and_conquer/__init__.py
@@ -0,0 +1,11 @@
from module_text_llm.approach_config import ApproachConfig
from typing import Literal
from athena.text import Exercise, Submission
from module_text_llm.divide_and_conquer.generate_suggestions import generate_suggestions


class DivideAndConquerConfig(ApproachConfig):
    type: Literal['divide_and_conquer'] = 'divide_and_conquer'
    # Prompts are generated at run time.
    async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, *, debug: bool, is_graded: bool):
        return await generate_suggestions(exercise, submission, config, debug, is_graded)
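For orientation: the type: Literal['divide_and_conquer'] field is what lets Pydantic dispatch to this approach when approach configs are deserialized. Below is a minimal, standalone sketch of that discriminated-union pattern using toy stand-ins rather than the real ApproachConfig hierarchy (it assumes a Pydantic version with discriminated-union support, i.e. 1.9+ or 2.x):

from typing import Literal, Union
from pydantic import BaseModel, Field

class ToyDivideAndConquer(BaseModel):
    type: Literal["divide_and_conquer"] = "divide_and_conquer"

class ToyChainOfThought(BaseModel):
    type: Literal["chain_of_thought"] = "chain_of_thought"

class ToyModuleConfig(BaseModel):
    # Pydantic picks the matching class based on the "type" discriminator value.
    approach: Union[ToyDivideAndConquer, ToyChainOfThought] = Field(discriminator="type")

cfg = ToyModuleConfig(approach={"type": "divide_and_conquer"})
print(type(cfg.approach).__name__)  # ToyDivideAndConquer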
114 changes: 114 additions & 0 deletions
modules/text/module_text_llm/module_text_llm/divide_and_conquer/generate_suggestions.py
@@ -0,0 +1,114 @@
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger
from llm_core.utils.llm_utils import get_chat_prompt_with_formatting_instructions
from llm_core.utils.predict_and_parse import predict_and_parse
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel, double_curly_braces, get_system_prompt, get_human_message
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range
import asyncio


# Entry point for the divide-and-conquer approach: one LLM call per grading criterion, run concurrently.
async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool, is_graded: bool):
    submission_text = double_curly_braces(submission.text)
    model = config.model.get_model()  # type: ignore[attr-defined]
    prompt_input = {
        "submission": add_sentence_numbers(submission_text)
    }

    grading_criteria = exercise.grading_criteria or []
    feedbacks = []
    grading_instruction_ids = set(
        grading_instruction.id
        for criterion in exercise.grading_criteria or []
        for grading_instruction in criterion.structured_grading_instructions
    )
    tasks = []

    for idx, criteria in enumerate(grading_criteria):
        processing_inputs = {
            "model": model,
            "prompt_input": prompt_input,
            "exercise": exercise,
            "submission": submission,
            "grading_instruction_ids": grading_instruction_ids,
            "is_graded": is_graded,
            "criteria_title": criteria.title
        }
        if "plagiarism" in criteria.title.lower():  # Exclude plagiarism criteria: the model cannot know about plagiarism and tends to hallucinate.
            continue
        usage_count, system_prompt = get_system_prompt(idx, exercise, criteria)
        if usage_count == 1:
            chat_prompt = get_chat_prompt_with_formatting_instructions(model=model, system_message=system_prompt, human_message=get_human_message(), pydantic_object=FeedbackModel)
            processing_inputs["pydantic_object"] = FeedbackModel
            processing_inputs["chat_prompt"] = chat_prompt
        else:
            chat_prompt = get_chat_prompt_with_formatting_instructions(model=model, system_message=system_prompt, human_message=get_human_message(), pydantic_object=AssessmentModel)
            processing_inputs["pydantic_object"] = AssessmentModel
            processing_inputs["chat_prompt"] = chat_prompt
        tasks.append(process_criteria(processing_inputs))

    results = await asyncio.gather(*tasks)

    # Flatten the list of feedbacks
    for feedback_list in results:
        feedbacks += feedback_list
    return feedbacks


async def process_criteria(processing_inputs):
    # Call the predict_and_parse method
    result = await predict_and_parse(
        model=processing_inputs["model"],
        chat_prompt=processing_inputs["chat_prompt"],
        prompt_input=processing_inputs["prompt_input"],
        pydantic_object=processing_inputs["pydantic_object"],
        tags=[
            f"exercise-{processing_inputs['exercise'].id}",
            f"submission-{processing_inputs['submission'].id}",
        ],
        use_function_calling=True
    )

    if processing_inputs["pydantic_object"] is AssessmentModel:
        try:
            return parse_assessment_result(result, processing_inputs['exercise'], processing_inputs['submission'], processing_inputs["grading_instruction_ids"], processing_inputs["is_graded"])
        except Exception as e:
            logger.info("Failed to parse assessment result: %s", e)
            return []
    else:
        try:
            return parse_feedback_result(result, processing_inputs['exercise'], processing_inputs['submission'], processing_inputs["grading_instruction_ids"], processing_inputs["is_graded"])
        except Exception as e:
            logger.info("Failed to parse feedback result: %s", e)
            return []


def parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded):
    result_feedbacks = []
    for feedback in result.assessment:
        result_feedbacks += parse_feedback_result(feedback, exercise, submission, grading_instruction_ids, is_graded)
    return result_feedbacks


def parse_feedback_result(feedback, exercise, submission, grading_instruction_ids, is_graded):
    result_feedbacks = []

    index_start, index_end = get_index_range_from_line_range(
        feedback.line_start, feedback.line_end, submission.text
    )
    assessment_instruction_id = (
        feedback.assessment_instruction_id
        if feedback.assessment_instruction_id in grading_instruction_ids
        else None
    )
    result_feedbacks.append(Feedback(
        exercise_id=exercise.id,
        submission_id=submission.id,
        title=feedback.criteria,
        description=feedback.feedback,
        index_start=index_start,
        index_end=index_end,
        credits=feedback.credits,
        is_graded=is_graded,
        structured_grading_instruction_id=assessment_instruction_id,
        meta={}
    ))
    return result_feedbacks
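The control flow above is a per-criterion fan-out: one task per grading criterion, run concurrently with asyncio.gather, then flattened into a single feedback list. A minimal, self-contained sketch of that pattern; the names here are illustrative stand-ins, not part of the module:

import asyncio
from typing import List

async def assess_one(criterion_title: str) -> List[str]:
    # Stand-in for the real predict_and_parse call plus result parsing.
    await asyncio.sleep(0)  # placeholder for the LLM round trip
    return [f"feedback for {criterion_title}"]

async def assess_all(criterion_titles: List[str]) -> List[str]:
    # Skip plagiarism criteria, mirroring generate_suggestions above.
    tasks = [assess_one(title) for title in criterion_titles if "plagiarism" not in title.lower()]
    results = await asyncio.gather(*tasks)  # one concurrent task per criterion
    return [feedback for sublist in results for feedback in sublist]  # flatten

print(asyncio.run(assess_all(["Content", "Structure", "Plagiarism"])))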
93 changes: 93 additions & 0 deletions
...es/text/module_text_llm/module_text_llm/divide_and_conquer/prompt_generate_suggestions.py
@@ -0,0 +1,93 @@
from pydantic import Field, BaseModel
from typing import List, Optional
from athena.schemas.grading_criterion import GradingCriterion


def get_human_message():
    return """
Now you must assess the following student submission and respond in json. The student submission to assess (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""


def double_curly_braces(input_str):
    # Curly braces are used as placeholders in the prompt, so we need to escape them if found in the text
    return input_str.replace("{", " ").replace("}", " ")


# Prompts are generated at run time.
def get_system_prompt(index, exercise, criteria: GradingCriterion):
    system_prompt = """You are an AI assistant tutor at a prestigious university, tasked with assessing a text submission from a student. The problem statement is:"""
    usage_count, formatted_criterion = format_divide_and_conquer_criteria(index, exercise, criteria)
    return usage_count, system_prompt + formatted_criterion


def format_divide_and_conquer_criteria(index, exercise, criteria: GradingCriterion):
    criteria_explanation_prompt = ""
    problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to lecture materials, the exercise sheet or other materials, so do not make assumptions.
# End General Instructions"""

    criteria_explanation_prompt += problem_statement
    # Handle criteria that may be applied arbitrarily often; this is denoted by a usage_count of 0. CAREFUL WITH THIS ONE.

    criteria_explanation_prompt += f"""
You have to assess the submission based on the criteria with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criteria.
"""
    usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
    use_same_usage_count = False
    if len(set(usage_counts)) == 1:
        use_same_usage_count = True
    if use_same_usage_count:
        criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
    for idx, instruction in enumerate(criteria.structured_grading_instructions):
        criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit; however, stay focused only on this criteria in your feedback. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
    return usage_counts[0], criteria_explanation_prompt


def get_criteria_application(usage_counts):
    usage_count_prompt = ""
    if usage_counts[0] == 0:
        usage_count_prompt = "You may apply this criteria as many times as needed if it fits the submission."
    elif usage_counts[0] == 1:
        usage_count_prompt = "You may only apply this criteria ONCE. You must pick the instruction that best fits the submission. "
    else:
        usage_count_prompt = f"You may apply this criteria {usage_counts[0]} times. Each time you must pick the instruction that best fits the submission."

    usage_count_prompt += """ For this criteria you have different levels of assessment to give, based on the structured assessment instructions."""
    usage_count_prompt += """For different segments of the submission you may apply a different assessment instruction that fits that segment and give it its respective deserved credits.
Identify all segments of the submission that relate to this criteria and its instructions and apply the correct feedback as described by the instructions.
Keep in mind that the student might separate their answers throughout the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criteria only once and choose only a SINGLE assessment instruction that best fits the submission!"
    return usage_count_prompt


# Output Object
# Names have been redefined here, to be consistent with the prompt.
# Local LLMs do better with these names. GoatPT does not care and does everything!
class FeedbackModel(BaseModel):
    """A Feedback object consisting of the criteria title, the feedback text, a line_start and line_end to depict
    a reference to the text, credits to depict the credit amount given and an assessment_instruction_id to depict the assessment instruction ID used"""
    criteria: str = Field(description="Short Criteria title!")
    feedback: str = Field(description="The feedback in text form.")
    line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
    line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
    credits: float = Field(0.0, description="Number of credits received/deducted")
    assessment_instruction_id: Optional[int] = Field(
        description="ID of the assessment instruction that was used to generate this feedback, or empty if no assessment instruction was used"
    )


class AssessmentModel(BaseModel):
    """Collection of feedbacks making up an assessment"""
    assessment: List[FeedbackModel] = Field(description="Assessment feedbacks")
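To make the expected output schema concrete, the models above can be instantiated directly; the values below are invented purely for illustration:

from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel

example = AssessmentModel(assessment=[
    FeedbackModel(
        criteria="Content",
        feedback="The main argument is well supported by evidence.",
        line_start=2,
        line_end=4,
        credits=1.0,
        assessment_instruction_id=101,
    )
])
print(example.assessment[0].credits)  # 1.0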
22 changes: 0 additions & 22 deletions
modules/text/module_text_llm/module_text_llm/ollama_chain_of_thought_approach/__init__.py
This file was deleted.