Commit
cohesive refactoring and prompt improvements
Enea_Gore committed Jan 27, 2025
1 parent 3c6d46d commit 09fe557
Showing 2 changed files with 83 additions and 67 deletions.
@@ -4,15 +4,10 @@
from athena.logger import logger
from llm_core.utils.llm_utils import get_chat_prompt_with_formatting_instructions
from llm_core.utils.predict_and_parse import predict_and_parse
from athena.schemas.grading_criterion import GradingCriterion
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel, double_curly_braces, get_system_prompt, get_human_message
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range
import asyncio
import re
def double_curly_braces(input_str):
# Curly braces are used as placeholders in the prompt, so we strip them (replace with spaces) if they appear in the text
return input_str.replace("{", " ").replace("}", " ")

# Placeholder for generate suggestions logic.
async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool, is_graded: bool):
@@ -31,15 +26,16 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi
)
tasks = []
for idx, criteria in enumerate(grading_criteria):
if(criteria.title == "Plagiarism" or criteria.title == "plagiarism"): # Exclude plagiarism becase the model cannot know and it hallucinates
if(criteria.title == "Plagiarism" or criteria.title == "plagiarism"): # Exclude plagiarism because the model cannot know and it hallucinates
continue
usage_count, criterion_explanation_prompt = format_divide_and_conquer_criteria(idx,exercise, criteria)
if(usage_count > 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = AssessmentModel)
tasks.append(process_criteria(AssessmentModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
usage_count, system_prompt = get_system_prompt(idx,exercise, criteria)
if(usage_count == 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = system_prompt,human_message = get_human_message(),pydantic_object = FeedbackModel)
tasks.append(process_criteria(FeedbackModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
else:
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = FeedbackModel)
tasks.append(process_criteria(FeedbackModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = system_prompt,human_message= get_human_message(),pydantic_object = AssessmentModel)
tasks.append(process_criteria(AssessmentModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))

results = await asyncio.gather(*tasks)

# Flatten the list of feedbacks
@@ -66,13 +62,13 @@ async def process_criteria(pydantic_object, model, chat_prompt, prompt_input, ex
try:
return parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse assessment result")
logger.info("Failed to parse assessment result")
return []
else:
try:
return parse_feedback_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse feedback result")
logger.info("Failed to parse feedback result")
return []

def parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title):
@@ -107,54 +103,3 @@ def parse_feedback_result(feedback, exercise, submission, grading_instruction_id
return result_feedbacks


def get_criteria_application(usage_counts):
usaged_count_prompt = ""
if usage_counts[0] == 0:
usaged_count_prompt = "You may apply this criteria as many times as it is needed if it fits the submission."
elif usage_counts[0] == 1:
usaged_count_prompt = "You may only apply this criteria ONCE. You must pick the instruction that best fits the submission. "
else:
usaged_count_prompt = f"You may apply thic criteria {usage_counts[0]} times. Each time must pick the instruction that best fits the submission."

usaged_count_prompt += """ For this criteria you have different levels of assessment to give, based on the structured assessment instructions."""
usaged_count_prompt += """For different segments of the submission you may apply a different assessment instruction that is fitting to that segment and give it its respective deserved credits.
Identify all segments of the submission that relate to this criteria and its instructions and apply the correct feedback as described by the instructions.
Keep in mind that the student might seperate his answers throught the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criteria only once and choose only a SINGLE assessment instruciton that best fits the submission!"
return usaged_count_prompt


def format_divide_and_conquer_criteria(index,exercise, criteria: GradingCriterion):
criteria_explanation_prompt = f"You are an AI Assistant TUTOR at a prestigious university tasked with assessing text submissions. You are tasked with assessing a submission from a student. The problem statement is:"
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to lecture materials, exercise sheets, or other resources. If any criteria or instruction requires such knowledge, do not make any assumptions; examples include plagiarism, examples from lectures, etc.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that can be applied arbitrarily often; this is denoted by a usage count of 0

criteria_explanation_prompt += f"""
You have to assess the submission based on the criteria with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criteria.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usaged_count = False
if(len(set(usage_counts)) == 1):
use_same_usaged_count = True
if use_same_usaged_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0] ,criteria_explanation_prompt
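
Aside: the refactor keeps relying on double_curly_braces to sanitize student text before it is interpolated into a prompt template. Below is a minimal, self-contained sketch of why that matters; the template string and sample submission are illustrative, not taken from the repository.

# Minimal sketch, assuming str.format-style templating: stray braces in student
# text would otherwise be parsed as template placeholders.
def double_curly_braces(input_str):
    # Despite the name, the helper blanks braces out rather than escaping them.
    return input_str.replace("{", " ").replace("}", " ")

# Hypothetical template, similar in shape to the human message used above.
template = "Now you must assess the following student submission:\n {submission}"

raw = "My answer uses {curly braces} in pseudo-code."
# template.format(submission=raw) would fail: "{curly braces}" looks like a
# placeholder with no matching argument.
print(template.format(submission=double_curly_braces(raw)))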
@@ -1,6 +1,77 @@
from pydantic import Field, BaseModel
from typing import List, Optional
from athena.schemas.grading_criterion import GradingCriterion

def get_human_message():
return """
Now you must assess the following student submission and respond in json. The student submission to assess (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""

def double_curly_braces(input_str):
# Curly braces are used as placeholders in the prompt, so we strip them (replace with spaces) if they appear in the text
return input_str.replace("{", " ").replace("}", " ")

# Prompts are generated at run time.
def get_system_prompt(index,exercise,criteria:GradingCriterion):
system_prompt = """You are an AI Assistant TUTOR at a prestigious university, tasked with assessing text submissions from students. The problem statement is:"""
usage_count, formatted_criterion = format_divide_and_conquer_criteria(index,exercise,criteria)
return usage_count, system_prompt + formatted_criterion

def format_divide_and_conquer_criteria(index,exercise, criteria: GradingCriterion):
criteria_explanation_prompt = ""
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to lecture materials, exercise sheets, or other materials, so do not make assumptions.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that can be applied arbitrarily often; this is denoted by a usage count of 0. CAREFUL WITH THIS ONE.

criteria_explanation_prompt += f"""
You have to assess the submission based on the criteria with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criteria.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usaged_count = False
if(len(set(usage_counts)) == 1):
use_same_usaged_count = True
if use_same_usaged_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit; however, stay focused only on this criteria in your feedback. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0] ,criteria_explanation_prompt

def get_criteria_application(usage_counts):
usaged_count_prompt = ""
if usage_counts[0] == 0:
usaged_count_prompt = "You may apply this criteria as many times as it is needed if it fits the submission."
elif usage_counts[0] == 1:
usaged_count_prompt = "You may only apply this criteria ONCE. You must pick the instruction that best fits the submission. "
else:
usaged_count_prompt = f"You may apply thic criteria {usage_counts[0]} times. Each time must pick the instruction that best fits the submission."

usaged_count_prompt += """ For this criteria you have different levels of assessment to give, based on the structured assessment instructions."""
usaged_count_prompt += """For different segments of the submission you may apply a different assessment instruction that is fitting to that segment and give it its respective deserved credits.
Identify all segments of the submission that relate to this criteria and its instructions and apply the correct feedback as described by the instructions.
Keep in mind that the student might seperate his answers throught the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criteria only once and choose only a SINGLE assessment instruciton that best fits the submission!"
return usaged_count_prompt

# Output Object
# Names have been redefined here, to be consistent with the prompt
# Local LLMs do better with these names. GoatPT does not care and does everything!
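
For orientation, here is a minimal, self-contained sketch of the dispatch pattern generate_suggestions follows after this refactor: one concurrent task per grading criterion, plagiarism criteria skipped, single-use criteria routed to the feedback schema and everything else to the assessment schema. Criterion, assess, and the stubbed model call are hypothetical stand-ins, not the module's real helpers.

import asyncio
from dataclasses import dataclass
from typing import List

@dataclass
class Criterion:  # hypothetical stand-in for GradingCriterion
    title: str
    usage_count: int  # 0 means "may be applied arbitrarily often"

async def assess(criterion: Criterion) -> List[str]:
    # Stand-in for process_criteria: choose the response schema by usage count,
    # then run the (stubbed) model call concurrently with the other criteria.
    schema = "FeedbackModel" if criterion.usage_count == 1 else "AssessmentModel"
    await asyncio.sleep(0)  # placeholder for the real LLM call
    return [f"{criterion.title}: parsed with {schema}"]

async def generate(criteria: List[Criterion]) -> List[str]:
    tasks = [
        assess(c) for c in criteria
        if c.title.lower() != "plagiarism"  # excluded: the model cannot judge plagiarism
    ]
    results = await asyncio.gather(*tasks)
    return [fb for per_criterion in results for fb in per_criterion]  # flatten

if __name__ == "__main__":
    demo = [Criterion("Content", 1), Criterion("Structure", 0), Criterion("Plagiarism", 1)]
    print(asyncio.run(generate(demo)))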
