some refactoring and sanitizing
Enea_Gore committed Jan 27, 2025
1 parent 7e5197d commit 3c6d46d
Showing 3 changed files with 58 additions and 55 deletions.
@@ -3,11 +3,12 @@
from typing import Literal
from athena.text import Exercise, Submission
from module_text_llm.divide_and_conquer.generate_suggestions import generate_suggestions
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import GenerateSuggestionsPrompt
# from module_text_llm.divide_and_conquer.prompt_generate_suggestions import GenerateSuggestionsPrompt

class DivideAndConquerConfig(ApproachConfig):
type: Literal['divide_and_conquer'] = 'divide_and_conquer'
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())
# Prompts are generated at run time.
# generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, *, debug: bool, is_graded: bool):
return await generate_suggestions(exercise, submission, config, debug, is_graded)
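
For orientation, a minimal usage sketch follows; it is not part of this commit, and `exercise`/`submission` are assumed to be the Exercise and Submission instances handed to the module, as imported above.

# Hypothetical sketch: with the prompt field gone, the config is constructed
# without a GenerateSuggestionsPrompt; prompts are assembled at run time.
async def demo(exercise: Exercise, submission: Submission):
    config = DivideAndConquerConfig()
    return await config.generate_suggestions(
        exercise, submission, config, debug=False, is_graded=True
    )
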
@@ -4,6 +4,7 @@
from athena.logger import logger
from llm_core.utils.llm_utils import get_chat_prompt_with_formatting_instructions
from llm_core.utils.predict_and_parse import predict_and_parse
from athena.schemas.grading_criterion import GradingCriterion
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
@@ -32,43 +33,12 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi
for idx, criteria in enumerate(grading_criteria):
if(criteria.title == "Plagiarism" or criteria.title == "plagiarism"): # Exclude plagiarism because the model cannot know about it and it hallucinates
continue
criteria_explanation_prompt = f"You are an AI Assistant TUTOR at a prestigious university tasked with assessing text submissions. You are tasked with assessing a submission from a student. The problem statement is:"
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to the lecture materials, the exercise sheet, or other course resources. If any criterion or instruction requires such knowledge, do not make any assumptions; examples include plagiarism or examples from the lectures.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that may be applied arbitrarily often, denoted by a usage count of 0

criteria_explanation_prompt += f"""
You have to assess the submission based on the criterion with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criterion.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usaged_count = False
if(len(set(usage_counts)) == 1):
use_same_usaged_count = True
if use_same_usaged_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
if(usage_counts[0] > 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criteria_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = AssessmentModel)
usage_count, criterion_explanation_prompt = format_divide_and_conquer_criteria(idx, exercise, criteria)
if(usage_count > 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = AssessmentModel)
tasks.append(process_criteria(AssessmentModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
else:
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criteria_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = FeedbackModel)
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = FeedbackModel)
tasks.append(process_criteria(FeedbackModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
results = await asyncio.gather(*tasks)

@@ -96,11 +66,13 @@ async def process_criteria(pydantic_object, model, chat_prompt, prompt_input, ex
try:
return parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse assessment result")
return []
else:
try:
return parse_feedback_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse feedback result")
return []

def parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title):
@@ -149,4 +121,40 @@ def get_criteria_application(usage_counts):
Identify all segments of the submission that relate to this criterion and its instructions, and apply the correct feedback as described by the instructions.
Keep in mind that the student might spread their answer throughout the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criterion only once and choose only a SINGLE assessment instruction that best fits the submission!"
return usaged_count_prompt
return usaged_count_prompt


def format_divide_and_conquer_criteria(index, exercise, criteria: GradingCriterion):
criteria_explanation_prompt = "You are an AI Assistant TUTOR at a prestigious university tasked with assessing text submissions from students. The problem statement is:"
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to the lecture materials, the exercise sheet, or other course resources. If any criterion or instruction requires such knowledge, do not make any assumptions; examples include plagiarism or examples from the lectures.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that may be applied arbitrarily often, denoted by a usage count of 0

criteria_explanation_prompt += f"""
You have to assess the submission based on the criterion with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criterion.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usage_count = len(set(usage_counts)) == 1
if use_same_usage_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0], criteria_explanation_prompt
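
A hypothetical sketch of how the new helper is meant to be consumed, mirroring the call sites in generate_suggestions above; it is not part of this commit, and `model`, `exercise`, and `criteria` are assumed to be in scope.

# Hypothetical sketch: the returned usage count decides which output schema
# the LLM is asked to fill (a list of feedback items vs. a single item).
usage_count, prompt = format_divide_and_conquer_criteria(0, exercise, criteria)
schema = AssessmentModel if usage_count > 1 else FeedbackModel
chat_prompt = get_chat_prompt_with_formatting_instructions(
    model=model,
    system_message=prompt,
    human_message="Now you must assess the following student submission. The student submission:\n {submission}",
    pydantic_object=schema,
)
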
@@ -1,27 +1,21 @@
from pydantic import Field, BaseModel
from typing import List, Optional

# Input Prompt
class GenerateSuggestionsPrompt(BaseModel):
"""Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._"""
system_message: str = Field(default=system_message,
description="Message for priming AI behavior and instructing it what to do.")
human_message: str = Field(default=human_message,
description="Message from a human. The input on which the AI is supposed to act.")

# Prompts are generated at run time.
# Output Object
# Names have been redefined here to be consistent with the prompt.
# Local LLMs do better with these names; GoatPT does not care and handles everything.
class FeedbackModel(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
""" A Feedback object consisting of the criteria title, the feedback text, a line_start and line_end to depict
a reference to the text, creidts to depcit the credit amount given and an assessment_instruction_id to depict the assessment instruction ID used"""
criteria: str = Field(description="Short Criteria title!")
feedback: str = Field(description="The feedback in text form.")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
grading_instruction_id: Optional[int] = Field(
description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
credits: float = Field(0.0, description="Number of credits received/deducted")
assessment_instruction_id: Optional[int] = Field(
description="ID of the assessment instruction that was used to generate this feedback, or empty if no assessment instruction was used"
)

class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""
feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
assessment: List[FeedbackModel] = Field(description="Assessment feedbacks")
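
A hypothetical example of how the renamed output schema is populated; it is not part of this commit, and the values are made up purely for illustration.

# Hypothetical, illustrative values only.
example = AssessmentModel(
    assessment=[
        FeedbackModel(
            criteria="Clarity",
            feedback="The argument is presented clearly and addresses the criterion.",
            line_start=3,
            line_end=5,
            credits=1.0,
            assessment_instruction_id=12,
        )
    ]
)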
