some refactoring and sanitizing
Enea_Gore committed Jan 27, 2025
1 parent 7e5197d commit 3c6d46d
Showing 3 changed files with 58 additions and 55 deletions.
@@ -3,11 +3,12 @@
from typing import Literal
from athena.text import Exercise, Submission
from module_text_llm.divide_and_conquer.generate_suggestions import generate_suggestions
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import GenerateSuggestionsPrompt
# from module_text_llm.divide_and_conquer.prompt_generate_suggestions import GenerateSuggestionsPrompt

class DivideAndConquerConfig(ApproachConfig):
type: Literal['divide_and_conquer'] = 'divide_and_conquer'
generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())
# Prompts are generated at run time.
# generate_suggestions_prompt: GenerateSuggestionsPrompt = Field(default=GenerateSuggestionsPrompt())

async def generate_suggestions(self, exercise: Exercise, submission: Submission, config, *, debug: bool, is_graded: bool):
return await generate_suggestions(exercise, submission, config, debug, is_graded)
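
For orientation, a minimal usage sketch follows; it is not part of this commit, and `exercise`/`submission` are assumed to be the Exercise and Submission instances handed to the module, as imported above.

# Hypothetical sketch: with the prompt field gone, the config is constructed
# without a GenerateSuggestionsPrompt; prompts are assembled at run time.
async def demo(exercise: Exercise, submission: Submission):
    config = DivideAndConquerConfig()
    return await config.generate_suggestions(
        exercise, submission, config, debug=False, is_graded=True
    )
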
@@ -4,6 +4,7 @@
from athena.logger import logger
from llm_core.utils.llm_utils import get_chat_prompt_with_formatting_instructions
from llm_core.utils.predict_and_parse import predict_and_parse
from athena.schemas.grading_criterion import GradingCriterion
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
@@ -32,43 +33,12 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi
for idx, criteria in enumerate(grading_criteria):
if(criteria.title == "Plagiarism" or criteria.title == "plagiarism"): # Exclude plagiarism because the model cannot know about it and it hallucinates
continue
criteria_explanation_prompt = f"You are an AI Assistant TUTOR at a prestigious university tasked with assessing text submissions. You are tasked with assessing a submission from a student. The problem statement is:"
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to the lecture materials, the exercise sheet, or other course resources. If any criterion or instruction requires such knowledge, do not make any assumptions; examples include plagiarism or examples from the lectures.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that may be applied arbitrarily often, denoted by a usage count of 0

criteria_explanation_prompt += f"""
You have to assess the submission based on the criterion with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criterion.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usaged_count = False
if(len(set(usage_counts)) == 1):
use_same_usaged_count = True
if use_same_usaged_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
if(usage_counts[0] > 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criteria_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = AssessmentModel)
usage_count, criterion_explanation_prompt = format_divide_and_conquer_criteria(idx, exercise, criteria)
if(usage_count > 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = AssessmentModel)
tasks.append(process_criteria(AssessmentModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
else:
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criteria_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = FeedbackModel)
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = FeedbackModel)
tasks.append(process_criteria(FeedbackModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
results = await asyncio.gather(*tasks)

@@ -96,11 +66,13 @@ async def process_criteria(pydantic_object, model, chat_prompt, prompt_input, ex
try:
return parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse assessment result")
return []
else:
try:
return parse_feedback_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse feedback result")
return []

def parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title):
@@ -149,4 +121,40 @@ def get_criteria_application(usage_counts):
Identify all segments of the submission that relate to this criterion and its instructions, and apply the correct feedback as described by the instructions.
Keep in mind that the student might spread their answer throughout the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criterion only once and choose only a SINGLE assessment instruction that best fits the submission!"
return usaged_count_prompt
return usaged_count_prompt


def format_divide_and_conquer_criteria(index, exercise, criteria: GradingCriterion):
criteria_explanation_prompt = "You are an AI Assistant TUTOR at a prestigious university tasked with assessing text submissions from students. The problem statement is:"
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to the lecture materials, the exercise sheet, or other course resources. If any criterion or instruction requires such knowledge, do not make any assumptions; examples include plagiarism or examples from the lectures.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that may be applied arbitrarily often, denoted by a usage count of 0

criteria_explanation_prompt += f"""
You have to assess the submission based on the criterion with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criterion.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usage_count = len(set(usage_counts)) == 1
if use_same_usage_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0], criteria_explanation_prompt
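
A hypothetical sketch of how the new helper is meant to be consumed, mirroring the call sites in generate_suggestions above; it is not part of this commit, and `model`, `exercise`, and `criteria` are assumed to be in scope.

# Hypothetical sketch: the returned usage count decides which output schema
# the LLM is asked to fill (a list of feedback items vs. a single item).
usage_count, prompt = format_divide_and_conquer_criteria(0, exercise, criteria)
schema = AssessmentModel if usage_count > 1 else FeedbackModel
chat_prompt = get_chat_prompt_with_formatting_instructions(
    model=model,
    system_message=prompt,
    human_message="Now you must assess the following student submission. The student submission:\n {submission}",
    pydantic_object=schema,
)
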
@@ -1,27 +1,21 @@
from pydantic import Field, BaseModel
from typing import List, Optional

# Input Prompt
class GenerateSuggestionsPrompt(BaseModel):
"""Features available: **{problem_statement}**, **{example_solution}**, **{grading_instructions}**, **{max_points}**, **{bonus_points}**, **{submission}**
_Note: **{problem_statement}**, **{example_solution}**, or **{grading_instructions}** might be omitted if the input is too long._"""
system_message: str = Field(default=system_message,
description="Message for priming AI behavior and instructing it what to do.")
human_message: str = Field(default=human_message,
description="Message from a human. The input on which the AI is supposed to act.")

# Prompts are generated at run time.
# Output Object
# Names have been redefined here to be consistent with the prompt.
# Local LLMs do better with these names; GoatPT does not care and handles everything.
class FeedbackModel(BaseModel):
title: str = Field(description="Very short title, i.e. feedback category or similar", example="Logic Error")
description: str = Field(description="Feedback description")
""" A Feedback object consisting of the criteria title, the feedback text, a line_start and line_end to depict
a reference to the text, creidts to depcit the credit amount given and an assessment_instruction_id to depict the assessment instruction ID used"""
criteria: str = Field(description="Short Criteria title!")
feedback: str = Field(description="The feedback in text form.")
line_start: Optional[int] = Field(description="Referenced line number start, or empty if unreferenced")
line_end: Optional[int] = Field(description="Referenced line number end, or empty if unreferenced")
credits: float = Field(0.0, description="Number of points received/deducted")
grading_instruction_id: Optional[int] = Field(
description="ID of the grading instruction that was used to generate this feedback, or empty if no grading instruction was used"
credits: float = Field(0.0, description="Number of credits received/deducted")
assessment_instruction_id: Optional[int] = Field(
description="ID of the assessment instruction that was used to generate this feedback, or empty if no assessment instruction was used"
)

class AssessmentModel(BaseModel):
"""Collection of feedbacks making up an assessment"""
feedbacks: List[FeedbackModel] = Field(description="Assessment feedbacks")
assessment: List[FeedbackModel] = Field(description="Assessment feedbacks")
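
A hypothetical example of how the renamed output schema is populated; it is not part of this commit, and the values are made up purely for illustration.

# Hypothetical, illustrative values only.
example = AssessmentModel(
    assessment=[
        FeedbackModel(
            criteria="Clarity",
            feedback="The argument is presented clearly and addresses the criterion.",
            line_start=3,
            line_end=5,
            credits=1.0,
            assessment_instruction_id=12,
        )
    ]
)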
