Commit
cohesive refactoring and prompt improvements
Enea_Gore committed Jan 27, 2025
1 parent 3c6d46d commit 09fe557
Showing 2 changed files with 83 additions and 67 deletions.
@@ -4,15 +4,10 @@
from athena.logger import logger
from llm_core.utils.llm_utils import get_chat_prompt_with_formatting_instructions
from llm_core.utils.predict_and_parse import predict_and_parse
from athena.schemas.grading_criterion import GradingCriterion
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel
from module_text_llm.divide_and_conquer.prompt_generate_suggestions import AssessmentModel, FeedbackModel, double_curly_braces, get_system_prompt, get_human_message
from module_text_llm.approach_config import ApproachConfig
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range, format_grading_instructions
from module_text_llm.helpers.utils import add_sentence_numbers, get_index_range_from_line_range
import asyncio
import re
def double_curly_braces(input_str):
# Curly braces are used as placeholders in the prompt, so we strip them (replace with spaces) if they appear in the text
return input_str.replace("{", " ").replace("}", " ")

# Placeholder for generate suggestions logic.
async def generate_suggestions(exercise: Exercise, submission: Submission, config: ApproachConfig, debug: bool, is_graded: bool):
@@ -31,15 +26,16 @@ async def generate_suggestions(exercise: Exercise, submission: Submission, confi
)
tasks = []
for idx, criteria in enumerate(grading_criteria):
if(criteria.title == "Plagiarism" or criteria.title == "plagiarism"): # Exclude plagiarism becase the model cannot know and it hallucinates
if(criteria.title == "Plagiarism" or criteria.title == "plagiarism"): # Exclude plagiarism because the model cannot know and it hallucinates
continue
usage_count, criterion_explanation_prompt = format_divide_and_conquer_criteria(idx,exercise, criteria)
if(usage_count > 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = AssessmentModel)
tasks.append(process_criteria(AssessmentModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
usage_count, system_prompt = get_system_prompt(idx,exercise, criteria)
if(usage_count == 1):
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = system_prompt,human_message = get_human_message(),pydantic_object = FeedbackModel)
tasks.append(process_criteria(FeedbackModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
else:
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = criterion_explanation_prompt,human_message = "Now you must assess the following student submission. The student submission:\n {submission}",pydantic_object = FeedbackModel)
tasks.append(process_criteria(FeedbackModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))
chat_prompt = get_chat_prompt_with_formatting_instructions(model = model, system_message = system_prompt,human_message= get_human_message(),pydantic_object = AssessmentModel)
tasks.append(process_criteria(AssessmentModel, model, chat_prompt, prompt_input, exercise, submission, grading_instruction_ids, is_graded,criteria.title))

results = await asyncio.gather(*tasks)

# Flatten the list of feedbacks
@@ -66,13 +62,13 @@ async def process_criteria(pydantic_object, model, chat_prompt, prompt_input, ex
try:
return parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse assessment result")
logger.info("Failed to parse assessment result")
return []
else:
try:
return parse_feedback_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title)
except Exception as e:
print("Failed to parse feedback result")
logger.info("Failed to parse feedback result")
return []

def parse_assessment_result(result, exercise, submission, grading_instruction_ids, is_graded,criteria_title):
@@ -107,54 +103,3 @@ def parse_feedback_result(feedback, exercise, submission, grading_instruction_id
return result_feedbacks


def get_criteria_application(usage_counts):
usaged_count_prompt = ""
if usage_counts[0] == 0:
usaged_count_prompt = "You may apply this criteria as many times as it is needed if it fits the submission."
elif usage_counts[0] == 1:
usaged_count_prompt = "You may only apply this criteria ONCE. You must pick the instruction that best fits the submission. "
else:
usaged_count_prompt = f"You may apply thic criteria {usage_counts[0]} times. Each time must pick the instruction that best fits the submission."

usaged_count_prompt += """ For this criteria you have different levels of assessment to give, based on the structured assessment instructions."""
usaged_count_prompt += """For different segments of the submission you may apply a different assessment instruction that is fitting to that segment and give it its respective deserved credits.
Identify all segments of the submission that relate to this criteria and its instructions and apply the correct feedback as described by the instructions.
Keep in mind that the student might seperate his answers throught the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criteria only once and choose only a SINGLE assessment instruciton that best fits the submission!"
return usaged_count_prompt


def format_divide_and_conquer_criteria(index,exercise, criteria: GradingCriterion):
criteria_explanation_prompt = f"You are an AI Assistant TUTOR at a prestigious university tasked with assessing text submissions. You are tasked with assessing a submission from a student. The problem statement is:"
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to lecture materials, exercise sheets, or other resources. If any criteria or instruction requires such knowledge, do not make any assumptions; examples include plagiarism, examples from lectures, etc.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that can be applied arbitrarily often; this is denoted by a usage count of 0

criteria_explanation_prompt += f"""
You have to assess the submission based on the criteria with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criteria.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usaged_count = False
if(len(set(usage_counts)) == 1):
use_same_usaged_count = True
if use_same_usaged_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0] ,criteria_explanation_prompt
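
Aside: the refactor keeps relying on double_curly_braces to sanitize student text before it is interpolated into a prompt template. Below is a minimal, self-contained sketch of why that matters; the template string and sample submission are illustrative, not taken from the repository.

# Minimal sketch, assuming str.format-style templating: stray braces in student
# text would otherwise be parsed as template placeholders.
def double_curly_braces(input_str):
    # Despite the name, the helper blanks braces out rather than escaping them.
    return input_str.replace("{", " ").replace("}", " ")

# Hypothetical template, similar in shape to the human message used above.
template = "Now you must assess the following student submission:\n {submission}"

raw = "My answer uses {curly braces} in pseudo-code."
# template.format(submission=raw) would fail: "{curly braces}" looks like a
# placeholder with no matching argument.
print(template.format(submission=double_curly_braces(raw)))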
@@ -1,6 +1,77 @@
from pydantic import Field, BaseModel
from typing import List, Optional
from athena.schemas.grading_criterion import GradingCriterion

def get_human_message():
return """
Now you must assess the following student submission and respond in json. The student submission to assess (with sentence numbers <number>: <sentence>):
\"\"\"
{submission}
\"\"\"\
"""

def double_curly_braces(input_str):
# Curly braces are used as placeholders in the prompt, so we strip them (replace with spaces) if they appear in the text
return input_str.replace("{", " ").replace("}", " ")

# Prompts are generated at run time.
def get_system_prompt(index,exercise,criteria:GradingCriterion):
system_prompt = """You are an AI Assistant TUTOR at a prestigious university, tasked with assessing text submissions from students. The problem statement is:"""
usage_count, formatted_criterion = format_divide_and_conquer_criteria(index,exercise,criteria)
return usage_count, system_prompt + formatted_criterion

def format_divide_and_conquer_criteria(index,exercise, criteria: GradingCriterion):
criteria_explanation_prompt = ""
problem_statement = f"""
# Problem Statement
{double_curly_braces(exercise.problem_statement)}.
# End Problem Statement
A sample solution to the problem statement is:
# Example Solution
{double_curly_braces(exercise.example_solution)}
# End Example Solution
# General Instructions
You do not have access to lecture materials, exercise sheets, or other materials, so do not make assumptions.
# End General Instructions"""

criteria_explanation_prompt += problem_statement
# Handle criteria that can be applied arbitrarily often; this is denoted by a usage count of 0. CAREFUL WITH THIS ONE.

criteria_explanation_prompt += f"""
You have to assess the submission based on the criteria with the title: "{criteria.title}". There are
{len(criteria.structured_grading_instructions)} structured assessment instruction options for this criteria.
"""
usage_counts = [instruction.usage_count for instruction in criteria.structured_grading_instructions]
use_same_usaged_count = False
if(len(set(usage_counts)) == 1):
use_same_usaged_count = True
if use_same_usaged_count:
criteria_explanation_prompt += f"""
{get_criteria_application(usage_counts)}.
The structured assessment instructions are as follows: \n"""
for idx,instruction in enumerate(criteria.structured_grading_instructions):
criteria_explanation_prompt += f"""
Instruction Number {idx+1}: Apply {instruction.credits} credits if the following description fits the student's submission: "{instruction.instruction_description}". A possible feedback could be along the lines of "{instruction.feedback}", but you may adjust it as you see fit; however, stay focused only on this criteria in your feedback. Apply assessment instruction id {instruction.id} to this segment of the submission. \n
"""
return usage_counts[0] ,criteria_explanation_prompt

def get_criteria_application(usage_counts):
usaged_count_prompt = ""
if usage_counts[0] == 0:
usaged_count_prompt = "You may apply this criteria as many times as it is needed if it fits the submission."
elif usage_counts[0] == 1:
usaged_count_prompt = "You may only apply this criteria ONCE. You must pick the instruction that best fits the submission. "
else:
usaged_count_prompt = f"You may apply thic criteria {usage_counts[0]} times. Each time must pick the instruction that best fits the submission."

usaged_count_prompt += """ For this criteria you have different levels of assessment to give, based on the structured assessment instructions."""
usaged_count_prompt += """For different segments of the submission you may apply a different assessment instruction that is fitting to that segment and give it its respective deserved credits.
Identify all segments of the submission that relate to this criteria and its instructions and apply the correct feedback as described by the instructions.
Keep in mind that the student might seperate his answers throught the whole submission.
""" if usage_counts[0] != 1 else "You may apply this criteria only once and choose only a SINGLE assessment instruciton that best fits the submission!"
return usaged_count_prompt

# Output Object
# Names have been redefined here, to be consistent with the prompt
# Local LLMs do better with these names. GoatPT does not care and does everything!
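
For orientation, here is a minimal, self-contained sketch of the dispatch pattern generate_suggestions follows after this refactor: one concurrent task per grading criterion, plagiarism criteria skipped, single-use criteria routed to the feedback schema and everything else to the assessment schema. Criterion, assess, and the stubbed model call are hypothetical stand-ins, not the module's real helpers.

import asyncio
from dataclasses import dataclass
from typing import List

@dataclass
class Criterion:  # hypothetical stand-in for GradingCriterion
    title: str
    usage_count: int  # 0 means "may be applied arbitrarily often"

async def assess(criterion: Criterion) -> List[str]:
    # Stand-in for process_criteria: choose the response schema by usage count,
    # then run the (stubbed) model call concurrently with the other criteria.
    schema = "FeedbackModel" if criterion.usage_count == 1 else "AssessmentModel"
    await asyncio.sleep(0)  # placeholder for the real LLM call
    return [f"{criterion.title}: parsed with {schema}"]

async def generate(criteria: List[Criterion]) -> List[str]:
    tasks = [
        assess(c) for c in criteria
        if c.title.lower() != "plagiarism"  # excluded: the model cannot judge plagiarism
    ]
    results = await asyncio.gather(*tasks)
    return [fb for per_criterion in results for fb in per_criterion]  # flatten

if __name__ == "__main__":
    demo = [Criterion("Content", 1), Criterion("Structure", 0), Criterion("Plagiarism", 1)]
    print(asyncio.run(generate(demo)))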
