diff --git a/athena/athena/__init__.py b/athena/athena/__init__.py index f7eee0079..104e5a862 100644 --- a/athena/athena/__init__.py +++ b/athena/athena/__init__.py @@ -7,7 +7,7 @@ from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction, StructuredGradingCriterion from .metadata import emit_meta, get_meta from .experiment import get_experiment_environment -from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore +from .endpoints import submission_selector, submissions_consumer, generate_statistics, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore @app.get("/") def module_health(): @@ -37,5 +37,6 @@ def run_module(): "ExerciseType", "GradingCriterion", "StructuredGradingInstruction", - "StructuredGradingCriterion" + "StructuredGradingCriterion", + "generate_statistics" ] diff --git a/athena/athena/endpoints.py b/athena/athena/endpoints.py index 6d259a2a2..ec02dc83e 100644 --- a/athena/athena/endpoints.py +++ b/athena/athena/endpoints.py @@ -1,9 +1,10 @@ # type: ignore # too much weird behavior of mypy with decorators import inspect -from fastapi import Depends, BackgroundTasks, Body +from fastapi import Depends, BackgroundTasks, Body, Request from pydantic import BaseModel, ValidationError from typing import TypeVar, Callable, List, Union, Any, Coroutine, Type +from fastapi.responses import HTMLResponse from athena.app import app from athena.authenticate import authenticated from athena.metadata import with_meta @@ -196,6 +197,16 @@ async def wrapper(request: SubmissionSelectorRequest): return wrapper +def generate_statistics(func): + @app.post("/generate_statistics", response_class=HTMLResponse) + async def wrapper(request: Request): + try: + results = await request.json() + return await func(results) + except Exception as e: + return {"error": str(e)} + + return wrapper def feedback_consumer(func: Union[ Callable[[E, S, List[F]], None], @@ -234,7 +245,6 @@ def feedback_consumer(func: Union[ submission_type = inspect.signature(func).parameters["submission"].annotation feedback_type = inspect.signature(func).parameters["feedbacks"].annotation.__args__[0] module_config_type = inspect.signature(func).parameters["module_config"].annotation if "module_config" in inspect.signature(func).parameters else None - @app.post("/feedbacks", responses=module_responses) @authenticated @with_meta diff --git a/llm_core/llm_core/models/openai.py b/llm_core/llm_core/models/openai.py index 7bcc0f11f..ad33d4374 100644 --- a/llm_core/llm_core/models/openai.py +++ b/llm_core/llm_core/models/openai.py @@ -81,7 +81,7 @@ class OpenAIModelConfig(ModelConfig): We generally recommend altering this or `top_p` but not both.\ """) - top_p: float = Field(default=1, ge=0, le=1, description="""\ + top_p: float = Field(default=0, ge=0, le=1, description="""\ An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. \ So 0.1 means only the tokens comprising the top 10% probability mass are considered. 
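Note on the new route: the `generate_statistics` decorator registers a plain POST endpoint that hands the parsed JSON body to the decorated function and serves the returned string as HTML (`response_class=HTMLResponse`). A minimal client-side sketch of calling it is shown below; the module address/port and the secret value are illustrative assumptions, only the `Authorization` header name is taken from the playground fetcher in this change.

import requests

# The payload must follow the structure documented in pre_processing.py:
# {"data": {"exercise": ..., "tutor_feedbacks": [...], "results": [...]}}
payload = {"data": {"exercise": {}, "tutor_feedbacks": [], "results": []}}  # placeholder shape only

response = requests.post(
    "http://localhost:5001/generate_statistics",      # assumed address of the running text module
    json=payload,
    headers={"Authorization": "your-athena-secret"},   # header name used by the playground fetcher
    timeout=120,
)

# The route is declared with response_class=HTMLResponse, so the body is the HTML report
# (or the fallback page if the module could not compile the analytics).
with open("analytics_report.html", "w", encoding="utf-8") as f:
    f.write(response.text)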
diff --git a/modules/text/module_text_llm/module_text_llm/__main__.py b/modules/text/module_text_llm/module_text_llm/__main__.py index 0bfc6e41d..6380e356f 100644 --- a/modules/text/module_text_llm/module_text_llm/__main__.py +++ b/modules/text/module_text_llm/module_text_llm/__main__.py @@ -3,14 +3,14 @@ import nltk import tiktoken -from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider +from athena import app, submission_selector, submissions_consumer, generate_statistics,feedback_consumer, feedback_provider, evaluation_provider from athena.text import Exercise, Submission, Feedback from athena.logger import logger - from module_text_llm.config import Configuration from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics from module_text_llm.generate_evaluation import generate_evaluation from module_text_llm.approach_controller import generate_suggestions +from module_text_llm.analytics.compile import compile @submissions_consumer def receive_submissions(exercise: Exercise, submissions: List[Submission]): @@ -27,6 +27,11 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]): logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id) +@generate_statistics +async def compile_analytics(results: dict): + logger.info("generate_statistics: Generating statistics") + return compile(results) + @feedback_provider async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]: logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested, with approach: %s", diff --git a/modules/text/module_text_llm/module_text_llm/analytics/analytics.py b/modules/text/module_text_llm/module_text_llm/analytics/analytics.py new file mode 100644 index 000000000..178d89efe --- /dev/null +++ b/modules/text/module_text_llm/module_text_llm/analytics/analytics.py @@ -0,0 +1,350 @@ +import plotly.express as px +import plotly.graph_objects as go +import numpy as np +from collections import Counter + +def failure_success(credits_per_submission,failures,submission_ids): + failures_per_model = {} + list_of_models = failures.keys() + total_runs = len(submission_ids) + for submission_id, approaches in credits_per_submission.items(): + for model in list_of_models: + if model not in failures_per_model: + failures_per_model[model] = 0 + if model not in approaches: + failures_per_model[model] += 1 + + successes_per_model = {model: total_runs - failures for model, failures in failures_per_model.items()} + + models = list(failures_per_model.keys()) + failures = list(failures_per_model.values()) + successes = list(successes_per_model.values()) + + fig = go.Figure() + + fig.add_trace(go.Bar( + x=models, + y=failures, + name='Failures', + marker_color='red', + hovertemplate='%{y} failures' + )) + + fig.add_trace(go.Bar( + x=models, + y=successes, + name='Successes', + marker_color='green', + hovertemplate='%{y} successes' + )) + + fig.update_layout( + barmode='stack', + title='Approach/LLM Failure and Success Rates to produce output', + xaxis_title='LLM Models', + yaxis_title='Number of Calls', + legend_title='Outcome', + template='plotly_white', + hovermode='x unified' + ) + return {"fig": fig, "html_explanation": ""} +def 
total_credit_per_submission(data): + html_explanation = """ +

Total Credits awarded by each model on each submission

+ """ + submission_ids = [] + approaches = [] + total_credits = [] + + for submission_id, approaches_data in data.items(): + for approach, credits in approaches_data.items(): + submission_ids.append(submission_id) + approaches.append(approach) + total_credits.append(sum(credits)) + + fig = px.bar( + x=submission_ids, + y=total_credits, + color=approaches, + barmode="group", + title="Total Credits by Approach for Each Submission ID", + labels={"x": "Submission ID", "y": "Total Credits", "color": "Approach"} + ) + + return {"fig": fig, "html_explanation": html_explanation} + + +def visualize_histogram_kde_percentages(credit_data,max_points): + html_explanation = """ +

+ Histogram of the frequency of total credits given
+ Insights into Score Distribution
+ Each trace shows how often an approach awarded a given total credit amount (capped at the exercise's maximum points), so the score distributions of the different approaches can be compared directly.
+ + """ + x = [] + group_labels = [] + approach_credits = {} + for submission_id, approaches in credit_data.items(): + for approach, credits in approaches.items(): + if approach not in approach_credits: + approach_credits[approach] = [] + if (sum(credits) > max_points): + approach_credits[approach].append(max_points) + else: + approach_credits[approach].append(sum(credits)) # /max_points*100 + for approach, credits in approach_credits.items(): + x.append(credits) + group_labels.append(approach) + + fig = go.Figure() + for approach, credits in approach_credits.items(): + fig.add_trace(go.Histogram(x=credits, name=approach,xbins={"size": 0.5})) + fig.update_layout( + title='Histogram of Total Credits Given', + xaxis_title='Total Credits', + yaxis_title='Count') + fig.update_traces(opacity=0.7) + return {"fig": fig, "html_explanation": html_explanation} + +def visualize_differences_histogram(credit_data,max_points): + html_explanation = """ +

+ Distribution of Score Disparity Between LLM and Tutor
+ This graph represents the distribution of score differences between the LLM and the tutor. Negative values indicate that the LLM has scored the submission lower than the tutor, while positive values suggest the opposite.
+ The chart provides insights into the consistency and bias of the LLM's grading compared to the tutor. Viewers should look for patterns such as a strong concentration of values near zero, which would indicate agreement, or significant skew towards negative or positive values, highlighting systematic under- or over-grading by the LLM.
+ This visualization can help identify discrepancies and areas where the LLM may need calibration or adjustment to align more closely with tutor assessments.
+ """ + differences_data = differences(credit_data) + + fig = go.Figure() + for approach, credits in differences_data.items(): + fig.add_trace(go.Histogram(x=credits, name=approach,xbins={"size": 0.5})) + fig.update_layout( + title='Histogram of differences', + xaxis_title='Difference LLM - Tutor', + yaxis_title='Count') + fig.update_traces(opacity=0.8) + return {"fig": fig, "html_explanation": html_explanation} + +def normalized_absolute_difference(credits, max_points): + """Plots the normalized absolute difference between the LLM and the other approaches in a sorted bar plot. + + Args: + credits (dict): A dictionary with approaches and their score differences. + max_points (float): Maximum possible credits for normalization. + """ + differences_data = differences(credits) + normalized_differences = { + approach: sum(abs(d) for d in diff_list) / len(diff_list) / max_points + for approach, diff_list in differences_data.items() + } + + sorted_differences = dict(sorted(normalized_differences.items(), key=lambda x: x[1], reverse=True)) + + fig = go.Figure() + fig.add_trace( + go.Bar( + x=list(sorted_differences.keys()), + y=list(sorted_differences.values()), + marker_color='cornflowerblue' + ) + ) + + fig.update_layout( + title='Normalized Absolute Differences Between LLM and Tutor Score', + xaxis_title='Approaches', + yaxis_title='Normalized Absolute Difference', + xaxis={"categoryorder": 'total descending'}, + yaxis={"range": [0, 1]}, + template='plotly_white' + ) + html_explanation = """

+ Normalized Absolute Differences Between LLM and the Tutor
+ This bar plot visualizes the normalized absolute difference in scores between each LLM approach and the tutor. Each bar represents an approach, sorted from the highest to the lowest difference, and normalized by dividing the average absolute difference by the maximum possible score.
+ Insights:
+ Note: THIS PLOT IS NOT AN ACCURATE REPRESENTATION OF ALIGNMENT WITH TUTOR FEEDBACK
+ Note: Refer to the next plot for a better representation of alignment
+ +""" + return {"fig": fig, "html_explanation": html_explanation} + +def differences(credits): + """ Calculates the literal differences between the tutor and the other approaches + removes the submission id, but keeps the credit differences in order so that + values at index 0 are the same submission and so on. + The calculation is LLM - Tutor, so a negative value means that the LLM has awarded less credits. + The end form is : + {approach: [differences], ...} + """ + differences_data = {} + for submission_id, approaches in credits.items(): + for approach, credit_list in approaches.items(): + if approach != "Tutor": + if approach not in differences_data: + differences_data[approach] = [] + differences_data[approach].append( sum(credit_list) - sum(approaches["Tutor"])) + return differences_data + +def getAbsoluteDifferences(differences): + abs_diff = {} + for approach, diff_list in differences.items(): + abs_diff[approach] = np.abs(diff_list) + return abs_diff + +def analyze_grading_instruction_usage(grading_instructions_used): + """ + Analyze grading instruction usage for each approach and plot matching vs. non-matching counts. + + Parameters: + - grading_instructions_used: dict, where keys are submission IDs, and values are dicts with approaches and lists of grading instruction IDs. + + Returns: + - A Plotly figure object with analytics on matching vs. non-matching grading instruction IDs. + - An HTML string explanation. + """ + approach_stats = {} + + for submission_id, approaches in grading_instructions_used.items(): + if "Tutor" not in approaches: + continue + + tutor_instructions = Counter(approaches["Tutor"]) + + for approach, instructions in approaches.items(): + if approach == "Tutor": + continue + + if approach not in approach_stats: + approach_stats[approach] = {"matches": 0, "non_matches": 0} + + approach_instructions = Counter(instructions) + + matches = 0 + non_matches = 0 + + for instruction, count in approach_instructions.items(): + if instruction in tutor_instructions: + matches += min(count, tutor_instructions[instruction]) + else: + non_matches += count + + approach_stats[approach]["matches"] += matches + approach_stats[approach]["non_matches"] += non_matches + + approaches = list(approach_stats.keys()) + matches = [approach_stats[approach]["matches"] for approach in approaches] + non_matches = [approach_stats[approach]["non_matches"] for approach in approaches] + + fig = go.Figure() + fig.add_trace(go.Bar( + x=approaches, y=matches, name="Matching Instructions", + marker_color="green" + )) + fig.add_trace(go.Bar( + x=approaches, y=non_matches, name="Non-Matching Instructions", + marker_color="red" + )) + + fig.update_layout( + barmode="group", + title="Matching vs. Non-Matching Grading Instructions by Approach", + xaxis_title="Approach", + yaxis_title="Count", + template="plotly_white", + ) + + html_explanation = """ +

+ Grading Instruction Usage Analysis
+ This visualization compares the grading instructions used by different approaches against the "Tutor" approach. The green bars represent the count of grading instructions that match those of the Tutor approach, while the red bars show the count of non-matching instructions. This analysis highlights alignment and deviations between approaches.
+ """ + + return fig, html_explanation + +def create_threshold_bar_plot(data,max_points): + thresholds = [0, 0.1, 0.15, 0.2, 0.25, 0.3] + data_dicts = [] + for threshold in thresholds: + data_dicts.append(percentage_within_range(data,max_points, threshold)) + fig = go.Figure() + + for approach in data_dicts[0].keys(): + fig.add_trace(go.Bar( + name=approach, + x=[f"{threshold*100}%" for threshold in thresholds], + y=[data[approach] for data in data_dicts], + text=[f"{v}%" for v in [data[approach] for data in data_dicts]], + textposition='auto' + )) + + fig.update_layout( + title="Percentage of Counts Within Thresholds by Approach", + xaxis_title="Thresholds", + yaxis_title="Percentage (%)", + barmode='group', + legend_title="Approaches", + template='plotly' + ) + html_explanation = """ +

+ Percentage of LLM results that fall within a given fraction of the maximum points of the tutor feedback
+ For example, with a threshold of 10 per cent and a maximum of 5 points, only LLM results within 0.5 points of the tutor feedback are counted; a bar at 20 per cent means that 20 per cent of the LLM results fall within 0.5 points of the tutor feedback.
+ """ + return {"fig": fig, "html_explanation": html_explanation} + +def percentage_within_range(data,max_points , threshold): + """ This method shows the percentage of the data that falls within a certain range difference of the maximum credits from the tutor + Args: + data (_type_): the credits data + """ + approach_credits = {} + for submission_id, approaches in data.items(): + for approach, credits in approaches.items(): + if approach not in approach_credits: + approach_credits[approach] = [] + approach_credits[approach].append(sum(credits)) + + results = {} + tutor_credits = approach_credits["Tutor"] + for approach,credit_total in approach_credits.items(): + if approach != "Tutor": + if approach not in results: + results[approach] = 0 + for idx,credit in enumerate(credit_total): + within_range = calculate_within_cutoff(tutor_credits[idx], credit,max_points, threshold) + if within_range: + results[approach] += 1 + for approach, count in results.items(): + results[approach] = round(count/len(tutor_credits)*100,2) + return results +def calculate_within_cutoff(tutor_value, llm_value,max_points, threshold): + upper_credit_cutoff = tutor_value + max_points * threshold + lower_credit_cutoff = tutor_value - max_points * threshold + within_range = lower_credit_cutoff <= llm_value <= upper_credit_cutoff + return within_range diff --git a/modules/text/module_text_llm/module_text_llm/analytics/compile.py b/modules/text/module_text_llm/module_text_llm/analytics/compile.py new file mode 100644 index 000000000..37137ad91 --- /dev/null +++ b/modules/text/module_text_llm/module_text_llm/analytics/compile.py @@ -0,0 +1,134 @@ +from module_text_llm.analytics.pre_processing import pre_processing +from module_text_llm.analytics.analytics import create_threshold_bar_plot,total_credit_per_submission,failure_success,analyze_grading_instruction_usage, visualize_differences_histogram,normalized_absolute_difference,visualize_histogram_kde_percentages +import os +import traceback + + +def compile(results): + """This function will compile the analytics for the given results +It first preprocesses the data and then calls multiple functions to generate the analytics. +All these are put together in an HTML file which is then returned as a string. +Through plotly, the figures are embedded in the HTML file and are fully interactive. + """ + try: + credits_per_submission,grading_instructions_used,exercise_id,grading_criteria,max_points,experiment_id,failures,submission_ids,title,problem_statement = pre_processing(results) + directory = "module_text_llm/analytics/created_analytics" + ensure_directory_exists(directory) + output_file = f"{directory}/analytics_{experiment_id}.html" + + if file_exists(output_file): + return get_html_content(output_file) + + ############################# CREDIT BASED ANALYTICS ############################# + # Define them here, must return a dict of type {"fig":fig,"html_explanation":html_explanation} + creditPSub = total_credit_per_submission(credits_per_submission) + histo = visualize_differences_histogram(credits_per_submission,max_points) + kde_percent = visualize_histogram_kde_percentages(credits_per_submission,max_points) + nmda = normalized_absolute_difference(credits_per_submission,max_points) + fail = failure_success(credits_per_submission,failures,submission_ids) + threshold_bar_plot = create_threshold_bar_plot(credits_per_submission,max_points) + + with open(output_file, "w", encoding="utf-8") as f: + f.write(get_introduction()) + + f.write(""" +

Exercise Information

+
+ """) + f.write(get_exercise_details(title, problem_statement, grading_criteria, max_points)) + f.write(""" +

Credits Analytics

+
+ """) + for i,dic in enumerate([fail,nmda,threshold_bar_plot,kde_percent,histo, creditPSub], start=1): # and use them here + f.write(f""" +
+

Plot {i}

+ """) + f.write(dic["html_explanation"]) + f.write(dic["fig"].to_html(full_html=False, include_plotlyjs="cdn")) + + ############################# CREDIT BASED ANALYTICS ############################# + ####################### Grading Instruction Based Analytics ####################### + f.write(""" +

Structured Grading Instruction IDs Analytics

+
+ """) + fig,html_expl = analyze_grading_instruction_usage(grading_instructions_used) + f.write(html_expl) + f.write(fig.to_html(full_html=False, include_plotlyjs="cdn")) + + f.write("") + ####################### Grading Instruction Based Analytics ####################### + + ###### Return the analytics as an html file ######## + with open(output_file, "r", encoding="utf-8") as file: + html_content = file.read() + except Exception as e: + html_content = getFallbackHtml(f"An error occurred while generating the analytics {str(e)} . Full Trace: {traceback.format_exc()}") + return html_content + +def file_exists(path) -> bool: + return os.path.exists(path) + +def get_html_content(path): + if file_exists(path): + with open(path, "r", encoding="utf-8") as file: + html_content = file.read() + return html_content + + return getFallbackHtml("File was not found") + +def getFallbackHtml(specifics: str): + return f""" + + + + + +

+ Warning: Analytics generation failed.
+ Please try again. Make sure the experiment has finished!
+ If the problem persists, please contact the administrator.
+ Error details: {specifics}
+ + + """ + +def get_introduction()->str: + return """ + + + + Athena Analytics + + +

+ Athena Interactive Analytics of Experiment
+ Welcome to the analytics report for the experiment. This report includes fully interactive visuals generated with Plotly. You can explore the data by turning visuals on or off by clicking items in the legend.
+""" + +def ensure_directory_exists(directory_path): + if not os.path.exists(directory_path): + os.makedirs(directory_path) + +def get_exercise_details(title, problem_statement, grading_criteria, max_points): + return f""" +
+

+ Exercise: { title }
+ Maximum Points
+ { max_points } points
+
+""" \ No newline at end of file diff --git a/modules/text/module_text_llm/module_text_llm/analytics/pre_processing.py b/modules/text/module_text_llm/module_text_llm/analytics/pre_processing.py new file mode 100644 index 000000000..a75b07729 --- /dev/null +++ b/modules/text/module_text_llm/module_text_llm/analytics/pre_processing.py @@ -0,0 +1,93 @@ +""" +The data structure will be as follows: +credits: {submission_id: {approach: [credits]}, { submission_id: {approach: [credits]} ...} +grading_instructions_used: {submission_id: {approach: [grading_instruction_id]}} +Tutor has the reserved "Tutor" key in the approach field. +""" +def pre_processing(data): + tutor_feedback = data["data"]["tutor_feedbacks"] + exercise = data["data"]["exercise"] + results = data["data"]["results"] + + exercise_id,grading_criteria,max_points,title,problem_statement = process_exercise(exercise) + credits_per_submission,grading_instructions_used,submission_ids = process_tutor_feedback(tutor_feedback) + credits_per_submission,grading_instructions_used,submission_to_exclude,experiment_id,failures = process_results(results,credits_per_submission,grading_instructions_used,submission_ids) + filtered_credits_per_submission = { + key: value + for key, value in credits_per_submission.items() + if str(key) not in submission_to_exclude + } + filtered_grading_instructions_used ={ + key: value + for key, value in grading_instructions_used.items() + if int(key) not in submission_to_exclude + } + + # Remove submissions that did not have suggestions from all approaches, this would cause problems with analytics consistency but also failures + + return filtered_credits_per_submission,filtered_grading_instructions_used,exercise_id,grading_criteria,max_points,experiment_id,failures,submission_ids,title,problem_statement +def process_exercise(exercise): + exercise_id = exercise["id"] + title = "" + if "title" in exercise: + title = exercise["title"] + problem_statement = "" + if "problem_statement" in exercise: + problem_statement = exercise["problem_statement"] + grading_criteria = [] + if "grading_criteria" in exercise: + grading_criteria = exercise["grading_criteria"] + + max_points = exercise["max_points"] + return exercise_id,grading_criteria,max_points,title,problem_statement + +def process_results(results,credits_per_submission,grading_instructions_used,submission_ids): + failures = {} + submission_to_exclude = [] + for aggregated_results in results: + for key,result in aggregated_results.items(): + approach = result["name"] + if approach not in failures: + failures[approach] = 0 + all_suggestions = result["submissionsWithFeedbackSuggestions"] + experiment_id = result["experimentId"] + for submission_id in submission_ids: + submission_id = str(submission_id) + if submission_id not in all_suggestions: + submission_to_exclude.append(submission_id) + failures[approach] += 1 + continue + suggestions = all_suggestions[str(submission_id)] + feedbackSuggestions = suggestions["suggestions"] + for suggestion in feedbackSuggestions: + if (approach) not in credits_per_submission[submission_id]: + credits_per_submission[submission_id][approach] = [] + if (approach) not in grading_instructions_used[submission_id]: + grading_instructions_used[submission_id][approach] = [] + credits_per_submission[submission_id][approach].append(suggestion["credits"]) + grading_instructions_used[submission_id][approach].append(suggestion["structured_grading_instruction_id"]) + + return 
credits_per_submission,grading_instructions_used,set(submission_to_exclude),experiment_id,failures + +def process_tutor_feedback(tutor_feedbacks): + credits_per_submission = {} + grading_instructions_used = {} + submission_ids = [] + for tutor_feedback in tutor_feedbacks: + submission_ids.append(tutor_feedback["submission_id"]) + + if str(tutor_feedback["submission_id"]) not in credits_per_submission: + credits_per_submission[str(tutor_feedback["submission_id"])] = {} + if "Tutor" not in credits_per_submission[str(tutor_feedback["submission_id"])]: + credits_per_submission[str(tutor_feedback["submission_id"])]["Tutor"] = [] + credits_per_submission[str(tutor_feedback["submission_id"])]["Tutor"].append(tutor_feedback["credits"]) + # I keep the list of single credits, in case we want to compare more granularly in the future + + if str(tutor_feedback["submission_id"]) not in grading_instructions_used: + grading_instructions_used[str(tutor_feedback["submission_id"])] = {} + if "Tutor" not in grading_instructions_used[str(tutor_feedback["submission_id"])]: + grading_instructions_used[str(tutor_feedback["submission_id"])]["Tutor"] = [] + + if "structured_grading_instruction_id" in tutor_feedback: + grading_instructions_used[str(tutor_feedback["submission_id"])]["Tutor"].append(tutor_feedback["structured_grading_instruction_id"]) + return credits_per_submission,grading_instructions_used,set(submission_ids) \ No newline at end of file diff --git a/modules/text/module_text_llm/poetry.lock b/modules/text/module_text_llm/poetry.lock index 9f12b92d3..3f3ce33d3 100644 --- a/modules/text/module_text_llm/poetry.lock +++ b/modules/text/module_text_llm/poetry.lock @@ -1385,6 +1385,88 @@ files = [ {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] +[[package]] +name = "pandas" +version = "2.2.3" +description = "Powerful data structures for data analysis, time series, and statistics" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = 
"pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = 
"sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, +] + +[package.dependencies] +numpy = {version = ">=1.23.2", markers = "python_version == \"3.11\""} +python-dateutil = ">=2.8.2" +pytz = ">=2020.1" +tzdata = ">=2022.7" + +[package.extras] +all = ["PyQt5 (>=5.15.9)", "SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)", "beautifulsoup4 (>=4.11.2)", "bottleneck (>=1.3.6)", "dataframe-api-compat (>=0.1.7)", "fastparquet (>=2022.12.0)", "fsspec (>=2022.11.0)", "gcsfs (>=2022.11.0)", "html5lib (>=1.1)", "hypothesis (>=6.46.1)", "jinja2 (>=3.1.2)", "lxml (>=4.9.2)", "matplotlib (>=3.6.3)", "numba (>=0.56.4)", "numexpr (>=2.8.4)", "odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "pandas-gbq (>=0.19.0)", "psycopg2 (>=2.9.6)", "pyarrow (>=10.0.1)", "pymysql (>=1.0.2)", "pyreadstat (>=1.2.0)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "qtpy (>=2.3.0)", "s3fs (>=2022.11.0)", "scipy (>=1.10.0)", "tables (>=3.8.0)", "tabulate (>=0.9.0)", "xarray (>=2022.12.0)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)", "zstandard (>=0.19.0)"] +aws = ["s3fs (>=2022.11.0)"] +clipboard = ["PyQt5 (>=5.15.9)", "qtpy (>=2.3.0)"] +compression = ["zstandard (>=0.19.0)"] +computation = ["scipy (>=1.10.0)", "xarray (>=2022.12.0)"] +consortium-standard = ["dataframe-api-compat (>=0.1.7)"] +excel = ["odfpy (>=1.4.1)", "openpyxl (>=3.1.0)", "python-calamine (>=0.1.7)", "pyxlsb (>=1.0.10)", "xlrd (>=2.0.1)", "xlsxwriter (>=3.0.5)"] +feather = ["pyarrow (>=10.0.1)"] +fss = ["fsspec (>=2022.11.0)"] +gcp = ["gcsfs (>=2022.11.0)", "pandas-gbq (>=0.19.0)"] +hdf5 = ["tables (>=3.8.0)"] +html = ["beautifulsoup4 (>=4.11.2)", "html5lib (>=1.1)", "lxml (>=4.9.2)"] +mysql = ["SQLAlchemy (>=2.0.0)", "pymysql (>=1.0.2)"] +output-formatting = ["jinja2 (>=3.1.2)", "tabulate (>=0.9.0)"] +parquet = ["pyarrow (>=10.0.1)"] +performance = ["bottleneck (>=1.3.6)", "numba (>=0.56.4)", "numexpr (>=2.8.4)"] +plot = ["matplotlib (>=3.6.3)"] +postgresql = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "psycopg2 (>=2.9.6)"] +pyarrow = ["pyarrow (>=10.0.1)"] +spss = ["pyreadstat (>=1.2.0)"] +sql-other = ["SQLAlchemy (>=2.0.0)", "adbc-driver-postgresql (>=0.8.0)", "adbc-driver-sqlite (>=0.8.0)"] +test = ["hypothesis (>=6.46.1)", "pytest (>=7.3.2)", "pytest-xdist (>=2.2.0)"] +xml = ["lxml (>=4.9.2)"] + 
[[package]] name = "pep8-naming" version = "0.10.0" @@ -1415,6 +1497,21 @@ docs = ["furo (>=2024.8.6)", "proselint (>=0.14)", "sphinx (>=8.0.2)", "sphinx-a test = ["appdirs (==1.4.4)", "covdefaults (>=2.3)", "pytest (>=8.3.2)", "pytest-cov (>=5)", "pytest-mock (>=3.14)"] type = ["mypy (>=1.11.2)"] +[[package]] +name = "plotly" +version = "5.24.1" +description = "An open-source, interactive data visualization library for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "plotly-5.24.1-py3-none-any.whl", hash = "sha256:f67073a1e637eb0dc3e46324d9d51e2fe76e9727c892dde64ddf1e1b51f29089"}, + {file = "plotly-5.24.1.tar.gz", hash = "sha256:dbc8ac8339d248a4bcc36e08a5659bacfe1b079390b8953533f4eb22169b4bae"}, +] + +[package.dependencies] +packaging = "*" +tenacity = ">=6.2.0" + [[package]] name = "propcache" version = "0.2.1" @@ -1744,6 +1841,20 @@ files = [ [package.dependencies] pylint = ">=1.7" +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +description = "Extensions to the standard Python datetime module" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[package.dependencies] +six = ">=1.5" + [[package]] name = "python-dotenv" version = "1.0.0" @@ -1758,6 +1869,17 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "pytz" +version = "2024.2" +description = "World timezone definitions, modern and historical" +optional = false +python-versions = "*" +files = [ + {file = "pytz-2024.2-py2.py3-none-any.whl", hash = "sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, + {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, +] + [[package]] name = "pyyaml" version = "6.0.2" @@ -1999,6 +2121,17 @@ files = [ [package.extras] yaml = ["pyyaml"] +[[package]] +name = "six" +version = "1.17.0" +description = "Python 2 and 3 compatibility utilities" +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + [[package]] name = "smmap" version = "5.0.1" @@ -2295,6 +2428,17 @@ files = [ mypy-extensions = ">=0.3.0" typing-extensions = ">=3.7.4" +[[package]] +name = "tzdata" +version = "2025.1" +description = "Provider of IANA time zone data" +optional = false +python-versions = ">=2" +files = [ + {file = "tzdata-2025.1-py2.py3-none-any.whl", hash = "sha256:7e127113816800496f027041c570f50bcd464a020098a3b6b199517772303639"}, + {file = "tzdata-2025.1.tar.gz", hash = "sha256:24894909e88cdb28bd1636c6887801df64cb485bd593f2fd83ef29075a81d694"}, +] + [[package]] name = "urllib3" version = "2.3.0" @@ -2429,4 +2573,4 @@ propcache = ">=0.2.0" [metadata] lock-version = "2.0" python-versions = "3.11.*" -content-hash = "a2ec0f8a34c58ac18bbbf07c9fd4bd855c80b9fadd8ba3b4048e8e51027e1ef6" +content-hash = "bb98064d5ff35679db0c6443ff63b4c8dbb088fc35bed39147024b37237fb2ee" diff --git a/modules/text/module_text_llm/pyproject.toml b/modules/text/module_text_llm/pyproject.toml index bf03e0282..5ef7b625c 100644 --- 
a/modules/text/module_text_llm/pyproject.toml +++ b/modules/text/module_text_llm/pyproject.toml @@ -15,6 +15,8 @@ gitpython = "3.1.41" nltk = "3.9.1" python-dotenv = "1.0.0" tiktoken = "0.7.0" +pandas = "2.2.3" +plotly = "5.24.1" [tool.poetry.dev-dependencies] pydantic = "1.10.17" diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx index 560d8b204..927ad970f 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/batch_module_experiment.tsx @@ -13,6 +13,7 @@ import { FullScreenHandle } from "react-full-screen"; import useHealth from "@/hooks/health"; import useBatchModuleExperiment from "@/hooks/batch_module_experiment"; +import { useSendResults } from "@/hooks/athena/send_results"; import { ModuleProvider } from "@/hooks/module_context"; import { ExperimentIdentifiersProvider } from "@/hooks/experiment_identifiers_context"; import { ModuleConfiguration } from "../configure_modules"; @@ -39,6 +40,8 @@ type ConductBatchModuleExperimentProps = { export type ConductBatchModuleExperimentHandles = { importData: ReturnType["importData"]; exportData: ReturnType["exportData"]; + analyseData: ReturnType["analyseData"] + getResults: ReturnType["getResults"]; }; // ForwardRef is needed to expose the ref to the parent component @@ -66,7 +69,8 @@ const ConductBatchModuleExperiment = React.forwardRef< const [showProgress, setShowProgress] = useState(true); const [isConfigModalOpen, setConfigModalOpen] = useState(false); - + // Use the `useSendResults` hook + const { mutate: sendResultsMutate } = useSendResults(); function handleOpenModal() { document.body.style.overflow = "hidden"; // Prevent scrolling setConfigModalOpen(true); @@ -80,10 +84,11 @@ const ConductBatchModuleExperiment = React.forwardRef< if (didStartExperiment) { moduleExperiment.startExperiment(); } - useImperativeHandle(ref, () => ({ importData: moduleExperiment.importData, exportData: moduleExperiment.exportData, + analyseData: moduleExperiment.analyseData, + getResults: moduleExperiment.getResults, })); useEffect(() => { diff --git a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx index 3c78e1ecb..682fe213a 100644 --- a/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx +++ b/playground/src/components/view_mode/evaluation_mode/conduct_experiment/index.tsx @@ -36,8 +36,8 @@ export default function ConductExperiment({ }, []); const [didStartExperiment, setDidStartExperiment] = useState(false); + const [isCompilingAnalyticsReport, setIsCompilingAnalyticsReport] = useState(false); const [modulesStep, setModulesStep] = useState([]); - const [viewSubmissionIndex, setViewSubmissionIndex] = useState(0); const [moduleRenderOrder, setModuleRenderOrder] = useState( moduleConfigurations.map((_, index) => index) @@ -47,6 +47,35 @@ export default function ConductExperiment({ [] ); + const handleAnalysis = async () => { + setIsCompilingAnalyticsReport(true); + + try { + let aggregatedData: any[] = []; + let lastRef; + let index = 0 + for (const moduleViewRef of moduleViewRefs.current) { + const data = moduleViewRef?.getResults(); + if(data){ + (data["results"] as any)["name"] = moduleConfigurations[index].name; + } + 
console.log(data); + aggregatedData.push(data); + lastRef = moduleViewRef; + index++; + } + + if (lastRef) { + const report = await lastRef.analyseData(aggregatedData); + console.log("Analysis completed successfully:", report); + } + } catch (error) { + console.error("Error during analysis:", error); + } finally { + setIsCompilingAnalyticsReport(false); + } + }; + const handleExport = () => { downloadJSONFiles( moduleViewRefs.current.flatMap((moduleViewRef, index) => { @@ -186,7 +215,7 @@ export default function ConductExperiment({ onClick={handleExport} > Export - + + +
{/* Submission switcher */}
diff --git a/playground/src/hooks/athena/send_results.ts b/playground/src/hooks/athena/send_results.ts new file mode 100644 index 000000000..f3ca90945 --- /dev/null +++ b/playground/src/hooks/athena/send_results.ts @@ -0,0 +1,68 @@ +import { AthenaError, useAthenaFetcher } from "@/hooks/athena_fetcher"; +import type ModuleResponse from "@/model/module_response"; +import { UseMutationOptions, useMutation } from "react-query"; +import { useModule } from "@/hooks/module_context"; +import useHealth from "@/hooks/health"; +import { Exercise } from "@/model/exercise"; +import { Feedback } from "@/model/feedback"; + +/** + * Hook to send results data to the backend. + * + * @param options React Query mutation options. + * + * @example + * const { mutate, isLoading } = useSendResults(); + * mutate(statisticsData); + */ +export function useSendResults( + options: Omit< + UseMutationOptions< + ModuleResponse[] | undefined, + AthenaError, + { exercise:Exercise;tutor_feedbacks:Feedback[]; results: any } + >, + "mutationFn" + > = {}, + onlyUseContextModule = false +) { + const athenaFetcher = useAthenaFetcher(); + const { module: contextModule } = useModule(); + const { data: health } = useHealth(); + + return useMutation({ + mutationFn: async ( data ) => { + // Get the list of modules to evaluate + const modules = onlyUseContextModule + ? [contextModule] + : Object.values(health?.modules ?? {}).filter( + (module) => + module.healthy && + module.type === contextModule.type && + module.supportsEvaluation + ); + + // Map over each module and send the data + const results = await Promise.allSettled( + modules.map((module) => + athenaFetcher( + "/generate_statistics", // The route to call + { data }, // The payload + { module, moduleConfig: undefined } + ) + ) + ); + + // Filter and process the results + return results.flatMap((result) => { + if (result.status === "fulfilled") { + return [result.value]; + } else { + console.error("Error fetching statistics:", result.reason); + return []; + } + }); + }, + ...options, + }); +} diff --git a/playground/src/hooks/athena_fetcher.ts b/playground/src/hooks/athena_fetcher.ts index 5fa934c3e..827d2e095 100644 --- a/playground/src/hooks/athena_fetcher.ts +++ b/playground/src/hooks/athena_fetcher.ts @@ -42,7 +42,7 @@ export function useAthenaFetcher() { const { experimentId, moduleConfigurationId, runId } = useExperimentIdentifiers(); return ( - async (moduleRoute: string, body?: any, overrideModule?: Module) => { + async (moduleRoute: string, body?: any, overrideModule?: Module, content_type="application/json") => { let targetModule = contextModule; let targetModuleConfig = contextModuleConfig; if (overrideModule) { @@ -72,7 +72,7 @@ export function useAthenaFetcher() { { method: body ? 
"POST" : "GET", headers: { - "Content-Type": "application/json", + "Content-Type": content_type, "Authorization": athenaSecret, "X-Server-URL": lmsUrl, ...headers, diff --git a/playground/src/hooks/batch_module_experiment.ts b/playground/src/hooks/batch_module_experiment.ts index 0c6fef5e2..7552b8740 100644 --- a/playground/src/hooks/batch_module_experiment.ts +++ b/playground/src/hooks/batch_module_experiment.ts @@ -7,6 +7,7 @@ import type { ModuleConfiguration } from "@/components/view_mode/evaluation_mode import { v4 as uuidv4 } from "uuid"; import { useEffect, useRef, useState } from "react"; import { useSendFeedbacks } from "./athena/send_feedbacks"; +import { useSendResults } from "./athena/send_results"; import useRequestSubmissionSelection from "./athena/request_submission_selection"; import useRequestFeedbackSuggestions from "./athena/request_feedback_suggestions"; import useSendSubmissions from "./athena/send_submissions"; @@ -56,6 +57,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC const [submissionsWithAutomaticEvaluation, setSubmissionsWithAutomaticEvaluation] = useState< Map | undefined >(undefined); + const { mutate: sendResultsMutate } = useSendResults(); const [processingStep, setProcessingStep] = useState< ExperimentStep | undefined @@ -75,6 +77,73 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC })); }; + + const analyseData = async (results: any) => { + const exercise = experiment.exercise + const submissions = experiment.evaluationSubmissions + const submissionIds = new Set(submissions.map(submission => submission.id)); + const tutor_feedbacks :any[]= []; + for (const feedback of experiment.tutorFeedbacks) { + if (submissionIds.has(feedback.submission_id)) { + tutor_feedbacks.push(feedback); + } + } + + return new Promise((resolve, reject) => { + sendResultsMutate( + { exercise, tutor_feedbacks, results }, + { + onSuccess: (response) => { + // const newWindow = window.open("", "_blank", "width=900,height=900"); + const htmlContent = response[0].data; + const blob = new Blob([htmlContent], { type: 'text/html' }); + const link = document.createElement('a'); + link.href = URL.createObjectURL(blob); + link.download = 'analysis.html'; + document.body.appendChild(link); + link.click(); + document.body.removeChild(link); + const width = 800; + const height = 600; + const left = (window.innerWidth - width) / 2; + const top = (window.innerHeight - height) / 2; + + const newWindow = window.open('', '', `width=${width},height=${height},left=${left},top=${top}`); + newWindow!.document.open(); + newWindow!.document.write(htmlContent); + newWindow!.document.close(); + + console.log("Data analysis sent successfully!"); + resolve(results); // Resolve the promise with results + }, + onError: (error) => { + console.error("Error sending data analysis to the backend:", error); + reject(error); // Reject the promise with the error + }, + } + ); + }); + }; + + const getResults = () => { + return { + results: { + type: "results", + runId: data.runId, + experimentId: experiment.id, + moduleConfigurationId: moduleConfiguration.id, + step: data.step, + didSendSubmissions: data.didSendSubmissions, + sentTrainingSubmissions: data.sentTrainingSubmissions, + submissionsWithFeedbackSuggestions: Object.fromEntries( + Array.from(data.submissionsWithFeedbackSuggestions.entries()).map(([key, value]) => [ + key, + { suggestions: value.suggestions }, // Exclude `meta` here + ]) + ), + }, + };} + const exportData = () => { return { 
results: { @@ -202,7 +271,7 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC const requestSubmissionSelection = useRequestSubmissionSelection({ retry: 3 }); const requestFeedbackSuggestions = useRequestFeedbackSuggestions({ retry: 3 }); const requestEvaluation = useRequestEvaluation({ retry: 3 }); - + const sendResult = useSendResults({ retry: 3 }); // 1. Send submissions to Athena const stepSendSubmissions = () => { setProcessingStep("sendingSubmissions"); @@ -490,6 +559,8 @@ export default function useBatchModuleExperiment(experiment: Experiment, moduleC continueWithAutomaticEvaluation, exportData, importData, + analyseData, + getResults, moduleRequests: { sendSubmissions, sendFeedbacks,
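For reference, the payload that the playground assembles (the exercise, the tutor feedbacks, and one `getResults()` object per module configuration with the configuration name attached in `handleAnalysis`) and that `pre_processing` consumes has roughly the shape sketched below. The field names mirror the code in this change; the concrete values are illustrative only, and fields of `getResults()` that `pre_processing` ignores (e.g. `runId`, `step`) are omitted.

# Illustrative payload for POST /generate_statistics (values are made up;
# field names follow pre_processing.py, send_results.ts and getResults()).
example_payload = {
    "data": {
        "exercise": {
            "id": 1,
            "title": "Example exercise",
            "problem_statement": "Explain the concept of ...",
            "grading_criteria": [],
            "max_points": 5.0,
        },
        "tutor_feedbacks": [
            {"submission_id": 10, "credits": 1.5, "structured_grading_instruction_id": 3},
        ],
        "results": [
            {
                # one entry per module configuration, produced by getResults()
                "results": {
                    "name": "approach_a",          # attached by the playground before sending
                    "experimentId": "exp-123",
                    "submissionsWithFeedbackSuggestions": {
                        "10": {
                            "suggestions": [
                                {"credits": 1.0, "structured_grading_instruction_id": 3},
                            ],
                        },
                    },
                },
            },
        ],
    },
}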