Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add interactive analytics for experiments in the playground #402

Open
wants to merge 23 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions athena/athena/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from .schemas import ExerciseType, GradingCriterion, StructuredGradingInstruction, StructuredGradingCriterion
from .metadata import emit_meta, get_meta
from .experiment import get_experiment_environment
from .endpoints import submission_selector, submissions_consumer, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore
from .endpoints import submission_selector, submissions_consumer, generate_statistics, feedback_consumer, feedback_provider, config_schema_provider, evaluation_provider # type: ignore

@app.get("/")
def module_health():
Expand Down Expand Up @@ -37,5 +37,6 @@ def run_module():
"ExerciseType",
"GradingCriterion",
"StructuredGradingInstruction",
"StructuredGradingCriterion"
"StructuredGradingCriterion",
"generate_statistics"
]
14 changes: 12 additions & 2 deletions athena/athena/endpoints.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
# type: ignore # too much weird behavior of mypy with decorators
import inspect
from fastapi import Depends, BackgroundTasks, Body
from fastapi import Depends, BackgroundTasks, Body, Request
from pydantic import BaseModel, ValidationError
from typing import TypeVar, Callable, List, Union, Any, Coroutine, Type

from fastapi.responses import HTMLResponse
from athena.app import app
from athena.authenticate import authenticated
from athena.metadata import with_meta
Expand Down Expand Up @@ -196,6 +197,16 @@ async def wrapper(request: SubmissionSelectorRequest):

return wrapper

def generate_statistics(func):
    """Register *func* as the module's ``POST /generate_statistics`` endpoint.

    The decorated coroutine receives the parsed JSON body of the request
    (the experiment results) and must return an HTML document, since the
    endpoint is declared with ``response_class=HTMLResponse``.
    """
    @app.post("/generate_statistics", response_class=HTMLResponse)
    async def wrapper(request: Request):
        try:
            # Body shape is defined by the playground client; parsed as raw JSON
            # rather than a pydantic model so arbitrary result payloads pass through.
            results = await request.json()
            return await func(results)
        except Exception as e:
            # Bug fix: the endpoint's response_class is HTMLResponse, so returning
            # a plain dict ({"error": ...}) cannot be rendered as a valid body.
            # Return an escaped HTML error page with an explicit 500 instead.
            import html
            return HTMLResponse(
                content=f"<p>Failed to generate statistics: {html.escape(str(e))}</p>",
                status_code=500,
            )

    return wrapper

def feedback_consumer(func: Union[
Callable[[E, S, List[F]], None],
Expand Down Expand Up @@ -234,7 +245,6 @@ def feedback_consumer(func: Union[
submission_type = inspect.signature(func).parameters["submission"].annotation
feedback_type = inspect.signature(func).parameters["feedbacks"].annotation.__args__[0]
module_config_type = inspect.signature(func).parameters["module_config"].annotation if "module_config" in inspect.signature(func).parameters else None

@app.post("/feedbacks", responses=module_responses)
@authenticated
@with_meta
Expand Down
2 changes: 1 addition & 1 deletion llm_core/llm_core/models/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ class OpenAIModelConfig(ModelConfig):
We generally recommend altering this or `top_p` but not both.\
""")

top_p: float = Field(default=1, ge=0, le=1, description="""\
top_p: float = Field(default=0, ge=0, le=1, description="""\
An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. \
So 0.1 means only the tokens comprising the top 10% probability mass are considered.

Expand Down
9 changes: 7 additions & 2 deletions modules/text/module_text_llm/module_text_llm/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@

import nltk
import tiktoken
from athena import app, submission_selector, submissions_consumer, feedback_consumer, feedback_provider, evaluation_provider
from athena import app, submission_selector, submissions_consumer, generate_statistics,feedback_consumer, feedback_provider, evaluation_provider
from athena.text import Exercise, Submission, Feedback
from athena.logger import logger

from module_text_llm.config import Configuration
from module_text_llm.evaluation import get_feedback_statistics, get_llm_statistics
from module_text_llm.generate_evaluation import generate_evaluation
from module_text_llm.approach_controller import generate_suggestions
from module_text_llm.analytics.compile import compile

@submissions_consumer
def receive_submissions(exercise: Exercise, submissions: List[Submission]):
Expand All @@ -27,6 +27,11 @@ def select_submission(exercise: Exercise, submissions: List[Submission]) -> Subm
def process_incoming_feedback(exercise: Exercise, submission: Submission, feedbacks: List[Feedback]):
    """Log receipt of tutor feedback for one submission of an exercise.

    NOTE(review): this handler only logs — presumably any persistence or
    learning from the feedback happens in the @feedback_consumer framework
    wrapper; confirm against athena.endpoints.
    """
    logger.info("process_feedback: Received %d feedbacks for submission %d of exercise %d.", len(feedbacks), submission.id, exercise.id)

@generate_statistics
async def compile_analytics(results: dict):
    """Build interactive analytics from posted experiment *results*.

    Invoked via the module's /generate_statistics endpoint; *results* is the
    request's parsed JSON body. Returns whatever compile() produces —
    presumably an HTML page, since the endpoint is registered with
    HTMLResponse; confirm in module_text_llm.analytics.compile.
    """
    logger.info("generate_statistics: Generating statistics")
    # NOTE(review): compile here is module_text_llm.analytics.compile.compile,
    # which shadows the builtin compile() in this module's namespace.
    return compile(results)

@feedback_provider
async def suggest_feedback(exercise: Exercise, submission: Submission, is_graded: bool, module_config: Configuration) -> List[Feedback]:
logger.info("suggest_feedback: %s suggestions for submission %d of exercise %d were requested, with approach: %s",
Expand Down
Loading
Loading