|
| 1 | +""" |
| 2 | +An example is a single input/output pair. |
| 3 | + - Examples are used for fine-tuning a model (i.e. golden example) or running an eval (i.e. expected example). |
| 4 | + - There are two types of examples: |
| 5 | + - **Golden Example**: A golden example is an example that is used to create a golden dataset. |
| 6 | + - **Expected Example**: An expected example is an example that is used to evaluate a producer. |
| 7 | + Internally, once an expected example has been run through an eval, we create an **evaluated example**, but you don't need to create this manually in the UI. |
| 8 | +""" |
| 9 | + |
| 10 | +import os |
| 11 | +import shutil |
| 12 | +from typing import Generic, Literal, TypeVar |
| 13 | + |
| 14 | +from pydantic import BaseModel |
| 15 | + |
| 16 | + |
# Input half of an example: what is handed to the producer.
# (Documented with comments rather than a docstring so the generated
# pydantic JSON schema stays unchanged.)
class ExampleInput(BaseModel):
    # Prompt text for the example.
    prompt: str
    # Source code accompanying the prompt. ExampleStore keeps this in a
    # separate ``input.py`` file next to the JSON instead of inside it.
    input_code: str | None = None
    # NOTE(review): presumably the line of ``input_code`` the prompt
    # refers to -- confirm against the callers.
    line_number_target: int | None = None
| 21 | + |
| 22 | + |
# Common base for every kind of example: an identifier plus its input.
# ExampleStore uses ``id`` as the name of the example's directory.
class BaseExample(BaseModel):
    # Identifier of the example.
    id: str
    # What is given to the producer for this example.
    input: ExampleInput
| 26 | + |
| 27 | + |
# Output half of an example.
class ExampleOutput(BaseModel):
    # Resulting code. ExampleStore keeps this in ``output.py`` for golden
    # examples rather than inside the JSON.
    output_code: str | None = None
    # Unprocessed output text. ExampleStore keeps this in
    # ``raw_output.txt`` for golden examples.
    raw_output: str | None = None
    # Either "full" or "diff"; defaults to "diff".
    # NOTE(review): presumably says whether the output is a whole file or
    # a patch -- confirm against the producers.
    output_type: Literal["full", "diff"] = "diff"
| 32 | + |
| 33 | + |
# An example used to evaluate a producer: the shared id/input plus flags
# selecting which expectations apply. Both flags default to True.
class ExpectedExample(BaseExample):
    # Whether the produced code is expected to be executable.
    expect_executable: bool = True
    # Whether the produced code is expected to be type-checkable.
    expect_type_checkable: bool = True
| 37 | + |
| 38 | + |
# Result of checking a single expectation against one output.
class ExpectResult(BaseModel):
    # Which expectation this result belongs to.
    name: Literal["executable", "type_checkable", "patchable"]
    # 0 or 1 (the type itself does not enforce this range).
    score: int
    # Optional free-form message attached to the result.
    message: str | None = None
| 43 | + |
| 44 | + |
# One output recorded for an evaluated example, with its measurements
# and per-expectation results.
class EvaluatedExampleOutput(BaseModel):
    # Time spent, in seconds.
    time_spent_secs: float
    # Token count.
    # NOTE(review): whether this covers prompt, completion or both is not
    # visible here -- confirm.
    tokens: int
    # The output itself.
    output: ExampleOutput
    # One entry per expectation that was checked.
    expect_results: list[ExpectResult]
| 50 | + |
| 51 | + |
# An expected example paired with the outputs recorded once it has been
# run through an eval.
class EvaluatedExample(BaseModel):
    # The expected example that was evaluated.
    expected: ExpectedExample
    # The recorded outputs for that example.
    outputs: list[EvaluatedExampleOutput]
| 55 | + |
| 56 | + |
# An input together with its reference output; golden examples make up
# the golden dataset.
class GoldenExample(BaseExample):
    # The reference output for ``input``.
    output: ExampleOutput
| 59 | + |
| 60 | + |
# Type parameter of ExampleStore: the concrete BaseExample subclass that
# a given store reads and writes.
T = TypeVar("T", bound=BaseExample)
| 62 | + |
| 63 | + |
class ExampleStore(Generic[T]):
    """Directory-backed store for examples of one model type.

    Every example lives in its own directory ``<directory_path>/<id>/``:

    * ``example_input.json`` -- the model serialised as JSON, with the
      code fields that were written to separate files set to ``null``;
    * ``input.py`` -- ``input.input_code`` (only when non-empty);
    * ``output.py`` / ``raw_output.txt`` -- ``output.output_code`` and
      ``output.raw_output`` (``GoldenExample`` only, only when not None).

    All files are read and written as UTF-8.
    """

    def __init__(self, entity_type: type[T], *, dirname: str) -> None:
        """Bind the store to ``entity_type`` and to the ``data/<dirname>`` folder.

        The folder is resolved relative to this source file (three levels
        up, then ``data/<dirname>``), not to the current working directory.
        """
        self.entity_type = entity_type
        self.directory_path = os.path.join(
            os.path.dirname(__file__), "..", "..", "..", "data", dirname
        )

    def _entity_dir(self, entity_id: str) -> str:
        """Return the directory for ``entity_id``.

        Raises:
            ValueError: if the id is empty, is ``.``/``..``, or contains a
                path separator. Such ids would resolve to the store
                directory itself or escape it, which is dangerous in
                combination with ``delete`` (``shutil.rmtree``).
        """
        if (
            not entity_id
            or entity_id in (os.curdir, os.pardir)
            or os.path.basename(entity_id) != entity_id
        ):
            raise ValueError(f"Invalid example id: {entity_id!r}")
        return os.path.join(self.directory_path, entity_id)

    @staticmethod
    def _read_text_if_exists(path: str) -> str | None:
        """Return the text content of ``path``, or None when it is absent."""
        try:
            with open(path, encoding="utf-8") as f:
                return f.read()
        except FileNotFoundError:
            return None

    @staticmethod
    def _write_sidecar(path: str, content: str | None) -> None:
        """Write ``content`` to ``path``; with None, remove a stale file instead."""
        if content is None:
            # A leftover file from a previous save would be read back by
            # ``get`` and resurrect content the entity no longer has.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass
            return
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)

    def get(self, id: str) -> T:
        """Load the example stored under ``id``.

        Code fields are filled in from the sidecar files when those files
        exist; otherwise the values from the JSON are kept.

        Raises:
            ValueError: if ``id`` is not a valid single directory name.
            FileNotFoundError: if the example's JSON file does not exist.
        """
        dir_path = self._entity_dir(id)
        json_path = os.path.join(dir_path, "example_input.json")
        with open(json_path, encoding="utf-8") as f:
            entity = self.entity_type.model_validate_json(f.read())

        input_code = self._read_text_if_exists(
            os.path.join(dir_path, "input.py")
        )
        if input_code is not None:
            entity.input.input_code = input_code

        if isinstance(entity, GoldenExample):
            output_code = self._read_text_if_exists(
                os.path.join(dir_path, "output.py")
            )
            if output_code is not None:
                entity.output.output_code = output_code
            raw_output = self._read_text_if_exists(
                os.path.join(dir_path, "raw_output.txt")
            )
            if raw_output is not None:
                entity.output.raw_output = raw_output
        return entity

    def get_all(self) -> list[T]:
        """Load every example in the store, ordered by id.

        Only sub-directories are treated as examples, so stray files in
        the store folder (e.g. ``.DS_Store``) are ignored.
        """
        example_ids = sorted(
            name
            for name in os.listdir(self.directory_path)
            if os.path.isdir(os.path.join(self.directory_path, name))
        )
        return [self.get(example_id) for example_id in example_ids]

    def save(self, entity: T, overwrite: bool = False) -> None:
        """Persist ``entity`` under its id.

        The passed-in ``entity`` is left untouched. The example directory
        (and any missing parent) is created when needed. When overwriting,
        sidecar files for code fields that are now unset are removed.

        Raises:
            ValueError: if the id is invalid, or if the example already
                exists and ``overwrite`` is false.
        """
        dir_path = self._entity_dir(entity.id)
        if not overwrite and os.path.exists(dir_path):
            raise ValueError(
                f"{self.entity_type.__name__} with id {entity.id} already exists"
            )
        os.makedirs(dir_path, exist_ok=True)

        # The code fields are blanked before dumping to JSON; do that on a
        # deep copy so the caller's object keeps its code.
        stripped = entity.model_copy(deep=True)

        input_code = stripped.input.input_code
        # An empty string is not written to input.py and stays in the JSON.
        self._write_sidecar(
            os.path.join(dir_path, "input.py"), input_code or None
        )
        if input_code:
            stripped.input.input_code = None

        if isinstance(stripped, GoldenExample):
            self._write_sidecar(
                os.path.join(dir_path, "output.py"),
                stripped.output.output_code,
            )
            self._write_sidecar(
                os.path.join(dir_path, "raw_output.txt"),
                stripped.output.raw_output,
            )
            stripped.output.output_code = None
            stripped.output.raw_output = None

        json_path = os.path.join(dir_path, "example_input.json")
        with open(json_path, "w", encoding="utf-8") as f:
            f.write(stripped.model_dump_json(indent=4))

    def delete(self, entity_id: str) -> None:
        """Remove the example's directory and everything in it.

        Raises:
            ValueError: if ``entity_id`` is not a valid single directory name.
            FileNotFoundError: if the example does not exist.
        """
        shutil.rmtree(self._entity_dir(entity_id))
| 132 | + |
| 133 | + |
# Module-level store instances, one per example kind, each bound to its
# own sub-folder of the data directory.
expected_example_store: ExampleStore[ExpectedExample] = ExampleStore(
    ExpectedExample,
    dirname="expected_examples",
)
golden_example_store: ExampleStore[GoldenExample] = ExampleStore(
    GoldenExample,
    dirname="golden_examples",
)
0 commit comments