-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark.py
185 lines (148 loc) · 6.39 KB
/
benchmark.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
import argparse
import contextlib
import json
import os
import subprocess
import sys
import time
from typing import List
import jsonpickle
from tqdm import tqdm
from pipeline.failure_extractor import FailureExtractor
from pipeline.patch_applicator import PatchApplicator
from pipeline.patch_generator_service import PatchGenerator, PipelineRunningMode
from pipeline.project_repairer import ProjectRepairer
from pipeline.types.llm import LLMType
from pipeline.types.project import Project
from dotenv import load_dotenv
from pipeline.types.project_repair_status import ProjectRepairStatus
class DummyFile(object):
    """File-like wrapper that routes writes through ``tqdm.write``.

    Meant to be installed in place of ``sys.stdout`` so that ``print`` output
    is emitted via ``tqdm.write`` to the wrapped stream instead of being
    written to it directly.
    """

    # Wrapped stream; set per instance in ``__init__``.
    file = None

    def __init__(self, file):
        """Remember *file* as the stream that ``tqdm.write`` should target."""
        self.file = file

    def write(self, x):
        """Forward *x* to ``tqdm.write`` unless it is empty or only whitespace."""
        # Avoid print() second call (useless \n)
        if len(x.rstrip()) > 0:
            tqdm.write(x, file=self.file)

    def flush(self):
        """Flush the wrapped stream when it supports flushing.

        ``print(..., flush=True)`` and other stdout users call ``flush`` on
        whatever object is installed as ``sys.stdout``; without this method
        those calls would raise ``AttributeError``.
        """
        return getattr(self.file, "flush", lambda: None)()
@contextlib.contextmanager
def nostdout():
    """Temporarily replace ``sys.stdout`` with a ``DummyFile`` wrapper.

    While the ``with`` body runs, anything written to ``sys.stdout`` goes
    through ``DummyFile.write``. The previous ``sys.stdout`` is restored when
    the body finishes, including when it raises.
    """
    save_stdout = sys.stdout
    sys.stdout = DummyFile(save_stdout)
    try:
        yield
    finally:
        # Restore even if the body raises (e.g. SystemExit), so later output
        # is not left pointing at the wrapper.
        sys.stdout = save_stdout
class BenchmarkReport:
    """Benchmark results persisted to a single report file.

    ``results`` maps a key (the callers in this file use project ids) to the
    result object stored for it; the mapping is serialized with ``jsonpickle``.
    """

    def __init__(self, path: str):
        """Create an empty report that will be saved to *path*."""
        self.path = path
        self.results = {}

    @staticmethod
    def load(from_path: str):
        """Return the report stored at *from_path*.

        If the file does not exist, an empty report bound to that path is
        returned instead.
        """
        report = BenchmarkReport(path=from_path)
        if os.path.exists(from_path):
            # NOTE(review): jsonpickle.decode can instantiate arbitrary
            # classes; only load report files produced by this tool.
            with open(from_path, "r") as f:
                report.results = jsonpickle.decode(f.read())
        return report

    def to_json(self):
        """Return the report as indented JSON with sorted keys.

        Objects that ``json`` cannot serialize natively are dumped through
        their ``__dict__``.
        """
        return json.dumps(self, default=lambda o: o.__dict__,
                          sort_keys=True, indent=4)

    def save(self):
        """Write ``results`` to ``self.path``, creating parent directories."""
        to_path = self.path
        parent_dir = os.path.dirname(to_path)
        # A bare file name has no directory part, and os.makedirs("") raises.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)
        # Encode before opening: opening with "w" truncates the file, so a
        # failed encode must not wipe the previously saved report.
        payload = jsonpickle.encode(self.results, indent=4)
        with open(to_path, "w") as f:
            f.write(payload)

    def add_result(self, key: str, result: "ProjectRepairStatus"):
        """Store *result* under *key* and immediately persist the report."""
        self.results[key] = result
        self.save()
        # subprocess.call("git add . && git commit -m \"[Benchmark] update results\" && git push", shell=True)
def main(model: LLMType, pipeline: PipelineRunningMode, name: str, project_id: str = None):
    """Run every registered benchmark for one model/pipeline combination.

    ``project_id`` is an optional comma-separated list of project ids. When it
    is empty or ``None``, ``None`` is handed to ``get_bump`` instead of a list.
    """
    if project_id:
        project_ids = project_id.split(",")
    else:
        project_ids = None

    # Benchmark key -> projects belonging to that benchmark.
    benchmarks = {
        "bump": get_bump(project_ids)
    }
    for key, projects in benchmarks.items():
        run_benchmark(
            key=key,
            name=name,
            projects=projects,
            pipeline=pipeline,
            model=model
        )
def update_metadata(project: Project, report: BenchmarkReport):
    """Backfill ``initial_errors_count`` on an already recorded result.

    When the result stored for the project has a non-positive
    ``initial_errors_count``, the failures are extracted again, their count is
    written onto the result, and the report is saved.
    """
    project_key = project.project_id
    stored = report.results.get(project_key)
    if stored.initial_errors_count <= 0:
        print(f"updating 'initial_errors_count' for project {project.project_id}")
        failures = FailureExtractor(project).get_failures()
        stored.initial_errors_count = len(failures)
        report.results[project_key] = stored
        report.save()
def run_benchmark(key: str, name: str, projects: List[Project], pipeline: PipelineRunningMode, model: LLMType):
    """Run one benchmark over *projects*, resuming from an existing report.

    The report lives at ``results/benchmark/{name}/{key}/{pipeline}/{model}/report.json``.
    Projects without a recorded result are repaired, their patches moved next
    to the report, and the result stored. Projects that already have a result
    are only listed and have their metadata refreshed.

    A ``KeyboardInterrupt`` exits the process with status 1; any other
    exception is printed and the project is skipped.
    """
    path = f"results/benchmark/{name}/{key}/{pipeline}/{model}"
    report = BenchmarkReport.load(from_path=f"{path}/report.json")

    run_id = os.getenv("RUN_ID")
    without_apidiff = os.getenv("WITHOUT_APIDIFF") == "True"
    description = f"<{run_id} | {without_apidiff}> Running projects for [{name}/{key}/{pipeline}/{model}]..."
    progress = tqdm(projects, desc=description, file=sys.stdout, miniters=1)

    for project in progress:
        with nostdout():
            try:
                previous = report.results.get(project.project_id)
                if previous is not None:
                    # Already benchmarked: show the recorded outcome only.
                    icon = "✅" if previous.repaired is True else "❌"
                    print(f"{icon} {project.project_name} ({project.project_id})")
                    update_metadata(project=project, report=report)
                    time.sleep(0.2)
                else:
                    status = run_project(
                        project=project,
                        pipeline=pipeline,
                        model=model
                    )
                    move_patches(project=project, path=path)
                    report.add_result(key=project.project_id, result=status)
            except KeyboardInterrupt:
                sys.exit(1)
            except Exception as error:
                print(f"Skipping {project.project_id} because is failing to run:")
                print(error)
def move_patches(project: Project, path: str):
    """Move the project's generated patches into the benchmark results tree.

    Every non-hidden entry of ``{project.path}/patches`` is moved with ``mv``
    into ``{path}/{project.project_id}/patches``, which is created if needed.
    """
    import glob  # local import: only this function needs it

    destination = f"{path}/{project.project_id}/patches"
    os.makedirs(destination, exist_ok=True)

    # Expand the wildcard here and pass an argument list, so paths containing
    # spaces or shell metacharacters reach ``mv`` intact. The directory part
    # is escaped so only the trailing "*" acts as a pattern.
    source_dir = glob.escape(f"{project.path}/patches")
    patch_files = sorted(glob.glob(f"{source_dir}/*"))
    if not patch_files:
        # No patches were produced; there is nothing to move.
        return
    subprocess.call(["mv", *patch_files, destination])
def run_project(project: Project, pipeline: PipelineRunningMode, model: LLMType) -> ProjectRepairStatus:
    """Repair *project* from a clean state and return the repair status.

    The ``patches`` and ``patched_code`` directories under the project path
    are removed before ``ProjectRepairer`` is invoked; the value returned by
    ``ProjectRepairer.repair()`` is passed through.
    """
    print(f"\n\n###### RUNNING PROJECT {project.project_name} ######")
    for leftover in ("patches", "patched_code"):
        # Argument list instead of a shell string, so a project path with
        # spaces or shell metacharacters is removed as-is.
        subprocess.call(["rm", "-rf", f"{project.path}/{leftover}"])
    repairer = ProjectRepairer(project=project, pipeline=pipeline, model=model)
    return repairer.repair()
def get_bump(ids: List[str] = None) -> List[Project]:
    """Load the BUMP benchmark projects, optionally restricted to *ids*.

    Each file in ``$BUMP_PATH/filtered_data`` names one project (the file name
    with ``.json`` removed). For every selected project the script
    ``benchmarks/bump/scripts/clone_client_code.sh`` is run with the project
    id and path.

    Args:
        ids: Project ids to keep; ``None`` selects every project.

    Returns:
        The selected projects sorted by ``project_id``.
    """
    result = []
    bump_folder = os.getenv("BUMP_PATH")
    for filename in os.scandir(f"{bump_folder}/filtered_data"):
        if not filename.is_file():
            continue
        key = filename.name.replace(".json", "")
        # Test for None before the membership check: ``key in None`` raises
        # TypeError, which made the unfiltered run crash.
        if ids is not None and key not in ids:
            continue
        project = Project.from_bump(bump_folder, key)
        subprocess.run([
            'bash',
            'benchmarks/bump/scripts/clone_client_code.sh',
            project.project_id,
            project.path
        ])
        result.append(project)
    return sorted(result, key=lambda x: x.project_id)
if __name__ == "__main__":
    # Read the .env file before anything looks at the environment.
    load_dotenv(override=True)

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "-n", "--name",
        help="Name of the benchmark execution",
        type=str,
        required=True,
    )
    cli.add_argument(
        "-m", "--model",
        help="LLM Model",
        type=LLMType,
        choices=list(LLMType),
        required=True,
    )
    cli.add_argument(
        "-bu",
        help="Breaking Update IDs, comma separated list",
        required=False,
    )
    cli.add_argument(
        "-p", "--pipeline",
        help="Pipeline [STANDARD, BASELINE, ADVANCED]",
        type=PipelineRunningMode,
        choices=list(PipelineRunningMode),
        required=True,
    )
    args = cli.parse_args()

    main(
        model=args.model,
        pipeline=args.pipeline,
        name=args.name,
        project_id=args.bu,
    )