Skip to content

Commit 8f08aaa

Browse files
Fixed results scripts
1 parent 336135d commit 8f08aaa

File tree

4 files changed

+40
-18
lines changed

4 files changed

+40
-18
lines changed

.vscode/launch_example.json

+7-5
Original file line number | Diff line number | Diff line change
@@ -5,15 +5,15 @@
55
"version": "0.2.0",
66
"configurations": [
77
{
8-
"name": "Python: Current File",
8+
"name": "Ollama Runner",
99
"type": "python",
1010
"request": "launch",
11-
"program": "./TypeEvalPy_LLM/src/target_tools/ollama/src/runner.py",
11+
"program": "./src/target_tools/ollama/src/runner.py",
1212
"console": "integratedTerminal",
1313
"justMyCode": true,
1414
"args": [
1515
"--bechmark_path",
16-
"./TypeEvalPy_LLM/.scrapy/test",
16+
"./micro-benchmark",
1717
"--ollama_models",
1818
"codellama:34b-instruct",
1919
"codellama:34b-python",
@@ -22,8 +22,10 @@
2222
"--ollama_url",
2323
"",
2424
"--prompt_id",
25-
"json_based_1"
25+
"questions_based_2",
26+
"--results_dir",
27+
""
2628
]
2729
}
2830
]
29-
}
31+
}

src/main_analyze_results.py

+16-7
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import argparse
12
import json
23
import logging
34
import os
@@ -896,10 +897,8 @@ def generate_top_n_performance(test_suite_dir, tool_name=None):
896897
return results_cat
897898

898899

899-
def run_results_analyzer():
900-
results_dir = None
901-
# results_dir = Path("../results/results_<>")
902-
if results_dir is None:
900+
def run_results_analyzer(args):
901+
if args.results_dir is None:
903902
dir_path = Path(SCRIPT_DIR) / "../results"
904903
directories = [
905904
f
@@ -909,11 +908,13 @@ def run_results_analyzer():
909908
directories.sort(key=lambda x: x.stat().st_mtime, reverse=True)
910909
# Get the latest directory
911910
results_dir = directories[0] if directories else None
911+
else:
912+
results_dir = Path(args.results_dir)
912913

913914
tools_results = {}
914915

915916
for item in results_dir.glob("*"):
916-
if item.is_file():
917+
if item.is_file() or item.name == "analysis_results":
917918
# ignore
918919
pass
919920
elif item.is_dir():
@@ -965,7 +966,7 @@ def run_results_analyzer():
965966
tools_list = utils.ML_TOOLS + utils.STANDARD_TOOLS
966967

967968
if len(tools_results) > 1:
968-
analysis_tables.create_comparison_table(tools_results, tools_list)
969+
analysis_tables.create_comparison_table(tools_results)
969970

970971
os.makedirs(results_dir / "analysis_results", exist_ok=True)
971972
results_dir = results_dir / "analysis_results"
@@ -1045,4 +1046,12 @@ def run_results_analyzer():
10451046

10461047

10471048
if __name__ == "__main__":
1048-
run_results_analyzer()
1049+
parser = argparse.ArgumentParser()
1050+
parser.add_argument(
1051+
"--results_dir",
1052+
help="Specify the results path",
1053+
default=None,
1054+
)
1055+
args = parser.parse_args()
1056+
1057+
run_results_analyzer(args)

src/result_analyzer/analysis_tables.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -409,13 +409,13 @@ def exact_match_category_table(stats):
409409
writer.writerows(rows)
410410

411411

412-
def create_comparison_table(stats, tools):
412+
def create_comparison_table(stats):
413413
# Sort stats based on total_caught
414414
stats = utils.sort_stats(stats)
415415

416416
headers = ["Tool Name"]
417417
stats = utils.sort_stats(stats)
418-
tool_names = [tool for tool in stats.keys() if tool in tools]
418+
tool_names = [tool for tool in stats.keys()]
419419
categories = list(stats[tool_names[0]]["exact_match_category"].keys())
420420
type_categories = list(
421421
list(stats[tool_names[0]]["exact_match_category"].values())[0].keys()

src/target_tools/ollama/src/runner.py

+15-4
Original file line number | Diff line number | Diff line change
@@ -180,22 +180,27 @@ def process_file(file_path, llm, openai_llm, prompt_id):
180180

181181

182182
def main_runner(args):
183-
error_count = 0
184-
timeout_count = 0
185-
json_count = 0
186183
model_name = "text-davinci-003"
187184
temperature = 0.0
188185
openai_llm = OpenAI(
189186
model_name=model_name, temperature=temperature, openai_api_key=args.openai_key
190187
)
191188

192189
for model in args.ollama_models:
190+
error_count = 0
191+
timeout_count = 0
192+
json_count = 0
193193
files_analyzed = 0
194194

195195
# Create result folder for model specific results
196196
bechmark_path = Path(args.bechmark_path)
197197
results_src = bechmark_path
198-
results_dst = bechmark_path.parent / model / bechmark_path.name
198+
if args.results_dir is None:
199+
results_dst = bechmark_path.parent / model / bechmark_path.name
200+
else:
201+
results_dst = Path(args.results_dir) / model / bechmark_path.name
202+
os.makedirs(results_dst, exist_ok=True)
203+
199204
utils.copy_folder(results_src, results_dst)
200205

201206
python_files = list_python_files(results_dst)
@@ -267,6 +272,12 @@ def main_runner(args):
267272
default="/tmp/micro-benchmark",
268273
)
269274

275+
parser.add_argument(
276+
"--results_dir",
277+
help="Specify the benchmark path",
278+
default=None,
279+
)
280+
270281
parser.add_argument(
271282
"--ollama_url", help="Specify the ollama server url", required=True
272283
)

0 commit comments

Comments (0)