
Commit 821fb8f

Merge pull request #5 from secure-software-engineering/LLMs_MicroBench
Autogen Benchmark for Scaling Annotations
2 parents 46d7fb6 + b1633ed commit 821fb8f

12,933 files changed: +920,208 -25 lines


README.md

Lines changed: 29 additions & 0 deletions

@@ -14,6 +14,12 @@

- 🔄 Efficiently transforms inferred types into a **standardized format**.
- 📊 Automatically produces **meaningful metrics** for in-depth assessment and comparison.

### [New] TypeEvalPy Autogen

- 🤖 **Autogenerates code snippets** and ground truth to scale the benchmark, based on the original `TypeEvalPy` benchmark.
- 📈 The autogen benchmark now contains:
  - **Python files**: 7121
  - **Type annotations**: 78373

## 🛠️ Supported Tools

| Supported :white_check_mark: | In-progress :wrench: | Planned :bulb: |

@@ -231,6 +237,29 @@ docker run \

---

## Running TypeEvalPy Autogen

To generate an extended version of the original TypeEvalPy benchmark that includes many more Python types, run the following commands:

1. **Navigate to the `autogen` Directory**

   ```bash
   cd autogen
   ```

2. **Execute the Generation Script**

   Run the following command to start the generation process:

   ```bash
   python generate_typeevalpy_dataset.py
   ```

   This will generate a folder in the repo root containing the autogen benchmark, named with the current date, as illustrated below.
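
As a concrete illustration: the generation script added in this commit names the output folder `generated_typeevalpy_dataset_` followed by a `%Y-%m-%d_%H-%M-%S` timestamp, so the exact name depends on when you run it. The snippet below only demonstrates the naming pattern with an example timestamp, not a real run.

```python
import datetime

# Example timestamp only; the real folder name uses the time of the run.
run_time = datetime.datetime(2024, 1, 15, 10, 30, 0)
folder_name = f"generated_typeevalpy_dataset_{run_time.strftime('%Y-%m-%d_%H-%M-%S')}"
print(folder_name)  # generated_typeevalpy_dataset_2024-01-15_10-30-00
```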

---

### 🤝 Contributing

Thank you for your interest in contributing! To add support for a new tool, please use the Docker templates provided in our repository. After implementing and testing your tool, submit a pull request (PR) with a descriptive message. Our maintainers will review your submission and merge it.

autogen/generate_typeevalpy_dataset.py

Lines changed: 176 additions & 0 deletions

@@ -0,0 +1,176 @@

```python
from helpers import read_template, process_file, process_import_case
import os
import shutil
from pathlib import Path
import tqdm
import time
import json
import datetime

ROOT_DIR = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))

current_datetime = datetime.datetime.now()
formatted_datetime = current_datetime.strftime("%Y-%m-%d_%H-%M-%S")

output_folder = f"{ROOT_DIR}/generated_typeevalpy_dataset_{formatted_datetime}"
error_folder = f"{ROOT_DIR}/.scrapy/error"
benchmark_dir = f"{ROOT_DIR}/micro-benchmark-autogen-templates"
shutil.rmtree(output_folder, ignore_errors=True)
shutil.rmtree(error_folder, ignore_errors=True)


# Walk the template tree and generate concrete benchmark files from each template.
python_files = sorted(Path(benchmark_dir).rglob("*.py"))
files_analyzed = 0
error_count = 0
last_folder = ""
start_time = time.time()
total_start_time = time.time()
for file in tqdm.tqdm(python_files, desc="Processing files"):
    try:
        # Print the folder path if it is not the same as the last one
        if str(file.parent.parent.name) != last_folder:
            if last_folder:
                print(
                    f"Time taken for {last_folder}: {time.time() - start_time} seconds"
                )
            print(
                f"##################\nProcessing: {file.parent.parent.name}\n##################"
            )
            last_folder = str(file.parent.parent.name)
            start_time = time.time()

        # Ignore anything that is not a main.py template
        if file.name != "main.py":
            print(f">> Ignoring: {file}")
            continue

        template_data = read_template(file)
        if template_data["replacement_mode"] == "Imports":
            process_import_case(
                name=template_data["name"],
                data_types=template_data["data_types"],
                code_template=template_data["code_template"],
                json_template=template_data["json_template"],
                file_path=str(file.parent).replace(benchmark_dir, ""),
                file_parent=str(file.parent),
                output_folder=output_folder,
            )
        else:
            process_file(
                name=template_data["name"],
                data_types=template_data["data_types"],
                code_template=template_data["code_template"],
                json_template=template_data["json_template"],
                file_path=str(file.parent).replace(benchmark_dir, ""),
                output_folder=output_folder,
            )

    except Exception as e:
        print(e)


def get_fact_stats(json_files):
    # Aggregate annotation statistics over a list of *_gt.json ground-truth files.
    total_annotations = 0
    total_types = 0
    total_col = 0
    rows = []
    sum_functions = 0
    sum_params = 0
    sum_variables = 0
    sum_empty_out_types = 0
    sum_non_empty_out_types = 0
    for json_file in json_files:
        with open(json_file, "r") as f:
            data = json.load(f)
        if "ground_truth" not in data:
            continue
        data = data["ground_truth"]
        total_annotations += len(data)
        merged_cell = json_file
        for _t in data:
            total_types += len(_t["type"])
            if _t.get("col_offset"):
                total_col += 1
            line_number = _t.get("line_number", "")
            function = _t.get("function", "")
            param = _t.get("parameter", "")
            variable = _t.get("variable", "")
            types = ", ".join(_t.get("type", []))
            rows.append(
                [
                    merged_cell,
                    line_number,
                    function,
                    param,
                    variable,
                    types,
                ]
            )
            # A fact with a function but no parameter or variable is a return-type fact.
            if function:
                if not param and not variable:
                    sum_functions += 1

            if param:
                sum_params += 1

            if variable:
                sum_variables += 1

    return (
        total_annotations,
        total_types,
        total_col,
        sum_functions,
        sum_params,
        sum_variables,
    )


# Per-category statistics for the python_features part of the generated benchmark
print("python_features")
print(
    "category | Overall annotations | Overall types | Overall functions | overall param"
    " | Overall variables"
)
python_features_dir = output_folder + "/python_features"
pf_overall_annotations = 0
pf_overall_types = 0
for cat in sorted(os.listdir(python_features_dir)):
    cat_dir = os.path.join(python_features_dir, cat)
    json_files = [_file for _file in sorted(Path(cat_dir).rglob("*_gt.json"))]

    _a, _t, _, sum_functions, sum_params, sum_variables = get_fact_stats(json_files)

    pf_overall_annotations += _a
    pf_overall_types += _t

    print(cat, _a, _t, sum_functions, sum_params, sum_variables)

print(pf_overall_annotations, pf_overall_types)

# Overall statistics across the entire generated dataset
json_files = [_file for _file in sorted(Path(output_folder).rglob("*_gt.json"))]
python_files = [_file for _file in sorted(Path(output_folder).rglob("*.py"))]

print("\nOverall")
total_annotations = 0
total_types = 0
total_col = 0
for json_file in json_files:
    with open(json_file, "r") as f:
        data = json.load(f)
    if "ground_truth" not in data:
        continue
    data = data["ground_truth"]
    total_annotations += len(data)
    for _t in data:
        total_types += len(_t["type"])
        if _t.get("col_offset"):
            total_col += 1


print(f"Total Python files: {len(python_files)}")
print(f"Total annotations: {total_annotations}")
print(f"Total types in annotations: {total_types}")

print(f"Total time taken: {time.time() - total_start_time} seconds")
```
