Skip to content

Commit 336135d

Browse files
committed
LLMs | Training set and jsonl for fine tuning
1 parent 62dfbd4 commit 336135d

File tree

25 files changed

+521
-0
lines changed

25 files changed

+521
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
import os
2+
import json
3+
4+
5+
def process_file(file_path):
6+
with open(file_path, "r") as f:
7+
return f.read()
8+
9+
10+
def process_json_data(json_data):
11+
return json.loads(json_data)
12+
13+
14+
def process_folder(folder_path, output_file):
15+
with open(output_file, "w") as output:
16+
for root, dirs, files in os.walk(folder_path):
17+
messages = []
18+
19+
system_message = {
20+
"role": "system",
21+
"content": (
22+
"Perform the following tasks: 1. Infer the types of various Python"
23+
" elements like function parameters, local variables, and function"
24+
" return types with the highest probability. 2. Provide your"
25+
" response in a valid JSON array of objects. Do not provide any"
26+
" additional information except the JSON object."
27+
),
28+
}
29+
messages.append(system_message)
30+
print("Analysing:", root)
31+
for file_name in files:
32+
file_path = os.path.join(root, file_name)
33+
34+
if file_name.endswith(".py"):
35+
code = process_file(file_path)
36+
user_message = {
37+
"role": "user",
38+
"content": f"'{file_name}':\n\n```\n{code}\n```",
39+
}
40+
messages.append(user_message)
41+
42+
elif file_name.endswith(".json"):
43+
assistant_content = process_file(file_path)
44+
data = process_json_data(assistant_content)
45+
assistant_message = {
46+
"role": "assistant",
47+
"content": json.dumps(data),
48+
}
49+
messages.append(assistant_message)
50+
51+
# Write messages to the output file
52+
if any(entry.get("role") == "user" for entry in messages):
53+
output.write(
54+
json.dumps({"messages": messages}, separators=(",", ":")) + "\n"
55+
)
56+
57+
58+
folder_path = "training_set"
59+
output_file = "output.jsonl"
60+
process_folder(folder_path, output_file)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef func():\n return \"Hello from func\"\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"type\": [\"str\"]}]"}]}
2+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef func(a):\n return \"Hello from func\"\n\n\nresult = func(5)\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"type\": [\"str\"]}, {\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"parameter\": \"a\", \"type\": [\"int\"]}, {\"file\": \"main.py\", \"line_number\": 5, \"variable\": \"result\", \"type\": [\"str\"]}]"}]}
3+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef func():\n x = 1\n return x\n\n\nresult1 = func()\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"type\": [\"int\"]}, {\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"variable\": \"x\", \"type\": [\"int\"]}, {\"file\": \"main.py\", \"line_number\": 6, \"variable\": \"result1\", \"type\": [\"int\"]}]"}]}
4+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\na = 10\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"variable\": \"a\", \"type\": [\"int\"]}]"}]}
5+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef outer():\n x = 1\n\n def inner():\n y = \"Hello\"\n return y\n\n return inner()\n\n\nresult1 = outer()\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"outer\", \"type\": [\"str\"]}, {\"file\": \"main.py\", \"line_number\": 2, \"function\": \"outer\", \"variable\": \"x\", \"type\": [\"int\"]}, {\"file\": \"main.py\", \"line_number\": 4, \"function\": \"inner\", \"type\": [\"str\"]}, {\"file\": \"main.py\", \"line_number\": 5, \"function\": \"inner\", \"variable\": \"y\", \"type\": [\"str\"]}, {\"file\": \"main.py\", \"line_number\": 11, \"variable\": \"result1\", \"type\": [\"str\"]}]"}]}
6+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\na = [3, 5, 2]\n\n```"},{"role":"assistant","content":"[{\"file\": \" main.py \", \"line_number\": 1, \"variable\": \"a\", \"type\": [\"list\"]}]"}]}
7+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef func(a=5):\n return\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"type\": [\"Nonetype\"]}, {\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"parameter\": \"a\", \"type\": [\"int\"]}]"}]}
8+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef add(a, b):\n return a + b\n\n\nresult1 = add(2, 3)\nresult2 = add(\"Hello\", \"String\")\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"add\", \"type\": [\"int\", \"str\"]}, {\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"parameter\": \"a\", \"type\": [\"int\", \"str\"]}, {\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"parameter\": \"b\", \"type\": [\"int\", \"str\"]}, {\"file\": \"main.py\", \"line_number\": 5, \"variable\": \"result1\", \"type\": [\"int\"]}, {\"file\": \"main.py\", \"line_number\": 6, \"variable\": \"result2\", \"type\": [\"str\"]}]"}]}
9+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndict1 = {\"a\": 1, \"b\": 2}\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 4, \"col_offset\": 1, \"variable\": \"dict1\", \"type\": [\"dict\"]}, {\"file\": \"main.py\", \"line_number\": 4, \"col_offset\": 1, \"variable\": \"dict1['a']\", \"type\": [\"int\"]}, {\"file\": \"main.py\", \"line_number\": 4, \"col_offset\": 1, \"variable\": \"dict1['b']\", \"type\": [\"int\"]}]"}]}
10+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\ndef func():\n return \"Hello from func\"\n\n\na = func\n\n```"},{"role":"assistant","content":"[{\"file\": \"main.py\", \"line_number\": 1, \"function\": \"func\", \"type\": [\"str\"]}, {\"file\": \"main.py\", \"line_number\": 5, \"variable\": \"a\", \"type\": [\"callable\"]}]"}]}
11+
{"messages":[{"role":"system","content":"Perform the following tasks: 1. Infer the types of various Python elements like function parameters, local variables, and function return types with the highest probability. 2. Provide your response in a valid JSON array of objects. Do not provide any additional information except the JSON object."},{"role":"user","content":"'main.py':\n\n```\nclass TestClass:\n def func(self):\n return \"Hello from func in TestClass\"\n\n\ntest = TestClass()\nresult = test.func()\n\n```"},{"role":"assistant","content":"[{\"file\": \" main.py \", \"line_number\": 1, \"function\": \"TestClass.func\", \"type\": [\"str\"]}, {\"file\": \" main.py \", \"line_number\": 6, \"variable\": \"test\", \"type\": [\"TestClass\"]}, {\"file\": \" main.py \", \"line_number\": 7, \"variable\": \"result\", \"type\": [\"str\"]}]"}]}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
a = 10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"variable": "a",
6+
"type": [
7+
"int"
8+
]
9+
}
10+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
def func():
2+
return "Hello from func"
3+
4+
5+
a = func
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "func",
6+
"type": [
7+
"str"
8+
]
9+
},
10+
{
11+
"file": "main.py",
12+
"line_number": 5,
13+
"variable": "a",
14+
"type": [
15+
"callable"
16+
]
17+
}
18+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
def outer():
2+
x = 1
3+
4+
def inner():
5+
y = "Hello"
6+
return y
7+
8+
return inner()
9+
10+
11+
result1 = outer()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "outer",
6+
"type": [
7+
"str"
8+
]
9+
},
10+
{
11+
"file": "main.py",
12+
"line_number": 2,
13+
"function": "outer",
14+
"variable": "x",
15+
"type": [
16+
"int"
17+
]
18+
},
19+
{
20+
"file": "main.py",
21+
"line_number": 4,
22+
"function": "inner",
23+
"type": [
24+
"str"
25+
]
26+
},
27+
{
28+
"file": "main.py",
29+
"line_number": 5,
30+
"function": "inner",
31+
"variable": "y",
32+
"type": [
33+
"str"
34+
]
35+
},
36+
{
37+
"file": "main.py",
38+
"line_number": 11,
39+
"variable": "result1",
40+
"type": [
41+
"str"
42+
]
43+
}
44+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def func():
2+
return "Hello from func"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "func",
6+
"type": [
7+
"str"
8+
]
9+
}
10+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
def func(a=5):
2+
return
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "func",
6+
"type": [
7+
"Nonetype"
8+
]
9+
},
10+
{
11+
"file": "main.py",
12+
"line_number": 1,
13+
"function": "func",
14+
"parameter": "a",
15+
"type": [
16+
"int"
17+
]
18+
}
19+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
def func(a):
2+
return "Hello from func"
3+
4+
5+
result = func(5)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "func",
6+
"type": [
7+
"str"
8+
]
9+
},
10+
{
11+
"file": "main.py",
12+
"line_number": 1,
13+
"function": "func",
14+
"parameter": "a",
15+
"type": [
16+
"int"
17+
]
18+
},
19+
{
20+
"file": "main.py",
21+
"line_number": 5,
22+
"variable": "result",
23+
"type": [
24+
"str"
25+
]
26+
}
27+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def func():
2+
x = 1
3+
return x
4+
5+
6+
result1 = func()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "func",
6+
"type": [
7+
"int"
8+
]
9+
},
10+
{
11+
"file": "main.py",
12+
"line_number": 1,
13+
"function": "func",
14+
"variable": "x",
15+
"type": [
16+
"int"
17+
]
18+
},
19+
{
20+
"file": "main.py",
21+
"line_number": 6,
22+
"variable": "result1",
23+
"type": [
24+
"int"
25+
]
26+
}
27+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
def add(a, b):
2+
return a + b
3+
4+
5+
result1 = add(2, 3)
6+
result2 = add("Hello", "String")
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
[
2+
{
3+
"file": "main.py",
4+
"line_number": 1,
5+
"function": "add",
6+
"type": [
7+
"int",
8+
"str"
9+
]
10+
},
11+
{
12+
"file": "main.py",
13+
"line_number": 1,
14+
"function": "func",
15+
"parameter": "a",
16+
"type": [
17+
"int",
18+
"str"
19+
]
20+
},
21+
{
22+
"file": "main.py",
23+
"line_number": 1,
24+
"function": "func",
25+
"parameter": "b",
26+
"type": [
27+
"int",
28+
"str"
29+
]
30+
},
31+
{
32+
"file": "main.py",
33+
"line_number": 5,
34+
"variable": "result1",
35+
"type": [
36+
"int"
37+
]
38+
},
39+
{
40+
"file": "main.py",
41+
"line_number": 6,
42+
"variable": "result2",
43+
"type": [
44+
"str"
45+
]
46+
}
47+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
class TestClass:
2+
def func(self):
3+
return "Hello from func in TestClass"
4+
5+
6+
test = TestClass()
7+
result = test.func()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
[
2+
{
3+
"file": " main.py ",
4+
"line_number": 1,
5+
"function": "TestClass.func",
6+
"type": [
7+
"str"
8+
]
9+
},
10+
{
11+
"file": " main.py ",
12+
"line_number": 6,
13+
"variable": "test",
14+
"type": [
15+
"TestClass"
16+
]
17+
},
18+
{
19+
"file": " main.py ",
20+
"line_number": 7,
21+
"variable": "result",
22+
"type": [
23+
"str"
24+
]
25+
}
26+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
a = [3, 5, 2]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
[
2+
{
3+
"file": " main.py ",
4+
"line_number": 1,
5+
"variable": "a",
6+
"type": [
7+
"list"
8+
]
9+
}
10+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
dict1 = {"a": 1, "b": 2}

0 commit comments

Comments
 (0)