
Commit 62dfbd4

Questions-based prompt and response translator
1 parent 0fb9ff0 commit 62dfbd4

File tree

4 files changed (+152, -54 lines)


src/target_tools/ollama/src/prompts.py (+33, -1)

```diff
@@ -471,9 +471,41 @@ def id_func ( arg ):
 
 Analyze the provided Python code and determine the types of various elements. Answer the following questions based on your analysis.
 
+Python Code:
+{code}
+
 Questions:
 {questions}
 
-Python Code:
+Your Answers:
+{answers}
+"""
+
+questions_based_2 = """
+## Task Description
+
+**Objective**: Examine and identify the data types of various elements such as function parameters, local variables, and function return types in the given Python code.
+
+**Instructions**:
+1. For each question below, provide a concise, one-word answer indicating the data type.
+2. For arguments and variables inside a function, list every data type they take within the current program context as a comma separated list.
+3. Do not include additional explanations or commentary in your answers.
+
+**Python Code Provided**:
+```python
 {code}
+```
+
+**Questions**:
+{questions}
+
+**Format for Answers**:
+- Provide your answer next to each question number, using only one word.
+- Example:
+1. int
+2. float
+3. str
+
+**Your Answers**:
+{answers}
 """
```

src/target_tools/ollama/src/runner.py (+69, -49)

```diff
@@ -2,6 +2,7 @@
 import json
 import logging
 import multiprocessing
+import os
 import re
 import shutil
 import sys
@@ -12,6 +13,7 @@
 from typing import List, Optional
 
 import prompts
+import translator
 import utils
 from langchain.callbacks.manager import CallbackManager
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
@@ -22,8 +24,9 @@
 from langchain.pydantic_v1 import BaseModel
 
 AUTOFIX_WITH_OPENAI = False
-ENABLE_STREAMING = False
+ENABLE_STREAMING = True
 REQUEST_TIMEOUT = 60
+USE_MULTIPROCESSING_FOR_TERMINATION = True
 
 
 class TypeEvalPySchema(BaseModel):
@@ -39,6 +42,7 @@ class TypeEvalPySchema(BaseModel):
     "json_based_1": prompts.json_based_1,
     "json_based_2": prompts.json_based_2,
     "questions_based_1": prompts.questions_based_1,
+    "questions_based_2": prompts.questions_based_2,
 }
 
 # Create a logger
@@ -76,17 +80,20 @@ def get_prompt(prompt_id, code_path, json_filepath):
     with open(code_path, "r") as file:
         code = file.read()
 
-    if prompt_id == "questions_based_1":
+    if prompt_id in ["questions_based_1", "questions_based_2"]:
         questions_from_json = utils.generate_questions_from_json(json_filepath)
 
         prompt = PromptTemplate(
             template=PROMPTS_MAP[prompt_id],
-            input_variables=["code", "questions"],
+            input_variables=["code", "questions", "answers"],
         )
 
         prompt_data = {
             "code": code,
-            "questions": "\nResult:\n".join(questions_from_json),
+            "questions": "\n".join(questions_from_json),
+            "answers": "\n".join(
+                [f"{x}." for x in range(1, len(questions_from_json) + 1)]
+            ),
         }
     elif prompt_id in ["json_based_1", "json_based_2"]:
         parser = PydanticOutputParser(pydantic_object=TypeEvalPySchema)
@@ -112,32 +119,35 @@ def process_file(file_path, llm, openai_llm, prompt_id):
     json_filepath = str(file_path).replace(".py", "_gt.json")
     result_filepath = str(file_path).replace(".py", f"_result.json")
 
-    # Queue for communication between processes
-    queue = multiprocessing.Queue()
+    if USE_MULTIPROCESSING_FOR_TERMINATION:
+        # Queue for communication between processes
+        queue = multiprocessing.Queue()
 
-    # Create a process for llm.invoke
-    process = multiprocessing.Process(
-        target=invoke_llm,
-        args=(llm, get_prompt(prompt_id, file_path, json_filepath), queue),
-    )
-    process.start()
+        # Create a process for llm.invoke
+        process = multiprocessing.Process(
+            target=invoke_llm,
+            args=(llm, get_prompt(prompt_id, file_path, json_filepath), queue),
+        )
+        process.start()
 
-    # Wait for the process to finish with a timeout (e.g., 60 seconds)
-    process.join(timeout=REQUEST_TIMEOUT)
+        # Wait for the process to finish with a timeout (e.g., 60 seconds)
+        process.join(timeout=REQUEST_TIMEOUT)
 
-    if process.is_alive():
-        logger.info(f"Timeout occurred for {file_path}")
-        process.terminate()  # Terminate the process if it's still running
-        process.join()
-        logger.info(f"{file_path} failed: Not a valid JSON")
-        raise utils.TimeoutException("json")
+        if process.is_alive():
+            logger.info(f"Timeout occurred for {file_path}")
+            process.terminate()  # Terminate the process if it's still running
+            process.join()
+            logger.info(f"{file_path} failed: Not a valid JSON")
+            raise utils.TimeoutException("json")
 
-    result = queue.get_nowait()
+        result = queue.get_nowait()
 
-    if isinstance(result, Exception):
-        raise result
+        if isinstance(result, Exception):
+            raise result
 
-    output = result
+        output = result
+    else:
+        output = llm.invoke(get_prompt(prompt_id, file_path, json_filepath))
 
     if isinstance(llm, ChatOpenAI):
         output = output.content
@@ -157,7 +167,13 @@ def process_file(file_path, llm, openai_llm, prompt_id):
 
     logger.info(output)
 
-    is_valid_json = utils.generate_json_file(result_filepath, output)
+    if prompt_id == "questions_based_2":
+        answers_json = utils.generate_json_from_answers(json_filepath, output)
+        translated_json = translator.translate_content(answers_json)
+    else:
+        translated_json = translator.translate_content(output)
+
+    is_valid_json = utils.generate_json_file(result_filepath, translated_json)
     if not is_valid_json:
         logger.info(f"{file_path} failed: Not a valid JSON")
         raise utils.JsonException("json")
```
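
The `process_file` change above guards the classic hard-timeout pattern behind `USE_MULTIPROCESSING_FOR_TERMINATION`: run the blocking `llm.invoke` in a child process, `join` with a timeout, and `terminate` the child if it is still alive. A self-contained sketch of the same pattern, with a hypothetical `slow_call` standing in for `invoke_llm`:

```python
import multiprocessing
import time


def slow_call(seconds, queue):
    # Hypothetical stand-in for invoke_llm: does the blocking work and
    # pushes either the result or the raised exception onto the queue.
    try:
        time.sleep(seconds)
        queue.put("done")
    except Exception as e:
        queue.put(e)


if __name__ == "__main__":
    queue = multiprocessing.Queue()
    process = multiprocessing.Process(target=slow_call, args=(120, queue))
    process.start()
    process.join(timeout=5)  # wait at most 5 seconds

    if process.is_alive():
        process.terminate()  # hard-kill the stuck worker, then reap it
        process.join()
        print("timed out")
    else:
        result = queue.get_nowait()
        if isinstance(result, Exception):
            raise result
        print(result)
```

A terminated child can be killed even while stuck in a C-level call, which thread-based timeouts cannot do. It also explains the comment in the next hunk about recreating the llm object for each file.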
```diff
@@ -184,33 +200,37 @@ def main_runner(args):
 
     python_files = list_python_files(results_dst)
 
-    if model.startswith("gpt-"):
-        # OpenAI models
-        llm = ChatOpenAI(
-            model_name=model,
-            temperature=temperature,
-            openai_api_key=args.openai_key,
-        )
-
-    else:
-        llm = Ollama(
-            model=model,
-            callback_manager=(
-                CallbackManager([StreamingStdOutCallbackHandler()])
-                if ENABLE_STREAMING
-                else None
-            ),
-            temperature=temperature,
-            timeout=REQUEST_TIMEOUT,
-        )
-        llm.base_url = args.ollama_url
-        if utils.is_ollama_online(llm.base_url):
+    if not model.startswith("gpt-"):
+        if utils.is_ollama_online(args.ollama_url):
             logger.info("Ollama is online!")
         else:
             logger.error("Ollama server is not online!!!")
             sys.exit(-1)
 
     for file in python_files:
+        # Recreating llm object each iteration since we might force terminate in thread
+        # Maybe there is another better way to do this
+        if model.startswith("gpt-"):
+            # OpenAI models
+            llm = ChatOpenAI(
+                model_name=model,
+                temperature=temperature,
+                openai_api_key=args.openai_key,
+            )
+
+        else:
+            llm = Ollama(
+                model=model,
+                callback_manager=(
+                    CallbackManager([StreamingStdOutCallbackHandler()])
+                    if ENABLE_STREAMING
+                    else None
+                ),
+                temperature=temperature,
+                timeout=REQUEST_TIMEOUT,
+            )
+            llm.base_url = args.ollama_url
+
         prompt_start_time = time.time()
         try:
             logger.info(file)
@@ -229,9 +249,9 @@ def main_runner(args):
 
         files_analyzed += 1
         logger.info(
-            f"Progress: {files_analyzed}/{len(python_files)} | Errors/JSON:"
-            f" {error_count}/{json_count} | PromptTime:"
-            f" {time.time()-prompt_start_time}"
+            f"Progress: {files_analyzed}/{len(python_files)} | Total Errors / JSON"
+            f" Errors / Timeouts: {error_count},{json_count},{timeout_count} |"
+            f" PromptTime: {time.time()-prompt_start_time}"
        )
 
        logger.info(
```
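
The final hunk above reworks the progress line to report total errors, JSON errors, and timeouts as separate counters. A tiny sketch with hypothetical values shows the new format:

```python
# Hypothetical counter values to illustrate the new log line:
files_analyzed, python_files = 5, range(10)
error_count, json_count, timeout_count = 3, 2, 1
prompt_time = 4.2
print(
    f"Progress: {files_analyzed}/{len(python_files)} | Total Errors / JSON"
    f" Errors / Timeouts: {error_count},{json_count},{timeout_count} |"
    f" PromptTime: {prompt_time}"
)
# Progress: 5/10 | Total Errors / JSON Errors / Timeouts: 3,2,1 | PromptTime: 4.2
```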
src/target_tools/ollama/src/translator.py (new file, +19 lines; the filename was dropped by the page extraction and is inferred from `import translator` in runner.py)

```diff
@@ -0,0 +1,19 @@
+import json
+
+
+def translate_content(data):
+    type_mapping = {
+        "integer": "int",
+        "string": "str",
+        "function": "callable",
+        "none": "Nonetype",
+    }
+
+    for entry in data:
+        translated_types = [
+            type_mapping[t.lower()] if t.lower() in type_mapping else t
+            for t in entry["type"]
+        ]
+        entry["type"] = translated_types
+
+    return data
```
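
A quick usage sketch (data hypothetical): `translate_content` canonicalizes common spellings of type names that models produce into the short names the benchmark compares against, passing unknown names through unchanged:

```python
# Assuming the new module is importable from the same directory:
from translator import translate_content

entries = [
    {"function": "add", "type": ["Integer", "String"]},
    {"variable": "f", "type": ["function", "None", "bytes"]},
]
print(translate_content(entries))
# [{'function': 'add', 'type': ['int', 'str']},
#  {'variable': 'f', 'type': ['callable', 'Nonetype', 'bytes']}]
```

The lookup is case-insensitive via `t.lower()`, and the entries are mutated in place as well as returned.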

src/target_tools/ollama/src/utils.py (+31, -4)

```diff
@@ -1,5 +1,6 @@
 import json
 import os
+import re
 import shutil
 import sys
 
@@ -80,6 +81,31 @@ def generate_json_file(filename, type_info):
     return is_valid_json
 
 
+def generate_json_from_answers(gt_json_file, answers):
+    try:
+        with open(gt_json_file, "r") as file:
+            gt_data = json.load(file)
+
+        pattern = re.compile(r"^\s*(\d+)\.\s+(.+)\s*$", re.MULTILINE)
+        parsed_answers = pattern.findall(answers)
+
+        if len(gt_data) != len(parsed_answers):
+            return False
+
+        answers_json_data = []
+        for fact in range(len(gt_data)):
+            _result = gt_data[fact]
+            _result.pop("type")
+            _result["type"] = [x.strip() for x in parsed_answers[fact][1].split(",")]
+            answers_json_data.append(_result)
+
+        return answers_json_data
+    except Exception as e:
+        print("Error generating json from questions")
+        print(e)
+        return False
+
+
 def generate_questions_from_json(json_file):
     # Read and parse the JSON file
     with open(json_file, "r") as file:
```
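
`generate_json_from_answers` pairs the ground-truth facts with the model's numbered reply, and the regex does the heavy lifting: with `re.MULTILINE`, `^\s*(\d+)\.\s+(.+)\s*$` captures one (number, answer) tuple per line, after which comma-separated answers become a list of types. A sketch of just the parsing step, on a hypothetical reply:

```python
import re

reply = """1. int
2. str, int
3. callable"""

pattern = re.compile(r"^\s*(\d+)\.\s+(.+)\s*$", re.MULTILINE)
parsed = pattern.findall(reply)
print(parsed)
# [('1', 'int'), ('2', 'str, int'), ('3', 'callable')]

types = [[t.strip() for t in answer.split(",")] for _, answer in parsed]
print(types)
# [['int'], ['str', 'int'], ['callable']]
```

If the model returns more or fewer numbered lines than there are ground-truth facts, the length check fails and the function returns False.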
```diff
@@ -98,26 +124,26 @@ def generate_questions_from_json(json_file):
             question = (
                 "What is the return type of the function"
                 f" '{entry['function']}' at line {line_number}, column"
-                f" {col_offset}?"
+                f" {col_offset}? Reply in one word."
             )
         # Function Parameter type
         elif "parameter" in entry:
             question = (
                 f"What is the type of the parameter '{entry['parameter']}' at line"
                 f" {line_number}, column {col_offset}, within the function"
-                f" '{entry['function']}'?"
+                f" '{entry['function']}'? Reply in one word."
             )
         # Variable in a function type
         elif "variable" in entry and "function" not in entry:
             question = (
                 f"What is the type of the variable '{entry['variable']}' at line"
-                f" {line_number}, column {col_offset}?"
+                f" {line_number}, column {col_offset}? Reply in one word."
             )
         elif "variable" in entry and "function" in entry:
             question = (
                 f"What is the type of the variable '{entry['variable']}' at line"
                 f" {line_number}, column {col_offset}, within the function"
-                f" '{entry['function']}'?"
+                f" '{entry['function']}'? Reply in one word."
             )
         else:
             print("ERROR! Type could not be converted to types")
@@ -127,4 +153,5 @@ def generate_questions_from_json(json_file):
         print("ERROR! Type questions length does not match json length")
         sys.exit(-1)
 
+    questions = [f"{x}. {y}" for x, y in zip(range(1, len(questions) + 1), questions)]
     return questions
```
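
The final hunk numbers each generated question so the answer parser can match replies back to facts by index. A tiny sketch of that numbering, with hypothetical questions (`enumerate(questions, start=1)` would be the more idiomatic equivalent of zipping over a range):

```python
questions = [
    "What is the return type of the function 'add' at line 1, column 0?"
    " Reply in one word.",
    "What is the type of the parameter 'a' at line 1, column 8, within"
    " the function 'add'? Reply in one word.",
]
questions = [f"{x}. {y}" for x, y in zip(range(1, len(questions) + 1), questions)]
for q in questions:
    print(q)
# 1. What is the return type of the function 'add' at line 1, column 0? Reply in one word.
# 2. What is the type of the parameter 'a' at line 1, column 8, within the function 'add'? Reply in one word.
```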
