Skip to content

Commit f9c75f4

Browse files
committed
Fix tons of bugs, add docs command
1 parent f9183d3 commit f9c75f4

File tree

13 files changed

+372
-172
lines changed

13 files changed

+372
-172
lines changed

codegpt/files.py

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import os
22
import typer
3+
import math
4+
import re
5+
import magic
6+
37

48
def load_text(filenames):
59
out = {}
@@ -13,15 +17,65 @@ def write_text(files, backup=False):
1317
# If the backup option is specified and the file exists,
1418
# write the existing file to <filename>.bak
1519
for i, out in enumerate(files):
16-
filename = out.get('filename', f"{i}.txt")
17-
typer.secho(f"Hmm, didn't find a filename, writing to {filename}", color=typer.colors.MAGENTA)
20+
filename = out.get("filename", f"{i}.txt")
1821
if backup and os.path.exists(filename):
1922
with open(filename, "r") as f_in:
2023
with open(f"{filename}.bak", "w") as f_out:
2124
f_out.write(f_in.read())
2225

2326
# Write the new text to the file
2427
with open(filename, "w") as f:
25-
f.write(out['code'])
28+
f.write(out["code"])
29+
if "explanation" in out:
30+
typer.secho(f"{filename} - " + out["explanation"], color=typer.colors.BLUE)
31+
32+
33+
def split_code_into_chunks(paths, chunk_size):
    """Collect token chunks for every file reachable from ``paths``.

    Args:
        paths: Iterable of files and/or directories, given as ``str`` or
            ``pathlib.Path``. Directories are crawled recursively with
            ``os.walk``. ``None`` is treated as "no paths".
        chunk_size: Nominal number of tokens per chunk; forwarded unchanged
            to ``process_file``.

    Returns:
        dict: The mapping that ``process_file`` fills in
        (chunk key -> list of chunks). Empty when nothing was processed.
    """
    # Local import: pathlib is not among this module's top-level imports.
    from pathlib import Path

    chunks = {}
    for path in paths or ():
        path = Path(path)
        if path.is_dir():
            # Crawl the directory and process each file
            for root, _, filenames in os.walk(path):
                for filename in filenames:
                    # Pass a Path rather than a str: process_file reads
                    # ``file_path.stem`` when it names multi-chunk entries.
                    process_file(Path(root) / filename, chunk_size, chunks)
        else:
            # Process the file directly
            process_file(path, chunk_size, chunks)

    return chunks
47+
48+
def process_file(file_path, chunk_size, chunks):
    """Tokenize one text file and add its overlapping chunks to ``chunks``.

    Files whose MIME type (as reported by python-magic) is not ``text/*``
    are skipped. The file content is reduced to word tokens
    (``\\b\\w+\\b``), so punctuation and whitespace are not preserved.

    Args:
        file_path: Path of the file to read, as ``str`` or ``os.PathLike``.
        chunk_size: Nominal number of tokens per chunk. Each chunk is
            widened by up to 100 tokens so neighbouring chunks overlap.
        chunks: Dict mutated in place. When the file needs several chunks
            the keys are ``"{stem} - {n}"`` (1-based); otherwise the key is
            ``file_path`` exactly as it was passed in. Each value is a list
            to which the token list is appended.

    Returns:
        None. A file with no tokens adds nothing to ``chunks``.
    """
    # Normalise once so both str and Path arguments work everywhere below.
    path_str = os.fspath(file_path)

    # Use the python-magic library to identify the type of the file
    mime = magic.from_file(path_str, mime=True)
    if mime.split("/")[0] != "text":
        # If the file is not a text file, skip it
        return

    with open(path_str, "r") as f:
        code = f.read()

    # Split the code into tokens using a regular expression
    tokens = re.findall(r"\b\w+\b", code)

    # Determine the number of chunks needed
    num_chunks = math.ceil(len(tokens) / chunk_size)

    # Equivalent of Path.stem, computed with os.path so that plain-string
    # paths (e.g. those built with os.path.join) do not raise AttributeError.
    stem = os.path.splitext(os.path.basename(path_str))[0]

    # Split the tokens into chunks with a hundred token overlap
    for i in range(num_chunks):
        start = max(i * chunk_size - 100, 0)
        end = min(start + chunk_size + 100, len(tokens))
        if num_chunks > 1:
            # If the file was split into multiple chunks, use a key of the form {filename} - {chunk_num}
            key = f"{stem} - {i + 1}"
        else:
            # If the file was not split, use the file path as the key
            key = file_path
        chunks.setdefault(key, []).append(tokens[start:end])

codegpt/gpt_interface.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,19 @@ def send_iffy_edit(prompt: str, code: Dict[str, str], clipboard: bool = False, y
9292
print(response["choices"][0]["text"])
9393
return parsed[0] if clipboard else parsed
9494

95+
def send_normal_completion(prompt, max_tokens=3000, yes=False):
    """Send ``prompt`` as a plain completion request and return the text.

    Args:
        prompt: Full prompt text to send.
        max_tokens: Requested token budget; the value actually used is
            whatever ``confirm_send`` returns for it.
        yes: Forwarded to ``confirm_send`` as its ``yes`` argument.

    Returns:
        str: The first choice's text with surrounding whitespace removed and
        at most one leading and one trailing Markdown code fence stripped.
    """
    max_tokens = confirm_send(prompt, max_tokens, yes=yes)

    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,
        temperature=0.6,
    )

    text = response["choices"][0]["text"].strip()

    # str.strip("```") treats its argument as a set of characters and would
    # eat every leading/trailing backtick, including the closing backtick of
    # inline code at the end of the response. Remove only an exact fence.
    fence = "```"
    if text.startswith(fence):
        text = text[len(fence):]
    if text.endswith(fence):
        text = text[:-len(fence)]
    return text.strip()
95108

96109

97110
if __name__ == "__main__":

codegpt/main.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import json
44
import logging
55

6+
import sys
7+
sys.path.append("../codegpt")
68

79
from codegpt import gpt_interface as gpt
810

@@ -12,6 +14,9 @@
1214
from typing import List, Optional
1315
from pathlib import Path
1416

17+
from rich.progress import track
18+
19+
1520
app = typer.Typer(
1621
no_args_is_help=True,
1722
)
@@ -160,6 +165,34 @@ def quick_edit_file(
160165
typer.secho("Done!", color=typer.colors.BRIGHT_BLUE)
161166

162167

168+
@app.command("docs")
def docs(
    paths: List[Path] = typer.Argument(None, exists=True, dir_okay=True, file_okay=True),
):
    """
    Generate a markdown review document under ./docs for each code chunk found in PATHS.
    """
    import traceback

    # The argument defaults to None; fall back to an empty list so the
    # chunker always receives an iterable.
    data = files.split_code_into_chunks(paths or [], 1800)

    # Colours are passed as ``fg``: secho's ``color`` keyword is an on/off
    # switch for ANSI output, not a colour name.
    typer.secho(f"Found {len(data)} files. Documenting...", fg=typer.colors.BRIGHT_BLUE)

    for filename, chunk in track(data.items()):
        try:
            prompt = prompts.generate_review_instructions(filename, chunk)
            result = gpt.send_normal_completion(prompt, 4000, True)

            # Write the documentation for the current code chunk to a file
            outname = f"./docs/{filename}.md"
            # Create the '/docs' folder and any intermediate directories if they do not exist
            Path(outname).parent.mkdir(parents=True, exist_ok=True)

            files.write_text([{"filename": outname, "code": result}])

            # Print a message to confirm that the documentation has been written to the file
            typer.secho(f"Wrote documentation for {filename} to {outname}", fg=typer.colors.GREEN)
        except Exception as e:
            # Best effort: report the failure and carry on with the next chunk
            # so one bad file does not abort the whole run.
            traceback.print_exc()
            typer.secho(f"Error: {e}", fg=typer.colors.RED)
    typer.secho("Done!")
195+
163196
@app.command("config")
164197
def config():
165198
"""

codegpt/parse.py

Lines changed: 2 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
def parse_resp(response:dict):
22
resp = response["choices"][0]["text"].strip().splitlines()
3-
3+
44
# Initialize an empty list to hold the dictionaries
55
out = []
66

@@ -16,27 +16,13 @@ def parse_resp(response:dict):
1616

1717
# If the line doesn't start with '>', it's a key
1818
if line[0] != '>':
19-
if line == '===' and curr_dict:
20-
out.append(curr_dict)
21-
curr_dict = {}
2219
# Strip leading/trailing whitespace and remove ':' from the key
2320
key = line.strip().replace(":", '')
2421
# Initialize an empty value for this key in the current dictionary
2522
curr_dict[key] = ""
26-
# If the line does start with '>', it's a value
27-
else:
28-
# Strip the leading '>' and leading/trailing whitespace from the value
29-
# and add it to the current key in the current dictionary
30-
if key == "code":
31-
curr_dict[key] += line.strip().strip('> ').strip('>') + '\n'
32-
else:
33-
curr_dict[key] = line.strip().strip('> ').strip('>').strip()
23+
curr_dict[key] += line.strip().strip('> ').strip('>') + '\n'
3424

3525
# Add the final dictionary to the output list
3626
out.append(curr_dict)
3727

38-
# Backtop just in case this fails. Tests don't tend to use whole code, so it gets weird.
39-
if 'code' not in out:
40-
out = [{'code': response["choices"][0]["text"]}]
41-
4228
return out

codegpt/prompts.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from textwrap import dedent
2+
13
prompts = {
24
"comment": "Add or update comments according to the given language's standards. Add or update function, module, or class level comments if they're appropriate.",
35
"varnames": "Change variable names, but nothing else, to make the code more readable. For example, instead of using 'x' and 'y', use 'width' and 'height'.",
@@ -26,3 +28,62 @@ def set_username(username):
2628
this.username = username
2729
""",
2830
}
31+
32+
def generate_review_instructions(filename, code):
    """Build the prompt asking the model to write a markdown review of a file.

    Args:
        filename: Name shown in the prompt and in the sample document title.
        code: The code to review; inserted via ``str.format``, so any object
            is rendered with its ``str()`` form.

    Returns:
        str: The prompt text. It starts with a newline and ends with an
        opening ```` ```md ```` fence for the model to continue from.
    """
    # Dedent the static template *before* inserting values. Interpolating
    # first would let multi-line ``code`` containing column-0 lines reduce the
    # common indentation to nothing, leaving the whole prompt indented.
    template = dedent(
        """
        Please review the code in the file "{filename}" and document your findings in a markdown file. The code is shown below for reference:

        ```
        {code}
        ```

        In your markdown file, please include the following information:

        1. A summary of the purpose of the file and its contents.
        2. A list of all classes and functions defined in the file, along with a brief description of their purpose.
        3. A list of any external dependencies used in the file, including any libraries or modules imported from outside the project.
        4. Any bugs or issues you identified while reviewing the code.
        5. Any areas of the code that you consider to be particularly well-written or poorly-written, and why.

        Please also include any questions or comments you have about the code in your markdown file.

        When you have finished reviewing the code and documenting your findings, please submit your markdown file for review.

        Here is a sample markdown file format you can follow:

        ```md
        # Code Review: {filename}

        ## Summary

        [Insert summary of the purpose of the file and its contents here.]

        ## Classes and Functions

        [Insert a list of all classes and functions defined in the file, along with a brief description of their purpose.]

        ## External Dependencies

        [Insert a list of any external dependencies used in the file, including any libraries or modules imported from outside the project.]

        ## Bugs and Issues

        [Insert any bugs or issues you identified while reviewing the code.]

        ## Code Quality

        [Insert any comments you have on the quality of the code, including any areas that you consider to be particularly well-written or poorly-written, and why.]

        ## Questions and Comments

        [Insert any questions or comments you have about the code.]
        ```

        You are an expert, senior developer, give helpful feedback if you find problems. Return your whole response, markdown formatted for github, below.

        Review Doc:
        ```md
        """
    )
    instructions = template.format(filename=filename, code=code)
    return instructions

docs/codegpt/__main__.py.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Code Review: codegpt/__main__.py
2+
3+
## Summary
4+
5+
This file is the main entry point for the codegpt project. It imports the main app object and runs it.
6+
7+
## Classes and Functions
8+
9+
- `app`: The main application object.
10+
11+
## External Dependencies
12+
13+
No external dependencies are used in this file.
14+
15+
## Bugs and Issues
16+
17+
No bugs or issues were identified while reviewing the code.
18+
19+
## Code Quality
20+
21+
The code is well written and organized. It is clear and concise.
22+
23+
## Questions and Comments
24+
25+
No questions or comments were identified while reviewing the code.

docs/codegpt/files.py.md

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Code Review: codegpt/files.py
2+
3+
## Summary
4+
5+
This file contains functions that are used to read and write text files. It also contains functions to process files, split them into chunks, and identify their type using the Python Magic library.
6+
7+
## Classes and Functions
8+
9+
- `load_text`: Loads a list of text files and returns the content of the files.
10+
- `write_text`: Writes text to a file, with an optional backup option.
11+
- `split_code_into_chunks`: Walks a list of paths (files or directories) and splits each text file it finds into token chunks of a given size.
12+
- `process_file`: Processes a single file and splits it into chunks.
13+
14+
## External Dependencies
15+
16+
- `os`: Used for file system operations.
17+
- `typer`: Used for displaying colored text.
18+
- `math`: Used for performing mathematical operations.
19+
- `re`: Used for regular expression operations.
20+
- `magic`: Used for identifying file types.
21+
22+
## Bugs and Issues
23+
24+
No bugs or issues were identified during the review.
25+
26+
## Code Quality
27+
28+
The code is well-written and easy to read. It is well-structured, with functions clearly defined and named. It also makes use of external libraries to perform certain tasks, which helps reduce the amount of code that needs to be written.
29+
30+
## Questions and Comments
31+
32+
No questions or comments were identified during the review.

docs/codegpt/gpt_interface.py.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# Code Review: codegpt/gpt_interface.py
2+
3+
## Summary
4+
5+
This code is part of a codegpt library and is a module for interacting with GPT-3. It contains functions for sending prompts to the GPT-3 engine, confirming whether a prompt should be sent, and parsing the response from the engine. It also includes code for downloading the NLTK punkt tokenizer if it is not already available.
6+
7+
## Classes and Functions
8+
9+
- `confirm_send(prompt: str, max_tokens: int = 4000, yes: bool = False, silent: bool = False) -> int`: This function checks the length of the prompt and confirms that the user wants to send it to the GPT-3 engine. It returns the maximum number of tokens the engine can return in response.
10+
11+
- `send_iffy_edit(prompt: str, code: Dict[str, str], clipboard: bool = False, yes: bool = False) -> Dict[str, str]`: This function is used to send a prompt with code that may need to be edited. It returns a parsed response from the GPT-3 engine.
12+
13+
- `send_normal_completion(prompt: str, max_tokens: int = 3000, yes: bool = False) -> str`: This function is used to send a normal prompt to the GPT-3 engine. It returns the response from the engine.
14+
15+
## External Dependencies
16+
17+
- `nltk`: Used for downloading the punkt tokenizer.
18+
- `openai`: Used for interacting with the GPT-3 engine.
19+
- `typer`: Used for printing colored text and prompting the user for confirmation.
20+
- `textwrap`: Used for dedenting strings.
21+
- `codegpt.parse`: Used for parsing the response from the GPT-3 engine.
22+
23+
## Bugs and Issues
24+
25+
None identified.
26+
27+
## Code Quality
28+
29+
The code is well-written and well-structured. The functions are clear and concise, and the comments provide helpful context. The code is also well-formatted and easy to read.
30+
31+
## Questions and Comments
32+
33+
None.

docs/codegpt/parse.py.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Code Review: codegpt/parse.py
2+
3+
## Summary
4+
5+
This file contains a function, `parse_resp`, which is used to parse a response from a dict object. The function takes in a dict object, strips the text, and splits the lines into a list. It then uses this list to create a dictionary of key-value pairs and adds it to an output list.
6+
7+
## Classes and Functions
8+
9+
- `parse_resp`: Function used to parse a response from a dict object.
10+
11+
## External Dependencies
12+
13+
No external dependencies are used in this file.
14+
15+
## Bugs and Issues
16+
17+
No bugs or issues were identified while reviewing the code.
18+
19+
## Code Quality
20+
21+
The code appears to be well-written, clearly laid out, and easy to follow. All of the necessary steps for parsing the response are included, and the code is concise and efficient.
22+
23+
## Questions and Comments
24+
25+
No questions or comments at this time.

0 commit comments

Comments
 (0)