Skip to content

Commit bfdaa3c

Browse files
authored
Merge pull request #12 from morganpartee/10-add-obsidian-canvas
add docs command again (it's really good now)
2 parents 672a8f7 + 31e8994 commit bfdaa3c

File tree

9 files changed

+191
-444
lines changed

9 files changed

+191
-444
lines changed

README.md

+14-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Codegpt
22

3-
## 0.2.15
3+
## 0.3
44

55
A tool for using GPT just a little quicker. A nearly truly automated footgun. Learn how to revert with git before trying please.
66

@@ -21,7 +21,19 @@ Windows users can also use `setx` like:
2121

2222
from an admin console.
2323

24-
## Be careful! But try this
24+
## Your first (safe) command
25+
26+
One cool thing is generating documentation. GPT-3 has a token limit of 4000 for completions, so larger files will be chunked up.
27+
28+
```bash
29+
codegpt docs <paths>
30+
```
31+
32+
And it'll generate docs, one per file (or one per chunk, for files large enough to be split). This is great when you're coming into a codebase you've never seen before.
33+
34+
## Unsafe Commands
35+
36+
Everything else can modify files. Have someone hold your beer and try some of these (after you've committed your work to git):
2537

2638
Usage
2739
To try Codegpt, you can run the following command:

codegpt/files.py

+58-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
import os
22
import typer
3+
import math
4+
import re
5+
import magic
6+
37

48
def load_text(filenames):
59
out = {}
@@ -13,15 +17,65 @@ def write_text(files, backup=False):
1317
# If the backup option is specified and the file exists,
1418
# write the existing file to <filename>.bak
1519
for i, out in enumerate(files):
16-
filename = out.get('filename', f"{i}.txt")
17-
typer.secho(f"Hmm, didn't find a filename, writing to {filename}", color=typer.colors.MAGENTA)
20+
filename = out.get("filename", f"{i}.txt")
1821
if backup and os.path.exists(filename):
1922
with open(filename, "r") as f_in:
2023
with open(f"{filename}.bak", "w") as f_out:
2124
f_out.write(f_in.read())
2225

2326
# Write the new text to the file
2427
with open(filename, "w") as f:
25-
f.write(out['code'])
28+
f.write(out["code"])
29+
if "explanation" in out:
30+
typer.secho(f"{filename} - " + out["explanation"], color=typer.colors.BLUE)
31+
32+
33+
def split_code_into_chunks(paths, chunk_size):
    """Collect token chunks for every file reachable from ``paths``.

    Args:
        paths: Iterable of files and/or directories, given as ``str`` or
            ``pathlib.Path``. Directories are walked recursively with
            ``os.walk``. ``None`` is treated as "no paths".
        chunk_size: Forwarded unchanged to ``process_file`` for each file.

    Returns:
        The dict that ``process_file`` fills in for each visited file.
    """
    from pathlib import Path

    chunks = {}
    for path in paths or ():
        # Normalise so plain strings are accepted and so ``process_file``
        # always receives a Path, both for direct files and walked ones.
        path = Path(path)
        if path.is_dir():
            # Crawl the directory and process each file
            for root, _, filenames in os.walk(path):
                for filename in filenames:
                    process_file(Path(root) / filename, chunk_size, chunks)
        else:
            # Process the file directly
            process_file(path, chunk_size, chunks)

    return chunks
47+
48+
def process_file(file_path, chunk_size, chunks):
    """Tokenise one text file and add its token chunks to ``chunks``.

    Files whose MIME type (as reported by python-magic) is not ``text/*``
    are skipped. Tokens are the ``\\b\\w+\\b`` regex matches, so punctuation
    and whitespace are not part of any chunk.

    Args:
        file_path: File to read, as ``str`` or ``pathlib.Path``.
        chunk_size: Nominal number of tokens per chunk.
        chunks: Dict updated in place. A file that fits in one chunk is
            keyed by ``file_path`` itself; otherwise each chunk is keyed
            ``"{stem} - {n}"`` with ``n`` starting at 1. Each value is a
            list to which the chunk (a list of tokens) is appended.

    Returns:
        None. Results are written into ``chunks``.
    """
    from pathlib import Path

    # Use the python-magic library to identify the type of the file
    mime = magic.from_file(os.fspath(file_path), mime=True)
    if mime.split("/")[0] != "text":
        # If the file is not a text file, skip it
        return

    # errors="replace" keeps one undecodable byte from aborting the whole run.
    with open(file_path, "r", errors="replace") as f:
        code = f.read()

    # Split the code into tokens using a regular expression
    tokens = re.findall(r"\b\w+\b", code)

    # Determine the number of chunks needed
    num_chunks = math.ceil(len(tokens) / chunk_size)

    # Path(...) so that string paths (e.g. from os.path.join) also have a stem.
    stem = Path(file_path).stem

    # Split the tokens into chunks with a hundred token overlap: the first
    # chunk runs 100 tokens past its nominal end, later chunks start 100
    # tokens before their nominal start.
    for i in range(num_chunks):
        start = max(i * chunk_size - 100, 0)
        end = min(start + chunk_size + 100, len(tokens))
        if num_chunks > 1:
            # If the file was split into multiple chunks, use a key of the form {filename} - {chunk_num}
            key = f"{stem} - {i + 1}"
        else:
            # If the file was not split, use the file path as the key
            key = file_path
        chunks.setdefault(key, []).append(tokens[start:end])

codegpt/gpt_interface.py

+15-6
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,7 @@ def send_iffy_edit(prompt: str, code: Dict[str, str], clipboard: bool = False, y
5252
> <the code to be output line 1>
5353
> <the code to be output, line n...>
5454
55-
You must include an explanation of what you did, and the code to be output, regardless of the format or file.
56-
57-
OUTPUT:""")
55+
You must include an explanation of what you did, and the code to be output, regardless of the format or file.""")
5856

5957
else:
6058
full_prompt += dedent("""
@@ -71,9 +69,7 @@ def send_iffy_edit(prompt: str, code: Dict[str, str], clipboard: bool = False, y
7169
> <code line 1>
7270
> <code line n...>
7371
74-
You must include the filename, an explanation of what you did, and the code for the file to be output, regardless of the format or file.
75-
76-
OUTPUT:""")
72+
You must include the filename, an explanation of what you did, and the code for the file to be output, regardless of the format or file.""")
7773

7874
max_tokens = confirm_send(full_prompt, yes=yes, silent=clipboard)
7975

@@ -92,6 +88,19 @@ def send_iffy_edit(prompt: str, code: Dict[str, str], clipboard: bool = False, y
9288
print(response["choices"][0]["text"])
9389
return parsed[0] if clipboard else parsed
9490

91+
def send_normal_completion(prompt, max_tokens=3000, yes=False):
    """Send ``prompt`` to the completion endpoint and return the cleaned text.

    Args:
        prompt: Full prompt text to send.
        max_tokens: Requested token budget; passed through ``confirm_send``,
            whose return value is what is actually sent to the API.
        yes: Forwarded to ``confirm_send`` as ``yes=`` (presumably skips the
            interactive confirmation; verify against ``confirm_send``).

    Returns:
        The first choice's text with surrounding whitespace removed and at
        most one leading and one trailing triple-backtick fence removed.
    """
    max_tokens = confirm_send(prompt, max_tokens, yes=yes)

    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        max_tokens=max_tokens,
        n=1,
        temperature=0.6,
    )

    text = response["choices"][0]["text"].strip()

    # str.strip("```") treats its argument as a character set and would also
    # remove a lone backtick that closes inline code at the end of the reply.
    # Remove only a literal three-backtick fence instead.
    fence = "```"
    if text.startswith(fence):
        text = text[len(fence):]
    if text.endswith(fence):
        text = text[:-len(fence)]
    return text.strip()
95104

96105

97106
if __name__ == "__main__":

codegpt/main.py

+34-1
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@
33
import json
44
import logging
55

6+
import sys
7+
sys.path.append("../codegpt")
68

79
from codegpt import gpt_interface as gpt
810

@@ -12,6 +14,9 @@
1214
from typing import List, Optional
1315
from pathlib import Path
1416

17+
from rich.progress import track
18+
19+
1520
app = typer.Typer(
1621
no_args_is_help=True,
1722
)
@@ -83,7 +88,7 @@ def edit_file(
8388
return
8489

8590
if raw_out:
86-
print(result['code'])
91+
print(result.get('code') or result)
8792
return
8893

8994
files.write_text(result, backup)
@@ -160,6 +165,34 @@ def quick_edit_file(
160165
typer.secho("Done!", color=typer.colors.BRIGHT_BLUE)
161166

162167

168+
@app.command("docs")
def docs(
    paths: List[Path] = typer.Argument(None, exists=True, dir_okay=True, file_okay=True),
):
    """
    Generate a markdown review document under ./docs for each file (or chunk) in PATHS.
    """
    # ``paths`` is None when the command is invoked without arguments.
    data = files.split_code_into_chunks(paths or [], 1800)

    # ``fg`` selects the text colour; secho's ``color`` argument only toggles
    # ANSI output on or off, so passing a colour name there has no visible effect.
    typer.secho(f"Found {len(data)} files. Documenting...", fg=typer.colors.BRIGHT_BLUE)

    for filename, chunk in track(data.items()):
        # Best effort: a failure on one entry is reported and the loop moves on.
        try:
            prompt = prompts.generate_review_instructions(filename, chunk)
            result = gpt.send_normal_completion(prompt, 4000, True)

            # Write the documentation for the current code chunk to a file
            outname = f"./docs/{filename}.md"
            # Create the '/docs' folder and any intermediate directories if they do not exist
            Path(outname).parent.mkdir(parents=True, exist_ok=True)

            files.write_text([{'filename': outname, 'code': result}])

            # Print a message to confirm that the documentation has been written to the file
            typer.secho(f"Wrote documentation for {filename} to {outname}", fg=typer.colors.GREEN)
        except Exception as e:
            import traceback as tb

            tb.print_exc()
            typer.secho(f"Error: {e}", fg=typer.colors.RED)
    typer.secho("Done!")
195+
163196
@app.command("config")
164197
def config():
165198
"""

codegpt/parse.py

+3-16
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
def parse_resp(response:dict):
22
resp = response["choices"][0]["text"].strip().splitlines()
3-
3+
44
# Initialize an empty list to hold the dictionaries
55
out = []
66

@@ -16,27 +16,14 @@ def parse_resp(response:dict):
1616

1717
# If the line doesn't start with '>', it's a key
1818
if line[0] != '>':
19-
if line == '===' and curr_dict:
20-
out.append(curr_dict)
21-
curr_dict = {}
2219
# Strip leading/trailing whitespace and remove ':' from the key
23-
key = line.strip().replace(":", '')
20+
key = line.strip().replace(":", '').lower()
2421
# Initialize an empty value for this key in the current dictionary
2522
curr_dict[key] = ""
26-
# If the line does start with '>', it's a value
2723
else:
28-
# Strip the leading '>' and leading/trailing whitespace from the value
29-
# and add it to the current key in the current dictionary
30-
if key == "code":
31-
curr_dict[key] += line.strip().strip('> ').strip('>') + '\n'
32-
else:
33-
curr_dict[key] = line.strip().strip('> ').strip('>').strip()
24+
curr_dict[key] += line.strip().strip('> ').strip('>') + '\n'
3425

3526
# Add the final dictionary to the output list
3627
out.append(curr_dict)
3728

38-
# Backtop just in case this fails. Tests don't tend to use whole code, so it gets weird.
39-
if 'code' not in out:
40-
out = [{'code': response["choices"][0]["text"]}]
41-
4229
return out

codegpt/prompts.py

+61
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from textwrap import dedent
2+
13
prompts = {
24
"comment": "Add or update comments according to the given language's standards. Add or update function, module, or class level comments if they're appropriate.",
35
"varnames": "Change variable names, but nothing else, to make the code more readable. For example, instead of using 'x' and 'y', use 'width' and 'height'.",
@@ -26,3 +28,62 @@ def set_username(username):
2628
this.username = username
2729
""",
2830
}
31+
32+
def generate_review_instructions(filename, code):
    """Build the prompt asking the model to write a markdown review of a file.

    Args:
        filename: Name shown in the prompt and in the sample document title.
        code: The code to review; it is inserted via ``str.format``, so any
            object is rendered with its ``str()`` form.

    Returns:
        The prompt text with no leading indentation, ending in an opened
        md code fence that the completion is expected to continue and close.
    """
    # Dedent the static template BEFORE substituting values: a multi-line
    # ``code`` or ``filename`` with lines at column 0 would otherwise remove
    # the common margin and leave the whole prompt indented.
    template = dedent(
        """
        Please review the code in the file "{filename}" and document your findings in a markdown file. The code is shown below for reference:

        ```
        {code}
        ```

        In your markdown file, please include the following information:

        1. A summary of the purpose of the file and its contents.
        2. A list of all classes and functions defined in the file, along with a brief description of their purpose.
        3. A list of any external dependencies used in the file, including any libraries or modules imported from outside the project.
        4. Any bugs or issues you identified while reviewing the code.
        5. Any areas of the code that you consider to be particularly well-written or poorly-written, and why.

        Please also include any questions or comments you have about the code in your markdown file.

        When you have finished reviewing the code and documenting your findings, please submit your markdown file for review.

        Here is a sample markdown file format you can follow:

        ```md
        # Code Review: {filename}

        ## Summary

        [Insert summary of the purpose of the file and its contents here.]

        ## Classes and Functions

        [Insert a list of all classes and functions defined in the file, along with a brief description of their purpose.]

        ## External Dependencies

        [Insert a list of any external dependencies used in the file, including any libraries or modules imported from outside the project.]

        ## Bugs and Issues

        [Insert any bugs or issues you identified while reviewing the code.]

        ## Code Quality

        [Insert any comments you have on the quality of the code, including any areas that you consider to be particularly well-written or poorly-written, and why.]

        ## Questions and Comments

        [Insert any questions or comments you have about the code.]
        ```

        You are an expert, senior developer, give helpful feedback if you find problems. Return your whole response, markdown formatted for github, below.

        Review Doc:
        ```md
        """
    )
    instructions = template.format(filename=filename, code=code)
    return instructions

0 commit comments

Comments
 (0)