17 | 17 |     files_to_diff_dict
18 | 18 | )
19 | 19 |
20 |    | -ray.init(object_store_memory=78643200)
21 |    | -
22 | 20 |
23 | 21 | logging.basicConfig(stream=sys.stdout, level=logging.INFO)
24 | 22 | logger = logging.getLogger("Docu Mentor")

55 | 53 | openai.api_key = os.environ.get("ANYSCALE_API_KEY")
56 | 54 |
57 | 55 |
58 |    | -SYSTEM_CONTENT = """
59 |    | -You are a helpful assistant.
   | 56 | +SYSTEM_CONTENT = """You are a helpful assistant.
60 | 57 | Improve the following <content>. Criticise syntax, grammar, punctuation, style, etc.
61 | 58 | Recommend common technical writing knowledge, such as used in Vale
62 | 59 | and the Google developer documentation style guide.
63 | 60 | If the content is good, don't comment on it.
64 |    | -Do not comment on file names, just the actual text.
   | 61 | +You can use GitHub-flavored markdown syntax in your answer.
   | 62 | +"""
   | 63 | +
   | 64 | +PROMPT = """Improve this content.
   | 65 | +Don't comment on file names or other metadata, just the actual text.
65 | 66 | The <content> will be in JSON format and contains file name keys and text values.
66 |    | -You can use GitHub-flavored markdown syntax.
67 | 67 | Make sure to give very concise feedback per file.
68 | 68 | """
69 | 69 |
70 | 70 | def mentor(
71 | 71 |     content,
72 | 72 |     model="meta-llama/Llama-2-70b-chat-hf",
73 | 73 |     system_content=SYSTEM_CONTENT,
74 |    | -    extra_instructions="Improve this content."
   | 74 | +    prompt=PROMPT
75 | 75 | ):
76 |    | -    """The content can be any string in principle, but the system prompt is
77 |    | -    crafted for dictionary data of the form {'file_name': 'file_content'}.
78 |    | -    """
79 |    | -    return openai.ChatCompletion.create(
   | 76 | +    result = openai.ChatCompletion.create(
80 | 77 |         model=model,
81 | 78 |         messages=[
82 | 79 |             {"role": "system", "content": system_content},
83 |    | -            {"role": "user", "content": f"This is the content: {content}. {extra_instructions}"},
   | 80 | +            {"role": "user", "content": f"This is the content: {content}. {prompt}"},
84 | 81 |         ],
85 | 82 |         temperature=0.7,
86 | 83 |     )
   | 84 | +    usage = result.get("usage")
   | 85 | +    prompt_tokens = usage.get("prompt_tokens")
   | 86 | +    completion_tokens = usage.get("completion_tokens")
   | 87 | +    content = result["choices"][0]["message"]["content"]
   | 88 | +
   | 89 | +    return content, model, prompt_tokens, completion_tokens
   | 90 | +
   | 91 | +try:
   | 92 | +    ray.init()
   | 93 | +except Exception:
   | 94 | +    logger.info("Ray init failed.")
   | 95 | +
   |  96 | +
   |  97 | +@ray.remote
   |  98 | +def mentor_task(content, model, system_content, prompt):
   |  99 | +    return mentor(content, model, system_content, prompt)
   | 100 | +
   | 101 | +def ray_mentor(
   | 102 | +    content: dict,
   | 103 | +    model="meta-llama/Llama-2-70b-chat-hf",
   | 104 | +    system_content=SYSTEM_CONTENT,
   | 105 | +    prompt="Improve this content."
   | 106 | +):
   | 107 | +    futures = [
   | 108 | +        mentor_task.remote(v, model, system_content, prompt)
   | 109 | +        for v in content.values()
   | 110 | +    ]
   | 111 | +    suggestions = ray.get(futures)
   | 112 | +    content = {k: v[0] for k, v in zip(content.keys(), suggestions)}
   | 113 | +    prompt_tokens = sum(v[2] for v in suggestions)
   | 114 | +    completion_tokens = sum(v[3] for v in suggestions)
   | 115 | +
   | 116 | +    return content, model, prompt_tokens, completion_tokens
   | 117 | +
87 | 118 |
88 | 119 |
89 | 120 | app = FastAPI()
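As a side note on the new ray_mentor helper: it fans one remote task out per file and then blocks on ray.get to collect all suggestions. A minimal, self-contained sketch of that fan-out pattern, where the suggest stub is hypothetical and stands in for the real LLM call:

import ray

ray.init()  # starts a local Ray instance if none is running

@ray.remote
def suggest(name: str, text: str) -> str:
    # Hypothetical stub standing in for the real LLM call.
    return f"Suggestions for {name}: {len(text)} characters reviewed"

files = {"a.md": "first file", "b.md": "second file"}
# Fan out: one task per file, scheduled in parallel by Ray.
futures = [suggest.remote(k, v) for k, v in files.items()]
# Fan in: ray.get blocks until every task has finished.
results = dict(zip(files.keys(), ray.get(futures)))
print(results)
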
@@ -163,35 +194,32 @@ async def handle_webhook(request: Request):
163 | 194 |         diff_response = await client.get(url, headers=headers)
164 | 195 |         diff = diff_response.text
165 | 196 |
166 |     | -        files_with_diff = files_to_diff_dict(diff)
    | 197 | +        files = files_to_diff_dict(diff)
167 | 198 |
168 | 199 |         # Filter the dictionary
169 | 200 |         if files_to_keep:
170 |     | -            files_with_diff = {
171 |     | -                k: files_with_diff[k]
172 |     | -                for k in files_with_diff
    | 201 | +            files = {
    | 202 | +                k: files[k]
    | 203 | +                for k in files
173 | 204 |                 if any(sub in k for sub in files_to_keep)
174 | 205 |             }
175 |     | -
176 |     | -        logger.info(files_with_diff.keys())
    | 206 | +        logger.info(files.keys())
177 | 207 |
178 | 208 |         # Get suggestions from Docu Mentor
179 |     | -        chat_completion = mentor(files_with_diff)
    | 209 | +        content, model, prompt_tokens, completion_tokens = ray_mentor(files) if ray.is_initialized() else mentor(files)
180 | 210 |
181 |     | -        logger.info(chat_completion)
182 |     | -        model = chat_completion.get("model")
183 |     | -        usage = chat_completion.get("usage")
184 |     | -        prompt_tokens = usage.get("prompt_tokens")
185 |     | -        completion_tokens = usage.get("completion_tokens")
186 |     | -        content = chat_completion["choices"][0]["message"]["content"]
    | 211 | +        print_content = ""
    | 212 | +        for k, v in content.items():
    | 213 | +            print_content += f"{k}:\n\t{v}\n\n"
    | 214 | +        logger.info(print_content)
187 | 215 |
188 | 216 |         # Let's comment on the PR
189 | 217 |         await client.post(
190 | 218 |             f"{comment['issue_url']}/comments",
191 | 219 |             json={
192 | 220 |                 "body": f":rocket: Docu Mentor finished analysing your PR! :rocket:\n\n"
193 | 221 |                 + "Take a look at your results:\n"
194 |     | -                + f"{content}\n\n"
    | 222 | +                + f"{print_content}\n\n"
195 | 223 |                 + "This bot is proudly powered by [Anyscale Endpoints](https://app.endpoints.anyscale.com/).\n"
196 | 224 |                 + f"It used the model {model}, used {prompt_tokens} prompt tokens, "
197 | 225 |                 + f"and {completion_tokens} completion tokens in total."