1+ import os
2+ from typing import List , Dict , Tuple
3+ from pathlib import Path
4+ import anthropic
5+ from git import Repo
6+ from datetime import datetime , timedelta
7+
8+ class DocHelperAgent :
def __init__(self, api_key: str):
    """Create the API client and repository handle, then index recent commits.

    Args:
        api_key: Key handed to ``anthropic.Client``.
    """
    # Client used by the methods that send prompts to the model.
    self.client = anthropic.Client(api_key=api_key)

    # Repository opened from the current working directory ('.').
    self.repo = Repo('.')

    # Produced by a helper that is defined outside this excerpt.
    self.changed_files = self._get_changed_files()

    # Per-file commit counts; the mapping has to exist before the
    # analysis pass below starts filling it.
    self.commit_history = {}
    self._analyze_commit_history()
15+
def _analyze_commit_history(self, days: int = 30):
    """Count how many recent commits touched each Python file.

    The result is stored in ``self.commit_history`` as a mapping of file
    path (as reported by the commit stats) to number of commits. The
    mapping is rebuilt from scratch on every call, so invoking the method
    again (for example with a different window) replaces the previous
    counts instead of adding to them.

    Args:
        days: Size of the look-back window in days, counted back from now.
    """
    since = datetime.now() - timedelta(days=days)

    counts = {}
    # Consume the commit iterator directly; no intermediate list is needed.
    for commit in self.repo.iter_commits(since=since):
        for path in commit.stats.files:
            # Only Python sources take part in the importance scoring.
            if path.endswith('.py'):
                counts[path] = counts.get(path, 0) + 1

    self.commit_history = counts
25+
def _get_file_importance_score(self, file_path: str) -> float:
    """Score a file between 0 and 10 from commit activity, size and imports.

    Three capped factors are added together:

    * recent commit frequency: half a point per commit, at most 5;
    * file length: one point per 100 lines, at most 3;
    * import statements: one point per 5 imports, at most 2.

    Args:
        file_path: Path of the file to score. The same string is used as
            the lookup key in ``self.commit_history`` and as the path to
            open, so it must match the form stored in the history.

    Returns:
        The sum of the three factors.

    Raises:
        OSError: If the file cannot be opened.
    """
    score = 0.0

    # Factor 1: recent commit frequency (0-5 points).
    commit_count = self.commit_history.get(file_path, 0)
    score += min(commit_count / 2, 5)

    # Undecodable bytes are replaced rather than raised on: only line and
    # import counts are needed here, not the exact text.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
        lines = f.read().splitlines()

    # Factor 2: file size (0-3 points); larger files score higher.
    score += min(len(lines) / 100, 3)

    # Factor 3: import statements (0-2 points). The trailing space keeps
    # identifiers such as ``important`` or ``from_date`` from being counted.
    import_count = sum(
        1
        for line in lines
        if line.lstrip().startswith(('import ', 'from '))
    )
    score += min(import_count / 5, 2)

    return score
45+
def analyze_code_importance(self, code: str) -> Dict[str, float]:
    """Ask the model to rate every function in ``code`` and normalize the ratings.

    The model is asked for three 0-5 ratings per function (complexity,
    impact, clarity need). The ratings found for a function are summed and
    divided by 15, so a function with all three ratings lands in 0-1; a
    rating that is missing or cannot be parsed simply contributes nothing.

    Args:
        code: Python source text to send to the model.

    Returns:
        Mapping of function name to normalized score.
    """
    prompt = (
        "Analyze this Python code and score each function based on these factors:\n"
        "1. Complexity (0-5): Number of operations, loops, conditions\n"
        "2. Impact (0-5): How much other code depends on this function\n"
        "3. Clarity Need (0-5): How much documentation would help understanding\n"
        "\n"
        "Return scores in this format:\n"
        "FUNCTION: name\n"
        "COMPLEXITY: score\n"
        "IMPACT: score\n"
        "CLARITY: score\n"
        "\n"
        "Code to analyze:\n"
        "\n"
        f"{code}"
    )

    # NOTE(review): the model id is pinned here; confirm it is still offered.
    message = self.client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        temperature=0.2,
        messages=[{"role": "user", "content": prompt}],
    )

    # The Messages API returns ``content`` as a list of blocks that carry
    # their text in ``.text``; a plain string is accepted as well.
    content = message.content
    if isinstance(content, str):
        reply = content
    else:
        reply = "".join(getattr(part, "text", "") or "" for part in content)

    scores = {}
    current_func = None
    current_scores = {}

    for raw_line in reply.splitlines():
        # Tolerate indentation around the labelled lines.
        line = raw_line.strip()
        if line.startswith('FUNCTION:'):
            # A new FUNCTION header closes the previous function's record.
            if current_func:
                scores[current_func] = sum(current_scores.values()) / 15  # Normalize to 0-1
            current_func = line[len('FUNCTION:'):].strip()
            current_scores = {}
        elif line.startswith(('COMPLEXITY:', 'IMPACT:', 'CLARITY:')):
            key, _, value = line.partition(':')
            # Accept answers written as "4/5" by keeping the numerator.
            number = value.split('/', 1)[0].strip()
            try:
                current_scores[key.lower()] = float(number)
            except ValueError:
                # Skip a rating that is not numeric instead of aborting
                # the whole analysis.
                continue

    # Flush the record of the last function in the reply.
    if current_func:
        scores[current_func] = sum(current_scores.values()) / 15

    return scores
88+
def make_documentation_decisions(self, file_path: str, code: str) -> List[Dict]:
    """Rank the functions found in ``code`` by how much they need documentation.

    The file-level score and each function's score (scaled by 10) are
    averaged into a combined score, which is then mapped to a priority and
    a documentation style:

    * combined score >= 3.5: "high" / "comprehensive"
    * combined score >= 2:   "medium" / "standard"
    * otherwise:             "low" / "basic"

    Args:
        file_path: Path passed to the file-level scoring helper.
        code: Source text passed to the function-level analysis.

    Returns:
        One dict per function with the keys "function", "priority",
        "doc_style" and "score", ordered from highest to lowest score.
    """
    # File-level score first, then the per-function scores.
    file_level = self._get_file_importance_score(file_path)
    per_function = self.analyze_code_importance(code)

    # (minimum combined score, priority, documentation style), highest first.
    tiers = (
        (3.5, "high", "comprehensive"),
        (2, "medium", "standard"),
    )

    ranked = []
    for name, fraction in per_function.items():
        total = (file_level + fraction * 10) / 2

        for floor, tier, style in tiers:
            if total >= floor:
                break
        else:
            # No threshold reached.
            tier, style = "low", "basic"

        ranked.append({
            "function": name,
            "priority": tier,
            "doc_style": style,
            "score": total,
        })

    return sorted(ranked, key=lambda entry: entry['score'], reverse=True)
122+
def analyze_and_update_file(self, file_path: str) -> None:
    """Generate docstrings for the high and medium priority functions of a file.

    The file is read, its functions are ranked via
    ``make_documentation_decisions``, and the model is asked for docstrings
    for every function whose priority is "high" or "medium". The reply text
    is handed to ``self._parse_claude_response`` and any resulting updates
    to ``self._apply_updates``. If no function qualifies, a message is
    printed and nothing else happens.

    Args:
        file_path: Path of the Python file to analyze.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Make decisions about what to document.
    decisions = self.make_documentation_decisions(file_path, content)

    # Only proceed with high and medium priority functions.
    functions_to_document = [
        d for d in decisions if d['priority'] in ('high', 'medium')
    ]

    if not functions_to_document:
        print(f"No high/medium priority documentation needed for {file_path}")
        return

    # Pair every function with its own documentation style so the model
    # knows which level of detail belongs to which function.
    targets = "\n".join(
        f"- {d['function']} ({d['doc_style']} docstring)"
        for d in functions_to_document
    )

    prompt = (
        "For the following Python code, generate documentation for these functions:\n"
        f"{targets}\n"
        "\n"
        "For each function, provide:\n"
        "1. The function name\n"
        "2. Line number where documentation should be inserted\n"
        "3. A Google-style docstring at the level of detail listed for that function above\n"
        "\n"
        "Return in this format:\n"
        "FUNCTION: function_name\n"
        "LINE: line_number\n"
        "DOCSTRING:\n"
        "'''your docstring here'''\n"
        "\n"
        "Here's the code:\n"
        "\n"
        f"{content}"
    )

    # NOTE(review): the model id is pinned here; confirm it is still offered.
    message = self.client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=2000,
        temperature=0.3,
        messages=[{"role": "user", "content": prompt}],
    )

    # The Messages API returns ``content`` as a list of blocks that carry
    # their text in ``.text``; flatten it to plain text before parsing.
    # NOTE(review): assumes _parse_claude_response expects a string, as the
    # FUNCTION/LINE/DOCSTRING reply format suggests; confirm against it.
    reply = message.content
    if not isinstance(reply, str):
        reply = "".join(getattr(part, "text", "") or "" for part in reply)

    updates = self._parse_claude_response(reply)
    if updates:
        self._apply_updates(file_path, updates)
169+
170+ # ... (rest of the class methods remain the same)
0 commit comments