Skip to content

Commit 87c917e

Browse files
garland3, github-advanced-security[bot], and claude
authored
Fix file management (#24)
* feat(utilities): enhance logging in tool workflow and LLM response handling
  - Add detailed step logging in tool_utils.py for the execute_tools_workflow, execute_single_tool, and prepare_tool_arguments functions to trace the tool execution process
  - Modify LLM logging in error_utils.py to output the full llm_response instead of just has_tool_calls for better debugging
  - Update pdfbasic main.py to include the logging import and adjust the _analyze_pdf_content function to accept an optional original_filename parameter for improved context handling in PDF analysis

* feat: enhance chat file handling and PDF analysis
  - Refactor the chat service to directly manage file references in the session context for existing S3 files, bypassing complex file-handling utilities and improving efficiency
  - Modify the PDF analysis tool to generate in-memory PDF reports with word-frequency summaries, providing better visual output for text analytics

* Potential fix for code scanning alert no. 280: Unused import

  Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* fix: address PR review comments
  - Replace print() debugging statements with proper logger calls in file_size_test/main.py
  - Move the os import to the top of pdfbasic/main.py for consistency
  - Fix a double-space typo in the pdfbasic/main.py docstring

  🤖 Generated with [Claude Code](https://claude.com/claude-code)
  Co-Authored-By: Claude <[email protected]>

* refactor: improve error handling and request timeout
  - Add a 30-second timeout to all requests.get() calls to prevent indefinite hangs
  - Replace traceback.print_exc() with logger.exception() for better logging
  - Consolidate error-logging messages for improved readability

  🤖 Generated with [Claude Code](https://claude.com/claude-code)
  Co-Authored-By: Claude <[email protected]>

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
Co-authored-by: Claude <[email protected]>
1 parent d172154 commit 87c917e

File tree

9 files changed

+606
-354
lines changed

9 files changed

+606
-354
lines changed

backend/application/chat/service.py

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -341,7 +341,7 @@ async def handle_attach_file(
341341

342342
try:
343343
# Get file metadata
344-
file_result = await self.file_manager.get_file(user_email, s3_key)
344+
file_result = await self.file_manager.s3_client.get_file(user_email, s3_key)
345345
if not file_result:
346346
return {
347347
"type": "file_attach",
@@ -359,25 +359,26 @@ async def handle_attach_file(
359359
"error": "Invalid file metadata"
360360
}
361361

362-
# Add file to session context
363-
session.context = await file_utils.handle_session_files(
364-
session_context=session.context,
365-
user_email=user_email,
366-
files_map={
367-
filename: {
368-
"key": s3_key,
369-
"content_type": file_result.get("content_type"),
370-
"size": file_result.get("size"),
371-
"filename": filename
372-
}
373-
},
374-
file_manager=self.file_manager,
375-
update_callback=update_callback
376-
)
362+
# Add file reference directly to session context (file already exists in S3)
363+
session.context.setdefault("files", {})[filename] = {
364+
"key": s3_key,
365+
"content_type": file_result.get("content_type"),
366+
"size": file_result.get("size"),
367+
"source": "user",
368+
"last_modified": file_result.get("last_modified"),
369+
}
377370

378371
sanitized_s3_key = s3_key.replace('\r', '').replace('\n', '')
379372
logger.info(f"Attached file ({sanitized_s3_key}) to session {session_id}")
380373

374+
# Emit files_update to notify UI
375+
if update_callback:
376+
await file_utils.emit_files_update_from_context(
377+
session_context=session.context,
378+
file_manager=self.file_manager,
379+
update_callback=update_callback
380+
)
381+
381382
return {
382383
"type": "file_attach",
383384
"s3_key": s3_key,

backend/application/chat/utilities/error_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ async def safe_call_llm_with_tools(
8585
llm_response = await llm_caller.call_with_tools(
8686
model, messages, tools_schema, tool_choice, temperature=temperature
8787
)
88-
logger.info(f"LLM response received with tools only, has_tool_calls: {llm_response.has_tool_calls()}")
88+
logger.info(f"LLM response received with tools only, llm_response: {llm_response}")
8989
return llm_response
9090
except Exception as e:
9191
logger.error(f"Error calling LLM with tools: {e}", exc_info=True)

backend/application/chat/utilities/tool_utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ async def execute_tools_workflow(
3535
3636
Pure function that coordinates tool execution without maintaining state.
3737
"""
38+
logger.info("Step 4: Entering execute_tools_workflow")
3839
# Add assistant message with tool calls
3940
messages.append({
4041
"role": "assistant",
@@ -115,6 +116,7 @@ async def execute_single_tool(
115116
116117
Pure function that doesn't maintain state - all context passed as parameters.
117118
"""
119+
logger.info("Step 5: Entering execute_single_tool")
118120
from . import notification_utils
119121

120122
try:
@@ -233,6 +235,7 @@ def prepare_tool_arguments(tool_call, session_context: Dict[str, Any], tool_mana
233235
234236
Pure function that transforms arguments based on context and tool schema.
235237
"""
238+
logger.info("Step 6: Entering prepare_tool_arguments")
236239
# Parse raw arguments
237240
raw_args = getattr(tool_call.function, "arguments", {})
238241
if isinstance(raw_args, dict):
@@ -286,6 +289,7 @@ def to_url(key: str) -> str:
286289
ref = files_ctx.get(fname)
287290
if ref and ref.get("key"):
288291
url = to_url(ref["key"])
292+
logger.info(f"Step 6.1: Rewriting filename to URL: {url}")
289293
parsed_args.setdefault("original_filename", fname)
290294
parsed_args["filename"] = url
291295
parsed_args.setdefault("file_url", url)
@@ -304,6 +308,7 @@ def to_url(key: str) -> str:
304308
else:
305309
urls.append(fname)
306310
if urls:
311+
logger.info(f"Step 6.1: Rewriting filenames to URLs: {urls}")
307312
parsed_args.setdefault("original_file_names", originals)
308313
parsed_args["file_names"] = urls
309314
parsed_args.setdefault("file_urls", urls)

backend/mcp/file_size_test/main.py

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
#!/usr/bin/env python3
2+
"""
3+
File Size Test MCP Server using FastMCP.
4+
Simple tool for testing file transfer by returning file size.
5+
"""
6+
7+
import base64
8+
import os
9+
import logging
10+
from typing import Any, Dict, Annotated
11+
12+
import requests
13+
from fastmcp import FastMCP
14+
15+
logger = logging.getLogger(__name__)
16+
17+
mcp = FastMCP("File_Size_Test")
18+
19+
20+
@mcp.tool
def process_file_demo(
    filename: Annotated[str, "The file to process (URL or base64)"],
    username: Annotated[str, "Username for auditing"] = None
) -> Dict[str, Any]:
    """
    Demo tool that processes a file and returns a new transformed file.

    Demonstrates the v2 MCP artifacts contract: accepts a file input,
    processes it (UTF-8 text is upper-cased; anything else gets a demo
    marker appended), and returns the result as a base64 artifact with
    display hints telling the UI to open it in the canvas viewer.

    Args:
        filename: File reference to process. Either a URL (absolute
            ``http(s)://`` or a ``/``-relative backend path) or raw
            base64-encoded file data.
        username: Injected user identity for auditing; ``None`` when the
            caller did not supply one.

    Returns:
        Dictionary with ``results``, ``artifacts``, ``display``, and
        ``meta_data`` keys per the v2 contract, or an error payload with
        ``meta_data.is_error`` set to ``True`` on failure.
    """
    # Truncate the logged value: `filename` may be a multi-megabyte base64 blob.
    logger.debug(f"process_file_demo called with filename: {filename[:100]}")
    logger.debug(f"username: {username}")
    try:
        # A leading scheme or slash means "download it"; anything else is
        # treated as base64 data.
        is_url = (
            filename.startswith("http://") or
            filename.startswith("https://") or
            filename.startswith("/api/") or
            filename.startswith("/")
        )
        logger.debug(f"is_url determined as: {is_url}")

        if is_url:
            if filename.startswith("/"):
                # Relative path: resolve against the backend service.
                backend_url = os.getenv("BACKEND_URL", "http://localhost:8000")
                url = f"{backend_url}{filename}"
            else:
                url = filename
            logger.info(f"Downloading file for processing: {url}")
            # 30s timeout prevents an indefinite hang on a stalled backend.
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            file_bytes = response.content
            original_filename = filename.split('/')[-1] or "processed_file.txt"
        else:
            # Assume base64-encoded payload.
            logger.info("Decoding base64 for file processing")
            file_bytes = base64.b64decode(filename)
            original_filename = "processed_file.txt"

        logger.debug(f"Original file size: {len(file_bytes)} bytes")

        # Process the file (demo: convert text to uppercase). Record which
        # branch ran with an explicit flag instead of probing locals().
        is_text = True
        try:
            processed_bytes = file_bytes.decode('utf-8').upper().encode('utf-8')
            processed_mime = "text/plain"
            description = "Processed text (converted to uppercase)"
        except UnicodeDecodeError:
            # Not valid UTF-8: demonstrate binary modification instead.
            is_text = False
            processed_bytes = file_bytes + b"\n[DEMO PROCESSED]"
            processed_mime = "application/octet-stream"
            description = "Processed binary file (demo modification)"

        # Create the artifact payload.
        processed_b64 = base64.b64encode(processed_bytes).decode('ascii')
        new_filename = f"processed_{original_filename}"

        # Hints telling the UI to open the result in the canvas viewer.
        display_hints = {
            "open_canvas": True,
            "primary_file": new_filename,
            "mode": "replace",
            "viewer_hint": "auto"
        }

        result = {
            "results": {
                "operation": "process_file_demo",
                "original_filename": original_filename,
                "processed_filename": new_filename,
                "original_size": len(file_bytes),
                "processed_size": len(processed_bytes),
                "processing_type": "text_uppercase" if is_text else "binary_demo",
                "status": "success"
            },
            "meta_data": {
                "is_error": False,
                "processed_by": "process_file_demo_v2",
                "username": username,
                "mime_type": processed_mime
            },
            "artifacts": [
                {
                    "name": new_filename,
                    "b64": processed_b64,
                    "mime": processed_mime,
                    "size": len(processed_bytes),
                    "description": description,
                    "viewer": "auto"
                }
            ],
            "display": display_hints
        }
        logger.debug(f"About to return processed file result: {result['results']}")
        return result

    except Exception as e:
        logger.exception(f"Exception in process_file_demo: {str(e)}")
        return {
            "results": {
                "operation": "process_file_demo",
                "error": f"File processing failed: {str(e)}",
                "filename": filename
            },
            "meta_data": {
                "is_error": True,
                "error_type": type(e).__name__,
                "username": username
            }
        }
162+
163+
@mcp.tool
def get_file_size(
    filename: Annotated[str, "The file to check (URL or base64)"]
) -> Dict[str, Any]:
    """
    Test file transfer by returning the size of the transferred file.

    Simple diagnostic tool for validating the file upload/download
    workflow between frontend and backend: it accepts a file reference and
    reports its size in bytes and in human-readable form.

    **File input support:**
    - URL-based files (``http://``, ``https://``, or ``/``-relative paths,
      which are resolved against ``BACKEND_URL``)
    - Base64-encoded file data

    Args:
        filename: File reference — a URL or raw base64 data.

    Returns:
        Dictionary containing:
        - ``results.operation``: "get_file_size"
        - ``results.filename``: original filename/URL
        - ``results.size_bytes``: file size in bytes
        - ``results.size_human``: human-readable size (e.g. "1.50 MB")
        Or an error payload with ``meta_data.is_error`` set on failure.
    """
    # Truncate the logged value: `filename` may be a multi-megabyte base64 blob.
    logger.debug(f"get_file_size called with filename: {filename[:100]}")
    logger.debug(f"filename type: {type(filename)}, length: {len(filename) if filename else 0}")
    try:
        # A leading scheme or slash means "download it"; anything else is
        # treated as base64 data.
        is_url = (
            filename.startswith("http://") or
            filename.startswith("https://") or
            filename.startswith("/api/") or
            filename.startswith("/")
        )
        logger.debug(f"is_url determined as: {is_url}")

        if is_url:
            if filename.startswith("/"):
                # Relative path: resolve against the backend service.
                backend_url = os.getenv("BACKEND_URL", "http://localhost:8000")
                url = f"{backend_url}{filename}"
                logger.debug(f"Constructed URL from relative path: {filename} -> {url}")
            else:
                url = filename
            logger.info(f"Downloading file from URL: {url}")
            # 30s timeout prevents an indefinite hang on a stalled backend.
            response = requests.get(url, timeout=30)
            logger.debug(f"HTTP response status: {response.status_code}")
            response.raise_for_status()
            file_bytes = response.content
            logger.debug(f"Successfully downloaded file content, length: {len(file_bytes)} bytes")
        else:
            # Assume base64-encoded payload.
            logger.info("Decoding base64 file data")
            file_bytes = base64.b64decode(filename)
            logger.debug(f"Successfully decoded base64 data, length: {len(file_bytes)} bytes")

        # Calculate file size in both raw and human-readable form.
        size_bytes = len(file_bytes)
        size_human = _format_size(size_bytes)
        logger.debug(f"Calculated file size: {size_bytes} bytes ({size_human})")

        result = {
            "results": {
                "operation": "get_file_size",
                "filename": filename,
                "size_bytes": size_bytes,
                "size_human": size_human,
                "status": "success"
            },
            "meta_data": {
                "is_error": False,
                "transfer_method": "url" if is_url else "base64"
            }
        }
        logger.debug(f"About to return success result: {result}")
        return result

    except Exception as e:
        logger.exception(
            f"Exception occurred while processing file: {str(e)} "
            f"(type: {type(e).__name__}, filename: {filename[:100]})"
        )
        error_result = {
            "results": {
                "operation": "get_file_size",
                "error": f"File size check failed: {str(e)}",
                "filename": filename
            },
            "meta_data": {
                "is_error": True,
                "error_type": type(e).__name__
            }
        }
        logger.debug(f"About to return error result: {error_result}")
        return error_result
272+
273+
274+
def _format_size(size_bytes: int) -> str:
275+
"""Format file size in human-readable format."""
276+
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
277+
if size_bytes < 1024.0:
278+
return f"{size_bytes:.2f} {unit}"
279+
size_bytes /= 1024.0
280+
return f"{size_bytes:.2f} PB"
281+
282+
283+
if __name__ == "__main__":
284+
mcp.run()

0 commit comments

Comments
 (0)