Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 17 additions & 16 deletions backend/application/chat/service.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ async def handle_attach_file(

try:
# Get file metadata
file_result = await self.file_manager.get_file(user_email, s3_key)
file_result = await self.file_manager.s3_client.get_file(user_email, s3_key)
Copy link

Copilot AI Oct 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Direct access to s3_client breaks encapsulation. The file_manager should provide a method to retrieve files rather than exposing its internal s3_client. This creates tight coupling and makes the code harder to maintain or refactor.

Suggested change
file_result = await self.file_manager.s3_client.get_file(user_email, s3_key)
file_result = await self.file_manager.get_file(user_email, s3_key)

Copilot uses AI. Check for mistakes.
if not file_result:
return {
"type": "file_attach",
Expand All @@ -359,25 +359,26 @@ async def handle_attach_file(
"error": "Invalid file metadata"
}

# Add file to session context
session.context = await file_utils.handle_session_files(
session_context=session.context,
user_email=user_email,
files_map={
filename: {
"key": s3_key,
"content_type": file_result.get("content_type"),
"size": file_result.get("size"),
"filename": filename
}
},
file_manager=self.file_manager,
update_callback=update_callback
)
# Add file reference directly to session context (file already exists in S3)
session.context.setdefault("files", {})[filename] = {
"key": s3_key,
"content_type": file_result.get("content_type"),
"size": file_result.get("size"),
"source": "user",
"last_modified": file_result.get("last_modified"),
}

sanitized_s3_key = s3_key.replace('\r', '').replace('\n', '')
logger.info(f"Attached file ({sanitized_s3_key}) to session {session_id}")

# Emit files_update to notify UI
if update_callback:
await file_utils.emit_files_update_from_context(
session_context=session.context,
file_manager=self.file_manager,
update_callback=update_callback
)

return {
"type": "file_attach",
"s3_key": s3_key,
Expand Down
2 changes: 1 addition & 1 deletion backend/application/chat/utilities/error_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ async def safe_call_llm_with_tools(
llm_response = await llm_caller.call_with_tools(
model, messages, tools_schema, tool_choice, temperature=temperature
)
logger.info(f"LLM response received with tools only, has_tool_calls: {llm_response.has_tool_calls()}")
logger.info(f"LLM response received with tools only, llm_response: {llm_response}")
return llm_response
except Exception as e:
logger.error(f"Error calling LLM with tools: {e}", exc_info=True)
Expand Down
5 changes: 5 additions & 0 deletions backend/application/chat/utilities/tool_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ async def execute_tools_workflow(

Pure function that coordinates tool execution without maintaining state.
"""
logger.info("Step 4: Entering execute_tools_workflow")
# Add assistant message with tool calls
messages.append({
"role": "assistant",
Expand Down Expand Up @@ -115,6 +116,7 @@ async def execute_single_tool(

Pure function that doesn't maintain state - all context passed as parameters.
"""
logger.info("Step 5: Entering execute_single_tool")
from . import notification_utils

try:
Expand Down Expand Up @@ -233,6 +235,7 @@ def prepare_tool_arguments(tool_call, session_context: Dict[str, Any], tool_mana

Pure function that transforms arguments based on context and tool schema.
"""
logger.info("Step 6: Entering prepare_tool_arguments")
# Parse raw arguments
raw_args = getattr(tool_call.function, "arguments", {})
if isinstance(raw_args, dict):
Expand Down Expand Up @@ -286,6 +289,7 @@ def to_url(key: str) -> str:
ref = files_ctx.get(fname)
if ref and ref.get("key"):
url = to_url(ref["key"])
logger.info(f"Step 6.1: Rewriting filename to URL: {url}")
parsed_args.setdefault("original_filename", fname)
parsed_args["filename"] = url
parsed_args.setdefault("file_url", url)
Expand All @@ -304,6 +308,7 @@ def to_url(key: str) -> str:
else:
urls.append(fname)
if urls:
logger.info(f"Step 6.1: Rewriting filenames to URLs: {urls}")
parsed_args.setdefault("original_file_names", originals)
parsed_args["file_names"] = urls
parsed_args.setdefault("file_urls", urls)
Expand Down
284 changes: 284 additions & 0 deletions backend/mcp/file_size_test/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,284 @@
#!/usr/bin/env python3
"""
File Size Test MCP Server using FastMCP.
Simple tool for testing file transfer by returning file size.
"""

import base64
import os
Copy link

Copilot AI Oct 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Import of 'io' is not used.

Suggested change
import os

Copilot uses AI. Check for mistakes.
import logging
from typing import Any, Dict, Annotated

import requests
from fastmcp import FastMCP

logger = logging.getLogger(__name__)

mcp = FastMCP("File_Size_Test")


@mcp.tool
def process_file_demo(
    filename: Annotated[str, "The file to process (URL or base64)"],
    username: Annotated[str, "Username for auditing"] = None
) -> Dict[str, Any]:
    """
    Demo tool that processes a file and returns a new transformed file.

    This tool demonstrates the v2 MCP artifacts contract by:
    - Accepting a file input (URL or base64 data)
    - Processing it (converting text to uppercase for demo)
    - Returning a new file as an artifact with proper v2 format
    - Including display hints for canvas viewing

    **File Processing:**
    - For UTF-8 text files: converts content to uppercase
    - For binary files: appends a demo marker to show modification capability

    Args:
        filename: File reference (URL, server-relative path, or base64 data).
        username: Injected user identity for auditing (may be None).

    Returns:
        Dictionary with ``results``, ``artifacts``, ``display`` and
        ``meta_data`` per the v2 contract, or a ``results``/``meta_data``
        error payload (``is_error: True``) if processing fails.
    """
    # Truncate the logged value: a base64 input may be megabytes long.
    logger.debug("process_file_demo called with filename: %.100s", filename)
    logger.debug("username: %s", username)
    try:
        # URL-style inputs: absolute http(s) URLs or server-relative paths.
        is_url = filename.startswith(("http://", "https://", "/api/", "/"))
        logger.debug("is_url determined as: %s", is_url)

        if is_url:
            if filename.startswith("/"):
                # NOTE(review): BACKEND_URL is trusted deployment config; if it
                # can ever be user-influenced this is an SSRF vector — confirm.
                backend_url = os.getenv("BACKEND_URL", "http://localhost:8000")
                url = f"{backend_url}{filename}"
            else:
                url = filename
            logger.info(f"Downloading file for processing: {url}")
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            file_bytes = response.content
            # Last path segment as a display name; fall back for trailing "/".
            original_filename = filename.split('/')[-1] or "processed_file.txt"
        else:
            # Anything else is assumed to be base64-encoded file content.
            logger.info("Decoding base64 for file processing")
            file_bytes = base64.b64decode(filename)
            original_filename = "processed_file.txt"

        logger.debug("Original file size: %d bytes", len(file_bytes))

        # Process the file (demo: convert text to uppercase). Track which
        # branch ran with an explicit flag instead of probing locals().
        try:
            original_text = file_bytes.decode('utf-8')
            processed_bytes = original_text.upper().encode('utf-8')
            processed_mime = "text/plain"
            description = "Processed text (converted to uppercase)"
            processing_type = "text_uppercase"
        except UnicodeDecodeError:
            # Not valid UTF-8: demonstrate binary modification instead.
            processed_bytes = file_bytes + b"\n[DEMO PROCESSED]"
            processed_mime = "application/octet-stream"
            description = "Processed binary file (demo modification)"
            processing_type = "binary_demo"

        # Build the v2 artifact (base64 content + metadata).
        processed_b64 = base64.b64encode(processed_bytes).decode('ascii')
        new_filename = f"processed_{original_filename}"

        # Canvas display hints for the client UI.
        display_hints = {
            "open_canvas": True,
            "primary_file": new_filename,
            "mode": "replace",
            "viewer_hint": "auto"
        }

        result = {
            "results": {
                "operation": "process_file_demo",
                "original_filename": original_filename,
                "processed_filename": new_filename,
                "original_size": len(file_bytes),
                "processed_size": len(processed_bytes),
                "processing_type": processing_type,
                "status": "success"
            },
            "meta_data": {
                "is_error": False,
                "processed_by": "process_file_demo_v2",
                "username": username,
                "mime_type": processed_mime
            },
            "artifacts": [
                {
                    "name": new_filename,
                    "b64": processed_b64,
                    "mime": processed_mime,
                    "size": len(processed_bytes),
                    "description": description,
                    "viewer": "auto"
                }
            ],
            "display": display_hints
        }
        # Log only the summary; the artifact payload would bloat the log.
        logger.debug("About to return processed file result: %s", result['results'])
        return result

    except Exception as e:
        logger.exception(f"Exception in process_file_demo: {str(e)}")
        error_result = {
            "results": {
                "operation": "process_file_demo",
                "error": f"File processing failed: {str(e)}",
                "filename": filename
            },
            "meta_data": {
                "is_error": True,
                "error_type": type(e).__name__,
                "username": username
            }
        }
        return error_result


@mcp.tool
def get_file_size(
    filename: Annotated[str, "The file to check (URL or base64)"]
) -> Dict[str, Any]:
    """
    Test file transfer by returning the size of the transferred file.

    This simple tool is designed for testing file transfer functionality
    between frontend and backend. It accepts a file and returns its size in bytes.

    **File Input Support:**
    - URL-based files (http://, https://, or server-relative paths)
    - Base64-encoded file data
    - Automatic backend URL construction for relative paths

    **Use Cases:**
    - Testing file upload/download workflows
    - Validating file transfer infrastructure
    - Debugging file handling issues

    Args:
        filename: File reference (URL, server-relative path, or base64 data).

    Returns:
        Dictionary containing:
        - results.operation: "get_file_size"
        - results.filename: Original filename/URL
        - results.size_bytes: File size in bytes
        - results.size_human: Human-readable size (e.g., "1.50 MB")
        Or an error payload (``meta_data.is_error: True``) on failure.
    """
    # Truncate the logged value: a base64 input may be megabytes long.
    logger.debug("get_file_size called with filename: %.100s", filename)
    logger.debug("filename type: %s, length: %d", type(filename), len(filename) if filename else 0)
    try:
        # URL-style inputs: absolute http(s) URLs or server-relative paths.
        is_url = filename.startswith(("http://", "https://", "/api/", "/"))
        logger.debug("is_url determined as: %s", is_url)

        if is_url:
            # Convert relative URLs to absolute URLs against the backend host.
            if filename.startswith("/"):
                backend_url = os.getenv("BACKEND_URL", "http://localhost:8000")
                url = f"{backend_url}{filename}"
                logger.debug("Constructed URL from relative path %s -> %s", filename, url)
            else:
                url = filename
                logger.debug("Using absolute URL: %s", url)

            logger.info(f"Downloading file from URL: {url}")
            response = requests.get(url, timeout=30)
            logger.debug("HTTP response status: %s", response.status_code)
            response.raise_for_status()
            file_bytes = response.content
            logger.debug("Successfully downloaded file content, length: %d bytes", len(file_bytes))
        else:
            # Anything else is assumed to be base64-encoded file content.
            logger.info("Decoding base64 file data")
            file_bytes = base64.b64decode(filename)
            logger.debug("Successfully decoded base64 data, length: %d bytes", len(file_bytes))

        # Report the size in raw bytes and a human-readable form.
        size_bytes = len(file_bytes)
        size_human = _format_size(size_bytes)
        logger.debug("Calculated file size: %d bytes (%s)", size_bytes, size_human)

        result = {
            "results": {
                "operation": "get_file_size",
                "filename": filename,
                "size_bytes": size_bytes,
                "size_human": size_human,
                "status": "success"
            },
            "meta_data": {
                "is_error": False,
                "transfer_method": "url" if is_url else "base64"
            }
        }
        # Log only the summary portion; `filename` may be a huge base64 blob.
        logger.debug("About to return success result: %s", result['results'])
        return result

    except Exception as e:
        logger.exception("Exception while processing file: %s (type: %s)", e, type(e).__name__)
        error_result = {
            "results": {
                "operation": "get_file_size",
                "error": f"File size check failed: {str(e)}",
                "filename": filename
            },
            "meta_data": {
                "is_error": True,
                "error_type": type(e).__name__
            }
        }
        logger.debug("About to return error result: %s", error_result['results'])
        return error_result
Copy link

Copilot AI Oct 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Extensive use of print() statements for debugging should be replaced with proper logging using the logger object that's already configured (line 16). The logger provides better control over log levels, formatting, and output destinations.

Copilot uses AI. Check for mistakes.


def _format_size(size_bytes: int) -> str:
"""Format file size in human-readable format."""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if size_bytes < 1024.0:
return f"{size_bytes:.2f} {unit}"
size_bytes /= 1024.0
return f"{size_bytes:.2f} PB"


if __name__ == "__main__":
    # Start the FastMCP server (default transport) when run as a script.
    mcp.run()
Loading
Loading