Commit cd39c41

feat(errors): add LLMServiceError for generic LLM failures and enhance error classification
1 parent 52ea1d8 commit cd39c41

7 files changed: +118 -95 lines changed

backend/application/chat/utilities/error_utils.py

Lines changed: 7 additions & 5 deletions
```diff
@@ -8,7 +8,7 @@
 import logging
 from typing import Any, Dict, List, Optional, Callable, Awaitable, Tuple

-from domain.errors import ValidationError, RateLimitError, LLMTimeoutError, LLMAuthenticationError
+from domain.errors import ValidationError, RateLimitError, LLMTimeoutError, LLMAuthenticationError, LLMServiceError
 from domain.messages.models import MessageType

 logger = logging.getLogger(__name__)
@@ -65,7 +65,9 @@ def classify_llm_error(error: Exception) -> Tuple[type, str, str]:
     Classify LLM errors and return appropriate error type, user message, and log message.

     Returns:
-        Tuple of (error_class, user_message, log_message)
+        Tuple of (error_class, user_message, log_message).
+
+        NOTE: user_message MUST NOT contain raw exception details or sensitive data.
     """
     error_str = str(error)
     error_type_name = type(error).__name__
@@ -88,10 +90,10 @@ def classify_llm_error(error: Exception) -> Tuple[type, str, str]:
         log_msg = f"Authentication error: {error_str}"
         return (LLMAuthenticationError, user_msg, log_msg)

-    # Generic LLM error
-    user_msg = f"The AI service encountered an error. Please try again or contact support if the issue persists."
+    # Generic LLM service error (non-validation)
+    user_msg = "The AI service encountered an error. Please try again or contact support if the issue persists."
     log_msg = f"LLM error: {error_str}"
-    return (ValidationError, user_msg, log_msg)
+    return (LLMServiceError, user_msg, log_msg)


 async def safe_call_llm_with_tools(
```
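The classified tuple is consumed by safe_call_llm_with_tools(), whose signature begins at the end of this hunk. Its body is not part of the diff, but the intended pattern (per the flow diagram further down) is roughly the sketch below; the wrapper's parameters and the call_with_tools() arguments here are assumptions, not the real signature.

```python
# Minimal sketch of the classify-log-raise pattern; not the real wrapper,
# whose signature and parameters differ.
import logging

from application.chat.utilities.error_utils import classify_llm_error

logger = logging.getLogger(__name__)


async def call_llm_safely(llm_caller, model, messages, tools):
    """Call the LLM and convert any failure into a sanitized domain error."""
    try:
        return await llm_caller.call_with_tools(model, messages, tools)
    except Exception as exc:
        error_class, user_msg, log_msg = classify_llm_error(exc)
        logger.error(log_msg)        # full details stay in server logs
        raise error_class(user_msg)  # only the user-safe message propagates
```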

backend/domain/errors.py

Lines changed: 5 additions & 0 deletions
```diff
@@ -46,6 +46,11 @@ class LLMError(DomainError):
     pass


+class LLMServiceError(LLMError):
+    """Generic LLM service failure that is not a validation issue."""
+    pass
+
+
 class ToolError(DomainError):
     """Tool execution error."""
     pass
```
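Because the new class derives from LLMError, which the hunk header shows derives from DomainError, existing handlers that catch these broader bases still see generic LLM failures, while handlers written specifically for ValidationError no longer do. A quick illustration:

```python
from domain.errors import DomainError, LLMError, LLMServiceError, ValidationError

err = LLMServiceError("The AI service encountered an error.")

# Generic LLM failures are still caught by the broader LLM/domain handlers...
assert isinstance(err, LLMError)
assert isinstance(err, DomainError)

# ...but they are no longer ValidationError instances, so validation-specific
# handling is not triggered for them.
assert not isinstance(err, ValidationError)
```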

backend/tests/test_error_classification.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -1,7 +1,7 @@
 """Tests for error classification and user-friendly error messages."""

 from application.chat.utilities.error_utils import classify_llm_error
-from domain.errors import RateLimitError, LLMTimeoutError, LLMAuthenticationError, ValidationError
+from domain.errors import RateLimitError, LLMTimeoutError, LLMAuthenticationError, LLMServiceError


 class TestErrorClassification:
@@ -77,7 +77,7 @@ def test_classify_generic_llm_error(self):

         error_class, user_msg, log_msg = classify_llm_error(error)

-        assert error_class == ValidationError
+        assert error_class == LLMServiceError
         assert "error" in user_msg.lower()
         assert "try again" in user_msg.lower() or "contact support" in user_msg.lower()

```
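Put together, the updated generic-error test takes roughly the following shape; the construction of `error` sits above the hunk shown here, so the plain Exception below is an illustrative stand-in.

```python
from application.chat.utilities.error_utils import classify_llm_error
from domain.errors import LLMServiceError


class TestErrorClassification:
    def test_classify_generic_llm_error(self):
        # Stand-in for the error object built earlier in the real test (not shown in the hunk).
        error = Exception("boom: upstream 500 from provider")

        error_class, user_msg, log_msg = classify_llm_error(error)

        assert error_class == LLMServiceError
        assert "error" in user_msg.lower()
        assert "try again" in user_msg.lower() or "contact support" in user_msg.lower()
        # Per the docstring note added above, raw exception text must not leak to the user.
        assert "boom" not in user_msg.lower()
```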
docs/developer/README.md

Lines changed: 2 additions & 6 deletions
```diff
@@ -5,13 +5,9 @@ This guide provides technical details for developers contributing to the Atlas U
 ## Topics

 ### Getting Started
-- [Architecture Overview](architecture.md) - System architecture and design patterns
-- [Development Conventions](conventions.md) - Coding standards and best practices

 ### Building MCP Servers
-- [Creating MCP Servers](creating-mcp-servers.md) - How to build tool servers
-- [Working with Files](working-with-files.md) - File access patterns for tools
-- [Progress Updates](progress-updates.md) - Sending intermediate results to users

 ### Frontend Development
-- [Custom Canvas Renderers](canvas-renderers.md) - Adding support for new file types
+- [Error Handling Improvements](error_handling_improvements.md) - LLM error classification and surfacing
+- [Error Flow Diagram](error_flow_diagram.md) - End-to-end error flow diagram
```
docs/developer/error_flow_diagram.md

Lines changed: 76 additions & 73 deletions

The diff wraps the entire file in a fenced code block (an opening "```markdown" line at the top and a closing "```" at the end) and re-indents the ASCII flow diagram; the flow it documents is otherwise unchanged:

```
# Error Flow Diagram

## Complete Error Handling Flow

USER SENDS MESSAGE
        │
        ▼
WebSocket Handler (main.py)
handle_chat() async function
        │
        ▼
ChatService.handle_chat_message()
(service.py)
        │
        ▼
ChatOrchestrator.execute()
(orchestrator.py)
        │
        ▼
ToolsModeRunner.run()
(modes/tools.py)
        │
        ▼
error_utils.safe_call_llm_with_tools()
(utilities/error_utils.py)
        │
        ▼
LLMCaller.call_with_tools()
(modules/llm/litellm_caller.py)
        │
        ▼
LiteLLM Library
(calls Cerebras/OpenAI/etc.)
        │
   ┌─ SUCCESS (200 OK)
   │
   └─ ERROR (Rate Limit)
        Exception: RateLimitError
        "We're experiencing high traffic right now!"
            │
            ▼
        error_utils.classify_llm_error(exception)
        Returns:
          - error_class: RateLimitError
          - user_msg: "The AI service is experiencing high traffic..."
          - log_msg: Full details
            │
            ▼
        Raise RateLimitError(user_msg)
        │
        ▼  (both paths)
Back to WebSocket Handler (main.py)
Exception Catching
        │
   ┌─ except RateLimitError:
   │      Send to user:
   │      {
   │        type: "error",
   │        message: user-friendly msg,
   │        error_type: "rate_limit"
   │      }
   │
   └─ except LLMTimeoutError / LLMAuth...Error / ValidationError / etc.:
          Send appropriate message to user
        │
        ▼
WebSocket Message Sent
{
  ...
  "error_type": "rate_limit"
}
        │
        ▼
Frontend (websocketHandlers.js)
  ...
    timestamp: new Date().toISOString()
  })
        │
        ▼
UI DISPLAYS ERROR
```

...

4. **Error Type Field**: The `error_type` field allows the frontend to potentially handle different error types differently in the future (e.g., automatic retry for timeouts).

5. **No Sensitive Data Exposure**: API keys, stack traces, and other sensitive information are never sent to the frontend.
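To make the handler-side catch concrete, here is a minimal sketch of the exception-catching step described above. It is not the actual main.py code: the Starlette/FastAPI-style send_json() call, the send_error() helper, and every error_type string except "rate_limit" are assumptions.

```python
# Sketch of the exception-catching step in the WebSocket handler.
# Assumptions: a Starlette/FastAPI-style WebSocket with send_json(),
# a hypothetical send_error() helper, and illustrative error_type strings
# (only "rate_limit" is confirmed by the diagram).
from domain.errors import (
    LLMAuthenticationError,
    LLMServiceError,
    LLMTimeoutError,
    RateLimitError,
    ValidationError,
)


async def send_error(websocket, message: str, error_type: str) -> None:
    """Send a user-facing error payload over the WebSocket."""
    await websocket.send_json({
        "type": "error",
        "message": message,      # sanitized user_msg only, never raw details
        "error_type": error_type,
    })


async def handle_chat(websocket, chat_service, payload) -> None:
    try:
        await chat_service.handle_chat_message(payload)
    except RateLimitError as exc:
        await send_error(websocket, str(exc), "rate_limit")
    except (LLMTimeoutError, LLMAuthenticationError, LLMServiceError, ValidationError) as exc:
        await send_error(websocket, str(exc), "llm_error")
```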
