Skip to content

Commit 4b7c3e4

Browse files
committed
Implement real streaming from LlamaStack
- Replace simulated streaming with actual LlamaStack streaming in chat service
- Handle chunk types: text deltas, tool execution status, completion events
- Add fallback formatting for tool outputs when no text is generated
- Increase timeout to 300s and max_infer_iters to 100
- Fix MCP discovery to use mcp.transport label instead of proxyMode
- Update logging for new app/ directory structure
- Remove unused Kubernetes service readiness code

Signed-off-by: Yuval Turgeman <[email protected]>
Assisted-by: Claude
1 parent c147613 commit 4b7c3e4

File tree

16 files changed

+559
-135
lines changed

16 files changed

+559
-135
lines changed

backend/README.md

Lines changed: 35 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -11,22 +11,41 @@ FastAPI backend for the AI Virtual Agent Quickstart project. For complete setup
1111

1212
```
1313
backend/
14-
├── main.py # FastAPI app entrypoint, includes routers
15-
├── database.py # Database connection and session
16-
├── models.py # SQLAlchemy models
17-
├── schemas.py # Pydantic schemas
18-
├── routes/ # API route modules
19-
│ ├── users.py # User management endpoints
20-
│ ├── mcp_servers.py # MCP server management
21-
│ ├── knowledge_bases.py # Knowledge base operations
22-
│ ├── virtual_assistants.py # Agent CRUD operations
23-
│ ├── chat_sessions.py # Chat session management
24-
│ ├── tools.py # Tool configuration endpoints
25-
│ └── guardrails.py # Guardrail management
26-
├── utils/ # Utility modules
27-
│ └── logging_config.py # Centralized logging setup
28-
├── requirements.txt # Python dependencies
29-
└── .env # Environment variables (not committed)
14+
├── main.py # FastAPI app entrypoint
15+
├── app/
16+
│ ├── api/v1/ # API route modules
17+
│ │ ├── chat.py # Chat endpoints
18+
│ │ ├── chat_sessions.py # Session management
19+
│ │ ├── virtual_agents.py # Agent CRUD operations
20+
│ │ ├── knowledge_bases.py # Knowledge base operations
21+
│ │ ├── tools.py # Tool configuration endpoints
22+
│ │ ├── users.py # User management endpoints
23+
│ │ ├── mcp_servers.py # MCP server management
24+
│ │ └── ... # Additional API endpoints
25+
│ ├── core/ # Core configuration
26+
│ │ ├── logging_config.py # Centralized logging setup
27+
│ │ ├── auth.py # Authentication utilities
28+
│ │ └── template_startup.py # Template initialization
29+
│ ├── crud/ # Data access layer
30+
│ │ ├── virtual_agents.py # Agent data operations
31+
│ │ ├── chat_sessions.py # Session data operations
32+
│ │ └── ... # Additional CRUD operations
33+
│ ├── models/ # SQLAlchemy database models
34+
│ │ ├── agent.py # Agent models
35+
│ │ ├── chat.py # Chat models
36+
│ │ └── ... # Additional models
37+
│ ├── schemas/ # Pydantic schemas for validation
38+
│ │ ├── agent.py # Agent schemas
39+
│ │ ├── chat.py # Chat schemas
40+
│ │ └── ... # Additional schemas
41+
│ ├── services/ # Business logic layer
42+
│ │ ├── chat.py # Chat service logic
43+
│ │ └── ... # Additional services
44+
│ └── database.py # Database connection and session
45+
├── migrations/ # Alembic database migrations
46+
├── agent_templates/ # Agent template YAML files
47+
├── requirements.txt # Python dependencies
48+
└── .env # Environment variables (not committed)
3049
```
3150

3251
## Environment Variables

backend/app/api/llamastack.py

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
load_dotenv()
1414

1515
LLAMASTACK_URL = os.getenv("LLAMASTACK_URL", "http://localhost:8321")
16-
LLAMASTACK_TIMEOUT = float(os.getenv("LLAMASTACK_TIMEOUT", "60.0"))
16+
LLAMASTACK_TIMEOUT = float(os.getenv("LLAMASTACK_TIMEOUT", "300.0"))
1717

1818
# Set up logging
1919
logger = logging.getLogger(__name__)
@@ -70,7 +70,6 @@ def get_client(
7070
base_url=LLAMASTACK_URL,
7171
default_headers=headers or {},
7272
timeout=httpx.Timeout(LLAMASTACK_TIMEOUT),
73-
max_retries=0,
7473
)
7574
if api_key:
7675
client.api_key = api_key

backend/app/api/v1/agent_templates.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -340,7 +340,7 @@ async def initialize_agent_from_template(
340340
top_p=0.95,
341341
max_tokens=4096,
342342
repetition_penalty=1.0,
343-
max_infer_iters=10,
343+
max_infer_iters=100,
344344
input_shields=[],
345345
output_shields=[],
346346
enable_session_persistence=False,

backend/app/core/logging_config.py

Lines changed: 8 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -46,11 +46,14 @@ def setup_logging(
4646

4747
# Ensure our backend loggers use DEBUG level
4848
logging.getLogger("backend").setLevel(logging.DEBUG)
49-
logging.getLogger("backend.routes").setLevel(logging.DEBUG)
50-
logging.getLogger("backend.routes.chat").setLevel(logging.DEBUG)
51-
logging.getLogger("backend.routes.chat_sessions").setLevel(logging.DEBUG)
52-
logging.getLogger("backend.routes.llama_stack").setLevel(logging.DEBUG)
53-
logging.getLogger("backend.routes.knowledge_bases").setLevel(logging.DEBUG)
49+
logging.getLogger("backend.app").setLevel(logging.DEBUG)
50+
logging.getLogger("backend.app.api").setLevel(logging.DEBUG)
51+
logging.getLogger("backend.app.api.v1").setLevel(logging.DEBUG)
52+
logging.getLogger("backend.app.api.v1.chat").setLevel(logging.DEBUG)
53+
logging.getLogger("backend.app.api.v1.chat_sessions").setLevel(logging.DEBUG)
54+
logging.getLogger("backend.app.api.v1.llama_stack").setLevel(logging.DEBUG)
55+
logging.getLogger("backend.app.api.v1.knowledge_bases").setLevel(logging.DEBUG)
56+
logging.getLogger("backend.app.services.chat").setLevel(logging.DEBUG)
5457

5558

5659
def _get_handlers(log_file: Optional[str], format_string: str) -> list:

backend/app/schemas/agent.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ class VirtualAgentBase(BaseModel):
2828
top_k: Optional[int] = None
2929
max_tokens: Optional[int] = None
3030
repetition_penalty: Optional[float] = None
31-
max_infer_iters: Optional[int] = None
31+
max_infer_iters: Optional[int] = 100
3232

3333

3434
class VirtualAgentCreate(VirtualAgentBase):

0 commit comments

Comments
 (0)