-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathai-observability-summarizer-example-values.yaml
More file actions
86 lines (73 loc) · 2.24 KB
/
ai-observability-summarizer-example-values.yaml
File metadata and controls
86 lines (73 loc) · 2.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
---
# Example parameter values for AI Observability Summarizer template
# Based on rh-ai-quickstart/ai-observability-summarizer repository
# Use these as a reference when filling out the template form
name: "ai-observability"
owner: "platform-team"
description: "AI-powered observability platform for OpenShift AI workloads"

# OpenShift Configuration
openshiftCluster: "https://api.prod-cluster.example.com:6443"
namespace: "ai-observability"
openshiftAIVersion: "2.16.2+"

# LLM Model
llmModel: "llama-3-2-3b-instruct"  # Options: 1b, 1b-quantized, 3b, 8b, 70b
enableGPU: false  # Set to true for larger models (8b, 70b)
# gpuToleration: "nvidia.com/gpu"

# Observability Components
enableTempo: true  # Distributed tracing
enableLoki: true  # Log aggregation
enableOpenTelemetry: true  # Trace collection
enableMinio: true  # Object storage
minioStorageSize: "50Gi"

# Monitoring
enableVLLMMonitoring: true  # Monitor vLLM model serving
enableDCGMMonitoring: false  # GPU metrics (requires GPU nodes)
prometheusRetention: "15d"

# Alerting
enableAlerts: true
slackWebhookUrl: "https://hooks.slack.com/services/YOUR/WEBHOOK/URL"
# NOTE(review): thresholds must be nested under alertThresholds — at column 0
# they would parse as unrelated top-level keys and alertThresholds as null.
alertThresholds:
  gpuUtilization: 85
  memoryUsage: 90

# Dashboards
enableMultiDashboard: true
dashboards:
  - vllm-metrics
  - openshift-metrics
  - chat-interface

# Report Generation
enableReportGeneration: true
reportFormats:
  - html
  - markdown

# MCP Integration
enableMCPServer: true  # For Claude Desktop/Cursor integration

# Resources
cpuRequest: "4"  # Options: "2", "4", "8"
memoryRequest: "8Gi"  # Options: "8Gi", "16Gi", "32Gi"
storageSize: "50Gi"

# Prerequisites Check (REQUIRED!)
hasServiceMesh: true
hasServerless: true
hasPrometheus: true

# Repository
createGitRepo: true
repoUrl: "github.com/your-org/ai-observability"

# ---
# Deployment creates:
# - llm-service (LLM inference engine)
# - llama-stack (Backend API)
# - metric-ui (Streamlit multi-dashboard)
# - mcp-server (Model Context Protocol)
# - tempo (Distributed tracing)
# - loki (Log aggregation)
# - otel-collector (OpenTelemetry)
# - minio (Object storage)
#
# Access via: oc get route -n ai-observability
#
# Example queries:
# - "How is my GPU performing?"
# - "What's my AI infrastructure cost?"
# - "Show me vLLM latency trends"
#
# Estimated deployment time: 10-15 minutes