From 31c29fdab4f91e06eac25175e1bde59aefa4f21e Mon Sep 17 00:00:00 2001 From: Geoff Williams Date: Mon, 19 Dec 2022 18:21:58 +1100 Subject: [PATCH] Update attribute filters and operator metrics * Update repository and instructions for Prometheus * Rename filter attributes: * `kubernetes_namespace` -> `namespace` * `kubernetes_pod_name` -> `pod` **Confluent Operator** * Fix namespace variable dropdown in `confluent-operator` * Deleted charts (metrics not available): * Post Request Latency 99th Quantile * Get Request Latency 99th Quantile * Patch Request Latency 99th Quantile * PUT Request Latency 99th Quantile * Total number of secretRef errors per controller * New Chart: Request Latency (by component) --- monitoring/grafana-dashboard/README.md | 6 +- .../grafana-dashboard/confluent-operator.json | 485 ++---------------- .../grafana-dashboard/confluent-platform.json | 140 ++--- 3 files changed, 121 insertions(+), 510 deletions(-) diff --git a/monitoring/grafana-dashboard/README.md b/monitoring/grafana-dashboard/README.md index 343ef16c..c1c81265 100644 --- a/monitoring/grafana-dashboard/README.md +++ b/monitoring/grafana-dashboard/README.md @@ -19,12 +19,12 @@ These instructions were last verified with: * Grafana Helm chart 6.7.4 (app version 7.5.3+) ## Install Prometheus - helm repo add stable https://charts.helm.sh/stable + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts helm repo add grafana https://grafana.github.io/helm-charts helm repo update - helm upgrade --install demo-test stable/prometheus \ + helm upgrade --install prometheus prometheus-community/prometheus \ --set alertmanager.persistentVolume.enabled=false \ --set server.persistentVolume.enabled=false \ --namespace default @@ -53,7 +53,7 @@ Follow the in-browser instructions to configure a Prometheus data source for Gra [online documentation](https://prometheus.io/docs/visualization/grafana/#creating-a-prometheus-data-source). You will be asked to provide a URL. Enter the URL as shown below: - http://demo-test-prometheus-server.default.svc.cluster.local + http://prometheus-server.default.svc.cluster.local Click "Save & Test". You should see a green alert at the bottom of the page saying "Data source is working". diff --git a/monitoring/grafana-dashboard/confluent-operator.json b/monitoring/grafana-dashboard/confluent-operator.json index 363b4431..ca26d00b 100644 --- a/monitoring/grafana-dashboard/confluent-operator.json +++ b/monitoring/grafana-dashboard/confluent-operator.json @@ -106,7 +106,7 @@ "targets": [ { "exemplar": true, - "expr": "confluent_operator_liveness_probe_count{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}\n", + "expr": "confluent_operator_liveness_probe_count{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}\n", "format": "table", "interval": "", "legendFormat": "{{ version }}", @@ -178,7 +178,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.5, sum(rate(controller_runtime_reconcile_time_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\", kubernetes_pod_name=~\"$pod_name\"}[5m])) by (le, controller))", + "expr": "histogram_quantile(0.5, sum(rate(controller_runtime_reconcile_time_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\", pod=~\"$pod_name\"}[5m])) by (le, controller))", "hide": false, "interval": "", "legendFormat": "{{controller}} ", @@ -276,7 +276,7 @@ "targets": [ { "exemplar": true, - "expr": "ceil(sum(increase(controller_runtime_reconcile_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[4m])) by (controller, result))", + "expr": "ceil(sum(increase(controller_runtime_reconcile_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[4m])) by (controller, result))", "hide": false, "instant": false, "interval": "2", @@ -375,7 +375,7 @@ "targets": [ { "exemplar": true, - "expr": "ceil(sum(increase(confluent_operator_reconcile_errors_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (controller))", + "expr": "ceil(sum(increase(confluent_operator_reconcile_errors_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (controller))", "hide": false, "interval": "", "legendFormat": "{{controller}} ", @@ -487,7 +487,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",code=~\"2..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\",code=~\"2..\"}[5m]))", "interval": "4", "intervalFactor": 1, "legendFormat": "2xx", @@ -495,7 +495,7 @@ }, { "exemplar": true, - "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",code=~\"3..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\",code=~\"3..\"}[5m]))", "hide": false, "interval": "", "intervalFactor": 1, @@ -504,7 +504,7 @@ }, { "exemplar": true, - "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",code=~\"4..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\",code=~\"4..\"}[5m]))", "format": "time_series", "hide": false, "interval": "", @@ -514,7 +514,7 @@ }, { "exemplar": true, - "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",code=~\"5..\"}[5m]))", + "expr": "sum(rate(rest_client_requests_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\",code=~\"5..\"}[5m]))", "format": "time_series", "hide": false, "interval": "4", @@ -571,7 +571,7 @@ "dashLength": 10, "dashes": false, "datasource": null, - "description": "Request latency in seconds", + "description": "Request latency in seconds (99th quantile)", "fieldConfig": { "defaults": {}, "overrides": [] @@ -613,9 +613,9 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",verb=\"POST\"}[5m])) by (verb, url, le))", + "expr": "histogram_quantile(0.99, sum(rate(confluent_operator_controller_reconcile_latency_time_seconds_bucket{app=\"confluent-operator\",confluent_platform=\"true\"}[5m])) by (controller, le))", "interval": "", - "legendFormat": "{{ verb }} {{ url }}", + "legendFormat": "{{controller}}", "refId": "A" } ], @@ -623,298 +623,7 @@ "timeFrom": null, "timeRegions": [], "timeShift": null, - "title": "Post Request Latency 99th Quantile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:206", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:207", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Request latency in seconds", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 34 - }, - "hiddenSeries": false, - "id": 22, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.3", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",verb=\"GET\"}[5m])) by (verb, url, le))", - "interval": "", - "legendFormat": "{{ verb }} {{ url }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Get Request Latency 99th Quantile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:206", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:207", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Request latency in seconds", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 16, - "w": 12, - "x": 12, - "y": 34 - }, - "hiddenSeries": false, - "id": 38, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.3", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",verb=\"PATCH\"}[5m])) by (verb, url, le))", - "interval": "", - "legendFormat": "{{ verb }} {{ url }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Patch Request Latency 99th Quantile", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:206", - "format": "s", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:207", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Request latency in seconds", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 42 - }, - "hiddenSeries": false, - "id": 39, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.3", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(rest_client_request_latency_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\",verb=\"PUT\"}[5m])) by (verb, url, le))", - "interval": "", - "legendFormat": "{{ verb }} {{ url }}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "PUT Request Latency 99th Quantile", + "title": "Request Latency", "tooltip": { "shared": true, "sort": 0, @@ -1014,9 +723,9 @@ "targets": [ { "exemplar": true, - "expr": "confluent_operator_readiness_probe_count{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "confluent_operator_readiness_probe_count{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "interval": "", - "legendFormat": "{{ kubernetes_pod_name }}", + "legendFormat": "{{ pod }}", "refId": "A" } ], @@ -1110,9 +819,9 @@ "targets": [ { "exemplar": true, - "expr": "confluent_operator_liveness_probe_count{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "confluent_operator_liveness_probe_count{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "interval": "", - "legendFormat": "{{ kubernetes_pod_name }}", + "legendFormat": "{{ pod }}", "refId": "A" } ], @@ -1159,104 +868,6 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": null, - "description": "Total number of secretRef errors per controller", - "fieldConfig": { - "defaults": {}, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 8, - "w": 12, - "x": 0, - "y": 59 - }, - "hiddenSeries": false, - "id": 37, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": true, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.5.3", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "exemplar": true, - "expr": "ceil(sum(increase(confluent_operator_secret_ref_error_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name, namespace))", - "hide": false, - "interval": "", - "legendFormat": "{{name}} - {{namespace}} ", - "refId": "B" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Secret Reference Error", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "$$hashKey": "object:206", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "$$hashKey": "object:207", - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "collapsed": false, "datasource": null, @@ -1319,7 +930,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name, le))", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_queue_duration_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (name, le))", "interval": "", "legendFormat": "{{ name }}", "refId": "A" @@ -1416,7 +1027,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(workqueue_unfinished_work_seconds{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name)", + "expr": "sum(rate(workqueue_unfinished_work_seconds{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (name)", "interval": "", "legendFormat": "{{ name }}", "refId": "A" @@ -1513,7 +1124,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(workqueue_longest_running_processor_seconds{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name)", + "expr": "sum(rate(workqueue_longest_running_processor_seconds{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (name)", "interval": "", "legendFormat": "{{ name }}", "refId": "A" @@ -1610,7 +1221,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(workqueue_retries_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name, le)", + "expr": "sum(rate(workqueue_retries_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (name, le)", "interval": "", "legendFormat": "{{ name }}", "refId": "A" @@ -1707,7 +1318,7 @@ "targets": [ { "exemplar": true, - "expr": "histogram_quantile(0.99, sum(rate(workqueue_work_duration_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (le, name))", + "expr": "histogram_quantile(0.99, sum(rate(workqueue_work_duration_seconds_bucket{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (le, name))", "hide": false, "interval": "", "legendFormat": "{{name}} ", @@ -1805,7 +1416,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(workqueue_depth{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name, le)", + "expr": "sum(rate(workqueue_depth{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (name, le)", "interval": "", "legendFormat": "{{ name }}", "refId": "A" @@ -1902,7 +1513,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(workqueue_adds_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) by (name, le)", + "expr": "sum(rate(workqueue_adds_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) by (name, le)", "interval": "", "legendFormat": "{{ name }}", "refId": "A" @@ -2012,14 +1623,14 @@ "targets": [ { "exemplar": true, - "expr": "go_memstats_mspan_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_mspan_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "interval": "1", "legendFormat": "go_memstats_mspan_inuse_bytes", "refId": "A" }, { "exemplar": true, - "expr": "go_memstats_mspan_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_mspan_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_mspan_sys_byte", @@ -2027,7 +1638,7 @@ }, { "exemplar": true, - "expr": "go_memstats_mcache_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_mcache_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_mcache_inuse_bytes", @@ -2035,7 +1646,7 @@ }, { "exemplar": true, - "expr": "go_memstats_mcache_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_mcache_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_mcache_sys_bytes", @@ -2043,7 +1654,7 @@ }, { "exemplar": true, - "expr": "go_memstats_buck_hash_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_buck_hash_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_buck_hash_sys_bytes", @@ -2051,7 +1662,7 @@ }, { "exemplar": true, - "expr": "go_memstats_gc_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_gc_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_gc_sys_bytes", @@ -2059,7 +1670,7 @@ }, { "exemplar": true, - "expr": "go_memstats_other_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"} - go_memstats_other_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_other_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"} - go_memstats_other_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "Other runtime allocation", @@ -2067,7 +1678,7 @@ }, { "exemplar": true, - "expr": "go_memstats_next_gc_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_next_gc_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_next_gc_bytes", @@ -2164,14 +1775,14 @@ "targets": [ { "exemplar": true, - "expr": "go_memstats_heap_alloc_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_heap_alloc_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "interval": "1", "legendFormat": "go_memstats_heap_alloc_bytes", "refId": "A" }, { "exemplar": true, - "expr": "go_memstats_heap_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_heap_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_heap_sys_bytes", @@ -2179,7 +1790,7 @@ }, { "exemplar": true, - "expr": "go_memstats_heap_idle_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_heap_idle_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_heap_idle_bytes", @@ -2187,7 +1798,7 @@ }, { "exemplar": true, - "expr": "go_memstats_heap_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_heap_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_heap_inuse_bytes", @@ -2195,7 +1806,7 @@ }, { "exemplar": true, - "expr": "go_memstats_heap_released_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_heap_released_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_heap_released_bytes", @@ -2292,14 +1903,14 @@ "targets": [ { "exemplar": true, - "expr": "go_memstats_stack_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_stack_inuse_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "interval": "1", "legendFormat": "go_memstats_stack_inuse_bytes", "refId": "A" }, { "exemplar": true, - "expr": "go_memstats_stack_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_stack_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "hide": false, "interval": "1", "legendFormat": "go_memstats_stack_sys_bytes", @@ -2396,7 +2007,7 @@ "targets": [ { "exemplar": true, - "expr": "go_memstats_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_memstats_sys_bytes{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "format": "time_series", "instant": false, "interval": "", @@ -2495,7 +2106,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(go_memstats_mallocs_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])", + "expr": "rate(go_memstats_mallocs_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])", "interval": "1", "legendFormat": "go_memstats_mallocs_total", "refId": "A" @@ -2591,7 +2202,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(go_memstats_lookups_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[2m])", + "expr": "rate(go_memstats_lookups_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[2m])", "interval": "1", "legendFormat": "go_memstats_lookups_total", "refId": "A" @@ -2687,7 +2298,7 @@ "targets": [ { "exemplar": true, - "expr": "go_goroutines{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}", + "expr": "go_goroutines{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}", "interval": "1", "legendFormat": "go_goroutines", "refId": "A" @@ -2784,7 +2395,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(rate(go_gc_duration_seconds_sum{app=\"confluent-operator\",confluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m])) / sum(rate(go_gc_duration_seconds_count{app=\"confluent-operator\",confluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[5m]))", + "expr": "sum(rate(go_gc_duration_seconds_sum{app=\"confluent-operator\",confluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m])) / sum(rate(go_gc_duration_seconds_count{app=\"confluent-operator\",confluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[5m]))", "instant": false, "interval": "", "legendFormat": "gc-duration-mean", @@ -2881,7 +2492,7 @@ "targets": [ { "exemplar": true, - "expr": "rate(go_memstats_alloc_bytes_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",kubernetes_namespace=~\"$namespace\",kubernetes_pod_name=~\"$pod_name\"}[2m])", + "expr": "rate(go_memstats_alloc_bytes_total{app=\"confluent-operator\",\nconfluent_platform=~\"true\",namespace=~\"$namespace\",pod=~\"$pod_name\"}[2m])", "interval": "1", "legendFormat": "go_memstats_alloc_bytes_total", "refId": "A" @@ -2941,7 +2552,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(kube_pod_labels{label_app=\"confluent-operator\"}, kubernetes_namespace)", + "definition": "label_values(kube_pod_labels{pod=~\"confluent-operator.*\"}, namespace)", "description": null, "error": null, "hide": 0, @@ -2951,7 +2562,7 @@ "name": "namespace", "options": [], "query": { - "query": "label_values(kube_pod_labels{label_app=\"confluent-operator\"}, kubernetes_namespace)", + "query": "label_values(kube_pod_labels{pod=~\"confluent-operator.*\"}, namespace)", "refId": "StandardVariableQuery" }, "refresh": 2, @@ -2968,7 +2579,7 @@ "allValue": null, "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(controller_runtime_reconcile_time_seconds_bucket{confluent_platform=\"true\",app_kubernetes_io_name=\"confluent-operator\", kubernetes_namespace=\"$namespace\"}, kubernetes_pod_name)", + "definition": "label_values(controller_runtime_reconcile_time_seconds_bucket{confluent_platform=\"true\",app_kubernetes_io_name=\"confluent-operator\", namespace=\"$namespace\"}, pod)", "description": null, "error": null, "hide": 0, @@ -2978,7 +2589,7 @@ "name": "pod_name", "options": [], "query": { - "query": "label_values(controller_runtime_reconcile_time_seconds_bucket{confluent_platform=\"true\",app_kubernetes_io_name=\"confluent-operator\", kubernetes_namespace=\"$namespace\"}, kubernetes_pod_name)", + "query": "label_values(controller_runtime_reconcile_time_seconds_bucket{confluent_platform=\"true\",app_kubernetes_io_name=\"confluent-operator\", namespace=\"$namespace\"}, pod)", "refId": "StandardVariableQuery" }, "refresh": 2, diff --git a/monitoring/grafana-dashboard/confluent-platform.json b/monitoring/grafana-dashboard/confluent-platform.json index 4da6d6ab..dc4f0bf3 100644 --- a/monitoring/grafana-dashboard/confluent-platform.json +++ b/monitoring/grafana-dashboard/confluent-platform.json @@ -118,20 +118,20 @@ "targets": [ { "exemplar": true, - "expr": "io_confluent_caas_volumemetrics_used{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "io_confluent_caas_volumemetrics_used{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "Used {{kubernetes_pod_name }}", + "legendFormat": "Used {{pod }}", "refId": "A" }, { "exemplar": true, - "expr": "io_confluent_caas_volumemetrics_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "io_confluent_caas_volumemetrics_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "hide": false, "instant": false, "interval": "", - "legendFormat": "Total {{kubernetes_pod_name}}", + "legendFormat": "Total {{pod}}", "refId": "B" } ], @@ -227,11 +227,11 @@ "targets": [ { "exemplar": true, - "expr": "io_confluent_caas_volumemetrics_percentused{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "io_confluent_caas_volumemetrics_percentused{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -327,11 +327,11 @@ "targets": [ { "exemplar": true, - "expr": "io_confluent_caas_volumemetrics_percentavailable{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "io_confluent_caas_volumemetrics_percentavailable{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -671,7 +671,7 @@ "tableColumn": "", "targets": [ { - "expr": "count(kafka_server_replicamanager_value{name=\"LeaderCount\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "count(kafka_server_replicamanager_value{name=\"LeaderCount\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "", @@ -757,7 +757,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(sum(kafka_controller_kafkacontroller_value{name=\"ActiveControllerCount\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}))", + "expr": "sum(sum(kafka_controller_kafkacontroller_value{name=\"ActiveControllerCount\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}))", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -842,7 +842,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_controller_controllerstats_oneminuterate{name=~\"UncleanLeaderElectionsPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_controller_controllerstats_oneminuterate{name=~\"UncleanLeaderElectionsPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -927,7 +927,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_server_replicamanager_value{name=~\"PartitionCount\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_replicamanager_value{name=~\"PartitionCount\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -1012,7 +1012,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_server_replicamanager_value{name=~\"UnderReplicatedPartitions\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_replicamanager_value{name=~\"UnderReplicatedPartitions\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "hide": false, "intervalFactor": 2, @@ -1098,7 +1098,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_server_replicamanager_value{name=~\"OfflineReplicaCount\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_replicamanager_value{name=~\"OfflineReplicaCount\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -1169,7 +1169,7 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesInPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesInPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "hide": false, "interval": "", @@ -1178,7 +1178,7 @@ "refId": "A" }, { - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesOutPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"BytesOutPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "hide": false, "intervalFactor": 1, @@ -1277,7 +1277,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"MessagesInPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"MessagesInPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -1375,11 +1375,11 @@ "targets": [ { "exemplar": true, - "expr": "kafka_server_kafkarequesthandlerpool_oneminuterate{name=~\"RequestHandlerAvgIdlePercent\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}*100", + "expr": "kafka_server_kafkarequesthandlerpool_oneminuterate{name=~\"RequestHandlerAvgIdlePercent\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}*100", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -1472,14 +1472,14 @@ "steppedLine": false, "targets": [ { - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"TotalProduceRequestsPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"TotalProduceRequestsPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Total Produce Request Rate", "refId": "A" }, { - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"FailedProduceRequestsPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=\"FailedProduceRequestsPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Failed Produce Request Rate", @@ -1576,7 +1576,7 @@ "targets": [ { "exemplar": true, - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"TotalFetchRequestsPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"TotalFetchRequestsPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -1584,7 +1584,7 @@ "refId": "A" }, { - "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"FailedFetchRequestsPerSec\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", + "expr": "sum(kafka_server_brokertopicmetrics_oneminuterate{name=~\"FailedFetchRequestsPerSec\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"})", "format": "time_series", "intervalFactor": 1, "legendFormat": "Failed Fetch Request Rate", @@ -1681,11 +1681,11 @@ "targets": [ { "exemplar": true, - "expr": "kafka_network_socketserver_value{name=~\"NetworkProcessorAvgIdlePercent\",kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}*100", + "expr": "kafka_network_socketserver_value{name=~\"NetworkProcessorAvgIdlePercent\",namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}*100", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -1811,7 +1811,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_quorumsize\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "avg({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_quorumsize\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -1896,7 +1896,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_numaliveconnections\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_numaliveconnections\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "hide": false, "instant": false, @@ -2003,12 +2003,12 @@ "steppedLine": false, "targets": [ { - "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_outstandingrequests\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"} ", + "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_outstandingrequests\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"} ", "format": "time_series", "instant": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}-{{name2}}", + "legendFormat": "{{pod}}-{{name2}}", "refId": "A" } ], @@ -2127,7 +2127,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_nodecount\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "avg({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_nodecount\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2212,7 +2212,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_watchcount\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum({__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_watchcount\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2279,10 +2279,10 @@ "steppedLine": false, "targets": [ { - "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_minrequestlatency\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_minrequestlatency\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -2506,10 +2506,10 @@ "steppedLine": false, "targets": [ { - "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_maxrequestlatency\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_maxrequestlatency\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -2644,10 +2644,10 @@ "steppedLine": false, "targets": [ { - "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_avgrequestlatency\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"org_apache_zookeeperservice_replicatedserver_.*_avgrequestlatency\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -2781,11 +2781,11 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_count{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_count{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "instant": false, "intervalFactor": 1, - "legendFormat": "{{ kubernetes_pod_name}}", + "legendFormat": "{{ pod}}", "refId": "A" } ], @@ -2868,7 +2868,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_startup_success_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_startup_success_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -2953,7 +2953,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_connector_startup_failure_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_connector_startup_failure_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -3037,7 +3037,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_task_count{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_task_count{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -3122,7 +3122,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_task_startup_success_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_task_startup_success_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -3207,7 +3207,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum(kafka_connect_connect_worker_metrics_task_startup_failure_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum(kafka_connect_connect_worker_metrics_task_startup_failure_total{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -3273,10 +3273,10 @@ "steppedLine": false, "targets": [ { - "expr": "kafka_connect_connect_metrics_io_ratio{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "kafka_connect_connect_metrics_io_ratio{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{ kubernetes_pod_name }}", + "legendFormat": "{{ pod }}", "refId": "A" } ], @@ -3368,10 +3368,10 @@ "steppedLine": false, "targets": [ { - "expr": "kafka_connect_connect_metrics_incoming_byte_rate{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "kafka_connect_connect_metrics_incoming_byte_rate{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -3463,10 +3463,10 @@ "steppedLine": false, "targets": [ { - "expr": "kafka_connect_connect_metrics_network_io_rate{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "kafka_connect_connect_metrics_network_io_rate{app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -3573,12 +3573,12 @@ "targets": [ { "exemplar": true, - "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_bytes_consumed_total\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_bytes_consumed_total\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "hide": false, "interval": "", "intervalFactor": 1, - "legendFormat": "{{ kubernetes_pod_name }}", + "legendFormat": "{{ pod }}", "refId": "A" } ], @@ -3672,11 +3672,11 @@ "targets": [ { "exemplar": true, - "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_messages_consumed_per_sec\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_messages_consumed_per_sec\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{ kubernetes_pod_name }}", + "legendFormat": "{{ pod }}", "refId": "A" } ], @@ -3770,11 +3770,11 @@ "targets": [ { "exemplar": true, - "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_messages_produced_per_sec\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_messages_produced_per_sec\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{ kubernetes_pod_name}}", + "legendFormat": "{{ pod}}", "refId": "A" } ], @@ -3868,11 +3868,11 @@ "targets": [ { "exemplar": true, - "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_num_active_queries\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_num_active_queries\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{ kubernetes_pod_name}}", + "legendFormat": "{{ pod}}", "refId": "A" } ], @@ -3966,11 +3966,11 @@ "targets": [ { "exemplar": true, - "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_num_idle_queries\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_num_idle_queries\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -4064,11 +4064,11 @@ "targets": [ { "exemplar": true, - "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_num_persistent_queries\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"}", + "expr": "{__name__=~\"io_confluent_ksql_metrics_confluent_ksql_engine_query_stats_num_persistent_queries\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"}", "format": "time_series", "interval": "", "intervalFactor": 1, - "legendFormat": " {{kubernetes_pod_name}}", + "legendFormat": " {{pod}}", "refId": "A" } ], @@ -4195,7 +4195,7 @@ "targets": [ { "exemplar": true, - "expr": "count({__name__=~\"schemaregistry_.*_jetty_metrics_connections_active\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "count({__name__=~\"schemaregistry_.*_jetty_metrics_connections_active\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "interval": "", "intervalFactor": 1, @@ -4282,7 +4282,7 @@ "tableColumn": "", "targets": [ { - "expr": "sum({__name__=~\"schemaregistry_.*_jetty_metrics_connections_active\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "sum({__name__=~\"schemaregistry_.*_jetty_metrics_connections_active\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -4367,7 +4367,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg({__name__=~\"schemaregistry_.*_jetty_metrics_connections_opened_rate\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "avg({__name__=~\"schemaregistry_.*_jetty_metrics_connections_opened_rate\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -4452,7 +4452,7 @@ "tableColumn": "", "targets": [ { - "expr": "avg({__name__=~\"schemaregistry_.*_jetty_metrics_connections_closed_rate\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",kubernetes_namespace=~\"$namespace\"})", + "expr": "avg({__name__=~\"schemaregistry_.*_jetty_metrics_connections_closed_rate\",app=~\"$component_name\", platform_confluent_io_type=~\"$controller_type\",namespace=~\"$namespace\"})", "format": "time_series", "intervalFactor": 1, "refId": "A" @@ -4533,11 +4533,11 @@ "targets": [ { "exemplar": true, - "expr": "confluent_controlcenter_healthcheck_streams_status{kubernetes_namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}", + "expr": "confluent_controlcenter_healthcheck_streams_status{namespace=~\"$namespace\",app=~\"$component_name\",platform_confluent_io_type=~\"$controller_type\"}", "format": "time_series", "instant": false, "interval": "", - "legendFormat": "{{kubernetes_pod_name}}", + "legendFormat": "{{pod}}", "refId": "Healthcheck Streams Status" } ], @@ -4603,7 +4603,7 @@ "allValue": ".+", "current": {}, "datasource": "${DS_PROMETHEUS}", - "definition": "label_values(kubernetes_namespace)", + "definition": "label_values(namespace)", "description": null, "error": null, "hide": 0, @@ -4613,7 +4613,7 @@ "name": "namespace", "options": [], "query": { - "query": "label_values(kubernetes_namespace)", + "query": "label_values(namespace)", "refId": "Prometheus-namespace-Variable-Query" }, "refresh": 1,