diff --git a/terraform/gitops/generate-files/templates/monitoring/install/values-tempo.yaml.tpl b/terraform/gitops/generate-files/templates/monitoring/install/values-tempo.yaml.tpl index 108667750..2ee6eb906 100644 --- a/terraform/gitops/generate-files/templates/monitoring/install/values-tempo.yaml.tpl +++ b/terraform/gitops/generate-files/templates/monitoring/install/values-tempo.yaml.tpl @@ -1,29 +1,140 @@ +tempo: + # source: https://github.com/bitnami/charts/blob/a3c7c6e5bc685b2587a6302770e20c6890ebd72d/bitnami/grafana-tempo/values.yaml#L136C3-L231C48 + configuration: | + multitenancy_enabled: false + cache: + caches: + - memcached: + host: {{ include "grafana-tempo.memcached.url" . }} + service: memcache + timeout: 500ms + consistent_hash: true + roles: + - bloom + - trace-id-index + compactor: + compaction: + block_retention: ${tempo_retention_period} + ring: + kvstore: + store: memberlist + distributor: + ring: + kvstore: + store: memberlist + receivers: + {{- if or (.Values.tempo.traces.jaeger.thriftCompact) (.Values.tempo.traces.jaeger.thriftBinary) (.Values.tempo.traces.jaeger.thriftHttp) (.Values.tempo.traces.jaeger.grpc) }} + jaeger: + protocols: + {{- if .Values.tempo.traces.jaeger.thriftCompact }} + thrift_compact: + endpoint: 0.0.0.0:6831 + {{- end }} + {{- if .Values.tempo.traces.jaeger.thriftBinary }} + thrift_binary: + endpoint: 0.0.0.0:6832 + {{- end }} + {{- if .Values.tempo.traces.jaeger.thriftHttp }} + thrift_http: + endpoint: 0.0.0.0:14268 + {{- end }} + {{- if .Values.tempo.traces.jaeger.grpc }} + grpc: + endpoint: 0.0.0.0:14250 + {{- end }} + {{- end }} + {{- if .Values.tempo.traces.zipkin }} + zipkin: + endpoint: 0.0.0.0:9411 + {{- end }} + {{- if or (.Values.tempo.traces.otlp.http) (.Values.tempo.traces.otlp.grpc) }} + otlp: + protocols: + {{- if .Values.tempo.traces.otlp.http }} + http: + endpoint: 0.0.0.0:55681 + {{- end }} + {{- if .Values.tempo.traces.otlp.grpc }} + grpc: + endpoint: 0.0.0.0:4317 + {{- end }} + {{- end }} + {{- if .Values.tempo.traces.opencensus }} + opencensus: + endpoint: 0.0.0.0:55678 + {{- end }} + querier: + frontend_worker: + frontend_address: {{ include "grafana-tempo.query-frontend.fullname" . }}-headless:{{ .Values.queryFrontend.service.ports.grpc }} + ingester: + lifecycler: + ring: + replication_factor: 1 + kvstore: + store: memberlist + tokens_file_path: {{ .Values.tempo.dataDir }}/tokens.json + metrics_generator: + ring: + kvstore: + store: memberlist + storage: + path: {{ .Values.tempo.dataDir }}/wal + remote_write: {{ include "common.tplvalues.render" (dict "value" .Values.metricsGenerator.remoteWrite "context" $) | nindent 6 }} + memberlist: + abort_if_cluster_join_fails: false + join_members: + - {{ include "grafana-tempo.gossip-ring.fullname" . }} + overrides: + per_tenant_override_config: /bitnami/grafana-tempo/conf/overrides.yaml + server: + http_listen_port: {{ .Values.tempo.containerPorts.web }} + storage: + trace: + backend: s3 + blocklist_poll: 5m + local: + path: {{ .Values.tempo.dataDir }}/traces + wal: + path: {{ .Values.tempo.dataDir }}/wal + s3: + forcepathstyle: true + endpoint: ${minio_api_url} + insecure: true + bucket: ${minio_tempo_bucket} + + compactor: + extraEnvVarsSecret: ${minio_tempo_credentials_secret_name} nodeAffinityPreset: type: hard key: workload-class.mojaloop.io/MONITORING values: ["enabled"] distributor: + extraEnvVarsSecret: ${minio_tempo_credentials_secret_name} nodeAffinityPreset: type: hard key: workload-class.mojaloop.io/MONITORING values: ["enabled"] ingester: + extraEnvVarsSecret: ${minio_tempo_credentials_secret_name} nodeAffinityPreset: type: hard key: workload-class.mojaloop.io/MONITORING values: ["enabled"] metricsGenerator: + extraEnvVarsSecret: ${minio_tempo_credentials_secret_name} nodeAffinityPreset: type: hard key: workload-class.mojaloop.io/MONITORING values: ["enabled"] querier: + extraEnvVarsSecret: ${minio_tempo_credentials_secret_name} nodeAffinityPreset: type: hard key: workload-class.mojaloop.io/MONITORING values: ["enabled"] queryFrontend: + extraEnvVarsSecret: ${minio_tempo_credentials_secret_name} nodeAffinityPreset: type: hard key: workload-class.mojaloop.io/MONITORING diff --git a/terraform/gitops/generate-files/templates/monitoring/install/vault-minio-ext-secret.yaml.tpl b/terraform/gitops/generate-files/templates/monitoring/install/vault-minio-ext-secret.yaml.tpl index 152a60b93..5244ac963 100644 --- a/terraform/gitops/generate-files/templates/monitoring/install/vault-minio-ext-secret.yaml.tpl +++ b/terraform/gitops/generate-files/templates/monitoring/install/vault-minio-ext-secret.yaml.tpl @@ -27,4 +27,37 @@ spec: conversionStrategy: Default decodingStrategy: None key: ${minio_loki_password_key} - property: value \ No newline at end of file + property: value + +--- +apiVersion: external-secrets.io/v1beta1 +kind: ExternalSecret +metadata: + name: ${minio_tempo_credentials_secret_name} + annotations: + argocd.argoproj.io/sync-wave: "${external_secret_sync_wave}" +spec: + refreshInterval: 1h + + secretStoreRef: + kind: ClusterSecretStore + name: tenant-vault-secret-store + + target: + name: ${minio_tempo_credentials_secret_name} # Name for the secret to be created on the cluster + creationPolicy: Owner + + data: + # https://grafana.com/docs/tempo/latest/configuration/hosted-storage/s3/#amazon-s3-permissions + - secretKey: MINIO_ACCESS_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: ${minio_tempo_user_key} + property: value + - secretKey: MINIO_SECRET_KEY + remoteRef: + conversionStrategy: Default + decodingStrategy: None + key: ${minio_tempo_password_key} + property: value \ No newline at end of file diff --git a/terraform/gitops/generate-files/templates/monitoring/post-config/monitoring-crs.yaml.tpl b/terraform/gitops/generate-files/templates/monitoring/post-config/monitoring-crs.yaml.tpl index 5c4d3824b..7cb2be9e7 100644 --- a/terraform/gitops/generate-files/templates/monitoring/post-config/monitoring-crs.yaml.tpl +++ b/terraform/gitops/generate-files/templates/monitoring/post-config/monitoring-crs.yaml.tpl @@ -88,6 +88,26 @@ spec: editable: true --- apiVersion: grafana.integreatly.org/v1beta1 +kind: GrafanaDatasource +metadata: + name: tempo +spec: + instanceSelector: + matchLabels: + dashboards: "grafana" + datasource: + name: Tempo + type: tempo + access: proxy + url: http://tempo-grafana-tempo-query-frontend:3200 + jsonData: + httpHeaderName1: 'X-Scope-OrgID' + secureJsonData: + httpHeaderValue1: 'single-tenant' + isDefault: false + editable: true +--- +apiVersion: grafana.integreatly.org/v1beta1 kind: GrafanaFolder metadata: name: default diff --git a/terraform/gitops/k8s-cluster-config/monitoring.tf b/terraform/gitops/k8s-cluster-config/monitoring.tf index 87f053ab4..9592af7a6 100644 --- a/terraform/gitops/k8s-cluster-config/monitoring.tf +++ b/terraform/gitops/k8s-cluster-config/monitoring.tf @@ -1,53 +1,58 @@ module "generate_monitoring_files" { source = "../generate-files" var_map = { - grafana_crd_version_tag = try(var.common_var_map.grafana_crd_version_tag, local.grafana_crd_version_tag) - prometheus_crd_version = try(var.common_var_map.prometheus_crd_version, local.prometheus_crd_version) - loki_chart_version = try(var.common_var_map.loki_chart_version, local.loki_chart_version) - prometheus_operator_version = try(var.common_var_map.prometheus_operator_version, local.prometheus_operator_version) - prometheus_operator_release_name = local.prometheus_operator_release_name - prometheus_process_exporter_version = try(var.common_var_map.prometheus_process_exporter_version, local.prometheus_process_exporter_version) - loki_release_name = local.loki_release_name - grafana_operator_version = try(var.common_var_map.grafana_operator_version, local.grafana_operator_version) - grafana_version = try(var.common_var_map.grafana_version, local.grafana_version) - tempo_chart_version = try(var.common_var_map.tempo_chart_version, local.tempo_chart_version) - monitoring_namespace = var.monitoring_namespace - gitlab_server_url = var.gitlab_server_url - gitlab_project_url = var.gitlab_project_url - public_subdomain = var.public_subdomain - client_id = try(data.vault_generic_secret.grafana_oauth_client_id[0].data.value,"") - client_secret = try(data.vault_generic_secret.grafana_oauth_client_secret[0].data.value,"") - enable_oidc = var.enable_grafana_oidc - storage_class_name = var.storage_class_name - groups = var.gitlab_admin_group_name - prom-mojaloop-url = "http://prometheus-operated:9090" - admin_secret_pw_key = "admin-pw" - admin_secret_user_key = "admin-user" - admin_secret = "grafana-admin-secret" - admin_user_name = "grafana-admin" - alertmanager_jira_secret_ref = "${var.cluster_name}/jira-prometheus-integration-secret-key" - monitoring_sync_wave = var.monitoring_sync_wave - monitoring_post_config_sync_wave = var.monitoring_post_config_sync_wave - ingress_class = var.grafana_ingress_internal_lb ? var.internal_ingress_class_name : var.external_ingress_class_name - istio_create_ingress_gateways = var.istio_create_ingress_gateways - loki_ingester_pvc_size = try(var.common_var_map.loki_ingester_pvc_size, local.loki_ingester_pvc_size) - prometheus_pvc_size = try(var.common_var_map.prometheus_pvc_size, local.prometheus_pvc_size) - loki_retention_enabled = try(var.common_var_map.loki_retention_enabled, local.loki_retention_enabled) - loki_ingester_retention_period = try(var.common_var_map.loki_ingester_retention_period, local.loki_ingester_retention_period) - prometheus_retention_period = try(var.common_var_map.prometheus_retention_period, local.prometheus_retention_period) - alertmanager_enabled = try(var.common_var_map.alertmanager_enabled, false) - minio_loki_credentials_secret_name = "minio-loki-credentials-secret" - minio_api_url = var.minio_api_url - minio_loki_bucket = local.minio_loki_bucket - minio_loki_user_key = "${var.cluster_name}/minio_loki_username" - minio_loki_password_key = "${var.cluster_name}/minio_loki_password" - external_secret_sync_wave = var.external_secret_sync_wave - prom_tsdb_max_block_duration = try(var.common_var_map.prom_tsdb_max_block_duration, local.prom_tsdb_max_block_duration) - prom_tsdb_min_block_duration = try(var.common_var_map.prom_tsdb_min_block_duration, local.prom_tsdb_min_block_duration) - grafana_subdomain = local.grafana_subdomain - grafana_fqdn = local.grafana_fqdn - grafana_istio_gateway_namespace = local.grafana_istio_gateway_namespace - grafana_istio_wildcard_gateway_name = local.vault_istio_wildcard_gateway_name + grafana_crd_version_tag = try(var.common_var_map.grafana_crd_version_tag, local.grafana_crd_version_tag) + prometheus_crd_version = try(var.common_var_map.prometheus_crd_version, local.prometheus_crd_version) + loki_chart_version = try(var.common_var_map.loki_chart_version, local.loki_chart_version) + prometheus_operator_version = try(var.common_var_map.prometheus_operator_version, local.prometheus_operator_version) + prometheus_operator_release_name = local.prometheus_operator_release_name + prometheus_process_exporter_version = try(var.common_var_map.prometheus_process_exporter_version, local.prometheus_process_exporter_version) + loki_release_name = local.loki_release_name + grafana_operator_version = try(var.common_var_map.grafana_operator_version, local.grafana_operator_version) + grafana_version = try(var.common_var_map.grafana_version, local.grafana_version) + tempo_chart_version = try(var.common_var_map.tempo_chart_version, local.tempo_chart_version) + monitoring_namespace = var.monitoring_namespace + gitlab_server_url = var.gitlab_server_url + gitlab_project_url = var.gitlab_project_url + public_subdomain = var.public_subdomain + client_id = try(data.vault_generic_secret.grafana_oauth_client_id[0].data.value, "") + client_secret = try(data.vault_generic_secret.grafana_oauth_client_secret[0].data.value, "") + enable_oidc = var.enable_grafana_oidc + storage_class_name = var.storage_class_name + groups = var.gitlab_admin_group_name + prom-mojaloop-url = "http://prometheus-operated:9090" + admin_secret_pw_key = "admin-pw" + admin_secret_user_key = "admin-user" + admin_secret = "grafana-admin-secret" + admin_user_name = "grafana-admin" + alertmanager_jira_secret_ref = "${var.cluster_name}/jira-prometheus-integration-secret-key" + monitoring_sync_wave = var.monitoring_sync_wave + monitoring_post_config_sync_wave = var.monitoring_post_config_sync_wave + ingress_class = var.grafana_ingress_internal_lb ? var.internal_ingress_class_name : var.external_ingress_class_name + istio_create_ingress_gateways = var.istio_create_ingress_gateways + loki_ingester_pvc_size = try(var.common_var_map.loki_ingester_pvc_size, local.loki_ingester_pvc_size) + prometheus_pvc_size = try(var.common_var_map.prometheus_pvc_size, local.prometheus_pvc_size) + loki_retention_enabled = try(var.common_var_map.loki_retention_enabled, local.loki_retention_enabled) + loki_ingester_retention_period = try(var.common_var_map.loki_ingester_retention_period, local.loki_ingester_retention_period) + prometheus_retention_period = try(var.common_var_map.prometheus_retention_period, local.prometheus_retention_period) + alertmanager_enabled = try(var.common_var_map.alertmanager_enabled, false) + minio_credentials_secret_name = "minio-credentials-secret" # TODO: update to minio_loki_credentials_secret_name + minio_api_url = var.minio_api_url + minio_loki_bucket = local.minio_loki_bucket + minio_loki_user_key = "${var.cluster_name}/minio_loki_username" + minio_loki_password_key = "${var.cluster_name}/minio_loki_password" + minio_tempo_credentials_secret_name = "minio-tempo-credentials-secret" + minio_tempo_user_key = "${var.cluster_name}/minio_tempo_username" + minio_tempo_password_key = "${var.cluster_name}/minio_tempo_password" + minio_tempo_bucket = local.minio_tempo_bucket + tempo_retention_period = try(var.common_var_map.tempo_retention_period, local.tempo_retention_period) + external_secret_sync_wave = var.external_secret_sync_wave + prom_tsdb_max_block_duration = try(var.common_var_map.prom_tsdb_max_block_duration, local.prom_tsdb_max_block_duration) + prom_tsdb_min_block_duration = try(var.common_var_map.prom_tsdb_min_block_duration, local.prom_tsdb_min_block_duration) + grafana_subdomain = local.grafana_subdomain + grafana_fqdn = local.grafana_fqdn + grafana_istio_gateway_namespace = local.grafana_istio_gateway_namespace + grafana_istio_wildcard_gateway_name = local.vault_istio_wildcard_gateway_name } file_list = [for f in fileset(local.monitoring_template_path, "**/*.tpl") : trimsuffix(f, ".tpl") if !can(regex(local.monitoring_app_file, f))] template_path = local.monitoring_template_path @@ -110,7 +115,7 @@ locals { loki_chart_version = "2.13.0" prometheus_operator_version = "8.22.8" prometheus_process_exporter_version = "0.4.2" - tempo_chart_version = "2.6.0" + tempo_chart_version = "3.1.0" grafana_version = "10.2.3" grafana_operator_version = "3.5.11" monitoring_template_path = "${path.module}/../generate-files/templates/monitoring" @@ -120,6 +125,7 @@ locals { loki_retention_enabled = true loki_ingester_retention_period = "72h" prometheus_retention_period = "10d" + tempo_retention_period = "72h" prom_tsdb_min_block_duration = "30m" prom_tsdb_max_block_duration = "30m" grafana_public_fqdn = "grafana.${var.public_subdomain}" diff --git a/terraform/gitops/k8s-cluster-config/variables.tf b/terraform/gitops/k8s-cluster-config/variables.tf index 2730b1888..a0667dc74 100644 --- a/terraform/gitops/k8s-cluster-config/variables.tf +++ b/terraform/gitops/k8s-cluster-config/variables.tf @@ -130,4 +130,5 @@ locals { longhorn_backups_credentials_secret_provider_key = "minio_longhorn_password" longhorn_backups_credentials_id_provider_key = "minio_longhorn_username" minio_loki_bucket = data.gitlab_project_variable.minio_loki_bucket.value + minio_tempo_bucket = data.gitlab_project_variable.minio_tempo_bucket.value } diff --git a/terraform/k8s/default-config/common-vars.yaml b/terraform/k8s/default-config/common-vars.yaml index 5645f723a..18e056074 100644 --- a/terraform/k8s/default-config/common-vars.yaml +++ b/terraform/k8s/default-config/common-vars.yaml @@ -24,13 +24,14 @@ prometheus_operator_version: 8.22.8 prometheus_process_exporter_version: 0.4.2 grafana_operator_version: 3.5.11 grafana_version: 10.2.3 -tempo_chart_version: 2.6.0 +tempo_chart_version: 3.1.0 loki_chart_version: 2.13.0 loki_ingester_pvc_size: 10Gi prometheus_pvc_size: 50Gi loki_retention_enabled: true loki_ingester_retention_period: 72h prometheus_retention_period: 10d +tempo_retention_period: 72h istio_egress_gateway_max_replicas: 5 microk8s_version: 1.29/stable alertmanager_enabled: false