Skip to content

Commit

Permalink
IPROD-565: Setup tempo to use minio (#232)
Browse files Browse the repository at this point in the history
* enable env variable expansion in config

* update tempo chart version

* add minio_tempo_credentials_secret_name

* update

* minio tempo credentials secret

* added tempo datasource

* replace extraArgs with args

* remove extra args

* update config

* fix bugs

* added extraEnvVarsSecret to remaining services

* switch to s3

* add tempo retention period

* use hours instead of days

* get minio_tempo_bucket from gitlab

* use minio api url

* use minio_tempo_credentials_secret_name variable

* refactor

---------

Co-authored-by: David Fry <[email protected]>
  • Loading branch information
muzammil360 and dfry authored Apr 26, 2024
1 parent ead675f commit a33da60
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 50 deletions.
Original file line number Diff line number Diff line change
@@ -1,29 +1,140 @@
# Tempo values: overrides the Tempo configuration so that traces are written to
# an S3-compatible bucket (storage.trace.backend is set to s3 below).
# Placeholders of the form ${...} used in this block are tempo_retention_period
# (compactor block retention), minio_api_url (s3 endpoint) and
# minio_tempo_bucket (s3 bucket). The {{ ... }} expressions are Helm template
# syntax taken from the chart.
# NOTE(review): the ${...} values are presumably substituted when this template
# file is generated, before Helm renders it - confirm against the generator.
tempo:
# source: https://github.com/bitnami/charts/blob/a3c7c6e5bc685b2587a6302770e20c6890ebd72d/bitnami/grafana-tempo/values.yaml#L136C3-L231C48
# NOTE(review): presumably the upstream default configuration at the link above
# was used as the starting point - re-check it when bumping the chart version.
configuration: |
multitenancy_enabled: false
cache:
caches:
- memcached:
host: {{ include "grafana-tempo.memcached.url" . }}
service: memcache
timeout: 500ms
consistent_hash: true
roles:
- bloom
- trace-id-index
compactor:
compaction:
block_retention: ${tempo_retention_period}
ring:
kvstore:
store: memberlist
distributor:
ring:
kvstore:
store: memberlist
receivers:
{{- if or (.Values.tempo.traces.jaeger.thriftCompact) (.Values.tempo.traces.jaeger.thriftBinary) (.Values.tempo.traces.jaeger.thriftHttp) (.Values.tempo.traces.jaeger.grpc) }}
jaeger:
protocols:
{{- if .Values.tempo.traces.jaeger.thriftCompact }}
thrift_compact:
endpoint: 0.0.0.0:6831
{{- end }}
{{- if .Values.tempo.traces.jaeger.thriftBinary }}
thrift_binary:
endpoint: 0.0.0.0:6832
{{- end }}
{{- if .Values.tempo.traces.jaeger.thriftHttp }}
thrift_http:
endpoint: 0.0.0.0:14268
{{- end }}
{{- if .Values.tempo.traces.jaeger.grpc }}
grpc:
endpoint: 0.0.0.0:14250
{{- end }}
{{- end }}
{{- if .Values.tempo.traces.zipkin }}
zipkin:
endpoint: 0.0.0.0:9411
{{- end }}
{{- if or (.Values.tempo.traces.otlp.http) (.Values.tempo.traces.otlp.grpc) }}
otlp:
protocols:
{{- if .Values.tempo.traces.otlp.http }}
http:
endpoint: 0.0.0.0:55681
{{- end }}
{{- if .Values.tempo.traces.otlp.grpc }}
grpc:
endpoint: 0.0.0.0:4317
{{- end }}
{{- end }}
{{- if .Values.tempo.traces.opencensus }}
opencensus:
endpoint: 0.0.0.0:55678
{{- end }}
querier:
frontend_worker:
frontend_address: {{ include "grafana-tempo.query-frontend.fullname" . }}-headless:{{ .Values.queryFrontend.service.ports.grpc }}
ingester:
lifecycler:
ring:
replication_factor: 1
kvstore:
store: memberlist
tokens_file_path: {{ .Values.tempo.dataDir }}/tokens.json
metrics_generator:
ring:
kvstore:
store: memberlist
storage:
path: {{ .Values.tempo.dataDir }}/wal
remote_write: {{ include "common.tplvalues.render" (dict "value" .Values.metricsGenerator.remoteWrite "context" $) | nindent 6 }}
memberlist:
abort_if_cluster_join_fails: false
join_members:
- {{ include "grafana-tempo.gossip-ring.fullname" . }}
overrides:
per_tenant_override_config: /bitnami/grafana-tempo/conf/overrides.yaml
server:
http_listen_port: {{ .Values.tempo.containerPorts.web }}
storage:
trace:
backend: s3
blocklist_poll: 5m
local:
path: {{ .Values.tempo.dataDir }}/traces
wal:
path: {{ .Values.tempo.dataDir }}/wal
s3:
forcepathstyle: true
endpoint: ${minio_api_url}
insecure: true
bucket: ${minio_tempo_bucket}


# Per-component overrides. compactor, distributor, ingester, metricsGenerator
# and querier all carry the same two settings:
#   - extraEnvVarsSecret is set to ${minio_tempo_credentials_secret_name}
#   - nodeAffinityPreset is of type "hard" on the node label
#     workload-class.mojaloop.io/MONITORING with the value "enabled"
# NOTE(review): the secret presumably supplies the MinIO access/secret keys that
# the s3 trace backend reads from the environment - confirm the key names in
# that secret match what Tempo expects.
# Keep the stanzas identical when changing one of them.
compactor:
extraEnvVarsSecret: ${minio_tempo_credentials_secret_name}
nodeAffinityPreset:
type: hard
key: workload-class.mojaloop.io/MONITORING
values: ["enabled"]
distributor:
extraEnvVarsSecret: ${minio_tempo_credentials_secret_name}
nodeAffinityPreset:
type: hard
key: workload-class.mojaloop.io/MONITORING
values: ["enabled"]
ingester:
extraEnvVarsSecret: ${minio_tempo_credentials_secret_name}
nodeAffinityPreset:
type: hard
key: workload-class.mojaloop.io/MONITORING
values: ["enabled"]
metricsGenerator:
extraEnvVarsSecret: ${minio_tempo_credentials_secret_name}
nodeAffinityPreset:
type: hard
key: workload-class.mojaloop.io/MONITORING
values: ["enabled"]
querier:
extraEnvVarsSecret: ${minio_tempo_credentials_secret_name}
nodeAffinityPreset:
type: hard
key: workload-class.mojaloop.io/MONITORING
values: ["enabled"]
queryFrontend:
extraEnvVarsSecret: ${minio_tempo_credentials_secret_name}
nodeAffinityPreset:
type: hard
key: workload-class.mojaloop.io/MONITORING
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,37 @@ spec:
conversionStrategy: Default
decodingStrategy: None
key: ${minio_loki_password_key}
property: value
property: value

---
# ExternalSecret for the Tempo MinIO credentials.
# Creates a secret named ${minio_tempo_credentials_secret_name} with two keys,
# MINIO_ACCESS_KEY and MINIO_SECRET_KEY, whose values are read from the
# ClusterSecretStore "tenant-vault-secret-store".
apiVersion: external-secrets.io/v1beta1
kind: ExternalSecret
metadata:
name: ${minio_tempo_credentials_secret_name}
annotations:
# Sync wave is quoted so the annotation value stays a string.
argocd.argoproj.io/sync-wave: "${external_secret_sync_wave}"
spec:
# Interval at which the remote values are refreshed.
refreshInterval: 1h

# Store the remote references below are resolved against.
secretStoreRef:
kind: ClusterSecretStore
name: tenant-vault-secret-store

target:
name: ${minio_tempo_credentials_secret_name} # Name for the secret to be created on the cluster
creationPolicy: Owner

data:
# https://grafana.com/docs/tempo/latest/configuration/hosted-storage/s3/#amazon-s3-permissions
# Access key: taken from the "value" property of ${minio_tempo_user_key}.
- secretKey: MINIO_ACCESS_KEY
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: ${minio_tempo_user_key}
property: value
# Secret key: taken from the "value" property of ${minio_tempo_password_key}.
- secretKey: MINIO_SECRET_KEY
remoteRef:
conversionStrategy: Default
decodingStrategy: None
key: ${minio_tempo_password_key}
property: value
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,26 @@ spec:
editable: true
---
# GrafanaDatasource registering a datasource named "Tempo" (type tempo, proxy
# access) on Grafana instances selected by the label dashboards: "grafana".
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaDatasource
metadata:
name: tempo
spec:
instanceSelector:
matchLabels:
dashboards: "grafana"
datasource:
name: Tempo
type: tempo
access: proxy
# Tempo query-frontend service on port 3200.
url: http://tempo-grafana-tempo-query-frontend:3200
# httpHeaderName1 / httpHeaderValue1 form one custom header pair:
# X-Scope-OrgID with the value single-tenant.
# NOTE(review): presumably the tenant header Tempo expects - confirm it is
# still needed if Tempo runs with multi-tenancy disabled.
jsonData:
httpHeaderName1: 'X-Scope-OrgID'
secureJsonData:
httpHeaderValue1: 'single-tenant'
isDefault: false
editable: true
---
apiVersion: grafana.integreatly.org/v1beta1
kind: GrafanaFolder
metadata:
name: default
Expand Down
102 changes: 54 additions & 48 deletions terraform/gitops/k8s-cluster-config/monitoring.tf
Original file line number Diff line number Diff line change
@@ -1,53 +1,58 @@
module "generate_monitoring_files" {
source = "../generate-files"
var_map = {
grafana_crd_version_tag = try(var.common_var_map.grafana_crd_version_tag, local.grafana_crd_version_tag)
prometheus_crd_version = try(var.common_var_map.prometheus_crd_version, local.prometheus_crd_version)
loki_chart_version = try(var.common_var_map.loki_chart_version, local.loki_chart_version)
prometheus_operator_version = try(var.common_var_map.prometheus_operator_version, local.prometheus_operator_version)
prometheus_operator_release_name = local.prometheus_operator_release_name
prometheus_process_exporter_version = try(var.common_var_map.prometheus_process_exporter_version, local.prometheus_process_exporter_version)
loki_release_name = local.loki_release_name
grafana_operator_version = try(var.common_var_map.grafana_operator_version, local.grafana_operator_version)
grafana_version = try(var.common_var_map.grafana_version, local.grafana_version)
tempo_chart_version = try(var.common_var_map.tempo_chart_version, local.tempo_chart_version)
monitoring_namespace = var.monitoring_namespace
gitlab_server_url = var.gitlab_server_url
gitlab_project_url = var.gitlab_project_url
public_subdomain = var.public_subdomain
client_id = try(data.vault_generic_secret.grafana_oauth_client_id[0].data.value,"")
client_secret = try(data.vault_generic_secret.grafana_oauth_client_secret[0].data.value,"")
enable_oidc = var.enable_grafana_oidc
storage_class_name = var.storage_class_name
groups = var.gitlab_admin_group_name
prom-mojaloop-url = "http://prometheus-operated:9090"
admin_secret_pw_key = "admin-pw"
admin_secret_user_key = "admin-user"
admin_secret = "grafana-admin-secret"
admin_user_name = "grafana-admin"
alertmanager_jira_secret_ref = "${var.cluster_name}/jira-prometheus-integration-secret-key"
monitoring_sync_wave = var.monitoring_sync_wave
monitoring_post_config_sync_wave = var.monitoring_post_config_sync_wave
ingress_class = var.grafana_ingress_internal_lb ? var.internal_ingress_class_name : var.external_ingress_class_name
istio_create_ingress_gateways = var.istio_create_ingress_gateways
loki_ingester_pvc_size = try(var.common_var_map.loki_ingester_pvc_size, local.loki_ingester_pvc_size)
prometheus_pvc_size = try(var.common_var_map.prometheus_pvc_size, local.prometheus_pvc_size)
loki_retention_enabled = try(var.common_var_map.loki_retention_enabled, local.loki_retention_enabled)
loki_ingester_retention_period = try(var.common_var_map.loki_ingester_retention_period, local.loki_ingester_retention_period)
prometheus_retention_period = try(var.common_var_map.prometheus_retention_period, local.prometheus_retention_period)
alertmanager_enabled = try(var.common_var_map.alertmanager_enabled, false)
minio_loki_credentials_secret_name = "minio-loki-credentials-secret"
minio_api_url = var.minio_api_url
minio_loki_bucket = local.minio_loki_bucket
minio_loki_user_key = "${var.cluster_name}/minio_loki_username"
minio_loki_password_key = "${var.cluster_name}/minio_loki_password"
external_secret_sync_wave = var.external_secret_sync_wave
prom_tsdb_max_block_duration = try(var.common_var_map.prom_tsdb_max_block_duration, local.prom_tsdb_max_block_duration)
prom_tsdb_min_block_duration = try(var.common_var_map.prom_tsdb_min_block_duration, local.prom_tsdb_min_block_duration)
grafana_subdomain = local.grafana_subdomain
grafana_fqdn = local.grafana_fqdn
grafana_istio_gateway_namespace = local.grafana_istio_gateway_namespace
grafana_istio_wildcard_gateway_name = local.vault_istio_wildcard_gateway_name
grafana_crd_version_tag = try(var.common_var_map.grafana_crd_version_tag, local.grafana_crd_version_tag)
prometheus_crd_version = try(var.common_var_map.prometheus_crd_version, local.prometheus_crd_version)
loki_chart_version = try(var.common_var_map.loki_chart_version, local.loki_chart_version)
prometheus_operator_version = try(var.common_var_map.prometheus_operator_version, local.prometheus_operator_version)
prometheus_operator_release_name = local.prometheus_operator_release_name
prometheus_process_exporter_version = try(var.common_var_map.prometheus_process_exporter_version, local.prometheus_process_exporter_version)
loki_release_name = local.loki_release_name
grafana_operator_version = try(var.common_var_map.grafana_operator_version, local.grafana_operator_version)
grafana_version = try(var.common_var_map.grafana_version, local.grafana_version)
tempo_chart_version = try(var.common_var_map.tempo_chart_version, local.tempo_chart_version)
monitoring_namespace = var.monitoring_namespace
gitlab_server_url = var.gitlab_server_url
gitlab_project_url = var.gitlab_project_url
public_subdomain = var.public_subdomain
client_id = try(data.vault_generic_secret.grafana_oauth_client_id[0].data.value, "")
client_secret = try(data.vault_generic_secret.grafana_oauth_client_secret[0].data.value, "")
enable_oidc = var.enable_grafana_oidc
storage_class_name = var.storage_class_name
groups = var.gitlab_admin_group_name
prom-mojaloop-url = "http://prometheus-operated:9090"
admin_secret_pw_key = "admin-pw"
admin_secret_user_key = "admin-user"
admin_secret = "grafana-admin-secret"
admin_user_name = "grafana-admin"
alertmanager_jira_secret_ref = "${var.cluster_name}/jira-prometheus-integration-secret-key"
monitoring_sync_wave = var.monitoring_sync_wave
monitoring_post_config_sync_wave = var.monitoring_post_config_sync_wave
ingress_class = var.grafana_ingress_internal_lb ? var.internal_ingress_class_name : var.external_ingress_class_name
istio_create_ingress_gateways = var.istio_create_ingress_gateways
loki_ingester_pvc_size = try(var.common_var_map.loki_ingester_pvc_size, local.loki_ingester_pvc_size)
prometheus_pvc_size = try(var.common_var_map.prometheus_pvc_size, local.prometheus_pvc_size)
loki_retention_enabled = try(var.common_var_map.loki_retention_enabled, local.loki_retention_enabled)
loki_ingester_retention_period = try(var.common_var_map.loki_ingester_retention_period, local.loki_ingester_retention_period)
prometheus_retention_period = try(var.common_var_map.prometheus_retention_period, local.prometheus_retention_period)
alertmanager_enabled = try(var.common_var_map.alertmanager_enabled, false)
minio_credentials_secret_name = "minio-credentials-secret" # TODO: update to minio_loki_credentials_secret_name
minio_api_url = var.minio_api_url
minio_loki_bucket = local.minio_loki_bucket
minio_loki_user_key = "${var.cluster_name}/minio_loki_username"
minio_loki_password_key = "${var.cluster_name}/minio_loki_password"
minio_tempo_credentials_secret_name = "minio-tempo-credentials-secret"
minio_tempo_user_key = "${var.cluster_name}/minio_tempo_username"
minio_tempo_password_key = "${var.cluster_name}/minio_tempo_password"
minio_tempo_bucket = local.minio_tempo_bucket
tempo_retention_period = try(var.common_var_map.tempo_retention_period, local.tempo_retention_period)
external_secret_sync_wave = var.external_secret_sync_wave
prom_tsdb_max_block_duration = try(var.common_var_map.prom_tsdb_max_block_duration, local.prom_tsdb_max_block_duration)
prom_tsdb_min_block_duration = try(var.common_var_map.prom_tsdb_min_block_duration, local.prom_tsdb_min_block_duration)
grafana_subdomain = local.grafana_subdomain
grafana_fqdn = local.grafana_fqdn
grafana_istio_gateway_namespace = local.grafana_istio_gateway_namespace
grafana_istio_wildcard_gateway_name = local.vault_istio_wildcard_gateway_name
}
file_list = [for f in fileset(local.monitoring_template_path, "**/*.tpl") : trimsuffix(f, ".tpl") if !can(regex(local.monitoring_app_file, f))]
template_path = local.monitoring_template_path
Expand Down Expand Up @@ -110,7 +115,7 @@ locals {
loki_chart_version = "2.13.0"
prometheus_operator_version = "8.22.8"
prometheus_process_exporter_version = "0.4.2"
tempo_chart_version = "2.6.0"
tempo_chart_version = "3.1.0"
grafana_version = "10.2.3"
grafana_operator_version = "3.5.11"
monitoring_template_path = "${path.module}/../generate-files/templates/monitoring"
Expand All @@ -120,6 +125,7 @@ locals {
loki_retention_enabled = true
loki_ingester_retention_period = "72h"
prometheus_retention_period = "10d"
tempo_retention_period = "72h"
prom_tsdb_min_block_duration = "30m"
prom_tsdb_max_block_duration = "30m"
grafana_public_fqdn = "grafana.${var.public_subdomain}"
Expand Down
1 change: 1 addition & 0 deletions terraform/gitops/k8s-cluster-config/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -130,4 +130,5 @@ locals {
longhorn_backups_credentials_secret_provider_key = "minio_longhorn_password"
longhorn_backups_credentials_id_provider_key = "minio_longhorn_username"
minio_loki_bucket = data.gitlab_project_variable.minio_loki_bucket.value
minio_tempo_bucket = data.gitlab_project_variable.minio_tempo_bucket.value
}
3 changes: 2 additions & 1 deletion terraform/k8s/default-config/common-vars.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,14 @@ prometheus_operator_version: 8.22.8
prometheus_process_exporter_version: 0.4.2
grafana_operator_version: 3.5.11
grafana_version: 10.2.3
tempo_chart_version: 2.6.0
tempo_chart_version: 3.1.0
loki_chart_version: 2.13.0
loki_ingester_pvc_size: 10Gi
prometheus_pvc_size: 50Gi
loki_retention_enabled: true
loki_ingester_retention_period: 72h
prometheus_retention_period: 10d
tempo_retention_period: 72h
istio_egress_gateway_max_replicas: 5
microk8s_version: 1.29/stable
alertmanager_enabled: false
Expand Down

0 comments on commit a33da60

Please sign in to comment.