Skip to content

feat: allow scaling telemetry-proxy #153

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
May 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions .github/workflows/build-test-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,8 @@ jobs:
--set "controllerManager.manager.image.tag=${{ github.run_id }}" \
--set "controllerManager.telemetryProxy.image.repository=${PROXY_IMG}" \
--set "controllerManager.telemetryProxy.image.tag=${{ github.run_id }}" \
--set "telemetryProxy.image.repository=${PROXY_IMG}" \
--set "telemetryProxy.image.tag=${{ github.run_id }}" \
--set "watchdog.image.repository=${WATCHDOG_IMG}" \
--set "watchdog.image.tag=${{ github.run_id }}" \
--set "watchdog.enabled=true" \
Expand All @@ -149,15 +151,18 @@ jobs:
--set "controllerManager.manager.image.tag=${{ github.run_id }}" \
--set "controllerManager.telemetryProxy.image.repository=${PROXY_IMG}" \
--set "controllerManager.telemetryProxy.image.tag=${{ github.run_id }}" \
--set "telemetryProxy.image.repository=${PROXY_IMG}" \
--set "telemetryProxy.image.tag=${{ github.run_id }}" \
--set "watchdog.image.repository=${WATCHDOG_IMG}" \
--set "watchdog.image.tag=${{ github.run_id }}" \
--set "watchdog.enabled=true" \
--debug --wait --timeout 4m; then

kubectl describe all -n lumigo-system
kubectl logs -l control-plane=controller-manager -n lumigo-system -c manager
kubectl logs -l control-plane=controller-manager -n lumigo-system -c telemetry-proxy
kubectl logs -l control-plane=controller-manager -n lumigo-system -c watchdog
kubectl logs -l control-plane=controller-manager -n lumigo-system -c events-scraper
kubectl logs -l control-plane=watchdog -n lumigo-system -c watchdog
kubectl logs -l control-plane=telemetry-proxy -n lumigo-system -c telemetry-proxy
exit 1
fi
- name: Run end-to-end tests
Expand Down Expand Up @@ -288,6 +293,7 @@ jobs:
# Dump the logs of each operator component before failing the job.
kubectl logs -l control-plane=controller-manager -n lumigo-system -c manager
kubectl logs -l control-plane=controller-manager -n lumigo-system -c telemetry-proxy
kubectl logs -l control-plane=watchdog -n lumigo-system -c watchdog
# The container in the telemetry-proxy deployment is named "telemetry-proxy";
# a misspelled -c value makes kubectl fail instead of printing the logs.
kubectl logs -l control-plane=telemetry-proxy -n lumigo-system -c telemetry-proxy
exit 1
fi

Expand Down Expand Up @@ -728,6 +734,8 @@ jobs:
yq e -i ".controllerManager.manager.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-operator\"" charts/lumigo-operator/values.yaml
yq e -i ".controllerManager.manager.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
yq e -i ".controllerManager.telemetryProxy.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-telemetry-proxy\"" charts/lumigo-operator/values.yaml
yq e -i ".telemetryProxy.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
yq e -i ".telemetryProxy.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-telemetry-proxy\"" charts/lumigo-operator/values.yaml
yq e -i ".controllerManager.telemetryProxy.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
yq e -i ".controllerManager.kubeRbacProxy.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-rbac-proxy\"" charts/lumigo-operator/values.yaml
yq e -i ".controllerManager.kubeRbacProxy.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
Expand Down
186 changes: 156 additions & 30 deletions charts/lumigo-operator/templates/controller-deployment-and-webhooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ metadata:
control-plane: controller-manager
lumigo.auto-trace: 'false' # We do not need the operator to inject itself
spec:
replicas: {{ .Values.controllerManager.replicas }}
# this must be 1: the events-scraper is not meant to run in multiple replicas,
# since that would result in duplicate events being scraped
replicas: 1
selector:
matchLabels:
control-plane: controller-manager
Expand Down Expand Up @@ -229,23 +231,25 @@ spec:
- name: namespace-configurations
mountPath: /lumigo/etc/namespaces/
readOnly: false
- name: telemetry-proxy
- name: events-scraper
image: {{ .Values.controllerManager.telemetryProxy.image.repository }}:{{ .Values.controllerManager.telemetryProxy.image.tag | default .Chart.AppVersion }}
env:
{{- if .Values.cluster }}
{{- if .Values.cluster.name }}
{{- if and .Values.cluster .Values.cluster.name }}
- name: KUBERNETES_CLUSTER_NAME
value: "{{ .Values.cluster.name }}"
{{- end }}
{{- end }}
- name: LUMIGO_DEBUG
value: "{{ .Values.debug.enabled | default false }}"
- name: LUMIGO_ENDPOINT
value: "{{ .Values.endpoint.otlp.url }}"
- name: LUMIGO_LOGS_ENDPOINT
value: "{{ .Values.endpoint.otlp.logs_url }}"
- name: "LUMIGO_WATCHDOG_ENABLED"
value: "{{ .Values.watchdog.enabled }}"
{{ if .Values.watchdog.enabled }}
- name: LUMIGO_METRICS_ENDPOINT
value: "{{ .Values.endpoint.otlp.metrics_url }}"
- name: "LUMIGO_OTELCOL_METRICS_SCRAPING_FREQUENCY"
value: "{{ .Values.watchdog.otelCollector.internalMetricsFrequency }}"
{{ end }}
{{- if .Values.lumigoToken }}
- name: LUMIGO_INFRA_METRICS_TOKEN
valueFrom:
Expand All @@ -254,28 +258,6 @@ spec:
key: {{ .Values.lumigoToken.secretKey }}
optional: true
{{- end }}
- name: LUMIGO_INFRA_METRICS_ENABLED
value: "{{ .Values.clusterCollection.metrics.enabled }}"
{{- if .Values.clusterCollection.metrics.enabled }}
- name: LUMIGO_INFRA_METRICS_SCRAPING_FREQUENCY
value: "{{ .Values.clusterCollection.metrics.frequency }}"
- name: LUMIGO_CLUSTER_AGENT_SERVICE
value: "{{ include "helm.fullname" . }}-cluster-agent-service.{{ .Release.Namespace }}.svc.cluster.local"
- name: LUMIGO_PROM_NODE_EXPORTER_PORT
value: "{{ .Values.prometheusNodeExporter.service.nodePort }}"
- name: LUMIGO_KUBE_STATE_METRICS_SERVICE
value: "{{ .Release.Name }}-kube-state-metrics.{{ .Release.Namespace }}.svc.cluster.local"
- name: LUMIGO_KUBE_STATE_METRICS_PORT
value: "{{ index .Values "kube-state-metrics" "service" "port" }}"
- name: LUMIGO_EXPORT_ESSENTIAL_METRICS_ONLY
value: "{{ .Values.clusterCollection.metrics.essentialOnly }}"
{{- end }}
- name: "LUMIGO_WATCHDOG_ENABLED"
value: "{{ .Values.watchdog.enabled }}"
{{ if .Values.watchdog.enabled }}
- name: "LUMIGO_OTELCOL_METRICS_SCRAPING_FREQUENCY"
value: "{{ .Values.watchdog.otelCollector.internalMetricsFrequency }}"
{{ end }}
- name: LUMIGO_OPERATOR_VERSION
value: "{{ $lumigoOperatorVersion }}"
- name: LUMIGO_OPERATOR_DEPLOYMENT_METHOD
Expand All @@ -284,6 +266,8 @@ spec:
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: OTELCOL_CONFIG_TEMPLATE_FILE_PATH
value: /lumigo/etc/otelcol-events-config.yaml.tpl
ports:
- containerPort: 4318
name: otlphttp
Expand All @@ -294,7 +278,6 @@ spec:
capabilities:
drop:
- ALL
# readOnlyRootFilesystem: true
runAsNonRoot: true
runAsUser: 1234
volumeMounts:
Expand Down Expand Up @@ -328,6 +311,19 @@ spec:
- ALL
readOnlyRootFilesystem: true
runAsNonRoot: true
- name: namespace-config-server
image: busybox:1.35
command: ["httpd", "-f", "-p", "8077", "-h", "/namespaces"]
volumeMounts:
- name: namespace-configurations
mountPath: /namespaces
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
runAsNonRoot: false
securityContext:
runAsNonRoot: true
fsGroup: 1234
Expand All @@ -338,6 +334,136 @@ spec:
secret:
defaultMode: 420
secretName: '{{ include "helm.fullname" . }}-lumigo-injector-webhook-certs'
- name: namespace-configurations
emptyDir: {}
- name: telemetry-proxy-configurations
emptyDir: {}
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ include "helm.fullname" . }}-telemetry-proxy
labels:
{{- include "helm.labels" . | nindent 4 }}
app.kubernetes.io/component: telemetry-proxy
app.kubernetes.io/created-by: lumigo
app.kubernetes.io/part-of: lumigo
control-plane: telemetry-proxy
lumigo.auto-trace: 'false' # We do not need the operator to inject itself
spec:
replicas: {{ .Values.telemetryProxy.replicas }}
selector:
matchLabels:
control-plane: telemetry-proxy
{{- include "helm.selectorLabels" . | nindent 6 }}
template:
metadata:
labels:
{{- include "helm.selectorLabels" . | nindent 8 }}
control-plane: telemetry-proxy
lumigo.auto-trace: 'false' # We do not need the operator to inject itself
lumigo.cert-digest: {{ print $certFingerprint }}
lumigo.metrics-scraper: 'true' # so it will be assigned with scraping targets by the target-allocator
annotations:
kubectl.kubernetes.io/default-container: telemetry-proxy
spec:
affinity:
nodeAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
nodeSelectorTerms:
- matchExpressions:
- key: kubernetes.io/arch
operator: In
values:
- amd64
- arm64
- key: kubernetes.io/os
operator: In
values:
- linux
containers:
- name: telemetry-proxy
image: {{ .Values.telemetryProxy.image.repository }}:{{ .Values.telemetryProxy.image.tag | default .Chart.AppVersion }}
env:
{{- if and .Values.cluster .Values.cluster.name }}
- name: KUBERNETES_CLUSTER_NAME
value: "{{ .Values.cluster.name }}"
{{- end }}
- name: LUMIGO_DEBUG
value: "{{ .Values.debug.enabled | default false }}"
- name: LUMIGO_ENDPOINT
value: "{{ .Values.endpoint.otlp.url }}"
- name: LUMIGO_LOGS_ENDPOINT
value: "{{ .Values.endpoint.otlp.logs_url }}"
- name: LUMIGO_METRICS_ENDPOINT
value: "{{ .Values.endpoint.otlp.metrics_url }}"
{{- if .Values.lumigoToken }}
- name: LUMIGO_INFRA_METRICS_TOKEN
valueFrom:
secretKeyRef:
name: {{ .Values.lumigoToken.secretName }}
key: {{ .Values.lumigoToken.secretKey }}
optional: true
{{- end }}
- name: LUMIGO_INFRA_METRICS_ENABLED
value: "{{ .Values.clusterCollection.metrics.enabled }}"
{{- if .Values.clusterCollection.metrics.enabled }}
- name: LUMIGO_TARGET_ALLOCATOR_ENDPOINT
value: "http://{{ include "helm.fullname" . }}-target-allocator.{{ .Release.Namespace }}.svc.cluster.local"
- name: LUMIGO_INFRA_METRICS_SCRAPING_FREQUENCY
value: "{{ .Values.clusterCollection.metrics.frequency }}"
- name: LUMIGO_CLUSTER_AGENT_SERVICE
value: "{{ include "helm.fullname" . }}-cluster-agent-service.{{ .Release.Namespace }}.svc.cluster.local"
- name: LUMIGO_PROM_NODE_EXPORTER_PORT
value: "{{ .Values.prometheusNodeExporter.service.nodePort }}"
- name: LUMIGO_KUBE_STATE_METRICS_SERVICE
value: "{{ include "helm.fullname" . }}-kube-state-metrics.{{ .Release.Namespace }}.svc.cluster.local"
- name: LUMIGO_KUBE_STATE_METRICS_PORT
value: "{{ index .Values "kube-state-metrics" "service" "port" }}"
- name: LUMIGO_EXPORT_ESSENTIAL_METRICS_ONLY
value: "{{ .Values.clusterCollection.metrics.essentialOnly }}"
{{- end }}
- name: "LUMIGO_WATCHDOG_ENABLED"
value: "{{ .Values.watchdog.enabled }}"
{{ if .Values.watchdog.enabled }}
- name: "LUMIGO_OTELCOL_METRICS_SCRAPING_FREQUENCY"
value: "{{ .Values.watchdog.otelCollector.internalMetricsFrequency }}"
{{ end }}
- name: LUMIGO_OPERATOR_VERSION
value: "{{ $lumigoOperatorVersion }}"
- name: LUMIGO_OPERATOR_DEPLOYMENT_METHOD
value: "Helm-{{ .Capabilities.HelmVersion.Version }}"
- name: LUMIGO_OPERATOR_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
- name: LUMIGO_OPERATOR_NAMESPACE_LIST_URL
value: "http://{{ include "helm.fullname" . }}-webhooks-service.{{ .Release.Namespace }}.svc.cluster.local:8077/namespaces_to_monitor.json"
ports:
- containerPort: 4318
name: otlphttp
protocol: TCP
resources: {{- toYaml .Values.telemetryProxy.resources | nindent 10 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
runAsNonRoot: true
runAsUser: 1234
volumeMounts:
- name: telemetry-proxy-configurations
mountPath: /lumigo/etc/otelcol/
readOnly: false
- name: namespace-configurations
mountPath: /lumigo/etc/namespaces/
readOnly: false
securityContext:
runAsNonRoot: true
fsGroup: 1234
serviceAccountName: lumigo-kubernetes-operator
terminationGracePeriodSeconds: 10
volumes:
- name: namespace-configurations
emptyDir: {}
- name: telemetry-proxy-configurations
Expand Down
Loading
Loading