Skip to content

Commit b801f2b

Browse files
harelmo-lumigoCircleCI
and
CircleCI
authored
feat: allow scaling telemetry-proxy (#153)
Co-authored-by: CircleCI <[email protected]>
1 parent 24f81a2 commit b801f2b

14 files changed

+806
-244
lines changed

.github/workflows/build-test-release.yml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ jobs:
138138
--set "controllerManager.manager.image.tag=${{ github.run_id }}" \
139139
--set "controllerManager.telemetryProxy.image.repository=${PROXY_IMG}" \
140140
--set "controllerManager.telemetryProxy.image.tag=${{ github.run_id }}" \
141+
--set "telemetryProxy.image.repository=${PROXY_IMG}" \
142+
--set "telemetryProxy.image.tag=${{ github.run_id }}" \
141143
--set "watchdog.image.repository=${WATCHDOG_IMG}" \
142144
--set "watchdog.image.tag=${{ github.run_id }}" \
143145
--set "watchdog.enabled=true" \
@@ -149,15 +151,18 @@ jobs:
149151
--set "controllerManager.manager.image.tag=${{ github.run_id }}" \
150152
--set "controllerManager.telemetryProxy.image.repository=${PROXY_IMG}" \
151153
--set "controllerManager.telemetryProxy.image.tag=${{ github.run_id }}" \
154+
--set "telemetryProxy.image.repository=${PROXY_IMG}" \
155+
--set "telemetryProxy.image.tag=${{ github.run_id }}" \
152156
--set "watchdog.image.repository=${WATCHDOG_IMG}" \
153157
--set "watchdog.image.tag=${{ github.run_id }}" \
154158
--set "watchdog.enabled=true" \
155159
--debug --wait --timeout 4m; then
156160
157161
kubectl describe all -n lumigo-system
158162
kubectl logs -l control-plane=controller-manager -n lumigo-system -c manager
159-
kubectl logs -l control-plane=controller-manager -n lumigo-system -c telemetry-proxy
160-
kubectl logs -l control-plane=controller-manager -n lumigo-system -c watchdog
163+
kubectl logs -l control-plane=controller-manager -n lumigo-system -c events-scraper
164+
kubectl logs -l control-plane=watchdog -n lumigo-system -c watchdog
165+
kubectl logs -l control-plane=telemetry-proxy -n lumigo-system -c telemetry-proxy
161166
exit 1
162167
fi
163168
- name: Run end-to-end tests
@@ -288,6 +293,7 @@ jobs:
288293
kubectl logs -l control-plane=controller-manager -n lumigo-system -c manager
289294
# The collector sidecar in the controller-manager pod is named events-scraper (formerly telemetry-proxy)
kubectl logs -l control-plane=controller-manager -n lumigo-system -c events-scraper
290295
kubectl logs -l control-plane=watchdog -n lumigo-system -c watchdog
296+
# Dump logs of the standalone telemetry-proxy deployment; the container name must match the chart's container name
kubectl logs -l control-plane=telemetry-proxy -n lumigo-system -c telemetry-proxy
291297
exit 1
292298
fi
293299
@@ -728,6 +734,8 @@ jobs:
728734
yq e -i ".controllerManager.manager.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-operator\"" charts/lumigo-operator/values.yaml
729735
yq e -i ".controllerManager.manager.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
730736
yq e -i ".controllerManager.telemetryProxy.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-telemetry-proxy\"" charts/lumigo-operator/values.yaml
737+
yq e -i ".telemetryProxy.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
738+
yq e -i ".telemetryProxy.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-telemetry-proxy\"" charts/lumigo-operator/values.yaml
731739
yq e -i ".controllerManager.telemetryProxy.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml
732740
yq e -i ".controllerManager.kubeRbacProxy.image.repository = \"${{ matrix.ecr-registry }}/lumigo/lumigo-kubernetes-rbac-proxy\"" charts/lumigo-operator/values.yaml
733741
yq e -i ".controllerManager.kubeRbacProxy.image.tag = \"${{ needs.validate-release-increment.outputs.version }}\"" charts/lumigo-operator/values.yaml

charts/lumigo-operator/templates/controller-deployment-and-webhooks.yaml

Lines changed: 156 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,9 @@ metadata:
136136
control-plane: controller-manager
137137
lumigo.auto-trace: 'false' # We do not need the operator to inject itself
138138
spec:
139-
replicas: {{ .Values.controllerManager.replicas }}
139+
# this must be 1, as the events-scraper is not meant to run in multiple replicas,
140+
# as this will result in duplicate events being scraped
141+
replicas: 1
140142
selector:
141143
matchLabels:
142144
control-plane: controller-manager
@@ -229,23 +231,25 @@ spec:
229231
- name: namespace-configurations
230232
mountPath: /lumigo/etc/namespaces/
231233
readOnly: false
232-
- name: telemetry-proxy
234+
- name: events-scraper
233235
image: {{ .Values.controllerManager.telemetryProxy.image.repository }}:{{ .Values.controllerManager.telemetryProxy.image.tag | default .Chart.AppVersion }}
234236
env:
235-
{{- if .Values.cluster }}
236-
{{- if .Values.cluster.name }}
237+
{{- if and .Values.cluster .Values.cluster.name }}
237238
- name: KUBERNETES_CLUSTER_NAME
238239
value: "{{ .Values.cluster.name }}"
239-
{{- end }}
240240
{{- end }}
241241
- name: LUMIGO_DEBUG
242242
value: "{{ .Values.debug.enabled | default false }}"
243-
- name: LUMIGO_ENDPOINT
244-
value: "{{ .Values.endpoint.otlp.url }}"
245243
- name: LUMIGO_LOGS_ENDPOINT
246244
value: "{{ .Values.endpoint.otlp.logs_url }}"
245+
- name: "LUMIGO_WATCHDOG_ENABLED"
246+
value: "{{ .Values.watchdog.enabled }}"
247+
{{ if .Values.watchdog.enabled }}
247248
- name: LUMIGO_METRICS_ENDPOINT
248249
value: "{{ .Values.endpoint.otlp.metrics_url }}"
250+
- name: "LUMIGO_OTELCOL_METRICS_SCRAPING_FREQUENCY"
251+
value: "{{ .Values.watchdog.otelCollector.internalMetricsFrequency }}"
252+
{{ end }}
249253
{{- if .Values.lumigoToken }}
250254
- name: LUMIGO_INFRA_METRICS_TOKEN
251255
valueFrom:
@@ -254,28 +258,6 @@ spec:
254258
key: {{ .Values.lumigoToken.secretKey }}
255259
optional: true
256260
{{- end }}
257-
- name: LUMIGO_INFRA_METRICS_ENABLED
258-
value: "{{ .Values.clusterCollection.metrics.enabled }}"
259-
{{- if .Values.clusterCollection.metrics.enabled }}
260-
- name: LUMIGO_INFRA_METRICS_SCRAPING_FREQUENCY
261-
value: "{{ .Values.clusterCollection.metrics.frequency }}"
262-
- name: LUMIGO_CLUSTER_AGENT_SERVICE
263-
value: "{{ include "helm.fullname" . }}-cluster-agent-service.{{ .Release.Namespace }}.svc.cluster.local"
264-
- name: LUMIGO_PROM_NODE_EXPORTER_PORT
265-
value: "{{ .Values.prometheusNodeExporter.service.nodePort }}"
266-
- name: LUMIGO_KUBE_STATE_METRICS_SERVICE
267-
value: "{{ .Release.Name }}-kube-state-metrics.{{ .Release.Namespace }}.svc.cluster.local"
268-
- name: LUMIGO_KUBE_STATE_METRICS_PORT
269-
value: "{{ index .Values "kube-state-metrics" "service" "port" }}"
270-
- name: LUMIGO_EXPORT_ESSENTIAL_METRICS_ONLY
271-
value: "{{ .Values.clusterCollection.metrics.essentialOnly }}"
272-
{{- end }}
273-
- name: "LUMIGO_WATCHDOG_ENABLED"
274-
value: "{{ .Values.watchdog.enabled }}"
275-
{{ if .Values.watchdog.enabled }}
276-
- name: "LUMIGO_OTELCOL_METRICS_SCRAPING_FREQUENCY"
277-
value: "{{ .Values.watchdog.otelCollector.internalMetricsFrequency }}"
278-
{{ end }}
279261
- name: LUMIGO_OPERATOR_VERSION
280262
value: "{{ $lumigoOperatorVersion }}"
281263
- name: LUMIGO_OPERATOR_DEPLOYMENT_METHOD
@@ -284,6 +266,8 @@ spec:
284266
valueFrom:
285267
fieldRef:
286268
fieldPath: spec.nodeName
269+
- name: OTELCOL_CONFIG_TEMPLATE_FILE_PATH
270+
value: /lumigo/etc/otelcol-events-config.yaml.tpl
287271
ports:
288272
- containerPort: 4318
289273
name: otlphttp
@@ -294,7 +278,6 @@ spec:
294278
capabilities:
295279
drop:
296280
- ALL
297-
# readOnlyRootFilesystem: true
298281
runAsNonRoot: true
299282
runAsUser: 1234
300283
volumeMounts:
@@ -328,6 +311,19 @@ spec:
328311
- ALL
329312
readOnlyRootFilesystem: true
330313
runAsNonRoot: true
314+
- name: namespace-config-server
315+
image: busybox:1.35
316+
command: ["httpd", "-f", "-p", "8077", "-h", "/namespaces"]
317+
volumeMounts:
318+
- name: namespace-configurations
319+
mountPath: /namespaces
320+
securityContext:
321+
allowPrivilegeEscalation: false
322+
capabilities:
323+
drop:
324+
- ALL
325+
readOnlyRootFilesystem: false
326+
runAsNonRoot: false
331327
securityContext:
332328
runAsNonRoot: true
333329
fsGroup: 1234
@@ -338,6 +334,136 @@ spec:
338334
secret:
339335
defaultMode: 420
340336
secretName: '{{ include "helm.fullname" . }}-lumigo-injector-webhook-certs'
337+
- name: namespace-configurations
338+
emptyDir: {}
339+
- name: telemetry-proxy-configurations
340+
emptyDir: {}
341+
---
342+
apiVersion: apps/v1
343+
kind: Deployment
344+
metadata:
345+
name: {{ include "helm.fullname" . }}-telemetry-proxy
346+
labels:
347+
{{- include "helm.labels" . | nindent 4 }}
348+
app.kubernetes.io/component: telemetry-proxy
349+
app.kubernetes.io/created-by: lumigo
350+
app.kubernetes.io/part-of: lumigo
351+
control-plane: telemetry-proxy
352+
lumigo.auto-trace: 'false' # We do not need the operator to inject itself
353+
spec:
354+
replicas: {{ .Values.telemetryProxy.replicas }}
355+
selector:
356+
matchLabels:
357+
control-plane: telemetry-proxy
358+
{{- include "helm.selectorLabels" . | nindent 6 }}
359+
template:
360+
metadata:
361+
labels:
362+
{{- include "helm.selectorLabels" . | nindent 8 }}
363+
control-plane: telemetry-proxy
364+
lumigo.auto-trace: 'false' # We do not need the operator to inject itself
365+
lumigo.cert-digest: {{ print $certFingerprint }}
366+
lumigo.metrics-scraper: 'true' # so it will be assigned with scraping targets by the target-allocator
367+
annotations:
368+
kubectl.kubernetes.io/default-container: telemetry-proxy
369+
spec:
370+
affinity:
371+
nodeAffinity:
372+
requiredDuringSchedulingIgnoredDuringExecution:
373+
nodeSelectorTerms:
374+
- matchExpressions:
375+
- key: kubernetes.io/arch
376+
operator: In
377+
values:
378+
- amd64
379+
- arm64
380+
- key: kubernetes.io/os
381+
operator: In
382+
values:
383+
- linux
384+
containers:
385+
- name: telemetry-proxy
386+
image: {{ .Values.telemetryProxy.image.repository }}:{{ .Values.telemetryProxy.image.tag | default .Chart.AppVersion }}
387+
env:
388+
{{- if and .Values.cluster .Values.cluster.name }}
389+
- name: KUBERNETES_CLUSTER_NAME
390+
value: "{{ .Values.cluster.name }}"
391+
{{- end }}
392+
- name: LUMIGO_DEBUG
393+
value: "{{ .Values.debug.enabled | default false }}"
394+
- name: LUMIGO_ENDPOINT
395+
value: "{{ .Values.endpoint.otlp.url }}"
396+
- name: LUMIGO_LOGS_ENDPOINT
397+
value: "{{ .Values.endpoint.otlp.logs_url }}"
398+
- name: LUMIGO_METRICS_ENDPOINT
399+
value: "{{ .Values.endpoint.otlp.metrics_url }}"
400+
{{- if .Values.lumigoToken }}
401+
- name: LUMIGO_INFRA_METRICS_TOKEN
402+
valueFrom:
403+
secretKeyRef:
404+
name: {{ .Values.lumigoToken.secretName }}
405+
key: {{ .Values.lumigoToken.secretKey }}
406+
optional: true
407+
{{- end }}
408+
- name: LUMIGO_INFRA_METRICS_ENABLED
409+
value: "{{ .Values.clusterCollection.metrics.enabled }}"
410+
{{- if .Values.clusterCollection.metrics.enabled }}
411+
- name: LUMIGO_TARGET_ALLOCATOR_ENDPOINT
412+
value: "http://{{ include "helm.fullname" . }}-target-allocator.{{ .Release.Namespace }}.svc.cluster.local"
413+
- name: LUMIGO_INFRA_METRICS_SCRAPING_FREQUENCY
414+
value: "{{ .Values.clusterCollection.metrics.frequency }}"
415+
- name: LUMIGO_CLUSTER_AGENT_SERVICE
416+
value: "{{ include "helm.fullname" . }}-cluster-agent-service.{{ .Release.Namespace }}.svc.cluster.local"
417+
- name: LUMIGO_PROM_NODE_EXPORTER_PORT
418+
value: "{{ .Values.prometheusNodeExporter.service.nodePort }}"
419+
- name: LUMIGO_KUBE_STATE_METRICS_SERVICE
420+
value: "{{ include "helm.fullname" . }}-kube-state-metrics.{{ .Release.Namespace }}.svc.cluster.local"
421+
- name: LUMIGO_KUBE_STATE_METRICS_PORT
422+
value: "{{ index .Values "kube-state-metrics" "service" "port" }}"
423+
- name: LUMIGO_EXPORT_ESSENTIAL_METRICS_ONLY
424+
value: "{{ .Values.clusterCollection.metrics.essentialOnly }}"
425+
{{- end }}
426+
- name: "LUMIGO_WATCHDOG_ENABLED"
427+
value: "{{ .Values.watchdog.enabled }}"
428+
{{ if .Values.watchdog.enabled }}
429+
- name: "LUMIGO_OTELCOL_METRICS_SCRAPING_FREQUENCY"
430+
value: "{{ .Values.watchdog.otelCollector.internalMetricsFrequency }}"
431+
{{ end }}
432+
- name: LUMIGO_OPERATOR_VERSION
433+
value: "{{ $lumigoOperatorVersion }}"
434+
- name: LUMIGO_OPERATOR_DEPLOYMENT_METHOD
435+
value: "Helm-{{ .Capabilities.HelmVersion.Version }}"
436+
- name: LUMIGO_OPERATOR_NODE_NAME
437+
valueFrom:
438+
fieldRef:
439+
fieldPath: spec.nodeName
440+
- name: LUMIGO_OPERATOR_NAMESPACE_LIST_URL
441+
value: "http://{{ include "helm.fullname" . }}-webhooks-service.{{ .Release.Namespace }}.svc.cluster.local:8077/namespaces_to_monitor.json"
442+
ports:
443+
- containerPort: 4318
444+
name: otlphttp
445+
protocol: TCP
446+
resources: {{- toYaml .Values.telemetryProxy.resources | nindent 10 }}
447+
securityContext:
448+
allowPrivilegeEscalation: false
449+
capabilities:
450+
drop:
451+
- ALL
452+
runAsNonRoot: true
453+
runAsUser: 1234
454+
volumeMounts:
455+
- name: telemetry-proxy-configurations
456+
mountPath: /lumigo/etc/otelcol/
457+
readOnly: false
458+
- name: namespace-configurations
459+
mountPath: /lumigo/etc/namespaces/
460+
readOnly: false
461+
securityContext:
462+
runAsNonRoot: true
463+
fsGroup: 1234
464+
serviceAccountName: lumigo-kubernetes-operator
465+
terminationGracePeriodSeconds: 10
466+
volumes:
341467
- name: namespace-configurations
342468
emptyDir: {}
343469
- name: telemetry-proxy-configurations

0 commit comments

Comments
 (0)