Replace render config with schema template#51293
Conversation
|
Gitlab CI Configuration Changes
Modified Jobs
generate_config_schema-linux
generate_config_schema-linux:
artifacts:
expire_in: 2 weeks
paths:
- - $CI_PROJECT_DIR/pkg/config/schema/core_schema.yaml
? ^^^^^^^^^ ^
+ - $CI_PROJECT_DIR/pkg/config/schema/yaml/*.yaml
? ^^ ^^^
- - $CI_PROJECT_DIR/pkg/config/schema/system-probe_schema.yaml
when: always
before_script:
- mkdir -p $GOPATH/pkg/mod/cache && zstd -dc modcache.tar.zst | tar xf - -C $GOPATH/pkg/mod/cache
- rm -f modcache.tar.zst
cache:
- key:
files:
- .bazelversion
prefix: bazelversion-$CI_RUNNER_DESCRIPTION
paths:
- .cache/bazelisk
- .cache/bazel/*/install
policy: pull$BAZEL_CACHE_POLICY_SUFFIX
when: on_success
- key:
files:
- .go-version
- .python-version
prefix: bazel-$CI_JOB_NAME
paths:
- .cache/bazel/*/cache
- .cache/go
- .cache/ms-go
- .cache/pip
policy: pull$BAZEL_CACHE_POLICY_SUFFIX
when: on_success
id_tokens:
BUILDBARN_ID_TOKEN:
aud: buildbarn.us1.ddbuild.io
image: registry.ddbuild.io/ci/datadog-agent-buildimages/linux$CI_IMAGE_LINUX_SUFFIX:$CI_IMAGE_LINUX
needs:
- go_deps
rules:
- if: $CI_COMMIT_BRANCH =~ /^mq-working-branch-/
when: never
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^[0-9]+\.[0-9]+\.x$/
- changes:
compare_to: $COMPARE_TO_BRANCH
paths:
- pkg/config/**/*
- tasks/schema/**/*
script:
- dda inv -- -e agent.build
- dda inv -- schema.generate --agent-bin=./bin/agent/agent
- bash $CI_PROJECT_DIR/tasks/schema/check_config_templates.sh $CI_PROJECT_DIR
stage: binary_build
tags:
- arch:amd64
- specific:true
variables:
BAZELISK_HOME: $XDG_CACHE_HOME/bazelisk
KUBERNETES_CPU_REQUEST: 16
KUBERNETES_MEMORY_LIMIT: 16Gi
KUBERNETES_MEMORY_REQUEST: 16Gi
TEST_OUTPUT_FILE: test_output
XDG_CACHE_HOME: $CI_PROJECT_DIR/.cache
generate_config_schema-macos
generate_config_schema-macos:
artifacts:
expire_in: 2 weeks
paths:
- - $CI_PROJECT_DIR/pkg/config/schema/core_schema.yaml
? ^^^^^^^^^ ^
+ - $CI_PROJECT_DIR/pkg/config/schema/yaml/*.yaml
? ^^ ^^^
- - $CI_PROJECT_DIR/pkg/config/schema/system-probe_schema.yaml
when: always
before_script:
- export VAULT_ADDR=https://vault.us1.ddbuild.io
- vault login -method=aws -no-print
- export AWS_RETRY_MODE=standard
- export AWS_RETRY_MAX_ATTEMPTS=5
- 'eval $(gimme $(cat .go-version))
export GOPATH=$GOROOT
'
- "if [ -z \"$TMPDIR\" ]; then\n echo \"TMPDIR must be set\" >& 2\n exit 1\nfi\n"
- export DDA_DIR="$TMPDIR/dda-${CI_JOB_ID}"
- export PATH="$DDA_DIR:$PATH"
- export DDA_NO_DYNAMIC_DEPS=1
- "# Perform installation only if the directory does not exist\nif [ ! -d \"$DDA_DIR\"\
\ ]; then\n robust_curl=\"curl -fsSL --retry 4\" # recommended flags + resist\
\ transient errors like `Connection reset by peer`\n # Get the commit from the\
\ build image variable in the format `vPIPELINE_ID-COMMIT`\n export BUILDIMAGES_COMMIT=\"\
${CI_IMAGE_LINUX#*-}\"\n export DDA_VERSION=\"$($robust_curl https://raw.githubusercontent.com/DataDog/datadog-agent-buildimages/${BUILDIMAGES_COMMIT}/dda.env\
\ | awk -F= '/^DDA_VERSION=/ {print $2}')\"\n # Detect architecture and download\
\ appropriate binary\n if [ \"$(uname -m)\" = \"arm64\" ]; then\n dda_target_triple=\"\
aarch64-apple-darwin\"\n else\n dda_target_triple=\"x86_64-apple-darwin\"\n\
\ fi\n $robust_curl -o dda.tar.gz https://github.com/DataDog/datadog-agent-dev/releases/download/${DDA_VERSION}/dda-${dda_target_triple}.tar.gz\n\
\ tar -xzf dda.tar.gz\n mkdir -p \"$DDA_DIR\"\n sudo mv dda $DDA_DIR\n rm\
\ -f dda.tar.gz\n dda self dep sync -f legacy-tasks\n dda self pip install awscli==1.29.45\n\
fi\n"
- DD_API_KEY="$("$CI_PROJECT_DIR"/tools/ci/fetch_secret.sh "$AGENT_API_KEY_ORG2"
token)" || exit $?; export DD_API_KEY
- DD_APP_KEY="$("$CI_PROJECT_DIR"/tools/ci/fetch_secret.sh "$AGENT_APP_KEY_ORG2"
token)" || exit $?; export DD_APP_KEY
- 'AWS_TOKEN="$(curl -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds:
21600")"
RUNNER_ID="$(curl -s http://169.254.169.254/latest/meta-data/instance-id -H "X-aws-ec2-metadata-token:
$AWS_TOKEN" || hostname)"
datadog-ci tag --level job --tags macos_runner:"$RUNNER_ID"
echo "Reported runner ID to Datadog: $RUNNER_ID"
'
- "if [ \"$CI_COMMIT_BRANCH\" = \"main\" ] || [[ \"$CI_COMMIT_BRANCH\" =~ ^[0-9]+\\\
.[0-9]+\\.(x|[0-9]+)$ ]]; then\n dda inv -- -e macos.report-versions -l all ||\
\ true\nfi\n"
- "if [ \"$((RANDOM%20))\" -eq 0 ]; then\n echo Trying to remove inactive versions\n\
\ dda inv -- -e macos.remove-inactive-versions -l python -t \"$PYTHON_VERSION\"\
\ || true\n dda inv -- -e macos.remove-inactive-versions -l go -t \"$(cat .go-version)\"\
\ || true\nfi\n"
- 'export TMPDIR=/tmp/gitlabci
NEWTMPDIR="$RUNNER_TEMP_PROJECT_DIR/gitlabci"
sudo rm -fr "$(realpath $TMPDIR)" "$NEWTMPDIR"
mkdir "$NEWTMPDIR"
sudo ln -fs "$NEWTMPDIR" $TMPDIR
echo "Temporary folder created, TMPDIR=$TMPDIR -> $NEWTMPDIR"
'
- dda inv -- -e rtloader.make
- dda inv -- -e rtloader.install
- mkdir -p $GOPATH/pkg/mod/cache && zstd -dc modcache.tar.zst | tar xf - -C $GOPATH/pkg/mod/cache
- rm -f modcache.tar.zst
- mkdir -p $GOPATH/bin $GOPATH/pkg/mod/cache && zstd -dc modcache_tools.tar.zst
| tar xf - -C $GOPATH
- rm -f modcache_tools.tar.zst
- export PATH=$PATH:$GOPATH/bin
- dda inv -- -e install-tools --verbose
id_tokens:
BUILDBARN_ID_TOKEN:
aud: buildbarn.us1.ddbuild.io
CI_IDENTITIES_GITLAB_ID_TOKEN:
aud: ci-identities
needs:
- go_deps
- go_tools_deps
rules:
- if: $CI_COMMIT_BRANCH =~ /^mq-working-branch-/
when: never
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^[0-9]+\.[0-9]+\.x$/
- changes:
compare_to: $COMPARE_TO_BRANCH
paths:
- pkg/config/**/*
- tasks/schema/**/*
script:
- dda inv -- -e agent.build
- dda inv -- schema.generate --agent-bin=./bin/agent/agent
- bash $CI_PROJECT_DIR/tasks/schema/check_config_templates.sh $CI_PROJECT_DIR
stage: binary_build
tags:
- macos:sonoma-amd64
- specific:true
variables:
AWS_SHARED_CREDENTIALS_FILE: ${CI_PROJECT_DIR}/.aws/credentials-by-job-id/${CI_JOB_ID}
BAZELISK_HOME: $XDG_CACHE_HOME/bazelisk
KUBERNETES_CPU_REQUEST: 16
KUBERNETES_MEMORY_LIMIT: 16Gi
KUBERNETES_MEMORY_REQUEST: 16Gi
TEST_OUTPUT_FILE: test_output
XDG_CACHE_HOME: $RUNNER_TEMP_PROJECT_DIR
generate_config_schema-windows
generate_config_schema-windows:
artifacts:
expire_in: 2 weeks
paths:
- - $CI_PROJECT_DIR/pkg/config/schema/core_schema.yaml
? ^^^^^^^^^ ^
+ - $CI_PROJECT_DIR/pkg/config/schema/yaml/*.yaml
? ^^ ^^^
- - $CI_PROJECT_DIR/pkg/config/schema/system-probe_schema.yaml
when: always
before_script:
- C:\ci-identities-gitlab-job-client.exe assume-role
id_tokens:
BUILDBARN_ID_TOKEN:
aud: buildbarn.us1.ddbuild.io
CI_IDENTITIES_GITLAB_ID_TOKEN:
aud: ci-identities
needs:
- go_deps
- go_tools_deps
rules:
- if: $CI_COMMIT_BRANCH =~ /^mq-working-branch-/
when: never
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^[0-9]+\.[0-9]+\.x$/
- changes:
compare_to: $COMPARE_TO_BRANCH
paths:
- pkg/config/**/*
- tasks/schema/**/*
script:
- $ErrorActionPreference = "Stop"
- '.\tools\ci\docker-run-with-bazel-cache.ps1 -m 8192M -v "$(Get-Location):c:\mnt"
-e GITLAB_CI -e CI_JOB_ID -e CI_PIPELINE_ID -e CI_PROJECT_NAME -e AWS_NETWORKING=true
-e GOMODCACHE="c:\modcache" -e GOPROXY -e GONOSUMDB -e PIP_INDEX_URL -e DDA_FEATURE_FLAGS_CI_SSM_KEY_WINDOWS
-e CI_IDENTITIES_GITLAB_ID_TOKEN ${WINBUILDIMAGE} powershell.exe -c "c:\mnt\tasks\winbuildscripts\Generate-ConfigSchema.ps1
-BuildOutOfSource 1 -CheckGoVersion 1 -InstallDeps 1"
'
- If ($lastExitCode -ne "0") { throw "Previous command returned $lastExitCode" }
stage: binary_build
tags:
- windows-v2:2022
variables:
ARCH: x64
AWS_SHARED_CREDENTIALS_FILE: ${CI_PROJECT_DIR}\.aws\credentials-by-job-id\${CI_JOB_ID}
BAZELISK_HOME: $XDG_CACHE_HOME/bazelisk
OVERRIDE_GIT_STRATEGY: clone
WINBUILDIMAGE: registry.ddbuild.io/ci/datadog-agent-buildimages/windows_ltsc2022_${ARCH}${CI_IMAGE_WIN_LTSC2022_X64_SUFFIX}:${CI_IMAGE_WIN_LTSC2022_X64}
XDG_CACHE_HOME: c:/bzl
lint_config_schema
lint_config_schema:
allow_failure: false
image: registry.ddbuild.io/ci/datadog-agent-buildimages/linux$CI_IMAGE_LINUX_SUFFIX:$CI_IMAGE_LINUX
needs:
- job: generate_config_schema-linux
rules:
- if: $CI_COMMIT_BRANCH =~ /^mq-working-branch-/
when: never
- if: $CI_COMMIT_BRANCH == "main"
- if: $CI_COMMIT_BRANCH =~ /^[0-9]+\.[0-9]+\.x$/
- changes:
compare_to: $COMPARE_TO_BRANCH
paths:
- pkg/config/**/*
- tasks/schema/**/*
script:
- - dda inv -- schema.lint --schema-dir $CI_PROJECT_DIR/pkg/config/schema/
+ - dda inv -- schema.lint --schema-dir $CI_PROJECT_DIR/pkg/config/schema/yaml/
? +++++
stage: binary_build
tags:
- arch:amd64
- specific:true
Changes Summary
ℹ️ Diff available in the job log. |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 841aeebb2a
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
Files inventory check summary: File checks results against ancestor 8dc5f627: Results for datadog-agent_7.81.0~devel.git.549.623abc8.pipeline.117288584-1_amd64.deb: No change detected |
Static quality checks: ✅ Please find below the results from static quality gates. Successful checks: Info
31 successful checks with minimal change (< 2 KiB)
|
| bzl_library( | ||
| name = "template_bzl", | ||
| srcs = ["template.bzl"], | ||
| visibility = ["//visibility:public"], | ||
| ) |
There was a problem hiding this comment.
Generated by an AI agent by mimicry, I guess? Please mind:
74f74d3 to
4548531
Compare
a36aed9 to
3cff60d
Compare
841aeeb to
52cd5a7
Compare
52cd5a7 to
19a9684
Compare
davidor
left a comment
There was a problem hiding this comment.
👍 for container-platform files (small change in one file)
d97fdfd to
bc7e542
Compare
Regression Detector: Regression Detector Results (Metrics dashboard). Baseline: 8dc5f62. Optimization Goals: ✅ No significant changes detected
|
| perf | experiment | goal | Δ mean % | Δ mean % CI | trials | links |
|---|---|---|---|---|---|---|
| ➖ | quality_gate_idle_all_features | memory utilization | +0.04 | [+0.00, +0.07] | 1 | Logs bounds checks dashboard |
| ➖ | quality_gate_metrics_logs | memory utilization | -0.52 | [-0.78, -0.27] | 1 | Logs bounds checks dashboard |
| ➖ | quality_gate_logs | % cpu utilization | -0.58 | [-1.63, +0.46] | 1 | Logs bounds checks dashboard |
| ➖ | quality_gate_idle | memory utilization | -0.65 | [-0.70, -0.59] | 1 | Logs bounds checks dashboard |
Bounds Checks: ✅ Passed
| perf | experiment | bounds_check_name | replicates_passed | observed_value | links |
|---|---|---|---|---|---|
| ✅ | quality_gate_idle | intake_connections | 10/10 | 3 ≤ 4 | bounds checks dashboard |
| ✅ | quality_gate_idle | memory_usage | 10/10 | 144.45MiB ≤ 147MiB | bounds checks dashboard |
| ✅ | quality_gate_idle | total_bytes_received | 10/10 | 731.38KiB ≤ 819.20KiB | bounds checks dashboard |
| ✅ | quality_gate_idle_all_features | intake_connections | 10/10 | 3 ≤ 4 | bounds checks dashboard |
| ✅ | quality_gate_idle_all_features | memory_usage | 10/10 | 480.97MiB ≤ 495MiB | bounds checks dashboard |
| ✅ | quality_gate_idle_all_features | total_bytes_received | 10/10 | 1.12MiB ≤ 1.25MiB | bounds checks dashboard |
| ✅ | quality_gate_logs | intake_connections | 10/10 | 4 ≤ 6 | bounds checks dashboard |
| ✅ | quality_gate_logs | memory_usage | 10/10 | 181.30MiB ≤ 195MiB | bounds checks dashboard |
| ✅ | quality_gate_logs | missed_bytes | 10/10 | 0B = 0B | bounds checks dashboard |
| ✅ | quality_gate_logs | total_bytes_received | 10/10 | 264.38MiB ≤ 292MiB | bounds checks dashboard |
| ✅ | quality_gate_metrics_logs | cpu_usage | 10/10 | 345.18 ≤ 2000 | bounds checks dashboard |
| ✅ | quality_gate_metrics_logs | intake_connections | 10/10 | 3 ≤ 6 | bounds checks dashboard |
| ✅ | quality_gate_metrics_logs | memory_usage | 10/10 | 399.62MiB ≤ 430MiB | bounds checks dashboard |
| ✅ | quality_gate_metrics_logs | missed_bytes | 10/10 | 0B = 0B | bounds checks dashboard |
| ✅ | quality_gate_metrics_logs | total_bytes_received | 10/10 | 0.93GiB ≤ 1.04GiB | bounds checks dashboard |
Explanation
Confidence level: 90.00%
Effect size tolerance: |Δ mean %| ≥ 5.00%
Performance changes are noted in the perf column of each table:
- ✅ = significantly better comparison variant performance
- ❌ = significantly worse comparison variant performance
- ➖ = no significant change in performance
A regression test is an A/B test of target performance in a repeatable rig, where "performance" is measured as "comparison variant minus baseline variant" for an optimization goal (e.g., ingress throughput). Due to intrinsic variability in measuring that goal, we can only estimate its mean value for each experiment; we report uncertainty in that value as a 90.00% confidence interval denoted "Δ mean % CI".
For each experiment, we decide whether a change in performance is a "regression" -- a change worth investigating further -- if all of the following criteria are true:
-
Its estimated |Δ mean %| ≥ 5.00%, indicating the change is big enough to merit a closer look.
-
Its 90.00% confidence interval "Δ mean % CI" does not contain zero, indicating that if our statistical model is accurate, there is at least a 90.00% chance there is a difference in performance between baseline and comparison variants.
-
Its configuration does not mark it "erratic".
Replicate Execution Details
We run multiple replicates for each experiment/variant. However, we allow replicates to be automatically retried if there are any failures, up to 8 times, at which point the replicate is marked dead and we are unable to run analysis for the entire experiment. We call each of these attempts at running replicates a replicate execution. This section lists all replicate executions that failed due to the target crashing or being oom killed.
Note: In the below tables we bucket failures by experiment, variant, and failure type. For each of these buckets we list out the replicate indexes that failed with an annotation signifying how many times said replicate failed with the given failure mode. In the below example the baseline variant of the experiment named experiment_with_failures had two replicates that failed by oom kills. Replicate 0, which failed 8 executions, and replicate 1 which failed 6 executions, all with the same failure mode.
| Experiment | Variant | Replicates | Failure | Logs | Debug Dashboard |
|---|---|---|---|---|---|
| experiment_with_failures | baseline | 0 (x8) 1 (x6) | Oom killed | Debug Dashboard |
The debug dashboard links will take you to a debugging dashboard specifically designed to investigate replicate execution failures.
❌ Retried Profiling Replicate Execution Failures (ddprof)
Note: Profiling replicas may still be executing. See the debug dashboard for up to date status.
| Experiment | Variant | Replicates | Failure | Debug Dashboard |
|---|---|---|---|---|
| quality_gate_idle | baseline | 10 | Oom killed | Debug Dashboard |
| quality_gate_idle_all_features | baseline | 10 | Oom killed | Debug Dashboard |
| quality_gate_idle_all_features | comparison | 10 | Oom killed | Debug Dashboard |
| quality_gate_logs | baseline | 10 | Oom killed | Debug Dashboard |
| quality_gate_logs | comparison | 10 | Oom killed | Debug Dashboard |
| quality_gate_metrics_logs | baseline | 10 | Oom killed | Debug Dashboard |
| quality_gate_metrics_logs | comparison | 10 | Oom killed | Debug Dashboard |
CI Pass/Fail Decision
✅ Passed. All Quality Gates passed.
- quality_gate_idle, bounds check intake_connections: 10/10 replicas passed. Gate passed.
- quality_gate_idle, bounds check total_bytes_received: 10/10 replicas passed. Gate passed.
- quality_gate_idle, bounds check memory_usage: 10/10 replicas passed. Gate passed.
- quality_gate_logs, bounds check memory_usage: 10/10 replicas passed. Gate passed.
- quality_gate_logs, bounds check intake_connections: 10/10 replicas passed. Gate passed.
- quality_gate_logs, bounds check total_bytes_received: 10/10 replicas passed. Gate passed.
- quality_gate_logs, bounds check missed_bytes: 10/10 replicas passed. Gate passed.
- quality_gate_metrics_logs, bounds check cpu_usage: 10/10 replicas passed. Gate passed.
- quality_gate_metrics_logs, bounds check memory_usage: 10/10 replicas passed. Gate passed.
- quality_gate_metrics_logs, bounds check missed_bytes: 10/10 replicas passed. Gate passed.
- quality_gate_metrics_logs, bounds check intake_connections: 10/10 replicas passed. Gate passed.
- quality_gate_metrics_logs, bounds check total_bytes_received: 10/10 replicas passed. Gate passed.
- quality_gate_idle_all_features, bounds check intake_connections: 10/10 replicas passed. Gate passed.
- quality_gate_idle_all_features, bounds check total_bytes_received: 10/10 replicas passed. Gate passed.
- quality_gate_idle_all_features, bounds check memory_usage: 10/10 replicas passed. Gate passed.
bc7e542 to
a636624
Compare
Migrate the three call sites that used to invoke pkg/config/render_config
(Go binary + text/template) to call tasks/schema/template.py (Python +
enriched schema):
- Bazel: new schema_template_config rule in pkg/config/schema/template.bzl
replaces the legacy agent_config rule. Three downstream targets keep
their names: //pkg/config:{agent,iot_agent,system_probe}_config. OS is
picked via select() on @platforms//os.
- Dev build: tasks/agent.py renames render_config -> generate_config_examples
and calls generate_template in-process. The @task def generate_config is
deleted; tasks/msi.py and tasks/winbuildscripts/Invoke-UnitTests.ps1 are
updated to call dda inv schema.template directly.
- //go:generate: dropped from cmd/cluster-agent, cmd/cluster-agent-cloudfoundry,
and cmd/dogstatsd (latter file deleted entirely). The corresponding invoke
tasks (cluster_agent_helpers.build_common, dogstatsd.build) now call
generate_template directly.
The legacy render_config binary stays in place for now. It will be
removed in a follow-up PR.
a636624 to
623abc8
Compare
What does this PR do?
Switch render_config callers to schema.template
Migrate the three call sites that used to invoke pkg/config/render_config
(Go binary + text/template) to call tasks/schema/template.py (Python +
enriched schema):
- Bazel: new schema_template_config rule in pkg/config/schema/template.bzl
replaces the legacy agent_config rule. Three downstream targets keep
their names: //pkg/config:{agent,iot_agent,system_probe}_config. OS is
picked via select() on @platforms//os.
- Dev build: tasks/agent.py renames render_config -> generate_config_examples
and calls generate_template in-process. The @task def generate_config is
deleted; tasks/msi.py and tasks/winbuildscripts/Invoke-UnitTests.ps1 are
updated to call dda inv schema.template directly.
- //go:generate: dropped from cmd/cluster-agent, cmd/cluster-agent-cloudfoundry,
and cmd/dogstatsd (latter file deleted entirely). The corresponding invoke
tasks (cluster_agent_helpers.build_common, dogstatsd.build) now call
generate_template directly.
The legacy render_config binary stays in place for now. It will be
removed in a follow-up PR.
Describe how you validated your changes
Validating that the example configuration files are still generated and shipped like before.
Each team should validate, where it makes sense: