SBOM e2e: scan host and container images across runtimes#51486
Conversation
|
@codex review |
Gitlab CI Configuration ChangesModified Jobsnew-e2e-sbom new-e2e-sbom:
after_script:
- CODECOV_TOKEN=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $CODECOV token) || exit
$?; export CODECOV_TOKEN
- $CI_PROJECT_DIR/tools/ci/junit_upload.sh "junit-${CI_JOB_ID}.tgz" "$E2E_RESULT_JSON"
- "if [ -d \"$E2E_COVERAGE_OUT_DIR\" ]; then\n dda inv -- -e coverage.process-e2e-coverage-folders\
\ $E2E_COVERAGE_OUT_DIR\n dda inv -- -e dyntest.compute-and-upload-job-index\
\ --bucket-uri $S3_PERMANENT_ARTIFACTS_URI --coverage-folder $E2E_COVERAGE_OUT_DIR\
\ --commit-sha $CI_COMMIT_SHA --job-id $CI_JOB_ID\nfi\n"
- "if [ -d \"$E2E_COVERAGE_OUT_DIR\" ]; then\n DD_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh\
\ $AGENT_API_KEY_ORG2 token) || exit $?; export DD_API_KEY\n for coverage in\
\ \"$E2E_COVERAGE_OUT_DIR\"/*/coverage.txt; do\n datadog-ci coverage upload\
\ --format=go-coverprofile \"$coverage\" || true\n done\nfi\n"
artifacts:
expire_in: 2 weeks
paths:
- $E2E_OUTPUT_DIR
- $E2E_RESULT_JSON
- junit-*.tgz
- $E2E_COVERAGE_OUT_DIR
reports:
annotations:
- $EXTERNAL_LINKS_PATH
when: always
before_script:
- mkdir -p $GOPATH/pkg/mod/cache && zstd -dc modcache_e2e.tar.zst | tar xf - -C
$GOPATH/pkg/mod/cache
- rm -f modcache_e2e.tar.zst
- mkdir -p ~/.pulumi && zstd -dc pulumi_plugins.tar.zst | tar xf - -C ~/.pulumi
- rm -f pulumi_plugins.tar.zst
- "go_bin=\"$(go env GOBIN)\"\nif [ -z \"$go_bin\" ]; then\n go_bin=\"$(go env\
\ GOPATH)/bin\"\nfi\nmkdir -p \"$go_bin\" \"$GOPATH/pkg/mod/cache\"\nzstd -dc\
\ go_tools_bin.tar.zst | tar xf - -C \"$go_bin\"\nzstd -dc modcache_tools.tar.zst\
\ | tar xf - -C \"$GOPATH/pkg/mod/cache\"\n"
- rm -f go_tools_bin.tar.zst modcache_tools.tar.zst
- export PATH=$PATH:$go_bin
- mkdir -p ~/.aws
- "if [ -n \"$E2E_USE_AWS_PROFILE\" ]; then\n echo Using agent-qa-ci aws profile\n\
\ $CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_QA_E2E profile >> ~/.aws/config\
\ || exit $?\n # Now all `aws` commands target the agent-qa profile\n export\
\ AWS_PROFILE=agent-qa-ci\nelse\n # Assume role to fetch only once credentials\
\ and avoid rate limits\n echo Assuming ddbuild-agent-ci role\n roleoutput=\"\
$(aws sts assume-role --role-arn arn:aws:iam::669783387624:role/ddbuild-agent-ci\
\ --external-id ddbuild-agent-ci --role-session-name RoleSession)\"\n export\
\ AWS_ACCESS_KEY_ID=\"$(echo \"$roleoutput\" | jq -r '.Credentials.AccessKeyId')\"\
\n export AWS_SECRET_ACCESS_KEY=\"$(echo \"$roleoutput\" | jq -r '.Credentials.SecretAccessKey')\"\
\n export AWS_SESSION_TOKEN=\"$(echo \"$roleoutput\" | jq -r '.Credentials.SessionToken')\"\
\nfi\n"
- $CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_QA_E2E ssh_public_key_rsa > $E2E_AWS_PUBLIC_KEY_PATH
|| exit $?
- touch $E2E_AWS_PRIVATE_KEY_PATH && chmod 600 $E2E_AWS_PRIVATE_KEY_PATH && $CI_PROJECT_DIR/tools/ci/fetch_secret.sh
$AGENT_QA_E2E ssh_key_rsa > $E2E_AWS_PRIVATE_KEY_PATH || exit $?
- $CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_QA_E2E ssh_public_key_rsa > $E2E_AZURE_PUBLIC_KEY_PATH
|| exit $?
- touch $E2E_AZURE_PRIVATE_KEY_PATH && chmod 600 $E2E_AZURE_PRIVATE_KEY_PATH &&
$CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_QA_E2E ssh_key_rsa > $E2E_AZURE_PRIVATE_KEY_PATH
|| exit $?
- $CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_QA_E2E ssh_public_key_rsa > $E2E_GCP_PUBLIC_KEY_PATH
|| exit $?
- touch $E2E_GCP_PRIVATE_KEY_PATH && chmod 600 $E2E_GCP_PRIVATE_KEY_PATH && $CI_PROJECT_DIR/tools/ci/fetch_secret.sh
$AGENT_QA_E2E ssh_key_rsa > $E2E_GCP_PRIVATE_KEY_PATH || exit $?
- pulumi login "s3://dd-pulumi-state?region=us-east-1&awssdk=v2&profile=$AWS_PROFILE"
- ARM_CLIENT_ID=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $E2E_AZURE client_id)
|| exit $?; export ARM_CLIENT_ID
- ARM_CLIENT_SECRET=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $E2E_AZURE token)
|| exit $?; export ARM_CLIENT_SECRET
- ARM_TENANT_ID=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $E2E_AZURE tenant_id)
|| exit $?; export ARM_TENANT_ID
- ARM_SUBSCRIPTION_ID=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $E2E_AZURE subscription_id)
|| exit $?; export ARM_SUBSCRIPTION_ID
- $CI_PROJECT_DIR/tools/ci/fetch_secret.sh $E2E_GCP credentials_json > ~/gcp-credentials.json
|| exit $?
- export GOOGLE_APPLICATION_CREDENTIALS=~/gcp-credentials.json
- 'gcp_acr_key=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $E2E_GCP credentials_acr_readonly)
|| exit $?
export E2E_GCP_IMAGE_PULL_PASSWORD="b64=$(printf ''%s'' "$gcp_acr_key" | base64
-w 0)"
'
- dda inv -- -e gitlab.generate-ci-visibility-links --output=$EXTERNAL_LINKS_PATH
- export DD_ENV=nativetest
- export DD_CIVISIBILITY_ENABLED=true
- export DD_CIVISIBILITY_AGENTLESS_ENABLED=true
- export DD_CIVISIBILITY_FLAKY_RETRY_ENABLED=false
- export DD_TAGS="gitlab.pipeline_source:${CI_PIPELINE_SOURCE}"
- DD_API_KEY=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $AGENT_API_KEY_ORG2 token)
|| exit $?; export DD_API_KEY
- export WINDOWS_DDNPM_DRIVER=${WINDOWS_DDNPM_DRIVER:-$(dda inv release.get-release-json-value
"dependencies::WINDOWS_DDNPM_DRIVER" --no-worktree)}
- export WINDOWS_DDPROCMON_DRIVER=${WINDOWS_DDPROCMON_DRIVER:-$(dda inv release.get-release-json-value
"dependencies::WINDOWS_DDPROCMON_DRIVER" --no-worktree)}
image: registry.ddbuild.io/ci/datadog-agent-buildimages/linux$CI_IMAGE_LINUX_SUFFIX:$CI_IMAGE_LINUX
needs:
- go_e2e_deps
- artifacts: false
job: go_e2e_test_binaries
- go_tools_deps
- job: new-e2e-base-coverage
optional: true
- qa_agent_linux
- qa_agent_linux_jmx
- qa_dca
- qa_dogstatsd
parallel:
matrix:
- EXTRA_PARAMS: --run "TestSBOMKindSuite"
+ - EXTRA_PARAMS: --run "TestSBOMKubeadmSuite"
+ - EXTRA_PARAMS: --run "TestSBOMKubeadmCrioSuite"
+ - EXTRA_PARAMS: --run "TestSBOMDockerHostSuite"
rules:
- if: $RUN_E2E_TESTS == "off"
when: never
- if: $CI_COMMIT_BRANCH =~ /^mq-working-branch-/
when: never
- if: $RUN_E2E_TESTS == "on"
when: on_success
- if: $CI_COMMIT_BRANCH == "main"
when: on_success
- if: $CI_COMMIT_BRANCH =~ /^[0-9]+\.[0-9]+\.x$/
when: on_success
- if: $CI_COMMIT_TAG =~ /^[0-9]+\.[0-9]+\.[0-9]+-rc\.[0-9]+$/
when: on_success
- changes:
compare_to: $COMPARE_TO_BRANCH
paths:
- .gitlab/test/e2e/e2e.yml
- test/e2e-framework/**/*
- test/new-e2e/go.mod
- flakes.yaml
- release.json
- changes:
compare_to: $COMPARE_TO_BRANCH
paths:
- pkg/sbom/**/*
- test/new-e2e/tests/sbom/**/*
when: on_success
- if: $CI_COMMIT_BRANCH =~ /^mq-working-branch-/
when: never
- allow_failure: true
when: manual
script:
- export IS_DEV_BRANCH="$(dda inv -- -e pipeline.is-dev-branch)"
- DYNAMIC_TESTS_BREAKGLASS=$($CI_PROJECT_DIR/tools/ci/fetch_secret.sh $DYNAMIC_TESTS_BREAKGLASS
value) || exit $?; export DYNAMIC_TESTS_BREAKGLASS
- "if [ \"$DYNAMIC_TESTS_BREAKGLASS\" == \"true\" ] || [ \"$IS_DEV_BRANCH\" == \"\
false\" ] || [ \"$RUN_E2E_TESTS\" == \"on\" ]; then\n export DYNAMIC_TESTS_FLAG=\"\
\"\nfi\n"
- export E2E_IMAGE_PULL_PASSWORD=$(aws ecr get-login-password),$E2E_GCP_IMAGE_PULL_PASSWORD
- dda inv -- -e new-e2e-tests.run $DYNAMIC_TESTS_FLAG $PRE_BUILT_BINARIES_FLAG $MAX_RETRIES_FLAG
--local-package $CI_PROJECT_DIR/$OMNIBUS_BASE_DIR --result-json $E2E_RESULT_JSON
--targets $TARGETS --junit-tar junit-${CI_JOB_ID}.tgz ${EXTRA_PARAMS} --test-washer
--logs-folder=$E2E_OUTPUT_DIR/logs --logs-post-processing --logs-post-processing-test-depth=$E2E_LOGS_PROCESSING_TEST_DEPTH
stage: e2e
tags:
- arch:amd64
- specific:true
variables:
DYNAMIC_TESTS_FLAG: --impacted
E2E_AWS_PRIVATE_KEY_PATH: /tmp/agent-qa-aws-ssh-key
E2E_AWS_PUBLIC_KEY_PATH: /tmp/agent-qa-aws-ssh-key.pub
E2E_AZURE_PRIVATE_KEY_PATH: /tmp/agent-qa-azure-ssh-key
E2E_AZURE_PUBLIC_KEY_PATH: /tmp/agent-qa-azure-ssh-key.pub
E2E_COMMIT_SHA: $CI_COMMIT_SHORT_SHA
E2E_COVERAGE_OUT_DIR: $CI_PROJECT_DIR/coverage
E2E_GCP_PRIVATE_KEY_PATH: /tmp/agent-qa-gcp-ssh-key
E2E_GCP_PUBLIC_KEY_PATH: /tmp/agent-qa-gcp-ssh-key.pub
E2E_IMAGE_PULL_REGISTRY: 669783387624.dkr.ecr.us-east-1.amazonaws.com,us-central1-docker.pkg.dev
E2E_IMAGE_PULL_USERNAME: AWS,_json_key
E2E_KEY_PAIR_NAME: datadog-agent-ci-rsa
E2E_LOGS_PROCESSING_TEST_DEPTH: 1
E2E_OUTPUT_DIR: $CI_PROJECT_DIR/e2e-output
E2E_PIPELINE_ID: $CI_PIPELINE_ID
E2E_PREBUILD_S3_URI: $S3_PERMANENT_ARTIFACTS_URI/e2e-pre-build/$CI_PIPELINE_ID
E2E_RESULT_JSON: $CI_PROJECT_DIR/e2e_test_output.json
E2E_SKIP_WINDOWS: $SKIP_WINDOWS
E2E_USE_AWS_PROFILE: 'true'
EXTERNAL_LINKS_PATH: external_links_$CI_JOB_ID.json
FLAKY_PATTERNS_CONFIG: $CI_PROJECT_DIR/flaky-patterns-runtime.yaml
GIT_STRATEGY: clone
KUBERNETES_CPU_REQUEST: 6
KUBERNETES_MEMORY_LIMIT: 16Gi
KUBERNETES_MEMORY_REQUEST: 12Gi
MAX_RETRIES_FLAG: ''
ON_NIGHTLY_FIPS: 'true'
PRE_BUILT_BINARIES_FLAG: --use-prebuilt-binaries
REMOTE_STACK_CLEANING: 'true'
SHOULD_RUN_IN_FLAKES_FINDER: 'true'
TARGETS: ./tests/sbom
TEAM: csm-threats-agentChanges Summary
ℹ️ Diff available in the job log. |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: ca140215ec
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), | ||
| Command: pulumi.StringArray{pulumi.String("sleep"), pulumi.String("infinity")}, |
There was a problem hiding this comment.
Use a portable keepalive command for Alpine targets
When this loop reaches golang:1.26.3-alpine, the container runs Alpine/BusyBox sleep, which does not accept infinity; the sbom-golang pod exits immediately and the Deployment never becomes available, so the Pulumi Kubernetes provider can time out while creating the SBOM workloads before the new kubeadm suite starts. Use a portable command such as a shell loop or a large numeric sleep for these target images.
Useful? React with 👍 / 👎.
|
Files inventory check summaryFile checks results against ancestor 8dc5f627: Results for datadog-agent_7.81.0~devel.git.557.6877286d.pipeline.117332409-1_amd64.deb:No change detected |
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 580187002f
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| Containers: corev1.ContainerArray{ | ||
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), |
There was a problem hiding this comment.
Keep the Alpine workload from crash-looping
For the golang:1.26.3-alpine target, this command uses Alpine's BusyBox sleep, which does not reliably accept the GNU-style infinity argument, so that pod can immediately exit and enter CrashLoopBackOff instead of staying resident as the comment promises. In this kubeadm SBOM scenario that makes the Alpine image workload flaky/misleading, especially if the scanner observes running containers or Kubernetes/containerd garbage-collects failed containers before the SBOM assertion runs; use a portable long-running command such as sleep 365d or tail -f /dev/null.
Useful? React with 👍 / 👎.
|
@codex review |
|
Codex Review: Didn't find any major issues. More of your lovely PRs please. ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: c4dcef0147
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| Containers: corev1.ContainerArray{ | ||
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), |
There was a problem hiding this comment.
Route target images through configured pull registry
In GitLab E2E runs the template sets E2E_IMAGE_PULL_REGISTRY to the internal ECR/GAR pull-through caches, and this code creates the pull secret when that config is present, but the workloads still use bare Docker Hub refs like node:.../python:.... In that CI context the secret is for the cache registries, not Docker Hub, so these pods bypass the cache and pull four public images unauthenticated, which can make the new nightly SBOM job fail or flake on Docker Hub rate limits; rewrite these Docker Hub refs via the configured registry as other Docker-Hub-based test apps do.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
The kubeadm scenario routes docker.io pulls through the containerd registry mirror (mirror.gcr.io via hosts.toml); I just fixed the config_path substitution that was silently disabling that mirror, which covers the Docker Hub rate limit concern without rewriting the image refs. The DockerHost scenario does not use a pull through registry, so leaving the refs as is.
| require.NotEmptyf(c, hosts, "No host SBOM yet") | ||
|
|
||
| for _, p := range hosts { | ||
| require.Equalf(c, sbom.SBOMStatus_SUCCESS, p.Status, "host SBOM is not successful") |
There was a problem hiding this comment.
Filter host SBOMs to successful payloads
If the agent emits an early host SBOM payload with a non-success status before a later successful scan, fakeintake retains both payloads and this loop keeps requiring the stale non-success payload to be SUCCESS, so EventuallyWithTf can never converge even though a valid host SBOM arrived. The container SBOM check already filters to SBOMStatus_SUCCESS; apply the same filtering for host payloads before validating the CycloneDX contents.
Useful? React with 👍 / 👎.
|
@codex review |
|
Codex Review: Didn't find any major issues. You're on a roll. ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
|
Codex Review: Didn't find any major issues. Chef's kiss. ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: abc13f9931
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), | ||
| Command: pulumi.StringArray{pulumi.String("sleep"), pulumi.String("infinity")}, |
There was a problem hiding this comment.
Keep the Alpine SBOM target from crash-looping
When this Deployment is created for the golang:1.26.3-alpine target, /bin/sleep is BusyBox and does not accept infinity, so the container exits immediately instead of staying ready. Pulumi waits for Kubernetes Deployments to become available, so the sbom-golang workload can keep the new kubeadm scenario from provisioning successfully before any SBOM assertions run; use a portable long-running command for all images or special-case Alpine.
Useful? React with 👍 / 👎.
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 84f60a6e95
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), | ||
| Command: pulumi.StringArray{pulumi.String("sleep"), pulumi.String("infinity")}, |
There was a problem hiding this comment.
Use a sleep command that works on Alpine
For the golang:...-alpine target, this runs BusyBox sleep, which does not accept infinity; that pod exits and CrashLoopBackOffs instead of being kept alive as intended. The SBOM test then depends on restart/image-GC behavior rather than having a stable Alpine workload resident on the node, so use a long numeric sleep or a shell loop instead.
Useful? React with 👍 / 👎.
| # kubelet uses the systemd cgroup driver; containerd must match or the node never goes Ready. | ||
| sed -i 's/SystemdCgroup = false/SystemdCgroup = true/' /etc/containerd/config.toml | ||
| # Route docker.io pulls through mirror.gcr.io to avoid Docker Hub anonymous rate limits (429). | ||
| sed -i "s#config_path = ''#config_path = '/etc/containerd/certs.d'#" /etc/containerd/config.toml |
There was a problem hiding this comment.
Match containerd's quoted config_path
This substitution does not update the file produced by containerd config default: the default setting is double-quoted (config_path = "", as also patched in this repo's kind cluster template), while this pattern looks for two single quotes. In this kubeadm scenario the Docker Hub mirror config under /etc/containerd/certs.d/docker.io/hosts.toml is therefore ignored, so the new SBOM workloads can still hit anonymous Docker Hub pulls and 429 flakes.
Useful? React with 👍 / 👎.
|
@codex review |
|
Codex Review: Didn't find any major issues. Delightful! ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
|
@codex review |
|
Codex Review: Didn't find any major issues. 🚀 ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 2f4e79e521
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
|
|
||
| out := make([]ftypes.LayerPath, len(diffIDs)) | ||
| for i := range diffIDs { | ||
| out[i] = ftypes.LayerPath{DiffID: diffIDs[i], Path: paths[i]} |
There was a problem hiding this comment.
Preserve Docker layer digests for overlay SBOMs
When Docker overlayfs scanning is used, this now constructs each LayerPath without a Digest, whereas the previous path populated it from the Docker diff ID fallback. The SBOM assertions added in this patch require every layer-attributed component to carry aquasecurity:trivy:LayerDigest; with DockerHostSuite the components will either miss that property or fail the new assertComponentLayer check even though the image was scanned successfully. Populate Digest with the diff ID fallback here when Docker cannot provide a manifest digest.
Useful? React with 👍 / 👎.
There was a problem hiding this comment.
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), | ||
| Command: pulumi.StringArray{pulumi.String("sleep"), pulumi.String("infinity")}, |
There was a problem hiding this comment.
Use a portable keepalive command for Alpine targets
This command is applied to every SBOM target, including golang:1.26.3-alpine; Alpine images use BusyBox sleep, which does not reliably accept infinity, so that pod exits instead of staying Running. The Docker counterpart in this patch already uses tail -f /dev/null for this exact reason; the Kubernetes workload should use the same portable keepalive or the kubeadm suites can miss/fail the Alpine target workload.
Useful? React with 👍 / 👎.
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 4bf76d25ce
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), | ||
| Command: pulumi.StringArray{pulumi.String("sleep"), pulumi.String("infinity")}, |
There was a problem hiding this comment.
Use a portable long-running command for Alpine targets
This command is used for every SBOM target, including golang:1.26.3-alpine, whose BusyBox sleep does not support the GNU infinity argument (the Docker counterpart in this same change uses tail -f /dev/null for that reason). In the kubeadm suites that pod will immediately exit/CrashLoop instead of keeping the image in use, making the Kubernetes SBOM workload deployment unreliable; use the same portable tail -f /dev/null command here.
Useful? React with 👍 / 👎.
4bf76d2 to
0b53c93
Compare
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 0b53c939d0
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| &corev1.ContainerArgs{ | ||
| Name: pulumi.String("main"), | ||
| Image: pulumi.String(t.Image), | ||
| Command: pulumi.StringArray{pulumi.String("sleep"), pulumi.String("infinity")}, |
There was a problem hiding this comment.
Keep Alpine target alive without sleep infinity
When this manifest reaches the sbom-golang target (golang:...-alpine), Alpine's BusyBox sleep does not accept infinity (the Docker manifest in this same package accounts for this by using tail -f /dev/null). That pod exits/CrashLoopBackOff instead of keeping the image resident, so the kubeadm SBOM suites never receive a successful golang container SBOM; use the same portable long-running command here.
Useful? React with 👍 / 👎.
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 407d1a88a8
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| require.NotEmptyf(c, payloads, "No successful container SBOM for %s yet", target.short) | ||
|
|
||
| // Ground truth from the real image config for the expected digest. | ||
| realDiffIDs, realImageID, err := realImageRootFS(target.repo + "@" + target.digest) |
There was a problem hiding this comment.
Avoid registry pulls inside the retry loop
For Docker Hub-backed targets, this runs on every Eventually retry while fakeintake is still converging, and it bypasses the mirror/auth setup that the provisioners added specifically to avoid anonymous Docker Hub rate limits. A few minutes of retries across the node/golang/python/ruby cases can turn an otherwise-correct Agent run into a 429/registry failure; fetch/cache the expected DiffIDs once per target outside the retry loop, or use the same mirror/auth path as the node pulls.
Useful? React with 👍 / 👎.
407d1a8 to
ada8997
Compare
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: ada899749f
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| if err := proto.Unmarshal(inflated, &msg); err != nil { | ||
| return nil, fmt.Errorf("error parsing SBOM payload: data_len=%d, inflated_len=%d, encoding=%s, first_few_data_bytes=%v: %w", len(payload.Data), len(inflated), payload.Encoding, payload.Data[:min(20, len(payload.Data))], err) | ||
| // The Agent's SBOM endpoint can also receive non SBOM bodies (for example | ||
| // empty {} keepalive payloads when the endpoint is redirected to the | ||
| // fakeintake). Skip anything that is not a valid SBOM protobuf so one junk | ||
| // payload does not fail the entire SBOM query. | ||
| return nil, nil |
There was a problem hiding this comment.
Keep reporting malformed SBOM payloads
When the SBOM endpoint receives any non-empty but malformed SBOM protobuf (for example a serialization regression, truncated gzip output, or a payload-format mismatch), this now returns nil, nil and the fakeintake aggregator silently drops it. Because fakeintake retains earlier valid SBOMs, the new SBOM E2Es can continue to pass against stale data instead of surfacing that the current payload is corrupt; this should only ignore the known keepalive body (e.g. {}) and continue returning errors for other parse failures.
Useful? React with 👍 / 👎.
|
@codex review |
There was a problem hiding this comment.
💡 Codex Review
Here are some automated review suggestions for this pull request.
Reviewed commit: 9c3b5ef4be
ℹ️ About Codex in GitHub
Codex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
- Open a pull request for review
- Mark a draft as ready
- Comment "@codex review".
If Codex has suggestions, it will comment; otherwise it will react with 👍.
When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback".
| Urls: layer.URLs, | ||
| // pb field 2 is named "digest" for wire compatibility but | ||
| // has always carried a diff_id. | ||
| Digest: layer.DiffID, |
There was a problem hiding this comment.
Use the generated DiffID field
This package no longer compiles because the generated pb.ContainerImageLayer type still exposes field 2 as DiffID/GetDiffID (see pkg/proto/pbgo/core/workloadmeta.pb.go), not Digest; the reverse conversion has the same protoLayer.Digest reference later in this file. Any build that includes workloadmeta proto conversion will fail until these references use DiffID.
Useful? React with 👍 / 👎.
9c3b5ef to
c652e1d
Compare
|
@codex review |
|
Codex Review: Didn't find any major issues. Already looking forward to the next diff. ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
Provision RHEL 10 and verify the Agent scans both the host root filesystem and the container images on the node, shipping a host SBOM and one SBOM per image to the fakeintake, across three container runtimes behind one shared set of assertions: - Docker (overlay2) - Kubernetes with containerd - Kubernetes with CRIO The target images span the major OS package formats, Debian dpkg, RHEL rpm (single and multiple layer images) and Alpine apk, each also carrying packages from a distinct language ecosystem. The assertions cover both scopes: the host SBOM and, for every image, exact component counts, layer DiffIDs and digests, the RepoDigest and the container artifact scan path. The resulting SBOM is identical whether an image is served by Docker, containerd or CRIO. Existing SBOM e2e coverage was a loose container image smoke test on a Kind cluster. These suites add host filesystem scanning, exact component and layer digest assertions, and real RHEL 10 hosts running Docker and CRIO, not only containerd.
extractLayersFromOverlayFSMounts only parsed lowerdir=/upperdir= overlay options, so a single layer image, which containerd exposes as a single bind mount with no overlay options, yielded zero layers and failed the scan with "unable to extract layers from overlayfs mounts". Treat a bind mount's source as the image's only layer. Adds unit coverage for the single layer bind mount and the overlay source ignored cases.
newFakeContainer indexed three independent slices (RootFS.DiffIDs,
mount lowerdir paths, and workloadmeta imgMeta.Layers) at the same
index to build each ftypes.LayerPath. Any ordering drift between
them produced an SBOM whose LayerDiffID and LayerDigest pointed at
different layers, and the DiffID keyed fanal cache persisted those
entries across agent restarts.
Build each {DiffID, Digest, Path} triple at the call site. For
containerd, walk the snapshotter parent chain via Stat and check it
against the chainIDs from rootfs.diff_ids. A mismatch fails the scan.
Namespace the context before those reads. Containerd indexes content
and snapshots per namespace. For docker, leave LayerDigest empty
rather than guess from workloadmeta history. For CRIO,
GetCRIOImageLayers already pairs structurally, so only the
newFakeContainer signature changes.
…ImageLayer Containerd SBOMs have been emitting LayerDigest equal to LayerDiffID since PR #33641 (e0da7dd) overwrote the manifest digest with the rootfs diff_id to keep Trivy's diff_id keyed cache happy. CRIO never carried a manifest digest at all. This PR exposes it. ContainerImageLayer gains a ManifestDigest field. Containerd populates it from the OCI manifest, restoring the value #33641 discarded. CRIO reads it from the manifest file parseLayerInfo already opens (the layerInfo struct grows a Digest field). Docker leaves it empty. ImageInspect does not surface per layer manifest digests. The Trivy consumer (newFakeContainer) prefers ManifestDigest for ftypes.LayerPath.Digest and falls back to the diff_id when empty. Trivy accepts either as a cache key. Containerd and CRIO SBOMs now have LayerDigest equal to the actual manifest digest, distinct from LayerDiffID. Docker SBOMs are unchanged. The existing Digest field carried a diff_id, so adding the new field forced renaming it to DiffID. Two fields called Digest with opposite semantics would silently change behaviour for every old reader. Renaming makes every call site a compile error and the field names honest. The proto keeps field 2 as string digest, byte identical on the wire and unchanged for any out of tree consumer. The converter at proto.go maps the renamed Go field to and from it. The new manifestDigest is field 5, ignored by old readers per proto3. crio_util.go indexes CRIO storage by diff_id natively, so only the field name changes there. processor.go still emits layer.DiffID into agent payload's Digest, since the backend has joined that to Trivy vulnerability records by diff_id since #33641 and this PR does not touch that contract.
…storage dir buildCRIOLayerPaths derived the layer DiffID from the containers storage overlay directory name (a storage layer id), so crio SBOMs reported DiffIDs that were not the image rootfs diff_ids. Take the DiffID from the paired imgMeta layer instead, matching containerd. The storage path stays the scan Path.
The crio collector set the image ID to the manifest RepoDigest, so crio SBOMs reported a different ImageID than containerd, which uses the OCI config digest. The container storage backend already keys each image by that config digest (the value GetId returns), so use it, normalized to sha256 form. The backend joins vulnerabilities by diff_id, so changing the image ID value is safe.
The image ID change to the OCI config digest also shifted the expected IDs and the skip existing store seeds in the collector tests. Update generateImageEventFromContainer, the image collection optimization, and the ID format cases to the config digest scheme. The convertImageToEvent cases were already updated. Also corrects a stale comment in generateImageEventsFromImageList.
The docker overlay2 scan built LayerPath entries with only a DiffID and left Digest empty, so SBOM components carried a LayerDiffID but no LayerDigest property. The docker daemon does not expose a reliable per layer manifest digest, so fall back to the diff_id (a valid sha256), mirroring the crio path (buildCRIOLayerPaths). Each SBOM component now carries a LayerDigest, which the container SBOM e2e assertions require. Also fixes the cascade where the missing LayerDigest made every container subtest burn its full 8 minute timeout, pushing later target images and the host SBOM past the fakeintake retention window so they read as absent.
Main renamed the ContainerImageLayer proto field from digest to diffID. The rebase kept the e2e additions but left two halves out of sync: the committed pb.go was the stale pre rename copy, so the bazel write_pb_go check flagged it, and the def to proto converter still referenced the old pb field name. Regenerate pb.go with the current toolchain (preserving the ManifestDigest addition) and point the converter at the DiffID field in both directions, so the package builds and the proto stays consistent with main.
c652e1d to
6877286
Compare
|
@codex review |
|
Codex Review: Didn't find any major issues. Hooray! ℹ️ About Codex in GitHubCodex has been enabled to automatically review pull requests in this repo. Reviews are triggered when you
If Codex has suggestions, it will comment; otherwise it will react with 👍. When you sign up for Codex through ChatGPT, Codex can also answer questions or update the PR, like "@codex address that feedback". |
|
The PR currently contains various fixes on top of the e2e test commit to be able to pass the CI. These commits will be removed once the following PR will be merged:
Please review only the first commit (e2e test). |
Provision RHEL 10 and verify the Agent scans both the host root
filesystem and the container images on the node, shipping a host SBOM
and one SBOM per image to the fakeintake, across three container
runtimes behind one shared set of assertions:
The target images span the major OS package formats, Debian dpkg, RHEL
rpm (single and multiple layer images) and Alpine apk, each also
carrying packages from a distinct language ecosystem. The assertions
cover both scopes: the host SBOM and, for every image, exact component
counts, layer DiffIDs and digests, the RepoDigest and the container
artifact scan path. The resulting SBOM is identical whether an image is
served by Docker, containerd or CRIO.
Existing SBOM e2e coverage was a loose container image smoke test on a
Kind cluster. These suites add host filesystem scanning, exact component
and layer digest assertions, and real RHEL 10 hosts running Docker and
CRIO, not only containerd.