Skip to content

Commit 059a835

Browse files
authored
RHOAIENG-16517: chore(tests): add new changed files rebuild helper for GitHub Actions PRs (opendatahub-io#800)
1 parent 7419685 commit 059a835

File tree

11 files changed

+516
-0
lines changed

11 files changed

+516
-0
lines changed

.github/workflows/build-notebooks-pr.yaml

+4
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,10 @@ jobs:
2222
steps:
2323
- uses: actions/checkout@v4
2424

25+
- uses: actions/setup-go@v5
26+
with:
27+
cache-dependency-path: "**/*.sum"
28+
2529
- name: Determine targets to build based on changed files
2630
run: |
2731
set -x

Makefile

+13
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# https://stackoverflow.com/questions/18136918/how-to-get-current-relative-directory-of-your-makefile
22
ROOT_DIR := $(dir $(realpath $(lastword $(MAKEFILE_LIST))))
3+
SELF ::= $(firstword $(MAKEFILE_LIST))
34

45
# https://tech.davis-hansson.com/p/make/
56
SHELL := bash
@@ -98,6 +99,18 @@ define image
9899
)
99100
endef
100101

102+
####################################### Build helpers #######################################
103+
104+
# https://stackoverflow.com/questions/78899903/how-to-create-a-make-target-which-is-an-implicit-dependency-for-all-other-target
105+
skip-init-for := deploy% undeploy% test% scan-image-vulnerabilities
106+
ifneq (,$(filter-out $(skip-init-for),$(MAKECMDGOALS) $(.DEFAULT_GOAL)))
107+
$(SELF): bin/buildinputs
108+
endif
109+
110+
bin/buildinputs: scripts/buildinputs/buildinputs.go scripts/buildinputs/go.mod scripts/buildinputs/go.sum
111+
$(info Building a Go helper for Dockerfile dependency analysis...)
112+
go build -C "scripts/buildinputs" -o "$(ROOT_DIR)/$@" ./...
113+
101114
####################################### Buildchain for Python 3.9 using ubi9 #######################################
102115

103116
# Build and push base-ubi9-python-3.9 image to the registry

ci/cached-builds/gha_pr_changed_files.py

+17
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import os
34
import pathlib
@@ -48,9 +49,22 @@ def should_build_target(changed_files: list[str], target_directories: list[str])
4849
"""Returns truthy if there is at least one changed file necessitating a build.
4950
Falsy (empty) string is returned otherwise."""
5051
for directory in target_directories:
52+
# detect change in the Dockerfile directory
5153
for changed_file in changed_files:
5254
if changed_file.startswith(directory):
5355
return changed_file
56+
# detect change in any of the files outside
57+
stdout = subprocess.check_output([PROJECT_ROOT / "bin/buildinputs", directory + "/Dockerfile"],
58+
text=True, cwd=PROJECT_ROOT)
59+
logging.debug(f"{directory=} {stdout=}")
60+
if stdout == "\n":
61+
# no dependencies
62+
continue
63+
dependencies: list[str] = json.loads(stdout)
64+
for dependency in dependencies:
65+
for changed_file in changed_files:
66+
if changed_file.startswith(dependency):
67+
return changed_file
5468
return ""
5569

5670

@@ -78,3 +92,6 @@ def test_analyze_build_directories(self):
7892
assert set(directories) == {"base/ubi9-python-3.9",
7993
"intel/base/gpu/ubi9-python-3.9",
8094
"jupyter/intel/pytorch/ubi9-python-3.9"}
95+
96+
def test_should_build_target(self):
97+
assert "" == should_build_target(["README.md"], ["jupyter/datascience/ubi9-python-3.11"])

scripts/README.md

+12
Original file line numberDiff line numberDiff line change
@@ -47,3 +47,15 @@ Create a Python 3.11 version based on the Python 3.9 version for each one in the
4747
```sh
4848
python scripts/new_python_based_image.py --context-dir . --source 3.9 --target 3.11 --match ./ --log-level DEBUG
4949
```
50+
51+
## buildinputs/
52+
53+
CLI tool written in Go that computes the list of input files required to build a Dockerfile.
54+
This is useful to determine what images need to be built on CI when testing a GitHub Pull Request.
55+
56+
### Examples
57+
58+
```shell
59+
make bin/buildinputs
60+
bin/buildinputs jupyter/datascience/ubi9-python-3.11/Dockerfile 2>/dev/null
61+
```

scripts/buildinputs/README.md

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# buildinputs
2+
3+
Tool to determine what files are required to build a given Dockerfile.
4+
5+
## Design
6+
7+
There are multiple possible solutions to this problem.
8+
9+
### Convert to LLB and analyze the LLB (chosen approach)
10+
11+
At present, the tool works by converting the Dockerfile into LLB (BuildKit's protobuf-based representation) and then
12+
analysing that to get the referenced files.
13+
14+
### Parse the Dockerfile and resolve the AST
15+
16+
Docker provides Go functions to parse a Dockerfile to AST.
17+
It does not provide public functions for the AST resolution, however.
18+
19+
The procedure can be copied either from the LLB convertor code or from regular build code.
20+
21+
It requires handling the following instructions (at minimum):
22+
23+
* `ARG`, `ENV` (file paths will require arg substitution)
24+
* `ADD`, `COPY` (the two main ways to pull files into the build)
25+
* `RUN` (has a `--mount=bind` flag)
26+
* `ONBUILD` in a parent image (that would be probably fine to ignore, as it's not OCI)
27+
28+
### Parse (lex) the Dockerfile ourselves
29+
30+
This is also doable, and it would avoid having to use Go.
31+
32+
The main limitation is that if the implementation we have is incomplete,
33+
then using a new Dockerfile feature (HEREDOC, ...) would require first implementing it in our parser,
34+
which limits further progress.
35+
36+
Another difficulty is that we would surely have bugs and that would decrease the team's confidence in the CI system.

scripts/buildinputs/buildinputs.go

+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
package main
2+
3+
import (
4+
"flag"
5+
"fmt"
6+
)
7+
8+
func main() {
9+
flag.Parse()
10+
for _, dockerfile := range flag.Args() {
11+
deps := getDockerfileDeps(dockerfile)
12+
fmt.Println(deps)
13+
}
14+
}
+49
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
package main
2+
3+
import (
4+
"encoding/json"
5+
"os"
6+
"path/filepath"
7+
"testing"
8+
)
9+
10+
func globDockerfiles(dir string) ([]string, error) {
11+
files := make([]string, 0)
12+
err := filepath.Walk(dir, func(path string, f os.FileInfo, err error) error {
13+
if filepath.Base(path) == "Dockerfile" {
14+
files = append(files, path)
15+
}
16+
return nil
17+
})
18+
19+
return files, err
20+
}
21+
22+
// TestParseAllDockerfiles checks there are no panics when processing all Dockerfiles we have
23+
func TestParseAllDockerfiles(t *testing.T) {
24+
projectRoot := "../../"
25+
dockerfiles := noErr2(globDockerfiles(projectRoot))
26+
27+
if len(dockerfiles) < 6 {
28+
t.Fatalf("not enough Dockerfiles found, got %+v", dockerfiles)
29+
}
30+
31+
for _, dockerfile := range dockerfiles {
32+
t.Run(dockerfile, func(t *testing.T) {
33+
result := getDockerfileDeps(dockerfile)
34+
if result == "" {
35+
// no deps in the dockerfile
36+
return
37+
}
38+
data := make([]string, 0)
39+
noErr(json.Unmarshal([]byte(result), &data))
40+
for _, path := range data {
41+
stat := noErr2(os.Stat(filepath.Join(projectRoot, path)))
42+
if stat.IsDir() {
43+
// log this very interesting observation
44+
t.Logf("dockerfile copies in a whole directory: %s", path)
45+
}
46+
}
47+
})
48+
}
49+
}

scripts/buildinputs/dockerfile.go

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
package main
2+
3+
import (
4+
"context"
5+
"encoding/json"
6+
"fmt"
7+
"github.com/containerd/platforms"
8+
"github.com/moby/buildkit/client/llb"
9+
"github.com/moby/buildkit/client/llb/sourceresolver"
10+
"github.com/moby/buildkit/frontend/dockerfile/dockerfile2llb"
11+
"github.com/moby/buildkit/frontend/dockerfile/parser"
12+
"github.com/moby/buildkit/frontend/dockerui"
13+
"github.com/moby/buildkit/solver/pb"
14+
"github.com/opencontainers/go-digest"
15+
ocispecs "github.com/opencontainers/image-spec/specs-go/v1"
16+
"github.com/pkg/errors"
17+
"log"
18+
"os"
19+
"strings"
20+
)
21+
22+
func getDockerfileDeps(dockerfile string) string {
23+
ctx := context.Background()
24+
data := noErr2(os.ReadFile(dockerfile))
25+
26+
st, _, _, _, err := dockerfile2llb.Dockerfile2LLB(ctx, data, dockerfile2llb.ConvertOpt{
27+
// building an image requires fetching the metadata for its parent
28+
// this fakes a parent so that this tool does not need to do network i/o
29+
MetaResolver: &testResolver{
30+
// random digest value
31+
digest: "sha256:a1c7d58d98df3f9a67eda799200655b923ebc7a41cad1d9bb52723ae1c81ad17",
32+
dir: "/",
33+
platform: "linux/amd64",
34+
},
35+
Config: dockerui.Config{
36+
BuildArgs: map[string]string{"BASE_IMAGE": "fake-image"},
37+
BuildPlatforms: []ocispecs.Platform{{OS: "linux", Architecture: "amd64"}},
38+
},
39+
Warn: func(rulename, description, url, fmtmsg string, location []parser.Range) {
40+
log.Printf(rulename, description, url, fmtmsg, location)
41+
},
42+
})
43+
noErr(err)
44+
45+
definition := noErr2(st.Marshal(ctx))
46+
return getOpSourceFollowPaths(definition)
47+
}
48+
49+
func getOpSourceFollowPaths(definition *llb.Definition) string {
50+
// https://earthly.dev/blog/compiling-containers-dockerfiles-llvm-and-buildkit/
51+
// https://stackoverflow.com/questions/73067660/what-exactly-is-the-frontend-and-backend-of-docker-buildkit
52+
53+
ops := make([]llbOp, 0)
54+
for _, dt := range definition.Def {
55+
var op pb.Op
56+
if err := op.UnmarshalVT(dt); err != nil {
57+
panic("failed to parse op")
58+
}
59+
dgst := digest.FromBytes(dt)
60+
ent := llbOp{Op: &op, Digest: dgst, OpMetadata: definition.Metadata[dgst].ToPB()}
61+
ops = append(ops, ent)
62+
}
63+
64+
for _, op := range ops {
65+
switch op := op.Op.Op.(type) {
66+
case *pb.Op_Source:
67+
if strings.HasPrefix(op.Source.Identifier, "docker-image://") {
68+
// no-op
69+
} else if strings.HasPrefix(op.Source.Identifier, "local://") {
70+
paths := op.Source.Attrs[pb.AttrFollowPaths]
71+
return paths
72+
} else {
73+
panic(fmt.Errorf("unexpected prefix %v", op.Source.Identifier))
74+
}
75+
}
76+
}
77+
return ""
78+
}
79+
80+
// llbOp holds data for a single loaded LLB op
81+
type llbOp struct {
82+
Op *pb.Op
83+
Digest digest.Digest
84+
OpMetadata *pb.OpMetadata
85+
}
86+
87+
// testResolver provides a fake parent image manifest for the build
88+
type testResolver struct {
89+
digest digest.Digest
90+
dir string
91+
platform string
92+
}
93+
94+
func (r *testResolver) ResolveImageConfig(ctx context.Context, ref string, opt sourceresolver.Opt) (string, digest.Digest, []byte, error) {
95+
var img struct {
96+
Config struct {
97+
Env []string `json:"Env,omitempty"`
98+
WorkingDir string `json:"WorkingDir,omitempty"`
99+
User string `json:"User,omitempty"`
100+
} `json:"config,omitempty"`
101+
}
102+
103+
img.Config.WorkingDir = r.dir
104+
105+
if opt.Platform != nil {
106+
r.platform = platforms.Format(*opt.Platform)
107+
}
108+
109+
dt, err := json.Marshal(img)
110+
if err != nil {
111+
return "", "", nil, errors.WithStack(err)
112+
}
113+
return ref, r.digest, dt, nil
114+
}

scripts/buildinputs/go.mod

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
module dockerfile
2+
3+
go 1.22.0
4+
5+
require (
6+
github.com/containerd/platforms v0.2.1
7+
github.com/moby/buildkit v0.18.1
8+
github.com/opencontainers/go-digest v1.0.0
9+
github.com/opencontainers/image-spec v1.1.0
10+
github.com/pkg/errors v0.9.1
11+
)
12+
13+
require (
14+
github.com/AdaLogics/go-fuzz-headers v0.0.0-20230811130428-ced1acdcaa24 // indirect
15+
github.com/agext/levenshtein v1.2.3 // indirect
16+
github.com/containerd/containerd v1.7.24 // indirect
17+
github.com/containerd/errdefs v0.3.0 // indirect
18+
github.com/containerd/log v0.1.0 // indirect
19+
github.com/containerd/ttrpc v1.2.5 // indirect
20+
github.com/containerd/typeurl/v2 v2.2.3 // indirect
21+
github.com/distribution/reference v0.6.0 // indirect
22+
github.com/docker/go-connections v0.5.0 // indirect
23+
github.com/docker/go-units v0.5.0 // indirect
24+
github.com/felixge/httpsnoop v1.0.4 // indirect
25+
github.com/go-logr/logr v1.4.2 // indirect
26+
github.com/go-logr/stdr v1.2.2 // indirect
27+
github.com/gogo/protobuf v1.3.2 // indirect
28+
github.com/golang/protobuf v1.5.4 // indirect
29+
github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect
30+
github.com/google/uuid v1.6.0 // indirect
31+
github.com/hashicorp/errwrap v1.1.0 // indirect
32+
github.com/hashicorp/go-multierror v1.1.1 // indirect
33+
github.com/in-toto/in-toto-golang v0.5.0 // indirect
34+
github.com/klauspost/compress v1.17.11 // indirect
35+
github.com/moby/docker-image-spec v1.3.1 // indirect
36+
github.com/moby/locker v1.0.1 // indirect
37+
github.com/moby/patternmatcher v0.6.0 // indirect
38+
github.com/moby/sys/signal v0.7.1 // indirect
39+
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
40+
github.com/secure-systems-lab/go-securesystemslib v0.4.0 // indirect
41+
github.com/shibumi/go-pathspec v1.3.0 // indirect
42+
github.com/sirupsen/logrus v1.9.3 // indirect
43+
github.com/tonistiigi/fsutil v0.0.0-20241121093142-31cf1f437184 // indirect
44+
github.com/tonistiigi/go-csvvalue v0.0.0-20240710180619-ddb21b71c0b4 // indirect
45+
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.46.1 // indirect
46+
go.opentelemetry.io/contrib/instrumentation/net/http/httptrace/otelhttptrace v0.46.1 // indirect
47+
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
48+
go.opentelemetry.io/otel v1.28.0 // indirect
49+
go.opentelemetry.io/otel/metric v1.28.0 // indirect
50+
go.opentelemetry.io/otel/sdk v1.28.0 // indirect
51+
go.opentelemetry.io/otel/trace v1.28.0 // indirect
52+
golang.org/x/crypto v0.27.0 // indirect
53+
golang.org/x/net v0.29.0 // indirect
54+
golang.org/x/sync v0.8.0 // indirect
55+
golang.org/x/sys v0.26.0 // indirect
56+
golang.org/x/text v0.18.0 // indirect
57+
google.golang.org/genproto/googleapis/rpc v0.0.0-20240701130421-f6361c86f094 // indirect
58+
google.golang.org/grpc v1.66.3 // indirect
59+
google.golang.org/protobuf v1.35.1 // indirect
60+
)

0 commit comments

Comments
 (0)