Skip to content

Commit e69e8dc

Browse files
authored
Enable standalone configuration (#53)
Enable a standalone configuration of the AppWrapper controller that can be run without assuming a Kueue installation. In this mode, the AppWrapper controller still obeys the "Suspend protocol", but assumes that all changes to the value of Suspend are done externally.
1 parent 38e531d commit e69e8dc

23 files changed

+540
-43
lines changed

.github/workflows/CI-standalone.yaml

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
name: CI-standalone
2+
on:
3+
push:
4+
branches: [main]
5+
pull_request:
6+
branches:
7+
- 'main'
8+
- 'release-*'
9+
10+
jobs:
11+
CI:
12+
runs-on: ubuntu-latest
13+
14+
steps:
15+
- name: checkout code
16+
uses: actions/checkout@v4
17+
with:
18+
fetch-depth: 0
19+
20+
- name: Set latest tag and branch name
21+
run: |
22+
echo "GIT_BRANCH=gha-ci" >> $GITHUB_ENV
23+
echo "TAG=$GITHUB_RUN_ID" >> $GITHUB_ENV
24+
25+
- name: Set up Go
26+
uses: actions/setup-go@v5
27+
with:
28+
go-version-file: './go.mod'
29+
30+
- name: Set up Python
31+
uses: actions/setup-python@v5
32+
with:
33+
python-version: '3.11'
34+
35+
- name: Run pre-commit checks
36+
run: |
37+
pip install pre-commit
38+
pre-commit run --show-diff-on-failure --color=always --all-files
39+
40+
- name: Build
41+
run: make build
42+
43+
- name: Build Image
44+
run: |
45+
make docker-build -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }}
46+
47+
- name: Run Unit Tests
48+
run: make test
49+
50+
- name: Create and configure cluster
51+
run: ./hack/create-test-cluster.sh
52+
53+
- name: Deploy AppWrapper controller
54+
run: |
55+
make install -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }}
56+
make kind-push -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }}
57+
make deploy-aw -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }}
58+
59+
- name: Run E2E tests
60+
run: LABEL_FILTER="Standalone" ./hack/run-tests-on-cluster.sh

.github/workflows/CI.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ jobs:
4848
run: make test
4949

5050
- name: Create and configure cluster
51-
run: ./hack/create-test-cluster.sh
51+
run: LABEL_FILTER=Kueue ./hack/create-test-cluster.sh
5252

5353
- name: Deploy Kueue
5454
run: ./hack/deploy-kueue.sh

Dockerfile

+5-3
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ COPY go.sum go.sum
1212
RUN go mod download
1313

1414
# Copy the go source
15-
COPY cmd/main.go cmd/main.go
15+
COPY cmd/ cmd/
1616
COPY api/ api/
1717
COPY internal/ internal/
1818

@@ -21,13 +21,15 @@ COPY internal/ internal/
2121
# was called. For example, if we call make docker-build in a local env which has the Apple Silicon M1 OS
2222
# the docker BUILDPLATFORM arg will be linux/arm64, whereas for Apple x86 it will be linux/amd64. Therefore,
2323
# by leaving it empty we can ensure that the container and binary shipped on it will have the same platform.
24-
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/main.go
24+
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager cmd/unified/main.go
25+
RUN CGO_ENABLED=0 GOOS=${TARGETOS:-linux} GOARCH=${TARGETARCH} go build -a -o manager-aw cmd/standalone/main.go
2526

2627
# Use distroless as minimal base image to package the manager binaries
2728
# Refer to https://github.com/GoogleContainerTools/distroless for more details
2829
FROM gcr.io/distroless/static:nonroot
2930
WORKDIR /
3031
COPY --from=builder /workspace/manager .
32+
COPY --from=builder /workspace/manager-aw .
3133
USER 65532:65532
3234

33-
ENTRYPOINT ["/manager"]
35+
CMD ["/manager"]

Makefile

+23-2
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,22 @@ build: manifests generate fmt vet ## Build manager binary.
112112
-X 'main.BuildVersion=$(BUILD_VERSION)' \
113113
-X 'main.BuildDate=$(BUILD_DATE)' \
114114
" \
115-
-o bin/manager cmd/main.go
115+
-o bin/manager cmd/unified/main.go
116+
go build \
117+
-ldflags " \
118+
-X 'main.BuildVersion=$(BUILD_VERSION)' \
119+
-X 'main.BuildDate=$(BUILD_DATE)' \
120+
" \
121+
-o bin/manager-aw cmd/standalone/main.go
116122

117123
.PHONY: run
118124
run: manifests generate fmt vet ## Run a controller from your host (webhooks are disabled).
119-
ENABLE_WEBHOOKS=false go run ./cmd/main.go --metrics-bind-address=localhost:0 --health-probe-bind-address=localhost:0
125+
ENABLE_WEBHOOKS=false go run ./cmd/unified/main.go --metrics-bind-address=localhost:0 --health-probe-bind-address=localhost:0
126+
127+
.PHONY: run-aw
128+
run-aw: manifests generate fmt vet ## Run the standalone AppWrapper controller from your host (webhooks are disabled).
129+
ENABLE_WEBHOOKS=false go run ./cmd/standalone/main.go --metrics-bind-address=localhost:0 --health-probe-bind-address=localhost:0
130+
120131

121132
# If you wish to build the manager image targeting other platforms you can use the --platform flag.
122133
# (i.e. docker build --platform linux/arm64). However, you must enable docker buildKit for it.
@@ -183,10 +194,20 @@ deploy: manifests kustomize ## Deploy controller to the K8s cluster specified in
183194
$(KUSTOMIZE) build config/default | $(KUBECTL) apply -f -
184195
@$(call clean-manifests)
185196

197+
.PHONY: deploy-aw
198+
deploy-aw: manifests kustomize ## Deploy the standalone AppWrapper controller to the K8s cluster specified in ~/.kube/config.
199+
cd config/manager && $(KUSTOMIZE) edit set image controller=${IMG}
200+
$(KUSTOMIZE) build config/standalone | $(KUBECTL) apply -f -
201+
@$(call clean-manifests)
202+
186203
.PHONY: undeploy
187204
undeploy: kustomize ## Undeploy controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
188205
$(KUSTOMIZE) build config/default | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
189206

207+
.PHONY: undeploy-aw
208+
undeploy-aw: kustomize ## Undeploy the standalone AppWrapper controller from the K8s cluster specified in ~/.kube/config. Call with ignore-not-found=true to ignore resource not found errors during deletion.
209+
$(KUSTOMIZE) build config/standalone | $(KUBECTL) delete --ignore-not-found=$(ignore-not-found) -f -
210+
190211
##@ Dependencies
191212

192213
## Location to install dependencies to

cmd/standalone/main.go

+154
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
/*
2+
Copyright 2024 IBM Corporation.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"crypto/tls"
21+
"flag"
22+
"os"
23+
24+
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
25+
// to ensure that exec-entrypoint and run can make use of them.
26+
_ "k8s.io/client-go/plugin/pkg/client/auth"
27+
28+
"k8s.io/apimachinery/pkg/runtime"
29+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
30+
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
31+
ctrl "sigs.k8s.io/controller-runtime"
32+
"sigs.k8s.io/controller-runtime/pkg/healthz"
33+
"sigs.k8s.io/controller-runtime/pkg/log/zap"
34+
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
35+
"sigs.k8s.io/controller-runtime/pkg/webhook"
36+
37+
workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2"
38+
"github.com/project-codeflare/appwrapper/internal/config"
39+
"github.com/project-codeflare/appwrapper/internal/controller"
40+
//+kubebuilder:scaffold:imports
41+
)
42+
43+
var (
44+
scheme = runtime.NewScheme()
45+
setupLog = ctrl.Log.WithName("setup")
46+
BuildVersion = "UNKNOWN"
47+
BuildDate = "UNKNOWN"
48+
)
49+
50+
func init() {
51+
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
52+
utilruntime.Must(workloadv1beta2.AddToScheme(scheme))
53+
//+kubebuilder:scaffold:scheme
54+
}
55+
56+
func main() {
57+
var metricsAddr string
58+
var enableLeaderElection bool
59+
var probeAddr string
60+
var secureMetrics bool
61+
var enableHTTP2 bool
62+
63+
awConfig := config.AppWrapperConfig{StandaloneMode: true, ManageJobsWithoutQueueName: false}
64+
65+
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
66+
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
67+
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
68+
"Enable leader election for controller manager. "+
69+
"Enabling this will ensure there is only one active controller manager.")
70+
flag.BoolVar(&secureMetrics, "metrics-secure", false,
71+
"If set the metrics endpoint is served securely")
72+
flag.BoolVar(&enableHTTP2, "enable-http2", false,
73+
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
74+
opts := zap.Options{
75+
Development: true,
76+
}
77+
opts.BindFlags(flag.CommandLine)
78+
flag.Parse()
79+
80+
ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))
81+
setupLog.Info("Build info", "version", BuildVersion, "date", BuildDate)
82+
setupLog.Info("Configuration", "config", awConfig)
83+
84+
// if the enable-http2 flag is false (the default), http/2 should be disabled
85+
// due to its vulnerabilities. More specifically, disabling http/2 will
86+
// prevent the servers from being vulnerable to the HTTP/2 Stream Cancellation and
87+
// Rapid Reset CVEs. For more information see:
88+
// - https://github.com/advisories/GHSA-qppj-fm5r-hxr3
89+
// - https://github.com/advisories/GHSA-4374-p667-p6c8
90+
disableHTTP2 := func(c *tls.Config) {
91+
setupLog.Info("disabling http/2")
92+
c.NextProtos = []string{"http/1.1"}
93+
}
94+
95+
tlsOpts := []func(*tls.Config){}
96+
if !enableHTTP2 {
97+
tlsOpts = append(tlsOpts, disableHTTP2)
98+
}
99+
100+
webhookServer := webhook.NewServer(webhook.Options{
101+
TLSOpts: tlsOpts,
102+
})
103+
104+
mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
105+
Scheme: scheme,
106+
Metrics: metricsserver.Options{
107+
BindAddress: metricsAddr,
108+
SecureServing: secureMetrics,
109+
TLSOpts: tlsOpts,
110+
},
111+
WebhookServer: webhookServer,
112+
HealthProbeBindAddress: probeAddr,
113+
LeaderElection: enableLeaderElection,
114+
LeaderElectionID: "f134c674.codeflare.dev",
115+
// LeaderElectionReleaseOnCancel defines if the leader should step down voluntarily
116+
// when the Manager ends. This requires the binary to immediately end when the
117+
// Manager is stopped, otherwise, this setting is unsafe. Setting this significantly
118+
// speeds up voluntary leader transitions as the new leader doesn't have to wait
119+
// LeaseDuration time first.
120+
//
121+
// In the default scaffold provided, the program ends immediately after
122+
// the manager stops, so it would be fine to enable this option. However,
123+
// if you are doing, or intend to do, any operation such as performing cleanups
124+
// after the manager stops then its usage might be unsafe.
125+
// LeaderElectionReleaseOnCancel: true,
126+
})
127+
if err != nil {
128+
setupLog.Error(err, "unable to start manager")
129+
os.Exit(1)
130+
}
131+
132+
ctx := ctrl.SetupSignalHandler()
133+
err = controller.SetupWithManager(ctx, mgr, &awConfig)
134+
if err != nil {
135+
setupLog.Error(err, "unable to start appwrapper controllers")
136+
os.Exit(1)
137+
}
138+
139+
//+kubebuilder:scaffold:builder
140+
if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil {
141+
setupLog.Error(err, "unable to set up health check")
142+
os.Exit(1)
143+
}
144+
if err := mgr.AddReadyzCheck("readyz", healthz.Ping); err != nil {
145+
setupLog.Error(err, "unable to set up ready check")
146+
os.Exit(1)
147+
}
148+
149+
setupLog.Info("starting manager")
150+
if err := mgr.Start(ctx); err != nil {
151+
setupLog.Error(err, "problem running manager")
152+
os.Exit(1)
153+
}
154+
}

cmd/main.go renamed to cmd/unified/main.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func main() {
6262
var secureMetrics bool
6363
var enableHTTP2 bool
6464

65-
awConfig := config.AppWrapperConfig{}
65+
awConfig := config.AppWrapperConfig{StandaloneMode: false}
6666

6767
flag.StringVar(&metricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.")
6868
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")

0 commit comments

Comments
 (0)