Skip to content

Commit a978538

Browse files
authored
Enhance end-to-end tests (#62)
* add e2e tests for completion detection and load testing * add e2e tests for validating webhook
1 parent 8611ddc commit a978538

File tree

7 files changed

+453
-17
lines changed

7 files changed

+453
-17
lines changed

.github/workflows/CI-standalone.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,4 @@ jobs:
5757
make deploy-aw -e GIT_BRANCH=${{ env.GIT_BRANCH }} TAG=${{ env.GIT_BRANCH }}-${{ env.TAG }}
5858
5959
- name: Run E2E tests
60-
run: LABEL_FILTER="Standalone" ./hack/run-tests-on-cluster.sh
60+
run: LABEL_FILTER="Standalone,Webhook" ./hack/run-tests-on-cluster.sh

hack/e2e-util.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ function configure_cluster {
167167
echo ""
168168

169169
# Sleep until the kubeflow operator is running
170-
echo "Waiting for pods in the kueueflow namespace to become ready"
170+
echo "Waiting for pods in the kubeflow namespace to become ready"
171171
while [[ $(kubectl get pods -n kubeflow -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}' | tr ' ' '\n' | sort -u) != "True" ]]
172172
do
173173
echo -n "." && sleep 1;

hack/run-tests-on-cluster.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ export GORACE=1
1919
export CLEANUP_CLUSTER=${CLEANUP_CLUSTER:-"false"}
2020
export CLUSTER_STARTED="true"
2121
export KUTTL_TEST_SUITES=("")
22-
export LABEL_FILTER=${LABEL_FILTER:-"Kueue"}
22+
export LABEL_FILTER=${LABEL_FILTER:-"Kueue,Webhook"}
2323

2424
source ${ROOT_DIR}/hack/e2e-util.sh
2525

test/e2e/appwrapper_test.go

+227-10
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,17 @@ limitations under the License.
1717
package e2e
1818

1919
import (
20+
"context"
21+
"encoding/json"
22+
"fmt"
2023
"time"
2124

2225
. "github.com/onsi/ginkgo/v2"
2326
. "github.com/onsi/gomega"
27+
"k8s.io/apimachinery/pkg/runtime"
28+
"k8s.io/apimachinery/pkg/types"
29+
"k8s.io/apimachinery/pkg/util/wait"
30+
"k8s.io/utils/ptr"
2431

2532
workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2"
2633
)
@@ -72,14 +79,137 @@ var _ = Describe("AppWrapper E2E Test", func() {
7279
appwrappers = append(appwrappers, aw)
7380
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
7481
})
82+
})
83+
84+
// TODO: KubeRay GVKs (would have to deploy KubeRay operator on e2e test cluster)
85+
86+
// TODO: JobSets (would have to deploy JobSet controller on e2e test cluster)
87+
88+
Describe("Webhook Enforces AppWrapper Invariants", Label("Webhook"), func() {
89+
Context("Structural Invariants", func() {
90+
It("There must be at least one podspec (a)", func() {
91+
aw := toAppWrapper()
92+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
93+
})
94+
95+
It("There must be at least one podspec (b)", func() {
96+
aw := toAppWrapper(service())
97+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
98+
})
99+
100+
It("There must be no more than 8 podspecs", func() {
101+
aw := toAppWrapper(pod(100), pod(100), pod(100), pod(100), pod(100), pod(100), pod(100), pod(100), pod(100))
102+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
103+
})
104+
105+
It("Non-existent PodSpec paths are rejected", func() {
106+
comp := deployment(4, 100)
107+
comp.PodSets[0].Path = "template.spec.missing"
108+
aw := toAppWrapper(comp)
109+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
110+
111+
comp.PodSets[0].Path = ""
112+
aw = toAppWrapper(comp)
113+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
114+
})
115+
116+
It("PodSpec paths must refer to a PodSpecTemplate", func() {
117+
comp := deployment(4, 100)
118+
comp.PodSets[0].Path = "template.spec.template.metadata"
119+
aw := toAppWrapper(comp)
120+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
121+
})
122+
123+
It("Validation of Array and Map path elements", func() {
124+
comp := jobSet(2, 100)
125+
comp.PodSets[0].Path = "template.spec.replicatedJobs.template.spec.template"
126+
aw := toAppWrapper(comp)
127+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
128+
129+
comp.PodSets[0].Path = "template.spec.replicatedJobs"
130+
aw = toAppWrapper(comp)
131+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
132+
133+
comp.PodSets[0].Path = "template.spec.replicatedJobs[0].template[0].spec.template"
134+
aw = toAppWrapper(comp)
135+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
136+
137+
comp.PodSets[0].Path = "template.spec.replicatedJobs[10].template.spec.template"
138+
aw = toAppWrapper(comp)
139+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
140+
141+
comp.PodSets[0].Path = "template.spec.replicatedJobs[-1].template.spec.template"
142+
aw = toAppWrapper(comp)
143+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
144+
145+
comp.PodSets[0].Path = "template.spec.replicatedJobs[a10].template.spec.template"
146+
aw = toAppWrapper(comp)
147+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
148+
149+
comp.PodSets[0].Path = "template.spec.replicatedJobs[1"
150+
aw = toAppWrapper(comp)
151+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
152+
153+
comp.PodSets[0].Path = "template.spec.replicatedJobs[1]].template.spec.template"
154+
aw = toAppWrapper(comp)
155+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
156+
})
157+
})
158+
159+
It("Components in other namespaces are rejected", func() {
160+
aw := toAppWrapper(namespacedPod("test", 100))
161+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
162+
})
163+
164+
It("Nested AppWrappers are rejected", func() {
165+
child := toAppWrapper(pod(100))
166+
childBytes, err := json.Marshal(child)
167+
Expect(err).ShouldNot(HaveOccurred())
168+
aw := toAppWrapper(pod(100), workloadv1beta2.AppWrapperComponent{
169+
PodSets: []workloadv1beta2.AppWrapperPodSet{},
170+
Template: runtime.RawExtension{Raw: childBytes},
171+
})
172+
Expect(getClient(ctx).Create(ctx, aw)).ShouldNot(Succeed())
173+
})
174+
175+
It("Sensitive fields of aw.Spec.Components are immutable", func() {
176+
aw := createAppWrapper(ctx, pod(1000), deployment(4, 1000))
177+
appwrappers = append(appwrappers, aw)
178+
awName := types.NamespacedName{Name: aw.Name, Namespace: aw.Namespace}
179+
180+
Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) {
181+
aw.Spec.Components[0].Template = aw.Spec.Components[1].Template
182+
})).ShouldNot(Succeed())
75183

76-
// TODO: Additional Kubeflow Training Operator GVKs of interest
184+
Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) {
185+
aw.Spec.Components = append(aw.Spec.Components, aw.Spec.Components[0])
186+
})).ShouldNot(Succeed())
77187

188+
Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) {
189+
aw.Spec.Components[0].PodSets = append(aw.Spec.Components[0].PodSets, aw.Spec.Components[0].PodSets...)
190+
})).ShouldNot(Succeed())
191+
192+
Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) {
193+
aw.Spec.Components[0].PodSets[0].Path = "bad"
194+
})).ShouldNot(Succeed())
195+
196+
Expect(updateAppWrapper(ctx, awName, func(aw *workloadv1beta2.AppWrapper) {
197+
aw.Spec.Components[0].PodSets[0].Replicas = ptr.To(int32(12))
198+
})).ShouldNot(Succeed())
199+
})
78200
})
79201

80-
Describe("Error Handling for Invalid Resources", func() {
81-
// TODO: Replicate scenarios from the AdmissionController unit tests
202+
Describe("Webhook Enforces RBAC", Label("Webhook"), func() {
203+
It("AppWrapper containing permitted resources can be created", func() {
204+
aw := toAppWrapper(pod(100))
205+
Expect(getLimitedClient(ctx).Create(ctx, aw)).To(Succeed(), "Limited user should be allowed to create AppWrapper containing Pods")
206+
Expect(getClient(ctx).Delete(ctx, aw)).To(Succeed())
207+
})
82208

209+
It("AppWrapper containing unpermitted resources cannot be created", func() {
210+
aw := toAppWrapper(deployment(4, 100))
211+
Expect(getLimitedClient(ctx).Create(ctx, aw)).NotTo(Succeed(), "Limited user should not be allowed to create AppWrapper containing Deployments")
212+
})
83213
})
84214

85215
Describe("Queueing and Preemption", Label("Kueue"), func() {
@@ -103,24 +233,111 @@ var _ = Describe("AppWrapper E2E Test", func() {
103233
appwrappers = []*workloadv1beta2.AppWrapper{aw2, aw3}
104234
Expect(waitAWPodsReady(ctx, aw3)).Should(Succeed())
105235
})
106-
107236
})
108237

238+
// AppWrapper consumes the entire quota itself; tests verify that we don't double count children
109239
Describe("Recognition of Child Jobs", Label("Kueue"), func() {
110-
// TODO: Test scenarios where the AW "just fits" in the quota and
111-
// contains components that Kueue might try to queue
112-
// but should not in this case because they are using the parent workload's quota
113-
// 1. batch v1 jobs
114-
// 2. pytorch jobs (which themself contain child Jobs)
240+
It("Batch Job", func() {
241+
aw := createAppWrapper(ctx, batchjob(2000))
242+
appwrappers = append(appwrappers, aw)
243+
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
244+
})
245+
246+
It("PyTorch Job", func() {
247+
aw := createAppWrapper(ctx, pytorchjob(2, 1000))
248+
appwrappers = append(appwrappers, aw)
249+
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
250+
})
115251

252+
It("Compound Workloads", func() {
253+
aw := createAppWrapper(ctx, batchjob(500), pytorchjob(2, 500), deployment(2, 250))
254+
appwrappers = append(appwrappers, aw)
255+
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
256+
})
116257
})
117258

118-
Describe("Detection of Completion Status", Label("Kueue", "Standalone"), func() {
259+
Describe("Detection of Completion Status", Label("slow"), Label("Kueue", "Standalone"), func() {
260+
It("A successful Batch Job yields a successful AppWrapper", func() {
261+
aw := createAppWrapper(ctx, succeedingBatchjob(500))
262+
appwrappers = append(appwrappers, aw)
263+
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
264+
Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(workloadv1beta2.AppWrapperSucceeded))
265+
})
119266

267+
It("A failed Batch Job yields a failed AppWrapper", func() {
268+
aw := createAppWrapper(ctx, failingBatchjob(500))
269+
appwrappers = append(appwrappers, aw)
270+
Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
271+
Eventually(AppWrapperPhase(ctx, aw), 90*time.Second).Should(Equal(workloadv1beta2.AppWrapperFailed))
272+
})
120273
})
121274

122275
Describe("Load Testing", Label("slow"), Label("Kueue", "Standalone"), func() {
276+
It("Create 50 AppWrappers", func() {
277+
const (
278+
awCount = 50
279+
cpuDemand = 5
280+
)
123281

282+
By("Creating 50 AppWrappers")
283+
replicas := 2
284+
for i := 0; i < awCount; i++ {
285+
aw := createAppWrapper(ctx, deployment(replicas, cpuDemand))
286+
appwrappers = append(appwrappers, aw)
287+
}
288+
nonRunningAWs := appwrappers
289+
290+
By("Polling for all AppWrappers to be Running")
291+
err := wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 1*time.Minute, false, func(ctx context.Context) (done bool, err error) {
292+
t := time.Now()
293+
toCheckAWS := make([]*workloadv1beta2.AppWrapper, 0, len(appwrappers))
294+
for _, aw := range nonRunningAWs {
295+
if !checkAppWrapperRunning(ctx, aw) {
296+
toCheckAWS = append(toCheckAWS, aw)
297+
}
298+
}
299+
nonRunningAWs = toCheckAWS
300+
if len(toCheckAWS) == 0 {
301+
fmt.Fprintf(GinkgoWriter, "\tAll AppWrappers Running at time %s\n", t.Format(time.RFC3339))
302+
return true, nil
303+
}
304+
fmt.Fprintf(GinkgoWriter, "\tThere are %d non-Running AppWrappers at time %s\n", len(toCheckAWS), t.Format(time.RFC3339))
305+
return false, nil
306+
})
307+
if err != nil {
308+
fmt.Fprintf(GinkgoWriter, "Load Testing - Create 50 AppWrappers - There are %d non-Running AppWrappers, err = %v\n", len(nonRunningAWs), err)
309+
for _, uaw := range nonRunningAWs {
310+
fmt.Fprintf(GinkgoWriter, "Load Testing - Create 50 AppWrappers - Non-Running AW '%s/%s'\n", uaw.Namespace, uaw.Name)
311+
}
312+
}
313+
Expect(err).Should(Succeed(), "All AppWrappers should have ready Pods")
314+
315+
By("Polling for all pods to become ready")
316+
nonReadyAWs := appwrappers
317+
err = wait.PollUntilContextTimeout(ctx, 500*time.Millisecond, 3*time.Minute, false, func(ctx context.Context) (done bool, err error) {
318+
t := time.Now()
319+
toCheckAWS := make([]*workloadv1beta2.AppWrapper, 0, len(appwrappers))
320+
for _, aw := range nonReadyAWs {
321+
if !checkAllAWPodsReady(ctx, aw) {
322+
toCheckAWS = append(toCheckAWS, aw)
323+
}
324+
}
325+
nonReadyAWs = toCheckAWS
326+
if len(toCheckAWS) == 0 {
327+
fmt.Fprintf(GinkgoWriter, "\tAll pods ready at time %s\n", t.Format(time.RFC3339))
328+
return true, nil
329+
}
330+
fmt.Fprintf(GinkgoWriter, "\tThere are %d app wrappers without ready pods at time %s\n", len(toCheckAWS), t.Format(time.RFC3339))
331+
return false, nil
332+
})
333+
if err != nil {
334+
fmt.Fprintf(GinkgoWriter, "Load Testing - Create 50 AppWrappers - There are %d app wrappers without ready pods, err = %v\n", len(nonReadyAWs), err)
335+
for _, uaw := range nonReadyAWs {
336+
fmt.Fprintf(GinkgoWriter, "Load Testing - Create 50 AppWrappers - Non-Ready AW '%s/%s'\n", uaw.Namespace, uaw.Name)
337+
}
338+
}
339+
Expect(err).Should(Succeed(), "All AppWrappers should have ready Pods")
340+
})
124341
})
125342

126343
})

test/e2e/e2e_test.go

+1
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ var _ = BeforeSuite(func() {
3333
log.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true)))
3434
ctx = extendContextWithClient(context.Background())
3535
ensureNamespaceExists(ctx)
36+
ctx = extendContextWithLimitedClient(ctx)
3637
if Label("Kueue").MatchesLabelFilter(GinkgoLabelFilter()) {
3738
ensureTestQueuesExist(ctx)
3839
}

0 commit comments

Comments
 (0)