@@ -17,10 +17,17 @@ limitations under the License.
17
17
package e2e
18
18
19
19
import (
20
+ "context"
21
+ "encoding/json"
22
+ "fmt"
20
23
"time"
21
24
22
25
. "github.com/onsi/ginkgo/v2"
23
26
. "github.com/onsi/gomega"
27
+ "k8s.io/apimachinery/pkg/runtime"
28
+ "k8s.io/apimachinery/pkg/types"
29
+ "k8s.io/apimachinery/pkg/util/wait"
30
+ "k8s.io/utils/ptr"
24
31
25
32
workloadv1beta2 "github.com/project-codeflare/appwrapper/api/v1beta2"
26
33
)
@@ -72,14 +79,137 @@ var _ = Describe("AppWrapper E2E Test", func() {
72
79
appwrappers = append (appwrappers , aw )
73
80
Expect (waitAWPodsReady (ctx , aw )).Should (Succeed ())
74
81
})
82
+ })
83
+
84
+ // TODO: KubeRay GVKs (would have to deploy KubeRay operator on e2e test cluster)
85
+
86
+ // TODO: JobSets (would have to deploy JobSet controller on e2e test cluster)
87
+
88
+ Describe ("Webhook Enforces AppWrapper Invariants" , Label ("Webhook" ), func () {
89
+ Context ("Structural Invariants" , func () {
90
+ It ("There must be at least one podspec (a)" , func () {
91
+ aw := toAppWrapper ()
92
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
93
+ })
94
+
95
+ It ("There must be at least one podspec (b)" , func () {
96
+ aw := toAppWrapper (service ())
97
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
98
+ })
99
+
100
+ It ("There must be no more than 8 podspecs" , func () {
101
+ aw := toAppWrapper (pod (100 ), pod (100 ), pod (100 ), pod (100 ), pod (100 ), pod (100 ), pod (100 ), pod (100 ), pod (100 ))
102
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
103
+ })
104
+
105
+ It ("Non-existent PodSpec paths are rejected" , func () {
106
+ comp := deployment (4 , 100 )
107
+ comp .PodSets [0 ].Path = "template.spec.missing"
108
+ aw := toAppWrapper (comp )
109
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
110
+
111
+ comp .PodSets [0 ].Path = ""
112
+ aw = toAppWrapper (comp )
113
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
114
+ })
115
+
116
+ It ("PodSpec paths must refer to a PodSpecTemplate" , func () {
117
+ comp := deployment (4 , 100 )
118
+ comp .PodSets [0 ].Path = "template.spec.template.metadata"
119
+ aw := toAppWrapper (comp )
120
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
121
+ })
122
+
123
+ It ("Validation of Array and Map path elements" , func () {
124
+ comp := jobSet (2 , 100 )
125
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs.template.spec.template"
126
+ aw := toAppWrapper (comp )
127
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
128
+
129
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs"
130
+ aw = toAppWrapper (comp )
131
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
132
+
133
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs[0].template[0].spec.template"
134
+ aw = toAppWrapper (comp )
135
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
136
+
137
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs[10].template.spec.template"
138
+ aw = toAppWrapper (comp )
139
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
140
+
141
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs[-1].template.spec.template"
142
+ aw = toAppWrapper (comp )
143
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
144
+
145
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs[a10].template.spec.template"
146
+ aw = toAppWrapper (comp )
147
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
148
+
149
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs[1"
150
+ aw = toAppWrapper (comp )
151
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
152
+
153
+ comp .PodSets [0 ].Path = "template.spec.replicatedJobs[1]].template.spec.template"
154
+ aw = toAppWrapper (comp )
155
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
156
+ })
157
+ })
158
+
159
+ It ("Components in other namespaces are rejected" , func () {
160
+ aw := toAppWrapper (namespacedPod ("test" , 100 ))
161
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
162
+ })
163
+
164
+ It ("Nested AppWrappers are rejected" , func () {
165
+ child := toAppWrapper (pod (100 ))
166
+ childBytes , err := json .Marshal (child )
167
+ Expect (err ).ShouldNot (HaveOccurred ())
168
+ aw := toAppWrapper (pod (100 ), workloadv1beta2.AppWrapperComponent {
169
+ PodSets : []workloadv1beta2.AppWrapperPodSet {},
170
+ Template : runtime.RawExtension {Raw : childBytes },
171
+ })
172
+ Expect (getClient (ctx ).Create (ctx , aw )).ShouldNot (Succeed ())
173
+ })
174
+
175
+ It ("Sensitive fields of aw.Spec.Components are immutable" , func () {
176
+ aw := createAppWrapper (ctx , pod (1000 ), deployment (4 , 1000 ))
177
+ appwrappers = append (appwrappers , aw )
178
+ awName := types.NamespacedName {Name : aw .Name , Namespace : aw .Namespace }
179
+
180
+ Expect (updateAppWrapper (ctx , awName , func (aw * workloadv1beta2.AppWrapper ) {
181
+ aw .Spec .Components [0 ].Template = aw .Spec .Components [1 ].Template
182
+ })).ShouldNot (Succeed ())
75
183
76
- // TODO: Additional Kubeflow Training Operator GVKs of interest
184
+ Expect (updateAppWrapper (ctx , awName , func (aw * workloadv1beta2.AppWrapper ) {
185
+ aw .Spec .Components = append (aw .Spec .Components , aw .Spec .Components [0 ])
186
+ })).ShouldNot (Succeed ())
77
187
188
+ Expect (updateAppWrapper (ctx , awName , func (aw * workloadv1beta2.AppWrapper ) {
189
+ aw .Spec .Components [0 ].PodSets = append (aw .Spec .Components [0 ].PodSets , aw .Spec .Components [0 ].PodSets ... )
190
+ })).ShouldNot (Succeed ())
191
+
192
+ Expect (updateAppWrapper (ctx , awName , func (aw * workloadv1beta2.AppWrapper ) {
193
+ aw .Spec .Components [0 ].PodSets [0 ].Path = "bad"
194
+ })).ShouldNot (Succeed ())
195
+
196
+ Expect (updateAppWrapper (ctx , awName , func (aw * workloadv1beta2.AppWrapper ) {
197
+ aw .Spec .Components [0 ].PodSets [0 ].Replicas = ptr .To (int32 (12 ))
198
+ })).ShouldNot (Succeed ())
199
+ })
78
200
})
79
201
80
- Describe ("Error Handling for Invalid Resources" , func () {
81
- // TODO: Replicate scenarios from the AdmissionController unit tests
202
+ Describe ("Webhook Enforces RBAC" , Label ("Webhook" ), func () {
203
+ It ("AppWrapper containing permitted resources can be created" , func () {
204
+ aw := toAppWrapper (pod (100 ))
205
+ Expect (getLimitedClient (ctx ).Create (ctx , aw )).To (Succeed (), "Limited user should be allowed to create AppWrapper containing Pods" )
206
+ Expect (getClient (ctx ).Delete (ctx , aw )).To (Succeed ())
207
+ })
82
208
209
+ It ("AppWrapper containing unpermitted resources cannot be created" , func () {
210
+ aw := toAppWrapper (deployment (4 , 100 ))
211
+ Expect (getLimitedClient (ctx ).Create (ctx , aw )).NotTo (Succeed (), "Limited user should not be allowed to create AppWrapper containing Deployments" )
212
+ })
83
213
})
84
214
85
215
Describe ("Queueing and Preemption" , Label ("Kueue" ), func () {
@@ -103,24 +233,111 @@ var _ = Describe("AppWrapper E2E Test", func() {
103
233
appwrappers = []* workloadv1beta2.AppWrapper {aw2 , aw3 }
104
234
Expect (waitAWPodsReady (ctx , aw3 )).Should (Succeed ())
105
235
})
106
-
107
236
})
108
237
238
+ // AppWrapper consumes the entire quota itself; tests verify that we don't double count children
109
239
// Each case creates one AppWrapper and waits for its pods to become ready.
Describe("Recognition of Child Jobs", Label("Kueue"), func() {
	// trackAndAwaitReady records aw in the shared appwrappers slice and then
	// asserts that waitAWPodsReady succeeds for it.
	trackAndAwaitReady := func(aw *workloadv1beta2.AppWrapper) {
		appwrappers = append(appwrappers, aw)
		Expect(waitAWPodsReady(ctx, aw)).Should(Succeed())
	}

	It("Batch Job", func() {
		trackAndAwaitReady(createAppWrapper(ctx, batchjob(2000)))
	})

	It("PyTorch Job", func() {
		trackAndAwaitReady(createAppWrapper(ctx, pytorchjob(2, 1000)))
	})

	It("Compound Workloads", func() {
		trackAndAwaitReady(createAppWrapper(ctx, batchjob(500), pytorchjob(2, 500), deployment(2, 250)))
	})
})
117
258
118
- Describe ("Detection of Completion Status" , Label ("Kueue" , "Standalone" ), func () {
259
+ Describe ("Detection of Completion Status" , Label ("slow" ), Label ("Kueue" , "Standalone" ), func () {
260
+ It ("A successful Batch Job yields a successful AppWrapper" , func () {
261
+ aw := createAppWrapper (ctx , succeedingBatchjob (500 ))
262
+ appwrappers = append (appwrappers , aw )
263
+ Expect (waitAWPodsReady (ctx , aw )).Should (Succeed ())
264
+ Eventually (AppWrapperPhase (ctx , aw ), 60 * time .Second ).Should (Equal (workloadv1beta2 .AppWrapperSucceeded ))
265
+ })
119
266
267
+ It ("A failed Batch Job yields a failed AppWrapper" , func () {
268
+ aw := createAppWrapper (ctx , failingBatchjob (500 ))
269
+ appwrappers = append (appwrappers , aw )
270
+ Expect (waitAWPodsReady (ctx , aw )).Should (Succeed ())
271
+ Eventually (AppWrapperPhase (ctx , aw ), 90 * time .Second ).Should (Equal (workloadv1beta2 .AppWrapperFailed ))
272
+ })
120
273
})
121
274
122
275
Describe ("Load Testing" , Label ("slow" ), Label ("Kueue" , "Standalone" ), func () {
276
+ It ("Create 50 AppWrappers" , func () {
277
+ const (
278
+ awCount = 50
279
+ cpuDemand = 5
280
+ )
123
281
282
+ By ("Creating 50 AppWrappers" )
283
+ replicas := 2
284
+ for i := 0 ; i < awCount ; i ++ {
285
+ aw := createAppWrapper (ctx , deployment (replicas , cpuDemand ))
286
+ appwrappers = append (appwrappers , aw )
287
+ }
288
+ nonRunningAWs := appwrappers
289
+
290
+ By ("Polling for all AppWrappers to be Running" )
291
+ err := wait .PollUntilContextTimeout (ctx , 500 * time .Millisecond , 1 * time .Minute , false , func (ctx context.Context ) (done bool , err error ) {
292
+ t := time .Now ()
293
+ toCheckAWS := make ([]* workloadv1beta2.AppWrapper , 0 , len (appwrappers ))
294
+ for _ , aw := range nonRunningAWs {
295
+ if ! checkAppWrapperRunning (ctx , aw ) {
296
+ toCheckAWS = append (toCheckAWS , aw )
297
+ }
298
+ }
299
+ nonRunningAWs = toCheckAWS
300
+ if len (toCheckAWS ) == 0 {
301
+ fmt .Fprintf (GinkgoWriter , "\t All AppWrappers Running at time %s\n " , t .Format (time .RFC3339 ))
302
+ return true , nil
303
+ }
304
+ fmt .Fprintf (GinkgoWriter , "\t There are %d non-Running AppWrappers at time %s\n " , len (toCheckAWS ), t .Format (time .RFC3339 ))
305
+ return false , nil
306
+ })
307
+ if err != nil {
308
+ fmt .Fprintf (GinkgoWriter , "Load Testing - Create 50 AppWrappers - There are %d non-Running AppWrappers, err = %v\n " , len (nonRunningAWs ), err )
309
+ for _ , uaw := range nonRunningAWs {
310
+ fmt .Fprintf (GinkgoWriter , "Load Testing - Create 50 AppWrappers - Non-Running AW '%s/%s'\n " , uaw .Namespace , uaw .Name )
311
+ }
312
+ }
313
+ Expect (err ).Should (Succeed (), "All AppWrappers should have ready Pods" )
314
+
315
+ By ("Polling for all pods to become ready" )
316
+ nonReadyAWs := appwrappers
317
+ err = wait .PollUntilContextTimeout (ctx , 500 * time .Millisecond , 3 * time .Minute , false , func (ctx context.Context ) (done bool , err error ) {
318
+ t := time .Now ()
319
+ toCheckAWS := make ([]* workloadv1beta2.AppWrapper , 0 , len (appwrappers ))
320
+ for _ , aw := range nonReadyAWs {
321
+ if ! checkAllAWPodsReady (ctx , aw ) {
322
+ toCheckAWS = append (toCheckAWS , aw )
323
+ }
324
+ }
325
+ nonReadyAWs = toCheckAWS
326
+ if len (toCheckAWS ) == 0 {
327
+ fmt .Fprintf (GinkgoWriter , "\t All pods ready at time %s\n " , t .Format (time .RFC3339 ))
328
+ return true , nil
329
+ }
330
+ fmt .Fprintf (GinkgoWriter , "\t There are %d app wrappers without ready pods at time %s\n " , len (toCheckAWS ), t .Format (time .RFC3339 ))
331
+ return false , nil
332
+ })
333
+ if err != nil {
334
+ fmt .Fprintf (GinkgoWriter , "Load Testing - Create 50 AppWrappers - There are %d app wrappers without ready pods, err = %v\n " , len (nonReadyAWs ), err )
335
+ for _ , uaw := range nonReadyAWs {
336
+ fmt .Fprintf (GinkgoWriter , "Load Testing - Create 50 AppWrappers - Non-Ready AW '%s/%s'\n " , uaw .Namespace , uaw .Name )
337
+ }
338
+ }
339
+ Expect (err ).Should (Succeed (), "All AppWrappers should have ready Pods" )
340
+ })
124
341
})
125
342
126
343
})
0 commit comments