Skip to content

Commit 0368a1b

Browse files
authored
Revert "add initial grace period before triggering a failure due to missing components (#273)" (#284)
This reverts commit 256dbab.
1 parent 0f30456 commit 0368a1b

File tree

2 files changed

+12
-22
lines changed

2 files changed

+12
-22
lines changed

internal/controller/appwrapper/appwrapper_controller.go

+10-15
Original file line numberDiff line numberDiff line change
@@ -266,26 +266,21 @@ func (r *AppWrapperReconciler) Reconcile(ctx context.Context, req ctrl.Request)
266266
}
267267

268268
// Detect externally deleted components and transition to Failed with no GracePeriod or retry
269+
detailMsg := fmt.Sprintf("Only found %v deployed components, but was expecting %v", compStatus.deployed, compStatus.expected)
269270
if compStatus.deployed != compStatus.expected {
270-
// There may be a lag before created resources become visible in the cache; don't react too quickly.
271-
whenDeployed := meta.FindStatusCondition(aw.Status.Conditions, string(workloadv1beta2.ResourcesDeployed)).LastTransitionTime
272-
graceDuration := r.admissionGraceDuration(ctx, aw)
273-
if time.Now().After(whenDeployed.Add(graceDuration)) {
274-
detailMsg := fmt.Sprintf("Only found %v deployed components, but was expecting %v", compStatus.deployed, compStatus.expected)
275-
meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{
276-
Type: string(workloadv1beta2.Unhealthy),
277-
Status: metav1.ConditionTrue,
278-
Reason: "MissingComponent",
279-
Message: detailMsg,
280-
})
281-
r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "MissingComponent: "+detailMsg)
282-
return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperFailed)
283-
}
271+
meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{
272+
Type: string(workloadv1beta2.Unhealthy),
273+
Status: metav1.ConditionTrue,
274+
Reason: "MissingComponent",
275+
Message: detailMsg,
276+
})
277+
r.Recorder.Event(aw, v1.EventTypeNormal, string(workloadv1beta2.Unhealthy), "MissingComponent: "+detailMsg)
278+
return ctrl.Result{}, r.transitionToPhase(ctx, orig, aw, workloadv1beta2.AppWrapperFailed)
284279
}
285280

286281
// If a component's controller has put it into a failed state, we do not need
287282
// to allow a grace period. The situation will not self-correct.
288-
detailMsg := fmt.Sprintf("Found %v failed components", compStatus.failed)
283+
detailMsg = fmt.Sprintf("Found %v failed components", compStatus.failed)
289284
if compStatus.failed > 0 {
290285
meta.SetStatusCondition(&aw.Status.Conditions, metav1.Condition{
291286
Type: string(workloadv1beta2.Unhealthy),

test/e2e/appwrapper_test.go

+2-7
Original file line numberDiff line numberDiff line change
@@ -297,13 +297,8 @@ var _ = Describe("AppWrapper E2E Test", func() {
297297
Expect(aw.Status.Retries).Should(Equal(int32(2)))
298298
})
299299

300-
It("Deleting a Running Component yields a failed AppWrapper", Label("slow"), func() {
301-
aw := toAppWrapper(pytorchjob(2, 500))
302-
if aw.Annotations == nil {
303-
aw.Annotations = make(map[string]string)
304-
}
305-
aw.Annotations[workloadv1beta2.AdmissionGracePeriodDurationAnnotation] = "5s"
306-
Expect(getClient(ctx).Create(ctx, aw)).To(Succeed())
300+
It("Deleting a Running Component yields a failed AppWrapper", func() {
301+
aw := createAppWrapper(ctx, pytorchjob(2, 500))
307302
appwrappers = append(appwrappers, aw)
308303
Eventually(AppWrapperPhase(ctx, aw), 60*time.Second).Should(Equal(workloadv1beta2.AppWrapperRunning))
309304
aw = getAppWrapper(ctx, types.NamespacedName{Name: aw.Name, Namespace: aw.Namespace})

0 commit comments

Comments
 (0)