Skip to content

Commit 36e368d

Browse files
fix: don't spawn new workflow child on abort
If a status check node has aborted and has the `AbortWithStatusCheck` flag set, don't spawn a new workflow child node for the next step. This fixes a race condition in which the serial reconciler spawns a new child node in the window between the status check node aborting and the abort status being propagated to the parent. Signed-off-by: Graham Brereton <[email protected]>
1 parent 9c1206d commit 36e368d

File tree

2 files changed

+24
-0
lines changed

2 files changed

+24
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ For more information and how-to, see [RFC: Keep A Changelog](https://github.com/
5454
- Fix goroutine leak [#4229](https://github.com/chaos-mesh/chaos-mesh/pull/4229)
5555
- Remove the duplicate `make test` [#4234](https://github.com/chaos-mesh/chaos-mesh/pull/4234)
5656
- Fix daemon-server `SetDNSServer` endpoint to validate provided server address [#4246](https://github.com/chaos-mesh/chaos-mesh/pull/4246)
57+
- Fix serial workflow node reconciler not to spawn the next child after status check abort [#4286](https://github.com/chaos-mesh/chaos-mesh/pull/4286)
5758

5859
### Security
5960

pkg/workflow/controllers/serial_node_reconciler.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,16 @@ func (it *SerialNodeReconciler) syncChildNodes(ctx context.Context, node v1alpha
178178
if err != nil {
179179
return err
180180
}
181+
182+
if abortedStatusCheck := findAbortedStatusCheckNode(finishedChildNodes); abortedStatusCheck != nil {
183+
it.logger.Info(
184+
"not spawning new child, status check node with status aborted and AbortWithStatusCheck = true",
185+
"node",
186+
fmt.Sprintf("%s/%s", abortedStatusCheck.Namespace, abortedStatusCheck.Name),
187+
)
188+
return nil
189+
}
190+
181191
var taskToStartup string
182192
if len(activeChildNodes) == 0 {
183193
// no active children, trying to spawn a new one
@@ -287,3 +297,16 @@ func (it *SerialNodeReconciler) syncChildNodes(ctx context.Context, node v1alpha
287297

288298
return nil
289299
}
300+
301+
func findAbortedStatusCheckNode(nodes []v1alpha1.WorkflowNode) *v1alpha1.WorkflowNode {
302+
for _, node := range nodes {
303+
if node.Spec.Type == v1alpha1.TypeStatusCheck && node.Spec.AbortWithStatusCheck {
304+
for _, cond := range node.Status.Conditions {
305+
if cond.Type == v1alpha1.ConditionAborted && cond.Status == corev1.ConditionTrue {
306+
return &node
307+
}
308+
}
309+
}
310+
}
311+
return nil
312+
}

0 commit comments

Comments
 (0)