Skip to content

Commit 561cf47

Browse files
authored
Fix cluster status update logic (#911)
Previously, several actors cancelled their context on success. This was a relic of how the reconciliation loop used to be structured, but it now causes a bug: the cluster status is not updated after a successful action. To address this, this patch deletes the CancelLoop function entirely.
1 parent 10b3b21 commit 561cf47

13 files changed

+10
-93
lines changed

CHANGELOG.md

+4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
77

88
# [Unreleased](https://github.com/cockroachdb/cockroach-operator/compare/v2.7.0...master)
99

10+
## Fixed
11+
12+
* Delete the CancelLoop function, fixing a cluster status update bug
13+
1014
# [v2.7.0](https://github.com/cockroachdb/cockroach-operator/compare/v2.6.0...v2.7.0)
1115

1216
## Fixed

pkg/actor/BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ go_library(
55
srcs = [
66
"actor.go",
77
"cluster_restart.go",
8-
"context.go",
98
"decommission.go",
109
"deploy.go",
1110
"director.go",

pkg/actor/cluster_restart.go

-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ func (r *clusterRestart) Act(ctx context.Context, cluster *resource.Cluster, log
125125
log.Error(err, "failed reseting the restart cluster field")
126126
}
127127
log.V(DEBUGLEVEL).Info("completed cluster restart")
128-
CancelLoop(ctx, log)
129128
return nil
130129
}
131130

pkg/actor/context.go

-47
This file was deleted.

pkg/actor/decommission.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package actor
1919
import (
2020
"context"
2121
"fmt"
22+
2223
api "github.com/cockroachdb/cockroach-operator/apis/v1alpha1"
2324
"github.com/cockroachdb/cockroach-operator/pkg/clustersql"
2425
"github.com/cockroachdb/cockroach-operator/pkg/database"
@@ -143,12 +144,10 @@ func (d decommission) Act(ctx context.Context, cluster *resource.Cluster, log lo
143144
/// now check if the decommissionStaleErr and update status
144145
log.Error(err, "decommission failed")
145146
cluster.SetFalse(api.DecommissionCondition)
146-
CancelLoop(ctx, log)
147147
return err
148148
}
149149
// TO DO @alina we will need to save the status foreach action
150150
cluster.SetTrue(api.DecommissionCondition)
151151
log.V(DEBUGLEVEL).Info("decommission completed", "cond", ss.Status.Conditions)
152-
CancelLoop(ctx, log)
153152
return nil
154153
}

pkg/actor/deploy.go

-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ func (d deploy) Act(ctx context.Context, cluster *resource.Cluster, log logr.Log
8181

8282
if changed {
8383
log.Info("created/updated a resource, stopping request processing", "resource", b.ResourceName())
84-
CancelLoop(ctx, log)
8584
return nil
8685
}
8786
}

pkg/actor/deploy_test.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ func TestDeploysNotInitalizedClusterAfterVersionChecker(t *testing.T) {
8181
// 3 is the number of resources we expect to be created. The action should be repeated as it is
8282
// restarted on successful creation or update
8383
for i := 0; i < 3; i++ {
84-
assert.NoError(t, deploy.Act(actor.ContextWithCancelFn(context.TODO(), func() {}), cluster, testLog))
84+
assert.NoError(t, deploy.Act(context.Background(), cluster, testLog))
8585
}
8686

8787
assert.Equal(t, expected, actual)

pkg/actor/partitioned_update.go

+1-2
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package actor
1919
import (
2020
"context"
2121
"fmt"
22-
"github.com/go-logr/logr"
2322
"os"
2423
"strings"
2524
"time"
@@ -31,6 +30,7 @@ import (
3130
"github.com/cockroachdb/cockroach-operator/pkg/resource"
3231
"github.com/cockroachdb/cockroach-operator/pkg/update"
3332
"github.com/cockroachdb/errors"
33+
"github.com/go-logr/logr"
3434
"go.uber.org/zap/zapcore"
3535
appsv1 "k8s.io/api/apps/v1"
3636
kubetypes "k8s.io/apimachinery/pkg/types"
@@ -213,7 +213,6 @@ func (up *partitionedUpdate) Act(ctx context.Context, cluster *resource.Cluster,
213213

214214
// TODO set status that we are completed.
215215
log.V(DEBUGLEVEL).Info("update completed with partitioned update", "new version", versionWantedCalFmtStr)
216-
CancelLoop(ctx, log)
217216
return nil
218217
}
219218

pkg/actor/resize_pvc.go

+1-3
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ package actor
1919
import (
2020
"context"
2121
"fmt"
22-
"github.com/go-logr/logr"
2322
"time"
2423

2524
"github.com/cenkalti/backoff"
2625
api "github.com/cockroachdb/cockroach-operator/apis/v1alpha1"
2726
"github.com/cockroachdb/cockroach-operator/pkg/kube"
2827
"github.com/cockroachdb/cockroach-operator/pkg/resource"
2928
"github.com/cockroachdb/errors"
29+
"github.com/go-logr/logr"
3030
appsv1 "k8s.io/api/apps/v1"
3131
v1 "k8s.io/api/core/v1"
3232
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -140,8 +140,6 @@ func (rp *resizePVC) Act(ctx context.Context, cluster *resource.Cluster, log log
140140
}*/
141141

142142
log.Info("PVC resize completed")
143-
CancelLoop(ctx, log)
144-
145143
return nil
146144
}
147145

pkg/actor/validate_version.go

-2
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,6 @@ func (v *versionChecker) Act(ctx context.Context, cluster *resource.Cluster, log
136136

137137
if changed {
138138
log.V(int(zapcore.DebugLevel)).Info("created/updated job, stopping request processing")
139-
CancelLoop(ctx, log)
140139
return nil
141140
}
142141

@@ -349,7 +348,6 @@ func (v *versionChecker) completeVersionChecker(
349348
}
350349
log.V(int(zapcore.DebugLevel)).Info("completed version checker", "calVersion", version,
351350
"containerImage", imageName)
352-
CancelLoop(ctx, log)
353351
return nil
354352
}
355353

pkg/controller/BUILD.bazel

-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ go_test(
4949
"@io_k8s_apimachinery//pkg/types:go_default_library",
5050
"@io_k8s_sigs_controller_runtime//:go_default_library",
5151
"@io_k8s_sigs_controller_runtime//pkg/client/fake:go_default_library",
52-
"@io_k8s_sigs_controller_runtime//pkg/log:go_default_library",
5352
"@org_uber_go_zap//zaptest:go_default_library",
5453
],
5554
)

pkg/controller/cluster_controller.go

+1-18
Original file line numberDiff line numberDiff line change
@@ -147,8 +147,7 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req reconcile.Request
147147
return noRequeue()
148148
}
149149

150-
// Save context cancellation function for actors to call if needed
151-
ctx = actor.ContextWithCancelFn(ctx, cancel)
150+
ctx = context.Background()
152151

153152
log.Info(fmt.Sprintf("Running action with name: %s", actorToExecute.GetActionType()))
154153
if err := actorToExecute.Act(ctx, &cluster, log); err != nil {
@@ -187,13 +186,6 @@ func (r *ClusterReconciler) Reconcile(ctx context.Context, req reconcile.Request
187186
cluster.SetActionFinished(actorToExecute.GetActionType())
188187
}
189188

190-
// Stop processing and wait for Kubernetes scheduler to call us again as the actor
191-
// modified actorToExecute resource owned by the controller
192-
if cancelled(ctx) {
193-
log.V(int(zapcore.InfoLevel)).Info("request was interrupted")
194-
return noRequeue()
195-
}
196-
197189
// Check if the resource has been updated while the controller worked on it
198190
fresh, err := cluster.IsFresh(fetcher)
199191
if err != nil {
@@ -254,12 +246,3 @@ func InitClusterReconcilerWithLogger(l logr.Logger) func(ctrl.Manager) error {
254246
}).SetupWithManager(mgr)
255247
}
256248
}
257-
258-
func cancelled(ctx context.Context) bool {
259-
select {
260-
case <-ctx.Done():
261-
return true
262-
default:
263-
return false
264-
}
265-
}

pkg/controller/cluster_controller_test.go

+1-14
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@ import (
2323
"time"
2424

2525
"github.com/go-logr/logr"
26-
"sigs.k8s.io/controller-runtime/pkg/log"
2726

2827
api "github.com/cockroachdb/cockroach-operator/apis/v1alpha1"
2928
"github.com/cockroachdb/cockroach-operator/pkg/actor"
@@ -43,14 +42,10 @@ import (
4342
)
4443

4544
type fakeActor struct {
46-
cancelCtx bool
47-
err error
45+
err error
4846
}
4947

5048
func (a *fakeActor) Act(ctx context.Context, _ *resource.Cluster, logger logr.Logger) error {
51-
if a.cancelCtx {
52-
actor.CancelLoop(ctx, log.NullLogger{})
53-
}
5449
return a.err
5550
}
5651
func (a *fakeActor) GetActionType() api.ActionType {
@@ -115,14 +110,6 @@ func TestReconcile(t *testing.T) {
115110
want: ctrl.Result{Requeue: true},
116111
wantErr: "",
117112
},
118-
{
119-
name: "reconcile action cancels the context",
120-
action: fakeActor{
121-
cancelCtx: true,
122-
},
123-
want: ctrl.Result{Requeue: false},
124-
wantErr: "",
125-
},
126113
{
127114
name: "reconcile action fails to probe expected condition",
128115
action: fakeActor{

0 commit comments

Comments
 (0)