Skip to content

Commit eca08d2

Browse files
pooknullhors
andauthored
K8SPG-703: add internal.percona.com/keep-job finalizer for backups (#1134)
* K8SPG-703: add `internal.percona.com/keep-job` finalizer for backups https://perconadev.atlassian.net/browse/K8SPG-703 * fix * fix tests * fix lint * fix * fix unit-tests * fix lint * fix lint * fix lint --------- Co-authored-by: Viacheslav Sarzhan <[email protected]>
1 parent c564163 commit eca08d2

File tree

8 files changed

+63
-17
lines changed

8 files changed

+63
-17
lines changed

internal/controller/postgrescluster/pgbackrest.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2491,6 +2491,8 @@ func (r *Reconciler) reconcileManualBackup(ctx context.Context,
24912491
})
24922492
backupJob.ObjectMeta.Labels = labels
24932493
backupJob.ObjectMeta.Annotations = annotations
2494+
// K8SPG-703
2495+
backupJob.Finalizers = []string{pNaming.FinalizerKeepJob}
24942496

24952497
// K8SPG-613
24962498
initImage, err := k8s.InitImage(ctx, r.Client, postgresCluster, &postgresCluster.Spec.Backups.PGBackRest)

percona/controller/pgbackup/controller.go

Lines changed: 34 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package pgbackup
33
import (
44
"context"
55
"path"
6+
"slices"
67
"strings"
78
"time"
89

@@ -131,7 +132,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re
131132
}
132133

133134
// start backup only if backup job doesn't exist
134-
_, err := findBackupJob(ctx, r.Client, pgCluster, pgBackup)
135+
_, err := findBackupJob(ctx, r.Client, pgBackup)
135136
if err != nil {
136137
if !errors.Is(err, ErrBackupJobNotFound) {
137138
return reconcile.Result{}, errors.Wrap(err, "find backup job")
@@ -184,7 +185,7 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re
184185
return reconcile.Result{}, errors.Errorf("PostgresCluster %s is not found", pgBackup.Spec.PGCluster)
185186
}
186187

187-
job, err := findBackupJob(ctx, r.Client, pgCluster, pgBackup)
188+
job, err := findBackupJob(ctx, r.Client, pgBackup)
188189
if err != nil {
189190
if errors.Is(err, ErrBackupJobNotFound) {
190191
log.Info("Waiting for backup to start")
@@ -231,6 +232,15 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re
231232
job := &batchv1.Job{}
232233
err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Status.JobName, Namespace: pgBackup.Namespace}, job)
233234
if err != nil {
235+
// If something has deleted the job even with the finalizer, we should fail the backup.
236+
if k8serrors.IsNotFound(err) {
237+
if err := updateStatus(ctx, r.Client, pgBackup, func(bcp *v2.PerconaPGBackup) {
238+
bcp.Status.State = v2.BackupFailed
239+
}); err != nil {
240+
return reconcile.Result{}, errors.Wrap(err, "update PGBackup status")
241+
}
242+
return reconcile.Result{}, nil
243+
}
234244
return reconcile.Result{}, errors.Wrap(err, "get backup job")
235245
}
236246

@@ -265,6 +275,21 @@ func (r *PGBackupReconciler) Reconcile(ctx context.Context, request reconcile.Re
265275

266276
return reconcile.Result{}, nil
267277
case v2.BackupSucceeded:
278+
job, err := findBackupJob(ctx, r.Client, pgBackup)
279+
if err == nil && slices.Contains(job.Finalizers, pNaming.FinalizerKeepJob) {
280+
if err := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
281+
j := new(batchv1.Job)
282+
if err := r.Client.Get(ctx, client.ObjectKeyFromObject(job), j); err != nil {
283+
return errors.Wrap(err, "get job")
284+
}
285+
j.Finalizers = slices.DeleteFunc(j.Finalizers, func(s string) bool { return s == pNaming.FinalizerKeepJob })
286+
287+
return r.Client.Update(ctx, j)
288+
}); err != nil {
289+
return reconcile.Result{}, errors.Wrap(err, "update PGBackup status")
290+
}
291+
}
292+
268293
if pgCluster == nil {
269294
return reconcile.Result{}, nil
270295
}
@@ -636,8 +661,11 @@ func startBackup(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) e
636661
return nil
637662
}
638663

639-
func findBackupJob(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster, pb *v2.PerconaPGBackup) (*batchv1.Job, error) {
640-
if jobName := pb.GetAnnotations()[pNaming.AnnotationPGBackrestBackupJobName]; jobName != "" {
664+
func findBackupJob(ctx context.Context, c client.Client, pb *v2.PerconaPGBackup) (*batchv1.Job, error) {
665+
if jobName := pb.GetAnnotations()[pNaming.AnnotationPGBackrestBackupJobName]; jobName != "" || pb.Status.JobName != "" {
666+
if jobName == "" {
667+
jobName = pb.Status.JobName
668+
}
641669
job := new(batchv1.Job)
642670
err := c.Get(ctx, types.NamespacedName{Name: jobName, Namespace: pb.Namespace}, job)
643671
if err != nil {
@@ -648,9 +676,9 @@ func findBackupJob(ctx context.Context, c client.Client, pg *v2.PerconaPGCluster
648676

649677
jobList := &batchv1.JobList{}
650678
err := c.List(ctx, jobList,
651-
client.InNamespace(pg.Namespace),
679+
client.InNamespace(pb.Namespace),
652680
client.MatchingLabelsSelector{
653-
Selector: naming.PGBackRestBackupJobSelector(pg.Name, pb.Spec.RepoName, naming.BackupManual),
681+
Selector: naming.PGBackRestBackupJobSelector(pb.Spec.PGCluster, pb.Spec.RepoName, naming.BackupManual),
654682
})
655683
if err != nil {
656684
return nil, errors.Wrap(err, "get backup jobs")

percona/controller/pgbackup/testutils_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,9 @@ func buildFakeClient(ctx context.Context, cr *v2.PerconaPGCluster, objs ...clien
4848
objs = append(objs, dcs)
4949

5050
cl := new(fakeClient)
51-
cl.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).WithStatusSubresource(objs...).Build()
51+
cl.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).WithStatusSubresource(objs...).
52+
WithIndex(new(v2.PerconaPGBackup), v2.IndexFieldPGCluster, v2.PGClusterIndexerFunc).
53+
Build()
5254

5355
return cl, nil
5456
}

percona/controller/pgcluster/backup.go

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55

66
"github.com/pkg/errors"
77
batchv1 "k8s.io/api/batch/v1"
8+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
89
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
910
"k8s.io/apimachinery/pkg/types"
1011
"k8s.io/client-go/util/retry"
@@ -87,14 +88,18 @@ func (r *PGClusterReconciler) cleanupOutdatedBackups(ctx context.Context, cr *v2
8788
// After the pg-backup is deleted, the job is not deleted immediately.
8889
// We need to set the DeletionTimestamp for a job so that `reconcileBackupJob` doesn't create a new pg-backup before the job deletion.
8990
job := new(batchv1.Job)
90-
if err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Status.JobName, Namespace: pgBackup.Namespace}, job); err != nil {
91+
err := r.Client.Get(ctx, types.NamespacedName{Name: pgBackup.Status.JobName, Namespace: pgBackup.Namespace}, job)
92+
if client.IgnoreNotFound(err) != nil {
9193
return errors.Wrap(err, "get backup job")
9294
}
93-
prop := metav1.DeletePropagationForeground
94-
if err := r.Client.Delete(ctx, job, &client.DeleteOptions{
95-
PropagationPolicy: &prop,
96-
}); err != nil {
97-
return errors.Wrapf(err, "delete job %s/%s", job.Name, job.Namespace)
95+
// The job may be deleted earlier due to ttlSecondsAfterFinished
96+
if !k8serrors.IsNotFound(err) {
97+
prop := metav1.DeletePropagationForeground
98+
if err := r.Client.Delete(ctx, job, &client.DeleteOptions{
99+
PropagationPolicy: &prop,
100+
}); err != nil {
101+
return errors.Wrapf(err, "delete job %s/%s", job.Name, job.Namespace)
102+
}
98103
}
99104
if err := r.Client.Delete(ctx, &pgBackup); err != nil {
100105
return errors.Wrapf(err, "delete backup %s/%s", pgBackup.Name, pgBackup.Namespace)

percona/controller/pgcluster/controller_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -671,7 +671,7 @@ var _ = Describe("Watching secrets", Ordered, func() {
671671
})
672672

673673
It("should reconcile 1 time", func() {
674-
Eventually(func() int { return getReconcileCount(crunchyR) }, time.Second*15, time.Millisecond*250).
674+
Eventually(func() int { return getReconcileCount(crunchyR) }, time.Second*20, time.Millisecond*250).
675675
Should(Equal(reconcileCount + 1))
676676
})
677677

@@ -692,7 +692,7 @@ var _ = Describe("Watching secrets", Ordered, func() {
692692
})
693693

694694
It("should reconcile 2 times", func() {
695-
Eventually(func() int { return getReconcileCount(crunchyR) }, time.Second*15, time.Millisecond*250).
695+
Eventually(func() int { return getReconcileCount(crunchyR) }, time.Second*20, time.Millisecond*250).
696696
Should(Equal(reconcileCount + 2))
697697
})
698698
})

percona/controller/pgcluster/testutils_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,9 @@ func buildFakeClient(ctx context.Context, cr *v2.PerconaPGCluster, objs ...clien
207207
objs = append(objs, dcs)
208208

209209
cl := new(fakeClient)
210-
cl.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).WithStatusSubresource(objs...).Build()
210+
cl.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).WithStatusSubresource(objs...).
211+
WithIndex(new(v2.PerconaPGBackup), v2.IndexFieldPGCluster, v2.PGClusterIndexerFunc).
212+
Build()
211213

212214
return cl, nil
213215
}

percona/k8s/testutils_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,9 @@ func buildFakeClient(ctx context.Context, cr *v2.PerconaPGCluster, objs ...clien
6161
objs = append(objs, dcs)
6262

6363
cl := new(fakeClient)
64-
cl.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).WithStatusSubresource(objs...).Build()
64+
cl.Client = fake.NewClientBuilder().WithScheme(s).WithObjects(objs...).WithStatusSubresource(objs...).
65+
WithIndex(new(v2.PerconaPGBackup), v2.IndexFieldPGCluster, v2.PGClusterIndexerFunc).
66+
Build()
6567

6668
return cl, nil
6769
}

percona/naming/finalizers.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,8 @@ const (
1919
const (
2020
FinalizerDeleteBackup = PrefixPerconaInternal + "delete-backup" //nolint:gosec
2121
)
22+
23+
// PerconaPGBackup job finalizers
24+
const (
25+
FinalizerKeepJob = PrefixPerconaInternal + "keep-job" //nolint:gosec
26+
)

0 commit comments

Comments
 (0)