Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete NovaCell cr only when deletion job pass #917

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 50 additions & 20 deletions controllers/nova_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -603,18 +603,42 @@ func (r *NovaReconciler) Reconcile(ctx context.Context, req ctrl.Request) (resul
for _, cr := range novaCellList.Items {
_, ok := instance.Spec.CellTemplates[cr.Spec.CellName]
if !ok {
err := r.ensureCellDeleted(ctx, h, instance,
result, err := r.ensureCellDeleted(ctx, h, instance,
cr.Spec.CellName, apiTransportURL,
secret, apiDB, cellDBs[novav1.Cell0Name].Database.GetDatabaseHostname(), cells[novav1.Cell0Name])
if err != nil {
return ctrl.Result{}, err
}
Log.Info("Cell deleted", "cell", cr.Spec.CellName)
delete(instance.Status.RegisteredCells, cr.Name)
if result == nova.CellDeleteComplete {
Log.Info("Cell deleted", "cell", cr.Spec.CellName)
delete(instance.Status.RegisteredCells, cr.Name)
}
Comment on lines +612 to +615
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so if the result is not CellDeleteComplete then we return success from the reconciler below and allowing the NovaCell object to be deleted, but we keep the nova.Status.RegisteredCells entry. However when the job finally finishes and a new reconciler starts on nova controller the novaCellList above will not containt the NovaCell CR any more so the nova controller will not cleanup the RegisteredCells entry.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! I added additional loop to check if registered cell job was sucess to ensure that we have correct status

}

}

// check if all cells are deleted
for crName := range instance.Status.RegisteredCells {
_, ok := instance.Spec.CellTemplates[crName]
if !ok {
job, err := job.GetJobWithName(ctx, h, crName+"-cell-delete", instance.Namespace)
if err != nil {
if !k8s_errors.IsNotFound(err) {
return ctrl.Result{}, err
}
}
if job != nil {
if job.Status.Succeeded == 1 {
Log.Info("Cell deleted", "cell", crName)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should this say "cell job is deleted, cell deletion completed"
we are alredy saying cell deleted at L613

delete(instance.Status.RegisteredCells, crName)
} else {
Log.Info("Cell is still being deleted", "cell", crName)
return ctrl.Result{}, nil
}
}
}
}

Log.Info("Successfully reconciled")
return ctrl.Result{}, nil
}
Expand Down Expand Up @@ -669,7 +693,7 @@ func (r *NovaReconciler) ensureCellDeleted(
apiDB *mariadbv1.Database,
APIDatabaseHostname string,
cell0 *novav1.NovaCell,
) error {
) (nova.CellDeploymentStatus, error) {
Log := r.GetLogger(ctx)
cell := &novav1.NovaCell{}
fullCellName := types.NamespacedName{
Expand All @@ -682,29 +706,29 @@ func (r *NovaReconciler) ensureCellDeleted(
// We cannot do further cleanup of the MariaDBDatabase and
// MariaDBAccount as their name is only available in the NovaCell CR
// since the cell definition is removed from the Nova CR already.
return nil
return nova.CellDeleteFailed, nil
}
if err != nil {
return err
return nova.CellDeleteFailed, err
}
// If it is not created by us, we don't touch it
if !OwnedBy(cell, instance) {
Log.Info("Cell isn't defined in the Nova, but there is a "+
"Cell CR not owned by us. Not deleting it.",
"cell", cell)
return nil
return nova.CellDeleteFailed, nil
}

dbName, accountName := novaapi.ServiceName+"-"+cell.Spec.CellName, cell.Spec.CellDatabaseAccount

configHash, scriptName, configName, err := r.ensureNovaManageJobSecret(ctx, h, instance,
cell0, topLevelSecret, APIDatabaseHostname, apiTransportURL, apiDB)
if err != nil {
return err
return nova.CellDeleteFailed, err
}
inputHash, err := util.HashOfInputHashes(configHash)
if err != nil {
return err
return nova.CellDeleteFailed, err
}

labels := map[string]string{
Expand All @@ -716,24 +740,30 @@ func (r *NovaReconciler) ensureCellDeleted(
instance.Spec.PreserveJobs, r.RequeueTimeout,
inputHash)

_, err = job.DoJob(ctx, h)
result, err := job.DoJob(ctx, h)
if err != nil {
return err
return nova.CellDeleteFailed, err
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to check the result of the DoJob to see if the deletion job is still running and return to the caller if so.

}

if (result != ctrl.Result{}) {
// Job is still running. We can simply return as we will be reconciled
// when the Job status changes
return nova.CellDeleteInProgress, nil
}

secretName := getNovaCellCRName(instance.Name, cellName)
err = secret.DeleteSecretsWithName(ctx, h, secretName, instance.Namespace)
if err != nil && !k8s_errors.IsNotFound(err) {
return err
return nova.CellDeleteFailed, err
}
configSecret, scriptSecret := r.getNovaManageJobSecretNames(cell)
err = secret.DeleteSecretsWithName(ctx, h, configSecret, instance.Namespace)
if err != nil && !k8s_errors.IsNotFound(err) {
return err
return nova.CellDeleteFailed, err
}
err = secret.DeleteSecretsWithName(ctx, h, scriptSecret, instance.Namespace)
if err != nil && !k8s_errors.IsNotFound(err) {
return err
return nova.CellDeleteFailed, err
}

// Delete transportURL cr
Expand All @@ -745,18 +775,18 @@ func (r *NovaReconciler) ensureCellDeleted(
}
err = r.Client.Delete(ctx, transportURL)
if err != nil && !k8s_errors.IsNotFound(err) {
return err
return nova.CellDeleteFailed, err
}

err = mariadbv1.DeleteDatabaseAndAccountFinalizers(
ctx, h, dbName, accountName, instance.Namespace)
if err != nil {
return err
return nova.CellDeleteFailed, err
}

err = r.ensureAccountDeletedIfOwned(ctx, h, instance, accountName)
if err != nil {
return err
return nova.CellDeleteFailed, err
}

database := &mariadbv1.MariaDBDatabase{
Expand All @@ -767,7 +797,7 @@ func (r *NovaReconciler) ensureCellDeleted(
}
err = r.Client.Delete(ctx, database)
if err != nil && !k8s_errors.IsNotFound(err) {
return err
return nova.CellDeleteFailed, err
}
Log.Info("Deleted MariaDBDatabase", "database", database)

Expand All @@ -776,11 +806,11 @@ func (r *NovaReconciler) ensureCellDeleted(
// what to clean up.
err = r.Client.Delete(ctx, cell)
if err != nil && k8s_errors.IsNotFound(err) {
return err
return nova.CellDeleteFailed, err
}

Log.Info("Cell isn't defined in the Nova CR, so it is deleted", "cell", cell)
return nil
return nova.CellDeleteComplete, nil
}

func (r *NovaReconciler) initStatus(
Expand Down
6 changes: 6 additions & 0 deletions pkg/nova/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ const (
// CellComputeDiscoveryReady indicates that all NovaComputes in cell reached the Ready status and
// the hosts in the cell have been successfully discovered
CellComputeDiscoveryReady CellDeploymentStatus = iota
// CellDeleteInProgress indicates that the NovaCell deletion is in progress
CellDeleteInProgress CellDeploymentStatus = iota
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right we need in progress indication. You however missed to use this status.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks I missed this somehow, now value is used in jobs checks

// CellDeleteFailed indicates that the NovaCell deletion failed
CellDeleteFailed CellDeploymentStatus = iota
// CellDeleteComplete indicates that the NovaCell deletion is complete
CellDeleteComplete CellDeploymentStatus = iota
)

// Database -
Expand Down
22 changes: 16 additions & 6 deletions test/functional/nova_reconfiguration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,18 +182,28 @@ var _ = Describe("Nova reconfiguration", func() {
g.Expect(k8sClient.Update(ctx, nova)).To(Succeed())
}, timeout, interval).Should(Succeed())

Eventually(func(g Gomega) {
nova := GetNova(novaNames.NovaName)
g.Expect(nova.Status.RegisteredCells).NotTo(HaveKey(cell1.CellCRName.Name))
}, timeout, interval).Should(Succeed())

NovaCellNotExists(cell1.CellCRName)
Eventually(func(g Gomega) {
mappingJob := th.GetJob(cell1.CellDeleteJobName)
newJobInputHash := GetEnvVarValue(
mappingJob.Spec.Template.Spec.Containers[0].Env, "INPUT_HASH", "")
g.Expect(newJobInputHash).NotTo(BeNil())
}, timeout, interval).Should(Succeed())

// Check that the cell status is not deleted
Eventually(func(g Gomega) {
nova := GetNova(novaNames.NovaName)
g.Expect(nova.Status.RegisteredCells).To(HaveKey(cell1.CellCRName.Name))
}, timeout, interval).Should(Succeed())
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this a side-effect/bug for now ?
we removed it from CR at L180, still we are verifying thats its present in CR.
I have seen this, that after even delete job is completed cell exists in CR (because of finalizer I think!)


// Simulate the cell delete job success
th.SimulateJobSuccess(cell1.CellDeleteJobName)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why to simulate, isn't this already verified in L189 ?

Eventually(func(g Gomega) {
nova := GetNova(novaNames.NovaName)
g.Expect(nova.Status.RegisteredCells).NotTo(HaveKey(cell1.CellCRName.Name))
}, timeout, interval).Should(Succeed())

NovaCellNotExists(cell1.CellCRName)

th.AssertSecretDoesNotExist(cell1.InternalCellSecretName)

Eventually(func(g Gomega) {
Expand Down
Loading