-
Notifications
You must be signed in to change notification settings - Fork 51
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Delete NovaCell cr only when deletion job pass #917
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -603,18 +603,42 @@ func (r *NovaReconciler) Reconcile(ctx context.Context, req ctrl.Request) (resul | |
for _, cr := range novaCellList.Items { | ||
_, ok := instance.Spec.CellTemplates[cr.Spec.CellName] | ||
if !ok { | ||
err := r.ensureCellDeleted(ctx, h, instance, | ||
result, err := r.ensureCellDeleted(ctx, h, instance, | ||
cr.Spec.CellName, apiTransportURL, | ||
secret, apiDB, cellDBs[novav1.Cell0Name].Database.GetDatabaseHostname(), cells[novav1.Cell0Name]) | ||
if err != nil { | ||
return ctrl.Result{}, err | ||
} | ||
Log.Info("Cell deleted", "cell", cr.Spec.CellName) | ||
delete(instance.Status.RegisteredCells, cr.Name) | ||
if result == nova.CellDeleteComplete { | ||
Log.Info("Cell deleted", "cell", cr.Spec.CellName) | ||
delete(instance.Status.RegisteredCells, cr.Name) | ||
} | ||
} | ||
|
||
} | ||
|
||
// check if all cells are deleted | ||
for crName := range instance.Status.RegisteredCells { | ||
_, ok := instance.Spec.CellTemplates[crName] | ||
if !ok { | ||
job, err := job.GetJobWithName(ctx, h, crName+"-cell-delete", instance.Namespace) | ||
if err != nil { | ||
if !k8s_errors.IsNotFound(err) { | ||
return ctrl.Result{}, err | ||
} | ||
} | ||
if job != nil { | ||
if job.Status.Succeeded == 1 { | ||
Log.Info("Cell deleted", "cell", crName) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this say "cell job is deleted, cell deletion completed" |
||
delete(instance.Status.RegisteredCells, crName) | ||
} else { | ||
Log.Info("Cell is still being deleted", "cell", crName) | ||
return ctrl.Result{}, nil | ||
} | ||
} | ||
} | ||
} | ||
|
||
Log.Info("Successfully reconciled") | ||
return ctrl.Result{}, nil | ||
} | ||
|
@@ -669,7 +693,7 @@ func (r *NovaReconciler) ensureCellDeleted( | |
apiDB *mariadbv1.Database, | ||
APIDatabaseHostname string, | ||
cell0 *novav1.NovaCell, | ||
) error { | ||
) (nova.CellDeploymentStatus, error) { | ||
Log := r.GetLogger(ctx) | ||
cell := &novav1.NovaCell{} | ||
fullCellName := types.NamespacedName{ | ||
|
@@ -682,29 +706,29 @@ func (r *NovaReconciler) ensureCellDeleted( | |
// We cannot do further cleanup of the MariaDBDatabase and | ||
// MariaDBAccount as their name is only available in the NovaCell CR | ||
// since the cell definition is removed from the Nova CR already. | ||
return nil | ||
return nova.CellDeleteFailed, nil | ||
} | ||
if err != nil { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
// If it is not created by us, we don't touch it | ||
if !OwnedBy(cell, instance) { | ||
Log.Info("Cell isn't defined in the Nova, but there is a "+ | ||
"Cell CR not owned by us. Not deleting it.", | ||
"cell", cell) | ||
return nil | ||
return nova.CellDeleteFailed, nil | ||
} | ||
|
||
dbName, accountName := novaapi.ServiceName+"-"+cell.Spec.CellName, cell.Spec.CellDatabaseAccount | ||
|
||
configHash, scriptName, configName, err := r.ensureNovaManageJobSecret(ctx, h, instance, | ||
cell0, topLevelSecret, APIDatabaseHostname, apiTransportURL, apiDB) | ||
if err != nil { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
inputHash, err := util.HashOfInputHashes(configHash) | ||
if err != nil { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
|
||
labels := map[string]string{ | ||
|
@@ -716,24 +740,30 @@ func (r *NovaReconciler) ensureCellDeleted( | |
instance.Spec.PreserveJobs, r.RequeueTimeout, | ||
inputHash) | ||
|
||
_, err = job.DoJob(ctx, h) | ||
result, err := job.DoJob(ctx, h) | ||
if err != nil { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need to check the result of the DoJob to see if the deletion job is still running and return to the caller if so. |
||
} | ||
|
||
if (result != ctrl.Result{}) { | ||
// Job is still running. We can simply return as we will be reconciled | ||
// when the Job status changes | ||
return nova.CellDeleteInProgress, nil | ||
} | ||
|
||
secretName := getNovaCellCRName(instance.Name, cellName) | ||
err = secret.DeleteSecretsWithName(ctx, h, secretName, instance.Namespace) | ||
if err != nil && !k8s_errors.IsNotFound(err) { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
configSecret, scriptSecret := r.getNovaManageJobSecretNames(cell) | ||
err = secret.DeleteSecretsWithName(ctx, h, configSecret, instance.Namespace) | ||
if err != nil && !k8s_errors.IsNotFound(err) { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
err = secret.DeleteSecretsWithName(ctx, h, scriptSecret, instance.Namespace) | ||
if err != nil && !k8s_errors.IsNotFound(err) { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
|
||
// Delete transportURL cr | ||
|
@@ -745,18 +775,18 @@ func (r *NovaReconciler) ensureCellDeleted( | |
} | ||
err = r.Client.Delete(ctx, transportURL) | ||
if err != nil && !k8s_errors.IsNotFound(err) { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
|
||
err = mariadbv1.DeleteDatabaseAndAccountFinalizers( | ||
ctx, h, dbName, accountName, instance.Namespace) | ||
if err != nil { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
|
||
err = r.ensureAccountDeletedIfOwned(ctx, h, instance, accountName) | ||
if err != nil { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
|
||
database := &mariadbv1.MariaDBDatabase{ | ||
|
@@ -767,7 +797,7 @@ func (r *NovaReconciler) ensureCellDeleted( | |
} | ||
err = r.Client.Delete(ctx, database) | ||
if err != nil && !k8s_errors.IsNotFound(err) { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
Log.Info("Deleted MariaDBDatabase", "database", database) | ||
|
||
|
@@ -776,11 +806,11 @@ func (r *NovaReconciler) ensureCellDeleted( | |
// what to clean up. | ||
err = r.Client.Delete(ctx, cell) | ||
if err != nil && k8s_errors.IsNotFound(err) { | ||
return err | ||
return nova.CellDeleteFailed, err | ||
} | ||
|
||
Log.Info("Cell isn't defined in the Nova CR, so it is deleted", "cell", cell) | ||
return nil | ||
return nova.CellDeleteComplete, nil | ||
} | ||
|
||
func (r *NovaReconciler) initStatus( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -96,6 +96,12 @@ const ( | |
// CellComputeDiscoveryReady indicates that all NovaComputes in cell reached the Ready status and | ||
// the hosts in the cell have been successfully discovered | ||
CellComputeDiscoveryReady CellDeploymentStatus = iota | ||
// CellDeleteInProgress indicates that the NovaCell deletion is in progress | ||
CellDeleteInProgress CellDeploymentStatus = iota | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are right we need in progress indication. You however missed to use this status. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks I missed this somehow, now value is used in jobs checks |
||
// CellDeleteFailed indicates that the NovaCell deletion failed | ||
CellDeleteFailed CellDeploymentStatus = iota | ||
// CellDeleteComplete indicates that the NovaCell deletion is complete | ||
CellDeleteComplete CellDeploymentStatus = iota | ||
) | ||
|
||
// Database - | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -182,18 +182,28 @@ var _ = Describe("Nova reconfiguration", func() { | |
g.Expect(k8sClient.Update(ctx, nova)).To(Succeed()) | ||
}, timeout, interval).Should(Succeed()) | ||
|
||
Eventually(func(g Gomega) { | ||
nova := GetNova(novaNames.NovaName) | ||
g.Expect(nova.Status.RegisteredCells).NotTo(HaveKey(cell1.CellCRName.Name)) | ||
}, timeout, interval).Should(Succeed()) | ||
|
||
NovaCellNotExists(cell1.CellCRName) | ||
Eventually(func(g Gomega) { | ||
mappingJob := th.GetJob(cell1.CellDeleteJobName) | ||
newJobInputHash := GetEnvVarValue( | ||
mappingJob.Spec.Template.Spec.Containers[0].Env, "INPUT_HASH", "") | ||
g.Expect(newJobInputHash).NotTo(BeNil()) | ||
}, timeout, interval).Should(Succeed()) | ||
|
||
// Check that the cell status is not deleted | ||
Eventually(func(g Gomega) { | ||
nova := GetNova(novaNames.NovaName) | ||
g.Expect(nova.Status.RegisteredCells).To(HaveKey(cell1.CellCRName.Name)) | ||
}, timeout, interval).Should(Succeed()) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is this a side-effect/bug for now ? |
||
|
||
// Simulate the cell delete job success | ||
th.SimulateJobSuccess(cell1.CellDeleteJobName) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why to simulate, isn't this already verified in L189 ? |
||
Eventually(func(g Gomega) { | ||
nova := GetNova(novaNames.NovaName) | ||
g.Expect(nova.Status.RegisteredCells).NotTo(HaveKey(cell1.CellCRName.Name)) | ||
}, timeout, interval).Should(Succeed()) | ||
|
||
NovaCellNotExists(cell1.CellCRName) | ||
|
||
th.AssertSecretDoesNotExist(cell1.InternalCellSecretName) | ||
|
||
Eventually(func(g Gomega) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
so if the result is not CellDeleteComplete then we return success from the reconciler below and allowing the NovaCell object to be deleted, but we keep the nova.Status.RegisteredCells entry. However when the job finally finishes and a new reconciler starts on nova controller the novaCellList above will not containt the NovaCell CR any more so the nova controller will not cleanup the RegisteredCells entry.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I added additional loop to check if registered cell job was sucess to ensure that we have correct status