From eed53b25e040f03c4edd34e4db2029314bad48a1 Mon Sep 17 00:00:00 2001 From: Fabricio Aguiar Date: Tue, 7 May 2024 01:30:18 +0100 Subject: [PATCH] Report when backoff limit is exceeded Depends-On: https://github.com/openstack-k8s-operators/lib-common/pull/504 Signed-off-by: Fabricio Aguiar --- ...eee.openstack.org_openstackansibleees.yaml | 3 ++ api/v1beta1/conditions.go | 47 ------------------- api/v1beta1/openstack_ansibleee_types.go | 6 ++- ...eee.openstack.org_openstackansibleees.yaml | 3 ++ controllers/openstack_ansibleee_controller.go | 23 +++++---- docs/assemblies/openstack_ansibleee.adoc | 5 ++ tests/functional/ansibleee_controller_test.go | 43 +++++++++-------- .../tests/run_failed_playbook/01-assert.yaml | 15 +++--- 8 files changed, 59 insertions(+), 86 deletions(-) delete mode 100644 api/v1beta1/conditions.go diff --git a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..0c4ac572 100644 --- a/api/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/api/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int64 + type: integer hash: additionalProperties: type: string diff --git a/api/v1beta1/conditions.go b/api/v1beta1/conditions.go deleted file mode 100644 index 7b7bcd90..00000000 --- a/api/v1beta1/conditions.go +++ /dev/null @@ -1,47 +0,0 @@ -/* -Copyright 2023. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package v1beta1 - -import condition "github.com/openstack-k8s-operators/lib-common/modules/common/condition" - -// AnsibleEE Condition Types. -const ( - // AnsibleExecutionJobReadyCondition Status=True condition indicates - // AnsibleExecutionJob is ready. - AnsibleExecutionJobReadyCondition condition.Type = "AnsibleExecutionJobReady" -) - -// Common Messages used by AnsibleEE objects. -const ( - // - // AnsibleExecutionJob condition messages - // - // AnsibleExecutionJobInitMessage - AnsibleExecutionJobInitMessage = "AnsibleExecutionJob not started" - - // AnsibleExecutionJobReadyMessage - AnsibleExecutionJobReadyMessage = "AnsibleExecutionJob complete" - - // AnsibleExecutionJobNotFoundMessage - AnsibleExecutionJobNotFoundMessage = "AnsibleExecutionJob not found" - - // AnsibleExecutionJobWaitingMessage - AnsibleExecutionJobWaitingMessage = "AnsibleExecutionJob is running" - - // AnsibleExecutionJobErrorMessage - AnsibleExecutionJobErrorMessage = "AnsibleExecutionJob error occured %s" -) diff --git a/api/v1beta1/openstack_ansibleee_types.go b/api/v1beta1/openstack_ansibleee_types.go index 42ddfb82..8ead3a1c 100644 --- a/api/v1beta1/openstack_ansibleee_types.go +++ b/api/v1beta1/openstack_ansibleee_types.go @@ -135,6 +135,10 @@ type OpenStackAnsibleEEStatus struct { // JobStatus status of the executed job (Pending/Running/Succeeded/Failed) JobStatus string `json:"JobStatus,omitempty" optional:"true"` + // The number of pods which reached phase Failed. + // +optional + Failed int64 `json:"failed,omitempty"` + // ObservedGeneration - the most recent generation observed for this // service. If the observed generation is less than the spec generation, // then the controller has not processed the latest changes injected by @@ -182,7 +186,7 @@ func init() { // IsReady - returns true if the OpenStackAnsibleEE is ready func (instance OpenStackAnsibleEE) IsReady() bool { - return instance.Status.Conditions.IsTrue(AnsibleExecutionJobReadyCondition) + return instance.Status.Conditions.IsTrue(condition.JobReadyCondition) } // SetupDefaults - initializes any CRD field defaults based on environment variables (the defaulting mechanism itself is implemented via webhooks) diff --git a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml index 48c69ed2..0c4ac572 100644 --- a/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml +++ b/config/crd/bases/ansibleee.openstack.org_openstackansibleees.yaml @@ -1575,6 +1575,9 @@ spec: - type type: object type: array + failed: + format: int64 + type: integer hash: additionalProperties: type: string diff --git a/controllers/openstack_ansibleee_controller.go b/controllers/openstack_ansibleee_controller.go index 916cc399..3e70c823 100644 --- a/controllers/openstack_ansibleee_controller.go +++ b/controllers/openstack_ansibleee_controller.go @@ -134,7 +134,7 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R cl := condition.CreateList( condition.UnknownCondition(condition.ReadyCondition, condition.InitReason, condition.ReadyInitMessage), - condition.UnknownCondition(ansibleeev1.AnsibleExecutionJobReadyCondition, condition.InitReason, ansibleeev1.AnsibleExecutionJobInitMessage), + condition.UnknownCondition(condition.JobReadyCondition, condition.InitReason, condition.JobReadyInitMessage), ) instance.Status.Conditions.Init(&cl) @@ -163,7 +163,7 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R instance.Status.Conditions.Set(condition.FalseCondition( condition.NetworkAttachmentsReadyCondition, condition.ErrorReason, - condition.SeverityWarning, + condition.SeverityError, condition.NetworkAttachmentsReadyErrorMessage, err.Error())) return ctrl.Result{}, err @@ -208,22 +208,27 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R if (ctrlResult != ctrl.Result{}) { instance.Status.Conditions.Set(condition.FalseCondition( - ansibleeev1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, condition.RequestedReason, condition.SeverityInfo, - ansibleeev1.AnsibleExecutionJobWaitingMessage)) + condition.JobReadyRunningMessage)) instance.Status.JobStatus = ansibleeev1.JobStatusRunning return ctrlResult, nil } if err != nil { + errorReason := condition.ErrorReason + if ansibleeeJob.HasReachedLimit() { + errorReason = condition.JobReasonBackoffLimitExceeded + } instance.Status.Conditions.Set(condition.FalseCondition( - ansibleeev1.AnsibleExecutionJobReadyCondition, - condition.ErrorReason, - condition.SeverityWarning, - ansibleeev1.AnsibleExecutionJobErrorMessage, + condition.JobReadyCondition, + errorReason, + condition.SeverityError, + condition.JobReadyErrorMessage, err.Error())) instance.Status.JobStatus = ansibleeev1.JobStatusFailed + instance.Status.Failed = ansibleeeJob.GetTotalAttempts() return ctrl.Result{}, err } @@ -232,7 +237,7 @@ func (r *OpenStackAnsibleEEReconciler) Reconcile(ctx context.Context, req ctrl.R Log.Info(fmt.Sprintf("AnsibleEE CR '%s' - Job %s hash added - %s", instance.Name, jobDef.Name, instance.Status.Hash[ansibleeeJobType])) } - instance.Status.Conditions.MarkTrue(ansibleeev1.AnsibleExecutionJobReadyCondition, ansibleeev1.AnsibleExecutionJobReadyMessage) + instance.Status.Conditions.MarkTrue(condition.JobReadyCondition, condition.JobReadyMessage) instance.Status.JobStatus = ansibleeev1.JobStatusSucceeded // We reached the end of the Reconcile, update the Ready condition based on diff --git a/docs/assemblies/openstack_ansibleee.adoc b/docs/assemblies/openstack_ansibleee.adoc index 4c4aaa39..2ac12607 100644 --- a/docs/assemblies/openstack_ansibleee.adoc +++ b/docs/assemblies/openstack_ansibleee.adoc @@ -213,6 +213,11 @@ OpenStackAnsibleEEStatus defines the observed state of OpenStackAnsibleEE | string | false +| failed +| The number of pods which reached phase Failed. +| int64 +| false + | observedGeneration | ObservedGeneration - the most recent generation observed for this service. If the observed generation is less than the spec generation, then the controller has not processed the latest changes injected by the opentack-operator in the top-level CR (e.g. the ContainerImage) | int64 diff --git a/tests/functional/ansibleee_controller_test.go b/tests/functional/ansibleee_controller_test.go index 007d2485..7f60b2d2 100644 --- a/tests/functional/ansibleee_controller_test.go +++ b/tests/functional/ansibleee_controller_test.go @@ -27,7 +27,6 @@ import ( //revive:disable-next-line:dot-imports . "github.com/openstack-k8s-operators/lib-common/modules/common/test/helpers" - "github.com/openstack-k8s-operators/openstack-ansibleee-operator/api/v1beta1" ) var _ = Describe("Ansibleee controller", func() { @@ -40,7 +39,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -61,7 +60,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectCondition( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionTrue, ) th.ExpectCondition( @@ -80,7 +79,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -102,7 +101,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.ErrorReason, "AnsibleExecutionJob error occured Internal error occurred: Job Failed. Check job logs", @@ -123,7 +122,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -140,7 +139,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectCondition( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionTrue, ) ansibleee := GetAnsibleee(ansibleeeName) @@ -163,7 +162,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -177,7 +176,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectCondition( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionTrue, ) ansibleee = GetAnsibleee(ansibleeeName) @@ -197,7 +196,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -218,7 +217,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectCondition( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionTrue, ) th.ExpectCondition( @@ -237,7 +236,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -259,7 +258,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.ErrorReason, "AnsibleExecutionJob error occured Internal error occurred: Job Failed. Check job logs", @@ -328,7 +327,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -349,7 +348,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectCondition( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionTrue, ) th.ExpectCondition( @@ -368,7 +367,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -390,7 +389,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.ErrorReason, "AnsibleExecutionJob error occured Internal error occurred: Job Failed. Check job logs", @@ -476,7 +475,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -497,7 +496,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectCondition( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionTrue, ) th.ExpectCondition( @@ -516,7 +515,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", @@ -538,7 +537,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.ErrorReason, "AnsibleExecutionJob error occured Internal error occurred: Job Failed. Check job logs", @@ -569,7 +568,7 @@ var _ = Describe("Ansibleee controller", func() { th.ExpectConditionWithDetails( ansibleeeName, ConditionGetterFunc(AnsibleeeConditionGetter), - v1beta1.AnsibleExecutionJobReadyCondition, + condition.JobReadyCondition, corev1.ConditionFalse, condition.RequestedReason, "AnsibleExecutionJob is running", diff --git a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml index 2d2ba129..7200de2e 100644 --- a/tests/kuttl/tests/run_failed_playbook/01-assert.yaml +++ b/tests/kuttl/tests/run_failed_playbook/01-assert.yaml @@ -23,19 +23,20 @@ spec: preserveJobs: true status: JobStatus: Failed + failed: 4 conditions: - - message: 'AnsibleExecutionJob error occured Internal error occurred: Job Failed. + - message: 'Job error occured Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" type: Ready - - message: 'AnsibleExecutionJob error occured Internal error occurred: Job Failed. + - message: 'Job error occured Internal error occurred: Job has reached the specified backoff limit. Check job logs' - reason: Error - severity: Warning + reason: BackoffLimitExceeded + severity: Error status: "False" - type: AnsibleExecutionJobReady + type: JobReady --- apiVersion: v1 kind: Pod