From dcebe795257d2d73b91d616f122f23c80a3b5e27 Mon Sep 17 00:00:00 2001
From: Lukas Piwowarski
Date: Thu, 12 Dec 2024 11:57:41 -0500
Subject: [PATCH] Move away from Jobs to Pods

The test-operator uses Jobs to spawn test pods even though it does not
use any features of this k8s object. In addition, the use of Jobs
requires the creation of a ServiceAccount in the target namespaces. To
be able to create a new SA, the test-operator has to have the rights to
create new roles and rolebindings, which in our case makes the attack
surface larger.

This patch drops the usage of Jobs and moves to Pods.

Depends-On: https://github.com/openstack-k8s-operators/ci-framework/pull/2604
---
 controllers/ansibletest_controller.go |  25 ++----
 controllers/common.go                 | 117 +++++++++++++++++---------
 controllers/horizontest_controller.go |  23 ++---
 controllers/tempest_controller.go     |  25 ++----
 controllers/tobiko_controller.go      |  25 ++----
 pkg/ansibletest/job.go                |  78 +++++++----------
 pkg/horizontest/job.go                |  76 +++++++----------
 pkg/tempest/job.go                    | 102 ++++++++++------------
 pkg/tobiko/job.go                     |  84 ++++++++----------
 9 files changed, 251 insertions(+), 304 deletions(-)

diff --git a/controllers/ansibletest_controller.go b/controllers/ansibletest_controller.go
index b80d5849..d0e73d54 100644
--- a/controllers/ansibletest_controller.go
+++ b/controllers/ansibletest_controller.go
@@ -21,7 +21,6 @@ import (
 	"errors"
 	"fmt"
 	"strconv"
-	"time"
 
 	"reflect"
 
@@ -30,12 +29,10 @@ import (
 	"github.com/openstack-k8s-operators/lib-common/modules/common/condition"
 	"github.com/openstack-k8s-operators/lib-common/modules/common/env"
 	"github.com/openstack-k8s-operators/lib-common/modules/common/helper"
-	"github.com/openstack-k8s-operators/lib-common/modules/common/job"
 	common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac"
-	"github.com/openstack-k8s-operators/test-operator/api/v1beta1"
 	testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
+	v1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
 	"github.com/openstack-k8s-operators/test-operator/pkg/ansibletest"
-	batchv1 "k8s.io/api/batch/v1"
 	corev1 "k8s.io/api/core/v1"
 	k8s_errors "k8s.io/apimachinery/pkg/api/errors"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -161,7 +158,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		Log.Info(InfoTestingCompleted)
 		return ctrl.Result{}, nil
 
-	case CreateFirstJob:
+	case CreateFirstPod:
 		lockAcquired, err := r.AcquireLock(ctx, instance, helper, false)
 		if !lockAcquired {
 			Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName))
@@ -170,7 +167,7 @@
 
 		Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep))
 
-	case CreateNextJob:
+	case CreateNextPod:
 		// Confirm that we still hold the lock. This is useful to check if for
 		// example somebody / something deleted the lock and it got claimed by
 		// another instance. This is considered to be an error state.
@@ -213,7 +210,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Create a new job mountCerts := r.CheckSecretExists(ctx, instance, "combined-ca-bundle") - jobName := r.GetJobName(instance, nextWorkflowStep) + jobName := r.GetPodName(instance, nextWorkflowStep) envVars, workflowOverrideParams := r.PrepareAnsibleEnv(instance, nextWorkflowStep) logsPVCName := r.GetPVCLogsName(instance, 0) containerImage, err := r.GetContainerImage(ctx, workflowOverrideParams["ContainerImage"], instance) @@ -249,8 +246,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) return rbacResult, nil } // Service account, role, binding - end - - jobDef := ansibletest.Job( + podDef := ansibletest.Pod( instance, serviceLabels, jobName, @@ -262,15 +258,8 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) containerImage, privileged, ) - ansibleTestsJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = ansibleTestsJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { // Creation of the ansibleTests job was not successfull. // Release the lock and allow other controllers to spawn @@ -303,7 +292,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) func (r *AnsibleTestReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.AnsibleTest{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). Complete(r) diff --git a/controllers/common.go b/controllers/common.go index 4abed53b..342de2a0 100644 --- a/controllers/common.go +++ b/controllers/common.go @@ -17,7 +17,6 @@ import ( "github.com/openstack-k8s-operators/lib-common/modules/common/util" v1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "gopkg.in/yaml.v3" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" @@ -27,6 +26,7 @@ import ( "k8s.io/client-go/kubernetes" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) const ( @@ -80,13 +80,13 @@ const ( // to change Wait = iota - // CreateFirstJob indicates that the Reconcile loop should create the first job + // CreateFirstPod indicates that the Reconcile loop should create the first job // either specified in the .Spec section or in the .Spec.Workflow section. - CreateFirstJob + CreateFirstPod - // CreateNextJob indicates that the Reconcile loop should create a next job + // CreateNextPod indicates that the Reconcile loop should create a next job // specified in the .Spec.Workflow section (if .Spec.Workflow is defined) - CreateNextJob + CreateNextPod // EndTesting indicates that all jobs have already finished. The Reconcile // loop should end the testing and release resources that are required to @@ -97,6 +97,47 @@ const ( Failure ) +// GetPod returns pod that has a specific name (podName) in a given namespace +// (podNamespace). 
+func (r *Reconciler) GetPod( + ctx context.Context, + podName string, + podNamespace string, +) (*corev1.Pod, error) { + pod := &corev1.Pod{} + objectKey := client.ObjectKey{Namespace: podNamespace, Name: podName} + if err := r.Client.Get(ctx, objectKey, pod); err != nil { + return pod, err + } + + return pod, nil +} + +// CreatePod creates a pod based on a spec provided via PodSpec. +func (r *Reconciler) CreatePod( + ctx context.Context, + h helper.Helper, + podSpec *corev1.Pod, +) (ctrl.Result, error) { + _, err := r.GetPod(ctx, podSpec.Name, podSpec.Namespace) + if err == nil { + return ctrl.Result{}, nil + } else if !k8s_errors.IsNotFound(err) { + return ctrl.Result{}, err + } + + err = controllerutil.SetControllerReference(h.GetBeforeObject(), podSpec, r.GetScheme()) + if err != nil { + return ctrl.Result{}, err + } + + if err := r.Client.Create(ctx, podSpec); err != nil { + return ctrl.Result{}, err + } + + return ctrl.Result{}, nil +} + // NextAction indicates what action needs to be performed by the Reconcile loop // based on the current state of the OpenShift cluster. func (r *Reconciler) NextAction( @@ -104,52 +145,52 @@ func (r *Reconciler) NextAction( instance client.Object, workflowLength int, ) (NextAction, int, error) { - // Get the latest job. The latest job is job with the highest value stored + // Get the latest pod. The latest pod is pod with the highest value stored // in workflowStep label workflowStepIdx := 0 - lastJob, err := r.GetLastJob(ctx, instance) + lastPod, err := r.GetLastPod(ctx, instance) if err != nil { return Failure, workflowStepIdx, err } - // If there is a job associated with the current instance. - if lastJob != nil { - workflowStepIdx, err := strconv.Atoi(lastJob.Labels[workflowStepLabel]) + // If there is a pod associated with the current instance. 
+ if lastPod != nil { + workflowStepIdx, err := strconv.Atoi(lastPod.Labels[workflowStepLabel]) if err != nil { return Failure, workflowStepIdx, err } - // If the last job is not in Failed or Succeded state -> Wait - lastJobFinished := (lastJob.Status.Failed + lastJob.Status.Succeeded) > 0 - if !lastJobFinished { + // If the last pod is not in Failed or Succeded state -> Wait + lastPodFinished := lastPod.Status.Phase == corev1.PodFailed || lastPod.Status.Phase == corev1.PodSucceeded + if !lastPodFinished { return Wait, workflowStepIdx, nil } - // If the last job is in Failed or Succeeded state and it is NOT the last - // job which was supposed to be created -> CreateNextJob - if lastJobFinished && !isLastJobIndex(workflowStepIdx, workflowLength) { + // If the last pod is in Failed or Succeeded state and it is NOT the last + // pod which was supposed to be created -> CreateNextPod + if lastPodFinished && !isLastPodIndex(workflowStepIdx, workflowLength) { workflowStepIdx++ - return CreateNextJob, workflowStepIdx, nil + return CreateNextPod, workflowStepIdx, nil } - // Otherwise if the job is in Failed or Succeded stated and it IS the - // last job -> EndTesting - if lastJobFinished && isLastJobIndex(workflowStepIdx, workflowLength) { + // Otherwise if the pod is in Failed or Succeded stated and it IS the + // last pod -> EndTesting + if lastPodFinished && isLastPodIndex(workflowStepIdx, workflowLength) { return EndTesting, workflowStepIdx, nil } } - // If there is not any job associated with the instance -> createFirstJob - if lastJob == nil { - return CreateFirstJob, workflowStepIdx, nil + // If there is not any pod associated with the instance -> createFirstPod + if lastPod == nil { + return CreateFirstPod, workflowStepIdx, nil } return Failure, workflowStepIdx, nil } -// isLastJobIndex returns true when jobIndex is the index of the last job that +// isLastPodIndex returns true when jobIndex is the index of the last job that // should be executed. Otherwise the return value is false. 
-func isLastJobIndex(jobIndex int, workflowLength int) bool { +func isLastPodIndex(jobIndex int, workflowLength int) bool { switch workflowLength { case 0: return jobIndex == workflowLength @@ -160,26 +201,26 @@ func isLastJobIndex(jobIndex int, workflowLength int) bool { // GetLastJob returns job associated with an instance which has the highest value // stored in the workflowStep label -func (r *Reconciler) GetLastJob( +func (r *Reconciler) GetLastPod( ctx context.Context, instance client.Object, -) (*batchv1.Job, error) { +) (*corev1.Pod, error) { labels := map[string]string{instanceNameLabel: instance.GetName()} namespaceListOpt := client.InNamespace(instance.GetNamespace()) labelsListOpt := client.MatchingLabels(labels) - jobList := &batchv1.JobList{} - err := r.Client.List(ctx, jobList, namespaceListOpt, labelsListOpt) + podList := &corev1.PodList{} + err := r.Client.List(ctx, podList, namespaceListOpt, labelsListOpt) if err != nil { return nil, err } - var maxJob *batchv1.Job + var maxJob *corev1.Pod maxJobWorkflowStep := 0 - for _, job := range jobList.Items { + for _, job := range podList.Items { workflowStep, err := strconv.Atoi(job.Labels[workflowStepLabel]) if err != nil { - return &batchv1.Job{}, err + return &corev1.Pod{}, err } if workflowStep >= maxJobWorkflowStep { @@ -307,7 +348,7 @@ func (r *Reconciler) GetContainerImage( return "", nil } -func (r *Reconciler) GetJobName(instance interface{}, workflowStepNum int) string { +func (r *Reconciler) GetPodName(instance interface{}, workflowStepNum int) string { if typedInstance, ok := instance.(*v1beta1.Tobiko); ok { if len(typedInstance.Spec.Workflow) == 0 || workflowStepNum == workflowStepNumInvalid { return typedInstance.Name @@ -552,11 +593,11 @@ func (r *Reconciler) ReleaseLock(ctx context.Context, instance client.Object) (b return false, errors.New("failed to delete test-operator-lock") } -func (r *Reconciler) JobExists(ctx context.Context, instance client.Object, workflowStepNum int) bool { - job := &batchv1.Job{} - jobName := r.GetJobName(instance, workflowStepNum) - objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: jobName} - err := r.Client.Get(ctx, objectKey, job) +func (r *Reconciler) PodExists(ctx context.Context, instance client.Object, workflowStepNum int) bool { + pod := &corev1.Pod{} + podName := r.GetPodName(instance, workflowStepNum) + objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: podName} + err := r.Client.Get(ctx, objectKey, pod) if err != nil && k8s_errors.IsNotFound(err) { return false } diff --git a/controllers/horizontest_controller.go b/controllers/horizontest_controller.go index 62b8e021..5b8a320a 100644 --- a/controllers/horizontest_controller.go +++ b/controllers/horizontest_controller.go @@ -20,18 +20,15 @@ import ( "context" "errors" "fmt" - "time" "github.com/go-logr/logr" "github.com/openstack-k8s-operators/lib-common/modules/common" "github.com/openstack-k8s-operators/lib-common/modules/common/condition" "github.com/openstack-k8s-operators/lib-common/modules/common/env" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - "github.com/openstack-k8s-operators/lib-common/modules/common/job" common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "github.com/openstack-k8s-operators/test-operator/pkg/horizontest" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" ctrl 
"sigs.k8s.io/controller-runtime" @@ -154,7 +151,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) Log.Info(InfoTestingCompleted) return ctrl.Result{}, nil - case CreateFirstJob: + case CreateFirstPod: lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel) if !lockAcquired { Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName)) @@ -163,7 +160,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep)) - case CreateNextJob: + case CreateNextPod: // Confirm that we still hold the lock. This is useful to check if for // example somebody / something deleted the lock and it got claimed by // another instance. This is considered to be an error state. @@ -224,7 +221,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Prepare HorizonTest env vars envVars := r.PrepareHorizonTestEnvVars(instance) - jobName := r.GetJobName(instance, 0) + jobName := r.GetPodName(instance, 0) logsPVCName := r.GetPVCLogsName(instance, 0) containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance) if err != nil { @@ -240,8 +237,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) return rbacResult, nil } // Service account, role, binding - end - - jobDef := horizontest.Job( + podDef := horizontest.Pod( instance, serviceLabels, jobName, @@ -252,15 +248,8 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) envVars, containerImage, ) - horizontestJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = horizontestJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { instance.Status.Conditions.Set(condition.FalseCondition( condition.DeploymentReadyCondition, @@ -286,7 +275,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request) func (r *HorizonTestReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.HorizonTest{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). 
Complete(r) diff --git a/controllers/tempest_controller.go b/controllers/tempest_controller.go index 512e6a0f..89e25d55 100644 --- a/controllers/tempest_controller.go +++ b/controllers/tempest_controller.go @@ -29,14 +29,12 @@ import ( "github.com/openstack-k8s-operators/lib-common/modules/common/condition" "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - "github.com/openstack-k8s-operators/lib-common/modules/common/job" "github.com/openstack-k8s-operators/lib-common/modules/common/labels" nad "github.com/openstack-k8s-operators/lib-common/modules/common/networkattachment" common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" "github.com/openstack-k8s-operators/lib-common/modules/common/util" testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "github.com/openstack-k8s-operators/test-operator/pkg/tempest" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" @@ -178,7 +176,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re Log.Info(InfoTestingCompleted) return ctrl.Result{}, nil - case CreateFirstJob: + case CreateFirstPod: lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel) if !lockAcquired { Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName)) @@ -187,7 +185,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep)) - case CreateNextJob: + case CreateNextPod: // Confirm that we still hold the lock. This is useful to check if for // example somebody / something deleted the lock and it got claimed by // another instance. This is considered to be an error state. @@ -287,7 +285,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re } // NetworkAttachments - if r.JobExists(ctx, instance, nextWorkflowStep) { + if r.PodExists(ctx, instance, nextWorkflowStep) { networkReady, networkAttachmentStatus, err := nad.VerifyNetworkStatusFromAnnotation( ctx, helper, @@ -323,7 +321,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re mountCerts := r.CheckSecretExists(ctx, instance, "combined-ca-bundle") customDataConfigMapName := GetCustomDataConfigMapName(instance, nextWorkflowStep) EnvVarsConfigMapName := GetEnvVarsConfigMapName(instance, nextWorkflowStep) - jobName := r.GetJobName(instance, nextWorkflowStep) + jobName := r.GetPodName(instance, nextWorkflowStep) logsPVCName := r.GetPVCLogsName(instance, workflowStepNum) containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance) if err != nil { @@ -360,7 +358,7 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re } } - jobDef := tempest.Job( + podDef := tempest.Pod( instance, serviceLabels, serviceAnnotations, @@ -372,15 +370,8 @@ func (r *TempestReconciler) Reconcile(ctx context.Context, req ctrl.Request) (re mountSSHKey, containerImage, ) - tempestJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = tempestJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { // Creation of the tempest job was not successfull. 
// Release the lock and allow other controllers to spawn @@ -429,7 +420,7 @@ func (r *TempestReconciler) reconcileDelete( func (r *TempestReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.Tempest{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). Complete(r) @@ -508,7 +499,7 @@ func (r *TempestReconciler) setTempestConfigVars(envVars map[string]string, envVars["TEMPEST_EXTERNAL_PLUGIN_REFSPEC"] += externalPluginDictionary.ChangeRefspec + "," } - envVars["TEMPEST_WORKFLOW_STEP_DIR_NAME"] = r.GetJobName(instance, workflowStepNum) + envVars["TEMPEST_WORKFLOW_STEP_DIR_NAME"] = r.GetPodName(instance, workflowStepNum) extraImages := mergeWithWorkflow(tRun.ExtraImages, wtRun.ExtraImages) for _, extraImageDict := range extraImages { diff --git a/controllers/tobiko_controller.go b/controllers/tobiko_controller.go index 7d1e9e46..63d2bc9e 100644 --- a/controllers/tobiko_controller.go +++ b/controllers/tobiko_controller.go @@ -30,13 +30,11 @@ import ( "github.com/openstack-k8s-operators/lib-common/modules/common/configmap" "github.com/openstack-k8s-operators/lib-common/modules/common/env" "github.com/openstack-k8s-operators/lib-common/modules/common/helper" - "github.com/openstack-k8s-operators/lib-common/modules/common/job" nad "github.com/openstack-k8s-operators/lib-common/modules/common/networkattachment" common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac" "github.com/openstack-k8s-operators/lib-common/modules/common/util" testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" "github.com/openstack-k8s-operators/test-operator/pkg/tobiko" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" k8s_errors "k8s.io/apimachinery/pkg/api/errors" ctrl "sigs.k8s.io/controller-runtime" @@ -165,7 +163,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res Log.Info(InfoTestingCompleted) return ctrl.Result{}, nil - case CreateFirstJob: + case CreateFirstPod: lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel) if !lockAcquired { Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName)) @@ -174,7 +172,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep)) - case CreateNextJob: + case CreateNextPod: // Confirm that we still hold the lock. This needs to be checked in order // to prevent situation when somebody / something deleted the lock and it // got claimedy by another instance. 
@@ -261,7 +259,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res } // NetworkAttachments - if r.JobExists(ctx, instance, nextWorkflowStep) { + if r.PodExists(ctx, instance, nextWorkflowStep) { networkReady, networkAttachmentStatus, err := nad.VerifyNetworkStatusFromAnnotation( ctx, helper, @@ -310,7 +308,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res // Prepare Tobiko env vars envVars := r.PrepareTobikoEnvVars(ctx, serviceLabels, instance, helper, nextWorkflowStep) - jobName := r.GetJobName(instance, nextWorkflowStep) + jobName := r.GetPodName(instance, nextWorkflowStep) logsPVCName := r.GetPVCLogsName(instance, workflowStepNum) containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance) privileged := r.OverwriteValueWithWorkflow(instance.Spec, "Privileged", "pbool", nextWorkflowStep).(bool) @@ -328,7 +326,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res } // Service account, role, binding - end - jobDef := tobiko.Job( + podDef := tobiko.Job( instance, serviceLabels, serviceAnnotations, @@ -341,15 +339,8 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res containerImage, privileged, ) - tobikoJob := job.NewJob( - jobDef, - testv1beta1.ConfigHash, - true, - time.Duration(5)*time.Second, - "", - ) - ctrlResult, err = tobikoJob.DoJob(ctx, helper) + ctrlResult, err = r.CreatePod(ctx, *helper, podDef) if err != nil { instance.Status.Conditions.Set(condition.FalseCondition( condition.DeploymentReadyCondition, @@ -375,7 +366,7 @@ func (r *TobikoReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res func (r *TobikoReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&testv1beta1.Tobiko{}). - Owns(&batchv1.Job{}). + Owns(&corev1.Pod{}). Owns(&corev1.Secret{}). Owns(&corev1.ConfigMap{}). 
Complete(r) @@ -414,7 +405,7 @@ func (r *TobikoReconciler) PrepareTobikoEnvVars( // Prepare env vars envVars := make(map[string]env.Setter) envVars["USE_EXTERNAL_FILES"] = env.SetValue("True") - envVars["TOBIKO_LOGS_DIR_NAME"] = env.SetValue(r.GetJobName(instance, step)) + envVars["TOBIKO_LOGS_DIR_NAME"] = env.SetValue(r.GetPodName(instance, step)) testenv := r.OverwriteValueWithWorkflow(instance.Spec, "Testenv", "string", step).(string) envVars["TOBIKO_TESTENV"] = env.SetValue(testenv) diff --git a/pkg/ansibletest/job.go b/pkg/ansibletest/job.go index 0a500341..e1789466 100644 --- a/pkg/ansibletest/job.go +++ b/pkg/ansibletest/job.go @@ -5,13 +5,12 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Job - prepare job to run AnsibleTests tests -func Job( +// Pod - prepare job to run AnsibleTests tests +func Pod( instance *testv1beta1.AnsibleTest, labels map[string]string, jobName string, @@ -22,69 +21,56 @@ func Job( externalWorkflowCounter int, containerImage string, privileged bool, -) *batchv1.Job { +) *corev1.Pod { runAsUser := int64(227) runAsGroup := int64(227) - parallelism := int32(1) - completions := int32(1) capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"} securityContext := util.GetSecurityContext(runAsUser, capabilities, privileged) - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, Namespace: instance.Namespace, Labels: labels, }, - Spec: batchv1.JobSpec{ - Parallelism: ¶llelism, - Completions: &completions, - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - }, - Spec: corev1.PodSpec{ - AutomountServiceAccountToken: &instance.Spec.Privileged, - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ - { - Name: instance.Name, - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, instance, externalWorkflowCounter), - SecurityContext: &securityContext, - Resources: instance.Spec.Resources, - }, - }, - Volumes: GetVolumes( - instance, - logsPVCName, - mountCerts, - workflowOverrideParams, - externalWorkflowCounter, - ), + Spec: corev1.PodSpec{ + AutomountServiceAccountToken: &instance.Spec.Privileged, + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name, + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, instance, externalWorkflowCounter), + SecurityContext: &securityContext, + Resources: instance.Spec.Resources, }, }, + Volumes: GetVolumes( + instance, + logsPVCName, + mountCerts, + workflowOverrideParams, + externalWorkflowCounter, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - 
job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } diff --git a/pkg/horizontest/job.go b/pkg/horizontest/job.go index 749e7c7d..176a1a43 100644 --- a/pkg/horizontest/job.go +++ b/pkg/horizontest/job.go @@ -5,13 +5,12 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Job - prepare job to run Horizon tests -func Job( +// Pod - prepare job to run Horizon tests +func Pod( instance *testv1beta1.HorizonTest, labels map[string]string, jobName string, @@ -21,68 +20,55 @@ func Job( mountKubeconfig bool, envVars map[string]env.Setter, containerImage string, -) *batchv1.Job { +) *corev1.Pod { runAsUser := int64(42455) runAsGroup := int64(42455) - parallelism := int32(1) - completions := int32(1) capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"} securityContext := util.GetSecurityContext(runAsUser, capabilities, instance.Spec.Privileged) - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ Name: jobName, Namespace: instance.Namespace, Labels: labels, }, - Spec: batchv1.JobSpec{ - Parallelism: ¶llelism, - Completions: &completions, - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: labels, - }, - Spec: corev1.PodSpec{ - AutomountServiceAccountToken: &instance.Spec.Privileged, - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ - { - Name: instance.Name, - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), - SecurityContext: &securityContext, - Resources: instance.Spec.Resources, - }, - }, - Volumes: GetVolumes( - instance, - logsPVCName, - mountCerts, - mountKubeconfig, - ), + Spec: corev1.PodSpec{ + AutomountServiceAccountToken: &instance.Spec.Privileged, + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name, + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), + SecurityContext: &securityContext, + Resources: instance.Spec.Resources, }, }, + Volumes: GetVolumes( + instance, + logsPVCName, + mountCerts, + mountKubeconfig, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } diff --git a/pkg/tempest/job.go b/pkg/tempest/job.go index b3c91f32..cada8f62 100644 --- a/pkg/tempest/job.go +++ b/pkg/tempest/job.go @@ -5,13 +5,12 @@ 
import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) -// Job - prepare job to run Tempest tests -func Job( +// Pod - prepare pod to run Tempest tests +func Pod( instance *testv1beta1.Tempest, labels map[string]string, annotations map[string]string, @@ -22,81 +21,72 @@ func Job( mountCerts bool, mountSSHKey bool, containerImage string, -) *batchv1.Job { +) *corev1.Pod { envVars := map[string]env.Setter{} runAsUser := int64(42480) runAsGroup := int64(42480) securityContext := util.GetSecurityContext(runAsUser, []corev1.Capability{}, instance.Spec.Privileged) - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: jobName, - Namespace: instance.Namespace, - Labels: labels, + Annotations: annotations, + Name: jobName, + Namespace: instance.Namespace, + Labels: labels, }, - Spec: batchv1.JobSpec{ - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: annotations, - Labels: labels, - }, - Spec: corev1.PodSpec{ - AutomountServiceAccountToken: &instance.Spec.Privileged, - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ + Spec: corev1.PodSpec{ + AutomountServiceAccountToken: &instance.Spec.Privileged, + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name + "-tests-runner", + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, mountSSHKey, instance), + SecurityContext: &securityContext, + Resources: instance.Spec.Resources, + EnvFrom: []corev1.EnvFromSource{ { - Name: instance.Name + "-tests-runner", - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, mountSSHKey, instance), - SecurityContext: &securityContext, - Resources: instance.Spec.Resources, - EnvFrom: []corev1.EnvFromSource{ - { - ConfigMapRef: &corev1.ConfigMapEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: customDataConfigMapName, - }, - }, + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: customDataConfigMapName, }, - { - ConfigMapRef: &corev1.ConfigMapEnvSource{ - LocalObjectReference: corev1.LocalObjectReference{ - Name: envVarsConfigMapName, - }, - }, + }, + }, + { + ConfigMapRef: &corev1.ConfigMapEnvSource{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: envVarsConfigMapName, }, }, }, }, - Volumes: GetVolumes( - instance, - customDataConfigMapName, - logsPVCName, - mountCerts, - mountSSHKey, - ), }, }, + Volumes: GetVolumes( + instance, + customDataConfigMapName, + logsPVCName, + mountCerts, + mountSSHKey, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = 
&corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod } diff --git a/pkg/tobiko/job.go b/pkg/tobiko/job.go index 2ba14e68..b7404762 100644 --- a/pkg/tobiko/job.go +++ b/pkg/tobiko/job.go @@ -5,7 +5,6 @@ import ( testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1" util "github.com/openstack-k8s-operators/test-operator/pkg/util" - batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -23,72 +22,57 @@ func Job( envVars map[string]env.Setter, containerImage string, privileged bool, -) *batchv1.Job { +) *corev1.Pod { runAsUser := int64(42495) runAsGroup := int64(42495) - parallelism := int32(1) - completions := int32(1) capabilities := []corev1.Capability{"NET_ADMIN", "NET_RAW"} securityContext := util.GetSecurityContext(runAsUser, capabilities, privileged) - // Note(lpiwowar): Once the webhook is implemented move all the logic of merging - // the workflows there. - job := &batchv1.Job{ + pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ - Name: jobName, - Namespace: instance.Namespace, - Labels: labels, + Annotations: annotations, + Name: jobName, + Namespace: instance.Namespace, + Labels: labels, }, - Spec: batchv1.JobSpec{ - Parallelism: ¶llelism, - Completions: &completions, - BackoffLimit: instance.Spec.BackoffLimit, - Template: corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Annotations: annotations, - Labels: labels, - }, - Spec: corev1.PodSpec{ - AutomountServiceAccountToken: &instance.Spec.Privileged, - RestartPolicy: corev1.RestartPolicyNever, - ServiceAccountName: instance.RbacResourceName(), - SecurityContext: &corev1.PodSecurityContext{ - RunAsUser: &runAsUser, - RunAsGroup: &runAsGroup, - FSGroup: &runAsGroup, - }, - Tolerations: instance.Spec.Tolerations, - NodeSelector: instance.Spec.NodeSelector, - Containers: []corev1.Container{ - { - Name: instance.Name, - Image: containerImage, - Args: []string{}, - Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), - VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), - SecurityContext: &securityContext, - Resources: instance.Spec.Resources, - }, - }, - Volumes: GetVolumes( - instance, - logsPVCName, - mountCerts, - mountKeys, - mountKubeconfig, - ), + Spec: corev1.PodSpec{ + AutomountServiceAccountToken: &instance.Spec.Privileged, + RestartPolicy: corev1.RestartPolicyNever, + Tolerations: instance.Spec.Tolerations, + NodeSelector: instance.Spec.NodeSelector, + SecurityContext: &corev1.PodSecurityContext{ + RunAsUser: &runAsUser, + RunAsGroup: &runAsGroup, + FSGroup: &runAsGroup, + }, + Containers: []corev1.Container{ + { + Name: instance.Name, + Image: containerImage, + Args: []string{}, + Env: env.MergeEnvs([]corev1.EnvVar{}, envVars), + VolumeMounts: GetVolumeMounts(mountCerts, mountKeys, mountKubeconfig, instance), + SecurityContext: &securityContext, + Resources: instance.Spec.Resources, }, }, + Volumes: GetVolumes( + instance, + logsPVCName, + mountCerts, + mountKeys, + mountKubeconfig, + ), }, } if len(instance.Spec.SELinuxLevel) > 0 { - job.Spec.Template.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ + pod.Spec.SecurityContext.SELinuxOptions = &corev1.SELinuxOptions{ Level: instance.Spec.SELinuxLevel, } } - return job + return pod }
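
Reviewer note (not part of the patch): the behavioral pivot of this change is how NextAction decides that a workflow step has finished. With Jobs it read the Job's status counters; with bare Pods it reads the pod phase, which is terminal for pods created with RestartPolicy: Never. A condensed Go sketch of the two checks; jobFinished and podFinished are illustrative names, not symbols from this repository:

package sketch

import (
	batchv1 "k8s.io/api/batch/v1"
	corev1 "k8s.io/api/core/v1"
)

// jobFinished mirrors the pre-patch check in NextAction: the Job counters
// report whether any pod spawned by the Job already succeeded or failed.
func jobFinished(job *batchv1.Job) bool {
	return (job.Status.Failed + job.Status.Succeeded) > 0
}

// podFinished mirrors the post-patch check: a pod with RestartPolicy=Never
// ends up in either the Succeeded or the Failed phase.
func podFinished(pod *corev1.Pod) bool {
	return pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed
}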
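
A second sketch, again illustrative rather than authoritative: the new CreatePod helper in controllers/common.go is essentially a create-if-absent guarded by a controller owner reference. The owner reference is what keeps Owns(&corev1.Pod{}) in SetupWithManager triggering reconciles and ties garbage collection of the pod to the CR now that no Job controller sits in between. The name ensurePod and its signature below are hypothetical simplifications of the code added in this patch:

package sketch

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	k8s_errors "k8s.io/apimachinery/pkg/api/errors"
	"k8s.io/apimachinery/pkg/runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
)

// ensurePod creates the test pod only when it does not exist yet and marks
// the owning CR (AnsibleTest, HorizonTest, Tempest or Tobiko) as its
// controller so that deleting the CR also removes the pod.
func ensurePod(
	ctx context.Context,
	c client.Client,
	scheme *runtime.Scheme,
	owner client.Object,
	pod *corev1.Pod,
) error {
	existing := &corev1.Pod{}
	err := c.Get(ctx, client.ObjectKeyFromObject(pod), existing)
	if err == nil {
		// The pod for this workflow step already exists; nothing to do.
		return nil
	}
	if !k8s_errors.IsNotFound(err) {
		return err
	}

	if err := controllerutil.SetControllerReference(owner, pod, scheme); err != nil {
		return err
	}
	return c.Create(ctx, pod)
}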