Skip to content

Commit dcebe79

Browse files
committed
Move away from Jobs to Pods
The test-operator is using Jobs to spawn test pods even though it does not use any features of this k8s object. Plus usage of the Jobs requires creation of ServiceAccount in the target namespaces. In order to be able to create a new, SA the test-oprator has to have a rights to create new roles and rolebindings which in our case makes the attack surface larger. This patch drops the usage of Jobs and moves to Pods. Depends-On: openstack-k8s-operators/ci-framework#2604
1 parent be35b07 commit dcebe79

File tree

9 files changed

+251
-304
lines changed

9 files changed

+251
-304
lines changed

controllers/ansibletest_controller.go

Lines changed: 7 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"errors"
2222
"fmt"
2323
"strconv"
24-
"time"
2524

2625
"reflect"
2726

@@ -30,12 +29,10 @@ import (
3029
"github.com/openstack-k8s-operators/lib-common/modules/common/condition"
3130
"github.com/openstack-k8s-operators/lib-common/modules/common/env"
3231
"github.com/openstack-k8s-operators/lib-common/modules/common/helper"
33-
"github.com/openstack-k8s-operators/lib-common/modules/common/job"
3432
common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac"
35-
"github.com/openstack-k8s-operators/test-operator/api/v1beta1"
3633
testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
34+
v1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
3735
"github.com/openstack-k8s-operators/test-operator/pkg/ansibletest"
38-
batchv1 "k8s.io/api/batch/v1"
3936
corev1 "k8s.io/api/core/v1"
4037
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
4138
ctrl "sigs.k8s.io/controller-runtime"
@@ -161,7 +158,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
161158
Log.Info(InfoTestingCompleted)
162159
return ctrl.Result{}, nil
163160

164-
case CreateFirstJob:
161+
case CreateFirstPod:
165162
lockAcquired, err := r.AcquireLock(ctx, instance, helper, false)
166163
if !lockAcquired {
167164
Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName))
@@ -170,7 +167,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
170167

171168
Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep))
172169

173-
case CreateNextJob:
170+
case CreateNextPod:
174171
// Confirm that we still hold the lock. This is useful to check if for
175172
// example somebody / something deleted the lock and it got claimed by
176173
// another instance. This is considered to be an error state.
@@ -213,7 +210,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
213210

214211
// Create a new job
215212
mountCerts := r.CheckSecretExists(ctx, instance, "combined-ca-bundle")
216-
jobName := r.GetJobName(instance, nextWorkflowStep)
213+
jobName := r.GetPodName(instance, nextWorkflowStep)
217214
envVars, workflowOverrideParams := r.PrepareAnsibleEnv(instance, nextWorkflowStep)
218215
logsPVCName := r.GetPVCLogsName(instance, 0)
219216
containerImage, err := r.GetContainerImage(ctx, workflowOverrideParams["ContainerImage"], instance)
@@ -249,8 +246,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
249246
return rbacResult, nil
250247
}
251248
// Service account, role, binding - end
252-
253-
jobDef := ansibletest.Job(
249+
podDef := ansibletest.Pod(
254250
instance,
255251
serviceLabels,
256252
jobName,
@@ -262,15 +258,8 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
262258
containerImage,
263259
privileged,
264260
)
265-
ansibleTestsJob := job.NewJob(
266-
jobDef,
267-
testv1beta1.ConfigHash,
268-
true,
269-
time.Duration(5)*time.Second,
270-
"",
271-
)
272261

273-
ctrlResult, err = ansibleTestsJob.DoJob(ctx, helper)
262+
ctrlResult, err = r.CreatePod(ctx, *helper, podDef)
274263
if err != nil {
275264
// Creation of the ansibleTests job was not successfull.
276265
// Release the lock and allow other controllers to spawn
@@ -303,7 +292,7 @@ func (r *AnsibleTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
303292
func (r *AnsibleTestReconciler) SetupWithManager(mgr ctrl.Manager) error {
304293
return ctrl.NewControllerManagedBy(mgr).
305294
For(&testv1beta1.AnsibleTest{}).
306-
Owns(&batchv1.Job{}).
295+
Owns(&corev1.Pod{}).
307296
Owns(&corev1.Secret{}).
308297
Owns(&corev1.ConfigMap{}).
309298
Complete(r)

controllers/common.go

Lines changed: 79 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ import (
1717
"github.com/openstack-k8s-operators/lib-common/modules/common/util"
1818
v1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
1919
"gopkg.in/yaml.v3"
20-
batchv1 "k8s.io/api/batch/v1"
2120
corev1 "k8s.io/api/core/v1"
2221
rbacv1 "k8s.io/api/rbac/v1"
2322
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
@@ -27,6 +26,7 @@ import (
2726
"k8s.io/client-go/kubernetes"
2827
ctrl "sigs.k8s.io/controller-runtime"
2928
"sigs.k8s.io/controller-runtime/pkg/client"
29+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3030
)
3131

3232
const (
@@ -80,13 +80,13 @@ const (
8080
// to change
8181
Wait = iota
8282

83-
// CreateFirstJob indicates that the Reconcile loop should create the first job
83+
// CreateFirstPod indicates that the Reconcile loop should create the first job
8484
// either specified in the .Spec section or in the .Spec.Workflow section.
85-
CreateFirstJob
85+
CreateFirstPod
8686

87-
// CreateNextJob indicates that the Reconcile loop should create a next job
87+
// CreateNextPod indicates that the Reconcile loop should create a next job
8888
// specified in the .Spec.Workflow section (if .Spec.Workflow is defined)
89-
CreateNextJob
89+
CreateNextPod
9090

9191
// EndTesting indicates that all jobs have already finished. The Reconcile
9292
// loop should end the testing and release resources that are required to
@@ -97,59 +97,100 @@ const (
9797
Failure
9898
)
9999

100+
// GetPod returns pod that has a specific name (podName) in a given namespace
101+
// (podNamespace).
102+
func (r *Reconciler) GetPod(
103+
ctx context.Context,
104+
podName string,
105+
podNamespace string,
106+
) (*corev1.Pod, error) {
107+
pod := &corev1.Pod{}
108+
objectKey := client.ObjectKey{Namespace: podNamespace, Name: podName}
109+
if err := r.Client.Get(ctx, objectKey, pod); err != nil {
110+
return pod, err
111+
}
112+
113+
return pod, nil
114+
}
115+
116+
// CreatePod creates a pod based on a spec provided via PodSpec.
117+
func (r *Reconciler) CreatePod(
118+
ctx context.Context,
119+
h helper.Helper,
120+
podSpec *corev1.Pod,
121+
) (ctrl.Result, error) {
122+
_, err := r.GetPod(ctx, podSpec.Name, podSpec.Namespace)
123+
if err == nil {
124+
return ctrl.Result{}, nil
125+
} else if !k8s_errors.IsNotFound(err) {
126+
return ctrl.Result{}, err
127+
}
128+
129+
err = controllerutil.SetControllerReference(h.GetBeforeObject(), podSpec, r.GetScheme())
130+
if err != nil {
131+
return ctrl.Result{}, err
132+
}
133+
134+
if err := r.Client.Create(ctx, podSpec); err != nil {
135+
return ctrl.Result{}, err
136+
}
137+
138+
return ctrl.Result{}, nil
139+
}
140+
100141
// NextAction indicates what action needs to be performed by the Reconcile loop
101142
// based on the current state of the OpenShift cluster.
102143
func (r *Reconciler) NextAction(
103144
ctx context.Context,
104145
instance client.Object,
105146
workflowLength int,
106147
) (NextAction, int, error) {
107-
// Get the latest job. The latest job is job with the highest value stored
148+
// Get the latest pod. The latest pod is pod with the highest value stored
108149
// in workflowStep label
109150
workflowStepIdx := 0
110-
lastJob, err := r.GetLastJob(ctx, instance)
151+
lastPod, err := r.GetLastPod(ctx, instance)
111152
if err != nil {
112153
return Failure, workflowStepIdx, err
113154
}
114155

115-
// If there is a job associated with the current instance.
116-
if lastJob != nil {
117-
workflowStepIdx, err := strconv.Atoi(lastJob.Labels[workflowStepLabel])
156+
// If there is a pod associated with the current instance.
157+
if lastPod != nil {
158+
workflowStepIdx, err := strconv.Atoi(lastPod.Labels[workflowStepLabel])
118159
if err != nil {
119160
return Failure, workflowStepIdx, err
120161
}
121162

122-
// If the last job is not in Failed or Succeded state -> Wait
123-
lastJobFinished := (lastJob.Status.Failed + lastJob.Status.Succeeded) > 0
124-
if !lastJobFinished {
163+
// If the last pod is not in Failed or Succeded state -> Wait
164+
lastPodFinished := lastPod.Status.Phase == corev1.PodFailed || lastPod.Status.Phase == corev1.PodSucceeded
165+
if !lastPodFinished {
125166
return Wait, workflowStepIdx, nil
126167
}
127168

128-
// If the last job is in Failed or Succeeded state and it is NOT the last
129-
// job which was supposed to be created -> CreateNextJob
130-
if lastJobFinished && !isLastJobIndex(workflowStepIdx, workflowLength) {
169+
// If the last pod is in Failed or Succeeded state and it is NOT the last
170+
// pod which was supposed to be created -> CreateNextPod
171+
if lastPodFinished && !isLastPodIndex(workflowStepIdx, workflowLength) {
131172
workflowStepIdx++
132-
return CreateNextJob, workflowStepIdx, nil
173+
return CreateNextPod, workflowStepIdx, nil
133174
}
134175

135-
// Otherwise if the job is in Failed or Succeded stated and it IS the
136-
// last job -> EndTesting
137-
if lastJobFinished && isLastJobIndex(workflowStepIdx, workflowLength) {
176+
// Otherwise if the pod is in Failed or Succeded stated and it IS the
177+
// last pod -> EndTesting
178+
if lastPodFinished && isLastPodIndex(workflowStepIdx, workflowLength) {
138179
return EndTesting, workflowStepIdx, nil
139180
}
140181
}
141182

142-
// If there is not any job associated with the instance -> createFirstJob
143-
if lastJob == nil {
144-
return CreateFirstJob, workflowStepIdx, nil
183+
// If there is not any pod associated with the instance -> createFirstPod
184+
if lastPod == nil {
185+
return CreateFirstPod, workflowStepIdx, nil
145186
}
146187

147188
return Failure, workflowStepIdx, nil
148189
}
149190

150-
// isLastJobIndex returns true when jobIndex is the index of the last job that
191+
// isLastPodIndex returns true when jobIndex is the index of the last job that
151192
// should be executed. Otherwise the return value is false.
152-
func isLastJobIndex(jobIndex int, workflowLength int) bool {
193+
func isLastPodIndex(jobIndex int, workflowLength int) bool {
153194
switch workflowLength {
154195
case 0:
155196
return jobIndex == workflowLength
@@ -160,26 +201,26 @@ func isLastJobIndex(jobIndex int, workflowLength int) bool {
160201

161202
// GetLastJob returns job associated with an instance which has the highest value
162203
// stored in the workflowStep label
163-
func (r *Reconciler) GetLastJob(
204+
func (r *Reconciler) GetLastPod(
164205
ctx context.Context,
165206
instance client.Object,
166-
) (*batchv1.Job, error) {
207+
) (*corev1.Pod, error) {
167208
labels := map[string]string{instanceNameLabel: instance.GetName()}
168209
namespaceListOpt := client.InNamespace(instance.GetNamespace())
169210
labelsListOpt := client.MatchingLabels(labels)
170-
jobList := &batchv1.JobList{}
171-
err := r.Client.List(ctx, jobList, namespaceListOpt, labelsListOpt)
211+
podList := &corev1.PodList{}
212+
err := r.Client.List(ctx, podList, namespaceListOpt, labelsListOpt)
172213
if err != nil {
173214
return nil, err
174215
}
175216

176-
var maxJob *batchv1.Job
217+
var maxJob *corev1.Pod
177218
maxJobWorkflowStep := 0
178219

179-
for _, job := range jobList.Items {
220+
for _, job := range podList.Items {
180221
workflowStep, err := strconv.Atoi(job.Labels[workflowStepLabel])
181222
if err != nil {
182-
return &batchv1.Job{}, err
223+
return &corev1.Pod{}, err
183224
}
184225

185226
if workflowStep >= maxJobWorkflowStep {
@@ -307,7 +348,7 @@ func (r *Reconciler) GetContainerImage(
307348
return "", nil
308349
}
309350

310-
func (r *Reconciler) GetJobName(instance interface{}, workflowStepNum int) string {
351+
func (r *Reconciler) GetPodName(instance interface{}, workflowStepNum int) string {
311352
if typedInstance, ok := instance.(*v1beta1.Tobiko); ok {
312353
if len(typedInstance.Spec.Workflow) == 0 || workflowStepNum == workflowStepNumInvalid {
313354
return typedInstance.Name
@@ -552,11 +593,11 @@ func (r *Reconciler) ReleaseLock(ctx context.Context, instance client.Object) (b
552593
return false, errors.New("failed to delete test-operator-lock")
553594
}
554595

555-
func (r *Reconciler) JobExists(ctx context.Context, instance client.Object, workflowStepNum int) bool {
556-
job := &batchv1.Job{}
557-
jobName := r.GetJobName(instance, workflowStepNum)
558-
objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: jobName}
559-
err := r.Client.Get(ctx, objectKey, job)
596+
func (r *Reconciler) PodExists(ctx context.Context, instance client.Object, workflowStepNum int) bool {
597+
pod := &corev1.Pod{}
598+
podName := r.GetPodName(instance, workflowStepNum)
599+
objectKey := client.ObjectKey{Namespace: instance.GetNamespace(), Name: podName}
600+
err := r.Client.Get(ctx, objectKey, pod)
560601
if err != nil && k8s_errors.IsNotFound(err) {
561602
return false
562603
}

controllers/horizontest_controller.go

Lines changed: 6 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,15 @@ import (
2020
"context"
2121
"errors"
2222
"fmt"
23-
"time"
2423

2524
"github.com/go-logr/logr"
2625
"github.com/openstack-k8s-operators/lib-common/modules/common"
2726
"github.com/openstack-k8s-operators/lib-common/modules/common/condition"
2827
"github.com/openstack-k8s-operators/lib-common/modules/common/env"
2928
"github.com/openstack-k8s-operators/lib-common/modules/common/helper"
30-
"github.com/openstack-k8s-operators/lib-common/modules/common/job"
3129
common_rbac "github.com/openstack-k8s-operators/lib-common/modules/common/rbac"
3230
testv1beta1 "github.com/openstack-k8s-operators/test-operator/api/v1beta1"
3331
"github.com/openstack-k8s-operators/test-operator/pkg/horizontest"
34-
batchv1 "k8s.io/api/batch/v1"
3532
corev1 "k8s.io/api/core/v1"
3633
k8s_errors "k8s.io/apimachinery/pkg/api/errors"
3734
ctrl "sigs.k8s.io/controller-runtime"
@@ -154,7 +151,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
154151
Log.Info(InfoTestingCompleted)
155152
return ctrl.Result{}, nil
156153

157-
case CreateFirstJob:
154+
case CreateFirstPod:
158155
lockAcquired, err := r.AcquireLock(ctx, instance, helper, instance.Spec.Parallel)
159156
if !lockAcquired {
160157
Log.Info(fmt.Sprintf(InfoCanNotAcquireLock, testOperatorLockName))
@@ -163,7 +160,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
163160

164161
Log.Info(fmt.Sprintf(InfoCreatingFirstPod, nextWorkflowStep))
165162

166-
case CreateNextJob:
163+
case CreateNextPod:
167164
// Confirm that we still hold the lock. This is useful to check if for
168165
// example somebody / something deleted the lock and it got claimed by
169166
// another instance. This is considered to be an error state.
@@ -224,7 +221,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
224221

225222
// Prepare HorizonTest env vars
226223
envVars := r.PrepareHorizonTestEnvVars(instance)
227-
jobName := r.GetJobName(instance, 0)
224+
jobName := r.GetPodName(instance, 0)
228225
logsPVCName := r.GetPVCLogsName(instance, 0)
229226
containerImage, err := r.GetContainerImage(ctx, instance.Spec.ContainerImage, instance)
230227
if err != nil {
@@ -240,8 +237,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
240237
return rbacResult, nil
241238
}
242239
// Service account, role, binding - end
243-
244-
jobDef := horizontest.Job(
240+
podDef := horizontest.Pod(
245241
instance,
246242
serviceLabels,
247243
jobName,
@@ -252,15 +248,8 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
252248
envVars,
253249
containerImage,
254250
)
255-
horizontestJob := job.NewJob(
256-
jobDef,
257-
testv1beta1.ConfigHash,
258-
true,
259-
time.Duration(5)*time.Second,
260-
"",
261-
)
262251

263-
ctrlResult, err = horizontestJob.DoJob(ctx, helper)
252+
ctrlResult, err = r.CreatePod(ctx, *helper, podDef)
264253
if err != nil {
265254
instance.Status.Conditions.Set(condition.FalseCondition(
266255
condition.DeploymentReadyCondition,
@@ -286,7 +275,7 @@ func (r *HorizonTestReconciler) Reconcile(ctx context.Context, req ctrl.Request)
286275
func (r *HorizonTestReconciler) SetupWithManager(mgr ctrl.Manager) error {
287276
return ctrl.NewControllerManagedBy(mgr).
288277
For(&testv1beta1.HorizonTest{}).
289-
Owns(&batchv1.Job{}).
278+
Owns(&corev1.Pod{}).
290279
Owns(&corev1.Secret{}).
291280
Owns(&corev1.ConfigMap{}).
292281
Complete(r)

0 commit comments

Comments
 (0)