Skip to content

Commit 9d1c69b

Browse files
Fixed race condition in additional volumes reconcile
1 parent cbdb255 commit 9d1c69b

File tree

11 files changed

+248
-44
lines changed

11 files changed

+248
-44
lines changed

api/v1beta1/zz_generated.conversion.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1beta2/ibmvpcmachine_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ type IBMVPCMachineStatus struct {
184184
// LoadBalancerPoolMembers is the status of IBM Cloud VPC Load Balancer Backend Pools the machine is a member.
185185
// +optional
186186
LoadBalancerPoolMembers []VPCLoadBalancerBackendPoolMember `json:"loadBalancerPoolMembers,omitempty"`
187+
188+
// AdditionalVolumeIDs is a list of Volume ID as per IBMCloud
189+
// +optional
190+
AdditionalVolumeIDs []string `json:"additionalVolumeIDs,omitempty"`
187191
}
188192

189193
// +kubebuilder:object:root=true

api/v1beta2/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cloud/scope/machine.go

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,12 +1168,27 @@ func (m *MachineScope) GetVolumeAttachments() ([]vpcv1.VolumeAttachment, error)
11681168
return result.VolumeAttachments, nil
11691169
}
11701170

1171-
// CreateAndAttachVolume creates a new Volume and attaches it to the instance.
1172-
func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume) error {
1171+
// GetVolumeState returns the volume's state.
1172+
func (m *MachineScope) GetVolumeState(volumeID string) (string, error) {
1173+
options := vpcv1.GetVolumeOptions{
1174+
ID: &volumeID,
1175+
}
1176+
result, _, err := m.IBMVPCClient.GetVolume(&options)
1177+
if err != nil {
1178+
return "", fmt.Errorf("could not fetch volume status: %w", err)
1179+
}
1180+
return *result.Status, err
1181+
}
1182+
1183+
// CreateVolume creates a new Volume and attaches it to the instance.
1184+
func (m *MachineScope) CreateVolume(vpcVolume *infrav1beta2.VPCVolume) (string, error) {
11731185
volumeOptions := vpcv1.CreateVolumeOptions{}
11741186
// TODO: EncryptionKeyCRN is not supported for now, the field is omitted from the manifest
11751187
if vpcVolume.Profile != "custom" {
11761188
volumeOptions.VolumePrototype = &vpcv1.VolumePrototype{
1189+
ResourceGroup: &vpcv1.ResourceGroupIdentityByID{
1190+
ID: &m.IBMVPCCluster.Spec.ResourceGroup,
1191+
},
11771192
Profile: &vpcv1.VolumeProfileIdentityByName{
11781193
Name: &vpcVolume.Profile,
11791194
},
@@ -1184,6 +1199,9 @@ func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume)
11841199
}
11851200
} else {
11861201
volumeOptions.VolumePrototype = &vpcv1.VolumePrototype{
1202+
ResourceGroup: &vpcv1.ResourceGroupIdentityByID{
1203+
ID: &m.IBMVPCCluster.Spec.ResourceGroup,
1204+
},
11871205
Iops: &vpcVolume.Iops,
11881206
Profile: &vpcv1.VolumeProfileIdentityByName{
11891207
Name: &vpcVolume.Profile,
@@ -1197,19 +1215,28 @@ func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume)
11971215

11981216
volumeResult, _, err := m.IBMVPCClient.CreateVolume(&volumeOptions)
11991217
if err != nil {
1200-
return fmt.Errorf("error while creating volume: %w", err)
1218+
return "", fmt.Errorf("error while creating volume: %w", err)
1219+
}
1220+
1221+
if *volumeResult.Status != vpcv1.VolumeStatusAvailableConst {
1222+
return *volumeResult.ID, fmt.Errorf("volume is not in available state")
12011223
}
12021224

1225+
return *volumeResult.ID, m.AttachVolume(vpcVolume.DeleteVolumeOnInstanceDelete, *volumeResult.ID, vpcVolume.Name)
1226+
}
1227+
1228+
// AttachVolume attaches the given volume to the instance.
1229+
func (m *MachineScope) AttachVolume(deleteOnInstanceDelete bool, volumeID, volumeName string) error {
12031230
attachmentOptions := vpcv1.CreateInstanceVolumeAttachmentOptions{
12041231
InstanceID: &m.IBMVPCMachine.Status.InstanceID,
12051232
Volume: &vpcv1.VolumeAttachmentPrototypeVolume{
1206-
ID: volumeResult.ID,
1233+
ID: &volumeID,
12071234
},
1208-
DeleteVolumeOnInstanceDelete: &vpcVolume.DeleteVolumeOnInstanceDelete,
1209-
Name: &vpcVolume.Name,
1235+
DeleteVolumeOnInstanceDelete: &deleteOnInstanceDelete,
1236+
Name: &volumeName,
12101237
}
12111238

1212-
_, _, err = m.IBMVPCClient.AttachVolumeToInstance(&attachmentOptions)
1239+
_, _, err := m.IBMVPCClient.AttachVolumeToInstance(&attachmentOptions)
12131240
if err != nil {
12141241
err = fmt.Errorf("error while attaching volume to instance: %w", err)
12151242
}

cloud/scope/machine_test.go

Lines changed: 94 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ import (
4343
. "github.com/onsi/gomega"
4444
)
4545

46+
var (
47+
volumeName = "foo-volume"
48+
volumeID = "foo-volume-id"
49+
)
50+
4651
func newVPCMachine(clusterName, machineName string) *infrav1beta2.IBMVPCMachine {
4752
return &infrav1beta2.IBMVPCMachine{
4853
ObjectMeta: metav1.ObjectMeta{
@@ -1109,7 +1114,6 @@ func TestGetVolumeAttachments(t *testing.T) {
11091114
},
11101115
}
11111116
volumeAttachmentName := "foo-volume-attachment"
1112-
volumeName := "foo-volume"
11131117

11141118
testVolumeAttachments := vpcv1.VolumeAttachmentCollection{
11151119
VolumeAttachments: []vpcv1.VolumeAttachment{{
@@ -1145,14 +1149,57 @@ func TestGetVolumeAttachments(t *testing.T) {
11451149
})
11461150
}
11471151

1148-
func TestCreateAndAttachVolume(t *testing.T) {
1152+
func TestGetVolumeState(t *testing.T) {
11491153
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
11501154
t.Helper()
11511155
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
11521156
}
11531157

1154-
volumeName := "foo-volume"
1155-
volumeID := "foo-volume-id"
1158+
volumeStatus := vpcv1.VolumeStatusPendingConst
1159+
1160+
vpcMachine := infrav1beta2.IBMVPCMachine{
1161+
Status: infrav1beta2.IBMVPCMachineStatus{
1162+
InstanceID: "foo-instance-id",
1163+
},
1164+
}
1165+
1166+
vpcVolume := vpcv1.Volume{
1167+
Name: &volumeName,
1168+
ID: &volumeID,
1169+
Status: &volumeStatus,
1170+
}
1171+
volumeFetchError := errors.New("error while fetching volume")
1172+
1173+
t.Run("Return correct volume state", func(t *testing.T) {
1174+
g := NewWithT(t)
1175+
mockController, mockVPC := setup(t)
1176+
t.Cleanup(mockController.Finish)
1177+
scope := setupMachineScope(clusterName, machineName, mockVPC)
1178+
scope.IBMVPCMachine.Status = vpcMachine.Status
1179+
mockVPC.EXPECT().GetVolume(gomock.AssignableToTypeOf(&vpcv1.GetVolumeOptions{})).Return(&vpcVolume, nil, nil)
1180+
state, err := scope.GetVolumeState(volumeID)
1181+
g.Expect(err).To(BeNil())
1182+
g.Expect(state).To(Equal(volumeStatus))
1183+
})
1184+
1185+
t.Run("Return error when GetVolumeState returns error", func(t *testing.T) {
1186+
g := NewWithT(t)
1187+
mockController, mockVPC := setup(t)
1188+
t.Cleanup(mockController.Finish)
1189+
scope := setupMachineScope(clusterName, machineName, mockVPC)
1190+
scope.IBMVPCMachine.Status = vpcMachine.Status
1191+
mockVPC.EXPECT().GetVolume(gomock.AssignableToTypeOf(&vpcv1.GetVolumeOptions{})).Return(nil, nil, volumeFetchError)
1192+
state, err := scope.GetVolumeState(volumeID)
1193+
g.Expect(state).To(BeZero())
1194+
g.Expect(errors.Is(err, volumeFetchError)).To(BeTrue())
1195+
})
1196+
}
1197+
1198+
func TestCreateVolume(t *testing.T) {
1199+
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
1200+
t.Helper()
1201+
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
1202+
}
11561203

11571204
vpcMachine := infrav1beta2.IBMVPCMachine{
11581205
Status: infrav1beta2.IBMVPCMachineStatus{
@@ -1161,54 +1208,79 @@ func TestCreateAndAttachVolume(t *testing.T) {
11611208
}
11621209

11631210
infraVolume := infrav1beta2.VPCVolume{
1164-
Name: volumeName,
1211+
Name: volumeName,
1212+
Profile: "custom",
1213+
Iops: 100,
1214+
SizeGiB: 50,
11651215
}
1216+
pendingState := vpcv1.VolumeStatusPendingConst
11661217

11671218
vpcVolume := vpcv1.Volume{
1168-
Name: &volumeName,
1169-
ID: &volumeID,
1219+
Name: &volumeName,
1220+
ID: &volumeID,
1221+
Status: &pendingState,
11701222
}
11711223

11721224
volumeCreationError := errors.New("error while creating volume")
1173-
1174-
volumeAttachmentError := errors.New("error while attaching volume")
1175-
1176-
t.Run("Volume creation and attachment is successful", func(t *testing.T) {
1225+
volumeNotAvailableError := errors.New("volume is not in available state")
1226+
t.Run("Volume creation is successful, volume is not yet in available state", func(t *testing.T) {
11771227
g := NewWithT(t)
11781228
mockController, mockVPC := setup(t)
11791229
t.Cleanup(mockController.Finish)
11801230
scope := setupMachineScope(clusterName, machineName, mockVPC)
11811231
scope.IBMVPCMachine.Status = vpcMachine.Status
11821232
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(&vpcVolume, nil, nil)
1183-
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, nil)
1184-
1185-
err := scope.CreateAndAttachVolume(&infraVolume)
1186-
g.Expect(err).Should(Succeed())
1233+
id, err := scope.CreateVolume(&infraVolume)
1234+
g.Expect(err).ShouldNot(Succeed())
1235+
g.Expect(err).To(Equal(volumeNotAvailableError))
1236+
g.Expect(id).Should(Equal(volumeID))
11871237
})
1188-
1189-
t.Run("Volume Creation Fails", func(t *testing.T) {
1238+
t.Run("Volume creation fails", func(t *testing.T) {
11901239
g := NewWithT(t)
11911240
mockController, mockVPC := setup(t)
11921241
t.Cleanup(mockController.Finish)
11931242
scope := setupMachineScope(clusterName, machineName, mockVPC)
11941243
scope.IBMVPCMachine.Status = vpcMachine.Status
11951244
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(nil, nil, volumeCreationError)
11961245

1197-
err := scope.CreateAndAttachVolume(&infraVolume)
1246+
id, err := scope.CreateVolume(&infraVolume)
11981247
g.Expect(err).ShouldNot(Succeed())
11991248
g.Expect(errors.Is(err, volumeCreationError)).To(BeTrue())
1249+
g.Expect(id).To(BeZero())
12001250
})
1251+
}
1252+
1253+
func TestAttachVolume(t *testing.T) {
1254+
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
1255+
t.Helper()
1256+
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
1257+
}
12011258

1202-
t.Run("Volume Attachment Fails", func(t *testing.T) {
1259+
deleteOnInstanceDelete := true
1260+
vpcMachine := infrav1beta2.IBMVPCMachine{
1261+
Status: infrav1beta2.IBMVPCMachineStatus{
1262+
InstanceID: "foo-instance-id",
1263+
},
1264+
}
1265+
volumeAttachmentError := errors.New("error while attaching volume")
1266+
t.Run("Volume attachment is successful", func(t *testing.T) {
1267+
g := NewWithT(t)
1268+
mockController, mockVPC := setup(t)
1269+
t.Cleanup(mockController.Finish)
1270+
scope := setupMachineScope(clusterName, machineName, mockVPC)
1271+
scope.IBMVPCMachine.Status = vpcMachine.Status
1272+
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, nil)
1273+
err := scope.AttachVolume(deleteOnInstanceDelete, volumeID, volumeName)
1274+
g.Expect(err).Should(Succeed())
1275+
})
1276+
t.Run("Volume attachment fails", func(t *testing.T) {
12031277
g := NewWithT(t)
12041278
mockController, mockVPC := setup(t)
12051279
t.Cleanup(mockController.Finish)
12061280
scope := setupMachineScope(clusterName, machineName, mockVPC)
12071281
scope.IBMVPCMachine.Status = vpcMachine.Status
1208-
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(&vpcVolume, nil, nil)
12091282
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, volumeAttachmentError)
1210-
1211-
err := scope.CreateAndAttachVolume(&infraVolume)
1283+
err := scope.AttachVolume(deleteOnInstanceDelete, volumeID, volumeName)
12121284
g.Expect(err).ShouldNot(Succeed())
12131285
g.Expect(errors.Is(err, volumeAttachmentError)).To(BeTrue())
12141286
})

config/crd/bases/infrastructure.cluster.x-k8s.io_ibmvpcmachines.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,11 @@ spec:
539539
status:
540540
description: IBMVPCMachineStatus defines the observed state of IBMVPCMachine.
541541
properties:
542+
additionalVolumeIDs:
543+
description: AdditionalVolumeIDs is a list of Volume ID as per IBMCloud
544+
items:
545+
type: string
546+
type: array
542547
addresses:
543548
description: Addresses contains the IBM Cloud instance associated
544549
addresses.

controllers/ibmvpcmachine_controller.go

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -258,15 +258,16 @@ func (r *IBMVPCMachineReconciler) reconcileNormal(machineScope *scope.MachineSco
258258
}
259259

260260
// Handle Additional Volumes
261-
err = r.reconcileAdditionalVolumes(machineScope)
261+
var result ctrl.Result
262+
result, err = r.reconcileAdditionalVolumes(machineScope)
262263
if err != nil {
263264
return ctrl.Result{}, fmt.Errorf("error reconciling additional volumes: %w", err)
264265
}
265266

266267
// With a running machine and all Load Balancer Pool Members reconciled, mark machine as ready.
267268
machineScope.SetReady()
268269
conditions.MarkTrue(machineScope.IBMVPCMachine, infrav1beta2.InstanceReadyCondition)
269-
return ctrl.Result{}, nil
270+
return result, nil
270271
}
271272

272273
func (r *IBMVPCMachineReconciler) getOrCreate(scope *scope.MachineScope) (*vpcv1.Instance, error) {
@@ -298,14 +299,18 @@ func (r *IBMVPCMachineReconciler) reconcileDelete(scope *scope.MachineScope) (_
298299
return ctrl.Result{}, nil
299300
}
300301

301-
func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope.MachineScope) error {
302+
func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope.MachineScope) (ctrl.Result, error) {
303+
if machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs == nil {
304+
machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs = make([]string, len(machineScope.IBMVPCMachine.Spec.AdditionalVolumes))
305+
}
302306
machineVolumes := machineScope.IBMVPCMachine.Spec.AdditionalVolumes
307+
result := ctrl.Result{}
303308
if len(machineVolumes) == 0 {
304-
return nil
309+
return result, nil
305310
}
306311
volumeAttachmentList, err := machineScope.GetVolumeAttachments()
307312
if err != nil {
308-
return err
313+
return result, err
309314
}
310315
volumeAttachmentNames := sets.New[string]()
311316
for i := range volumeAttachmentList {
@@ -315,15 +320,33 @@ func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope
315320
// Read through the list, checking if volume exists and create volume if it does not
316321
for v := range machineVolumes {
317322
if volumeAttachmentNames.Has(machineVolumes[v].Name) {
318-
// volume attachment has been created so volume will eventually be attached
323+
// volume attachment has been created so volume is already attached
319324
continue
320325
}
321-
322-
err = machineScope.CreateAndAttachVolume(machineVolumes[v])
323-
if err != nil {
324-
errList = append(errList, err)
325-
continue
326+
if machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v] != "" {
327+
// volume was already created, fetch volume status and attach if possible
328+
state, err := machineScope.GetVolumeState(machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v])
329+
if err != nil {
330+
errList = append(errList, err)
331+
}
332+
if state != vpcv1.VolumeStatusAvailableConst {
333+
// volume not ready, requeue
334+
result = ctrl.Result{RequeueAfter: 30 * time.Second}
335+
continue
336+
}
337+
err = machineScope.AttachVolume(machineVolumes[v].DeleteVolumeOnInstanceDelete, machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v], machineVolumes[v].Name)
338+
if err != nil {
339+
errList = append(errList, err)
340+
}
341+
} else {
342+
volumeID, err := machineScope.CreateVolume(machineVolumes[v])
343+
machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v] = volumeID
344+
if err != nil && err.Error() == "volume is not in available state" {
345+
result = ctrl.Result{RequeueAfter: 30 * time.Second}
346+
} else if err != nil {
347+
errList = append(errList, err)
348+
}
326349
}
327350
}
328-
return errors.Join(errList...)
351+
return result, errors.Join(errList...)
329352
}

0 commit comments

Comments
 (0)