Skip to content

Commit 0b71729

Browse files
Fixed race condition in additional volumes reconcile
1 parent bd1cb3f commit 0b71729

File tree

11 files changed

+297
-48
lines changed

11 files changed

+297
-48
lines changed

api/v1beta1/zz_generated.conversion.go

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1beta2/ibmvpcmachine_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,10 @@ type IBMVPCMachineStatus struct {
184184
// LoadBalancerPoolMembers is the status of IBM Cloud VPC Load Balancer Backend Pools the machine is a member.
185185
// +optional
186186
LoadBalancerPoolMembers []VPCLoadBalancerBackendPoolMember `json:"loadBalancerPoolMembers,omitempty"`
187+
188+
// AdditionalVolumeIDs is a list of Volume ID as per IBMCloud
189+
// +optional
190+
AdditionalVolumeIDs []string `json:"additionalVolumeIDs,omitempty"`
187191
}
188192

189193
// +kubebuilder:object:root=true

api/v1beta2/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cloud/scope/machine.go

Lines changed: 36 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,12 +1168,33 @@ func (m *MachineScope) GetVolumeAttachments() ([]vpcv1.VolumeAttachment, error)
11681168
return result.VolumeAttachments, nil
11691169
}
11701170

1171-
// CreateAndAttachVolume creates a new Volume and attaches it to the instance.
1172-
func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume) error {
1171+
// GetVolumeState returns the volume's state.
1172+
func (m *MachineScope) GetVolumeState(volumeID string) (string, error) {
1173+
options := vpcv1.GetVolumeOptions{
1174+
ID: &volumeID,
1175+
}
1176+
result, _, err := m.IBMVPCClient.GetVolume(&options)
1177+
if err != nil {
1178+
return "", fmt.Errorf("could not fetch volume status: %w", err)
1179+
}
1180+
return *result.Status, err
1181+
}
1182+
1183+
// CreateVolume creates a new Volume and attaches it to the instance.
1184+
func (m *MachineScope) CreateVolume(vpcVolume *infrav1beta2.VPCVolume) (string, error) {
11731185
volumeOptions := vpcv1.CreateVolumeOptions{}
1186+
var resourceGroupID string
1187+
if m.IBMVPCCluster.Status.ResourceGroup != nil {
1188+
resourceGroupID = m.IBMVPCCluster.Status.ResourceGroup.ID
1189+
} else {
1190+
resourceGroupID = m.IBMVPCCluster.Spec.ResourceGroup
1191+
}
11741192
// TODO: EncryptionKeyCRN is not supported for now, the field is omitted from the manifest
11751193
if vpcVolume.Profile != "custom" {
11761194
volumeOptions.VolumePrototype = &vpcv1.VolumePrototype{
1195+
ResourceGroup: &vpcv1.ResourceGroupIdentityByID{
1196+
ID: &resourceGroupID,
1197+
},
11771198
Profile: &vpcv1.VolumeProfileIdentityByName{
11781199
Name: &vpcVolume.Profile,
11791200
},
@@ -1184,6 +1205,9 @@ func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume)
11841205
}
11851206
} else {
11861207
volumeOptions.VolumePrototype = &vpcv1.VolumePrototype{
1208+
ResourceGroup: &vpcv1.ResourceGroupIdentityByID{
1209+
ID: &resourceGroupID,
1210+
},
11871211
Iops: &vpcVolume.Iops,
11881212
Profile: &vpcv1.VolumeProfileIdentityByName{
11891213
Name: &vpcVolume.Profile,
@@ -1197,19 +1221,24 @@ func (m *MachineScope) CreateAndAttachVolume(vpcVolume *infrav1beta2.VPCVolume)
11971221

11981222
volumeResult, _, err := m.IBMVPCClient.CreateVolume(&volumeOptions)
11991223
if err != nil {
1200-
return fmt.Errorf("error while creating volume: %w", err)
1224+
return "", fmt.Errorf("error while creating volume: %w", err)
12011225
}
12021226

1227+
return *volumeResult.ID, nil
1228+
}
1229+
1230+
// AttachVolume attaches the given volume to the instance.
1231+
func (m *MachineScope) AttachVolume(deleteOnInstanceDelete bool, volumeID, volumeName string) error {
12031232
attachmentOptions := vpcv1.CreateInstanceVolumeAttachmentOptions{
12041233
InstanceID: &m.IBMVPCMachine.Status.InstanceID,
12051234
Volume: &vpcv1.VolumeAttachmentPrototypeVolume{
1206-
ID: volumeResult.ID,
1235+
ID: &volumeID,
12071236
},
1208-
DeleteVolumeOnInstanceDelete: &vpcVolume.DeleteVolumeOnInstanceDelete,
1209-
Name: &vpcVolume.Name,
1237+
DeleteVolumeOnInstanceDelete: &deleteOnInstanceDelete,
1238+
Name: &volumeName,
12101239
}
12111240

1212-
_, _, err = m.IBMVPCClient.AttachVolumeToInstance(&attachmentOptions)
1241+
_, _, err := m.IBMVPCClient.AttachVolumeToInstance(&attachmentOptions)
12131242
if err != nil {
12141243
err = fmt.Errorf("error while attaching volume to instance: %w", err)
12151244
}

cloud/scope/machine_test.go

Lines changed: 91 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,11 @@ import (
4343
. "github.com/onsi/gomega"
4444
)
4545

46+
var (
47+
volumeName = "foo-volume"
48+
volumeID = "foo-volume-id"
49+
)
50+
4651
func newVPCMachine(clusterName, machineName string) *infrav1beta2.IBMVPCMachine {
4752
return &infrav1beta2.IBMVPCMachine{
4853
ObjectMeta: metav1.ObjectMeta{
@@ -1109,7 +1114,6 @@ func TestGetVolumeAttachments(t *testing.T) {
11091114
},
11101115
}
11111116
volumeAttachmentName := "foo-volume-attachment"
1112-
volumeName := "foo-volume"
11131117

11141118
testVolumeAttachments := vpcv1.VolumeAttachmentCollection{
11151119
VolumeAttachments: []vpcv1.VolumeAttachment{{
@@ -1145,14 +1149,57 @@ func TestGetVolumeAttachments(t *testing.T) {
11451149
})
11461150
}
11471151

1148-
func TestCreateAndAttachVolume(t *testing.T) {
1152+
func TestGetVolumeState(t *testing.T) {
11491153
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
11501154
t.Helper()
11511155
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
11521156
}
11531157

1154-
volumeName := "foo-volume"
1155-
volumeID := "foo-volume-id"
1158+
volumeStatus := vpcv1.VolumeStatusPendingConst
1159+
1160+
vpcMachine := infrav1beta2.IBMVPCMachine{
1161+
Status: infrav1beta2.IBMVPCMachineStatus{
1162+
InstanceID: "foo-instance-id",
1163+
},
1164+
}
1165+
1166+
vpcVolume := vpcv1.Volume{
1167+
Name: &volumeName,
1168+
ID: &volumeID,
1169+
Status: &volumeStatus,
1170+
}
1171+
volumeFetchError := errors.New("error while fetching volume")
1172+
1173+
t.Run("Return correct volume state", func(t *testing.T) {
1174+
g := NewWithT(t)
1175+
mockController, mockVPC := setup(t)
1176+
t.Cleanup(mockController.Finish)
1177+
scope := setupMachineScope(clusterName, machineName, mockVPC)
1178+
scope.IBMVPCMachine.Status = vpcMachine.Status
1179+
mockVPC.EXPECT().GetVolume(gomock.AssignableToTypeOf(&vpcv1.GetVolumeOptions{})).Return(&vpcVolume, nil, nil)
1180+
state, err := scope.GetVolumeState(volumeID)
1181+
g.Expect(err).To(BeNil())
1182+
g.Expect(state).To(Equal(volumeStatus))
1183+
})
1184+
1185+
t.Run("Return error when GetVolumeState returns error", func(t *testing.T) {
1186+
g := NewWithT(t)
1187+
mockController, mockVPC := setup(t)
1188+
t.Cleanup(mockController.Finish)
1189+
scope := setupMachineScope(clusterName, machineName, mockVPC)
1190+
scope.IBMVPCMachine.Status = vpcMachine.Status
1191+
mockVPC.EXPECT().GetVolume(gomock.AssignableToTypeOf(&vpcv1.GetVolumeOptions{})).Return(nil, nil, volumeFetchError)
1192+
state, err := scope.GetVolumeState(volumeID)
1193+
g.Expect(state).To(BeZero())
1194+
g.Expect(errors.Is(err, volumeFetchError)).To(BeTrue())
1195+
})
1196+
}
1197+
1198+
func TestCreateVolume(t *testing.T) {
1199+
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
1200+
t.Helper()
1201+
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
1202+
}
11561203

11571204
vpcMachine := infrav1beta2.IBMVPCMachine{
11581205
Status: infrav1beta2.IBMVPCMachineStatus{
@@ -1161,54 +1208,75 @@ func TestCreateAndAttachVolume(t *testing.T) {
11611208
}
11621209

11631210
infraVolume := infrav1beta2.VPCVolume{
1164-
Name: volumeName,
1211+
Name: volumeName,
1212+
Profile: "custom",
1213+
Iops: 100,
1214+
SizeGiB: 50,
11651215
}
1216+
pendingState := vpcv1.VolumeStatusPendingConst
11661217

11671218
vpcVolume := vpcv1.Volume{
1168-
Name: &volumeName,
1169-
ID: &volumeID,
1219+
Name: &volumeName,
1220+
ID: &volumeID,
1221+
Status: &pendingState,
11701222
}
11711223

11721224
volumeCreationError := errors.New("error while creating volume")
1173-
1174-
volumeAttachmentError := errors.New("error while attaching volume")
1175-
1176-
t.Run("Volume creation and attachment is successful", func(t *testing.T) {
1225+
t.Run("Volume creation is successful", func(t *testing.T) {
11771226
g := NewWithT(t)
11781227
mockController, mockVPC := setup(t)
11791228
t.Cleanup(mockController.Finish)
11801229
scope := setupMachineScope(clusterName, machineName, mockVPC)
1181-
scope.IBMVPCMachine.Status = vpcMachine.Status
11821230
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(&vpcVolume, nil, nil)
1183-
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, nil)
1184-
1185-
err := scope.CreateAndAttachVolume(&infraVolume)
1231+
id, err := scope.CreateVolume(&infraVolume)
11861232
g.Expect(err).Should(Succeed())
1233+
g.Expect(id).Should(Equal(volumeID))
11871234
})
1188-
1189-
t.Run("Volume Creation Fails", func(t *testing.T) {
1235+
t.Run("Volume creation fails", func(t *testing.T) {
11901236
g := NewWithT(t)
11911237
mockController, mockVPC := setup(t)
11921238
t.Cleanup(mockController.Finish)
11931239
scope := setupMachineScope(clusterName, machineName, mockVPC)
11941240
scope.IBMVPCMachine.Status = vpcMachine.Status
11951241
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(nil, nil, volumeCreationError)
1196-
1197-
err := scope.CreateAndAttachVolume(&infraVolume)
1242+
id, err := scope.CreateVolume(&infraVolume)
11981243
g.Expect(err).ShouldNot(Succeed())
11991244
g.Expect(errors.Is(err, volumeCreationError)).To(BeTrue())
1245+
g.Expect(id).To(BeZero())
12001246
})
1247+
}
1248+
1249+
func TestAttachVolume(t *testing.T) {
1250+
setup := func(t *testing.T) (*gomock.Controller, *mock.MockVpc) {
1251+
t.Helper()
1252+
return gomock.NewController(t), mock.NewMockVpc(gomock.NewController(t))
1253+
}
12011254

1202-
t.Run("Volume Attachment Fails", func(t *testing.T) {
1255+
deleteOnInstanceDelete := true
1256+
vpcMachine := infrav1beta2.IBMVPCMachine{
1257+
Status: infrav1beta2.IBMVPCMachineStatus{
1258+
InstanceID: "foo-instance-id",
1259+
},
1260+
}
1261+
volumeAttachmentError := errors.New("error while attaching volume")
1262+
t.Run("Volume attachment is successful", func(t *testing.T) {
1263+
g := NewWithT(t)
1264+
mockController, mockVPC := setup(t)
1265+
t.Cleanup(mockController.Finish)
1266+
scope := setupMachineScope(clusterName, machineName, mockVPC)
1267+
scope.IBMVPCMachine.Status = vpcMachine.Status
1268+
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, nil)
1269+
err := scope.AttachVolume(deleteOnInstanceDelete, volumeID, volumeName)
1270+
g.Expect(err).Should(Succeed())
1271+
})
1272+
t.Run("Volume attachment fails", func(t *testing.T) {
12031273
g := NewWithT(t)
12041274
mockController, mockVPC := setup(t)
12051275
t.Cleanup(mockController.Finish)
12061276
scope := setupMachineScope(clusterName, machineName, mockVPC)
12071277
scope.IBMVPCMachine.Status = vpcMachine.Status
1208-
mockVPC.EXPECT().CreateVolume(gomock.AssignableToTypeOf(&vpcv1.CreateVolumeOptions{})).Return(&vpcVolume, nil, nil)
12091278
mockVPC.EXPECT().AttachVolumeToInstance(gomock.AssignableToTypeOf(&vpcv1.CreateInstanceVolumeAttachmentOptions{})).Return(nil, nil, volumeAttachmentError)
1210-
1211-
err := scope.CreateAndAttachVolume(&infraVolume)
1279+
err := scope.AttachVolume(deleteOnInstanceDelete, volumeID, volumeName)
12121280
g.Expect(err).ShouldNot(Succeed())
12131281
g.Expect(errors.Is(err, volumeAttachmentError)).To(BeTrue())
12141282
})

config/crd/bases/infrastructure.cluster.x-k8s.io_ibmvpcmachines.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,11 @@ spec:
539539
status:
540540
description: IBMVPCMachineStatus defines the observed state of IBMVPCMachine.
541541
properties:
542+
additionalVolumeIDs:
543+
description: AdditionalVolumeIDs is a list of Volume ID as per IBMCloud
544+
items:
545+
type: string
546+
type: array
542547
addresses:
543548
description: Addresses contains the IBM Cloud instance associated
544549
addresses.

controllers/ibmvpcmachine_controller.go

Lines changed: 37 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -258,15 +258,16 @@ func (r *IBMVPCMachineReconciler) reconcileNormal(machineScope *scope.MachineSco
258258
}
259259

260260
// Handle Additional Volumes
261-
err = r.reconcileAdditionalVolumes(machineScope)
261+
var result ctrl.Result
262+
result, err = r.reconcileAdditionalVolumes(machineScope)
262263
if err != nil {
263264
return ctrl.Result{}, fmt.Errorf("error reconciling additional volumes: %w", err)
264265
}
265266

266267
// With a running machine and all Load Balancer Pool Members reconciled, mark machine as ready.
267268
machineScope.SetReady()
268269
conditions.MarkTrue(machineScope.IBMVPCMachine, infrav1beta2.InstanceReadyCondition)
269-
return ctrl.Result{}, nil
270+
return result, nil
270271
}
271272

272273
func (r *IBMVPCMachineReconciler) getOrCreate(scope *scope.MachineScope) (*vpcv1.Instance, error) {
@@ -298,14 +299,18 @@ func (r *IBMVPCMachineReconciler) reconcileDelete(scope *scope.MachineScope) (_
298299
return ctrl.Result{}, nil
299300
}
300301

301-
func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope.MachineScope) error {
302+
func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope.MachineScope) (ctrl.Result, error) {
303+
if machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs == nil {
304+
machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs = make([]string, len(machineScope.IBMVPCMachine.Spec.AdditionalVolumes))
305+
}
302306
machineVolumes := machineScope.IBMVPCMachine.Spec.AdditionalVolumes
307+
result := ctrl.Result{}
303308
if len(machineVolumes) == 0 {
304-
return nil
309+
return result, nil
305310
}
306311
volumeAttachmentList, err := machineScope.GetVolumeAttachments()
307312
if err != nil {
308-
return err
313+
return result, err
309314
}
310315
volumeAttachmentNames := sets.New[string]()
311316
for i := range volumeAttachmentList {
@@ -315,15 +320,35 @@ func (r *IBMVPCMachineReconciler) reconcileAdditionalVolumes(machineScope *scope
315320
// Read through the list, checking if volume exists and create volume if it does not
316321
for v := range machineVolumes {
317322
if volumeAttachmentNames.Has(machineVolumes[v].Name) {
318-
// volume attachment has been created so volume will eventually be attached
323+
// volume attachment has been created so volume is already attached
319324
continue
320325
}
321-
322-
err = machineScope.CreateAndAttachVolume(machineVolumes[v])
323-
if err != nil {
324-
errList = append(errList, err)
325-
continue
326+
if machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v] != "" {
327+
// volume was already created, fetch volume status and attach if possible
328+
state, err := machineScope.GetVolumeState(machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v])
329+
if err != nil {
330+
errList = append(errList, err)
331+
}
332+
switch state {
333+
case vpcv1.VolumeStatusPendingConst, vpcv1.VolumeStatusUpdatingConst:
334+
result = ctrl.Result{RequeueAfter: 10 * time.Second}
335+
case vpcv1.VolumeStatusFailedConst, vpcv1.VolumeStatusUnusableConst:
336+
errList = append(errList, fmt.Errorf("volume in unexpected state: %s", state))
337+
case vpcv1.VolumeStatusAvailableConst:
338+
err = machineScope.AttachVolume(machineVolumes[v].DeleteVolumeOnInstanceDelete, machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v], machineVolumes[v].Name)
339+
if err != nil {
340+
errList = append(errList, err)
341+
}
342+
}
343+
} else {
344+
// volume does not exist, create it and requeue so that it becomes available
345+
volumeID, err := machineScope.CreateVolume(machineVolumes[v])
346+
machineScope.IBMVPCMachine.Status.AdditionalVolumeIDs[v] = volumeID
347+
if err != nil {
348+
errList = append(errList, err)
349+
}
350+
result = ctrl.Result{RequeueAfter: 10 * time.Second}
326351
}
327352
}
328-
return errors.Join(errList...)
353+
return result, errors.Join(errList...)
329354
}

0 commit comments

Comments
 (0)