Skip to content

Commit 9df1b09

Browse files
authored
Add lock to subnet to avoid race between gc and controller (vmware-tanzu#770)
Subnetport/pod controller will acquire a subnet, create nsx subnetport on it and save the subnetport to store after it is created. But Subnetset GC may delete the subnet during the subnetport creation The PR adds locks for subnet by subnet path to avoid this race condition. Testing done: Decreased the subnetset garbage collector running interval to 10 seconds to increase the potential of race. Created 8 pods, waited for all related subnetports created without error and delete the pods. Repeated for 5 times and not found deadlock or race issue. Signed-off-by: Yanjun Zhou <[email protected]>
1 parent aa84b47 commit 9df1b09

File tree

6 files changed

+65
-1
lines changed

6 files changed

+65
-1
lines changed

pkg/controllers/pod/pod_controller.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ func (r *PodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R
8888
return common.ResultRequeue, err
8989
}
9090
contextID := *node.UniqueId
91+
// There is a race condition that the subnetset controller may delete the
92+
// subnet during CollectGarbage. So check the subnet under lock.
93+
r.SubnetService.LockSubnet(&nsxSubnetPath)
94+
defer r.SubnetService.UnlockSubnet(&nsxSubnetPath)
95+
9196
nsxSubnet, err := r.SubnetService.GetSubnetByPath(nsxSubnetPath)
9297
if err != nil {
9398
return common.ResultRequeue, err

pkg/controllers/subnetport/subnetport_controller.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,11 @@ func (r *SubnetPortReconciler) Reconcile(ctx context.Context, req ctrl.Request)
105105
updateFail(r, ctx, subnetPort, &err)
106106
return common.ResultRequeue, err
107107
}
108+
// There is a race condition that the subnetset controller may delete the
109+
// subnet during CollectGarbage. So check the subnet under lock.
110+
r.SubnetService.LockSubnet(&nsxSubnetPath)
111+
defer r.SubnetService.UnlockSubnet(&nsxSubnetPath)
112+
108113
nsxSubnet, err := r.SubnetService.GetSubnetByPath(nsxSubnetPath)
109114
if err != nil {
110115
updateFail(r, ctx, subnetPort, &err)

pkg/controllers/subnetset/subnetset_controller.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,16 +293,18 @@ func (r *SubnetSetReconciler) DeleteSubnetForSubnetSet(obj v1alpha1.SubnetSet, u
293293
hitError := false
294294
hasStaleSubnetPorts := false
295295
for _, subnet := range nsxSubnets {
296+
r.SubnetService.LockSubnet(subnet.Path)
296297
portNums := len(r.SubnetPortService.GetPortsOfSubnet(*subnet.Id))
297298
if portNums > 0 {
298299
hasStaleSubnetPorts = true
300+
r.SubnetService.UnlockSubnet(subnet.Path)
299301
continue
300302
}
301303
if err := r.SubnetService.DeleteSubnet(*subnet); err != nil {
302304
log.Error(err, "fail to delete subnet from subnetset cr", "ID", *subnet.Id)
303305
hitError = true
304306
}
305-
307+
r.SubnetService.UnlockSubnet(subnet.Path)
306308
}
307309
if updataStatus {
308310
if err := r.SubnetService.UpdateSubnetSetStatus(&obj); err != nil {

pkg/nsx/services/common/services.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ type SubnetServiceProvider interface {
3030
GetSubnetsByIndex(key, value string) []*model.VpcSubnet
3131
CreateOrUpdateSubnet(obj client.Object, vpcInfo VPCResourceInfo, tags []model.Tag) (string, error)
3232
GenerateSubnetNSTags(obj client.Object, nsUID string) []model.Tag
33+
LockSubnet(path *string)
34+
UnlockSubnet(path *string)
3335
}
3436

3537
type SubnetPortServiceProvider interface {

pkg/nsx/services/subnet/store.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package subnet
22

33
import (
44
"errors"
5+
"sync"
56

67
"github.com/vmware/vsphere-automation-sdk-go/services/nsxt/model"
78

@@ -51,6 +52,41 @@ func subnetSetIndexFunc(obj interface{}) ([]string, error) {
5152
// SubnetStore is a store for subnet.
5253
type SubnetStore struct {
5354
common.ResourceStore
55+
// save locks for subnet by path
56+
pathLocks sync.Map
57+
}
58+
59+
func (subnetStore *SubnetStore) Add(i interface{}) error {
60+
subnet := i.(*model.VpcSubnet)
61+
if subnet.Path == nil {
62+
log.Info("Store a subnet without path", "subnet", subnet)
63+
return subnetStore.ResourceStore.Add(i)
64+
}
65+
lock := sync.Mutex{}
66+
subnetStore.pathLocks.LoadOrStore(*subnet.Path, &lock)
67+
return subnetStore.ResourceStore.Add(i)
68+
}
69+
70+
func (subnetStore *SubnetStore) Delete(i interface{}) error {
71+
subnet := i.(*model.VpcSubnet)
72+
if subnet.Path == nil {
73+
log.Info("Delete a subnet without path", "subnet", subnet)
74+
return subnetStore.ResourceStore.Delete(i)
75+
}
76+
subnetStore.pathLocks.Delete(*subnet.Path)
77+
return subnetStore.ResourceStore.Delete(i)
78+
}
79+
80+
func (subnetStore *SubnetStore) Lock(path string) {
81+
lock := sync.Mutex{}
82+
subnetLock, _ := subnetStore.pathLocks.LoadOrStore(path, &lock)
83+
subnetLock.(*sync.Mutex).Lock()
84+
}
85+
86+
func (subnetStore *SubnetStore) Unlock(path string) {
87+
if subnetLock, existed := subnetStore.pathLocks.Load(path); existed {
88+
subnetLock.(*sync.Mutex).Unlock()
89+
}
5490
}
5591

5692
func (subnetStore *SubnetStore) Apply(i interface{}) error {

pkg/nsx/services/subnet/subnet.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,3 +421,17 @@ func (service *SubnetService) UpdateSubnetSetTags(ns string, vpcSubnets []*model
421421
}
422422
return nil
423423
}
424+
425+
func (service *SubnetService) LockSubnet(path *string) {
426+
if path != nil && *path != "" {
427+
log.V(1).Info("locked subnet", "path", *path)
428+
service.SubnetStore.Lock(*path)
429+
}
430+
}
431+
432+
func (service *SubnetService) UnlockSubnet(path *string) {
433+
if path != nil && *path != "" {
434+
log.V(1).Info("unlocked subnet", "path", *path)
435+
service.SubnetStore.Unlock(*path)
436+
}
437+
}

0 commit comments

Comments
 (0)