Skip to content

Commit 02be276

Browse files
authored
Merge pull request #85111 from andyzhangx/automated-cherry-pick-of-#84917-upstream-release-1.16
Automated cherry pick of #84917: fix race condition when attach/delete disk
2 parents ee59384 + 5f3d155 commit 02be276

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go

+17 -4
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ import (
2323
"fmt"
2424
"path"
2525
"strings"
26+
"sync"
2627
"time"
2728

2829
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
@@ -65,7 +66,9 @@ type controllerCommon struct {
6566
location string
6667
storageEndpointSuffix string
6768
resourceGroup string
68-
cloud *Cloud
69+
// store disk URI when disk is in attaching or detaching process
70+
diskAttachDetachMap sync.Map
71+
cloud *Cloud
6972
}
7073

7174
// getNodeVMSet gets the VMSet interface based on config.VMType and the real virtual machine type.
@@ -145,6 +148,8 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
145148
}
146149

147150
klog.V(2).Infof("Trying to attach volume %q lun %d to node %q.", diskURI, lun, nodeName)
151+
c.diskAttachDetachMap.Store(strings.ToLower(diskURI), "attaching")
152+
defer c.diskAttachDetachMap.Delete(strings.ToLower(diskURI))
148153
return lun, vmset.AttachDisk(isManagedDisk, diskName, diskURI, nodeName, lun, cachingMode)
149154
}
150155

@@ -171,14 +176,18 @@ func (c *controllerCommon) DetachDisk(diskName, diskURI string, nodeName types.N
171176

172177
// make the lock here as small as possible
173178
diskOpMutex.LockKey(instanceid)
179+
c.diskAttachDetachMap.Store(strings.ToLower(diskURI), "detaching")
174180
resp, err := vmset.DetachDisk(diskName, diskURI, nodeName)
181+
c.diskAttachDetachMap.Delete(strings.ToLower(diskURI))
175182
diskOpMutex.UnlockKey(instanceid)
176183

177184
if c.cloud.CloudProviderBackoff && shouldRetryHTTPRequest(resp, err) {
178185
klog.V(2).Infof("azureDisk - update backing off: detach disk(%s, %s), err: %v", diskName, diskURI, err)
179186
retryErr := kwait.ExponentialBackoff(c.cloud.RequestBackoff(), func() (bool, error) {
180187
diskOpMutex.LockKey(instanceid)
188+
c.diskAttachDetachMap.Store(strings.ToLower(diskURI), "detaching")
181189
resp, err := vmset.DetachDisk(diskName, diskURI, nodeName)
190+
c.diskAttachDetachMap.Delete(strings.ToLower(diskURI))
182191
diskOpMutex.UnlockKey(instanceid)
183192
return c.cloud.processHTTPRetryResponse(nil, "", resp, err)
184193
})
@@ -220,9 +229,13 @@ func (c *controllerCommon) GetDiskLun(diskName, diskURI string, nodeName types.N
220229
if disk.Lun != nil && (disk.Name != nil && diskName != "" && strings.EqualFold(*disk.Name, diskName)) ||
221230
(disk.Vhd != nil && disk.Vhd.URI != nil && diskURI != "" && strings.EqualFold(*disk.Vhd.URI, diskURI)) ||
222231
(disk.ManagedDisk != nil && strings.EqualFold(*disk.ManagedDisk.ID, diskURI)) {
223-
// found the disk
224-
klog.V(2).Infof("azureDisk - find disk: lun %d name %q uri %q", *disk.Lun, diskName, diskURI)
225-
return *disk.Lun, nil
232+
if disk.ToBeDetached != nil && *disk.ToBeDetached {
233+
klog.Warningf("azureDisk - find disk(ToBeDetached): lun %d name %q uri %q", *disk.Lun, diskName, diskURI)
234+
} else {
235+
// found the disk
236+
klog.V(2).Infof("azureDisk - find disk: lun %d name %q uri %q", *disk.Lun, diskName, diskURI)
237+
return *disk.Lun, nil
238+
}
226239
}
227240
}
228241
return -1, fmt.Errorf("cannot find Lun for disk %s", diskName)

staging/src/k8s.io/legacy-cloud-providers/azure/azure_managedDiskController.go

+4
Original file line number | Diff line number | Diff line change
@@ -187,6 +187,10 @@ func (c *ManagedDiskController) DeleteManagedDisk(diskURI string) error {
187187
ctx, cancel := getContextWithCancel()
188188
defer cancel()
189189

190+
if _, ok := c.common.diskAttachDetachMap.Load(strings.ToLower(diskURI)); ok {
191+
return fmt.Errorf("failed to delete disk(%s) since it's in attaching or detaching state", diskURI)
192+
}
193+
190194
_, err = c.common.cloud.DisksClient.Delete(ctx, resourceGroup, diskName)
191195
if err != nil {
192196
return err

0 commit comments

Comments
 (0)