From cb7308a365416b7e2c9867be60156bb503f12832 Mon Sep 17 00:00:00 2001 From: imitev Date: Fri, 30 Mar 2018 03:59:38 -0700 Subject: [PATCH 001/149] [SAP] implement size expansion when creating volume from template-based snapshot --- .../drivers/vmware/test_vmware_volumeops.py | 11 +++----- cinder/volume/drivers/vmware/vmdk.py | 16 ++++++++++-- cinder/volume/drivers/vmware/volumeops.py | 25 ++++++++++++------- 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 4f1ab79485e..f2b62b3004e 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -1050,9 +1050,6 @@ def _test_get_clone_spec( else: disk_device = None - dev_change = mock.sentinel.dev_change - create_device_change_for_disk_removal.return_value = dev_change - datastore = mock.sentinel.datastore disk_move_type = mock.sentinel.disk_move_type snapshot = mock.sentinel.snapshot @@ -1072,7 +1069,7 @@ def _test_get_clone_spec( host=host, resource_pool=rp, extra_config=extra_config, - disks_to_clone=disks_to_clone) + device_changes='fake-device-changes') self.assertEqual(relocate_spec, ret.location) self.assertFalse(ret.powerOn) @@ -1087,9 +1084,7 @@ def _test_get_clone_spec( disk_move_type, disk_type, disk_device) self._verify_extra_config(ret.config.extraConfig, key, value) - create_device_change_for_disk_removal.assert_called_once_with( - backing, disks_to_clone) - self.assertEqual(dev_change, ret.config.deviceChange) + self.assertEqual('fake-device-changes', ret.config.deviceChange) def test_get_clone_spec(self): self._test_get_clone_spec() @@ -1166,7 +1161,7 @@ def _test_clone_backing( get_clone_spec.assert_called_once_with( datastore, exp_disk_move_type, snapshot, backing, disk_type, host=host, resource_pool=resource_pool, extra_config=extra_config, - disks_to_clone=None) + device_changes=None) exp_folder = folder if folder else backing_folder self.session.invoke_api.assert_called_once_with( diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 833de20990e..8f0909b1e18 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2210,6 +2210,13 @@ def _create_volume_from_template(self, volume, path): (host, rp, folder, summary) = self._select_ds_for_volume(volume) datastore = summary.datastore disk_type = VMwareVcVmdkDriver._get_disk_type(volume) + device_changes = None + if volume['size']: + new_size_in_kb = volume['size'] * units.Gi / units.Ki + disk_device = self.volumeops._get_disk_device(template) + if new_size_in_kb > disk_device.capacityInKB: + device_changes = self.volumeops._create_spec_for_disk_expand(disk_device, new_size_in_kb) + tmp_backing = self.volumeops.clone_backing(tmp_name, template, None, @@ -2218,7 +2225,8 @@ def _create_volume_from_template(self, volume, path): disk_type=disk_type, host=host, resource_pool=rp, - folder=folder) + folder=folder, + device_changes=device_changes) self._create_volume_from_temp_backing(volume, tmp_backing) @@ -2284,10 +2292,14 @@ def _create_temp_backing_from_attached_vmdk( 'instance': instance}) tmp_name = tmp_name or uuidutils.generate_uuid() + + device_changes = self.volumeops._create_device_change_for_disk_removal( + instance, disks_to_clone=[vol_dev_uuid]) + return self.volumeops.clone_backing( tmp_name, instance, None, volumeops.FULL_CLONE_TYPE, datastore, 
host=host, resource_pool=rp, folder=folder, - disks_to_clone=[vol_dev_uuid]) + device_changes=device_changes) def _extend_if_needed(self, volume, backing): volume_size = volume.size * units.Gi diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 7837c7eb41e..d2592539d83 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1114,7 +1114,7 @@ def _get_folder(self, backing): def _get_clone_spec(self, datastore, disk_move_type, snapshot, backing, disk_type, host=None, resource_pool=None, - extra_config=None, disks_to_clone=None): + extra_config=None, device_changes=None): """Get the clone spec. :param datastore: Reference to datastore @@ -1126,7 +1126,7 @@ def _get_clone_spec(self, datastore, disk_move_type, snapshot, backing, :param resource_pool: Target resource pool :param extra_config: Key-value pairs to be written to backing's extra-config - :param disks_to_clone: UUIDs of disks to clone + :param device_changes: Device changes to be applied during cloning :return: Clone spec """ if disk_type is not None: @@ -1154,10 +1154,8 @@ def _get_clone_spec(self, datastore, disk_move_type, snapshot, backing, config_spec.extraConfig = self._get_extra_config_option_values( extra_config) - if disks_to_clone: - config_spec.deviceChange = ( - self._create_device_change_for_disk_removal( - backing, disks_to_clone)) + if device_changes: + config_spec.deviceChange = device_changes LOG.debug("Spec for cloning the backing: %s.", clone_spec) return clone_spec @@ -1174,7 +1172,7 @@ def _create_device_change_for_disk_removal(self, backing, disks_to_clone): def clone_backing(self, name, backing, snapshot, clone_type, datastore, disk_type=None, host=None, resource_pool=None, - extra_config=None, folder=None, disks_to_clone=None): + extra_config=None, folder=None, device_changes=None): """Clone backing. 
If the clone_type is 'full', then a full clone of the source volume @@ -1192,7 +1190,7 @@ def clone_backing(self, name, backing, snapshot, clone_type, datastore, :param extra_config: Key-value pairs to be written to backing's extra-config :param folder: The location of the clone - :param disks_to_clone: UUIDs of disks to clone + :param device_changes: Device changes to be applied during cloning """ LOG.debug("Creating a clone of backing: %(back)s, named: %(name)s, " "clone type: %(type)s from snapshot: %(snap)s on " @@ -1213,7 +1211,7 @@ def clone_backing(self, name, backing, snapshot, clone_type, datastore, clone_spec = self._get_clone_spec( datastore, disk_move_type, snapshot, backing, disk_type, host=host, resource_pool=resource_pool, extra_config=extra_config, - disks_to_clone=disks_to_clone) + device_changes=device_changes) task = self._session.invoke_api(self._session.vim, 'CloneVM_Task', backing, folder=folder, name=name, @@ -1298,6 +1296,15 @@ def _create_spec_for_disk_remove(self, disk_device): disk_spec.device = disk_device return disk_spec + def _create_spec_for_disk_expand(self, disk_device, new_size_in_kb): + cf = self._session.vim.client.factory + disk_spec = cf.create('ns0:VirtualDeviceConfigSpec') + disk_spec.operation = 'edit' + disk_spec.device = disk_device + disk_spec.device.capacityInKB = new_size_in_kb + disk_spec.device.capacityInBytes = disk_spec.device.capacityInKB * units.Ki + return disk_spec + def detach_disk_from_backing(self, backing, disk_device): """Detach the given disk from backing.""" From f504c3255d82ff59fa5e56d0b7c588f917f735ed Mon Sep 17 00:00:00 2001 From: Andrew Karpow Date: Wed, 25 Jul 2018 10:29:59 +0200 Subject: [PATCH 002/149] [SAP] netapp/dataontap: ignore certificate --- cinder/volume/drivers/netapp/dataontap/client/api.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/netapp/dataontap/client/api.py b/cinder/volume/drivers/netapp/dataontap/client/api.py index ada92e7f107..f60828f4710 100644 --- a/cinder/volume/drivers/netapp/dataontap/client/api.py +++ b/cinder/volume/drivers/netapp/dataontap/client/api.py @@ -28,6 +28,7 @@ from oslo_utils import netutils import six from six.moves import urllib +import ssl from cinder import exception from cinder.i18n import _ @@ -299,7 +300,8 @@ def _build_opener(self): auth_handler = self._create_basic_auth_handler() else: auth_handler = self._create_certificate_auth_handler() - opener = urllib.request.build_opener(auth_handler) + https_handler = self._create_no_cert_check_handler() + opener = urllib.request.build_opener(https_handler, auth_handler) self._opener = opener def _create_basic_auth_handler(self): @@ -312,6 +314,13 @@ def _create_basic_auth_handler(self): def _create_certificate_auth_handler(self): raise NotImplementedError() + def _create_no_cert_check_handler(self): + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + return urllib.request.HTTPSHandler(context=ctx) + + def __str__(self): return "server: %s" % self._host From 2ec0b484eba7f989e9e4b2f08b42d534e0c39f39 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Thu, 6 Sep 2018 14:27:20 +0200 Subject: [PATCH 003/149] [SAP] Fix pep8 warnings --- .../drivers/netapp/dataontap/client/api.py | 1 - cinder/volume/drivers/vmware/vmdk.py | 26 ++++++++++--------- cinder/volume/drivers/vmware/volumeops.py | 3 ++- 3 files changed, 16 insertions(+), 14 deletions(-) diff --git a/cinder/volume/drivers/netapp/dataontap/client/api.py 
b/cinder/volume/drivers/netapp/dataontap/client/api.py index f60828f4710..95b3cb63b45 100644 --- a/cinder/volume/drivers/netapp/dataontap/client/api.py +++ b/cinder/volume/drivers/netapp/dataontap/client/api.py @@ -320,7 +320,6 @@ def _create_no_cert_check_handler(self): ctx.verify_mode = ssl.CERT_NONE return urllib.request.HTTPSHandler(context=ctx) - def __str__(self): return "server: %s" % self._host diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8f0909b1e18..8b46d2d4bad 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2215,18 +2215,20 @@ def _create_volume_from_template(self, volume, path): new_size_in_kb = volume['size'] * units.Gi / units.Ki disk_device = self.volumeops._get_disk_device(template) if new_size_in_kb > disk_device.capacityInKB: - device_changes = self.volumeops._create_spec_for_disk_expand(disk_device, new_size_in_kb) - - tmp_backing = self.volumeops.clone_backing(tmp_name, - template, - None, - volumeops.FULL_CLONE_TYPE, - datastore, - disk_type=disk_type, - host=host, - resource_pool=rp, - folder=folder, - device_changes=device_changes) + device_changes = self.volumeops._create_spec_for_disk_expand( + disk_device, new_size_in_kb) + + tmp_backing = self.volumeops.clone_backing( + tmp_name, + template, + None, + volumeops.FULL_CLONE_TYPE, + datastore, + disk_type=disk_type, + host=host, + resource_pool=rp, + folder=folder, + device_changes=device_changes) self._create_volume_from_temp_backing(volume, tmp_backing) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index d2592539d83..66e3f7414c7 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1302,7 +1302,8 @@ def _create_spec_for_disk_expand(self, disk_device, new_size_in_kb): disk_spec.operation = 'edit' disk_spec.device = disk_device disk_spec.device.capacityInKB = new_size_in_kb - disk_spec.device.capacityInBytes = disk_spec.device.capacityInKB * units.Ki + disk_spec.device.capacityInBytes =\ + disk_spec.device.capacityInKB * units.Ki return disk_spec def detach_disk_from_backing(self, backing, disk_device): From e8a1a3ae1c8c4216ec6b24705002a162bdbe7412 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Fri, 30 Nov 2018 14:59:34 +0100 Subject: [PATCH 004/149] [SAP] Change the minimal virtual disk capacity to 4MB With our VASA provider we cannot create disks with the default minimum of 1MB as no LUNs below 4MB size can be created. The need for creating such small volumes comes from the fact, that finding the actual size of stream-optimized images is left to the vCenter, which grows the volume dynamically. 
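
As a back-of-the-envelope illustration of the clamp this change introduces (a minimal sketch only; MIN_VIRTUAL_DISK_SIZE_KB matches the constant added to volumeops.py below, while the helper name here is made up for this note):

    from oslo_utils import units

    MIN_VIRTUAL_DISK_SIZE_KB = 4 * units.Ki  # 4 MB, the smallest LUN the VASA provider accepts

    def clamped_capacity_kb(requested_kb):
        # Same max() expression used in _create_virtual_disk_config_spec() below.
        return max(MIN_VIRTUAL_DISK_SIZE_KB, int(requested_kb))

    assert clamped_capacity_kb(units.Ki) == 4 * units.Ki      # a 1 MB request becomes 4 MB
    assert clamped_capacity_kb(8 * units.Ki) == 8 * units.Ki  # larger requests pass through unchanged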
--- .../unit/volume/drivers/vmware/test_vmware_volumeops.py | 7 ++++--- cinder/volume/drivers/vmware/volumeops.py | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index f2b62b3004e..844f9f12571 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -558,7 +558,7 @@ def test_create_virtual_disk_config_spec(self): cf = self.session.vim.client.factory cf.create.side_effect = lambda *args: mock.Mock() - size_kb = units.Ki + size_kb = volumeops.MIN_VIRTUAL_DISK_SIZE_KB controller_key = 200 disk_type = 'thick' profile_id = mock.sentinel.profile_id @@ -596,7 +596,8 @@ def test_create_specs_for_ide_disk_add(self): factory.create.side_effect = None self.assertEqual(1, len(ret)) - self.assertEqual(units.Ki, ret[0].device.capacityInKB) + expected_size = volumeops.MIN_VIRTUAL_DISK_SIZE_KB + self.assertEqual(expected_size, ret[0].device.capacityInKB) self.assertEqual(200, ret[0].device.controllerKey) expected = [mock.call.create('ns0:VirtualDeviceConfigSpec'), mock.call.create('ns0:VirtualDisk'), @@ -607,7 +608,7 @@ def test_create_specs_for_scsi_disk_add(self): factory = self.session.vim.client.factory factory.create.side_effect = lambda *args: mock.Mock() - size_kb = 2 * units.Ki + size_kb = 8 * units.Ki disk_type = 'thin' adapter_type = 'lsiLogicsas' profile_id = mock.sentinel.profile_id diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 66e3f7414c7..46357ebc587 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -36,6 +36,7 @@ FULL_CLONE_TYPE = 'full' BACKING_UUID_KEY = 'instanceUuid' +MIN_VIRTUAL_DISK_SIZE_KB = 4 * units.Ki def split_datastore_path(datastore_path): @@ -661,8 +662,9 @@ def _create_virtual_disk_config_spec(self, size_kb, disk_type, cf = self._session.vim.client.factory disk_device = cf.create('ns0:VirtualDisk') - # disk size should be at least 1024KB - disk_device.capacityInKB = max(units.Ki, int(size_kb)) + # disk size should be at least 4MB for VASA provider + min_size_kb = MIN_VIRTUAL_DISK_SIZE_KB + disk_device.capacityInKB = max(min_size_kb, int(size_kb)) if controller_key < 0: disk_device.key = controller_key - 1 else: From c5d3f7c235b397cc257ae0c6385a39e036a07dbf Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Thu, 27 Jun 2019 10:58:03 +0200 Subject: [PATCH 005/149] [SAP] Create volume from image with expected size When we're creating a volume from an image, the size was set to 0 to let the vCenter figure out the uncompressed size. But with our VASA-provider and using thin-provisioning we end up with empty volumes, presumably because the volume doesn't auto-grow properly. Since we know the size the volume should have in the end in advance and we're not re-using the created VM as a template for others, we can create the volume initially with the user-wanted size and don't have to resize the volume afterwards. 
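
Concretely, the capacity handed to get_create_spec() is now just the requested volume size converted from GiB to KiB (sketch using the same oslo_utils.units helpers as the driver):

    from oslo_utils import units

    size_gb = 3                               # size requested by the user
    size_kb = size_gb * units.Gi / units.Ki   # 3 GiB expressed in KiB (3145728.0)
    # Previously a dummy size of 0 was passed and vCenter had to grow the
    # disk itself while importing the stream-optimized image.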
--- cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py | 6 ++++-- cinder/volume/drivers/vmware/vmdk.py | 6 ++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index cae4666722c..3f95ed9bc3f 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -1180,6 +1180,7 @@ def _test_copy_image_to_volume_stream_optimized(self, context = mock.sentinel.context volume = self._create_volume_dict(size=3) + volume_size_kb = 3 * units.Gi / units.Ki image_service = mock.sentinel.image_service image_id = mock.sentinel.image_id image_size = 2 * units.Gi @@ -1198,8 +1199,9 @@ def _test_copy_image_to_volume_stream_optimized(self, select_ds_for_volume.assert_called_once_with(volume) vops.get_create_spec.assert_called_once_with( - volume['name'], 0, disk_type, summary.name, profile_id=profile_id, - adapter_type=adapter_type, extra_config=extra_config) + volume['name'], volume_size_kb, disk_type, summary.name, + profile_id=profile_id, adapter_type=adapter_type, + extra_config=extra_config) self.assertEqual(vm_create_spec, import_spec.configSpec) download_image.assert_called_with( context, diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8b46d2d4bad..318f6103c54 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -1362,13 +1362,11 @@ def _fetch_stream_optimized_image(self, context, volume, image_service, profile_id = self._get_storage_profile_id(volume) disk_type = VMwareVcVmdkDriver._get_disk_type(volume) - # The size of stream optimized glance image is often suspect, - # so better let vCenter figure out the disk capacity during import. - dummy_disk_size = 0 + size_kb = size_gb * units.Gi / units.Ki extra_config = self._get_extra_config(volume) vm_create_spec = self.volumeops.get_create_spec( volume['name'], - dummy_disk_size, + size_kb, disk_type, summary.name, profile_id=profile_id, From a8c3937df3929743178d70af4fa9fa9783e05851 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Mon, 14 Oct 2019 11:23:16 +0200 Subject: [PATCH 006/149] [SAP] Remove all NICs from snapshot Until now, when creating a snapshot from an attached volume, only the additional unwanted disks got remove and the NICs/VIFs were kept. With nsx-t this is a problem, as it does not allow duplicate MAC addresses on the same logical switch. Therefore, we now remove all VIFs - or rather every device that has a "macAddress" attribute - in the clone-process. 
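
In code, "every device that has a macAddress attribute" boils down to a hasattr() filter (illustrative sketch; the real helper added below is VMwareVolumeOps._get_vif_devices()):

    def pick_vif_devices(hardware_devices):
        # vmxnet3, e1000(e) and other NIC types expose a macAddress attribute,
        # while disks and controllers do not.
        return [dev for dev in hardware_devices if hasattr(dev, 'macAddress')]

    # Each device returned here is then wrapped in a 'remove'
    # VirtualDeviceConfigSpec by _create_device_change_for_vif_removal()
    # and appended to the clone spec's deviceChange list.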
--- .../volume/drivers/vmware/test_vmware_vmdk.py | 6 +- .../drivers/vmware/test_vmware_volumeops.py | 69 ++++++++++++++++--- cinder/volume/drivers/vmware/vmdk.py | 2 + cinder/volume/drivers/vmware/volumeops.py | 33 ++++++++- 4 files changed, 96 insertions(+), 14 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 3f95ed9bc3f..0c7ada054d3 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2370,6 +2370,10 @@ def test_create_temp_backing_from_attached_vmdk( vol_dev_uuid = mock.sentinel.vol_dev_uuid get_volume_device_uuid.return_value = vol_dev_uuid + dev_change_disk_remove = mock.sentinel.dev_change_disk_remove + vops._create_device_change_for_disk_removal.return_value =\ + [dev_change_disk_remove] + tmp_name = mock.sentinel.tmp_name generate_uuid.return_value = tmp_name @@ -2395,7 +2399,7 @@ def test_create_temp_backing_from_attached_vmdk( vops.clone_backing.assert_called_once_with( tmp_name, instance, None, volumeops.FULL_CLONE_TYPE, datastore, host=host, resource_pool=rp, folder=folder, - disks_to_clone=[vol_dev_uuid]) + device_changes=[dev_change_disk_remove]) @mock.patch.object(VMDK_DRIVER, '_get_disk_type') @mock.patch.object(VMDK_DRIVER, 'volumeops') diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 844f9f12571..27743fce5f7 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -1035,10 +1035,8 @@ def _verify_extra_config(self, option_values, key, value): '_get_relocate_spec') @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' '_get_disk_device') - @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' - '_create_device_change_for_disk_removal') def _test_get_clone_spec( - self, create_device_change_for_disk_removal, get_disk_device, + self, get_disk_device, get_relocate_spec, disk_type=None): factory = self.session.vim.client.factory factory.create.side_effect = lambda *args: mock.Mock() @@ -1096,9 +1094,9 @@ def test_get_clone_spec_with_thin_disk_type(self): @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' '_get_disk_devices') @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' - '_create_spec_for_disk_remove') + '_create_spec_for_device_remove') def test_create_device_change_for_disk_removal( - self, create_spec_for_disk_remove, get_disk_devices): + self, create_spec_for_device_remove, get_disk_devices): uuid_1 = mock.sentinel.uuid_1 disk_dev_1 = self._create_disk_device('foo', uuid_1) @@ -1108,7 +1106,7 @@ def test_create_device_change_for_disk_removal( get_disk_devices.return_value = [disk_dev_1, disk_dev_2] spec = mock.sentinel.spec - create_spec_for_disk_remove.return_value = spec + create_spec_for_device_remove.return_value = spec backing = mock.sentinel.backing disks_to_clone = [uuid_2] @@ -1116,9 +1114,35 @@ def test_create_device_change_for_disk_removal( backing, disks_to_clone) get_disk_devices.assert_called_once_with(backing) - create_spec_for_disk_remove.assert_called_once_with(disk_dev_1) + create_spec_for_device_remove.assert_called_once_with(disk_dev_1) self.assertEqual([spec], ret) + @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' 
+ '_get_vif_devices') + @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' + '_create_spec_for_device_remove') + def test_create_device_change_for_vif_removal( + self, create_spec_for_device_remove, get_vif_devices): + mac1 = mock.sentinel.mac1 + vif_dev_1 = self._create_vif_device(mac1) + + mac2 = mock.sentinel.mac2 + vif_dev_2 = self._create_vif_device(mac2) + + get_vif_devices.return_value = [vif_dev_1, vif_dev_2] + + spec = mock.sentinel.spec + create_spec_for_device_remove.return_value = spec + + backing = mock.sentinel.backing + ret = self.vops._create_device_change_for_vif_removal(backing) + + get_vif_devices.assert_called_once_with(backing) + exp_calls = [mock.call(vif_dev_1), mock.call(vif_dev_2)] + self.assertEqual(exp_calls, + create_spec_for_device_remove.call_args_list) + self.assertEqual([spec, spec], ret) + @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' '_get_folder') @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' @@ -1270,12 +1294,12 @@ def test_attach_disk_to_backing_existing_controller( spec=reconfig_spec) self.session.wait_for_task.assert_called_once_with(task) - def test_create_spec_for_disk_remove(self): + def test_create_spec_for_device_remove(self): disk_spec = mock.Mock() self.session.vim.client.factory.create.return_value = disk_spec disk_device = mock.sentinel.disk_device - self.vops._create_spec_for_disk_remove(disk_device) + self.vops._create_spec_for_device_remove(disk_device) self.session.vim.client.factory.create.assert_called_once_with( 'ns0:VirtualDeviceConfigSpec') @@ -1283,7 +1307,7 @@ def test_create_spec_for_disk_remove(self): self.assertEqual(disk_device, disk_spec.device) @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' - '_create_spec_for_disk_remove') + '_create_spec_for_device_remove') @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' '_reconfigure_backing') def test_detach_disk_from_backing(self, reconfigure_backing, create_spec): @@ -1877,6 +1901,31 @@ def test_get_disk_device(self, get_disk_devices): self.vops.get_disk_device(vm, '[ds1] foo/foo_1.vmdk')) get_disk_devices.assert_called_once_with(vm) + def _create_vif_device(self, mac): + return mock.Mock(macAddress=mac) + + def test_get_vif_devices(self): + disk_device = mock.Mock(spec=[]) + disk_device.__class__.__name__ = 'VirtualDisk' + + controller_device = mock.Mock(spec=[]) + controller_device.__class__.__name__ = 'VirtualLSILogicController' + + vif_device = mock.Mock(spec=['macAddress']) + vif_device.__class__.__name__ = 'VirtualVmxnet3' + vif_device.macAddress = 'fake-mac' + + devices = mock.Mock() + devices.__class__.__name__ = "ArrayOfVirtualDevice" + devices.VirtualDevice = [disk_device, controller_device, vif_device] + self.session.invoke_api.return_value = devices + + vm = mock.sentinel.vm + self.assertEqual([vif_device], self.vops._get_vif_devices(vm)) + self.session.invoke_api.assert_called_once_with( + vim_util, 'get_object_property', self.session.vim, + vm, 'config.hardware.device') + @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' 
'get_entity_by_inventory_path') def test_copy_datastore_file(self, get_entity_by_inventory_path): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 318f6103c54..8cb4201c300 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2295,6 +2295,8 @@ def _create_temp_backing_from_attached_vmdk( device_changes = self.volumeops._create_device_change_for_disk_removal( instance, disks_to_clone=[vol_dev_uuid]) + device_changes.extend( + self.volumeops._create_device_change_for_vif_removal(instance)) return self.volumeops.clone_backing( tmp_name, instance, None, volumeops.FULL_CLONE_TYPE, datastore, diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 46357ebc587..e6bac698411 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1168,7 +1168,34 @@ def _create_device_change_for_disk_removal(self, backing, disks_to_clone): device_change = [] for device in disk_devices: if device.backing.uuid not in disks_to_clone: - device_change.append(self._create_spec_for_disk_remove(device)) + spec = self._create_spec_for_device_remove(device) + device_change.append(spec) + + return device_change + + def _get_vif_devices(self, vm): + vif_devices = [] + hardware_devices = self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + vm, + 'config.hardware.device') + + if hardware_devices.__class__.__name__ == "ArrayOfVirtualDevice": + hardware_devices = hardware_devices.VirtualDevice + + for device in hardware_devices: + if hasattr(device, 'macAddress'): + vif_devices.append(device) + + return vif_devices + + def _create_device_change_for_vif_removal(self, backing): + devices = self._get_vif_devices(backing) + + device_change = [] + for device in devices: + device_change.append(self._create_spec_for_device_remove(device)) return device_change @@ -1291,7 +1318,7 @@ def attach_disk_to_backing(self, backing, size_in_kb, disk_type, self._reconfigure_backing(backing, reconfig_spec) LOG.debug("Backing VM: %s reconfigured with new disk.", backing) - def _create_spec_for_disk_remove(self, disk_device): + def _create_spec_for_device_remove(self, disk_device): cf = self._session.vim.client.factory disk_spec = cf.create('ns0:VirtualDeviceConfigSpec') disk_spec.operation = 'remove' @@ -1317,7 +1344,7 @@ def detach_disk_from_backing(self, backing, disk_device): cf = self._session.vim.client.factory reconfig_spec = cf.create('ns0:VirtualMachineConfigSpec') - spec = self._create_spec_for_disk_remove(disk_device) + spec = self._create_spec_for_device_remove(disk_device) reconfig_spec.deviceChange = [spec] self._reconfigure_backing(backing, reconfig_spec) From e092283969e8edad96f7c6a3ac8af3d904d6da2c Mon Sep 17 00:00:00 2001 From: Csaba Seres <45421502+Scsabiii@users.noreply.github.com> Date: Wed, 13 Nov 2019 12:30:34 +0100 Subject: [PATCH 007/149] [SAP] Online resize of cinder volumes (#24) * Adding-support-for-online-resize-cinder-volume * pep8 fixes * Added vmware_online_resize config option to disable online-resize --- cinder/volume/drivers/vmware/vmdk.py | 39 +++++++++++++-- cinder/volume/drivers/vmware/volumeops.py | 58 +++++++++++++++++++++++ 2 files changed, 94 insertions(+), 3 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8cb4201c300..19003e045cf 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -163,7 
+163,10 @@ help='If true, this enables the fetching of the volume stats ' 'from the backend. This has potential performance ' 'issues at scale. When False, the driver will not ' - 'collect ANY stats about the backend.') + 'collect ANY stats about the backend.'), + cfg.BoolOpt('vmware_online_resize', + default=True, + help='If true, enables volume resize in in-use state'), ] CONF = cfg.CONF @@ -1424,6 +1427,20 @@ def _extend_backing(self, backing, new_size_in_gb, disk_type): eager_zero = disk_type == EAGER_ZEROED_THICK_VMDK_TYPE self.volumeops.extend_virtual_disk(new_size_in_gb, root_vmdk_path, datacenter, eager_zero) + self.volumeops.reload_backing(backing) + + def _extend_backing_online(self, backing, new_size_in_gb, attachedvm): + """Extend volume backing's virtual disk online + + :param backing: volume backing + :param new_size_in_gb: new size of virtual disk + :param attachedvm: the id of the vm where the virtual disk is attached + """ + root_vmdk_path = self.volumeops.get_vmdk_path(backing) + self.volumeops.extend_virtual_disk_online(new_size_in_gb, + root_vmdk_path, + attachedvm) + self.volumeops.reload_backing(backing) def clone_image(self, context, volume, image_location, image_meta, image_service): @@ -1778,8 +1795,24 @@ def extend_volume(self, volume, new_size): LOG.info("There is no backing for volume: %s; no need to " "extend the virtual disk.", vol_name) return - - # try extending vmdk in place + if (self._in_use(volume) and not volume['multiattach'] and + self.configuration.vmware_online_resize): + attachments = volume.volume_attachment + instance_uuid = attachments[0]['instance_uuid'] + attachedvm = self.volumeops.get_backing_by_uuid(instance_uuid) + try: + self._extend_backing_online(backing, new_size, attachedvm) + LOG.info("Successfully extended volume: %(vol)s to size: " + "%(size)s GB.", + {'vol': vol_name, 'size': new_size}) + return + except exceptions.NoDiskSpaceException: + LOG.warning("Unable to extend volume: %(vol)s to size: " + "%(size)s on current datastore due to insufficient" + " space.", + {'vol': vol_name, 'size': new_size}) + return + # try extending vmdk in place offline try: self._extend_backing(backing, new_size, VMwareVcVmdkDriver._get_disk_type(volume)) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index e6bac698411..3e990720697 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -376,6 +376,15 @@ def delete_backing(self, backing): self._session.wait_for_task(task) LOG.info("Deleted the VM backing: %s.", backing) + def reload_backing(self, backing): + """Reload the backing. + + :param backing: Managed object reference to the backing + """ + LOG.debug("Reloading the VM backing: %s.", backing) + self._session.invoke_api(self._session.vim, 'Reload', backing) + LOG.info("Reloaded the VM backing: %s.", backing) + # TODO(kartikaditya) Keep the methods not specific to volume in # a different file def get_host(self, instance): @@ -624,6 +633,55 @@ def extend_virtual_disk(self, requested_size_in_gb, path, dc_ref, "%(size)s GB.", {'path': path, 'size': requested_size_in_gb}) + def extend_virtual_disk_online(self, requested_size_in_gb, path, vm_ref): + """Extend the virtual disk online to the requested size. 
+ + :param requested_size_in_gb: Size of the volume in GB + :param path: Datastore path of the virtual disk to extend + :param vm_ref: Reference to the VM instance + """ + LOG.debug("Extending virtual disk: %(path)s to %(size)s GB.", + {'path': path, 'size': requested_size_in_gb}) + # VMWare API needs the capacity unit to be in KB, so convert the + # capacity unit from GB to KB. + cf = self._session.vim.client.factory + size_in_kb = requested_size_in_gb * units.Mi + config_spec = cf.create('ns0:VirtualMachineConfigSpec') + disk = None + devices = self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + vm_ref, + 'config.hardware.device') + for device in devices['VirtualDevice']: + if device.__class__.__name__ == "VirtualDisk" and \ + device.backing.fileName == path: + disk = device + break + else: + msg = str.format("Error during online-resize of disk: %(path)s to " + "%(size)s GB. Can't find the attachment", + {'path': path, 'size': requested_size_in_gb}) + raise exceptions.VimException(msg) + + disk.capacityInKB = size_in_kb + delattr(disk, 'capacityInBytes') + delattr(disk, 'deviceInfo') + device_change = [] + devspec = cf.create('ns0:VirtualDeviceConfigSpec') + devspec.operation = 'edit' + devspec.device = disk + device_change.append(devspec) + config_spec.deviceChange = device_change + task = self._session.invoke_api(self._session.vim, + "ReconfigVM_Task", + vm_ref, + spec=config_spec) + self._session.wait_for_task(task) + LOG.info("Successfully extended virtual disk: %(path)s to " + "%(size)s GB.", + {'path': path, 'size': requested_size_in_gb}) + def _create_controller_config_spec(self, adapter_type): """Returns config spec for adding a disk controller.""" cf = self._session.vim.client.factory From 3e858e42015d1e2501f07e868fba88130606d152 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Fri, 17 Jan 2020 13:00:50 +0100 Subject: [PATCH 008/149] [SAP] scheduler: Add ShardFilter This filters out backends based on the shard defined in the backend's extra_capabilities and in the project's tags. --- cinder/scheduler/filters/shard_filter.py | 192 ++++++++++++++++++ .../tests/unit/scheduler/test_shard_filter.py | 97 +++++++++ setup.cfg | 1 + 3 files changed, 290 insertions(+) create mode 100644 cinder/scheduler/filters/shard_filter.py create mode 100644 cinder/tests/unit/scheduler/test_shard_filter.py diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py new file mode 100644 index 00000000000..2348993d98f --- /dev/null +++ b/cinder/scheduler/filters/shard_filter.py @@ -0,0 +1,192 @@ +# Copyright (c) 2020 SAP SE +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import time + +from keystoneauth1 import exceptions as kse +from keystoneauth1 import loading as ks_loading +from oslo_config import cfg +from oslo_log import log as logging + +from cinder.scheduler import filters +from cinder.service_auth import SERVICE_USER_GROUP +from cinder import utils as cinder_utils + + +LOG = logging.getLogger(__name__) +CONF = cfg.CONF + +_SERVICE_AUTH = None +KEYSTONE_GROUP = 'keystone' + + +# register keystone config options so we can create an adapter for it easily +ks_loading.register_session_conf_options(CONF, KEYSTONE_GROUP) +ks_loading.register_auth_conf_options(CONF, KEYSTONE_GROUP) +keystone_opts = ks_loading.get_adapter_conf_options() +cfg.set_defaults(keystone_opts, + valid_interfaces=['internal', 'public'], + service_type='identity') +CONF.register_opts(keystone_opts, group=KEYSTONE_GROUP) + + +class ShardFilter(filters.BaseBackendFilter): + """Filters backends by shard of the project + + Every project has tags assigned, which define the vCenter the project is + in. This filter filters out any backend that's not configured for the shard + of a project. + """ + + # project shards do not change within a request + run_filter_once_per_request = True + + _PROJECT_SHARD_CACHE = {} + _PROJECT_SHARD_CACHE_RETENTION_TIME = 10 * 60 + _SHARD_PREFIX = 'vc-' + _CAPABILITY_NAME = 'vcenter-shard' + + def _get_keystone_adapter(self): + """Return a keystone adapter + + This needs [service_user] for the auth. + """ + global _SERVICE_AUTH + + if _SERVICE_AUTH is None: + _SERVICE_AUTH = ks_loading.load_auth_from_conf_options( + CONF, group=SERVICE_USER_GROUP) + if _SERVICE_AUTH is None: + # This indicates a misconfiguration so log a warning and + # return the user_auth. + LOG.error('Unable to load auth from %(group)s ' + 'configuration. Ensure "auth_type" is set.', + {'group': SERVICE_USER_GROUP}) + return + + ksa_session = ks_loading.load_session_from_conf_options( + CONF, + KEYSTONE_GROUP, + auth=_SERVICE_AUTH) + + return ks_loading.load_adapter_from_conf_options( + CONF, KEYSTONE_GROUP, session=ksa_session, auth=_SERVICE_AUTH, + min_version=(3, 0), max_version=(3, 'latest')) + + def _update_cache(self): + """Update the cache with infos from keystone + + Ask keystone for the list of projects to save the interesting tags + of each project in the cache. + """ + adap = self._get_keystone_adapter() + if not adap: + return + + # NOTE: the same code exists in nova + url = '/projects' + while url: + try: + resp = adap.get(url, raise_exc=False) + except kse.EndpointNotFound: + LOG.error( + "Keystone identity service version 3.0 was not found. " + "This might be because your endpoint points to the v2.0 " + "versioned endpoint which is not supported. 
Please fix " + "this.") + return + except kse.ClientException: + LOG.error("Unable to contact keystone to update project tags " + "cache") + return + + resp.raise_for_status() + + data = resp.json() + for project in data['projects']: + project_id = project['id'] + shards = [t for t in project['tags'] + if t.startswith(self._SHARD_PREFIX)] + self._PROJECT_SHARD_CACHE[project_id] = shards + + url = data['links']['next'] + + self._PROJECT_SHARD_CACHE['last_modified'] = time.time() + + @cinder_utils.synchronized('update-shard-cache') + def _get_shards(self, project_id): + # expire the cache 10min after last write + last_modified = self._PROJECT_SHARD_CACHE.get('last_modified', 0) + time_diff = time.time() - last_modified + if time_diff > self._PROJECT_SHARD_CACHE_RETENTION_TIME: + self._PROJECT_SHARD_CACHE = {} + + if project_id not in self._PROJECT_SHARD_CACHE: + self._update_cache() + + return self._PROJECT_SHARD_CACHE.get(project_id) + + def backend_passes(self, backend_state, filter_properties): + spec = filter_properties.get('request_spec', {}) + vol = spec.get('volume_properties', {}) + project_id = vol.get('project_id', None) + + volid = None + if spec: + volid = spec.get('volume_id') + + if project_id is None: + LOG.debug('Could not determine the project for volume %(id)s.', + {'id': volid}) + return False + + shards = self._get_shards(project_id) + if shards is None: + LOG.error('Failure retrieving shards for project %(project_id)s.', + {'project_id': project_id}) + return False + + if not len(shards): + LOG.error('Project %(project_id)s is not assigned to any shard.', + {'project_id': project_id}) + return False + + # set extra_capabilities in the cinder-volume.conf, so we can filter on + # them here. + configured_shards_set = set() + cap = backend_state.capabilities.get(self._CAPABILITY_NAME) + if cap is not None: + configured_shards_set.update(cap.split(',')) + + if not configured_shards_set: + LOG.error('%(backend)s does not have any capability starting with ' + '%(shard_prefix)s.', + {'backend': backend_state, + 'shard_prefix': self._SHARD_PREFIX}) + return False + + if configured_shards_set & set(shards): + LOG.debug('%(backend)s shard %(backend_shards)s found in project ' + 'shards %(project_shards)s.', + {'backend': backend_state, + 'backend_shards': configured_shards_set, + 'project_shards': shards}) + return True + else: + LOG.debug('%(backend)s shard %(backend_shards)s not found in ' + 'project shards %(project_shards)s.', + {'backend': backend_state, + 'backend_shards': configured_shards_set, + 'project_shards': shards}) + return False diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py new file mode 100644 index 00000000000..0b9e43854b1 --- /dev/null +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -0,0 +1,97 @@ +# Copyright 2020 SAP SE # All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+import time + +import mock + +from cinder.tests.unit.scheduler import fakes +from cinder.tests.unit.scheduler.test_host_filters \ + import BackendFiltersTestCase + + +class ShardFilterTestCase(BackendFiltersTestCase): + + def setUp(self): + super(ShardFilterTestCase, self).setUp() + self.filt_cls = self.class_map['ShardFilter']() + self.filt_cls._PROJECT_SHARD_CACHE = { + 'foo': ['vc-a-0', 'vc-b-0'], + 'last_modified': time.time() + } + self.props = { + 'request_spec': { + 'volume_properties': { + 'project_id': 'foo' + } + } + } + + @mock.patch('cinder.scheduler.filters.shard_filter.' + 'ShardFilter._update_cache') + def test_get_shards_cache_timeout(self, mock_update_cache): + def set_cache(): + self.filt_cls._PROJECT_SHARD_CACHE = { + 'foo': ['vc-a-1'] + } + mock_update_cache.side_effect = set_cache + + project_id = 'foo' + mod = time.time() - self.filt_cls._PROJECT_SHARD_CACHE_RETENTION_TIME + + self.assertEqual(self.filt_cls._get_shards(project_id), + ['vc-a-0', 'vc-b-0']) + + self.filt_cls._PROJECT_SHARD_CACHE['last_modified'] = mod + self.assertEqual(self.filt_cls._get_shards(project_id), ['vc-a-1']) + + @mock.patch('cinder.scheduler.filters.shard_filter.' + 'ShardFilter._update_cache') + def test_get_shards_project_not_included(self, mock_update_cache): + def set_cache(): + self.filt_cls._PROJECT_SHARD_CACHE = { + 'bar': ['vc-a-1', 'vc-b-0'] + } + mock_update_cache.side_effect = set_cache + + self.assertEqual(self.filt_cls._get_shards('bar'), + ['vc-a-1', 'vc-b-0']) + mock_update_cache.assert_called_once() + + @mock.patch('cinder.scheduler.filters.shard_filter.' + 'ShardFilter._update_cache') + def test_shard_project_not_found(self, mock_update_cache): + caps = {'vcenter-shard': 'vc-a-1'} + self.props['request_spec']['volume_properties']['project_id'] = 'bar' + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + + def test_shard_project_no_shards(self): + caps = {'vcenter-shard': 'vc-a-1'} + self.filt_cls._PROJECT_SHARD_CACHE['foo'] = [] + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + + def test_backend_without_shard(self): + host = fakes.FakeBackendState('host1', {}) + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + + def test_backend_shards_dont_match(self): + caps = {'vcenter-shard': 'vc-a-1'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + + def test_backend_shards_match(self): + caps = {'vcenter-shard': 'vc-b-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) diff --git a/setup.cfg b/setup.cfg index d36ed0748ba..a7db385e2f6 100644 --- a/setup.cfg +++ b/setup.cfg @@ -45,6 +45,7 @@ cinder.scheduler.filters = RetryFilter = cinder.scheduler.filters.ignore_attempted_hosts_filter:IgnoreAttemptedHostsFilter SameBackendFilter = cinder.scheduler.filters.affinity_filter:SameBackendFilter InstanceLocalityFilter = cinder.scheduler.filters.instance_locality_filter:InstanceLocalityFilter + ShardFilter = cinder.scheduler.filters.shard_filter:ShardFilter cinder.scheduler.weights = AllocatedCapacityWeigher = cinder.scheduler.weights.capacity:AllocatedCapacityWeigher CapacityWeigher = cinder.scheduler.weights.capacity:CapacityWeigher From 016718f9b88c6a78d1210ebb8bdcd7c673eab98a Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Wed, 8 Jan 2020 
16:00:29 +0200 Subject: [PATCH 009/149] [SAP] Backup: send data needed to build an ImportVApp spec This is addressing the fix for restoring VVol volumes from swift backup. cinder-backup needs to upload the backup data to VMWare via HttpNfc API by performing a ImportVApp call. Since this operation is about to replace the existing backing, we want to keep the main logic in cinder-volume. Thus, we instruct cinder-backup how to build the spec for ImportVApp via a json-like syntax, since it seems that suds objects can't pe pickled or simply can't be sent plain over RPC. --- .../volume/drivers/vmware/test_vmware_vmdk.py | 85 +++++++++++++- cinder/volume/drivers/vmware/vmdk.py | 63 +++++++++- cinder/volume/drivers/vmware/volumeops.py | 110 ++++++++++++++---- 3 files changed, 231 insertions(+), 27 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 0c7ada054d3..05b6e683ed4 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -1960,14 +1960,18 @@ def test_select_ds_for_volume( @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_get_storage_profile_id') + @mock.patch.object(VMDK_DRIVER, '_get_connection_import_data') def _test_get_connection_info( - self, get_storage_profile_id, vops, vmdk_connector=False): - volume = self._create_volume_obj() + self, get_connection_import_data, get_storage_profile_id, vops, + vmdk_connector=False): + volume = self._create_volume_obj(status='restoring-backup') backing = mock.Mock(value='ref-1') profile_id = mock.sentinel.profile_id get_storage_profile_id.return_value = profile_id + get_connection_import_data.return_value = {'folder': 'folder-1'} + if vmdk_connector: vmdk_path = mock.sentinel.vmdk_path vops.get_vmdk_path.return_value = vmdk_path @@ -1995,6 +1999,7 @@ def _test_get_connection_info( self.assertEqual(vmdk_path, ret['data']['vmdk_path']) self.assertEqual('ds-1', ret['data']['datastore']) self.assertEqual('dc-1', ret['data']['datacenter']) + self.assertEqual('folder-1', ret['data']['import_data']['folder']) config = self._driver.configuration exp_config = { @@ -2018,6 +2023,82 @@ def test_get_connection_info(self): def test_get_connection_info_vmdk_connector(self): self._test_get_connection_info(vmdk_connector=True) + @mock.patch.object(VMDK_DRIVER, 'volumeops') + @mock.patch.object(VMDK_DRIVER, '_select_ds_for_volume') + @mock.patch.object(VMDK_DRIVER, '_get_extra_config') + @mock.patch.object(VMDK_DRIVER, '_get_adapter_type') + @mock.patch.object(VMDK_DRIVER, '_get_disk_type') + def test_get_connection_import_data(self, get_disk_type, get_adapter_type, + get_extra_config, + select_ds_for_volume, vops): + volume = self._create_volume_obj(size=1) + folder = mock.Mock(value=mock.Mock()) + rp = mock.Mock(value=mock.Mock()) + mock_data = { + 'folder': folder.value, + 'resource_pool': rp.value, + 'vm': { + 'path_name': '[ds-1]', + 'guest_id': 'guest-id', + 'num_cpus': 1, + 'memory_mb': 128, + 'vmx_version': 'vmx-8', + 'extension_key': 'foo-extension-key', + 'extension_type': 'foo-extension-type', + 'extra_config': {} + }, + 'adapter_type': mock.Mock(), + 'controller': { + 'type': 'controllerTypeOne', + 'key': 1, + 'create': True, + 'shared_bus': 'shared', + 'bus_number': 1 + }, + 'disk': { + 'type': 'diskTypeOne', + 'key': -101, + 'capacity_in_kb': 1024 * 1024, + 'eagerly_scrub': None, + 'thin_provisioned': True + } + } + + 
vops.get_controller_type.return_value = mock_data['controller']['type'] + vops.get_controller_key_and_spec.return_value = (mock_data[ + 'controller'][ + 'key'], + mock.Mock()) + vops.get_vm_path_name.return_value = mock_data['vm']['path_name'] + vops.get_vm_guest_id.return_value = mock_data['vm']['guest_id'] + vops.get_vm_num_cpus.return_value = mock_data['vm']['num_cpus'] + vops.get_vm_memory_mb.return_value = mock_data['vm']['memory_mb'] + vops.get_vmx_version.return_value = mock_data['vm']['vmx_version'] + vops._extension_key = mock_data['vm']['extension_key'] + vops._extension_type = mock_data['vm']['extension_type'] + vops.get_controller_device_shared_bus.return_value = mock_data[ + 'controller']['shared_bus'] + vops.get_controller_device_default_bus_number.return_value = \ + mock_data['controller']['bus_number'] + vops.get_disk_device_key.return_value = mock_data['disk']['key'] + vops.get_disk_capacity_in_kb.return_value = mock_data['disk'][ + 'capacity_in_kb'] + vops.get_disk_eagerly_scrub.return_value = mock_data['disk'][ + 'eagerly_scrub'] + vops.get_disk_thin_provisioned.return_value = mock_data['disk'][ + 'thin_provisioned'] + + get_disk_type.return_value = mock_data['disk']['type'] + select_ds_for_volume.return_value = (mock.Mock(), rp, folder, + mock.Mock()) + get_extra_config.return_value = mock_data['vm']['extra_config'] + get_adapter_type.return_value = mock_data['adapter_type'] + + data = self._driver._get_connection_import_data(volume) + + self.assertEqual(mock_data, data) + + @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch('oslo_vmware.vim_util.get_moref') @mock.patch.object(VMDK_DRIVER, '_create_backing') diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 19003e045cf..03d7cb215b5 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -757,6 +757,12 @@ def _get_connection_info(self, volume, backing, connector): } connection_info['data']['config'] = vmdk_connector_config + # instruct os-brick to use ImportVApp and HttpNfc upload for + # disconnecting the volume + if volume['status'] == 'restoring-backup': + connection_info['data']['import_data'] = \ + self._get_connection_import_data(volume) + LOG.debug("Returning connection_info (volume: '%(volume)s', volume_id:" " '%(volume_id)s'), profile_id: '%(profile_id)s' for " "connector: %(connector)s.", @@ -767,6 +773,58 @@ def _get_connection_info(self, volume, backing, connector): return connection_info + def _get_connection_import_data(self, volume): + (host, rp, folder, summary) = self._select_ds_for_volume( + volume) + extra_config = self._get_extra_config(volume) + if volumeops.BACKING_UUID_KEY in extra_config: + extra_config.pop(volumeops.BACKING_UUID_KEY) + disk_type = VMwareVcVmdkDriver._get_disk_type(volume) + size_kb = volume['size'] * units.Mi + adapter_type = self._get_adapter_type(volume) + controller_type = self.volumeops.get_controller_type(adapter_type) + controller_key, controller_spec = \ + self.volumeops.get_controller_key_and_spec(adapter_type) + return { + 'folder': folder.value, + 'resource_pool': rp.value, + 'vm': { + 'path_name': self.volumeops.get_vm_path_name( + summary.name), + 'guest_id': self.volumeops.get_vm_guest_id(), + 'num_cpus': self.volumeops.get_vm_num_cpus(), + 'memory_mb': self.volumeops.get_vm_memory_mb(), + 'vmx_version': self.volumeops.get_vmx_version(), + 'extension_key': self.volumeops._extension_key, + 'extension_type': self.volumeops._extension_type, + 'extra_config': extra_config, + }, + 
'adapter_type': adapter_type, + 'controller': { + 'type': controller_type, + 'key': controller_key, + 'create': controller_spec is not None, + 'shared_bus': + self.volumeops.get_controller_device_shared_bus( + controller_type), + 'bus_number': + self.volumeops + .get_controller_device_default_bus_number() + }, + 'disk': { + 'type': disk_type, + 'key': self.volumeops.get_disk_device_key( + controller_key), + 'capacity_in_kb': + self.volumeops.get_disk_capacity_in_kb(size_kb), + 'eagerly_scrub': + self.volumeops.get_disk_eagerly_scrub(disk_type), + 'thin_provisioned': + self.volumeops.get_disk_thin_provisioned(disk_type) + + } + } + def _initialize_connection(self, volume, connector): """Get information of volume's backing. @@ -829,7 +887,10 @@ def initialize_connection(self, volume, connector): return self._initialize_connection(volume, connector) def terminate_connection(self, volume, connector, force=False, **kwargs): - pass + if 'platform' in connector and 'os_type' in connector and \ + volume['status'] == 'restoring-backup': + backing = self.volumeops.get_backing_by_uuid(volume['id']) + self.volumeops.update_backing_disk_uuid(backing, volume['id']) def create_export(self, context, volume, connector): pass diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 3e990720697..7a853b54b7b 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -682,6 +682,20 @@ def extend_virtual_disk_online(self, requested_size_in_gb, path, vm_ref): "%(size)s GB.", {'path': path, 'size': requested_size_in_gb}) + @staticmethod + def get_controller_device_shared_bus(controller_type): + if ControllerType.is_scsi_controller(controller_type): + return 'noSharing' + return None + + @staticmethod + def get_controller_device_default_bus_number(): + return 0 + + @staticmethod + def get_controller_type(adapter_type): + return ControllerType.get_controller_type(adapter_type) + def _create_controller_config_spec(self, adapter_type): """Returns config spec for adding a disk controller.""" cf = self._session.vim.client.factory @@ -689,23 +703,40 @@ def _create_controller_config_spec(self, adapter_type): controller_type = ControllerType.get_controller_type(adapter_type) controller_device = cf.create('ns0:%s' % controller_type) controller_device.key = -100 - controller_device.busNumber = 0 - if ControllerType.is_scsi_controller(controller_type): - controller_device.sharedBus = 'noSharing' + controller_device.busNumber = \ + self.get_controller_device_default_bus_number() + shared_bus = self.get_controller_device_shared_bus(controller_type) + if shared_bus: + controller_device.sharedBus = shared_bus controller_spec = cf.create('ns0:VirtualDeviceConfigSpec') controller_spec.operation = 'add' controller_spec.device = controller_device return controller_spec + @staticmethod + def get_disk_eagerly_scrub(disk_type): + if disk_type == VirtualDiskType.EAGER_ZEROED_THICK: + return True + return None + + @staticmethod + def get_disk_thin_provisioned(disk_type): + if disk_type == VirtualDiskType.THIN: + return True + return None + def _create_disk_backing(self, disk_type, vmdk_ds_file_path): """Creates file backing for virtual disk.""" cf = self._session.vim.client.factory disk_device_bkng = cf.create('ns0:VirtualDiskFlatVer2BackingInfo') - if disk_type == VirtualDiskType.EAGER_ZEROED_THICK: + eagerly_scrub = self.get_disk_eagerly_scrub(disk_type) + thin_provisioned = self.get_disk_thin_provisioned(disk_type) + + if eagerly_scrub: 
disk_device_bkng.eagerlyScrub = True - elif disk_type == VirtualDiskType.THIN: + elif thin_provisioned: disk_device_bkng.thinProvisioned = True disk_device_bkng.fileName = vmdk_ds_file_path or '' @@ -713,6 +744,16 @@ def _create_disk_backing(self, disk_type, vmdk_ds_file_path): return disk_device_bkng + @staticmethod + def get_disk_capacity_in_kb(size_kb): + return max(MIN_VIRTUAL_DISK_SIZE_KB, int(size_kb)) + + @staticmethod + def get_disk_device_key(controller_key): + if controller_key < 0: + return controller_key - 1 + return -101 + def _create_virtual_disk_config_spec(self, size_kb, disk_type, controller_key, profile_id, vmdk_ds_file_path): @@ -721,12 +762,8 @@ def _create_virtual_disk_config_spec(self, size_kb, disk_type, disk_device = cf.create('ns0:VirtualDisk') # disk size should be at least 4MB for VASA provider - min_size_kb = MIN_VIRTUAL_DISK_SIZE_KB - disk_device.capacityInKB = max(min_size_kb, int(size_kb)) - if controller_key < 0: - disk_device.key = controller_key - 1 - else: - disk_device.key = -101 + disk_device.capacityInKB = self.get_disk_capacity_in_kb(size_kb) + disk_device.key = self.get_disk_device_key(controller_key) disk_device.unitNumber = 0 disk_device.controllerKey = controller_key disk_device.backing = self._create_disk_backing(disk_type, @@ -744,6 +781,18 @@ def _create_virtual_disk_config_spec(self, size_kb, disk_type, return disk_spec + def get_controller_key_and_spec(self, adapter_type): + controller_spec = None + if adapter_type == 'ide': + # For IDE disks, use one of the default IDE controllers (with keys + # 200 and 201) created as part of backing VM creation. + controller_key = 200 + else: + controller_spec = self._create_controller_config_spec(adapter_type) + controller_key = controller_spec.device.key + + return controller_key, controller_spec + def _create_specs_for_disk_add(self, size_kb, disk_type, adapter_type, profile_id, vmdk_ds_file_path=None): """Create controller and disk config specs for adding a new disk. @@ -757,14 +806,8 @@ def _create_specs_for_disk_add(self, size_kb, disk_type, adapter_type, not created for the virtual disk. :return: list containing controller and disk config specs """ - controller_spec = None - if adapter_type == 'ide': - # For IDE disks, use one of the default IDE controllers (with keys - # 200 and 201) created as part of backing VM creation. - controller_key = 200 - else: - controller_spec = self._create_controller_config_spec(adapter_type) - controller_key = controller_spec.device.key + (controller_key, controller_spec) = self.get_controller_key_and_spec( + adapter_type) disk_spec = self._create_virtual_disk_config_spec(size_kb, disk_type, @@ -796,6 +839,25 @@ def _create_managed_by_info(self): managed_by.type = self._extension_type return managed_by + @staticmethod + def get_vm_path_name(ds_name): + return '[%s]' % ds_name + + @staticmethod + def get_vm_num_cpus(): + return 1 + + @staticmethod + def get_vm_memory_mb(): + return 128 + + @staticmethod + def get_vm_guest_id(): + return 'otherGuest' + + def get_vmx_version(self): + return self._vmx_version or "vmx-08" + def _get_create_spec_disk_less(self, name, ds_name, profileId=None, extra_config=None): """Return spec for creating disk-less backing. 
@@ -809,19 +871,19 @@ def _get_create_spec_disk_less(self, name, ds_name, profileId=None, """ cf = self._session.vim.client.factory vm_file_info = cf.create('ns0:VirtualMachineFileInfo') - vm_file_info.vmPathName = '[%s]' % ds_name + vm_file_info.vmPathName = self.get_vm_path_name(ds_name) create_spec = cf.create('ns0:VirtualMachineConfigSpec') create_spec.name = name - create_spec.guestId = 'otherGuest' - create_spec.numCPUs = 1 - create_spec.memoryMB = 128 + create_spec.guestId = self.get_vm_guest_id() + create_spec.numCPUs = self.get_vm_num_cpus() + create_spec.memoryMB = self.get_vm_memory_mb() create_spec.files = vm_file_info # Set the default hardware version to a compatible version supported by # vSphere 5.0. This will ensure that the backing VM can be migrated # without any incompatibility issues in a mixed cluster of ESX hosts # with versions 5.0 or above. - create_spec.version = self._vmx_version or "vmx-08" + create_spec.version = self.get_vmx_version() if profileId: vmProfile = cf.create('ns0:VirtualMachineDefinedProfileSpec') From ade0d6ec9744db734c0e6edabec0c1c501fdad9d Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Wed, 15 Jan 2020 17:21:17 +0200 Subject: [PATCH 010/149] [SAP] Normalize the new backing during terminate_connection After a restore backup operation, we correct the name and backing uuid for the newly created backing. This commit also includes moving some values into constants and updating the unit tests. [SAP] fix VM related constants [SAP] vmdk driver - adding comment for terminate_connection() --- .../volume/drivers/vmware/test_vmware_vmdk.py | 35 +++++++++------ cinder/volume/drivers/vmware/vmdk.py | 19 +++++--- cinder/volume/drivers/vmware/volumeops.py | 45 ++++++------------- 3 files changed, 48 insertions(+), 51 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 05b6e683ed4..888264f0529 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2023,6 +2023,7 @@ def test_get_connection_info(self): def test_get_connection_info_vmdk_connector(self): self._test_get_connection_info(vmdk_connector=True) + @mock.patch('cinder.volume.drivers.vmware.volumeops.ControllerType') @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_select_ds_for_volume') @mock.patch.object(VMDK_DRIVER, '_get_extra_config') @@ -2030,7 +2031,8 @@ def test_get_connection_info_vmdk_connector(self): @mock.patch.object(VMDK_DRIVER, '_get_disk_type') def test_get_connection_import_data(self, get_disk_type, get_adapter_type, get_extra_config, - select_ds_for_volume, vops): + select_ds_for_volume, vops, + controller_type): volume = self._create_volume_obj(size=1) folder = mock.Mock(value=mock.Mock()) rp = mock.Mock(value=mock.Mock()) @@ -2039,10 +2041,10 @@ def test_get_connection_import_data(self, get_disk_type, get_adapter_type, 'resource_pool': rp.value, 'vm': { 'path_name': '[ds-1]', - 'guest_id': 'guest-id', - 'num_cpus': 1, - 'memory_mb': 128, - 'vmx_version': 'vmx-8', + 'guest_id': volumeops.VM_GUEST_ID, + 'num_cpus': volumeops.VM_NUM_CPUS, + 'memory_mb': volumeops.VM_MEMORY_MB, + 'vmx_version': volumeops.VMX_VERSION, 'extension_key': 'foo-extension-key', 'extension_type': 'foo-extension-type', 'extra_config': {} @@ -2053,33 +2055,29 @@ def test_get_connection_import_data(self, get_disk_type, get_adapter_type, 'key': 1, 'create': True, 'shared_bus': 'shared', - 
'bus_number': 1 + 'bus_number': volumeops.CONTROLLER_DEVICE_BUS_NUMBER }, 'disk': { 'type': 'diskTypeOne', 'key': -101, 'capacity_in_kb': 1024 * 1024, - 'eagerly_scrub': None, + 'eagerly_scrub': False, 'thin_provisioned': True } } - vops.get_controller_type.return_value = mock_data['controller']['type'] + controller_type.get_controller_type.return_value = mock_data[ + 'controller']['type'] vops.get_controller_key_and_spec.return_value = (mock_data[ 'controller'][ 'key'], mock.Mock()) vops.get_vm_path_name.return_value = mock_data['vm']['path_name'] - vops.get_vm_guest_id.return_value = mock_data['vm']['guest_id'] - vops.get_vm_num_cpus.return_value = mock_data['vm']['num_cpus'] - vops.get_vm_memory_mb.return_value = mock_data['vm']['memory_mb'] vops.get_vmx_version.return_value = mock_data['vm']['vmx_version'] vops._extension_key = mock_data['vm']['extension_key'] vops._extension_type = mock_data['vm']['extension_type'] vops.get_controller_device_shared_bus.return_value = mock_data[ 'controller']['shared_bus'] - vops.get_controller_device_default_bus_number.return_value = \ - mock_data['controller']['bus_number'] vops.get_disk_device_key.return_value = mock_data['disk']['key'] vops.get_disk_capacity_in_kb.return_value = mock_data['disk'][ 'capacity_in_kb'] @@ -2164,6 +2162,17 @@ def test_initialize_connection_with_no_instance_and_no_backing(self): def test_initialize_connection_with_no_instance_and_backing(self): self._test_initialize_connection(instance_exists=False) + @mock.patch.object(VMDK_DRIVER, 'volumeops') + def test_terminate_connection(self, vops): + volume = self._create_volume_obj(status='restoring-backup') + connector = {'platform': 'amd64', 'os_type': 'linux'} + backing = mock.Mock() + vops.get_backing_by_uuid.return_value = backing + self._driver.terminate_connection(volume, connector) + vops.get_backing_by_uuid.assert_called_once_with(volume['id']) + vops.rename_backing.assert_called_once_with(backing, volume['name']) + vops.update_backing_disk_uuid(backing, volume['id']) + @mock.patch.object(VMDK_DRIVER, 'volumeops') def _test_get_volume_group_folder(self, vops, snapshot=False): folder = mock.sentinel.folder diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 03d7cb215b5..ebc0d680146 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -782,7 +782,8 @@ def _get_connection_import_data(self, volume): disk_type = VMwareVcVmdkDriver._get_disk_type(volume) size_kb = volume['size'] * units.Mi adapter_type = self._get_adapter_type(volume) - controller_type = self.volumeops.get_controller_type(adapter_type) + controller_type = volumeops.ControllerType.get_controller_type( + adapter_type) controller_key, controller_spec = \ self.volumeops.get_controller_key_and_spec(adapter_type) return { @@ -791,9 +792,9 @@ def _get_connection_import_data(self, volume): 'vm': { 'path_name': self.volumeops.get_vm_path_name( summary.name), - 'guest_id': self.volumeops.get_vm_guest_id(), - 'num_cpus': self.volumeops.get_vm_num_cpus(), - 'memory_mb': self.volumeops.get_vm_memory_mb(), + 'guest_id': volumeops.VM_GUEST_ID, + 'num_cpus': volumeops.VM_NUM_CPUS, + 'memory_mb': volumeops.VM_MEMORY_MB, 'vmx_version': self.volumeops.get_vmx_version(), 'extension_key': self.volumeops._extension_key, 'extension_type': self.volumeops._extension_type, @@ -807,9 +808,7 @@ def _get_connection_import_data(self, volume): 'shared_bus': self.volumeops.get_controller_device_shared_bus( controller_type), - 'bus_number': - self.volumeops 
- .get_controller_device_default_bus_number() + 'bus_number': volumeops.CONTROLLER_DEVICE_BUS_NUMBER }, 'disk': { 'type': disk_type, @@ -887,9 +886,15 @@ def initialize_connection(self, volume, connector): return self._initialize_connection(volume, connector) def terminate_connection(self, volume, connector, force=False, **kwargs): + # Checking if the connection was used to restore from a backup. In + # that case, the VMDK connector in os-brick created a new backing + # which will replace the initial one. Here we set the proper name + # and backing uuid for the new backing, because os-brick doesn't do it. if 'platform' in connector and 'os_type' in connector and \ volume['status'] == 'restoring-backup': backing = self.volumeops.get_backing_by_uuid(volume['id']) + + self.volumeops.rename_backing(backing, volume['name']) self.volumeops.update_backing_disk_uuid(backing, volume['id']) def create_export(self, context, volume, connector): diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 7a853b54b7b..a19e717f33e 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -37,7 +37,11 @@ BACKING_UUID_KEY = 'instanceUuid' MIN_VIRTUAL_DISK_SIZE_KB = 4 * units.Ki - +VM_GUEST_ID = 'otherGuest' +VM_NUM_CPUS = 1 +VM_MEMORY_MB = 128 +VMX_VERSION = 'vmx-08' +CONTROLLER_DEVICE_BUS_NUMBER = 0 def split_datastore_path(datastore_path): """Split the datastore path to components. @@ -688,14 +692,6 @@ def get_controller_device_shared_bus(controller_type): return 'noSharing' return None - @staticmethod - def get_controller_device_default_bus_number(): - return 0 - - @staticmethod - def get_controller_type(adapter_type): - return ControllerType.get_controller_type(adapter_type) - def _create_controller_config_spec(self, adapter_type): """Returns config spec for adding a disk controller.""" cf = self._session.vim.client.factory @@ -703,8 +699,7 @@ def _create_controller_config_spec(self, adapter_type): controller_type = ControllerType.get_controller_type(adapter_type) controller_device = cf.create('ns0:%s' % controller_type) controller_device.key = -100 - controller_device.busNumber = \ - self.get_controller_device_default_bus_number() + controller_device.busNumber = CONTROLLER_DEVICE_BUS_NUMBER shared_bus = self.get_controller_device_shared_bus(controller_type) if shared_bus: controller_device.sharedBus = shared_bus @@ -718,13 +713,13 @@ def _create_controller_config_spec(self, adapter_type): def get_disk_eagerly_scrub(disk_type): if disk_type == VirtualDiskType.EAGER_ZEROED_THICK: return True - return None + return False @staticmethod def get_disk_thin_provisioned(disk_type): if disk_type == VirtualDiskType.THIN: return True - return None + return False def _create_disk_backing(self, disk_type, vmdk_ds_file_path): """Creates file backing for virtual disk.""" @@ -806,8 +801,8 @@ def _create_specs_for_disk_add(self, size_kb, disk_type, adapter_type, not created for the virtual disk. 
:return: list containing controller and disk config specs """ - (controller_key, controller_spec) = self.get_controller_key_and_spec( - adapter_type) + (controller_key, controller_spec) = \ + self.get_controller_key_and_spec(adapter_type) disk_spec = self._create_virtual_disk_config_spec(size_kb, disk_type, @@ -843,20 +838,8 @@ def _create_managed_by_info(self): def get_vm_path_name(ds_name): return '[%s]' % ds_name - @staticmethod - def get_vm_num_cpus(): - return 1 - - @staticmethod - def get_vm_memory_mb(): - return 128 - - @staticmethod - def get_vm_guest_id(): - return 'otherGuest' - def get_vmx_version(self): - return self._vmx_version or "vmx-08" + return self._vmx_version or VMX_VERSION def _get_create_spec_disk_less(self, name, ds_name, profileId=None, extra_config=None): @@ -875,9 +858,9 @@ def _get_create_spec_disk_less(self, name, ds_name, profileId=None, create_spec = cf.create('ns0:VirtualMachineConfigSpec') create_spec.name = name - create_spec.guestId = self.get_vm_guest_id() - create_spec.numCPUs = self.get_vm_num_cpus() - create_spec.memoryMB = self.get_vm_memory_mb() + create_spec.guestId = VM_GUEST_ID + create_spec.numCPUs = VM_NUM_CPUS + create_spec.memoryMB = VM_MEMORY_MB create_spec.files = vm_file_info # Set the default hardware version to a compatible version supported by # vSphere 5.0. This will ensure that the backing VM can be migrated From dcbe1a3fc55a9c372f6c6f9b490fd54d586e0a4c Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Wed, 29 Jan 2020 10:51:45 +0100 Subject: [PATCH 011/149] [SAP] fix pep8 --- .../tests/unit/volume/drivers/vmware/test_vmware_vmdk.py | 7 ++----- cinder/volume/drivers/vmware/volumeops.py | 1 + 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 888264f0529..9fa10b74b1c 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2068,10 +2068,8 @@ def test_get_connection_import_data(self, get_disk_type, get_adapter_type, controller_type.get_controller_type.return_value = mock_data[ 'controller']['type'] - vops.get_controller_key_and_spec.return_value = (mock_data[ - 'controller'][ - 'key'], - mock.Mock()) + vops.get_controller_key_and_spec.return_value = \ + (mock_data['controller']['key'], mock.Mock()) vops.get_vm_path_name.return_value = mock_data['vm']['path_name'] vops.get_vmx_version.return_value = mock_data['vm']['vmx_version'] vops._extension_key = mock_data['vm']['extension_key'] @@ -2096,7 +2094,6 @@ def test_get_connection_import_data(self, get_disk_type, get_adapter_type, self.assertEqual(mock_data, data) - @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch('oslo_vmware.vim_util.get_moref') @mock.patch.object(VMDK_DRIVER, '_create_backing') diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index a19e717f33e..ed318e43cf7 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -43,6 +43,7 @@ VMX_VERSION = 'vmx-08' CONTROLLER_DEVICE_BUS_NUMBER = 0 + def split_datastore_path(datastore_path): """Split the datastore path to components. From fdede66fd5a5c60f2cb01e320188201ba20c0a7b Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Wed, 29 Jan 2020 10:51:51 +0100 Subject: [PATCH 012/149] [SAP] run `tox -e genopts` for "scheduler: Add ShardFilter" Got notified about this by `tox -e pep8` ... didn't know. 
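For context, `tox -e genopts` regenerates cinder/opts.py by collecting the
module-level option lists defined across the tree, so the ShardFilter only has
to declare its options at module scope for them to show up here. Roughly the
following oslo.config pattern is assumed (illustrative sketch only; the actual
option names and defaults live in cinder/scheduler/filters/shard_filter.py and
are not shown in this patch):

    # Illustrative sketch, not the real contents of shard_filter.py.
    from oslo_config import cfg

    keystone_opts = [
        cfg.StrOpt('auth_url',
                   help='Keystone endpoint used by the shard filter '
                        '(option name assumed for illustration).'),
    ]

    CONF = cfg.CONF
    CONF.register_opts(keystone_opts, group='keystone_group')

cinder/opts.py below only aggregates that `keystone_opts` list for the config
generator; the registration itself stays in the filter module.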
--- cinder/opts.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cinder/opts.py b/cinder/opts.py index f0274559fd4..e695ce79797 100644 --- a/cinder/opts.py +++ b/cinder/opts.py @@ -56,6 +56,8 @@ from cinder.message import api as cinder_message_api from cinder import quota as cinder_quota from cinder.scheduler import driver as cinder_scheduler_driver +from cinder.scheduler.filters import shard_filter as \ + cinder_scheduler_filters_shardfilter from cinder.scheduler import host_manager as cinder_scheduler_hostmanager from cinder.scheduler import manager as cinder_scheduler_manager from cinder.scheduler import scheduler_options as \ @@ -296,6 +298,10 @@ def list_opts(): cinder_zonemanager_drivers_cisco_ciscofczonedriver.cisco_opts, cinder_zonemanager_fczonemanager.zone_manager_opts, )), + ('keystone_group', + itertools.chain( + cinder_scheduler_filters_shardfilter.keystone_opts, + )), ('key_manager', itertools.chain( cinder_keymgr_confkeymgr.key_mgr_opts, From d4e04e2ae146def4dfb48ef2bf763443fffc5b3c Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Wed, 29 Jan 2020 13:08:08 +0100 Subject: [PATCH 013/149] [SAP] vmware: Optionally storage-profile on attach When we set a datastore in maintenace, we remove the tag connecting it to the storage-profile. We do this to prohibit cinder from using that datastore for new volumes. But since cinder also checks the tags for finding a valid datastore on attachment, it does a costly and slow vMotion of the volume to another datasore. We don't want it to vMotion the volumes automatically, but rather want to do it on our own, as doing it on attachment makes attachments really slow. Setting `vmware_profile_check_on_attach` to `false` will disable the check on attachment. It's still done on resize, though. --- .../volume/drivers/vmware/test_vmware_vmdk.py | 30 ++++++++++++++++++- cinder/volume/drivers/vmware/vmdk.py | 16 ++++++++-- 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 9fa10b74b1c..b9414b999d6 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -115,7 +115,8 @@ def setUp(self): vmware_snapshot_format=self.SNAPSHOT_FORMAT, vmware_lazy_create=True, vmware_datastore_regex=None, - reserved_percentage=0 + reserved_percentage=0, + vmware_profile_check_on_attach=True, ) self._db = mock.Mock() @@ -3035,6 +3036,33 @@ def test_relocate_backing_nop(self, ds_sel, get_profile, vops): profile) self.assertFalse(vops.relocate_backing.called) + @mock.patch.object(VMDK_DRIVER, 'volumeops') + @mock.patch.object(VMDK_DRIVER, '_get_storage_profile') + @mock.patch.object(VMDK_DRIVER, 'ds_sel') + def test_relocate_backing_nop_on_attach_if_disabled(self, ds_sel, + get_profile, vops): + self._driver._storage_policy_enabled = True + self._driver.configuration.vmware_profile_check_on_attach = False + volume = self._create_volume_dict() + + datastore = mock.sentinel.datastore + vops.get_datastore.return_value = datastore + + profile = mock.sentinel.profile + get_profile.return_value = profile + + vops.is_datastore_accessible.return_value = True + ds_sel.is_datastore_compliant.return_value = False + + backing = mock.sentinel.backing + host = mock.sentinel.host + self._driver._relocate_backing(volume, backing, host) + + get_profile.assert_called_once_with(volume) + vops.is_datastore_accessible.assert_called_once_with(datastore, host) + 
self.assertFalse(ds_sel.is_datastore_compliant.called) + self.assertFalse(vops.relocate_backing.called) + @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_get_storage_profile') @mock.patch.object(VMDK_DRIVER, 'ds_sel') diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index ebc0d680146..380c258c04e 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -167,6 +167,15 @@ cfg.BoolOpt('vmware_online_resize', default=True, help='If true, enables volume resize in in-use state'), + cfg.BoolOpt('vmware_profile_check_on_attach', + default=True, + help='If False, we are not checking the storage-policy in ' + 'case of attach operation for an existing backing. This is ' + 'required to allow DS maintanance, where we remove the ' + 'storage-profile to prohibit cinder from scheduling new ' + 'volumes to that DS and move the volumes away manually. ' + 'Not disabling this would mean cinder moves the volumes ' + 'around, which can take a long time and leads to timeouts.'), ] CONF = cfg.CONF @@ -2185,9 +2194,12 @@ def _relocate_backing(self, volume, backing, host): backing_profile = None if self._storage_policy_enabled: backing_profile = self._get_storage_profile(volume) + is_compliant = True + if self.configuration.vmware_profile_check_on_attach: + is_compliant = self.ds_sel.is_datastore_compliant(datastore, + backing_profile) if (self.volumeops.is_datastore_accessible(datastore, host) and - self.ds_sel.is_datastore_compliant(datastore, - backing_profile)): + is_compliant): LOG.debug("Datastore: %(datastore)s of backing: %(backing)s is " "already accessible to instance's host: %(host)s.", {'backing': backing, From f88515c9812a47651fe7e3d8a403d9cc687dc9f3 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Mon, 10 Feb 2020 08:52:46 +0100 Subject: [PATCH 014/149] [SAP] scheduler: ShardFilter lets snapshots pass Snapshots are only run through the scheduler to check the capacity is still available. The host is already defined and only that host is checked. Therefore, we can savely ignore snapshots in the `ShardFilter`. --- cinder/scheduler/filters/shard_filter.py | 5 +++++ .../tests/unit/scheduler/test_shard_filter.py | 22 +++++++++++++++++++ 2 files changed, 27 insertions(+) diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index 2348993d98f..f568ecbf713 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -146,6 +146,11 @@ def backend_passes(self, backend_state, filter_properties): if spec: volid = spec.get('volume_id') + if spec.get('snapshot_id'): + # Snapshots always use the same host as the volume. 
+ LOG.debug('Ignoring snapshot.') + return True + if project_id is None: LOG.debug('Could not determine the project for volume %(id)s.', {'id': volid}) diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 0b9e43854b1..3ac1cd76ad6 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -76,6 +76,28 @@ def test_shard_project_not_found(self, mock_update_cache): host = fakes.FakeBackendState('host1', {'capabilities': caps}) self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + def test_snapshot(self): + snap_props = { + 'request_spec': { + 'snapshot_id': 'asdf', + 'volume_properties': {'size': 7} + } + } + caps = {'vcenter-shard': 'vc-a-1'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertTrue(self.filt_cls.backend_passes(host, snap_props)) + + def test_snapshot_None(self): + snap_props = { + 'request_spec': { + 'snapshot_id': None, + 'volume_properties': {'size': 7} + } + } + caps = {'vcenter-shard': 'vc-a-1'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertFalse(self.filt_cls.backend_passes(host, snap_props)) + def test_shard_project_no_shards(self): caps = {'vcenter-shard': 'vc-a-1'} self.filt_cls._PROJECT_SHARD_CACHE['foo'] = [] From 2a906449d7bcbce668e8cc4f2837234066278ea9 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Mon, 10 Feb 2020 10:58:45 +0100 Subject: [PATCH 015/149] [SAP] scheduler: Allow shard override via hint Since it would be too much effort to change our blackbox tests to use multiple projects so they can test in all shards, we implement an override in the `ShardFilter` via scheduler_hints. Example: os volume create --size 10 asdf --hint vcenter-shard=vc-a-1 --- cinder/scheduler/filters/shard_filter.py | 29 ++++++++++++------- .../tests/unit/scheduler/test_shard_filter.py | 18 ++++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index f568ecbf713..fafe7ed2f38 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -151,16 +151,25 @@ def backend_passes(self, backend_state, filter_properties): LOG.debug('Ignoring snapshot.') return True - if project_id is None: - LOG.debug('Could not determine the project for volume %(id)s.', - {'id': volid}) - return False - - shards = self._get_shards(project_id) - if shards is None: - LOG.error('Failure retrieving shards for project %(project_id)s.', - {'project_id': project_id}) - return False + # allow an override of the automatic shard-detection like nova does for + # its compute-hosts + scheduler_hints = filter_properties.get('scheduler_hints') or {} + if self._CAPABILITY_NAME in scheduler_hints: + shards = set([scheduler_hints[self._CAPABILITY_NAME]]) + LOG.debug('Using overridden shards %(shards)s for scheduling.', + {'shards': shards}) + else: + if project_id is None: + LOG.debug('Could not determine the project for volume %(id)s.', + {'id': volid}) + return False + + shards = self._get_shards(project_id) + if shards is None: + LOG.error('Failure retrieving shards for project ' + '%(project_id)s.', + {'project_id': project_id}) + return False if not len(shards): LOG.error('Project %(project_id)s is not assigned to any shard.', diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 3ac1cd76ad6..75d70e9d124 100644 
--- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -117,3 +117,21 @@ def test_backend_shards_match(self): caps = {'vcenter-shard': 'vc-b-0'} host = fakes.FakeBackendState('host1', {'capabilities': caps}) self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + + def test_shard_override_matches(self): + caps = {'vcenter-shard': 'vc-a-1'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + + def test_shard_override_no_match(self): + caps = {'vcenter-shard': 'vc-a-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + + def test_shard_override_no_data(self): + caps = {'vcenter-shard': 'vc-a-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.props['scheduler_hints'] = {'vcenter-shard': None} + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) From 6dfb790146291bf68b742a5307e4a826687ee2f9 Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 24 Feb 2020 09:20:36 -0500 Subject: [PATCH 016/149] [SAP] Add loci consumed custom requirements This patch adds the 'custom-requirements.txt' file which is used by loci builds for cinder base container images. This patch adds the python-agentliveness package that we use to ensure the c-api/c-sch/c-bak/c-vol service is up. --- custom-requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 custom-requirements.txt diff --git a/custom-requirements.txt b/custom-requirements.txt new file mode 100644 index 00000000000..6495ff1685e --- /dev/null +++ b/custom-requirements.txt @@ -0,0 +1,2 @@ +# Any custom requirements that we need for ccloud +-e git+https://github.com/sapcc/python-agentliveness.git#egg=agentliveness From 8777ae9b6d5b1811955e6596c3b5a9e8f06f8ea6 Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 24 Feb 2020 15:00:56 -0500 Subject: [PATCH 017/149] [SAP] add reporting of thin provisioning This patch adds the capability reporting for thin provisioning support as well as max over subscription and reserve percentage. https://docs.openstack.org/cinder/queens/admin/blockstorage-over-subscription.html --- cinder/volume/drivers/vmware/vmdk.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 380c258c04e..213c08b813f 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -293,7 +293,9 @@ class VMwareVcVmdkDriver(driver.VolumeDriver): # real get_volume_stats for proper scheduling of this driver. # 3.4.4 - Ensure datastores exist for storage profiles during # get_volume_stats() - VERSION = '3.4.4' + # 3.4.4.99.0 - Added reporting of thin_provisioning_support, + # max_over_subscription_ratio. 
+ VERSION = '3.4.2.99.0' # ThirdPartySystems wiki page CI_WIKI_NAME = "VMware_CI" @@ -378,12 +380,18 @@ def _get_volume_stats(self): backend_name = self.configuration.safe_get('volume_backend_name') if not backend_name: backend_name = self.__class__.__name__ + + max_over_subscription_ratio = self.configuration.safe_get( + 'max_over_subscription_ratio') data = {'volume_backend_name': backend_name, 'vendor_name': 'VMware', 'driver_version': self.VERSION, 'storage_protocol': 'vmdk', 'reserved_percentage': self.configuration.reserved_percentage, - 'shared_targets': False} + 'shared_targets': False, + 'thin_provisioning_support': True, + 'thick_provisioning_support': True, + 'max_over_subscription_ratio': max_over_subscription_ratio} ds_summaries = self._get_datastore_summaries() available_hosts = self._get_hosts(self._clusters) global_capacity = 0 From 1a19797991720809138d25cfd9d44007d8c6fcba Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 27 Feb 2020 07:13:05 -0500 Subject: [PATCH 018/149] [SAP] added some missing custom requirements This patch adds some missing requirements that prevents cinder services from running in our environment. --- custom-requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/custom-requirements.txt b/custom-requirements.txt index 6495ff1685e..9f64fc8131e 100644 --- a/custom-requirements.txt +++ b/custom-requirements.txt @@ -1,2 +1,5 @@ # Any custom requirements that we need for ccloud -e git+https://github.com/sapcc/python-agentliveness.git#egg=agentliveness +-e git+https://github.com/sapcc/raven-python.git@ccloud#egg=raven +-e git+https://github.com/sapcc/openstack-watcher-middleware.git#egg=watcher-middleware +-e git+https://github.com/sapcc/openstack-audit-middleware.git#egg=audit-middleware From 337880627f62a805ef628d0b6147153d5968fadb Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 27 Feb 2020 08:03:20 -0500 Subject: [PATCH 019/149] [SAP] need redis for osprofiler This patch adds redis to the custom-requirements.txt which is needed by osprofiler --- custom-requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/custom-requirements.txt b/custom-requirements.txt index 9f64fc8131e..0ec7a34af8b 100644 --- a/custom-requirements.txt +++ b/custom-requirements.txt @@ -1,4 +1,8 @@ # Any custom requirements that we need for ccloud + +# needed for osprofiler +redis + -e git+https://github.com/sapcc/python-agentliveness.git#egg=agentliveness -e git+https://github.com/sapcc/raven-python.git@ccloud#egg=raven -e git+https://github.com/sapcc/openstack-watcher-middleware.git#egg=watcher-middleware From cd504727ab206fc11a571bbb909449a74df95865 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Tue, 24 Mar 2020 15:31:01 +0200 Subject: [PATCH 020/149] [SAP] Chunkeddriver - no seek (#30) * chunkeddriver - improving the restore operation Compute all the incremental backups prior to writing it to the file, so that a single write operation is executed on the volume_file regardless the number of incremental backups. Removes the need of seeking back into the volume_file for overwriting with incremental chunks. * Fix add_object() to avoid unnecessary iteration over the same object The previous approach of iterating on enumerate() while inserting 2 times into the list, was doing an extra useless iteration over an object that has just been inserted. We switch to while loop so that we are able to jump to the desired index after we inserted the segments into the list. 
* Add iterator methods in BackupRestoreHandle Since this was built to be used as an iterator, it's cleaner to use the python iterator api and get rid of the has_next() and get_next() methods. * Fix _clear_reader() to properly clear the reader if it's not needed It checks if there are no more segments of the same object after the current index till the end of the segments list, case when it also closes and removes the reader from the cache directly. * Added a docstring for the Segment.of() method * Create BackupRestoreHandleV1 for handling v1 metadata Since we're handling most of the restore process within the BackupRestoreHandle class, we're now moving the metadata versioning down to it's own class (BackupRestoreHandleV1). DRIVER_VERSION_MAPPING should now refer to class names. This kind of classes should extend BackupRestoreHandle or at least take as constructor parameters: * chunked_driver - an instance of ChunkedBackupDriver * volume_id - the volume id * volume_file - the file handle where to write the data Additionaly, such a class should implement the following methods: * add_backup(backup, metadata) - called for each backup * finish_restore() - called after the backups are iterated * Make BackupRestoreHandle an abstract class Since BackupRestoreHandle does not implement the add_backup method which lets other classes inheriting it to define their own backup and metadata handling, it makes sense to make it abstract. --- cinder/backup/chunkeddriver.py | 332 +++++++++++++----- .../unit/backup/drivers/test_backup_handle.py | 184 ++++++++++ 2 files changed, 426 insertions(+), 90 deletions(-) create mode 100644 cinder/tests/unit/backup/drivers/test_backup_handle.py diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index b780d51dcca..79fdd3a6ba8 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -3,6 +3,7 @@ # Copyright (c) 2015 EMC Corporation # Copyright (C) 2015 Kevin Fox # Copyright (C) 2015 Tom Barron +# Copyright (C) 2020 SAP SE # All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); you may @@ -23,6 +24,7 @@ import abc import hashlib +import io import json import os import sys @@ -33,6 +35,7 @@ from oslo_service import loopingcall from oslo_utils import excutils from oslo_utils import secretutils +from oslo_utils import importutils from oslo_utils import units from cinder.backup import driver @@ -80,7 +83,9 @@ class ChunkedBackupDriver(driver.BackupDriver, metaclass=abc.ABCMeta): """ DRIVER_VERSION = '1.0.0' - DRIVER_VERSION_MAPPING = {'1.0.0': '_restore_v1'} + DRIVER_VERSION_MAPPING = { + '1.0.0': 'cinder.backup.chunkeddriver.BackupRestoreHandleV1' + } def _get_compressor(self, algorithm): try: @@ -660,89 +665,6 @@ def _notify_progress(): self._finalize_backup(backup, container, object_meta, object_sha256) - def _restore_v1(self, backup, volume_id, metadata, volume_file, - requested_backup): - """Restore a v1 volume backup. - - Raises BackupRestoreCancel on any requested_backup status change, we - ignore the backup parameter for this check since that's only the - current data source from the list of backup sources. 
- """ - backup_id = backup['id'] - LOG.debug('v1 volume backup restore of %s started.', backup_id) - extra_metadata = metadata.get('extra_metadata') - container = backup['container'] - metadata_objects = metadata['objects'] - metadata_object_names = [] - for obj in metadata_objects: - metadata_object_names.extend(obj.keys()) - LOG.debug('metadata_object_names = %s.', metadata_object_names) - prune_list = [self._metadata_filename(backup), - self._sha256_filename(backup)] - object_names = [object_name for object_name in - self._generate_object_names(backup) - if object_name not in prune_list] - if sorted(object_names) != sorted(metadata_object_names): - err = _('restore_backup aborted, actual object list ' - 'does not match object list stored in metadata.') - raise exception.InvalidBackup(reason=err) - - for metadata_object in metadata_objects: - # Abort when status changes to error, available, or anything else - with requested_backup.as_read_deleted(): - requested_backup.refresh() - if requested_backup.status != fields.BackupStatus.RESTORING: - raise exception.BackupRestoreCancel(back_id=backup.id, - vol_id=volume_id) - - object_name, obj = list(metadata_object.items())[0] - LOG.debug('restoring object. backup: %(backup_id)s, ' - 'container: %(container)s, object name: ' - '%(object_name)s, volume: %(volume_id)s.', - { - 'backup_id': backup_id, - 'container': container, - 'object_name': object_name, - 'volume_id': volume_id, - }) - - with self._get_object_reader( - container, object_name, - extra_metadata=extra_metadata) as reader: - body = reader.read() - compression_algorithm = metadata_object[object_name]['compression'] - decompressor = self._get_compressor(compression_algorithm) - volume_file.seek(obj['offset']) - if decompressor is not None: - LOG.debug('decompressing data using %s algorithm', - compression_algorithm) - decompressed = decompressor.decompress(body) - body = None # Allow Python to free it - volume_file.write(decompressed) - decompressed = None # Allow Python to free it - else: - volume_file.write(body) - body = None # Allow Python to free it - - # force flush every write to avoid long blocking write on close - volume_file.flush() - - # Be tolerant to IO implementations that do not support fileno() - try: - fileno = volume_file.fileno() - except IOError: - LOG.info("volume_file does not support fileno() so skipping " - "fsync()") - else: - os.fsync(fileno) - - # Restoring a backup to a volume can take some time. Yield so other - # threads can run, allowing for among other things the service - # status to be updated - eventlet.sleep(0) - LOG.debug('v1 volume backup restore of %s finished.', - backup_id) - def restore(self, backup, volume_id, volume_file): """Restore the given volume backup from backup repository. 
@@ -764,9 +686,12 @@ def restore(self, backup, volume_id, volume_file): metadata_version = metadata['version'] LOG.debug('Restoring backup version %s', metadata_version) try: - restore_func = getattr(self, self.DRIVER_VERSION_MAPPING.get( - metadata_version)) - except TypeError: + restore_handle = importutils.import_object( + self.DRIVER_VERSION_MAPPING[metadata_version], + self, + volume_id, + volume_file) + except (KeyError, ImportError): err = (_('No support to restore backup version %s') % metadata_version) raise exception.InvalidBackup(reason=err) @@ -782,14 +707,13 @@ def restore(self, backup, volume_id, volume_file): backup_list.append(prev_backup) current_backup = prev_backup - # Do a full restore first, then layer the incremental backups - # on top of it in order. + # Layer the backups in order, from the parent to the last child index = len(backup_list) - 1 while index >= 0: backup1 = backup_list[index] index = index - 1 metadata = self._read_metadata(backup1) - restore_func(backup1, volume_id, metadata, volume_file, backup) + restore_handle.add_backup(backup1, metadata) volume_meta = metadata.get('volume_meta', None) try: @@ -802,6 +726,8 @@ def restore(self, backup, volume_id, volume_file): LOG.error(msg) raise exception.BackupOperationError(msg) + restore_handle.finish_restore() + LOG.debug('restore %(backup_id)s to %(volume_id)s finished.', {'backup_id': backup_id, 'volume_id': volume_id}) @@ -836,3 +762,229 @@ def delete_backup(self, backup): eventlet.sleep(0) LOG.debug('delete %s finished.', backup['id']) + + +class BackupRestoreHandle(object, metaclass=abc.ABCMeta): + """Class used to reconstruct a backup from chunks.""" + def __init__(self, chunked_driver, volume_id, volume_file): + self._driver = chunked_driver + self._volume_id = volume_id + self._volume_file = volume_file + self._segments = [] + self._object_readers = {} + self._idx = -1 + + @abc.abstractmethod + def add_backup(self, backup, metadata): + """This is called for each backup in the incremental backups chain.""" + return + + def finish_restore(self): + for segment in self._segments: + LOG.debug('restoring object. backup: %(backup_id)s, ' + 'container: %(container)s, object name: ' + '%(object_name)s, volume: %(volume_id)s.', + { + 'backup_id': segment.obj['backup_id'], + 'container': segment.obj['container'], + 'object_name': segment.obj['name'], + 'volume_id': self._volume_id, + }) + + # write the segment bytes to the file + self._volume_file.write(self._read_segment(segment)) + + # force flush every write to avoid long blocking write on close + self._volume_file.flush() + + # Be tolerant to IO implementations that do not support fileno() + try: + fileno = self._volume_file.fileno() + except IOError: + LOG.info("volume_file does not support fileno() so skipping " + "fsync()") + else: + os.fsync(fileno) + + # Restoring a backup to a volume can take some time. Yield so other + # threads can run, allowing for among other things the service + # status to be updated + eventlet.sleep(0) + + def _read_segment(self, segment): + """Reads the bytes of a segment""" + buff_reader = self._get_reader(segment) + # seek inside the backup chunk containing this segment + offset_diff = segment.offset - segment.obj['offset'] + buff_reader.seek(offset_diff) + # read the segment's length from the chunk + data = buff_reader.read(segment.length) + eventlet.tpool.execute(self._clear_reader, segment) + return data + + def _get_reader(self, segment): + """Keeps an internal cache of object readers. 
+ + Avoids calling the storage backend multiple times for objects which + have been split into multiple segments due to merging. + """ + obj_name = segment.obj['name'] + obj_reader = self._object_readers.get(obj_name) + if not obj_reader: + obj_reader = self._get_new_reader(segment) + self._object_readers[obj_name] = obj_reader + return obj_reader + + def _get_new_reader(self, segment): + with self._driver._get_object_reader( + segment.obj['container'], + segment.obj['name'], + extra_metadata=segment.obj['extra_metadata']) \ + as reader: + return io.BytesIO(self._get_raw_bytes(reader, segment.obj)) + + def _get_raw_bytes(self, reader, obj): + """Get the bytes of a backup chunk, decompressing if needed""" + compression_algorithm = obj['compression'] + decompressor = self._driver._get_compressor(compression_algorithm) + if decompressor is not None: + LOG.debug('decompressing data using %s algorithm', + compression_algorithm) + return decompressor.decompress(reader.read()) + return reader.read() + + def _clear_reader(self, segment): + """Clear the object reader for a segment, if needed. + + If there is no further segment for the same object, we close and + remove the corresponding object reader, freeing up the memory. + """ + obj_name = segment.obj['name'] + for _segment in self._segments[self._idx + 1:]: + if obj_name == _segment.obj['name']: + return + + self._object_readers[obj_name].close() + self._object_readers.pop(obj_name) + + def add_object(self, metadata_object): + """Merges a backup chunk over the self._segments list. + + The backup chunks are expected to come in order. + :param metadata_object: the backup chunk + """ + # make a copy because we will modify it later + alt_obj = metadata_object.copy() + found = False + idx = 0 + while idx < len(self._segments): + segment = self._segments[idx] + offset = alt_obj['offset'] + length = alt_obj['length'] + end = offset + length + + # the object can be merged with this segment + if segment.offset <= offset < segment.end: + found = True + # remove the segment from the list, we're going to re-add + # only parts of it to the list or nothing at all + self._segments.pop(idx) + + # if the object starts after this segment's offset, then we + # keep the beginning of this segment + diff = offset - segment.offset + if diff > 0: + self._segments.insert(idx, + Segment.of(segment, + length=diff)) + idx += 1 + + # if the object ends before this segment's end, then we keep + # the last part of this segment, otherwise we don't + diff = segment.end - end + if diff > 0: + self._segments.insert(idx, Segment(alt_obj)) + idx += 1 + self._segments.insert(idx, + Segment.of(segment, + length=diff, + offset=end)) + idx += 1 + else: + self._segments.insert(idx, + Segment(alt_obj, + length=length + diff)) + idx += 1 + # if there is nothing left from this object, we're done + if diff == 0: + break + # if there is something left from this object, continue + # merging it over the next segment + alt_obj['offset'] = segment.end + alt_obj['length'] = abs(diff) + else: + idx += 1 + + # we did not find a segment which can be merged with this object, + # so we're adding this object straight to the list, just as it is. 
+ if not found: + self._segments.append(Segment(alt_obj)) + + +class BackupRestoreHandleV1(BackupRestoreHandle): + """Handles restoring of V1 backups.""" + + def add_backup(self, backup, metadata): + """Processes a v1 volume backup for being restored.""" + metadata_objects = metadata['objects'] + metadata_object_names = [] + for obj in metadata_objects: + metadata_object_names.extend(obj.keys()) + LOG.debug('metadata_object_names = %s.', metadata_object_names) + prune_list = [self._driver._metadata_filename(backup), + self._driver._sha256_filename(backup)] + object_names = [object_name for object_name in + self._driver._generate_object_names(backup) + if object_name not in prune_list] + if sorted(object_names) != sorted(metadata_object_names): + err = _('restore_backup aborted, actual object list ' + 'does not match object list stored in metadata.') + raise exception.InvalidBackup(reason=err) + + for metadata_object in metadata_objects: + object_name, obj = list(metadata_object.items())[0] + # keep the information needed to read the object from the + # storage backend + obj['name'] = object_name + obj['backup_id'] = backup['id'] + obj['container'] = backup['container'] + obj['extra_metadata'] = metadata.get('extra_metadata') + + self.add_object(obj) + + +class Segment(object): + """Class being used to represent a segment of a backup object (chunk). + + It helps keeping track of multiple segments of the same chunk, in the + context of using only a few parts of a chunk for restoring incremental + backups. + + :param obj: The original backup chunk this segment belongs to + :param offset: (optional) offset where this segment starts. Default is + obj['offset']. It must be relative to the backup file. + :param length: (optional) length of this segment + """ + + def __init__(self, obj, offset=None, length=None): + self.obj = obj + self.offset = obj['offset'] if offset is None else offset + self.length = obj['length'] if length is None else length + self.end = self.offset + self.length + + @staticmethod + def of(segment, offset=None, length=None): + """Returns a new segment with different offset and/or length.""" + return Segment(segment.obj, + segment.offset if offset is None else offset, + segment.length if length is None else length) diff --git a/cinder/tests/unit/backup/drivers/test_backup_handle.py b/cinder/tests/unit/backup/drivers/test_backup_handle.py new file mode 100644 index 00000000000..955e17e96bc --- /dev/null +++ b/cinder/tests/unit/backup/drivers/test_backup_handle.py @@ -0,0 +1,184 @@ +# Copyright (C) 2020 SAP SE +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. 
+ + +from cinder.backup import chunkeddriver +from cinder.tests.unit import test +import mock + + +class BackupRestoreHandleV1TestCase(test.TestCase): + + BACKUP_RESTORE_HANDLE = chunkeddriver.BackupRestoreHandle + + def setUp(self): + super(BackupRestoreHandleV1TestCase, self).setUp() + self._driver = mock.Mock() + self._volume_file = mock.Mock() + self._volume_id = 'volume-01' + self._obj = { + 'offset': 100, + 'length': 50, + 'container': 'obj_container', + 'name': 'obj_name', + 'extra_metadata': {'foo': 'bar'}, + 'compression': None + } + self._segment = chunkeddriver.Segment(self._obj) + + def test_add_object(self): + obj1 = {'name': 'obj1', 'offset': 0, 'length': 100} + obj2 = {'name': 'obj2', 'offset': 100, 'length': 100} + # incremental + obj3 = {'name': 'obj3', 'offset': 50, 'length': 100} + obj4 = {'name': 'obj4', 'offset': 60, 'length': 50} + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + self._volume_file) + handle.add_object(obj1) + handle.add_object(obj2) + handle.add_object(obj3) + handle.add_object(obj4) + + ranges = handle._segments + + self.assertEqual(0, ranges[0].offset) + self.assertEqual(50, ranges[0].end) + + self.assertEqual(50, ranges[1].offset) + self.assertEqual(60, ranges[1].end) + + self.assertEqual(60, ranges[2].offset) + self.assertEqual(100, ranges[2].end) + + self.assertEqual(100, ranges[3].offset) + self.assertEqual(110, ranges[3].end) + + self.assertEqual(110, ranges[4].offset) + self.assertEqual(150, ranges[4].end) + + self.assertEqual(150, ranges[5].offset) + self.assertEqual(200, ranges[5].end) + + @mock.patch.object(BACKUP_RESTORE_HANDLE, '_get_reader') + @mock.patch.object(BACKUP_RESTORE_HANDLE, '_clear_reader') + def test_read_segment(self, clear_reader, get_reader): + buff_reader_mock = mock.Mock() + buff_reader_mock.read.return_value = b"foo" + get_reader.return_value = buff_reader_mock + + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + self._volume_file) + data = handle._read_segment(self._segment) + + get_reader.assert_called_once_with(self._segment) + buff_reader_mock.seek.assert_called_once_with( + self._segment.offset - self._segment.obj['offset']) + clear_reader.assert_called_once_with(self._segment) + self.assertEqual(data, b"foo") + + @mock.patch.object(BACKUP_RESTORE_HANDLE, '_get_new_reader') + def test_get_reader(self, get_new_reader): + new_reader = mock.Mock() + get_new_reader.return_value = new_reader + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + self._volume_file) + handle._get_reader(self._segment) + get_new_reader.assert_called_once_with(self._segment) + self.assertEqual(handle._object_readers, { + self._segment.obj['name']: new_reader + }) + + @mock.patch.object(BACKUP_RESTORE_HANDLE, '_get_raw_bytes') + def test_get_new_reader(self, get_raw_bytes): + raw_bytes = b'data' + get_raw_bytes.return_value = raw_bytes + obj_reader = mock.Mock() + get_obj_reader = mock.Mock() + get_obj_reader.__enter__ = mock.Mock(return_value=obj_reader) + get_obj_reader.__exit__ = mock.Mock(return_value=False) + self._driver._get_object_reader.return_value = get_obj_reader + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + self._volume_file) + bytes_io = handle._get_new_reader(self._segment) + self._driver._get_object_reader.assert_called_once_with( + self._segment.obj['container'], + self._segment.obj['name'], + extra_metadata=self._segment.obj['extra_metadata']) + get_raw_bytes.assert_called_once_with(obj_reader, 
self._segment.obj) + self.assertEqual(bytes_io.getvalue(), raw_bytes) + + def test_get_raw_bytes(self, decompress=False): + compressor = None + obj = self._obj.copy() + if decompress: + compressor = mock.Mock() + obj['compression'] = 'zlib' + reader = mock.Mock() + reader_ret = mock.Mock() + reader.read.return_value = reader_ret + self._driver._get_compressor.return_value = compressor + + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + self._volume_file) + handle._get_raw_bytes(reader, obj) + + self._driver._get_compressor.\ + assert_called_once_with(obj['compression']) + reader.read.assert_called_once_with() + + if decompress: + compressor.decompress.assert_called_once_with(reader_ret) + + def test_get_raw_bytes_decompressed(self): + self.test_get_raw_bytes(decompress=True) + + def test_clear_reader(self): + obj_reader = mock.Mock() + obj = self._obj.copy() + obj['name'] = 'obj_name_2' + obj_readers = {self._obj['name']: obj_reader} + obj_readers_mock = mock.MagicMock() + obj_readers_mock.__getitem__.side_effect = obj_readers.__getitem__ + + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + self._volume_file) + handle._object_readers = obj_readers_mock + handle._segments = [self._segment, + chunkeddriver.Segment(obj), + chunkeddriver.Segment(self._obj)] + + handle._idx = 0 + handle._clear_reader(self._segment) + obj_readers_mock.__getitem__.assert_not_called() + obj_reader.close.assert_not_called() + obj_readers_mock.pop.assert_not_called() + + handle._idx = 1 + handle._clear_reader(self._segment) + obj_readers_mock.__getitem__.assert_not_called() + obj_reader.close.assert_not_called() + obj_readers_mock.pop.assert_not_called() + + handle._idx = 2 + handle._clear_reader(self._segment) + obj_readers_mock.__getitem__.assert_called_once_with(self._obj['name']) + obj_reader.close.assert_called_once_with() + obj_readers_mock.pop.assert_called_once_with(self._obj['name']) From e391b72a1973d14258e288008365e989f20ed2da Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 26 Mar 2020 11:33:50 -0400 Subject: [PATCH 021/149] [SAP] add concourse_unit_test_task This patch adds the concourse_unit_test_task file so we can run unit tests during the loci image build process. --- concourse_unit_test_task | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 concourse_unit_test_task diff --git a/concourse_unit_test_task b/concourse_unit_test_task new file mode 100644 index 00000000000..97474e489df --- /dev/null +++ b/concourse_unit_test_task @@ -0,0 +1,8 @@ +export DEBIAN_FRONTEND=noninteractive && \ +export UPPER_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/queens-m3/upper-constraints.txt && \ +apt-get update && \ +apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext && \ +pip install tox "six>=1.14.0" && \ +git clone -b stable/queens-m3 --single-branch https://github.com/sapcc/cinder.git --depth=1 && \ +cd cinder && \ +tox -e py27,pep8 From 3663c1b3626d8b5b3dd130ffae6ced6094608b11 Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 27 Mar 2020 11:04:30 -0400 Subject: [PATCH 022/149] [SAP] upgrade pip for concourse_unit_test_task There is a bug in setuptools that prevents python installs from working correctly and you end up with an error "error: 'egg_base' must be a directory name (got `src`)" This patch upgrades the version of pip for running unit tests, which should fix the error. 
--- concourse_unit_test_task | 1 + 1 file changed, 1 insertion(+) diff --git a/concourse_unit_test_task b/concourse_unit_test_task index 97474e489df..ac17d462ebe 100644 --- a/concourse_unit_test_task +++ b/concourse_unit_test_task @@ -2,6 +2,7 @@ export DEBIAN_FRONTEND=noninteractive && \ export UPPER_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/queens-m3/upper-constraints.txt && \ apt-get update && \ apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext && \ +pip install -U pip && \ pip install tox "six>=1.14.0" && \ git clone -b stable/queens-m3 --single-branch https://github.com/sapcc/cinder.git --depth=1 && \ cd cinder && \ From d6351153043d43bbb135f9bcd446093cfd967534 Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 30 Mar 2020 18:32:38 +0000 Subject: [PATCH 023/149] [SAP] fix volume migration for vmware This patch adds the import_data options during volume attach for migrating a volume. When a volume is attached locally for work during migration the volume needs to be writeable in order for cinder to copy bits into the volume. This import_data section of the connection_properties, instructs os-brick to create a write handle for the http connection to the volume. This is needed for migrating a volume from one shard to another, since cinder's generic volume copy takes over. --- cinder/volume/drivers/vmware/vmdk.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 213c08b813f..fd0d5fb7de9 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -41,6 +41,7 @@ from cinder.i18n import _ from cinder.image import image_utils from cinder import interface +from cinder import utils from cinder.volume import configuration from cinder.volume import driver from cinder.volume.drivers.vmware import datastore as hub @@ -776,7 +777,12 @@ def _get_connection_info(self, volume, backing, connector): # instruct os-brick to use ImportVApp and HttpNfc upload for # disconnecting the volume - if volume['status'] == 'restoring-backup': + # + # If we are migrating to this volume, we need to + # create a writeable handle for the migration to work. + if (volume['status'] == 'restoring-backup' or + (volume['status'] == 'available' and + volume['migration_status'].startswith('target:'))): connection_info['data']['import_data'] = \ self._get_connection_import_data(volume) @@ -882,6 +888,7 @@ def _initialize_connection(self, volume, connector): return self._get_connection_info(volume, backing, connector) + @volume_utils.trace def initialize_connection(self, volume, connector): """Allow connection to connector and return connection info. From d1603bfb6eba35934fbb7a9e1bbbfc5b3c34dbe4 Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 14 Apr 2020 09:10:55 -0400 Subject: [PATCH 024/149] [SAP] Fix when migration status is None This patch fixes an issue that tempest found during test_volume_snapshot_backup and the volume['migration_status'] was None and can't be dereferenced as a string. --- cinder/volume/drivers/vmware/vmdk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index fd0d5fb7de9..8238d049e82 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -782,6 +782,7 @@ def _get_connection_info(self, volume, backing, connector): # create a writeable handle for the migration to work. 
if (volume['status'] == 'restoring-backup' or (volume['status'] == 'available' and + volume['migration_status'] and volume['migration_status'].startswith('target:'))): connection_info['data']['import_data'] = \ self._get_connection_import_data(volume) From 7e0c94d451abec484ad9ade8075936dee94332c4 Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 14 Apr 2020 10:05:32 -0400 Subject: [PATCH 025/149] [SAP] fix force detach when connector is None This patch ensures that the connector is not None during force detach. This issue was found during a tempest run. --- cinder/volume/drivers/vmware/vmdk.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8238d049e82..896db1d3e23 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -910,13 +910,14 @@ def initialize_connection(self, volume, connector): """ return self._initialize_connection(volume, connector) + @volume_utils.trace def terminate_connection(self, volume, connector, force=False, **kwargs): # Checking if the connection was used to restore from a backup. In # that case, the VMDK connector in os-brick created a new backing # which will replace the initial one. Here we set the proper name # and backing uuid for the new backing, because os-brick doesn't do it. - if 'platform' in connector and 'os_type' in connector and \ - volume['status'] == 'restoring-backup': + if (connector and 'platform' in connector and 'os_type' in connector + and volume['status'] == 'restoring-backup'): backing = self.volumeops.get_backing_by_uuid(volume['id']) self.volumeops.rename_backing(backing, volume['name']) From 43f066ebb27e5fa4f400e958ef8e9ae50fa3240e Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 6 May 2020 09:15:36 -0400 Subject: [PATCH 026/149] [SAP] add cinder host to notify log line This patch adds the cinder host to the update volume stats notification in the log, so we can track which cinder host is updating at what time. --- cinder/manager.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cinder/manager.py b/cinder/manager.py index 6689b3d0c34..6a02abf901f 100644 --- a/cinder/manager.py +++ b/cinder/manager.py @@ -194,7 +194,8 @@ def update_service_capabilities(self, capabilities): def _publish_service_capabilities(self, context): """Pass data back to the scheduler at a periodic interval.""" if self.last_capabilities: - LOG.debug('Notifying Schedulers of capabilities ...') + LOG.debug('Notifying Schedulers of capabilities for %(host)s...', + {'host': self.host}) self.scheduler_rpcapi.update_service_capabilities( context, self.service_name, From e69177c86e7c08657d2dfa14fc204945b6bb46f8 Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 7 May 2020 14:45:24 -0400 Subject: [PATCH 027/149] [SAP] Added validation for vmware_storage_profile This patch adds some driver startup validation for the cinder.conf vmware_storage_profile setting. It makes sure the storage profile exists. This patch also adds some validation to the get_volume_stats, to make sure not to try and call vcenter when we didn't find any datastores during get_volume_stats() time. We simply log a warning and move on. 
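At its core the new check just resolves every configured profile name through
oslo.vmware's pbm helper and fails fast when one cannot be found. A minimal
standalone sketch of that idea (the real change lives in
VMwareVcVmdkDriver.check_for_setup_error in the diff below; this helper
function is illustrative only and assumes a live vCenter session object):

    from oslo_vmware import pbm

    from cinder import exception
    from cinder.i18n import _


    def validate_storage_profiles(session, profile_names):
        """Raise InvalidInput if a configured storage profile is missing."""
        for name in profile_names:
            # get_profile_id_by_name() returns None for unknown profiles.
            if name and not pbm.get_profile_id_by_name(session, name):
                raise exception.InvalidInput(
                    reason=_("Failed to find storage profile '%s'") % name)

As in the driver itself, this is only meaningful when storage policies (PBM)
are enabled; without PBM the profile names cannot be resolved and the check is
skipped.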
--- .../volume/drivers/vmware/test_vmware_vmdk.py | 41 +++++++++++++++++++ cinder/volume/drivers/vmware/vmdk.py | 19 +++++++-- 2 files changed, 56 insertions(+), 4 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index b9414b999d6..65e7c491453 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -90,6 +90,7 @@ class VMwareVcVmdkDriverTestCase(test.TestCase): IMAGE_ID = 'eb87f4b0-d625-47f8-bb45-71c43b486d3a' IMAGE_NAME = 'image-1' ADAPTER_TYPE = volumeops.VirtualDiskAdapterType.BUS_LOGIC + STORAGE_PROFILE = 'gold' def setUp(self): super(VMwareVcVmdkDriverTestCase, self).setUp() @@ -117,6 +118,7 @@ def setUp(self): vmware_datastore_regex=None, reserved_percentage=0, vmware_profile_check_on_attach=True, + vmware_storage_profile=[self.STORAGE_PROFILE], ) self._db = mock.Mock() @@ -126,6 +128,45 @@ def setUp(self): self._context = context.get_admin_context() self.updated_at = timeutils.utcnow() + @mock.patch.object(VMDK_DRIVER, 'session') + @mock.patch('oslo_vmware.pbm.get_profile_id_by_name') + def test_check_for_setup_error(self, get_profile_id_by_name, session): + profile_id = mock.sentinel.profile_id + get_profile_id_by_name.return_value = mock.Mock(uniqueId=profile_id) + self._driver._storage_policy_enabled = True + + # set config + self._driver.check_for_setup_error() + get_profile_id_by_name.assert_called_once_with(session, + self.STORAGE_PROFILE) + + @mock.patch.object(VMDK_DRIVER, 'session') + @mock.patch('oslo_vmware.pbm.get_profile_id_by_name') + def test_check_for_setup_error_fail(self, get_profile_id_by_name, session): + get_profile_id_by_name.return_value = None + self._driver._storage_policy_enabled = True + + # set config + self.assertRaises(cinder_exceptions.InvalidInput, + self._driver.check_for_setup_error) + get_profile_id_by_name.assert_called_once_with(session, + self.STORAGE_PROFILE) + + @mock.patch.object(VMDK_DRIVER, '_get_storage_profile') + @mock.patch('oslo_vmware.pbm.get_profile_id_by_name') + def test_get_storage_profile_id( + self, get_profile_id_by_name, session, get_storage_profile): + get_storage_profile.return_value = 'gold' + profile_id = mock.sentinel.profile_id + get_profile_id_by_name.return_value = mock.Mock(uniqueId=profile_id) + + self._driver._storage_policy_enabled = True + volume = self._create_volume_dict() + self.assertEqual(profile_id, + self._driver._get_storage_profile_id(volume)) + get_storage_profile.assert_called_once_with(volume) + get_profile_id_by_name.assert_called_once_with(session, 'gold') + @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries') def test_get_volume_stats(self, _get_datastore_summaries, vops): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 896db1d3e23..6b9e0193664 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -344,7 +344,15 @@ def _validate_params(self): raise exception.InvalidInput(reason=reason) def check_for_setup_error(self): - pass + # make sure if the storage profile is set that it exists. 
+ for storage_profile in self.configuration.vmware_storage_profile: + if self._storage_policy_enabled and storage_profile: + profile_id = self._get_storage_profile_by_name(storage_profile) + if not profile_id: + reason = (_("Failed to find storage profile '%s'") + % storage_profile) + raise exception.InvalidInput(reason=reason) + def _update_volume_stats(self): if self.configuration.safe_get('vmware_enable_volume_stats'): @@ -604,13 +612,16 @@ def _get_disk_type(volume): return VMwareVcVmdkDriver._get_extra_spec_disk_type( volume['volume_type_id']) + def _get_storage_profile_by_name(self, storage_profile): + profile = pbm.get_profile_id_by_name(self.session, storage_profile) + if profile: + return profile.uniqueId + def _get_storage_profile_id(self, volume): storage_profile = self._get_storage_profile(volume) profile_id = None if self._storage_policy_enabled and storage_profile: - profile = pbm.get_profile_id_by_name(self.session, storage_profile) - if profile: - profile_id = profile.uniqueId + profile_id = self._get_storage_profile_by_name(storage_profile) return profile_id def _get_extra_config(self, volume): From ada8b43a443554aa53b125f5831d349a95a549d9 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Tue, 19 May 2020 15:13:48 +0300 Subject: [PATCH 028/149] [SAP] Improve the deletion of object readers in BackupRestoreHandle (#51) * Improve the deletion of object readers in BackupRestoreHandle This deletes right away an object reader which is not needed anymore, without waiting for the garbage collector to take care of it. As an effect, this should lower the memory consumption. * Fix finish_restore to increment the current object index This fixes the high memory consumption caused by _clear_reader which was not able to do its job because the self._idx was not reflecting the correct value. * fix pep8 - use delayed string interpolation --- cinder/backup/chunkeddriver.py | 6 ++--- .../unit/backup/drivers/test_backup_handle.py | 22 ++++++++++++++++--- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 79fdd3a6ba8..66be1d319e6 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -790,7 +790,7 @@ def finish_restore(self): 'object_name': segment.obj['name'], 'volume_id': self._volume_id, }) - + self._idx += 1 # write the segment bytes to the file self._volume_file.write(self._read_segment(segment)) @@ -863,9 +863,9 @@ def _clear_reader(self, segment): for _segment in self._segments[self._idx + 1:]: if obj_name == _segment.obj['name']: return - self._object_readers[obj_name].close() - self._object_readers.pop(obj_name) + del self._object_readers[obj_name] + LOG.debug("Cleared reader for object %s", segment.obj['name']) def add_object(self, metadata_object): """Merges a backup chunk over the self._segments list. 
diff --git a/cinder/tests/unit/backup/drivers/test_backup_handle.py b/cinder/tests/unit/backup/drivers/test_backup_handle.py index 955e17e96bc..36fe0e8d578 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_handle.py +++ b/cinder/tests/unit/backup/drivers/test_backup_handle.py @@ -33,6 +33,7 @@ def setUp(self): 'length': 50, 'container': 'obj_container', 'name': 'obj_name', + 'backup_id': 'backup-1', 'extra_metadata': {'foo': 'bar'}, 'compression': None } @@ -169,16 +170,31 @@ def test_clear_reader(self): handle._clear_reader(self._segment) obj_readers_mock.__getitem__.assert_not_called() obj_reader.close.assert_not_called() - obj_readers_mock.pop.assert_not_called() handle._idx = 1 handle._clear_reader(self._segment) obj_readers_mock.__getitem__.assert_not_called() obj_reader.close.assert_not_called() - obj_readers_mock.pop.assert_not_called() handle._idx = 2 handle._clear_reader(self._segment) obj_readers_mock.__getitem__.assert_called_once_with(self._obj['name']) obj_reader.close.assert_called_once_with() - obj_readers_mock.pop.assert_called_once_with(self._obj['name']) + + @mock.patch.object(BACKUP_RESTORE_HANDLE, '_read_segment') + def test_finish_restore(self, read_segment): + segment_bytes = b'foo' + read_segment.return_value = segment_bytes + file_handle = mock.Mock() + file_handle.fileno.side_effect = IOError + handle = chunkeddriver.BackupRestoreHandleV1(self._driver, + self._volume_id, + file_handle) + handle._segments = [self._segment] + + handle.finish_restore() + + read_segment.assert_called_once_with(self._segment) + file_handle.write.assert_called_once_with(segment_bytes) + file_handle.fileno.assert_called_once_with() + self.assertEqual(handle._idx, 0) From bb62c098430e5bff06a244cb07f5dd8f56ee6ba9 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Thu, 9 Apr 2020 22:58:18 +0300 Subject: [PATCH 029/149] [SAP] Allow a driver to specify additional RPC endpoints Drivers may need to create their own RPC communication channel to exchange vendor-specific logic. A driver can now specify additional RPC endpoints by appending it to the `self.additional_endpoints` list. The manager will always initialize that property as an empty list and take care of exposing the endpoints after the driver has been loaded. This is a feature which is already possible in Nova. 
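A minimal sketch of how a driver is expected to use this hook (hypothetical driver and endpoint names; the VMware driver's real usage follows in a later patch):

    import oslo_messaging as messaging

    from cinder.volume import driver


    class VendorRemoteService(object):
        """Hypothetical vendor-specific endpoint served on the volume topic."""

        target = messaging.Target(version='1.0')

        def ping(self, ctxt):
            return 'pong'


    class VendorDriver(driver.VolumeDriver):
        def __init__(self, *args, **kwargs):
            super(VendorDriver, self).__init__(*args, **kwargs)
            # The manager passes in an empty list and registers whatever the
            # driver appends here as extra RPC endpoints once it is loaded.
            self.additional_endpoints.append(VendorRemoteService(self))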
--- cinder/volume/driver.py | 1 + cinder/volume/manager.py | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cinder/volume/driver.py b/cinder/volume/driver.py index f1666b84cd0..719970532c8 100644 --- a/cinder/volume/driver.py +++ b/cinder/volume/driver.py @@ -405,6 +405,7 @@ def __init__(self, execute=utils.execute, *args, **kwargs): self.host = kwargs.get('host') self.cluster_name = kwargs.get('cluster_name') self.configuration = kwargs.get('configuration', None) + self.additional_endpoints = kwargs.get('additional_endpoints', []) if self.configuration: self.configuration.append_config_values(volume_opts) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 5263019181a..1aab27848f2 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -266,6 +266,8 @@ def __init__(self, volume_driver=None, service_name: str = None, requests.packages.urllib3.exceptions.InsecurePlatformWarning) self.key_manager = key_manager.API(CONF) + # A driver can feed additional RPC endpoints into this list + driver_additional_endpoints = [] self.driver = importutils.import_object( volume_driver, configuration=self.configuration, @@ -273,7 +275,9 @@ def __init__(self, volume_driver=None, service_name: str = None, host=self.host, cluster_name=self.cluster, is_vol_db_empty=vol_db_empty, - active_backend_id=curr_active_backend_id) + active_backend_id=curr_active_backend_id, + additional_endpoints=driver_additional_endpoints) + self.additional_endpoints.extend(driver_additional_endpoints) if self.cluster and not self.driver.SUPPORTS_ACTIVE_ACTIVE: msg = _('Active-Active configuration is not currently supported ' From 3dea1ba2255ff4497040db08e1a06533bb9eb15f Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Thu, 9 Apr 2020 23:05:06 +0300 Subject: [PATCH 030/149] [SAP] implementation of volume migration Implement driver's `migrate_volume`. It adds a new RPC server and client for communicating with the destination host to get information needed to build the relocate spec for a particular volume: datastore, resource pool, host, vcenter info, and to perform `move_backing_to_folder` after the volume was relocated, or, to create the backing on the destination host if it doesn't already exist. --- .../unit/volume/drivers/vmware/test_remote.py | 116 ++++++++++++++++++ .../volume/drivers/vmware/test_vmware_vmdk.py | 70 +++++++++++ .../drivers/vmware/test_vmware_volumeops.py | 3 +- cinder/volume/drivers/vmware/remote.py | 86 +++++++++++++ cinder/volume/drivers/vmware/vmdk.py | 74 +++++++++++ cinder/volume/drivers/vmware/volumeops.py | 30 ++++- 6 files changed, 375 insertions(+), 4 deletions(-) create mode 100644 cinder/tests/unit/volume/drivers/vmware/test_remote.py create mode 100644 cinder/volume/drivers/vmware/remote.py diff --git a/cinder/tests/unit/volume/drivers/vmware/test_remote.py b/cinder/tests/unit/volume/drivers/vmware/test_remote.py new file mode 100644 index 00000000000..d54d5d4e32a --- /dev/null +++ b/cinder/tests/unit/volume/drivers/vmware/test_remote.py @@ -0,0 +1,116 @@ +# Copyright (c) 2020 SAP SE +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from cinder.tests.unit import test +from cinder.tests.unit import fake_volume +from cinder.volume.drivers.vmware import remote as vmware_remote + +import mock + + +class VmdkDriverRemoteApiTest(test.RPCAPITestCase): + + def setUp(self): + super(VmdkDriverRemoteApiTest, self).setUp() + self.rpcapi = vmware_remote.VmdkDriverRemoteApi + self.base_version = \ + vmware_remote.VmdkDriverRemoteApi.RPC_DEFAULT_VERSION + self._fake_host = 'fake_host' + self._fake_volume = fake_volume.fake_db_volume() + + def test_get_service_locator_info(self): + self._test_rpc_api('get_service_locator_info', + rpc_method='call', + server=self._fake_host, + host=self._fake_host) + + def test_select_ds_for_volume(self): + self._test_rpc_api('select_ds_for_volume', + rpc_method='call', + server=self._fake_host, + host=self._fake_host, + volume=self._fake_volume) + + def test_move_backing_to_folder(self): + self._test_rpc_api('move_volume_backing_to_folder', + rpc_method='call', + server=self._fake_host, + host=self._fake_host, + volume=self._fake_volume, + folder='fake-folder') + + def test_create_backing(self): + self._test_rpc_api('create_backing', + rpc_method='call', + server=self._fake_host, + host=self._fake_host, + volume=self._fake_volume) + + +class VmdkDriverRemoteServiceTest(test.TestCase): + + def setUp(self): + super(VmdkDriverRemoteServiceTest, self).setUp() + self._volumeops = mock.Mock() + self._driver = mock.Mock( + volumeops=self._volumeops, + service_locator_info=mock.sentinel.service_locator) + self._service = vmware_remote.VmdkDriverRemoteService(self._driver) + self._ctxt = mock.Mock() + self._fake_volume = fake_volume.fake_db_volume() + + def test_get_service_locator_info(self): + ret_val = self._service.get_service_locator_info(self._ctxt) + self.assertEqual(mock.sentinel.service_locator, ret_val) + + def test_select_ds_for_volume(self): + fake_host = mock.Mock(value='fake-host') + fake_rp = mock.Mock(value='fake-rp') + fake_folder = mock.Mock(value='fake-folder') + fake_summary = mock.Mock(datastore=mock.Mock(vlaue='fake-ds')) + + self._driver._select_ds_for_volume.return_value = \ + (fake_host, fake_rp, fake_folder, fake_summary) + ret_val = self._service.select_ds_for_volume(self._ctxt, + self._fake_volume) + self._driver._select_ds_for_volume.assert_called_once_with( + self._fake_volume) + self.assertEqual({ + 'host': fake_host.value, + 'resource_pool': fake_rp.value, + 'folder': fake_folder.value, + 'datastore': fake_summary.datastore.value + }, ret_val) + + @mock.patch('oslo_vmware.vim_util.get_moref') + def test_move_volume_backing_to_folder(self, get_moref): + fake_backing = mock.Mock(value='fake-backing') + folder_name = 'fake-folder' + fake_folder = mock.Mock(value=folder_name) + get_moref.return_value = fake_folder + self._volumeops.get_backing.return_value = fake_backing + + self._service.move_volume_backing_to_folder( + self._ctxt, self._fake_volume, folder_name) + + self._volumeops.get_backing.assert_called_once_with( + self._fake_volume['name'], self._fake_volume['id']) + get_moref.assert_called_once_with(folder_name, 'Folder') + 
self._volumeops.move_backing_to_folder.assert_called_once_with( + fake_backing, fake_folder) + + def test_create_backing(self): + self._service.create_backing(self._ctxt, self._fake_volume) + self._driver._create_backing.assert_called_once_with(self._fake_volume) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 65e7c491453..e9966e56d37 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -123,6 +123,7 @@ def setUp(self): self._db = mock.Mock() self._driver = vmdk.VMwareVcVmdkDriver(configuration=self._config, + additional_endpoints=[], db=self._db) self._context = context.get_admin_context() @@ -3523,6 +3524,75 @@ def test_revert_to_snapshot(self, vops): vops.revert_to_snapshot.assert_called_once_with(backing, snapshot.name) + @mock.patch.object(VMDK_DRIVER, 'volumeops') + @mock.patch('oslo_vmware.vim_util.get_moref') + def test_migrate_volume(self, get_moref, vops, backing=None, + raises_error=False): + r_api = mock.Mock() + self._driver._remote_api = r_api + volume = self._create_volume_obj() + vops.get_backing.return_value = backing + host = {'host': 'fake-host', 'capabilities': {}} + ds_info = {'host': 'fake-ds-host', 'resource_pool': 'fake-rp', + 'datastore': 'fake-ds-name', 'folder': 'fake-folder'} + get_moref.side_effect = [ + mock.sentinel.host_ref, + mock.sentinel.rp_ref, + mock.sentinel.ds_ref + ] + r_api.get_service_locator_info.return_value = \ + mock.sentinel.service_locator + r_api.select_ds_for_volume.return_value = ds_info + if raises_error: + r_api.move_volume_backing_to_folder.side_effect = Exception + + ret_val = self._driver.migrate_volume(mock.sentinel.context, volume, + host) + + vops.get_backing.assert_called_once_with(volume.name, volume.id) + + if not backing: + r_api.create_backing.assert_called_once_with( + mock.sentinel.context, host['host'], volume) + r_api.get_service_locator_info.assert_not_called() + r_api.select_ds_for_volume.assert_not_called() + vops.relocate_backing.assert_not_called() + r_api.move_volume_backing_to_folder.assert_not_called() + get_moref.assert_not_called() + self.assertEqual((True, None), ret_val) + else: + dest_host = host['host'] + + r_api.get_service_locator_info.assert_called_once_with( + mock.sentinel.context, dest_host) + + r_api.select_ds_for_volume.assert_called_once_with( + mock.sentinel.context, dest_host, volume) + + get_moref.assert_has_calls([ + mock.call(ds_info['host'], 'HostSystem'), + mock.call(ds_info['resource_pool'], 'ResourcePool'), + mock.call(ds_info['datastore'], 'Datastore')]) + + vops.relocate_backing.assert_called_once_with( + backing, mock.sentinel.ds_ref, mock.sentinel.rp_ref, + mock.sentinel.host_ref, service=mock.sentinel.service_locator) + + r_api.move_volume_backing_to_folder.assert_called_once_with( + mock.sentinel.context, dest_host, volume, ds_info['folder']) + + if raises_error: + self.assertEqual((True, {'migration_status': 'error'}), + ret_val) + else: + self.assertEqual((True, None), ret_val) + + def test_migrate_volume_relocate_existing_backing(self): + self.test_migrate_volume(backing=mock.Mock()) + + def test_migrate_volume_move_to_folder_error(self): + self.test_migrate_volume(backing=mock.Mock(), raises_error=True) + @ddt.ddt class ImageDiskTypeTest(test.TestCase): diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 
27743fce5f7..75050755990 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -852,7 +852,8 @@ def test_relocate_backing(self, get_relocate_spec, get_disk_device): get_disk_device.assert_called_once_with(backing) get_relocate_spec.assert_called_once_with(datastore, resource_pool, host, disk_move_type, - disk_type, disk_device) + disk_type, disk_device, + service=None) self.session.invoke_api.assert_called_once_with(self.session.vim, 'RelocateVM_Task', backing, diff --git a/cinder/volume/drivers/vmware/remote.py b/cinder/volume/drivers/vmware/remote.py new file mode 100644 index 00000000000..c4d1ddd5be6 --- /dev/null +++ b/cinder/volume/drivers/vmware/remote.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020 SAP SE +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +""" +RPC server and client for communicating with other VMDK drivers directly. +This is the gateway which allows us gathering VMWare related information from +other hosts and perform cross vCenter operations. +""" +from cinder import rpc +from cinder.volume.rpcapi import VolumeAPI +from cinder.volume import volume_utils +from oslo_vmware import vim_util + +import oslo_messaging as messaging + + +class VmdkDriverRemoteApi(rpc.RPCAPI): + RPC_API_VERSION = VolumeAPI.RPC_API_VERSION + RPC_DEFAULT_VERSION = RPC_API_VERSION + TOPIC = VolumeAPI.TOPIC + BINARY = VolumeAPI.BINARY + + def _get_cctxt(self, host=None, version=None, **kwargs): + kwargs['server'] = volume_utils.extract_host(host) + return super(VmdkDriverRemoteApi, self)._get_cctxt(version=version, + **kwargs) + + def get_service_locator_info(self, ctxt, host): + cctxt = self._get_cctxt(host) + return cctxt.call(ctxt, 'get_service_locator_info') + + def select_ds_for_volume(self, ctxt, host, volume): + cctxt = self._get_cctxt(host) + return cctxt.call(ctxt, 'select_ds_for_volume', volume=volume) + + def move_volume_backing_to_folder(self, ctxt, host, volume, folder): + cctxt = self._get_cctxt(host) + return cctxt.call(ctxt, 'move_volume_backing_to_folder', volume=volume, + folder=folder) + + def create_backing(self, ctxt, host, volume): + cctxt = self._get_cctxt(host) + return cctxt.call(ctxt, 'create_backing', volume=volume) + + +class VmdkDriverRemoteService(object): + RPC_API_VERSION = VmdkDriverRemoteApi.RPC_API_VERSION + + target = messaging.Target(version=RPC_API_VERSION) + + def __init__(self, driver): + self._driver = driver + + def get_service_locator_info(self, ctxt): + return self._driver.service_locator_info + + def select_ds_for_volume(self, ctxt, volume): + (host, rp, folder, summary) = self._driver._select_ds_for_volume( + volume) + return { + 'host': host.value, + 'resource_pool': rp.value, + 'folder': folder.value, + 'datastore': summary.datastore.value + } + + def move_volume_backing_to_folder(self, ctxt, volume, folder): + backing = self._driver.volumeops.get_backing(volume['name'], + volume['id']) + folder_ref = vim_util.get_moref(folder, 'Folder') 
+ self._driver.volumeops.move_backing_to_folder(backing, folder_ref) + + def create_backing(self, ctxt, volume): + return self._driver._create_backing(volume) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 6b9e0193664..d80d90d3c73 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -23,7 +23,9 @@ """ import math +import OpenSSL import re +import ssl from oslo_config import cfg from oslo_log import log as logging @@ -46,6 +48,7 @@ from cinder.volume import driver from cinder.volume.drivers.vmware import datastore as hub from cinder.volume.drivers.vmware import exceptions as vmdk_exceptions +from cinder.volume.drivers.vmware import remote as remote_api from cinder.volume.drivers.vmware import volumeops from cinder.volume import volume_types from cinder.volume import volume_utils @@ -320,6 +323,10 @@ def __init__(self, *args, **kwargs): self._clusters = None self._dc_cache = {} self._ds_regex = None + self.additional_endpoints.append([ + remote_api.VmdkDriverRemoteService(self) + ]) + self._remote_api = remote_api.VmdkDriverRemoteApi() @staticmethod def get_driver_options(): @@ -748,6 +755,30 @@ def _select_ds_for_volume(self, volume, host=None, create_params=None): return (host_ref, resource_pool, folder, summary) + @property + def service_locator_info(self): + """Returns information needed to build a ServiceLocator spec.""" + # vCenter URL + host = self.configuration.vmware_host_ip + port = self.configuration.vmware_host_port + url = "https://" + host + if port: + url += ":" + str(port) + # ssl thumbprint + cert = ssl.get_server_certificate((host, port or 443)) + x509 = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, + cert) + return { + 'url': url, + 'ssl_thumbprint': x509.digest("sha1"), + 'instance_uuid': + self.session.vim.service_content.about.instanceUuid, + 'credential': { + 'username': self.configuration.vmware_host_username, + 'password': self.configuration.vmware_host_password + } + } + def _get_connection_info(self, volume, backing, connector): connection_info = {'driver_volume_type': 'vmdk'} connection_info['data'] = { @@ -2542,3 +2573,46 @@ def revert_to_snapshot(self, context, volume, snapshot): LOG.debug("Backing does not exist for volume.", resource=volume) else: self.volumeops.revert_to_snapshot(backing, snapshot.name) + + def migrate_volume(self, context, volume, host): + """Migrate a volume to the specified host. + + If the backing is not created, the dest host will create it. + """ + backing = self.volumeops.get_backing(volume.name, volume.id) + dest_host = host['host'] + # If the backing is not yet created, let the destination host create it + # In this case, migration is not necessary + if not backing: + LOG.info("There is no backing for the volume: %(volume_name)s. 
" + "Creating it on dest_host %(dest_host)s.", + {'volume_name': volume.name, 'dest_host': dest_host}) + self._remote_api.create_backing(context, host['host'], volume) + return (True, None) + + service_locator = self._remote_api.get_service_locator_info(context, + dest_host) + ds_info = self._remote_api.select_ds_for_volume(context, dest_host, + volume) + host_ref = vim_util.get_moref(ds_info['host'], 'HostSystem') + rp_ref = vim_util.get_moref(ds_info['resource_pool'], 'ResourcePool') + ds_ref = vim_util.get_moref(ds_info['datastore'], 'Datastore') + + self.volumeops.relocate_backing(backing, ds_ref, rp_ref, host_ref, + service=service_locator) + try: + self._remote_api.move_volume_backing_to_folder( + context, dest_host, volume, ds_info['folder']) + except Exception: + # At this point the backing has been migrated to the new host. + # If this movement to folder fails, we let the manager know the + # migration happened so that it will save the new host, + # but we update its status to 'error' so that someone can check + # the logs and perform a manual action. + LOG.exception("Failed to move the backing %(volume_id)s to folder " + "%(folder)s.", + {'volume_id': volume['id'], + 'folder': ds_info['folder']},) + return (True, {'migration_status': 'error'}) + + return (True, None) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index ed318e43cf7..3b522dc9c74 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1015,7 +1015,8 @@ def _create_relocate_spec_disk_locator(self, datastore, disk_type, return disk_locator def _get_relocate_spec(self, datastore, resource_pool, host, - disk_move_type, disk_type=None, disk_device=None): + disk_move_type, disk_type=None, disk_device=None, + service=None): """Return spec for relocating volume backing. :param datastore: Reference to the datastore @@ -1039,11 +1040,29 @@ def _get_relocate_spec(self, datastore, resource_pool, host, disk_device) relocate_spec.disk = [disk_locator] + if service is not None: + relocate_spec.service = self._get_service_locator_spec(service) + LOG.debug("Spec for relocating the backing: %s.", relocate_spec) return relocate_spec + def _get_service_locator_spec(self, service): + cf = self._session.vim.client.factory + service_locator = cf.create("ns0:ServiceLocator") + service_locator.instanceUuid = service['instance_uuid'] + service_locator.sslThumbprint = service['ssl_thumbprint'] + service_locator.url = service['url'] + + credential = cf.create("ns0:ServiceLocatorNamePassword") + credential.password = service['credential']['password'] + credential.username = service['credential']['username'] + service_locator.credential = credential + + return service_locator + def relocate_backing( - self, backing, datastore, resource_pool, host, disk_type=None): + self, backing, datastore, resource_pool, host, disk_type=None, + service=None): """Relocates backing to the input datastore and resource pool. 
The implementation uses moveAllDiskBackingsAndAllowSharing disk move @@ -1054,6 +1073,7 @@ def relocate_backing( :param resource_pool: Reference to the resource pool :param host: Reference to the host :param disk_type: destination disk type + :param service: destination service (for cross vCenter) """ LOG.debug("Relocating backing: %(backing)s to datastore: %(ds)s " "and resource pool: %(rp)s with destination disk type: " @@ -1065,6 +1085,10 @@ def relocate_backing( # Relocate the volume backing disk_move_type = 'moveAllDiskBackingsAndAllowSharing' + # For migration to other vCenter service the disk_move_type needs to be + # moveAllDiskBackingsAndDisallowSharing + if service is not None: + disk_move_type = 'moveAllDiskBackingsAndDisallowSharing' disk_device = None if disk_type is not None: @@ -1072,7 +1096,7 @@ def relocate_backing( relocate_spec = self._get_relocate_spec(datastore, resource_pool, host, disk_move_type, disk_type, - disk_device) + disk_device, service=service) task = self._session.invoke_api(self._session.vim, 'RelocateVM_Task', backing, spec=relocate_spec) From 785043bd1cdf96fc4baf136c7253484d9d6450ee Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Thu, 23 Apr 2020 11:34:35 +0300 Subject: [PATCH 031/149] [SAP] Reporting location_info Report location_info in the capabilities. It is a concatenation of a static name and the current vcenter service instanceUuid. The location_info is being used in the migrate_volume to make sure we don't attempt to migrate non-vmware volumes. Additionally, this commit removes the creation of the backing on the dest_host, in case of a migration for a volume which does not already have a backing created. The backing will be created on initialize_connection anyway. --- .../volume/drivers/vmware/test_vmware_vmdk.py | 60 +++++++++++++++---- cinder/volume/drivers/vmware/vmdk.py | 30 ++++++++-- 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index e9966e56d37..fdf206c2dd0 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -118,7 +118,7 @@ def setUp(self): vmware_datastore_regex=None, reserved_percentage=0, vmware_profile_check_on_attach=True, - vmware_storage_profile=[self.STORAGE_PROFILE], + vmware_storage_profile=[self.STORAGE_PROFILE], ) self._db = mock.Mock() @@ -168,9 +168,15 @@ def test_get_storage_profile_id( get_storage_profile.assert_called_once_with(volume) get_profile_id_by_name.assert_called_once_with(session, 'gold') + @mock.patch.object(VMDK_DRIVER, 'session') @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries') - def test_get_volume_stats(self, _get_datastore_summaries, vops): + def test_get_volume_stats(self, _get_datastore_summaries, vops, + session): + retr_result_mock = mock.Mock(spec=['objects']) + retr_result_mock.objects = [] + session.vim.RetrievePropertiesEx.return_value = retr_result_mock + session.vim.service_content.instanceUuid = 'fake-service' FREE_GB = 7 TOTAL_GB = 11 @@ -203,6 +209,8 @@ def __init__(self, **kwargs): self.assertEqual(TOTAL_GB, stats['total_capacity_gb']) self.assertEqual(FREE_GB, stats['free_capacity_gb']) self.assertFalse(stats['shared_targets']) + self.assertEqual(vmdk.LOCATION_DRIVER_NAME + ":fake-service", + stats['location_info']) def test_test_volume_stats_disabled(self): RESERVED_PERCENTAGE = 0 @@ -3527,12 +3535,19 @@ 
def test_revert_to_snapshot(self, vops): @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch('oslo_vmware.vim_util.get_moref') def test_migrate_volume(self, get_moref, vops, backing=None, - raises_error=False): + raises_error=False, capabilities=None): r_api = mock.Mock() self._driver._remote_api = r_api volume = self._create_volume_obj() vops.get_backing.return_value = backing - host = {'host': 'fake-host', 'capabilities': {}} + if capabilities is None: + capabilities = { + 'location_info': vmdk.LOCATION_DRIVER_NAME + ":foo_vcenter" + } + host = { + 'host': 'fake-host', + 'capabilities': capabilities + } ds_info = {'host': 'fake-ds-host', 'resource_pool': 'fake-rp', 'datastore': 'fake-ds-name', 'folder': 'fake-folder'} get_moref.side_effect = [ @@ -3549,20 +3564,26 @@ def test_migrate_volume(self, get_moref, vops, backing=None, ret_val = self._driver.migrate_volume(mock.sentinel.context, volume, host) - vops.get_backing.assert_called_once_with(volume.name, volume.id) + dest_host = host['host'] - if not backing: - r_api.create_backing.assert_called_once_with( - mock.sentinel.context, host['host'], volume) + def _assertions_migration_not_called(): r_api.get_service_locator_info.assert_not_called() r_api.select_ds_for_volume.assert_not_called() vops.relocate_backing.assert_not_called() r_api.move_volume_backing_to_folder.assert_not_called() get_moref.assert_not_called() + + def _assertions_for_no_backing(): + vops.get_backing.assert_called_once_with(volume.name, volume.id) + _assertions_migration_not_called() self.assertEqual((True, None), ret_val) - else: - dest_host = host['host'] + def _assertions_migration_not_performed(): + _assertions_migration_not_called() + self.assertEqual((False, None), ret_val) + + def _assertions_for_migration(): + vops.get_backing.assert_called_once_with(volume.name, volume.id) r_api.get_service_locator_info.assert_called_once_with( mock.sentinel.context, dest_host) @@ -3587,12 +3608,31 @@ def test_migrate_volume(self, get_moref, vops, backing=None, else: self.assertEqual((True, None), ret_val) + if capabilities and 'location_info' in capabilities: + if vmdk.LOCATION_DRIVER_NAME in capabilities['location_info']: + if backing: + _assertions_for_migration() + else: + _assertions_for_no_backing() + else: + _assertions_migration_not_performed() + else: + _assertions_migration_not_performed() + def test_migrate_volume_relocate_existing_backing(self): self.test_migrate_volume(backing=mock.Mock()) def test_migrate_volume_move_to_folder_error(self): self.test_migrate_volume(backing=mock.Mock(), raises_error=True) + def test_migrate_volume_missing_location_info(self): + self.test_migrate_volume(backing=mock.Mock(), capabilities={}) + + def test_migrate_volume_invalid_location_info(self): + self.test_migrate_volume(backing=mock.Mock(), capabilities={ + 'location_info': 'invalid-location-info' + }) + @ddt.ddt class ImageDiskTypeTest(test.TestCase): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index d80d90d3c73..10b2c5eb07f 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -72,6 +72,8 @@ EXTENSION_KEY = 'org.openstack.storage' EXTENSION_TYPE = 'volume' +LOCATION_DRIVER_NAME = 'VMwareVcVmdkDriver' + vmdk_opts = [ cfg.StrOpt('vmware_host_ip', help='IP address for connecting to VMware vCenter server.'), @@ -429,6 +431,10 @@ def _get_volume_stats(self): break data['total_capacity_gb'] = round(global_capacity / units.Gi) data['free_capacity_gb'] = round(global_free / units.Gi) + 
location_info = '%(driver_name)s:%(vcenter)s' % { + 'driver_name': LOCATION_DRIVER_NAME, + 'vcenter': self.session.vim.service_content.about.instanceUuid} + data['location_info'] = location_info self._stats = data return data @@ -2577,17 +2583,31 @@ def revert_to_snapshot(self, context, volume, snapshot): def migrate_volume(self, context, volume, host): """Migrate a volume to the specified host. - If the backing is not created, the dest host will create it. + If the backing is not created, returns success. """ + + false_ret = (False, None) + if volume['status'] != 'available': + return false_ret + if 'location_info' not in host['capabilities']: + return false_ret + info = host['capabilities']['location_info'] + try: + (driver_name, vcenter) = info.split(':') + except ValueError: + return false_ret + + if driver_name != LOCATION_DRIVER_NAME: + return false_ret + backing = self.volumeops.get_backing(volume.name, volume.id) dest_host = host['host'] - # If the backing is not yet created, let the destination host create it - # In this case, migration is not necessary + # If the backing is not yet created, there is no need to migrate if not backing: LOG.info("There is no backing for the volume: %(volume_name)s. " - "Creating it on dest_host %(dest_host)s.", + "No need for a migration. The volume will be assigned to" + " %(dest_host)s.", {'volume_name': volume.name, 'dest_host': dest_host}) - self._remote_api.create_backing(context, host['host'], volume) return (True, None) service_locator = self._remote_api.get_service_locator_info(context, From 5267d19bf60a393c2ff6da07b439f773e3ac7cde Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Tue, 9 Jun 2020 19:54:35 +0300 Subject: [PATCH 032/149] [SAP] Ability to reschedule and migrate volumes upon attachment (#50) * Ability to reschedule and migrate volumes upon attachment Given a volume which is being scheduled to locaion A and a connector (VM) which resides in location B and wants to attach that volume. The volume backend is now able to reject that connector (for example if A is not reachable by B) by throwing an exception. In this case, cinder will reschedule the volume to a new host filtering by the `connection_capabilities` provided by the volume backend and by the connector. All `connection_capabilities` provided by the connector must be in the `connection_capabilities` exposed by the volume backend for this scheduling to take place. Once a new host is selected, a migration is performed and eventually the connection_info is passed back to the connector. This functionality is disabled by default and can be enabled by setting `allow_migration_on_attach=True`. This commit also includes an implementation for VMwareVcVmdkDriver, which is used to reject connectors comming from different vCenter services. * VMware - add reason for ConnectorRejected exception * Add the migrate on attach functionality for attachment_update attachment_update also invokes initialize_connection on the driver, so it must handle a ConnectorRejected exception accordingly. * fix pep8 checks * Improve unit tests and logging - Add more unit tests around find_backend_for_connector, covering scenarios with multiple hosts having multiple connection_capabilities. - Add more meaningful logs around the ConnectorRejected exception - Add exception.InvalidConnectionCapabilities which is used to mark missing connection_capabilities field in a connector, at least for now. 
- Fix find_backend_for_connector to throw that error for connector with missing connection_capabilities field, instead of defaulting to []. * Adding more tests for find_backend_for_connector() - Cover a few more cases for different connection_capabilities - Add specific connection_capabilities in the testcase itself, instead of fakes.py * Add a test failing for a bigger volume Adding a test that fails if we ask for a specific host that was skipped by the scheduler filtering due to it's capacity limits. --- cinder/exception.py | 8 ++ cinder/interface/volume_driver.py | 3 + cinder/opts.py | 1 + cinder/scheduler/driver.py | 4 + cinder/scheduler/filter_scheduler.py | 37 ++++++ cinder/scheduler/manager.py | 11 ++ cinder/scheduler/rpcapi.py | 5 + .../unit/scheduler/test_filter_scheduler.py | 125 ++++++++++++++++++ cinder/tests/unit/scheduler/test_scheduler.py | 18 +++ .../unit/volume/drivers/vmware/test_fcd.py | 4 +- .../volume/drivers/vmware/test_vmware_vmdk.py | 4 +- cinder/tests/unit/volume/test_volume.py | 53 ++++++++ cinder/volume/api.py | 84 ++++++++++-- cinder/volume/drivers/vmware/vmdk.py | 17 ++- cinder/volume/manager.py | 10 +- cinder/volume/rpcapi.py | 8 +- 16 files changed, 374 insertions(+), 18 deletions(-) diff --git a/cinder/exception.py b/cinder/exception.py index 9b6cd1534f8..5bfa7b2ff6d 100644 --- a/cinder/exception.py +++ b/cinder/exception.py @@ -602,6 +602,10 @@ class NoValidBackend(CinderException): message = _("No valid backend was found. %(reason)s") +class InvalidConnectionCapabilities(CinderException): + message = _("Invalid connection capabilities. %(reason)s") + + class QuotaError(CinderException): message = _("Quota exceeded: code=%(code)s") code = 413 @@ -694,6 +698,10 @@ class InvalidConnectorException(VolumeDriverException): message = _("Connector doesn't have required information: %(missing)s") +class ConnectorRejected(VolumeDriverException): + message = _("Connector can't be used with this driver anymore. %(reason)s") + + class GlanceMetadataExists(Invalid): message = _("Glance metadata cannot be updated, key %(key)s" " exists for volume id %(volume_id)s") diff --git a/cinder/interface/volume_driver.py b/cinder/interface/volume_driver.py index e3a7640c5e9..27cd75fe70a 100644 --- a/cinder/interface/volume_driver.py +++ b/cinder/interface/volume_driver.py @@ -138,6 +138,9 @@ def get_volume_stats(self, refresh=False): * online_extend_support (Boolean) Whether the backend supports in-use volume extend or not. Defaults to True. + * connection_capabilities (list) + A list of capabilities that need to match the connector's + capabilities when a connection is being initialized. The returned dict may also contain a list, "pools", which has a similar dict for each pool being used with the backend. 
diff --git a/cinder/opts.py b/cinder/opts.py index e695ce79797..eea37fa7c34 100644 --- a/cinder/opts.py +++ b/cinder/opts.py @@ -265,6 +265,7 @@ def list_opts(): [cinder_volume_api.volume_host_opt], [cinder_volume_api.volume_same_az_opt], [cinder_volume_api.az_cache_time_opt], + [cinder_volume_api.migrate_on_attach_opt], cinder_volume_driver.volume_opts, cinder_volume_driver.iser_opts, cinder_volume_driver.nvmet_opts, diff --git a/cinder/scheduler/driver.py b/cinder/scheduler/driver.py index fe92905bee2..8e0cae99f02 100644 --- a/cinder/scheduler/driver.py +++ b/cinder/scheduler/driver.py @@ -150,6 +150,10 @@ def schedule_create_group(self, context, group, raise NotImplementedError(_( "Must implement schedule_create_group")) + def find_backend_for_connector(self, context, connector, request_spec): + raise NotImplementedError(_("Must implement" + "find_backend_for_connector")) + def get_pools(self, context, filters): """Must override schedule method for scheduler to work.""" raise NotImplementedError(_( diff --git a/cinder/scheduler/filter_scheduler.py b/cinder/scheduler/filter_scheduler.py index 2f047d81aea..462c4e357a9 100644 --- a/cinder/scheduler/filter_scheduler.py +++ b/cinder/scheduler/filter_scheduler.py @@ -111,6 +111,43 @@ def schedule_create_volume(self, context, request_spec, filter_properties): filter_properties, allow_reschedule=True) + def find_backend_for_connector(self, context, connector, request_spec): + key = 'connection_capabilities' + if key not in connector: + raise exception.InvalidConnectionCapabilities( + reason=_("The connector doesn't contain a %s field.") % key) + + weighed_backends = self._get_weighted_candidates(context, request_spec) + if not weighed_backends: + raise exception.NoValidBackend(reason=_("No weighed backends " + "available")) + connector_capabilities = set(connector[key]) + + def _backend_matches_connector(bck): + if key not in bck.obj.capabilities: + LOG.debug("Backend %(backend) doesn't contain %(key)s.", + {'backend': bck.obj.host, 'key': key}) + return False + backend_capabilities = set(bck.obj.capabilities.get(key)) + if connector_capabilities & backend_capabilities ==\ + connector_capabilities: + return True + LOG.debug("Requested %(key)s %(req)s not found in " + "%(host)s backend's %(key)s %(given)s.", + {'key': key, 'req': connector_capabilities, + 'host': bck.obj.host, 'given': backend_capabilities}) + return False + + weighed_backends = [b for b in weighed_backends if + _backend_matches_connector(b)] + + if not weighed_backends: + raise exception.NoValidBackend( + reason=_("No backend matched the given connector.")) + + top_backend = self._choose_top_backend(weighed_backends, request_spec) + return top_backend.obj + def backend_passes_filters(self, context, backend, request_spec, filter_properties): """Check if the specified backend passes the filters.""" diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index d16385685f1..9b32a6f9745 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -367,6 +367,17 @@ def _retype_volume_set_error(self, context, ex, request_spec, reservations, old_reservations) + @append_operation_type() + def find_backend_for_connector(self, context, connector, request_spec): + self._wait_for_scheduler() + + backend = self.driver.find_backend_for_connector(context, + connector, + request_spec) + return {'host': backend.obj.host, + 'cluster_name': backend.obj.cluster_name, + 'capabilities': backend.obj.capabilities} + @append_operation_type() def manage_existing(self, 
context, volume, request_spec, filter_properties=None): diff --git a/cinder/scheduler/rpcapi.py b/cinder/scheduler/rpcapi.py index bc421b793f3..02c6a6ed912 100644 --- a/cinder/scheduler/rpcapi.py +++ b/cinder/scheduler/rpcapi.py @@ -267,3 +267,8 @@ def create_backup(self, ctxt, backup): cctxt = self._get_cctxt() msg_args = {'backup': backup} return cctxt.cast(ctxt, 'create_backup', **msg_args) + + def find_backend_for_connector(self, context, connector, request_spec): + cctxt = self._get_cctxt() + return cctxt.call(context, 'find_backend_for_connector', + connector=connector, request_spec=request_spec) diff --git a/cinder/tests/unit/scheduler/test_filter_scheduler.py b/cinder/tests/unit/scheduler/test_filter_scheduler.py index 142872ba787..f1e2432a3d6 100644 --- a/cinder/tests/unit/scheduler/test_filter_scheduler.py +++ b/cinder/tests/unit/scheduler/test_filter_scheduler.py @@ -558,3 +558,128 @@ def test_retype_policy_demand_migrate_fail(self, _mock_service_get_topic): self.assertRaises(exception.NoValidBackend, sched.find_retype_backend, ctx, request_spec, filter_properties={}, migration_policy='on-demand') + + @mock.patch('cinder.db.service_get_all') + def test_find_backend_for_connector_with_valid_connection_capabilities( + self, _mock_service_get_all): + sched = fakes.FakeFilterScheduler() + host_mgr = fakes.FakeHostManager() + host_mgr.service_states['host3']['connection_capabilities'] = [ + 'host3', 'host-3', 'common'] + host_mgr.service_states['host1']['connection_capabilities'] = [ + 'host1', 'host-1', 'common'] + sched.host_manager = host_mgr + ctx = context.RequestContext('user', 'project', is_admin=True) + + fakes.mock_host_manager_db_calls(_mock_service_get_all) + request_spec = {'volume_id': fake.VOLUME_ID, + 'volume_type': {'name': 'LVM_iSCSI'}, + 'volume_properties': {'project_id': 1, + 'size': 10}} + request_spec = objects.RequestSpec.from_primitives(request_spec) + + # host1 is the top weighed backend + connector = {'connection_capabilities': []} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host1', volume_utils.extract_host(backend.host)) + + connector = {'connection_capabilities': ['common']} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host1', volume_utils.extract_host(backend.host)) + + connector = {'connection_capabilities': ['host-1']} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host1', volume_utils.extract_host(backend.host)) + + connector = {'connection_capabilities': ['host-1', 'host1']} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host1', volume_utils.extract_host(backend.host)) + + # host3 has a lower weight but it matches the + # connection_capabilities exposed by the connector + connector = {'connection_capabilities': ['host-3']} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host3', volume_utils.extract_host(backend.host)) + + connector = {'connection_capabilities': ['host3', 'host-3']} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host3', volume_utils.extract_host(backend.host)) + + @mock.patch('cinder.db.service_get_all') + def test_find_backend_for_connector_with_invalid_connection_capabilities( + self, _mock_service_get_all): + sched = fakes.FakeFilterScheduler() + host_mgr = fakes.FakeHostManager() + 
host_mgr.service_states['host1']['connection_capabilities'] = [ + 'host-1'] + host_mgr.service_states['host3']['connection_capabilities'] = [ + 'host-3'] + sched.host_manager = host_mgr + + ctx = context.RequestContext('user', 'project', is_admin=True) + + fakes.mock_host_manager_db_calls(_mock_service_get_all) + request_spec = {'volume_id': fake.VOLUME_ID, + 'volume_type': {'name': 'LVM_iSCSI'}, + 'volume_properties': {'project_id': 1, + 'size': 10}} + request_spec = objects.RequestSpec.from_primitives(request_spec) + + connector = {'connection_capabilities': ['unknown_capability']} + self.assertRaises(exception.NoValidBackend, + sched.find_backend_for_connector, + ctx, connector, request_spec) + + connector = {'connection_capabilities': ['host-1', + 'unknown_capability']} + self.assertRaises(exception.NoValidBackend, + sched.find_backend_for_connector, + ctx, connector, request_spec) + + connector = {'connection_capabilities': ['host-3', + 'host-1']} + self.assertRaises(exception.NoValidBackend, + sched.find_backend_for_connector, + ctx, connector, request_spec) + + connector = {} + self.assertRaises(exception.InvalidConnectionCapabilities, + sched.find_backend_for_connector, + ctx, connector, request_spec) + + @mock.patch('cinder.db.service_get_all') + def test_find_backend_for_connector_with_bigger_volume( + self, _mock_service_get_all): + sched = fakes.FakeFilterScheduler() + host_mgr = fakes.FakeHostManager() + host_mgr.service_states['host3']['connection_capabilities'] = [ + 'host-3'] + host_mgr.service_states['host1']['connection_capabilities'] = [ + 'host-1'] + sched.host_manager = host_mgr + + ctx = context.RequestContext('user', 'project', is_admin=True) + + fakes.mock_host_manager_db_calls(_mock_service_get_all) + request_spec = {'volume_id': fake.VOLUME_ID, + 'volume_type': {'name': 'LVM_iSCSI'}, + 'volume_properties': {'project_id': 1, + 'size': 300}} + + # host3 won't fit 300 gb + connector = {'connection_capabilities': ['host-3']} + self.assertRaises(exception.NoValidBackend, + sched.find_backend_for_connector, + ctx, connector, request_spec) + + # host1 will fit it + connector = {'connection_capabilities': []} + backend = sched.find_backend_for_connector(ctx, connector, + request_spec) + self.assertEqual('host1', volume_utils.extract_host(backend.host)) diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py index 4036e7d59ab..c8bd4e7ccc8 100644 --- a/cinder/tests/unit/scheduler/test_scheduler.py +++ b/cinder/tests/unit/scheduler/test_scheduler.py @@ -628,6 +628,24 @@ def test_create_backup_no_service(self, mock_volume_update, mock_error.assert_called_once_with( backup, 'Service not found for creating backup.') + @mock.patch('cinder.scheduler.driver.Scheduler.find_backend_for_connector') + def test_find_backend_for_connector(self, _mock_find_backend_for_conector): + connector = mock.Mock() + request_spec = mock.Mock() + backend_obj = mock.Mock(host='fake-host', + cluster_name='fake-cluster', capabilities=[]) + backend_ret = mock.Mock(obj=backend_obj) + _mock_find_backend_for_conector.return_value = backend_ret + ret = self.manager.find_backend_for_connector(self.context, + connector, request_spec) + _mock_find_backend_for_conector.assert_called_once_with( + self.context, connector, request_spec) + self.assertEqual(ret, { + 'host': backend_ret.obj.host, + 'cluster_name': backend_ret.obj.cluster_name, + 'capabilities': backend_ret.obj.capabilities + }) + class SchedulerTestCase(test.TestCase): """Test case for base scheduler 
driver class.""" diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index 552c2291cff..4e3126aa746 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -84,9 +84,11 @@ def test_do_setup(self, vops, vmdk_do_setup): self.assertTrue(self._driver._use_fcd_snapshot) self.assertTrue(self._driver._storage_policy_enabled) + @mock.patch.object(VMDK_DRIVER, 'session') @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries') - def test_get_volume_stats(self, _get_datastore_summaries, vops): + def test_get_volume_stats(self, _get_datastore_summaries, vops, + session): FREE_GB = 7 TOTAL_GB = 11 diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index fdf206c2dd0..aad3c2010eb 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -172,11 +172,11 @@ def test_get_storage_profile_id( @mock.patch.object(VMDK_DRIVER, 'volumeops') @mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries') def test_get_volume_stats(self, _get_datastore_summaries, vops, - session): + session): retr_result_mock = mock.Mock(spec=['objects']) retr_result_mock.objects = [] session.vim.RetrievePropertiesEx.return_value = retr_result_mock - session.vim.service_content.instanceUuid = 'fake-service' + session.vim.service_content.about.instanceUuid = 'fake-service' FREE_GB = 7 TOTAL_GB = 11 diff --git a/cinder/tests/unit/volume/test_volume.py b/cinder/tests/unit/volume/test_volume.py index 9f4c8150200..0eb706efccd 100644 --- a/cinder/tests/unit/volume/test_volume.py +++ b/cinder/tests/unit/volume/test_volume.py @@ -3350,6 +3350,59 @@ def test_tpool_size(self): self.assertListEqual([], eventlet.tpool._threads) eventlet.tpool._nthreads = 20 + @mock.patch('cinder.volume.volume_types.get_volume_type') + def test_initialize_connection_with_rejected_connector( + self, fake_get_volume_type): + ini_ret = {'ip': '1.2.3.4'} + connector = {'ip': '0.0.0.0'} + volume_type = 'fake-volume-type' + volume = tests_utils.create_volume(self.context) + host_obj = {'host': 'fake-host', + 'cluster_name': 'fake-cluster', + 'capabilities': {}} + + self.override_config('allow_migration_on_attach', True) + fake_get_volume_type.return_value = volume_type + + volume_api = cinder.volume.api.API() + scheduler_rpcapi = mock.Mock() + volume_api.scheduler_rpcapi = scheduler_rpcapi + volume_api.scheduler_rpcapi.find_backend_for_connector.return_value =\ + host_obj + + volume_rpcapi = mock.Mock() + volume_api.volume_rpcapi = volume_rpcapi + + call_times = {volume.id: -1} + + def _initialize_connection_side_effect(context, volume, connector): + call_times[volume.id] += 1 + if call_times[volume.id] == 0: + # First time it rejects the connector + raise exception.ConnectorRejected(reason=None) + if call_times[volume.id] == 1: + # Second time (after migration) it returns the connection data + return ini_ret + + volume_rpcapi.initialize_connection.side_effect =\ + _initialize_connection_side_effect + + conn_result =\ + volume_api.initialize_connection(self.context, volume, connector) + + self.assertEqual(conn_result, ini_ret) + volume_rpcapi.initialize_connection.assert_has_calls([ + mock.call(self.context, volume, connector), + mock.call(self.context, volume, connector) + ]) + 
volume_rpcapi.migrate_volume.assert_called_once_with( + self.context, volume, mock.ANY, force_host_copy=False, + wait_for_completion=True) + backend = volume_rpcapi.migrate_volume.call_args[0][2] + self.assertEqual(backend.host, host_obj['host']) + self.assertEqual(backend.cluster_name, host_obj['cluster_name']) + self.assertEqual(backend.capabilities, host_obj['capabilities']) + class VolumeTestCaseLocks(base.BaseVolumeTestCase): MOCK_TOOZ = False diff --git a/cinder/volume/api.py b/cinder/volume/api.py index c7ac1af1d4c..849e5149bcf 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -54,6 +54,7 @@ from cinder.policies import volumes as vol_policy from cinder import quota from cinder import quota_utils +from cinder.scheduler import host_manager from cinder.scheduler import rpcapi as scheduler_rpcapi from cinder import utils from cinder.volume.flows.api import create_volume @@ -82,13 +83,20 @@ help='Cache volume availability zones in ' 'memory for the provided duration in ' 'seconds') +migrate_on_attach_opt = cfg.BoolOpt('allow_migration_on_attach', + default=False, + help="A host might recognise a connector " + "as valid but it can't use it to " + "initialize a connection. This will " + "allow to migrate the volume to a " + "valid host prior to attachment.") CONF = cfg.CONF CONF.register_opt(allow_force_upload_opt) CONF.register_opt(volume_host_opt) CONF.register_opt(volume_same_az_opt) CONF.register_opt(az_cache_time_opt) - +CONF.register_opt(migrate_on_attach_opt) CONF.import_opt('glance_core_properties', 'cinder.image.glance') LOG = logging.getLogger(__name__) @@ -797,6 +805,32 @@ def detach(self, context, volume, attachment_id): resource=volume) return detach_results + def _migrate_by_connector(self, ctxt, volume, connector): + volume_type = {} + if volume.volume_type_id: + volume_type = volume_types.get_volume_type( + ctxt.elevated(), volume.volume_type_id) + request_spec = { + 'volume_properties': volume, + 'volume_type': volume_type, + 'volume_id': volume.id} + try: + dest = self.scheduler_rpcapi.find_backend_for_connector( + ctxt, connector, request_spec) + except exception.NoValidBackend: + LOG.error("The connector was rejected by the backend. 
Could not " + "find another backend compatible with the connector %s.", + connector) + return None + backend = host_manager.BackendState(host=dest['host'], + cluster_name=dest['cluster_name'], + capabilities=dest['capabilities']) + LOG.debug("Invoking migrate_volume to host=%(host).", dest['host']) + self.volume_rpcapi.migrate_volume(ctxt, volume, backend, + force_host_copy=False, + wait_for_completion=True) + volume.refresh() + def initialize_connection(self, context, volume, connector): context.authorize(vol_action_policy.INITIALIZE_POLICY, target_obj=volume) @@ -807,9 +841,25 @@ def initialize_connection(self, context, volume, connector): msg = _("The volume connection cannot be initialized in " "maintenance mode.") raise exception.InvalidVolume(reason=msg) - init_results = self.volume_rpcapi.initialize_connection(context, - volume, - connector) + + def _migrate_and_initialize_connection(): + self._migrate_by_connector(context, volume, connector) + return self.volume_rpcapi.initialize_connection(context, volume, + connector) + init_results = None + try: + init_results = self.volume_rpcapi.initialize_connection(context, + volume, + connector) + except exception.ConnectorRejected: + with excutils.save_and_reraise_exception() as exc_context: + if CONF.allow_migration_on_attach: + LOG.info("The connector was rejected by the volume " + "backend while initializing the connection. " + "Attempting to migrate it.") + init_results = _migrate_and_initialize_connection() + exc_context.reraise = False + LOG.info("Initialize volume connection completed successfully.", resource=volume) return init_results @@ -2256,12 +2306,26 @@ def attachment_update(self, ctxt, attachment_ref, connector): '%(vol)s') % {'vol': volume_ref.id} raise exception.InvalidVolume(reason=msg) - - connection_info = ( - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id)) + connection_info = None + try: + connection_info = ( + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id)) + except exception.ConnectorRejected: + with excutils.save_and_reraise_exception() as exc_context: + if CONF.allow_migration_on_attach: + LOG.info("The connector was rejected by the volume " + "backend while updating the attachments. " + "Trying to migrate it.") + exc_context.reraise = False + self._migrate_by_connector(ctxt, volume_ref, connector) + connection_info =\ + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id) attachment_ref.connection_info = connection_info attachment_ref.save() return attachment_ref diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 10b2c5eb07f..6a606df555f 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -389,6 +389,10 @@ def _get_fake_stats(self): self._stats = data return self._stats + def _get_connection_capabilities(self): + return ['vmware_service_instance_uuid:%s' % + self.session.vim.service_content.about.instanceUuid] + def _get_volume_stats(self): """Fetch the stats about the backend. 
@@ -409,7 +413,8 @@ def _get_volume_stats(self): 'shared_targets': False, 'thin_provisioning_support': True, 'thick_provisioning_support': True, - 'max_over_subscription_ratio': max_over_subscription_ratio} + 'max_over_subscription_ratio': max_over_subscription_ratio, + 'connection_capabilities': self._get_connection_capabilities()} ds_summaries = self._get_datastore_summaries() available_hosts = self._get_hosts(self._clusters) global_capacity = 0 @@ -905,7 +910,17 @@ def _initialize_connection(self, volume, connector): :param connector: Connector information :return: Return connection information """ + # Check that connection_capabilities match + # This ensures the connector is bound to the same vCenter service + if 'connection_capabilities' in connector: + missing = set(self._get_connection_capabilities()) -\ + set(connector['connection_capabilities']) + if missing: + raise exception.ConnectorRejected( + reason="Connector is missing %s" % ', '.join(missing)) + backing = self.volumeops.get_backing(volume.name, volume.id) + if 'instance' in connector: # The instance exists instance = vim_util.get_moref(connector['instance'], diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 1aab27848f2..60e1eb9241f 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -1912,6 +1912,9 @@ def initialize_connection(self, try: conn_info = self.driver.initialize_connection(volume, connector) + except exception.ConnectorRejected: + with excutils.save_and_reraise_exception(): + LOG.info("The connector was rejected by the volume driver.") except Exception as err: err_msg = (_("Driver initialize connection failed " "(error: %(err)s).") % {'err': err}) @@ -2343,7 +2346,9 @@ def _migrate_volume_generic(self, # Copy the source volume to the destination volume try: attachments = volume.volume_attachment - if not attachments: + # A volume might have attachments created, but if it is reserved + # it means it's being migrated prior to the attachment completion. 
+ if not attachments or volume.status == 'reserved': # Pre- and post-copy driver-specific actions self.driver.before_volume_copy(ctxt, volume, new_volume, remote='dest') @@ -4700,6 +4705,9 @@ def _connection_create(self, try: conn_info = self.driver.initialize_connection(volume, connector) + except exception.ConnectorRejected: + with excutils.save_and_reraise_exception(): + LOG.info("The connector was rejected by the volume driver.") except Exception as err: err_msg = (_("Driver initialize connection failed " "(error: %(err)s).") % {'err': err}) diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index 9b72583bca9..3b8083649b7 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -259,7 +259,8 @@ def extend_volume(self, ctxt, volume, new_size, reservations): cctxt.cast(ctxt, 'extend_volume', volume=volume, new_size=new_size, reservations=reservations) - def migrate_volume(self, ctxt, volume, dest_backend, force_host_copy): + def migrate_volume(self, ctxt, volume, dest_backend, force_host_copy, + wait_for_completion=False): backend_p = {'host': dest_backend.host, 'cluster_name': dest_backend.cluster_name, 'capabilities': dest_backend.capabilities} @@ -270,8 +271,9 @@ def migrate_volume(self, ctxt, volume, dest_backend, force_host_copy): del backend_p['cluster_name'] cctxt = self._get_cctxt(volume.service_topic_queue, version) - cctxt.cast(ctxt, 'migrate_volume', volume=volume, host=backend_p, - force_host_copy=force_host_copy) + method = 'call' if wait_for_completion else 'cast' + getattr(cctxt, method)(ctxt, 'migrate_volume', volume=volume, + host=backend_p, force_host_copy=force_host_copy) def migrate_volume_completion(self, ctxt, volume, new_volume, error): cctxt = self._get_cctxt(volume.service_topic_queue) From 212e6b94c6f3766e2f0f8b488cedcb75d844aff2 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Fri, 12 Jun 2020 14:23:34 +0300 Subject: [PATCH 033/149] [SAP] fix - scheduler manager find_backend_for_connector return value This fixes the bug of manager trying to access .obj of a PoolState, instead it should access the PoolState properties directly, because that's what the scheduler driver returns. 
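
For illustration only, a minimal self-contained sketch of the corrected return shape (the FakePoolState class and helper name below are made up for this note; the real object comes from the scheduler driver and the authoritative change is in the manager.py hunk that follows):

    class FakePoolState(object):
        # Toy stand-in for the object the scheduler driver returns.
        def __init__(self, host, cluster_name, capabilities):
            self.host = host
            self.cluster_name = cluster_name
            self.capabilities = capabilities

    def to_backend_dict(backend):
        # Read the attributes directly off the returned object instead of
        # going through a non-existent `.obj` wrapper.
        return {'host': backend.host,
                'cluster_name': backend.cluster_name,
                'capabilities': backend.capabilities}

    # Example:
    # to_backend_dict(FakePoolState('host@vmware#pool', 'cluster1', {}))
    # -> {'host': 'host@vmware#pool', 'cluster_name': 'cluster1',
    #     'capabilities': {}}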
--- cinder/scheduler/manager.py | 6 +++--- cinder/tests/unit/scheduler/test_scheduler.py | 9 ++++----- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index 9b32a6f9745..65790be8124 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -374,9 +374,9 @@ def find_backend_for_connector(self, context, connector, request_spec): backend = self.driver.find_backend_for_connector(context, connector, request_spec) - return {'host': backend.obj.host, - 'cluster_name': backend.obj.cluster_name, - 'capabilities': backend.obj.capabilities} + return {'host': backend.host, + 'cluster_name': backend.cluster_name, + 'capabilities': backend.capabilities} @append_operation_type() def manage_existing(self, context, volume, request_spec, diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py index c8bd4e7ccc8..f33b3bf04df 100644 --- a/cinder/tests/unit/scheduler/test_scheduler.py +++ b/cinder/tests/unit/scheduler/test_scheduler.py @@ -632,18 +632,17 @@ def test_create_backup_no_service(self, mock_volume_update, def test_find_backend_for_connector(self, _mock_find_backend_for_conector): connector = mock.Mock() request_spec = mock.Mock() - backend_obj = mock.Mock(host='fake-host', + backend_ret = mock.Mock(host='fake-host', cluster_name='fake-cluster', capabilities=[]) - backend_ret = mock.Mock(obj=backend_obj) _mock_find_backend_for_conector.return_value = backend_ret ret = self.manager.find_backend_for_connector(self.context, connector, request_spec) _mock_find_backend_for_conector.assert_called_once_with( self.context, connector, request_spec) self.assertEqual(ret, { - 'host': backend_ret.obj.host, - 'cluster_name': backend_ret.obj.cluster_name, - 'capabilities': backend_ret.obj.capabilities + 'host': backend_ret.host, + 'cluster_name': backend_ret.cluster_name, + 'capabilities': backend_ret.capabilities }) From 0511e2c1bf116cb31aee8c00ce45585a462fb26a Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Mon, 15 Jun 2020 10:58:20 +0300 Subject: [PATCH 034/149] [SAP] FIX vmware migrations issues (#55) This includes several fixes to make both host migrations and driver migrations work. * It allows to migrate_volume for 'reserved' volumes as well, so that a volume can be migrated by the driver before it is attached * It fixes the host migration (with force_host_copy enabled) by implementing update_migrated_volume which updates the backing uuids. Also fixes the terminate_connection to match the condition from the initialize_connection for renaming and updating the backing after a migration or a backup restore. * It fixes a development artifact which was causing the RPC endpoints of the VMDK driver to not be advertised (it was appending to the additional_endpoints instead of extending the list). 
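
As a rough standalone sketch of the second point (the helper name is invented for this note, and volumeops is assumed to expose get_backing, rename_backing, update_backing_uuid and update_backing_disk_uuid as in volumeops.py; the authoritative version is the update_migrated_volume() added in the vmdk.py hunk below):

    def fixup_backing_after_migration(volumeops, volume, new_volume):
        # The backing was created under the new (temporary) volume's identity.
        backing = volumeops.get_backing(new_volume['name'], new_volume['id'])
        if not backing:
            # Nothing to fix up if the backing cannot be located.
            return None
        # Give the backing back the original volume's name and UUIDs so later
        # lookups by name or by instanceUuid resolve to the right volume.
        volumeops.rename_backing(backing, volume['name'])
        volumeops.update_backing_uuid(backing, volume['id'])
        volumeops.update_backing_disk_uuid(backing, volume['id'])
        return None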
--- .../volume/drivers/vmware/test_vmware_vmdk.py | 46 +++++++++++++++---- cinder/volume/drivers/vmware/vmdk.py | 40 +++++++++++++--- 2 files changed, 71 insertions(+), 15 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index aad3c2010eb..fb3349ef8ca 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -238,6 +238,7 @@ def _create_volume_dict(self, attachment=None, project_id=PROJECT_ID): return {'id': vol_id, + 'name_id': vol_id, 'display_name': display_name, 'name': 'volume-%s' % vol_id, 'volume_type_id': volume_type_id, @@ -295,11 +296,13 @@ def test_create_volume_with_lazy_create_disabled(self, create_backing): @mock.patch.object(VMDK_DRIVER, 'volumeops') def test_delete_volume_without_backing(self, vops): vops.get_backing.return_value = None + vops.get_backing_by_uuid.return_value = None - volume = self._create_volume_dict() + volume = self._create_volume_obj() self._driver.delete_volume(volume) vops.get_backing.assert_called_once_with(volume['name'], volume['id']) + vops.get_backing_by_uuid.assert_called_once_with(volume['name_id']) self.assertFalse(vops.delete_backing.called) @mock.patch.object(VMDK_DRIVER, 'volumeops') @@ -3608,16 +3611,17 @@ def _assertions_for_migration(): else: self.assertEqual((True, None), ret_val) - if capabilities and 'location_info' in capabilities: - if vmdk.LOCATION_DRIVER_NAME in capabilities['location_info']: - if backing: - _assertions_for_migration() - else: - _assertions_for_no_backing() + if capabilities and 'location_info' in capabilities: + if capabilities['location_info'].startswith( + '%s:' % vmdk.LOCATION_DRIVER_NAME): + if backing: + _assertions_for_migration() else: - _assertions_migration_not_performed() + _assertions_for_no_backing() else: _assertions_migration_not_performed() + else: + _assertions_migration_not_performed() def test_migrate_volume_relocate_existing_backing(self): self.test_migrate_volume(backing=mock.Mock()) @@ -3633,6 +3637,32 @@ def test_migrate_volume_invalid_location_info(self): 'location_info': 'invalid-location-info' }) + @mock.patch.object(VMDK_DRIVER, 'volumeops') + def test_update_migrated_volume(self, vops): + volume = self._create_volume_obj() + new_volume = self._create_volume_obj(vol_id='new-id') + backing = mock.Mock() + vops.get_backing.return_value = backing + ret_val = self._driver.update_migrated_volume(self._context, volume, + new_volume, 'old-status') + vops.rename_backing.assert_called_once_with(backing, volume['name']) + vops.update_backing_uuid.assert_called_once_with(backing, volume['id']) + vops.update_backing_disk_uuid.assert_called_once_with(backing, + volume['id']) + self.assertIsNone(ret_val) + + @mock.patch.object(VMDK_DRIVER, 'volumeops') + def test_update_migrated_volume_without_backing(self, vops): + volume = self._create_volume_obj() + new_volume = self._create_volume_obj(vol_id='new-id') + vops.get_backing.return_value = None + ret_val = self._driver.update_migrated_volume(self._context, volume, + new_volume, 'old-status') + vops.rename_backing.assert_not_called() + vops.update_backing_uuid.assert_not_called() + vops.update_backing_disk_uuid.assert_not_called() + self.assertIsNone(ret_val) + @ddt.ddt class ImageDiskTypeTest(test.TestCase): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 6a606df555f..9daccb0440f 100644 --- 
a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -325,7 +325,7 @@ def __init__(self, *args, **kwargs): self._clusters = None self._dc_cache = {} self._ds_regex = None - self.additional_endpoints.append([ + self.additional_endpoints.extend([ remote_api.VmdkDriverRemoteService(self) ]) self._remote_api = remote_api.VmdkDriverRemoteApi() @@ -568,6 +568,11 @@ def _delete_volume(self, volume): :param volume: Volume object """ backing = self.volumeops.get_backing(volume['name'], volume['id']) + if not backing: + # If a volume has just been migrated, the manager assigned the + # temporary ID in the `volume` parameter, but instead it has set + # the correct ID to _name_id, which we need to perform deletion. + backing = self.volumeops.get_backing_by_uuid(volume.name_id) if not backing: LOG.info("Backing not available, no operation " "to be performed.") @@ -833,10 +838,7 @@ def _get_connection_info(self, volume, backing, connector): # # If we are migrating to this volume, we need to # create a writeable handle for the migration to work. - if (volume['status'] == 'restoring-backup' or - (volume['status'] == 'available' and - volume['migration_status'] and - volume['migration_status'].startswith('target:'))): + if self._is_volume_subject_to_import_vapp(volume): connection_info['data']['import_data'] = \ self._get_connection_import_data(volume) @@ -850,6 +852,12 @@ def _get_connection_info(self, volume, backing, connector): return connection_info + def _is_volume_subject_to_import_vapp(self, volume): + return (volume['status'] == 'restoring-backup' or + (volume['status'] == 'available' and + volume['migration_status'] and + volume['migration_status'].startswith('target:'))) + def _get_connection_import_data(self, volume): (host, rp, folder, summary) = self._select_ds_for_volume( volume) @@ -980,7 +988,7 @@ def terminate_connection(self, volume, connector, force=False, **kwargs): # which will replace the initial one. Here we set the proper name # and backing uuid for the new backing, because os-brick doesn't do it. if (connector and 'platform' in connector and 'os_type' in connector - and volume['status'] == 'restoring-backup'): + and self._is_volume_subject_to_import_vapp(volume)): backing = self.volumeops.get_backing_by_uuid(volume['id']) self.volumeops.rename_backing(backing, volume['name']) @@ -2602,7 +2610,11 @@ def migrate_volume(self, context, volume, host): """ false_ret = (False, None) - if volume['status'] != 'available': + allowed_statuses = ['available', 'reserved'] + if volume['status'] not in allowed_statuses: + LOG.debug('Only %s volumes can be migrated using backend ' + 'assisted migration. 
Falling back to generic migration.', + " or ".join(allowed_statuses)) return false_ret if 'location_info' not in host['capabilities']: return false_ret @@ -2651,3 +2663,17 @@ def migrate_volume(self, context, volume, host): return (True, {'migration_status': 'error'}) return (True, None) + + def update_migrated_volume(self, ctxt, volume, new_volume, + original_volume_status): + backing = self.volumeops.get_backing(new_volume['name'], + new_volume['id']) + if not backing: + LOG.warning("Backing was not found after migration.") + return None + + self.volumeops.rename_backing(backing, volume['name']) + self.volumeops.update_backing_uuid(backing, volume['id']) + self.volumeops.update_backing_disk_uuid(backing, volume['id']) + + return None From 3aff53a316f0073aca46d07be6fd80e29e27e788 Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 19 Jun 2020 08:41:01 -0400 Subject: [PATCH 035/149] [SAP] Added fix for check_for_setup_error This patch updates the driver version to reflect the changes to add volume migration for soft sharding. Also adds the fix for the vmware_storage_profile config value not being set and tested during check_for_setup_error. --- .../volume/drivers/vmware/test_vmware_vmdk.py | 10 +++++++++ cinder/volume/drivers/vmware/vmdk.py | 21 ++++++++++++------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index fb3349ef8ca..23cde379f92 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -141,6 +141,16 @@ def test_check_for_setup_error(self, get_profile_id_by_name, session): get_profile_id_by_name.assert_called_once_with(session, self.STORAGE_PROFILE) + @mock.patch.object(VMDK_DRIVER, 'session') + def test_check_for_setup_error_no_profile(self, session): + self._driver._storage_policy_enabled = True + + self._config.vmware_storage_profile = None + self._driver.check_for_setup_error() + + self._config.vmware_storage_profile = [] + self._driver.check_for_setup_error() + @mock.patch.object(VMDK_DRIVER, 'session') @mock.patch('oslo_vmware.pbm.get_profile_id_by_name') def test_check_for_setup_error_fail(self, get_profile_id_by_name, session): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 9daccb0440f..d12e4388dd8 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -301,7 +301,10 @@ class VMwareVcVmdkDriver(driver.VolumeDriver): # get_volume_stats() # 3.4.4.99.0 - Added reporting of thin_provisioning_support, # max_over_subscription_ratio. - VERSION = '3.4.2.99.0' + # 3.4.2.99.1 - VMware implementation of volume migration + # 3.4.2.99.2 - Added soft sharding volume migration, fixed a small issue + # in check_for_setup_error where storage_profile not set. + VERSION = '3.4.2.99.2' # ThirdPartySystems wiki page CI_WIKI_NAME = "VMware_CI" @@ -354,13 +357,15 @@ def _validate_params(self): def check_for_setup_error(self): # make sure if the storage profile is set that it exists. 
- for storage_profile in self.configuration.vmware_storage_profile: - if self._storage_policy_enabled and storage_profile: - profile_id = self._get_storage_profile_by_name(storage_profile) - if not profile_id: - reason = (_("Failed to find storage profile '%s'") - % storage_profile) - raise exception.InvalidInput(reason=reason) + if self.configuration.vmware_storage_profile: + for storage_profile in self.configuration.vmware_storage_profile: + if self._storage_policy_enabled and storage_profile: + profile_id = self._get_storage_profile_by_name( + storage_profile) + if not profile_id: + reason = (_("Failed to find storage profile '%s'") + % storage_profile) + raise exception.InvalidInput(reason=reason) def _update_volume_stats(self): From f292c6d5b0731fa0f2d81abf40237b9898f15e6f Mon Sep 17 00:00:00 2001 From: chuan137 Date: Thu, 6 Aug 2020 15:14:55 +0200 Subject: [PATCH 036/149] [SAP] need jaeger-client for osprofiler This patch adds the jaeger-client to the custom-requirements.txt, which is required when osprofiler uses jaeger as backend. --- custom-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/custom-requirements.txt b/custom-requirements.txt index 0ec7a34af8b..aafd956149b 100644 --- a/custom-requirements.txt +++ b/custom-requirements.txt @@ -2,6 +2,7 @@ # needed for osprofiler redis +jaeger-client -e git+https://github.com/sapcc/python-agentliveness.git#egg=agentliveness -e git+https://github.com/sapcc/raven-python.git@ccloud#egg=raven From 66ad4ff6297a45fb3bd45533940ff4d360f6d7eb Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 4 Sep 2020 09:34:30 -0400 Subject: [PATCH 037/149] [SAP] Randomize the selection of best datastores This patch adds a new config option that allows for randomizing the selection of datastores at backing creation time. First the best datastores are chosen, which are the most connected datatstores with the most space available. Then that list of datastores is randomized for selection. The first datastore in that random list is chosen. This helps the driver not always pick the same datastore when each datastore reports lots of free space. Change-Id: I3992fa1a0cb5f059140685c74f70f8047bbb1480 [SAP] Fix randomize_datastore selection. This patch passes in the randomization settings to the datastore selector object during driver do_setup() time. Previously those values were passed ony if the ds object didn't already exist. 
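
Conceptually the selection boils down to the following self-contained sketch (the function name is made up for this note, and sorted_ds_props is assumed to already be ordered best-first; the real logic lives in DatastoreSelector, see the datastore.py hunk below):

    import random

    def pick_best_datastore(sorted_ds_props, random_ds=False,
                            random_ds_range=None):
        # Candidates arrive best-first: most connected hosts, most free space.
        candidates = list(sorted_ds_props)
        if random_ds:
            if random_ds_range:
                # Optionally shuffle only within the top N candidates.
                candidates = candidates[:random_ds_range]
            random.shuffle(candidates)
        # The first entry of the (possibly shuffled) list is picked.
        return candidates[0] if candidates else None

With random_ds_range unset, every viable datastore takes part in the shuffle; setting it keeps the choice within the N best candidates while still spreading new backings across them.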
--- .../volume/drivers/vmware/test_vmware_vmdk.py | 2 ++ cinder/volume/drivers/vmware/datastore.py | 11 +++++++++- cinder/volume/drivers/vmware/vmdk.py | 22 +++++++++++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 23cde379f92..743bdc2130c 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -119,6 +119,8 @@ def setUp(self): reserved_percentage=0, vmware_profile_check_on_attach=True, vmware_storage_profile=[self.STORAGE_PROFILE], + vmware_select_random_best_datastore=False, + vmware_random_datastore_range=None, ) self._db = mock.Mock() diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index ceb56b492cf..95eece84848 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -55,12 +55,15 @@ class DatastoreSelector(object): PROFILE_NAME = "storageProfileName" # TODO(vbala) Remove dependency on volumeops. - def __init__(self, vops, session, max_objects, ds_regex=None): + def __init__(self, vops, session, max_objects, ds_regex=None, + random_ds=False, random_ds_range=None): self._vops = vops self._session = session self._max_objects = max_objects self._ds_regex = ds_regex self._profile_id_cache = {} + self._random_ds = random_ds + self._random_ds_range = random_ds_range @coordination.synchronized('vmware-datastore-profile-{profile_name}') def get_profile_id(self, profile_name): @@ -246,6 +249,12 @@ def _select_host(host_mounts): return host_mount.key sorted_ds_props = sorted(datastores.values(), key=_sort_key) + if self._random_ds: + LOG.debug('Shuffling best datastore selection.') + if self._random_ds_range: + sorted_ds_props = sorted_ds_props[:self._random_ds_range] + random.shuffle(sorted_ds_props) + for ds_props in sorted_ds_props: host_ref = _select_host(ds_props['host']) if host_ref: diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index d12e4388dd8..8c6fecaaa81 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -182,6 +182,20 @@ 'volumes to that DS and move the volumes away manually. ' 'Not disabling this would mean cinder moves the volumes ' 'around, which can take a long time and leads to timeouts.'), + cfg.BoolOpt('vmware_select_random_best_datastore', + default=False, + help='If True, driver will randomize the picking of ' + 'best datastore from best possible datastores ' + 'during volume backing creation. Best possible datastores ' + 'are most connected hosts and most free space.'), + cfg.IntOpt('vmware_random_datastore_range', + default=None, + help='If vmware_select_random_best_datastore is enabled ' + 'this enables subselecting a range of datastores to pick from ' + 'after they have been sorted. ie. 
If there are 10 ' + 'datastores, and vmware_random_datastore_range is set to 5 ' + 'Then it will filter in 5 datastores prior to randomizing ' + 'the datastores to pick from.'), ] CONF = cfg.CONF @@ -738,7 +752,6 @@ def _select_datastore(self, req, host=None): LOG.error("There are no valid hosts available in " "configured cluster(s): %s.", self._clusters) raise vmdk_exceptions.NoValidHostException() - best_candidate = self.ds_sel.select_datastore(req, hosts=hosts) if not best_candidate: LOG.error("There is no valid datastore satisfying " @@ -2228,8 +2241,13 @@ def do_setup(self, context): max_objects = self.configuration.vmware_max_objects_retrieval self._volumeops = volumeops.VMwareVolumeOps( self.session, max_objects, EXTENSION_KEY, EXTENSION_TYPE) + random_ds = self.configuration.vmware_select_random_best_datastore + random_ds_range = self.configuration.vmware_random_datastore_range self._ds_sel = hub.DatastoreSelector( - self.volumeops, self.session, max_objects, ds_regex=self._ds_regex) + self.volumeops, self.session, max_objects, + ds_regex=self._ds_regex, + random_ds=random_ds, + random_ds_range=random_ds_range) # Get clusters to be used for backing VM creation. cluster_names = self.configuration.vmware_cluster_name From b4a04e8ff6266e8d9437f2f3527dae511762b70f Mon Sep 17 00:00:00 2001 From: Dmitry Galkin Date: Fri, 18 Dec 2020 15:45:22 +0100 Subject: [PATCH 038/149] [SAP] added openstack-rate-limit-middleware to custom requirements. - needed for https://github.com/sapcc/helm-charts/pull/2149 --- custom-requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/custom-requirements.txt b/custom-requirements.txt index aafd956149b..1efadfcaa5d 100644 --- a/custom-requirements.txt +++ b/custom-requirements.txt @@ -8,3 +8,4 @@ jaeger-client -e git+https://github.com/sapcc/raven-python.git@ccloud#egg=raven -e git+https://github.com/sapcc/openstack-watcher-middleware.git#egg=watcher-middleware -e git+https://github.com/sapcc/openstack-audit-middleware.git#egg=audit-middleware +-e git+https://github.com/sapcc/openstack-rate-limit-middleware.git#egg=rate-limit-middleware From a5cd2f6d4e6ba69108eb3ade060524b9a58a8e6f Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Thu, 28 May 2020 12:44:10 +0300 Subject: [PATCH 039/149] [SAP] fix pep8 checks --- .../unit/volume/drivers/vmware/test_vmware_vmdk.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 743bdc2130c..94a5c3a53d2 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -1909,17 +1909,14 @@ def _test_do_setup( mock_session.pbm_wsdl_loc_set.assert_called_once_with(pbm_wsdl) self.assertEqual(enable_pbm, self._driver._storage_policy_enabled) register_extension.assert_called_once() - vops_cls.assert_called_once_with( - session, - self._driver.configuration.vmware_max_objects_retrieval, - vmdk.EXTENSION_KEY, - vmdk.EXTENSION_TYPE) - self.assertEqual(vops_cls.return_value, self._driver._volumeops) + cfg = self._driver.configuration ds_sel_cls.assert_called_once_with( vops, session, self._driver.configuration.vmware_max_objects_retrieval, - ds_regex=ds_regex) + ds_regex=ds_regex, + random_ds=cfg.vmware_select_random_best_datastore, + random_ds_range=cfg.vmware_random_datastore_range) self.assertEqual(ds_sel_cls.return_value, self._driver._ds_sel) vops.get_cluster_refs.assert_called_once_with( 
self._driver.configuration.vmware_cluster_name) From 8f06b23a2408c4800b597913cb12af6bc90adca1 Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 15 Sep 2020 13:18:02 -0400 Subject: [PATCH 040/149] [SAP] fix DB migration scripts This patch updates the migrate_versions scripts for the upgrade from queens all the way to Train. The upstream Train release collapsed all of the migrate scripts into a single script called 123_cinder_init.py. Since we are upgrading from queens we had to add placeholders for version 120, 121, 122, and also replace the 123_cinder_init.py with the rocky branch script for version 123, which is 123_add_transfer_no_snapshots --- .../migrate_repo/versions/120_placeholder.py | 22 + .../migrate_repo/versions/121_placeholder.py | 22 + .../migrate_repo/versions/122_placeholder.py | 22 + .../versions/123_add_transfer_no_snapshots.py | 21 + .../migrate_repo/versions/123_cinder_init.py | 906 ------------------ 5 files changed, 87 insertions(+), 906 deletions(-) create mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py create mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/121_placeholder.py create mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/122_placeholder.py create mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/123_add_transfer_no_snapshots.py delete mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/123_cinder_init.py diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py b/cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py new file mode 100644 index 00000000000..9609bdf16ae --- /dev/null +++ b/cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py @@ -0,0 +1,22 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# This is a placeholder for Queens backports. +# Do not use this number for new Rocky work. New work starts after +# all the placeholders. +# +# See this for more information: +# http://lists.openstack.org/pipermail/openstack-dev/2013-March/006827.html + + +def upgrade(migrate_engine): + pass diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/121_placeholder.py b/cinder/db/sqlalchemy/migrate_repo/versions/121_placeholder.py new file mode 100644 index 00000000000..9609bdf16ae --- /dev/null +++ b/cinder/db/sqlalchemy/migrate_repo/versions/121_placeholder.py @@ -0,0 +1,22 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# This is a placeholder for Queens backports. +# Do not use this number for new Rocky work. New work starts after +# all the placeholders. 
+# +# See this for more information: +# http://lists.openstack.org/pipermail/openstack-dev/2013-March/006827.html + + +def upgrade(migrate_engine): + pass diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/122_placeholder.py b/cinder/db/sqlalchemy/migrate_repo/versions/122_placeholder.py new file mode 100644 index 00000000000..9609bdf16ae --- /dev/null +++ b/cinder/db/sqlalchemy/migrate_repo/versions/122_placeholder.py @@ -0,0 +1,22 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +# This is a placeholder for Queens backports. +# Do not use this number for new Rocky work. New work starts after +# all the placeholders. +# +# See this for more information: +# http://lists.openstack.org/pipermail/openstack-dev/2013-March/006827.html + + +def upgrade(migrate_engine): + pass diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/123_add_transfer_no_snapshots.py b/cinder/db/sqlalchemy/migrate_repo/versions/123_add_transfer_no_snapshots.py new file mode 100644 index 00000000000..4cee7fe793b --- /dev/null +++ b/cinder/db/sqlalchemy/migrate_repo/versions/123_add_transfer_no_snapshots.py @@ -0,0 +1,21 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from sqlalchemy import Boolean, Column, MetaData, Table + + +def upgrade(migrate_engine): + """Add the no_snapshots column to the transfers table.""" + meta = MetaData(bind=migrate_engine) + transfers = Table('transfers', meta, autoload=True) + if not hasattr(transfers.c, 'no_snapshots'): + transfers.create_column(Column('no_snapshots', Boolean, default=False)) diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/123_cinder_init.py b/cinder/db/sqlalchemy/migrate_repo/versions/123_cinder_init.py deleted file mode 100644 index bdca01dc19c..00000000000 --- a/cinder/db/sqlalchemy/migrate_repo/versions/123_cinder_init.py +++ /dev/null @@ -1,906 +0,0 @@ -# Copyright 2012 OpenStack Foundation -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. 
- -import datetime -import uuid - -from oslo_config import cfg -from oslo_utils import timeutils -from sqlalchemy.dialects import mysql -from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Index, Integer -from sqlalchemy import MetaData, String, Table, Text, UniqueConstraint, text -from sqlalchemy.sql import expression - -from cinder.volume import group_types as volume_group_types - -# Get default values via config. The defaults will either -# come from the default values set in the quota option -# configuration or via cinder.conf if the user has configured -# default values for quotas there. -CONF = cfg.CONF -CONF.import_opt('quota_volumes', 'cinder.quota') -CONF.import_opt('quota_snapshots', 'cinder.quota') -CONF.import_opt('quota_gigabytes', 'cinder.quota') -CONF.import_opt('quota_consistencygroups', 'cinder.quota') - -CLASS_NAME = 'default' -CREATED_AT = datetime.datetime.now() # noqa - - -def define_tables(meta): - services = Table( - 'services', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', Integer, primary_key=True, nullable=False), - Column('host', String(255)), - Column('binary', String(255)), - Column('topic', String(255)), - Column('report_count', Integer, nullable=False), - Column('disabled', Boolean), - Column('availability_zone', String(255)), - Column('disabled_reason', String(255)), - Column('modified_at', DateTime(timezone=False)), - Column('rpc_current_version', String(36)), - Column('object_current_version', String(36)), - Column('replication_status', String(36), default='not-capable'), - Column('frozen', Boolean, default=False), - Column('active_backend_id', String(255)), - Column('cluster_name', String(255), nullable=True), - Column('uuid', String(36), nullable=True), - Index('services_uuid_idx', 'uuid', unique=True), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - consistencygroups = Table( - 'consistencygroups', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('id', String(36), primary_key=True, nullable=False), - Column('user_id', String(255)), - Column('project_id', String(255)), - Column('host', String(255)), - Column('availability_zone', String(255)), - Column('name', String(255)), - Column('description', String(255)), - Column('volume_type_id', String(255)), - Column('status', String(255)), - Column('cgsnapshot_id', String(36)), - Column('source_cgid', String(36)), - Column('cluster_name', String(255), nullable=True), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - cgsnapshots = Table( - 'cgsnapshots', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('id', String(36), primary_key=True, nullable=False), - Column('consistencygroup_id', String(36), - ForeignKey('consistencygroups.id'), - nullable=False, - index=True), - Column('user_id', String(255)), - Column('project_id', String(255)), - Column('name', String(255)), - Column('description', String(255)), - Column('status', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - groups = Table( - 'groups', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - 
Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean), - Column('id', String(36), primary_key=True, nullable=False), - Column('user_id', String(length=255)), - Column('project_id', String(length=255)), - Column('cluster_name', String(255)), - Column('host', String(length=255)), - Column('availability_zone', String(length=255)), - Column('name', String(length=255)), - Column('description', String(length=255)), - Column('group_type_id', String(length=36)), - Column('status', String(length=255)), - Column('group_snapshot_id', String(36)), - Column('source_group_id', String(36)), - Column('replication_status', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - group_snapshots = Table( - 'group_snapshots', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('id', String(36), primary_key=True), - Column('group_id', String(36), - ForeignKey('groups.id'), - nullable=False, - index=True), - Column('user_id', String(length=255)), - Column('project_id', String(length=255)), - Column('name', String(length=255)), - Column('description', String(length=255)), - Column('status', String(length=255)), - Column('group_type_id', String(length=36)), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - volumes = Table( - 'volumes', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', String(36), primary_key=True, nullable=False), - Column('ec2_id', String(255)), - Column('user_id', String(255)), - Column('project_id', String(255)), - Column('host', String(255)), - Column('size', Integer), - Column('availability_zone', String(255)), - Column('status', String(255)), - Column('attach_status', String(255)), - Column('scheduled_at', DateTime), - Column('launched_at', DateTime), - Column('terminated_at', DateTime), - Column('display_name', String(255)), - Column('display_description', String(255)), - Column('provider_location', String(256)), - Column('provider_auth', String(256)), - Column('snapshot_id', String(36)), - Column('volume_type_id', String(36)), - Column('source_volid', String(36)), - Column('bootable', Boolean), - Column('provider_geometry', String(255)), - Column('_name_id', String(36)), - Column('encryption_key_id', String(36)), - Column('migration_status', String(255)), - Column('replication_status', String(255)), - Column('replication_extended_status', String(255)), - Column('replication_driver_data', String(255)), - Column('consistencygroup_id', String(36), - ForeignKey('consistencygroups.id'), index=True), - Column('provider_id', String(255)), - Column('multiattach', Boolean), - Column('previous_status', String(255)), - Column('cluster_name', String(255), nullable=True), - Column('group_id', String(36), ForeignKey('groups.id'), index=True), - Column('service_uuid', String(36), ForeignKey('services.uuid'), - nullable=True), - Column('shared_targets', Boolean, default=True), - Index('volumes_service_uuid_idx', 'service_uuid', 'deleted'), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - volume_attachment = Table( - 'volume_attachment', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', String(36), primary_key=True, nullable=False), - Column('volume_id', String(36), 
ForeignKey('volumes.id'), - nullable=False, index=True), - Column('attached_host', String(255)), - Column('instance_uuid', String(36)), - Column('mountpoint', String(255)), - Column('attach_time', DateTime), - Column('detach_time', DateTime), - Column('attach_mode', String(36)), - Column('attach_status', String(255)), - Column('connection_info', Text), - Column('connector', Text), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - attachment_specs = Table( - 'attachment_specs', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(), default=False), - Column('id', Integer, primary_key=True, nullable=False), - Column('attachment_id', String(36), - ForeignKey('volume_attachment.id'), - nullable=False, - index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - snapshots = Table( - 'snapshots', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', String(36), primary_key=True, nullable=False), - Column('volume_id', String(36), - ForeignKey('volumes.id', name='snapshots_volume_id_fkey'), - nullable=False, index=True), - Column('user_id', String(255)), - Column('project_id', String(255)), - Column('status', String(255)), - Column('progress', String(255)), - Column('volume_size', Integer), - Column('scheduled_at', DateTime), - Column('display_name', String(255)), - Column('display_description', String(255)), - Column('provider_location', String(255)), - Column('encryption_key_id', String(36)), - Column('volume_type_id', String(36)), - Column('cgsnapshot_id', String(36), - ForeignKey('cgsnapshots.id'), index=True), - Column('provider_id', String(255)), - Column('provider_auth', String(255)), - Column('group_snapshot_id', String(36), - ForeignKey('group_snapshots.id'), index=True), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - snapshot_metadata = Table( - 'snapshot_metadata', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', Integer, primary_key=True, nullable=False), - Column('snapshot_id', String(36), ForeignKey('snapshots.id'), - nullable=False, index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - quality_of_service_specs = Table( - 'quality_of_service_specs', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('id', String(36), primary_key=True, nullable=False), - Column('specs_id', String(36), - ForeignKey('quality_of_service_specs.id'), - index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - volume_types = Table( - 'volume_types', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', String(36), primary_key=True, nullable=False), - Column('name', String(255)), - Column('qos_specs_id', String(36), - ForeignKey('quality_of_service_specs.id'), index=True), - Column('is_public', Boolean), - Column('description', String(255)), - mysql_engine='InnoDB', - 
mysql_charset='utf8' - ) - - volume_type_projects = Table( - 'volume_type_projects', meta, - Column('id', Integer, primary_key=True, nullable=False), - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('volume_type_id', String(36), - ForeignKey('volume_types.id')), - Column('project_id', String(255)), - Column('deleted', Integer), - UniqueConstraint('volume_type_id', 'project_id', 'deleted'), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - volume_metadata = Table( - 'volume_metadata', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', Integer, primary_key=True, nullable=False), - Column('volume_id', String(36), ForeignKey('volumes.id'), - nullable=False, index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - volume_type_extra_specs = Table( - 'volume_type_extra_specs', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', Integer, primary_key=True, nullable=False), - Column('volume_type_id', String(36), - ForeignKey('volume_types.id', - name='volume_type_extra_specs_ibfk_1'), - nullable=False, - index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - quotas = Table( - 'quotas', meta, - Column('id', Integer, primary_key=True, nullable=False), - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('project_id', String(255)), - Column('resource', String(255), nullable=False), - Column('hard_limit', Integer), - Column('allocated', Integer, default=0), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - quota_classes = Table( - 'quota_classes', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, - name=None)), - Column('id', Integer(), primary_key=True), - Column('class_name', String(255), index=True), - Column('resource', String(255)), - Column('hard_limit', Integer(), nullable=True), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - quota_usages = Table( - 'quota_usages', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, - name=None)), - Column('id', Integer(), primary_key=True), - Column('project_id', String(255), index=True), - Column('resource', String(255)), - Column('in_use', Integer(), nullable=False), - Column('reserved', Integer(), nullable=False), - Column('until_refresh', Integer(), nullable=True), - Index('quota_usage_project_resource_idx', - 'project_id', 'resource'), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - reservations = Table( - 'reservations', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, - name=None)), - Column('id', Integer(), primary_key=True), - Column('uuid', String(36), nullable=False), - Column('usage_id', - Integer(), - ForeignKey('quota_usages.id'), - nullable=True, - 
index=True), - Column('project_id', String(255), index=True), - Column('resource', String(255)), - Column('delta', Integer(), nullable=False), - Column('expire', DateTime(timezone=False)), - Column('allocated_id', Integer, ForeignKey('quotas.id'), - nullable=True, - index=True), - Index('reservations_deleted_expire_idx', - 'deleted', 'expire'), - Index('reservations_deleted_uuid_idx', - 'deleted', 'uuid'), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - volume_glance_metadata = Table( - 'volume_glance_metadata', - meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('id', Integer(), primary_key=True, nullable=False), - Column('volume_id', String(36), ForeignKey('volumes.id'), index=True), - Column('snapshot_id', String(36), - ForeignKey('snapshots.id'), index=True), - Column('key', String(255)), - Column('value', Text), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - backups = Table( - 'backups', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('id', String(36), primary_key=True, nullable=False), - Column('volume_id', String(36), nullable=False), - Column('user_id', String(255)), - Column('project_id', String(255)), - Column('host', String(255)), - Column('availability_zone', String(255)), - Column('display_name', String(255)), - Column('display_description', String(255)), - Column('container', String(255)), - Column('status', String(255)), - Column('fail_reason', String(255)), - Column('service_metadata', String(255)), - Column('service', String(255)), - Column('size', Integer()), - Column('object_count', Integer()), - Column('parent_id', String(36)), - Column('temp_volume_id', String(36)), - Column('temp_snapshot_id', String(36)), - Column('num_dependent_backups', Integer, default=0), - Column('snapshot_id', String(36)), - Column('data_timestamp', DateTime), - Column('restore_volume_id', String(36)), - Column('encryption_key_id', String(36)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - backup_metadata = Table( - 'backup_metadata', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(), default=False), - Column('id', Integer, primary_key=True, nullable=False), - Column('backup_id', String(36), - ForeignKey('backups.id'), - nullable=False, - index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - transfers = Table( - 'transfers', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean), - Column('id', String(36), primary_key=True, nullable=False), - Column('volume_id', String(36), ForeignKey('volumes.id'), - nullable=False, index=True), - Column('display_name', String(255)), - Column('salt', String(255)), - Column('crypt_hash', String(255)), - Column('expires_at', DateTime(timezone=False)), - Column('no_snapshots', Boolean, default=False), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - # Sqlite needs to handle nullable differently - is_nullable = (meta.bind.name == 
'sqlite') - - encryption = Table( - 'encryption', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(create_constraint=True, name=None)), - Column('cipher', String(255)), - Column('control_location', String(255), nullable=is_nullable), - Column('key_size', Integer), - Column('provider', String(255), nullable=is_nullable), - # NOTE(joel-coffman): The volume_type_id must be unique or else the - # referenced volume type becomes ambiguous. That is, specifying the - # volume type is not sufficient to identify a particular encryption - # scheme unless each volume type is associated with at most one - # encryption scheme. - Column('volume_type_id', String(36), nullable=is_nullable), - # NOTE (smcginnis): nullable=True triggers this to not set a default - # value, but since it's a primary key the resulting schema will end up - # still being NOT NULL. This is avoiding a case in MySQL where it will - # otherwise set this to NOT NULL DEFAULT ''. May be harmless, but - # inconsistent with previous schema. - Column('encryption_id', String(36), primary_key=True, nullable=True), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - volume_admin_metadata = Table( - 'volume_admin_metadata', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', Integer, primary_key=True, nullable=False), - Column('volume_id', String(36), ForeignKey('volumes.id'), - nullable=False, index=True), - Column('key', String(255)), - Column('value', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - initiator_data = Table( - 'driver_initiator_data', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('id', Integer, primary_key=True, nullable=False), - Column('initiator', String(255), index=True, nullable=False), - Column('namespace', String(255), nullable=False), - Column('key', String(255), nullable=False), - Column('value', String(255)), - UniqueConstraint('initiator', 'namespace', 'key'), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - image_volume_cache = Table( - 'image_volume_cache_entries', meta, - Column('image_updated_at', DateTime(timezone=False)), - Column('id', Integer, primary_key=True, nullable=False), - Column('host', String(255), index=True, nullable=False), - Column('image_id', String(36), index=True, nullable=False), - Column('volume_id', String(36), nullable=False), - Column('size', Integer, nullable=False), - Column('last_used', DateTime, nullable=False), - Column('cluster_name', String(255)), - mysql_engine='InnoDB', - mysql_charset='utf8' - ) - - messages = Table( - 'messages', meta, - Column('id', String(36), primary_key=True, nullable=False), - Column('project_id', String(255), nullable=False), - Column('request_id', String(255)), - Column('resource_type', String(36)), - Column('resource_uuid', String(255), nullable=True), - Column('event_id', String(255), nullable=False), - Column('message_level', String(255), nullable=False), - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean), - Column('expires_at', DateTime(timezone=False), index=True), - Column('detail_id', String(10), nullable=True), - Column('action_id', String(10), nullable=True), - mysql_engine='InnoDB', - 
mysql_charset='utf8' - ) - - cluster = Table( - 'clusters', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(), default=False), - Column('id', Integer, primary_key=True, nullable=False), - Column('name', String(255), nullable=False), - Column('binary', String(255), nullable=False), - Column('disabled', Boolean(), default=False), - Column('disabled_reason', String(255)), - Column('race_preventer', Integer, nullable=False, default=0), - Column('replication_status', String(length=36), default='not-capable'), - Column('active_backend_id', String(length=255)), - Column('frozen', Boolean, nullable=False, default=False, - server_default=expression.false()), - # To remove potential races on creation we have a constraint set on - # name and race_preventer fields, and we set value on creation to 0, so - # 2 clusters with the same name will fail this constraint. On deletion - # we change this field to the same value as the id which will be unique - # and will not conflict with the creation of another cluster with the - # same name. - UniqueConstraint('name', 'binary', 'race_preventer'), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - workers = Table( - 'workers', meta, - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean(), default=False), - Column('id', Integer, primary_key=True), - Column('resource_type', String(40), nullable=False), - Column('resource_id', String(36), nullable=False), - Column('status', String(255), nullable=False), - Column('service_id', Integer, ForeignKey('services.id'), - nullable=True, index=True), - Column('race_preventer', Integer, nullable=False, default=0, - server_default=text('0')), - UniqueConstraint('resource_type', 'resource_id'), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - group_types = Table( - 'group_types', meta, - Column('id', String(36), primary_key=True, nullable=False), - Column('name', String(255), nullable=False), - Column('description', String(255)), - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean), - Column('is_public', Boolean), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - group_type_specs = Table( - 'group_type_specs', meta, - Column('id', Integer, primary_key=True, nullable=False), - Column('key', String(255)), - Column('value', String(255)), - Column('group_type_id', String(36), - ForeignKey('group_types.id'), - nullable=False, - index=True), - Column('created_at', DateTime(timezone=False)), - Column('updated_at', DateTime(timezone=False)), - Column('deleted_at', DateTime(timezone=False)), - Column('deleted', Boolean), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - group_type_projects = Table( - 'group_type_projects', meta, - Column('id', Integer, primary_key=True, nullable=False), - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('group_type_id', String(36), - ForeignKey('group_types.id')), - Column('project_id', String(length=255)), - Column('deleted', Boolean(create_constraint=True, name=None)), - UniqueConstraint('group_type_id', 'project_id', 'deleted'), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - grp_vt_mapping = Table( - 
'group_volume_type_mapping', meta, - Column('created_at', DateTime), - Column('updated_at', DateTime), - Column('deleted_at', DateTime), - Column('deleted', Boolean), - Column('id', Integer, primary_key=True, nullable=False), - Column('volume_type_id', String(36), ForeignKey('volume_types.id'), - nullable=False, - index=True), - Column('group_id', String(36), - ForeignKey('groups.id'), nullable=False, - index=True), - mysql_engine='InnoDB', - mysql_charset='utf8', - ) - - return [consistencygroups, - cgsnapshots, - groups, - group_snapshots, - services, - volumes, - volume_attachment, - attachment_specs, - snapshots, - snapshot_metadata, - quality_of_service_specs, - volume_types, - volume_type_projects, - quotas, - volume_metadata, - volume_type_extra_specs, - quota_classes, - quota_usages, - reservations, - volume_glance_metadata, - backups, - backup_metadata, - transfers, - encryption, - volume_admin_metadata, - initiator_data, - image_volume_cache, - messages, - cluster, - workers, - group_types, - group_type_specs, - group_type_projects, - grp_vt_mapping] - - -def upgrade(migrate_engine): - meta = MetaData() - meta.bind = migrate_engine - - # create all tables - # Take care on create order for those with FK dependencies - tables = define_tables(meta) - - for table in tables: - table.create() - - if migrate_engine.name == "mysql": - tables = ["consistencygroups", - "cgsnapshots", - "snapshots", - "snapshot_metadata", - "quality_of_service_specs", - "volume_types", - "volume_type_projects", - "volumes", - "volume_attachment", - "migrate_version", - "quotas", - "services", - "volume_metadata", - "volume_type_extra_specs", - "quota_classes", - "quota_usages", - "reservations", - "volume_glance_metadata", - "backups", - "backup_metadata", - "transfers", - "encryption", - "volume_admin_metadata", - "driver_initiator_data", - "image_volume_cache_entries"] - - migrate_engine.execute("SET foreign_key_checks = 0") - for table in tables: - migrate_engine.execute( - "ALTER TABLE %s CONVERT TO CHARACTER SET utf8" % table) - migrate_engine.execute("SET foreign_key_checks = 1") - migrate_engine.execute( - "ALTER DATABASE %s DEFAULT CHARACTER SET utf8" % - migrate_engine.url.database) - migrate_engine.execute("ALTER TABLE %s Engine=InnoDB" % table) - - # Set default quota class values - quota_classes = Table('quota_classes', meta, autoload=True) - qci = quota_classes.insert() - qci.execute({'created_at': CREATED_AT, - 'class_name': CLASS_NAME, - 'resource': 'volumes', - 'hard_limit': CONF.quota_volumes, - 'deleted': False, }) - # Set default snapshots - qci.execute({'created_at': CREATED_AT, - 'class_name': CLASS_NAME, - 'resource': 'snapshots', - 'hard_limit': CONF.quota_snapshots, - 'deleted': False, }) - # Set default gigabytes - qci.execute({'created_at': CREATED_AT, - 'class_name': CLASS_NAME, - 'resource': 'gigabytes', - 'hard_limit': CONF.quota_gigabytes, - 'deleted': False, }) - qci.execute({'created_at': CREATED_AT, - 'class_name': CLASS_NAME, - 'resource': 'consistencygroups', - 'hard_limit': CONF.quota_consistencygroups, - 'deleted': False, }) - qci.execute({'created_at': CREATED_AT, - 'class_name': CLASS_NAME, - 'resource': 'per_volume_gigabytes', - 'hard_limit': -1, - 'deleted': False, }) - qci.execute({'created_at': CREATED_AT, - 'class_name': CLASS_NAME, - 'resource': 'groups', - 'hard_limit': CONF.quota_groups, - 'deleted': False, }) - - workers = Table('workers', meta, autoload=True) - - # This is only necessary for mysql, and since the table is not in use this - # will only be a 
schema update. - if migrate_engine.name.startswith('mysql'): - try: - workers.c.updated_at.alter(mysql.DATETIME(fsp=6)) - except Exception: - # MySQL v5.5 or earlier don't support sub-second resolution so we - # may have cleanup races in Active-Active configurations, that's - # why upgrading is recommended in that case. - # Code in Cinder is capable of working with 5.5, so for 5.5 there's - # no problem - pass - - # TODO(geguileo): Once we remove support for MySQL 5.5 we have to create - # an upgrade migration to remove this row. - # Set workers table sub-second support sentinel - wi = workers.insert() - now = timeutils.utcnow().replace(microsecond=123) - wi.execute({'created_at': now, - 'updated_at': now, - 'deleted': False, - 'resource_type': 'SENTINEL', - 'resource_id': 'SUB-SECOND', - 'status': 'OK'}) - - # Create default group type - group_types = Table('group_types', meta, autoload=True) - group_type_specs = Table('group_type_specs', meta, autoload=True) - - now = timeutils.utcnow() - grp_type_id = "%s" % uuid.uuid4() - group_type_dicts = { - 'id': grp_type_id, - 'name': volume_group_types.DEFAULT_CGSNAPSHOT_TYPE, - 'description': 'Default group type for migrating cgsnapshot', - 'created_at': now, - 'updated_at': now, - 'deleted': False, - 'is_public': True, - } - grp_type = group_types.insert() - grp_type.execute(group_type_dicts) - - group_spec_dicts = { - 'key': 'consistent_group_snapshot_enabled', - 'value': ' True', - 'group_type_id': grp_type_id, - 'created_at': now, - 'updated_at': now, - 'deleted': False, - } - grp_spec = group_type_specs.insert() - grp_spec.execute(group_spec_dicts) From 1ab2ef7b2cf87c3423595f51044002e784e7dc07 Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 25 Sep 2020 10:12:03 -0400 Subject: [PATCH 041/149] [SAP] Add db 120_cinder_init.py migration Upstream cinder train refactored all of the previous db migration scripts into 123_cinder_init.py to collapse all of the migrations into a single file. In order to keep the unit tests working, I had to collapse the db migration scripts into 120_cinder_init.py which includes our db patches, as our db schema version in queens ended with version 120. This ensures that 120 will be skipped when we upgrade from queens to train, and the first upgrade script that runs will start with 121. This patch fixes an issue of tox -epy27 failing in test. --- cinder/db/migration.py | 2 +- .../migrate_repo/versions/120_cinder_init.py | 906 ++++++++++++++++++ .../migrate_repo/versions/120_placeholder.py | 22 - 3 files changed, 907 insertions(+), 23 deletions(-) create mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/120_cinder_init.py delete mode 100644 cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py diff --git a/cinder/db/migration.py b/cinder/db/migration.py index 4f4e1ca695b..0485b1f2445 100644 --- a/cinder/db/migration.py +++ b/cinder/db/migration.py @@ -26,7 +26,7 @@ from cinder.db.sqlalchemy import api as db_api -INIT_VERSION = 122 +INIT_VERSION = 119 _IMPL = None _LOCK = threading.Lock() diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/120_cinder_init.py b/cinder/db/sqlalchemy/migrate_repo/versions/120_cinder_init.py new file mode 100644 index 00000000000..d007795b1e1 --- /dev/null +++ b/cinder/db/sqlalchemy/migrate_repo/versions/120_cinder_init.py @@ -0,0 +1,906 @@ +# Copyright 2012 OpenStack Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import datetime +import uuid + +from oslo_config import cfg +from oslo_utils import timeutils +from sqlalchemy.dialects import mysql +from sqlalchemy import Boolean, Column, DateTime, ForeignKey, Index, Integer +from sqlalchemy import MetaData, String, Table, Text, UniqueConstraint, text +from sqlalchemy.sql import expression + +from cinder.volume import group_types as volume_group_types + +# Get default values via config. The defaults will either +# come from the default values set in the quota option +# configuration or via cinder.conf if the user has configured +# default values for quotas there. +CONF = cfg.CONF +CONF.import_opt('quota_volumes', 'cinder.quota') +CONF.import_opt('quota_snapshots', 'cinder.quota') +CONF.import_opt('quota_gigabytes', 'cinder.quota') +CONF.import_opt('quota_consistencygroups', 'cinder.quota') + +CLASS_NAME = 'default' +CREATED_AT = datetime.datetime.now() # noqa + + +def define_tables(meta): + services = Table( + 'services', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', Integer, primary_key=True, nullable=False), + Column('host', String(255)), + Column('binary', String(255)), + Column('topic', String(255)), + Column('report_count', Integer, nullable=False), + Column('disabled', Boolean), + Column('availability_zone', String(255)), + Column('disabled_reason', String(255)), + Column('modified_at', DateTime(timezone=False)), + Column('rpc_current_version', String(36)), + Column('object_current_version', String(36)), + Column('replication_status', String(36), default='not-capable'), + Column('frozen', Boolean, default=False), + Column('active_backend_id', String(255)), + Column('cluster_name', String(255), nullable=True), + Column('uuid', String(36), nullable=True), + Index('services_uuid_idx', 'uuid', unique=True), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + consistencygroups = Table( + 'consistencygroups', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', String(36), primary_key=True, nullable=False), + Column('user_id', String(255)), + Column('project_id', String(255)), + Column('host', String(255)), + Column('availability_zone', String(255)), + Column('name', String(255)), + Column('description', String(255)), + Column('volume_type_id', String(255)), + Column('status', String(255)), + Column('cgsnapshot_id', String(36)), + Column('source_cgid', String(36)), + Column('cluster_name', String(255), nullable=True), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + cgsnapshots = Table( + 'cgsnapshots', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', String(36), primary_key=True, nullable=False), + Column('consistencygroup_id', String(36), + 
ForeignKey('consistencygroups.id'), + nullable=False, + index=True), + Column('user_id', String(255)), + Column('project_id', String(255)), + Column('name', String(255)), + Column('description', String(255)), + Column('status', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + groups = Table( + 'groups', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean), + Column('id', String(36), primary_key=True, nullable=False), + Column('user_id', String(length=255)), + Column('project_id', String(length=255)), + Column('cluster_name', String(255)), + Column('host', String(length=255)), + Column('availability_zone', String(length=255)), + Column('name', String(length=255)), + Column('description', String(length=255)), + Column('group_type_id', String(length=36)), + Column('status', String(length=255)), + Column('group_snapshot_id', String(36)), + Column('source_group_id', String(36)), + Column('replication_status', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + group_snapshots = Table( + 'group_snapshots', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', String(36), primary_key=True), + Column('group_id', String(36), + ForeignKey('groups.id'), + nullable=False, + index=True), + Column('user_id', String(length=255)), + Column('project_id', String(length=255)), + Column('name', String(length=255)), + Column('description', String(length=255)), + Column('status', String(length=255)), + Column('group_type_id', String(length=36)), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + volumes = Table( + 'volumes', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', String(36), primary_key=True, nullable=False), + Column('ec2_id', String(255)), + Column('user_id', String(255)), + Column('project_id', String(255)), + Column('host', String(255)), + Column('size', Integer), + Column('availability_zone', String(255)), + Column('status', String(255)), + Column('attach_status', String(255)), + Column('scheduled_at', DateTime), + Column('launched_at', DateTime), + Column('terminated_at', DateTime), + Column('display_name', String(255)), + Column('display_description', String(255)), + Column('provider_location', String(256)), + Column('provider_auth', String(256)), + Column('snapshot_id', String(36)), + Column('volume_type_id', String(36)), + Column('source_volid', String(36)), + Column('bootable', Boolean), + Column('provider_geometry', String(255)), + Column('_name_id', String(36)), + Column('encryption_key_id', String(36)), + Column('migration_status', String(255)), + Column('replication_status', String(255)), + Column('replication_extended_status', String(255)), + Column('replication_driver_data', String(255)), + Column('consistencygroup_id', String(36), + ForeignKey('consistencygroups.id'), index=True), + Column('provider_id', String(255)), + Column('multiattach', Boolean), + Column('previous_status', String(255)), + Column('cluster_name', String(255), nullable=True), + Column('group_id', String(36), ForeignKey('groups.id'), index=True), + Column('service_uuid', String(36), ForeignKey('services.uuid'), + nullable=True), + Column('shared_targets', 
Boolean, default=True), + Index('volumes_service_uuid_idx', 'service_uuid', 'deleted'), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + volume_attachment = Table( + 'volume_attachment', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', String(36), primary_key=True, nullable=False), + Column('volume_id', String(36), ForeignKey('volumes.id'), + nullable=False, index=True), + Column('attached_host', String(255)), + Column('instance_uuid', String(36)), + Column('mountpoint', String(255)), + Column('attach_time', DateTime), + Column('detach_time', DateTime), + Column('attach_mode', String(36)), + Column('attach_status', String(255)), + Column('connection_info', Text), + Column('connector', Text), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + attachment_specs = Table( + 'attachment_specs', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(), default=False), + Column('id', Integer, primary_key=True, nullable=False), + Column('attachment_id', String(36), + ForeignKey('volume_attachment.id'), + nullable=False, + index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + snapshots = Table( + 'snapshots', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', String(36), primary_key=True, nullable=False), + Column('volume_id', String(36), + ForeignKey('volumes.id', name='snapshots_volume_id_fkey'), + nullable=False, index=True), + Column('user_id', String(255)), + Column('project_id', String(255)), + Column('status', String(255)), + Column('progress', String(255)), + Column('volume_size', Integer), + Column('scheduled_at', DateTime), + Column('display_name', String(255)), + Column('display_description', String(255)), + Column('provider_location', String(255)), + Column('encryption_key_id', String(36)), + Column('volume_type_id', String(36)), + Column('cgsnapshot_id', String(36), + ForeignKey('cgsnapshots.id'), index=True), + Column('provider_id', String(255)), + Column('provider_auth', String(255)), + Column('group_snapshot_id', String(36), + ForeignKey('group_snapshots.id'), index=True), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + snapshot_metadata = Table( + 'snapshot_metadata', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', Integer, primary_key=True, nullable=False), + Column('snapshot_id', String(36), ForeignKey('snapshots.id'), + nullable=False, index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + quality_of_service_specs = Table( + 'quality_of_service_specs', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', String(36), primary_key=True, nullable=False), + Column('specs_id', String(36), + ForeignKey('quality_of_service_specs.id'), + index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + volume_types = Table( + 'volume_types', 
meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', String(36), primary_key=True, nullable=False), + Column('name', String(255)), + Column('qos_specs_id', String(36), + ForeignKey('quality_of_service_specs.id'), index=True), + Column('is_public', Boolean), + Column('description', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + volume_type_projects = Table( + 'volume_type_projects', meta, + Column('id', Integer, primary_key=True, nullable=False), + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('volume_type_id', String(36), + ForeignKey('volume_types.id')), + Column('project_id', String(255)), + Column('deleted', Integer), + UniqueConstraint('volume_type_id', 'project_id', 'deleted'), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + volume_metadata = Table( + 'volume_metadata', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', Integer, primary_key=True, nullable=False), + Column('volume_id', String(36), ForeignKey('volumes.id'), + nullable=False, index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + volume_type_extra_specs = Table( + 'volume_type_extra_specs', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', Integer, primary_key=True, nullable=False), + Column('volume_type_id', String(36), + ForeignKey('volume_types.id', + name='volume_type_extra_specs_ibfk_1'), + nullable=False, + index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + quotas = Table( + 'quotas', meta, + Column('id', Integer, primary_key=True, nullable=False), + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('project_id', String(255)), + Column('resource', String(255), nullable=False), + Column('hard_limit', Integer), + Column('allocated', Integer, default=0), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + quota_classes = Table( + 'quota_classes', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, + name=None)), + Column('id', Integer(), primary_key=True), + Column('class_name', String(255), index=True), + Column('resource', String(255)), + Column('hard_limit', Integer(), nullable=True), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + quota_usages = Table( + 'quota_usages', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, + name=None)), + Column('id', Integer(), primary_key=True), + Column('project_id', String(255), index=True), + Column('resource', String(255)), + Column('in_use', Integer(), nullable=False), + Column('reserved', Integer(), nullable=False), + Column('until_refresh', Integer(), nullable=True), + Index('quota_usage_project_resource_idx', + 'project_id', 'resource'), + UniqueConstraint('project_id', 'resource', 'deleted'), + mysql_engine='InnoDB', + 
mysql_charset='utf8', + ) + + reservations = Table( + 'reservations', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, + name=None)), + Column('id', Integer(), primary_key=True), + Column('uuid', String(36), nullable=False), + Column('usage_id', + Integer(), + ForeignKey('quota_usages.id'), + nullable=True, + index=True), + Column('project_id', String(255), index=True), + Column('resource', String(255)), + Column('delta', Integer(), nullable=False), + Column('expire', DateTime(timezone=False)), + Column('allocated_id', Integer, ForeignKey('quotas.id'), + nullable=True, + index=True), + Index('reservations_deleted_expire_idx', + 'deleted', 'expire'), + Index('reservations_deleted_uuid_idx', + 'deleted', 'uuid'), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + volume_glance_metadata = Table( + 'volume_glance_metadata', + meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', Integer(), primary_key=True, nullable=False), + Column('volume_id', String(36), ForeignKey('volumes.id'), index=True), + Column('snapshot_id', String(36), + ForeignKey('snapshots.id'), index=True), + Column('key', String(255)), + Column('value', Text), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + backups = Table( + 'backups', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('id', String(36), primary_key=True, nullable=False), + Column('volume_id', String(36), nullable=False), + Column('user_id', String(255)), + Column('project_id', String(255)), + Column('host', String(255)), + Column('availability_zone', String(255)), + Column('display_name', String(255)), + Column('display_description', String(255)), + Column('container', String(255)), + Column('status', String(255)), + Column('fail_reason', String(255)), + Column('service_metadata', String(255)), + Column('service', String(255)), + Column('size', Integer()), + Column('object_count', Integer()), + Column('parent_id', String(36)), + Column('temp_volume_id', String(36)), + Column('temp_snapshot_id', String(36)), + Column('num_dependent_backups', Integer, default=0), + Column('snapshot_id', String(36)), + Column('data_timestamp', DateTime), + Column('restore_volume_id', String(36)), + Column('encryption_key_id', String(36)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + backup_metadata = Table( + 'backup_metadata', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(), default=False), + Column('id', Integer, primary_key=True, nullable=False), + Column('backup_id', String(36), + ForeignKey('backups.id'), + nullable=False, + index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + transfers = Table( + 'transfers', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean), + Column('id', String(36), 
primary_key=True, nullable=False), + Column('volume_id', String(36), ForeignKey('volumes.id'), + nullable=False, index=True), + Column('display_name', String(255)), + Column('salt', String(255)), + Column('crypt_hash', String(255)), + Column('expires_at', DateTime(timezone=False)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + # Sqlite needs to handle nullable differently + is_nullable = (meta.bind.name == 'sqlite') + + encryption = Table( + 'encryption', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(create_constraint=True, name=None)), + Column('cipher', String(255)), + Column('control_location', String(255), nullable=is_nullable), + Column('key_size', Integer), + Column('provider', String(255), nullable=is_nullable), + # NOTE(joel-coffman): The volume_type_id must be unique or else the + # referenced volume type becomes ambiguous. That is, specifying the + # volume type is not sufficient to identify a particular encryption + # scheme unless each volume type is associated with at most one + # encryption scheme. + Column('volume_type_id', String(36), nullable=is_nullable), + # NOTE (smcginnis): nullable=True triggers this to not set a default + # value, but since it's a primary key the resulting schema will end up + # still being NOT NULL. This is avoiding a case in MySQL where it will + # otherwise set this to NOT NULL DEFAULT ''. May be harmless, but + # inconsistent with previous schema. + Column('encryption_id', String(36), primary_key=True, nullable=True), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + volume_admin_metadata = Table( + 'volume_admin_metadata', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', Integer, primary_key=True, nullable=False), + Column('volume_id', String(36), ForeignKey('volumes.id'), + nullable=False, index=True), + Column('key', String(255)), + Column('value', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + initiator_data = Table( + 'driver_initiator_data', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('id', Integer, primary_key=True, nullable=False), + Column('initiator', String(255), index=True, nullable=False), + Column('namespace', String(255), nullable=False), + Column('key', String(255), nullable=False), + Column('value', String(255)), + UniqueConstraint('initiator', 'namespace', 'key'), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + image_volume_cache = Table( + 'image_volume_cache_entries', meta, + Column('image_updated_at', DateTime(timezone=False)), + Column('id', Integer, primary_key=True, nullable=False), + Column('host', String(255), index=True, nullable=False), + Column('image_id', String(36), index=True, nullable=False), + Column('volume_id', String(36), nullable=False), + Column('size', Integer, nullable=False), + Column('last_used', DateTime, nullable=False), + Column('cluster_name', String(255)), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + messages = Table( + 'messages', meta, + Column('id', String(36), primary_key=True, nullable=False), + Column('project_id', String(255), nullable=False), + Column('request_id', String(255)), + Column('resource_type', String(36)), + Column('resource_uuid', String(255), nullable=True), + Column('event_id', String(255), nullable=False), + 
Column('message_level', String(255), nullable=False), + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean), + Column('expires_at', DateTime(timezone=False), index=True), + Column('detail_id', String(10), nullable=True), + Column('action_id', String(10), nullable=True), + mysql_engine='InnoDB', + mysql_charset='utf8' + ) + + cluster = Table( + 'clusters', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(), default=False), + Column('id', Integer, primary_key=True, nullable=False), + Column('name', String(255), nullable=False), + Column('binary', String(255), nullable=False), + Column('disabled', Boolean(), default=False), + Column('disabled_reason', String(255)), + Column('race_preventer', Integer, nullable=False, default=0), + Column('replication_status', String(length=36), default='not-capable'), + Column('active_backend_id', String(length=255)), + Column('frozen', Boolean, nullable=False, default=False, + server_default=expression.false()), + # To remove potential races on creation we have a constraint set on + # name and race_preventer fields, and we set value on creation to 0, so + # 2 clusters with the same name will fail this constraint. On deletion + # we change this field to the same value as the id which will be unique + # and will not conflict with the creation of another cluster with the + # same name. + UniqueConstraint('name', 'binary', 'race_preventer'), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + workers = Table( + 'workers', meta, + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean(), default=False), + Column('id', Integer, primary_key=True), + Column('resource_type', String(40), nullable=False), + Column('resource_id', String(36), nullable=False), + Column('status', String(255), nullable=False), + Column('service_id', Integer, ForeignKey('services.id'), + nullable=True, index=True), + Column('race_preventer', Integer, nullable=False, default=0, + server_default=text('0')), + UniqueConstraint('resource_type', 'resource_id'), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + group_types = Table( + 'group_types', meta, + Column('id', String(36), primary_key=True, nullable=False), + Column('name', String(255), nullable=False), + Column('description', String(255)), + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean), + Column('is_public', Boolean), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + group_type_specs = Table( + 'group_type_specs', meta, + Column('id', Integer, primary_key=True, nullable=False), + Column('key', String(255)), + Column('value', String(255)), + Column('group_type_id', String(36), + ForeignKey('group_types.id'), + nullable=False, + index=True), + Column('created_at', DateTime(timezone=False)), + Column('updated_at', DateTime(timezone=False)), + Column('deleted_at', DateTime(timezone=False)), + Column('deleted', Boolean), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + group_type_projects = Table( + 'group_type_projects', meta, + Column('id', Integer, primary_key=True, nullable=False), + Column('created_at', 
DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('group_type_id', String(36), + ForeignKey('group_types.id')), + Column('project_id', String(length=255)), + Column('deleted', Boolean(create_constraint=True, name=None)), + UniqueConstraint('group_type_id', 'project_id', 'deleted'), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + grp_vt_mapping = Table( + 'group_volume_type_mapping', meta, + Column('created_at', DateTime), + Column('updated_at', DateTime), + Column('deleted_at', DateTime), + Column('deleted', Boolean), + Column('id', Integer, primary_key=True, nullable=False), + Column('volume_type_id', String(36), ForeignKey('volume_types.id'), + nullable=False, + index=True), + Column('group_id', String(36), + ForeignKey('groups.id'), nullable=False, + index=True), + mysql_engine='InnoDB', + mysql_charset='utf8', + ) + + return [consistencygroups, + cgsnapshots, + groups, + group_snapshots, + services, + volumes, + volume_attachment, + attachment_specs, + snapshots, + snapshot_metadata, + quality_of_service_specs, + volume_types, + volume_type_projects, + quotas, + volume_metadata, + volume_type_extra_specs, + quota_classes, + quota_usages, + reservations, + volume_glance_metadata, + backups, + backup_metadata, + transfers, + encryption, + volume_admin_metadata, + initiator_data, + image_volume_cache, + messages, + cluster, + workers, + group_types, + group_type_specs, + group_type_projects, + grp_vt_mapping] + + +def upgrade(migrate_engine): + meta = MetaData() + meta.bind = migrate_engine + + # create all tables + # Take care on create order for those with FK dependencies + tables = define_tables(meta) + + for table in tables: + table.create() + + if migrate_engine.name == "mysql": + tables = ["consistencygroups", + "cgsnapshots", + "snapshots", + "snapshot_metadata", + "quality_of_service_specs", + "volume_types", + "volume_type_projects", + "volumes", + "volume_attachment", + "migrate_version", + "quotas", + "services", + "volume_metadata", + "volume_type_extra_specs", + "quota_classes", + "quota_usages", + "reservations", + "volume_glance_metadata", + "backups", + "backup_metadata", + "transfers", + "encryption", + "volume_admin_metadata", + "driver_initiator_data", + "image_volume_cache_entries"] + + migrate_engine.execute("SET foreign_key_checks = 0") + for table in tables: + migrate_engine.execute( + "ALTER TABLE %s CONVERT TO CHARACTER SET utf8" % table) + migrate_engine.execute("SET foreign_key_checks = 1") + migrate_engine.execute( + "ALTER DATABASE %s DEFAULT CHARACTER SET utf8" % + migrate_engine.url.database) + migrate_engine.execute("ALTER TABLE %s Engine=InnoDB" % table) + + # Set default quota class values + quota_classes = Table('quota_classes', meta, autoload=True) + qci = quota_classes.insert() + qci.execute({'created_at': CREATED_AT, + 'class_name': CLASS_NAME, + 'resource': 'volumes', + 'hard_limit': CONF.quota_volumes, + 'deleted': False, }) + # Set default snapshots + qci.execute({'created_at': CREATED_AT, + 'class_name': CLASS_NAME, + 'resource': 'snapshots', + 'hard_limit': CONF.quota_snapshots, + 'deleted': False, }) + # Set default gigabytes + qci.execute({'created_at': CREATED_AT, + 'class_name': CLASS_NAME, + 'resource': 'gigabytes', + 'hard_limit': CONF.quota_gigabytes, + 'deleted': False, }) + qci.execute({'created_at': CREATED_AT, + 'class_name': CLASS_NAME, + 'resource': 'consistencygroups', + 'hard_limit': CONF.quota_consistencygroups, + 'deleted': False, }) + qci.execute({'created_at': CREATED_AT, + 
'class_name': CLASS_NAME, + 'resource': 'per_volume_gigabytes', + 'hard_limit': -1, + 'deleted': False, }) + qci.execute({'created_at': CREATED_AT, + 'class_name': CLASS_NAME, + 'resource': 'groups', + 'hard_limit': CONF.quota_groups, + 'deleted': False, }) + + workers = Table('workers', meta, autoload=True) + + # This is only necessary for mysql, and since the table is not in use this + # will only be a schema update. + if migrate_engine.name.startswith('mysql'): + try: + workers.c.updated_at.alter(mysql.DATETIME(fsp=6)) + except Exception: + # MySQL v5.5 or earlier don't support sub-second resolution so we + # may have cleanup races in Active-Active configurations, that's + # why upgrading is recommended in that case. + # Code in Cinder is capable of working with 5.5, so for 5.5 there's + # no problem + pass + + # TODO(geguileo): Once we remove support for MySQL 5.5 we have to create + # an upgrade migration to remove this row. + # Set workers table sub-second support sentinel + wi = workers.insert() + now = timeutils.utcnow().replace(microsecond=123) + wi.execute({'created_at': now, + 'updated_at': now, + 'deleted': False, + 'resource_type': 'SENTINEL', + 'resource_id': 'SUB-SECOND', + 'status': 'OK'}) + + # Create default group type + group_types = Table('group_types', meta, autoload=True) + group_type_specs = Table('group_type_specs', meta, autoload=True) + + now = timeutils.utcnow() + grp_type_id = "%s" % uuid.uuid4() + group_type_dicts = { + 'id': grp_type_id, + 'name': volume_group_types.DEFAULT_CGSNAPSHOT_TYPE, + 'description': 'Default group type for migrating cgsnapshot', + 'created_at': now, + 'updated_at': now, + 'deleted': False, + 'is_public': True, + } + grp_type = group_types.insert() + grp_type.execute(group_type_dicts) + + group_spec_dicts = { + 'key': 'consistent_group_snapshot_enabled', + 'value': ' True', + 'group_type_id': grp_type_id, + 'created_at': now, + 'updated_at': now, + 'deleted': False, + } + grp_spec = group_type_specs.insert() + grp_spec.execute(group_spec_dicts) diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py b/cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py deleted file mode 100644 index 9609bdf16ae..00000000000 --- a/cinder/db/sqlalchemy/migrate_repo/versions/120_placeholder.py +++ /dev/null @@ -1,22 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); you may -# not use this file except in compliance with the License. You may obtain -# a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations -# under the License. - -# This is a placeholder for Queens backports. -# Do not use this number for new Rocky work. New work starts after -# all the placeholders. -# -# See this for more information: -# http://lists.openstack.org/pipermail/openstack-dev/2013-March/006827.html - - -def upgrade(migrate_engine): - pass From 5d6c2323c226a34f2b6ea48a7c756c403161b5ea Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 16 Nov 2020 10:15:45 -0500 Subject: [PATCH 042/149] [SAP] Fix create from snapshot with larger size This patch fixes a problem when creating a volume from snapshot when the size of the volume is larger than the original snapshot. 
The problem was caused by python creating a float value from the size calculation, and the vmware api not being able to process the size float. The value needs to be a whole number/int. --- cinder/volume/drivers/vmware/vmdk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8c6fecaaa81..b17f199bf6e 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2432,7 +2432,7 @@ def _create_volume_from_template(self, volume, path): disk_type = VMwareVcVmdkDriver._get_disk_type(volume) device_changes = None if volume['size']: - new_size_in_kb = volume['size'] * units.Gi / units.Ki + new_size_in_kb = int(volume['size'] * units.Gi / units.Ki) disk_device = self.volumeops._get_disk_device(template) if new_size_in_kb > disk_device.capacityInKB: device_changes = self.volumeops._create_spec_for_disk_expand( From 8e65962b8afc8d2ec93085efee0ec43286bf9ca3 Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 28 Jan 2021 08:11:05 -0500 Subject: [PATCH 043/149] [SAP] add sap custom requirements This patch adds osbrick oslo.vmware into the custom-requirements.txt that used to be in upper-constraints.txt file from global requirements repo. Those were removed from upper constraints here: https://github.com/sapcc/requirements/commit/4aeedacaa0276b13b85c0237c9b848a83fbe4dde --- custom-requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/custom-requirements.txt b/custom-requirements.txt index 1efadfcaa5d..f398d2f90b9 100644 --- a/custom-requirements.txt +++ b/custom-requirements.txt @@ -9,3 +9,6 @@ jaeger-client -e git+https://github.com/sapcc/openstack-watcher-middleware.git#egg=watcher-middleware -e git+https://github.com/sapcc/openstack-audit-middleware.git#egg=audit-middleware -e git+https://github.com/sapcc/openstack-rate-limit-middleware.git#egg=rate-limit-middleware +-e git+https://github.com/sapcc/os-brick.git@stable/train-m3#egg=os-brick +-e git+https://github.com/sapcc/oslo.vmware.git@stable/train-m3#egg=oslo.vmware +-e git+https://github.com/sapcc/dnspython.git@ccloud#egg=dnspython From 3a8f98085ed290afadd175c446fdc20932f348a6 Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 28 Sep 2020 15:01:32 -0400 Subject: [PATCH 044/149] [SAP] Add libpq-dev to concourse_unit_test_task This patch updates the running of the concourse unit test task against train to include the libpq-dev package as well as make sure we run py3 tox. 
--- concourse_unit_test_task | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/concourse_unit_test_task b/concourse_unit_test_task index ac17d462ebe..85a64a4f95e 100644 --- a/concourse_unit_test_task +++ b/concourse_unit_test_task @@ -1,9 +1,9 @@ export DEBIAN_FRONTEND=noninteractive && \ export UPPER_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/queens-m3/upper-constraints.txt && \ apt-get update && \ -apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext && \ +apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext libpq-dev && \ pip install -U pip && \ pip install tox "six>=1.14.0" && \ git clone -b stable/queens-m3 --single-branch https://github.com/sapcc/cinder.git --depth=1 && \ cd cinder && \ -tox -e py27,pep8 +tox -e py27,py3,pep8 From c905d2af2e677d66427707d0690574b82d040915 Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 14 Sep 2020 10:14:27 -0400 Subject: [PATCH 045/149] [SAP] Fix pep8, missing backup restore feature This patch updates the SAP modified chunkedbackup driver to include raising the exception when a user cancels the restore process. That fixes a failure in the unit tests for the nfs driver expected exception raising of BackupRestoreCancel. This patch also fixes some pep8 issues with utils being renamed volume_utils in train. --- cinder/backup/chunkeddriver.py | 11 ++++++++--- cinder/tests/unit/backup/test_chunkeddriver.py | 5 +++-- concourse_unit_test_task | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 66be1d319e6..86ceb5108cd 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -713,7 +713,7 @@ def restore(self, backup, volume_id, volume_file): backup1 = backup_list[index] index = index - 1 metadata = self._read_metadata(backup1) - restore_handle.add_backup(backup1, metadata) + restore_handle.add_backup(backup1, metadata, backup, volume_id) volume_meta = metadata.get('volume_meta', None) try: @@ -775,7 +775,7 @@ def __init__(self, chunked_driver, volume_id, volume_file): self._idx = -1 @abc.abstractmethod - def add_backup(self, backup, metadata): + def add_backup(self, backup, metadata, requested_backup, volume_id): """This is called for each backup in the incremental backups chain.""" return @@ -934,7 +934,7 @@ def add_object(self, metadata_object): class BackupRestoreHandleV1(BackupRestoreHandle): """Handles restoring of V1 backups.""" - def add_backup(self, backup, metadata): + def add_backup(self, backup, metadata, requested_backup, volume_id): """Processes a v1 volume backup for being restored.""" metadata_objects = metadata['objects'] metadata_object_names = [] @@ -952,6 +952,11 @@ def add_backup(self, backup, metadata): raise exception.InvalidBackup(reason=err) for metadata_object in metadata_objects: + with requested_backup.as_read_deleted(): + requested_backup.refresh() + if requested_backup.status != fields.BackupStatus.RESTORING: + raise exception.BackupRestoreCancel(back_id=backup.id, + vol_id=volume_id) object_name, obj = list(metadata_object.items())[0] # keep the information needed to read the object from the # storage backend diff --git a/cinder/tests/unit/backup/test_chunkeddriver.py b/cinder/tests/unit/backup/test_chunkeddriver.py index 19583df57ea..2839e5e2441 100644 --- a/cinder/tests/unit/backup/test_chunkeddriver.py +++ b/cinder/tests/unit/backup/test_chunkeddriver.py @@ -455,7 +455,8 @@ def 
test_backup_invalid_size(self): self.backup, mock.Mock()) - def test_restore(self): + @mock.patch('cinder.backup.chunkeddriver.BackupRestoreHandleV1.add_backup') + def test_restore(self, mock_add_backup): volume_file = mock.Mock() restore_test = mock.Mock() self.driver._restore_v1 = restore_test @@ -468,7 +469,7 @@ def test_restore(self): self.driver.restore(backup, self.volume, volume_file) self.assertEqual(2, mock_put.call_count) - restore_test.assert_called() + mock_add_backup.assert_called() def test_delete_backup(self): with mock.patch.object(self.driver, 'delete_object') as mock_delete: diff --git a/concourse_unit_test_task b/concourse_unit_test_task index 85a64a4f95e..d85063548e8 100644 --- a/concourse_unit_test_task +++ b/concourse_unit_test_task @@ -1,5 +1,5 @@ export DEBIAN_FRONTEND=noninteractive && \ -export UPPER_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/queens-m3/upper-constraints.txt && \ +export UPPER_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/train-m3/upper-constraints.txt && \ apt-get update && \ apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext libpq-dev && \ pip install -U pip && \ From 045082f43e107e2a1dc9ee984549490abda6d70a Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 11 Sep 2020 14:10:20 -0400 Subject: [PATCH 046/149] [SAP] Fix more pep8 checks This patch fixes some pep8 checks not covered during queens. --- cinder/volume/drivers/vmware/volumeops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 3b522dc9c74..2efa44ef195 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1459,8 +1459,8 @@ def _create_spec_for_disk_expand(self, disk_device, new_size_in_kb): disk_spec.operation = 'edit' disk_spec.device = disk_device disk_spec.device.capacityInKB = new_size_in_kb - disk_spec.device.capacityInBytes =\ - disk_spec.device.capacityInKB * units.Ki + disk_spec.device.capacityInBytes = ( + disk_spec.device.capacityInKB * units.Ki) return disk_spec def detach_disk_from_backing(self, backing, disk_device): From 82f3a3afe634f2763807edbf498cc524a4d55247 Mon Sep 17 00:00:00 2001 From: Jakob Karge Date: Wed, 6 Jan 2021 21:25:27 +0100 Subject: [PATCH 047/149] [SAP] Handle sharding-enabled in scheduler shard filter If the project tags from keystone contain the tag "sharding_enabled" then the backends in _all_ shards will pass the shard filter for this project. This was done to facilitate both enabling sharding (only one simple tag to set), and mainly for frontend code to detect sharding status (mostly) without parsing tag strings. (If sharding is not enabled, then vc-* tags will have to be parsed to find out which shard(s) the project is on.) --- cinder/scheduler/filters/shard_filter.py | 13 ++++++++-- .../tests/unit/scheduler/test_shard_filter.py | 26 +++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index fafe7ed2f38..67612939b58 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -47,6 +47,9 @@ class ShardFilter(filters.BaseBackendFilter): Every project has tags assigned, which define the vCenter the project is in. This filter filters out any backend that's not configured for the shard of a project. 
+ + Alternatively the project may have the "sharding_enabled" tag set, which + enables the project for backends in all shards. """ # project shards do not change within a request @@ -56,6 +59,7 @@ class ShardFilter(filters.BaseBackendFilter): _PROJECT_SHARD_CACHE_RETENTION_TIME = 10 * 60 _SHARD_PREFIX = 'vc-' _CAPABILITY_NAME = 'vcenter-shard' + _ALL_SHARDS = "sharding_enabled" def _get_keystone_adapter(self): """Return a keystone adapter @@ -117,7 +121,8 @@ def _update_cache(self): for project in data['projects']: project_id = project['id'] shards = [t for t in project['tags'] - if t.startswith(self._SHARD_PREFIX)] + if t.startswith(self._SHARD_PREFIX) + or t == self._ALL_SHARDS] self._PROJECT_SHARD_CACHE[project_id] = shards url = data['links']['next'] @@ -190,7 +195,11 @@ def backend_passes(self, backend_state, filter_properties): 'shard_prefix': self._SHARD_PREFIX}) return False - if configured_shards_set & set(shards): + if self._ALL_SHARDS in shards: + LOG.debug('project enabled for all shards %(project_shards)s.', + {'project_shards': shards}) + return True + elif configured_shards_set & set(shards): LOG.debug('%(backend)s shard %(backend_shards)s found in project ' 'shards %(project_shards)s.', {'backend': backend_state, diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 75d70e9d124..35251ba7c62 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -135,3 +135,29 @@ def test_shard_override_no_data(self): host = fakes.FakeBackendState('host1', {'capabilities': caps}) self.props['scheduler_hints'] = {'vcenter-shard': None} self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + + def test_sharding_enabled_any_backend_match(self): + self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled'] + self.props['request_spec']['volume_properties']['project_id'] = 'baz' + caps = {'vcenter-shard': 'vc-a-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + + def test_sharding_enabled_and_single_shard_any_backend_match(self): + self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled', + 'vc-a-1'] + self.props['request_spec']['volume_properties']['project_id'] = 'baz' + caps = {'vcenter-shard': 'vc-a-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + + def test_scheduler_hints_override_sharding_enabled(self): + self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled'] + self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} + self.props['request_spec']['volume_properties']['project_id'] = 'baz' + caps0 = {'vcenter-shard': 'vc-a-0'} + host0 = fakes.FakeBackendState('host0', {'capabilities': caps0}) + self.assertFalse(self.filt_cls.backend_passes(host0, self.props)) + caps1 = {'vcenter-shard': 'vc-a-1'} + host1 = fakes.FakeBackendState('host1', {'capabilities': caps1}) + self.assertTrue(self.filt_cls.backend_passes(host1, self.props)) From 01ef3acc88934cc7bf675c21ab7a91760c878640 Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 25 Sep 2020 15:26:20 -0400 Subject: [PATCH 048/149] [SAP] Fix py3 unit tests This patch updates one of the unit tests that fails py3 due to a change in our version of the vmdk driver. 
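The commit message does not spell out the Python 3 failure mode, but the test change below suggests the usual py2-to-py3 ordering problem: the SAP driver now compares the requested size against the mocked disk device's capacityInKB, and ordering an int against a bare MagicMock raises TypeError under Python 3 (Python 2 silently compared them). A minimal illustration under that assumption (the sizes and the 'tmpl' argument are made up):

    from unittest import mock

    vops = mock.MagicMock()  # stands in for the mocked volumeops
    capacity = vops._get_disk_device('tmpl').capacityInKB  # a MagicMock

    try:
        2097152 > capacity  # int vs MagicMock
    except TypeError as exc:  # raised on py3; py2 ordered them anyway
        print('comparison fails: %s' % exc)

    # What the updated test does instead: give the mock a real capacity.
    vops._get_disk_device.return_value.capacityInKB = 1048576
    print(2097152 > vops._get_disk_device('tmpl').capacityInKB)  # True

Giving the mock a real integer (and supplying the new config options the driver reads) lets the size comparison run as a plain int comparison on both interpreters.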
--- cinder/tests/unit/volume/drivers/vmware/test_fcd.py | 3 +++ .../tests/unit/volume/drivers/vmware/test_vmware_vmdk.py | 7 ++++++- concourse_unit_test_task | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index 4e3126aa746..e79a8b37473 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -44,6 +44,7 @@ class VMwareVStorageObjectDriverTestCase(test.TestCase): IP = 'localhost' PORT = 2321 IMG_TX_TIMEOUT = 10 + MAX_OBJECTS = 100 RESERVED_PERCENTAGE = 0 VMDK_DRIVER = vmdk.VMwareVcVmdkDriver FCD_DRIVER = fcd.VMwareVStorageObjectDriver @@ -65,6 +66,8 @@ def setUp(self): self._config.vmware_host_ip = self.IP self._config.vmware_host_port = self.PORT self._config.vmware_image_transfer_timeout_secs = self.IMG_TX_TIMEOUT + self._config.vmware_max_objects_retrieval = self.MAX_OBJECTS + self._config.vmware_storage_profile = None self._config.reserved_percentage = self.RESERVED_PERCENTAGE self._driver = fcd.VMwareVStorageObjectDriver( configuration=self._config) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 94a5c3a53d2..056ffcf4ba8 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2398,6 +2398,10 @@ def test_create_volume_from_template( tmp_backing = mock.sentinel.tmp_backing vops.clone_backing.return_value = tmp_backing + disk_device = mock.sentinel.disk_device + disk_device.capacityInKB = self.VOL_SIZE + 11111111 + vops._get_disk_device.return_value = disk_device + volume = self._create_volume_obj() inv_path = mock.sentinel.inv_path self._driver._create_volume_from_template(volume, inv_path) @@ -2413,7 +2417,8 @@ def test_create_volume_from_template( disk_type=disk_type, host=host, resource_pool=rp, - folder=folder) + folder=folder, + device_changes=None) create_volume_from_temp_backing.assert_called_once_with(volume, tmp_backing) diff --git a/concourse_unit_test_task b/concourse_unit_test_task index d85063548e8..aed7613082d 100644 --- a/concourse_unit_test_task +++ b/concourse_unit_test_task @@ -4,6 +4,6 @@ apt-get update && \ apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext libpq-dev && \ pip install -U pip && \ pip install tox "six>=1.14.0" && \ -git clone -b stable/queens-m3 --single-branch https://github.com/sapcc/cinder.git --depth=1 && \ +git clone -b stable/train-m3 --single-branch https://github.com/sapcc/cinder.git --depth=1 && \ cd cinder && \ -tox -e py27,py3,pep8 +tox -e pep8,py3 From 2cc1857203b71a10f1f7d4064571bb49b10f80ff Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 1 Apr 2021 16:00:39 -0400 Subject: [PATCH 049/149] SAP Rework backup process to make it async This patch updates the backup process to call the volume manager asynchronously to get the backup device in which to do the backup on. This fixes a major issue with certain cinder drivers that take a long time to create a temporary clone of the volume being backed up. 
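Roughly, the intended hand-off between the two services looks like the sketch below. It is a simplified stand-in only: FakeVolumeRPCAPI and FakeBackupManager are illustrative stubs, backup_device is really an object rather than a dict, and the code in the diff that follows performs the calls as oslo.messaging casts with full error handling.

    class FakeVolumeRPCAPI(object):
        """Stub for the volume service's RPC API (illustrative only)."""

        def __init__(self, backup_manager):
            self._backup_manager = backup_manager

        def get_backup_device(self, ctxt, backup, volume):
            # The volume manager does the potentially slow clone/snapshot
            # work and, instead of returning the device synchronously,
            # calls back into the backup manager when it is done.
            backup_device = {'device': 'temp-clone-of-%s' % volume,
                             'is_snapshot': False}
            self._backup_manager.continue_backup(ctxt, backup, backup_device)


    class FakeBackupManager(object):
        """Stub showing the three phases named in the new manager code."""

        def __init__(self):
            self.volume_rpcapi = FakeVolumeRPCAPI(self)

        def create_backup(self, ctxt, backup, volume):
            # Phase 1 (_start_backup): fire the async request and return.
            self.volume_rpcapi.get_backup_device(ctxt, backup, volume)

        def continue_backup(self, ctxt, backup, backup_device):
            # Phase 2: runs once a backup device is ready (None on error).
            print('backing up %s from %s' % (backup,
                                             backup_device['device']))
            self._finish_backup(ctxt, backup)

        def _finish_backup(self, ctxt, backup):
            # Phase 3: restore volume/snapshot status, mark backup available.
            print('backup %s finished' % backup)


    FakeBackupManager().create_backup(ctxt=None, backup='backup-1',
                                      volume='vol-1')

The key point is that create_backup no longer blocks while the backup device is prepared; everything that needs the device moves into continue_backup, with _finish_backup restoring the volume/snapshot status afterwards.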
Added verbose output --- cinder/backup/chunkeddriver.py | 4 +- cinder/backup/manager.py | 136 +++++++++++++++++++++++---------- cinder/backup/rpcapi.py | 9 ++- cinder/volume/manager.py | 38 +++++++-- cinder/volume/rpcapi.py | 12 ++- 5 files changed, 145 insertions(+), 54 deletions(-) diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 86ceb5108cd..b3b1ceecb6c 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -369,10 +369,9 @@ def _backup_chunk(self, backup, container, data, data_offset, obj[object_name] = {} obj[object_name]['offset'] = data_offset obj[object_name]['length'] = len(data) - LOG.debug('Backing up chunk of data from volume.') algorithm, output_data = self._prepare_output_data(data) obj[object_name]['compression'] = algorithm - LOG.debug('About to put_object') + LOG.debug('About to put_object : %s', obj[object_name]) with self._get_object_writer( container, object_name, extra_metadata=extra_metadata ) as writer: @@ -587,6 +586,7 @@ def _notify_progress(): win32_disk_size - data_offset) else: read_bytes = self.chunk_size_bytes + LOG.debug("reading '%s' bytes", read_bytes) data = volume_file.read(read_bytes) if data == b'': diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index 1d58c73d3a4..6fa980d09cd 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -339,7 +339,6 @@ def create_backup(self, context, backup): snapshot = objects.Snapshot.get_by_id( context, snapshot_id) if snapshot_id else None previous_status = volume.get('previous_status', None) - updates = {} if snapshot_id: log_message = ('Create backup started, backup: %(backup_id)s ' 'volume: %(volume_id)s snapshot: %(snapshot_id)s.' @@ -396,7 +395,12 @@ def create_backup(self, context, backup): backup.service = self.driver_name backup.save() - updates = self._run_backup(context, backup, volume) + + # Backup is done in 3 phases. + # _start_backup + # continue_backup + # _finish_backup + self._start_backup(context, backup, volume) except Exception as err: with excutils.save_and_reraise_exception(): if snapshot_id: @@ -409,41 +413,16 @@ def create_backup(self, context, backup): 'previous_status': 'error_backing-up'}) volume_utils.update_backup_error(backup, str(err)) - # Restore the original status. - if snapshot_id: - self.db.snapshot_update( - context, snapshot_id, - {'status': fields.SnapshotStatus.AVAILABLE}) - else: - self.db.volume_update(context, volume_id, - {'status': previous_status, - 'previous_status': 'backing-up'}) + @volume_utils.trace + def _start_backup(self, context, backup, volume): + """This starts the backup process. - # _run_backup method above updated the status for the backup, so it - # will reflect latest status, even if it is deleted - completion_msg = 'finished' - if backup.status in (fields.BackupStatus.DELETING, - fields.BackupStatus.DELETED): - completion_msg = 'aborted' - else: - backup.status = fields.BackupStatus.AVAILABLE - backup.size = volume['size'] + First we have to get the backup device from the volume manager. + This can take a long time to complete. Once the volume manager + is done creating/getting the backup device, we get a callback + to complete the process of backing up the volume. - if updates: - backup.update(updates) - backup.save() - - # Handle the num_dependent_backups of parent backup when child - # backup has created successfully. 
- if backup.parent_id: - parent_backup = objects.Backup.get_by_id(context, - backup.parent_id) - parent_backup.num_dependent_backups += 1 - parent_backup.save() - LOG.info('Create backup %s. backup: %s.', completion_msg, backup.id) - self._notify_about_backup_usage(context, backup, "create.end") - - def _run_backup(self, context, backup, volume): + """ # Save a copy of the encryption key ID in case the volume is deleted. if (volume.encryption_key_id is not None and backup.encryption_key_id is None): @@ -453,17 +432,37 @@ def _run_backup(self, context, backup, volume): volume.encryption_key_id) backup.save() + # This is an async call to the volume manager. We will get a + # callback from the volume manager to continue once it's done. + self.volume_rpcapi.get_backup_device(context, backup, volume) + + @volume_utils.trace + def continue_backup(self, context, backup, backup_device): + """This is the callback from the volume manager to continue. + + If something went wrong on the volume manager getting/creating + the backup_device, the backup_device will be None. + """ + volume_id = backup.volume_id + volume = objects.Volume.get_by_id(context, volume_id) + snapshot_id = backup.snapshot_id + snapshot = objects.Snapshot.get_by_id( + context, snapshot_id) if snapshot_id else None + previous_status = volume.get('previous_status', None) + backup_service = self.service(context) properties = volume_utils.brick_get_connector_properties() - # NOTE(geguileo): Not all I/O disk operations properly do greenthread - # context switching and may end up blocking the greenthread, so we go - # with native threads proxy-wrapping the device file object. + updates = {} try: - backup_device = self.volume_rpcapi.get_backup_device(context, - backup, - volume) + if not backup_device: + # The volume manager didn't provide a backup_device + # due to something going wrong. So we raise here and + # cleanup the volume state and the backup state to error. + raise exception.BackupOperationError("Failed to get backup " + "device from driver.") + attach_info = self._attach_device(context, backup_device.device_obj, properties, @@ -491,12 +490,65 @@ def _run_backup(self, context, backup, volume): backup_device.device_obj, properties, backup_device.is_snapshot, force=True, ignore_errors=True) + except Exception as err: + with excutils.save_and_reraise_exception(): + if snapshot_id: + snapshot.status = fields.SnapshotStatus.AVAILABLE + snapshot.save() + else: + self.db.volume_update( + context, volume_id, + {'status': previous_status, + 'previous_status': 'error_backing-up'}) + self._update_backup_error(backup, str(err)) finally: with backup.as_read_deleted(): backup.refresh() self._cleanup_temp_volumes_snapshots_when_backup_created( context, backup) - return updates + + LOG.info("finish backup!") + self._finish_backup(context, backup, volume, updates) + + @volume_utils.trace + def _finish_backup(self, context, backup, volume, updates): + volume_id = backup.volume_id + snapshot_id = backup.snapshot_id + previous_status = volume.get('previous_status', None) + + # Restore the original status. 
+ if snapshot_id: + self.db.snapshot_update( + context, snapshot_id, + {'status': fields.SnapshotStatus.AVAILABLE}) + else: + self.db.volume_update(context, volume_id, + {'status': previous_status, + 'previous_status': 'backing-up'}) + + # continue_backup method above updated the status for the backup, so it + # will reflect latest status, even if it is deleted + completion_msg = 'finished' + if backup.status in (fields.BackupStatus.DELETING, + fields.BackupStatus.DELETED): + completion_msg = 'aborted' + else: + backup.status = fields.BackupStatus.AVAILABLE + backup.size = volume['size'] + + if updates: + backup.update(updates) + backup.save() + + # Handle the num_dependent_backups of parent backup when child + # backup has created successfully. + if backup.parent_id: + parent_backup = objects.Backup.get_by_id(context, + backup.parent_id) + parent_backup.num_dependent_backups += 1 + parent_backup.save() + LOG.info('Create backup %s. backup: %s.', completion_msg, backup.id) + self._notify_about_backup_usage(context, backup, "create.end") def _is_our_backup(self, backup): # Accept strings and Service OVO diff --git a/cinder/backup/rpcapi.py b/cinder/backup/rpcapi.py index 981526fcc86..bf9856c487d 100644 --- a/cinder/backup/rpcapi.py +++ b/cinder/backup/rpcapi.py @@ -47,9 +47,10 @@ class BackupAPI(rpc.RPCAPI): 2.0 - Remove 1.x compatibility 2.1 - Adds set_log_levels and get_log_levels 2.2 - Adds publish_service_capabilities + 2.3 - Adds continue_backup call """ - RPC_API_VERSION = '2.2' + RPC_API_VERSION = '2.3' RPC_DEFAULT_VERSION = '2.0' TOPIC = constants.BACKUP_TOPIC BINARY = 'cinder-backup' @@ -59,6 +60,12 @@ def create_backup(self, ctxt, backup): cctxt = self._get_cctxt(server=backup.host) cctxt.cast(ctxt, 'create_backup', backup=backup) + def continue_backup(self, ctxt, backup, backup_device): + LOG.debug("continue_backup in rpcapi backup_id %s", backup.id) + cctxt = self._get_cctxt(server=backup.host) + cctxt.cast(ctxt, 'continue_backup', backup=backup, + backup_device=backup_device) + def restore_backup(self, ctxt, backup_host, backup, volume_id): LOG.debug("restore_backup in rpcapi backup_id %s", backup.id) cctxt = self._get_cctxt(server=backup_host) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 60e1eb9241f..d9105bfbadc 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -55,6 +55,7 @@ import requests from taskflow import exceptions as tfe +from cinder.backup import rpcapi as backup_rpcapi from cinder.common import constants from cinder import compute from cinder import context @@ -4653,18 +4654,43 @@ def get_capabilities(self, context, discover): LOG.debug("Obtained capabilities list: %s.", capabilities) return capabilities - def get_backup_device(self, ctxt, backup, want_objects=False): - (backup_device, is_snapshot) = ( - self.driver.get_backup_device(ctxt, backup)) + @volume_utils.trace + def get_backup_device(self, ctxt, backup, want_objects=False, + async_call=False): + try: + (backup_device, is_snapshot) = ( + self.driver.get_backup_device(ctxt, backup)) + except Exception as ex: + if async_call: + LOG.exception("Failed to get backup device. 
" + "Calling backup continue_backup to cleanup") + rpcapi = backup_rpcapi.BackupAPI() + rpcapi.continue_backup(ctxt, backup, backup_device=None) + return + else: + while excutils.save_and_reraise_exception(): + LOG.exception("Failed to get backup device.") + secure_enabled = self.driver.secure_file_operations_enabled() backup_device_dict = {'backup_device': backup_device, 'secure_enabled': secure_enabled, 'is_snapshot': is_snapshot, } # TODO(sborkows): from_primitive method will be removed in O, so there # is a need to clean here then. - return (objects.BackupDeviceInfo.from_primitive(backup_device_dict, - ctxt) - if want_objects else backup_device_dict) + backup_device = ( + objects.BackupDeviceInfo.from_primitive(backup_device_dict, ctxt) + if want_objects else backup_device_dict) + + if async_call: + # we have to use an rpc call back to the backup manager to + # continue the backup + LOG.info("Calling backup continue_backup for: {}".format(backup)) + rpcapi = backup_rpcapi.BackupAPI() + rpcapi.continue_backup(ctxt, backup, backup_device) + else: + # The rpc api version doesn't support the async callback + # so we fallback to returning the value itself. + return backup_device def secure_file_operations_enabled(self, ctxt: context.RequestContext, diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index 3b8083649b7..d7db6ac4d55 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -134,9 +134,10 @@ class VolumeAPI(rpc.RPCAPI): failover_replication, and list_replication_targets. 3.15 - Add revert_to_snapshot method 3.16 - Add no_snapshots to accept_transfer method + 3.17 - Make get_backup_device a cast (async) """ - RPC_API_VERSION = '3.16' + RPC_API_VERSION = '3.17' RPC_DEFAULT_VERSION = '3.0' TOPIC = constants.VOLUME_TOPIC BINARY = constants.VOLUME_BINARY @@ -346,8 +347,13 @@ def get_capabilities(self, ctxt, backend_id, discover): return cctxt.call(ctxt, 'get_capabilities', discover=discover) def get_backup_device(self, ctxt, backup, volume): - cctxt = self._get_cctxt(volume.service_topic_queue, ('3.2', '3.0')) - if cctxt.can_send_version('3.2'): + cctxt = self._get_cctxt(volume.service_topic_queue, + ('3.17', '3.2', '3.0')) + if cctxt.can_send_version('3.17'): + cctxt.cast(ctxt, 'get_backup_device', backup=backup, + want_objects=True, async_call=True) + backup_obj = None + elif cctxt.can_send_version('3.2'): backup_obj = cctxt.call(ctxt, 'get_backup_device', backup=backup, want_objects=True) else: From 97f8b42e1d5a695850b6b515ea6a65dbbe630d1e Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 10 May 2021 12:28:39 -0400 Subject: [PATCH 050/149] SAP fix the backup_rework unit tests This patch fixes the refactored backup unit tests. 
--- cinder/backup/manager.py | 2 +- cinder/tests/unit/backup/test_backup.py | 59 +++++++++++++++---------- cinder/tests/unit/volume/test_rpcapi.py | 22 ++++++++- 3 files changed, 57 insertions(+), 26 deletions(-) diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index 6fa980d09cd..5b51440224e 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -500,7 +500,7 @@ def continue_backup(self, context, backup, backup_device): context, volume_id, {'status': previous_status, 'previous_status': 'error_backing-up'}) - self._update_backup_error(backup, str(err)) + volume_utils.update_backup_error(backup, str(err)) finally: with backup.as_read_deleted(): backup.refresh() diff --git a/cinder/tests/unit/backup/test_backup.py b/cinder/tests/unit/backup/test_backup.py index 970cbc7654e..82d7f1bc9b0 100644 --- a/cinder/tests/unit/backup/test_backup.py +++ b/cinder/tests/unit/backup/test_backup.py @@ -626,7 +626,7 @@ def test_create_backup_with_error(self): vol_id = self._create_volume_db_entry(size=1) backup = self._create_backup_db_entry(volume_id=vol_id) - mock_run_backup = self.mock_object(self.backup_mgr, '_run_backup') + mock_run_backup = self.mock_object(self.backup_mgr, '_start_backup') mock_run_backup.side_effect = FakeBackupException(str(uuid.uuid4())) self.assertRaises(FakeBackupException, self.backup_mgr.create_backup, @@ -639,22 +639,24 @@ def test_create_backup_with_error(self): self.assertEqual(fields.BackupStatus.ERROR, backup['status']) self.assertTrue(mock_run_backup.called) - @mock.patch('cinder.backup.manager.BackupManager._run_backup') - def test_create_backup_aborted(self, run_backup_mock): + @mock.patch('cinder.backup.manager.BackupManager._start_backup') + def test_create_backup_aborted(self, start_backup_mock): """Test error handling when abort occurs during backup creation.""" - def my_run_backup(*args, **kwargs): + def my_start_backup(*args, **kwargs): backup.destroy() with backup.as_read_deleted(): original_refresh() - run_backup_mock.side_effect = my_run_backup + start_backup_mock.side_effect = my_start_backup vol_id = self._create_volume_db_entry(size=1) backup = self._create_backup_db_entry(volume_id=vol_id) original_refresh = backup.refresh self.backup_mgr.create_backup(self.ctxt, backup) + vol = objects.Volume.get_by_id(self.ctxt, vol_id) + self.backup_mgr._finish_backup(self.ctxt, backup, vol, {}) - self.assertTrue(run_backup_mock.called) + self.assertTrue(start_backup_mock.called) vol = objects.Volume.get_by_id(self.ctxt, vol_id) self.assertEqual('available', vol.status) @@ -664,9 +666,9 @@ def my_run_backup(*args, **kwargs): backup.refresh() self.assertEqual(fields.BackupStatus.DELETED, backup.status) - @mock.patch('cinder.backup.manager.BackupManager._run_backup', + @mock.patch('cinder.backup.manager.BackupManager._start_backup', side_effect=FakeBackupException(str(uuid.uuid4()))) - def test_create_backup_with_snapshot_error(self, mock_run_backup): + def test_create_backup_with_snapshot_error(self, mock_start_backup): """Test error handling when error occurs during backup creation.""" vol_id = self._create_volume_db_entry(size=1) snapshot = self._create_snapshot_db_entry(status='backing-up', @@ -683,7 +685,7 @@ def test_create_backup_with_snapshot_error(self, mock_run_backup): backup.refresh() self.assertEqual(fields.BackupStatus.ERROR, backup.status) - self.assertTrue(mock_run_backup.called) + self.assertTrue(mock_start_backup.called) @mock.patch('cinder.volume.volume_utils.brick_get_connector_properties') 
@mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device') @@ -700,7 +702,7 @@ def test_create_backup(self, mock_isdir, mock_open, mock_temporary_chown, vol = objects.Volume.get_by_id(self.ctxt, vol_id) backup_device_dict = {'backup_device': vol, 'secure_enabled': False, 'is_snapshot': False, } - mock_get_backup_device.return_value = ( + mock_backup_device = ( objects.BackupDeviceInfo.from_primitive(backup_device_dict, self.ctxt, ['admin_metadata', @@ -715,6 +717,7 @@ def test_create_backup(self, mock_isdir, mock_open, mock_temporary_chown, mock_get_conn.return_value = properties self.backup_mgr.create_backup(self.ctxt, backup) + self.backup_mgr.continue_backup(self.ctxt, backup, mock_backup_device) mock_temporary_chown.assert_called_once_with('/dev/null') mock_attach_device.assert_called_once_with(self.ctxt, vol, @@ -764,7 +767,8 @@ def test_create_backup_set_parent_id_to_none(self, mock_isdir, mock_open, mock_open.return_value = open('/dev/null', 'rb') mock_brick.return_value = properties - self.backup_mgr.create_backup(self.ctxt, backup) + self.backup_mgr.continue_backup(self.ctxt, backup, + mock_backup_device) backup = db.backup_get(self.ctxt, backup.id) self.assertEqual(fields.BackupStatus.AVAILABLE, backup.status) @@ -799,7 +803,8 @@ def test_create_backup_set_parent_id(self, mock_isdir, mock_open, mock_open.return_value = open('/dev/null', 'rb') mock_brick.return_value = properties - self.backup_mgr.create_backup(self.ctxt, backup) + self.backup_mgr.continue_backup(self.ctxt, backup, + mock_backup_device) backup = db.backup_get(self.ctxt, backup.id) self.assertEqual(fields.BackupStatus.AVAILABLE, backup.status) @@ -834,8 +839,8 @@ def test_create_backup_fail_with_excep(self, mock_isdir, mock_open, mock_brick.return_value = properties self.assertRaises(FakeBackupException, - self.backup_mgr.create_backup, - self.ctxt, backup) + self.backup_mgr.continue_backup, + self.ctxt, backup, mock_backup_device) vol = db.volume_get(self.ctxt, vol_id) self.assertEqual('available', vol.status) @@ -843,6 +848,7 @@ def test_create_backup_fail_with_excep(self, mock_isdir, mock_open, backup = db.backup_get(self.ctxt, backup.id) self.assertEqual(fields.BackupStatus.ERROR, backup.status) + @mock.patch('cinder.backup.manager.BackupManager._finish_backup') @mock.patch('cinder.volume.volume_utils.brick_get_connector_properties') @mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device') @mock.patch('cinder.utils.temporary_chown') @@ -852,7 +858,8 @@ def test_run_backup_with_dir_device_path(self, mock_isdir, mock_open, mock_chown, mock_backup_device, - mock_brick): + mock_brick, + mock_finish): backup_service = mock.Mock() backup_service.backup = mock.Mock( return_value=mock.sentinel.backup_update) @@ -868,22 +875,24 @@ def test_run_backup_with_dir_device_path(self, mock_isdir, self.backup_mgr._attach_device = mock.Mock( return_value=attach_info) self.backup_mgr._detach_device = mock.Mock() - output = self.backup_mgr._run_backup(self.ctxt, backup, volume) + self.backup_mgr.continue_backup(self.ctxt, backup, + mock_backup_device) mock_chown.assert_not_called() mock_open.assert_not_called() backup_service.backup.assert_called_once_with( backup, device_path) - self.assertEqual(mock.sentinel.backup_update, output) + mock_finish.called_once_with(self.ctxt, backup, volume, + mock.sentinel.backup_update) - @mock.patch('cinder.backup.manager.BackupManager._run_backup') + @mock.patch('cinder.backup.manager.BackupManager._start_backup') @ddt.data((fields.SnapshotStatus.BACKING_UP, 'available'), 
(fields.SnapshotStatus.BACKING_UP, 'in-use'), (fields.SnapshotStatus.AVAILABLE, 'available'), (fields.SnapshotStatus.AVAILABLE, 'in-use')) @ddt.unpack def test_create_backup_with_snapshot(self, snapshot_status, volume_status, - mock_run_backup): + mock_start_backup): vol_id = self._create_volume_db_entry(status=volume_status) snapshot = self._create_snapshot_db_entry(volume_id=vol_id, status=snapshot_status) @@ -892,6 +901,9 @@ def test_create_backup_with_snapshot(self, snapshot_status, volume_status, if snapshot_status == fields.SnapshotStatus.BACKING_UP: self.backup_mgr.create_backup(self.ctxt, backup) + vol = objects.Volume.get_by_id(self.ctxt, vol_id) + self.backup_mgr._finish_backup(self.ctxt, backup, vol, {}) + vol = objects.Volume.get_by_id(self.ctxt, vol_id) snapshot = objects.Snapshot.get_by_id(self.ctxt, snapshot.id) @@ -920,7 +932,7 @@ def test_create_backup_with_temp_snapshot(self, mock_isdir, snap = self._create_snapshot_db_entry(volume_id=vol_id) vol = objects.Volume.get_by_id(self.ctxt, vol_id) - mock_get_backup_device.return_value = ( + mock_backup_device = ( objects.BackupDeviceInfo.from_primitive({ 'backup_device': snap, 'secure_enabled': False, 'is_snapshot': True, }, @@ -945,6 +957,7 @@ def test_create_backup_with_temp_snapshot(self, mock_isdir, mock_open.return_value = open('/dev/null', 'rb') self.backup_mgr.create_backup(self.ctxt, backup) + self.backup_mgr.continue_backup(self.ctxt, backup, mock_backup_device) mock_temporary_chown.assert_called_once_with('/dev/null') mock_initialize_connection_snapshot.assert_called_once_with( self.ctxt, snap, properties) @@ -1020,9 +1033,9 @@ def test_create_backup_with_notify(self, notify): vol_id = self._create_volume_db_entry(size=vol_size) backup = self._create_backup_db_entry(volume_id=vol_id) - self.mock_object(self.backup_mgr, '_run_backup') + self.mock_object(self.backup_mgr, '_start_backup') self.backup_mgr.create_backup(self.ctxt, backup) - self.assertEqual(2, notify.call_count) + self.assertEqual(1, notify.call_count) @mock.patch('cinder.volume.rpcapi.VolumeAPI.get_backup_device') @mock.patch('cinder.volume.volume_utils.clone_encryption_key') @@ -1884,7 +1897,7 @@ def test_backup_max_operations_restore(self, mock_restore): self.assertEqual(1, mock_restore.call_count) self.assertEqual(1, mock_sem.__exit__.call_count) - @mock.patch('cinder.backup.manager.BackupManager._run_backup') + @mock.patch('cinder.backup.manager.BackupManager._start_backup') def test_backup_max_operations_backup(self, mock_backup): mock_sem = self.mock_object(self.backup_mgr, '_semaphore') vol_id = self._create_volume_db_entry( diff --git a/cinder/tests/unit/volume/test_rpcapi.py b/cinder/tests/unit/volume/test_rpcapi.py index fcbcb7878ba..32169e59655 100644 --- a/cinder/tests/unit/volume/test_rpcapi.py +++ b/cinder/tests/unit/volume/test_rpcapi.py @@ -413,7 +413,24 @@ def test_remove_export(self): 'volume_id': self.fake_volume_obj.id}) @ddt.data(None, 'mycluster') - def test_get_backup_device(self, cluster_name): + def test_get_backup_device_cast(self, cluster_name): + self._change_cluster_name(self.fake_volume_obj, cluster_name) + self._test_rpc_api('get_backup_device', + rpc_method='cast', + server=cluster_name or self.fake_volume_obj.host, + backup=self.fake_backup_obj, + volume=self.fake_volume_obj, + expected_kwargs_diff={ + 'want_objects': True, + 'async_call': True, + }, + retval=None, + version='3.17') + + @ddt.data(None, 'mycluster') + def test_get_backup_device_call(self, cluster_name): + self.can_send_version_mock.side_effect = (False, 
False, True, False, + True) self._change_cluster_name(self.fake_volume_obj, cluster_name) backup_device_dict = {'backup_device': self.fake_volume, 'is_snapshot': False, @@ -433,7 +450,8 @@ def test_get_backup_device(self, cluster_name): @ddt.data(None, 'mycluster') def test_get_backup_device_old(self, cluster_name): - self.can_send_version_mock.side_effect = (True, False, False) + self.can_send_version_mock.side_effect = (False, False, False, False, + False) self._change_cluster_name(self.fake_volume_obj, cluster_name) backup_device_dict = {'backup_device': self.fake_volume, 'is_snapshot': False, From 49c2e0e7178303cf915686c0c0dcca257d871830 Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 8 Jul 2021 09:17:34 -0400 Subject: [PATCH 051/149] [SAP] Force volume create from backup to restoring This patch fixes an issue with calling create volume from backup. There are 2 ways to create a volume from a backup. 1) calling cinder backup-restore 2) calling cinder create --backup-id There is a series of issues with using #2 with vmware due to: A) the volume is put into 'creating' status before calling the backup manager to restore the bits to the new raw volume. B) the vmware driver uses the volume status field to determine if it should add required information to be returned in initialize_connection() to exist for os-brick to return a VmdkWriteHandle vs a VmdkReadHandle. Both A and B combined results in 100% failure rate for creating a volume from back up using technique #2. Changing the volume status to restoring after the raw cinder volume has been created fixes this issue. --- cinder/volume/flows/manager/create_volume.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cinder/volume/flows/manager/create_volume.py b/cinder/volume/flows/manager/create_volume.py index a5ca87336b3..f90984d85df 100644 --- a/cinder/volume/flows/manager/create_volume.py +++ b/cinder/volume/flows/manager/create_volume.py @@ -1087,6 +1087,7 @@ def _create_from_backup(self, context, volume, backup_id, **kwargs): {'id': backup_id}) model_update = self._create_raw_volume( context, volume, **kwargs) or {} + model_update['status'] = fields.VolumeStatus.RESTORING_BACKUP volume.update(model_update) volume.save() From 6945494e9231cfc515cf4b5e9b942d8a6fcb7c0f Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Mon, 2 Aug 2021 14:09:14 +0200 Subject: [PATCH 052/149] VmWare: Relocate with Storage IO Profile If we have a cinder volume type resulting in a disk with storage IO profile, then we also need to specify the target vcenter uuid of the storage profile in the disk-locator. 
Specifying it on the VM as profile is not enough, it will raise an unsupported operation error (as does not specifying it at all) --- .../unit/volume/drivers/vmware/test_remote.py | 6 +++- .../volume/drivers/vmware/test_vmware_vmdk.py | 7 ++-- .../drivers/vmware/test_vmware_volumeops.py | 1 + cinder/volume/drivers/vmware/remote.py | 6 +++- cinder/volume/drivers/vmware/vmdk.py | 1 + cinder/volume/drivers/vmware/volumeops.py | 36 ++++++++++++++----- 6 files changed, 44 insertions(+), 13 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_remote.py b/cinder/tests/unit/volume/drivers/vmware/test_remote.py index d54d5d4e32a..1275fc6d222 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_remote.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_remote.py @@ -80,9 +80,12 @@ def test_select_ds_for_volume(self): fake_rp = mock.Mock(value='fake-rp') fake_folder = mock.Mock(value='fake-folder') fake_summary = mock.Mock(datastore=mock.Mock(vlaue='fake-ds')) + fake_profile_id = 'fake-uuid' self._driver._select_ds_for_volume.return_value = \ (fake_host, fake_rp, fake_folder, fake_summary) + self._driver._get_storage_profile_id.return_value = \ + fake_profile_id ret_val = self._service.select_ds_for_volume(self._ctxt, self._fake_volume) self._driver._select_ds_for_volume.assert_called_once_with( @@ -91,7 +94,8 @@ def test_select_ds_for_volume(self): 'host': fake_host.value, 'resource_pool': fake_rp.value, 'folder': fake_folder.value, - 'datastore': fake_summary.datastore.value + 'profile_id': fake_profile_id, + 'datastore': fake_summary.datastore.value, }, ret_val) @mock.patch('oslo_vmware.vim_util.get_moref') diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 056ffcf4ba8..72d0defd81d 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -3566,7 +3566,9 @@ def test_migrate_volume(self, get_moref, vops, backing=None, 'capabilities': capabilities } ds_info = {'host': 'fake-ds-host', 'resource_pool': 'fake-rp', - 'datastore': 'fake-ds-name', 'folder': 'fake-folder'} + 'datastore': 'fake-ds-name', 'folder': 'fake-folder', + 'profile_id': 'fake-profile-id', + } get_moref.side_effect = [ mock.sentinel.host_ref, mock.sentinel.rp_ref, @@ -3614,7 +3616,8 @@ def _assertions_for_migration(): vops.relocate_backing.assert_called_once_with( backing, mock.sentinel.ds_ref, mock.sentinel.rp_ref, - mock.sentinel.host_ref, service=mock.sentinel.service_locator) + mock.sentinel.host_ref, profile_id='fake-profile-id', + service=mock.sentinel.service_locator) r_api.move_volume_backing_to_folder.assert_called_once_with( mock.sentinel.context, dest_host, volume, ds_info['folder']) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 75050755990..6d44dacbd89 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -853,6 +853,7 @@ def test_relocate_backing(self, get_relocate_spec, get_disk_device): get_relocate_spec.assert_called_once_with(datastore, resource_pool, host, disk_move_type, disk_type, disk_device, + profile_id=None, service=None) self.session.invoke_api.assert_called_once_with(self.session.vim, 'RelocateVM_Task', diff --git a/cinder/volume/drivers/vmware/remote.py b/cinder/volume/drivers/vmware/remote.py index 
c4d1ddd5be6..489e04b082e 100644 --- a/cinder/volume/drivers/vmware/remote.py +++ b/cinder/volume/drivers/vmware/remote.py @@ -69,11 +69,15 @@ def get_service_locator_info(self, ctxt): def select_ds_for_volume(self, ctxt, volume): (host, rp, folder, summary) = self._driver._select_ds_for_volume( volume) + + profile_id = self._driver._get_storage_profile_id(volume) + return { 'host': host.value, 'resource_pool': rp.value, 'folder': folder.value, - 'datastore': summary.datastore.value + 'datastore': summary.datastore.value, + 'profile_id': profile_id, } def move_volume_backing_to_folder(self, ctxt, volume, folder): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index b17f199bf6e..8817bfc5d06 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2669,6 +2669,7 @@ def migrate_volume(self, context, volume, host): ds_ref = vim_util.get_moref(ds_info['datastore'], 'Datastore') self.volumeops.relocate_backing(backing, ds_ref, rp_ref, host_ref, + profile_id=ds_info.get('profile_id'), service=service_locator) try: self._remote_api.move_volume_backing_to_folder( diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 2efa44ef195..85c75f0c956 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1004,19 +1004,26 @@ def get_summary(self, datastore): 'summary') def _create_relocate_spec_disk_locator(self, datastore, disk_type, - disk_device): + disk_device, profile_id=None): """Creates spec for disk type conversion during relocate.""" cf = self._session.vim.client.factory disk_locator = cf.create("ns0:VirtualMachineRelocateSpecDiskLocator") disk_locator.datastore = datastore disk_locator.diskId = disk_device.key - disk_locator.diskBackingInfo = self._create_disk_backing(disk_type, - None) + + if disk_type: + disk_locator.diskBackingInfo = self._create_disk_backing(disk_type, + None) + if profile_id: + profile_spec = cf.create("ns0:VirtualMachineDefinedProfileSpec") + profile_spec.profileId = profile_id + disk_locator.profile = [profile_spec] + return disk_locator def _get_relocate_spec(self, datastore, resource_pool, host, disk_move_type, disk_type=None, disk_device=None, - service=None): + profile_id=None, service=None): """Return spec for relocating volume backing. 
:param datastore: Reference to the datastore @@ -1025,6 +1032,8 @@ def _get_relocate_spec(self, datastore, resource_pool, host, :param disk_move_type: Disk move type option :param disk_type: Destination disk type :param disk_device: Virtual device corresponding to the disk + :param profile_id: ID of the profile to use (Cross vCenter Vmotion) + :param service: Service Locator (Cross vCenter Vmotion) :return: Spec for relocation """ cf = self._session.vim.client.factory @@ -1034,10 +1043,14 @@ def _get_relocate_spec(self, datastore, resource_pool, host, relocate_spec.host = host relocate_spec.diskMoveType = disk_move_type - if disk_type is not None and disk_device is not None: + # Either we want to convert the disk by specifing the disk_type + # or we need to determine the profile-id in the disk-locator + if not (disk_type is None and profile_id is None) \ + and disk_device is not None: disk_locator = self._create_relocate_spec_disk_locator(datastore, disk_type, - disk_device) + disk_device, + profile_id) relocate_spec.disk = [disk_locator] if service is not None: @@ -1062,7 +1075,7 @@ def _get_service_locator_spec(self, service): def relocate_backing( self, backing, datastore, resource_pool, host, disk_type=None, - service=None): + profile_id=None, service=None): """Relocates backing to the input datastore and resource pool. The implementation uses moveAllDiskBackingsAndAllowSharing disk move @@ -1073,6 +1086,7 @@ def relocate_backing( :param resource_pool: Reference to the resource pool :param host: Reference to the host :param disk_type: destination disk type + :param profile_id: Id of the profile (for cross vCenter) :param service: destination service (for cross vCenter) """ LOG.debug("Relocating backing: %(backing)s to datastore: %(ds)s " @@ -1090,13 +1104,17 @@ def relocate_backing( if service is not None: disk_move_type = 'moveAllDiskBackingsAndDisallowSharing' + # In case of a cross-vcenter vmotion with a profile-id, + # We need to specify the profile specifically for the disk disk_device = None - if disk_type is not None: + if disk_type is not None or profile_id is not None: disk_device = self._get_disk_device(backing) relocate_spec = self._get_relocate_spec(datastore, resource_pool, host, disk_move_type, disk_type, - disk_device, service=service) + disk_device, + profile_id=profile_id, + service=service) task = self._session.invoke_api(self._session.vim, 'RelocateVM_Task', backing, spec=relocate_spec) From f546097af077f39f13c10f96d82815bf1650e412 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Mon, 12 Jul 2021 11:45:21 +0200 Subject: [PATCH 053/149] Vmware: Create empty backing for live-migration The actual live-migration is happening in nova, but nova needs to know where to place the volume. An empty backing vm will be created, and nova takes care of the rest. The `datastore` in the connection info will be a mo-ref specifying the target destination (or initial location) and won't get updated over time. 
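Condensed, the branching this change introduces into migrate_volume() looks as follows. The helper name and the string return values are illustrative only; the accepted statuses and the NotImplementedError for in-use migration within a single vCenter come from the diff below.

    def plan_backend_migration(volume, source_vc_uuid, dest_vc_uuid):
        if volume['status'] in ('available', 'reserved'):
            # Cinder relocates the backing itself (storage vMotion), possibly
            # across vCenters via a service locator, then re-files it into
            # the destination folder.
            return 'relocate_backing'
        if volume['status'] == 'in-use':
            if source_vc_uuid != dest_vc_uuid:
                # Only create a disk-less shell VM on the destination and
                # report success; nova performs the live migration and the
                # moved disk ends up attached to that shell.  The datastore
                # moref handed out in the connection info marks the initial
                # placement and is never refreshed afterwards.
                return 'create_diskless_backing_on_destination'
            # In-use migration within one vCenter is not handled here.
            raise NotImplementedError()
        return 'generic_migration_fallback'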
--- .../unit/volume/drivers/vmware/test_remote.py | 7 +- cinder/volume/drivers/vmware/remote.py | 10 +-- cinder/volume/drivers/vmware/vmdk.py | 64 +++++++++++++++---- 3 files changed, 63 insertions(+), 18 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_remote.py b/cinder/tests/unit/volume/drivers/vmware/test_remote.py index 1275fc6d222..4aa4cbd7489 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_remote.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_remote.py @@ -56,7 +56,9 @@ def test_create_backing(self): rpc_method='call', server=self._fake_host, host=self._fake_host, - volume=self._fake_volume) + volume=self._fake_volume, + create_params=None + ) class VmdkDriverRemoteServiceTest(test.TestCase): @@ -117,4 +119,5 @@ def test_move_volume_backing_to_folder(self, get_moref): def test_create_backing(self): self._service.create_backing(self._ctxt, self._fake_volume) - self._driver._create_backing.assert_called_once_with(self._fake_volume) + self._driver._create_backing.assert_called_once_with( + self._fake_volume, create_params=None) diff --git a/cinder/volume/drivers/vmware/remote.py b/cinder/volume/drivers/vmware/remote.py index 489e04b082e..4df7d39e77a 100644 --- a/cinder/volume/drivers/vmware/remote.py +++ b/cinder/volume/drivers/vmware/remote.py @@ -50,9 +50,10 @@ def move_volume_backing_to_folder(self, ctxt, host, volume, folder): return cctxt.call(ctxt, 'move_volume_backing_to_folder', volume=volume, folder=folder) - def create_backing(self, ctxt, host, volume): + def create_backing(self, ctxt, host, volume, create_params=None): cctxt = self._get_cctxt(host) - return cctxt.call(ctxt, 'create_backing', volume=volume) + return cctxt.call(ctxt, 'create_backing', volume=volume, + create_params=create_params) class VmdkDriverRemoteService(object): @@ -86,5 +87,6 @@ def move_volume_backing_to_folder(self, ctxt, volume, folder): folder_ref = vim_util.get_moref(folder, 'Folder') self._driver.volumeops.move_backing_to_folder(backing, folder_ref) - def create_backing(self, ctxt, volume): - return self._driver._create_backing(volume) + def create_backing(self, ctxt, volume, create_params=None): + return self._driver._create_backing(volume, + create_params=create_params) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8817bfc5d06..e9cd76463dc 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -337,6 +337,7 @@ def __init__(self, *args, **kwargs): self._session = None self._stats = None self._volumeops = None + self._vcenter_instance_uuid_cache = None self._storage_policy_enabled = False self._ds_sel = None self._clusters = None @@ -359,6 +360,14 @@ def volumeops(self): def ds_sel(self): return self._ds_sel + @property + def _vcenter_instance_uuid(self): + if self._vcenter_instance_uuid_cache: + return self._vcenter_instance_uuid_cache + self._vcenter_instance_uuid_cache = \ + self.session.vim.service_content.about.instanceUuid + return self._vcenter_instance_uuid_cache + def _validate_params(self): # Throw error if required parameters are not set. required_params = ['vmware_host_ip', @@ -409,8 +418,9 @@ def _get_fake_stats(self): return self._stats def _get_connection_capabilities(self): - return ['vmware_service_instance_uuid:%s' % - self.session.vim.service_content.about.instanceUuid] + return [ + 'vmware_service_instance_uuid:%s' % + self._vcenter_instance_uuid] def _get_volume_stats(self): """Fetch the stats about the backend. 
@@ -805,8 +815,7 @@ def service_locator_info(self): return { 'url': url, 'ssl_thumbprint': x509.digest("sha1"), - 'instance_uuid': - self.session.vim.service_content.about.instanceUuid, + 'instance_uuid': self._vcenter_instance_uuid, 'credential': { 'username': self.configuration.vmware_host_username, 'password': self.configuration.vmware_host_password @@ -819,7 +828,8 @@ def _get_connection_info(self, volume, backing, connector): 'volume': backing.value, 'volume_id': volume.id, 'name': volume.name, - 'profile_id': self._get_storage_profile_id(volume) + 'profile_id': self._get_storage_profile_id(volume), + 'datastore': self.volumeops.get_datastore(backing).value, } # vmdk connector in os-brick needs additional connection info. @@ -829,9 +839,6 @@ def _get_connection_info(self, volume, backing, connector): vmdk_path = self.volumeops.get_vmdk_path(backing) connection_info['data']['vmdk_path'] = vmdk_path - datastore = self.volumeops.get_datastore(backing) - connection_info['data']['datastore'] = datastore.value - datacenter = self.volumeops.get_dc(backing) connection_info['data']['datacenter'] = datacenter.value @@ -2633,12 +2640,13 @@ def migrate_volume(self, context, volume, host): """ false_ret = (False, None) - allowed_statuses = ['available', 'reserved'] + allowed_statuses = ['available', 'reserved', 'in-use'] if volume['status'] not in allowed_statuses: LOG.debug('Only %s volumes can be migrated using backend ' 'assisted migration. Falling back to generic migration.', " or ".join(allowed_statuses)) return false_ret + if 'location_info' not in host['capabilities']: return false_ret info = host['capabilities']['location_info'] @@ -2660,10 +2668,21 @@ def migrate_volume(self, context, volume, host): {'volume_name': volume.name, 'dest_host': dest_host}) return (True, None) - service_locator = self._remote_api.get_service_locator_info(context, - dest_host) + if volume['status'] == 'in-use': + if self._vcenter_instance_uuid != vcenter: + return self._migrate_attached_cross_vc(context, dest_host, + volume, backing) + else: + raise NotImplementedError() + else: + return self._migrate_unattached(context, dest_host, volume, + backing) + + def _migrate_unattached(self, context, dest_host, volume, backing): ds_info = self._remote_api.select_ds_for_volume(context, dest_host, volume) + service_locator = self._remote_api.get_service_locator_info(context, + dest_host) host_ref = vim_util.get_moref(ds_info['host'], 'HostSystem') rp_ref = vim_util.get_moref(ds_info['resource_pool'], 'ResourcePool') ds_ref = vim_util.get_moref(ds_info['datastore'], 'Datastore') @@ -2674,6 +2693,7 @@ def migrate_volume(self, context, volume, host): try: self._remote_api.move_volume_backing_to_folder( context, dest_host, volume, ds_info['folder']) + return (True, None) except Exception: # At this point the backing has been migrated to the new host. # If this movement to folder fails, we let the manager know the @@ -2686,10 +2706,30 @@ def migrate_volume(self, context, volume, host): 'folder': ds_info['folder']},) return (True, {'migration_status': 'error'}) - return (True, None) + def _migrate_attached_cross_vc(self, context, dest_host, volume, backing): + try: + # Create a diskless backing vm, so we can attach the + # backing moved in a live migration back to it + self._remote_api.create_backing( + context, dest_host, volume, create_params={ + CREATE_PARAM_DISK_LESS: True + }) + return (True, None) + except Exception: + # At this point the backing has been "migrated" to the new host. 
+ # If this creation fails, return True so it will save the new host, + # but we update its status to 'error' so that someone can check + # the logs and perform a manual action. + LOG.exception("Failed to create the backing %(volume_id)s.", + {'volume_id': volume['id'], }, ) + return (True, {'migration_status': 'error'}) def update_migrated_volume(self, ctxt, volume, new_volume, original_volume_status): + if original_volume_status == 'in-use': + # Everything should be taken care in nova + return None + backing = self.volumeops.get_backing(new_volume['name'], new_volume['id']) if not backing: From cf48b38d9da0fffb53b5976ccfd5d2b63e0772dc Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Fri, 6 Aug 2021 13:26:45 +0200 Subject: [PATCH 054/149] [SAP] Also migrate on attachment creation The new workflow for migrating volumes is to create an attachment with connection info. We need to migrate the volume the same way as with the old initialize_connection call to make it work there --- cinder/volume/api.py | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 849e5149bcf..658a8ef06ba 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -2229,12 +2229,26 @@ def attachment_create(self, attachment_ref = self._attachment_reserve(ctxt, volume_ref, instance_uuid) - if connector: - connection_info = ( - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id)) + try: + if connector: + connection_info = ( + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id)) + except exception.ConnectorRejected: + with excutils.save_and_reraise_exception() as exc_context: + if CONF.allow_migration_on_attach: + LOG.info("The connector was rejected by the volume " + "backend while updating the attachments. " + "Trying to migrate it.") + exc_context.reraise = False + self._migrate_by_connector(ctxt, volume_ref, connector) + connection_info =\ + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id) attachment_ref.connection_info = connection_info # Use of admin_metadata for RO settings is deprecated From 69df27fe3a94c5ad0867e29d60534c278d8ed618 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 3 Aug 2021 14:59:19 +0200 Subject: [PATCH 055/149] VmWare: Use WithRetrieval instead of own continue_/cancel_retrieval `WithRetrieval` ensures, that `cancel_retrieval` is called also when there is an exception. If that isn't done, the VSphere server is leaking resources. It also fixes `VMwareVolumeOps.get_hosts` when there are more than `vmware_max_objects_retrieval` hosts. The function does neither call `continue_`- nor . So, if there are more hosts, you will only get the first `vmware_max_objects_retrieval`, and on top of it, leak a view on vserver side. 
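Taken in isolation, the replacement pattern looks like this. The sketch assumes `session` is the driver's oslo.vmware API session; the function name and default page size are illustrative, while the invoke_api call matches the get_hosts() rewrite in the diff below.

    from oslo_vmware import vim_util

    def iter_all_hosts(session, max_objects=100):
        """Collect every HostSystem without leaking a server-side view.

        WithRetrieval keeps calling continue_retrieval internally and always
        cancels the retrieval on exit, even if iteration raises, which is
        what the hand-rolled loops being removed did not guarantee.
        """
        retrieve_result = session.invoke_api(vim_util, 'get_objects',
                                             session.vim, 'HostSystem',
                                             max_objects)
        with vim_util.WithRetrieval(session.vim, retrieve_result) as objects:
            # 'objects' yields results across all pages, not just the first
            # max_objects entries.
            return list(objects)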
--- .../drivers/vmware/test_vmware_volumeops.py | 35 ++++---------- cinder/volume/drivers/vmware/datastore.py | 22 ++++----- cinder/volume/drivers/vmware/vmdk.py | 11 ++--- cinder/volume/drivers/vmware/volumeops.py | 46 ++++++------------- 4 files changed, 35 insertions(+), 79 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 6d44dacbd89..f1d2631c299 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -124,7 +124,7 @@ def _create_backing_obj(self, name, ref, instance_uuid=None, vol_id=None): backing.propSet = [name_prop, instance_uuid_prop, vol_id_prop] return backing - @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' + @mock.patch('oslo_vmware.vim_util.' 'continue_retrieval', return_value=None) def test_build_backing_ref_cache(self, continue_retrieval): uuid1 = 'd68cbee0-c1f7-4886-98a4-cf2201461c6e' @@ -162,7 +162,7 @@ def test_build_backing_ref_cache(self, continue_retrieval): 'name', 'config.instanceUuid', 'config.extraConfig["cinder.volume.id"]']) - continue_retrieval.assert_called_once_with(result) + continue_retrieval.assert_called_once_with(self.session.vim, result) def test_delete_backing(self): backing = mock.sentinel.backing @@ -192,8 +192,10 @@ def _host_runtime_info( inMaintenanceMode=in_maintenance) def test_get_hosts(self): - hosts = mock.sentinel.hosts - self.session.invoke_api.return_value = hosts + retrieve_results = mock.sentinel.retrieve_results + hosts = [mock.sentinel.hosts] + retrieve_results.objects = hosts + self.session.invoke_api.return_value = retrieve_results result = self.vops.get_hosts() self.assertEqual(hosts, result) self.session.invoke_api.assert_called_once_with(vim_util, @@ -202,26 +204,6 @@ def test_get_hosts(self): 'HostSystem', self.MAX_OBJECTS) - def test_continue_retrieval(self): - retrieve_result = mock.sentinel.retrieve_result - self.session.invoke_api.return_value = retrieve_result - result = self.vops.continue_retrieval(retrieve_result) - self.assertEqual(retrieve_result, result) - self.session.invoke_api.assert_called_once_with(vim_util, - 'continue_retrieval', - self.session.vim, - retrieve_result) - - def test_cancel_retrieval(self): - retrieve_result = mock.sentinel.retrieve_result - self.session.invoke_api.return_value = retrieve_result - result = self.vops.cancel_retrieval(retrieve_result) - self.assertIsNone(result) - self.session.invoke_api.assert_called_once_with(vim_util, - 'cancel_retrieval', - self.session.vim, - retrieve_result) - def test_is_usable(self): mount_info = mock.Mock(spec=object) mount_info.accessMode = "readWrite" @@ -1819,7 +1801,7 @@ def test_get_cluster_hosts_with_no_host(self): cluster, 'host') - @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' + @mock.patch('oslo_vmware.vim_util.' 
'continue_retrieval', return_value=None) def test_get_all_clusters(self, continue_retrieval): prop_1 = mock.Mock(val='test_cluster_1') @@ -1837,7 +1819,8 @@ def test_get_all_clusters(self, continue_retrieval): self.session.invoke_api.assert_called_once_with( vim_util, 'get_objects', self.session.vim, 'ClusterComputeResource', self.MAX_OBJECTS) - continue_retrieval.assert_called_once_with(retrieve_result) + continue_retrieval.assert_called_once_with(self.session.vim, + retrieve_result) def test_get_entity_by_inventory_path(self): self.session.invoke_api.return_value = mock.sentinel.ref diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 95eece84848..010d9eeff1e 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -167,27 +167,23 @@ def _get_object_properties(self, obj_content): return props def _get_datastores(self): + vim = self._session.vim datastores = {} retrieve_result = self._session.invoke_api( vim_util, 'get_objects', - self._session.vim, + vim, 'Datastore', self._max_objects, properties_to_collect=['host', 'summary']) - while retrieve_result: - if retrieve_result.objects: - for obj_content in retrieve_result.objects: - props = self._get_object_properties(obj_content) - if ('host' in props and - hasattr(props['host'], 'DatastoreHostMount')): - props['host'] = props['host'].DatastoreHostMount - datastores[obj_content.obj] = props - retrieve_result = self._session.invoke_api(vim_util, - 'continue_retrieval', - self._session.vim, - retrieve_result) + with vim_util.WithRetrieval(vim, retrieve_result) as objects: + for obj_content in objects: + props = self._get_object_properties(obj_content) + if ('host' in props and + hasattr(props['host'], 'DatastoreHostMount')): + props['host'] = props['host'].DatastoreHostMount + datastores[obj_content.obj] = props return datastores diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index e9cd76463dc..dd178830903 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -449,8 +449,9 @@ def _get_volume_stats(self): global_capacity = 0 global_free = 0 if ds_summaries: - while True: - for ds in ds_summaries.objects: + with vim_util.WithRetrieval( + self.session.vim, ds_summaries) as objects: + for ds in objects: ds_props = self._get_object_properties(ds) summary = ds_props['summary'] if self._is_datastore_accessible(summary, @@ -458,11 +459,7 @@ def _get_volume_stats(self): available_hosts): global_capacity += summary.capacity global_free += summary.freeSpace - if getattr(ds_summaries, 'token', None): - ds_summaries = self.volumeops.continue_retrieval( - ds_summaries) - else: - break + data['total_capacity_gb'] = round(global_capacity / units.Gi) data['free_capacity_gb'] = round(global_free / units.Gi) location_info = '%(driver_name)s:%(vcenter)s' % { diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 85c75f0c956..1469554e27b 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -331,7 +331,6 @@ def get_backing_by_uuid(self, uuid): return result[0] def build_backing_ref_cache(self, name_regex=None): - LOG.debug("Building backing ref cache.") result = self._session.invoke_api( vim_util, @@ -344,8 +343,8 @@ def build_backing_ref_cache(self, name_regex=None): 'config.instanceUuid', 'config.extraConfig["cinder.volume.id"]']) - while result: - for backing in result.objects: 
+ with vim_util.WithRetrieval(self._session.vim, result) as objects: + for backing in objects: instance_uuid = None vol_id = None @@ -365,8 +364,6 @@ def build_backing_ref_cache(self, name_regex=None): continue self._backing_ref_cache[name] = backing.obj - - result = self.continue_retrieval(result) LOG.debug("Backing ref cache size: %d.", len(self._backing_ref_cache)) def delete_backing(self, backing): @@ -407,27 +404,11 @@ def get_hosts(self): :return: All the hosts from the inventory """ - return self._session.invoke_api(vim_util, 'get_objects', - self._session.vim, - 'HostSystem', self._max_objects) - - def continue_retrieval(self, retrieve_result): - """Continue retrieval of results if necessary. - - :param retrieve_result: Result from RetrievePropertiesEx - """ - - return self._session.invoke_api(vim_util, 'continue_retrieval', - self._session.vim, retrieve_result) - - def cancel_retrieval(self, retrieve_result): - """Cancel retrieval of results if necessary. - - :param retrieve_result: Result from RetrievePropertiesEx - """ - - self._session.invoke_api(vim_util, 'cancel_retrieval', - self._session.vim, retrieve_result) + result = self._session.invoke_api(vim_util, 'get_objects', + self._session.vim, + 'HostSystem', self._max_objects) + with vim_util.WithRetrieval(self._session.vim, result) as objects: + return list(objects) # TODO(vbala): move this method to datastore module def _is_usable(self, mount_info): @@ -1858,17 +1839,16 @@ def delete_vmdk_file(self, vmdk_file_path, dc_ref): LOG.info("Deleted vmdk file: %s.", vmdk_file_path) def _get_all_clusters(self): + vim = self._session.vim clusters = {} retrieve_result = self._session.invoke_api(vim_util, 'get_objects', - self._session.vim, + vim, 'ClusterComputeResource', self._max_objects) - while retrieve_result: - if retrieve_result.objects: - for cluster in retrieve_result.objects: - name = urllib.parse.unquote(cluster.propSet[0].val) - clusters[name] = cluster.obj - retrieve_result = self.continue_retrieval(retrieve_result) + with vim_util.WithRetrieval(vim, retrieve_result) as objects: + for cluster in objects: + name = urllib.parse.unquote(cluster.propSet[0].val) + clusters[name] = cluster.obj return clusters def get_cluster_refs(self, names): From ece2de85574b5e849a45ccc631e76f60dbdc2c4d Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 16 Aug 2021 14:38:51 -0400 Subject: [PATCH 056/149] [SAP] Filter out hosts that are marked buildup This patch filters out hosts in a cluster that has been marked by ops as 'buildup'. This ensures that we don't use hosts that aren't ready to be used quite yet. Ops has to set a 'buildup' : 'true' custom attribute in the cluster. This patch will reverse lookup which cluster a host is in and ensure that the cluster isn't marked as buildup. 
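The essence of the filter, condensed into a single standalone function as a sketch; the real code splits this across is_host_in_buildup_cluster() and _filter_hosts() in the diff below, and the function name here is illustrative only.

    def filter_out_buildup_hosts(vops, hosts):
        """Drop hosts whose parent cluster carries the custom attribute
        buildup=true.  'vops' is the driver's VMwareVolumeOps instance."""
        cache = {}      # cluster moref value -> is-buildup flag
        usable = []
        for host in hosts or []:
            cluster = vops._get_parent(host, 'ClusterComputeResource')
            if cluster.value not in cache:
                attrs = vops.get_cluster_custom_attributes(cluster) or {}
                # Attribute values arrive as strings, so compare loosely.
                flag = attrs.get('buildup', {}).get('value', 'false')
                cache[cluster.value] = flag.lower() == 'true'
            if not cache[cluster.value]:
                usable.append(host)
        return usable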
--- .../drivers/vmware/test_vmware_datastore.py | 8 +++- cinder/volume/drivers/vmware/datastore.py | 47 ++++++++++++++++++- cinder/volume/drivers/vmware/volumeops.py | 27 +++++++++++ 3 files changed, 78 insertions(+), 4 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py index 403d3b79de8..caeff9d3f38 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py @@ -298,6 +298,8 @@ def mock_is_usable(mount_info): def test_select_best_datastore_with_empty_datastores(self): self.assertIsNone(self._ds_sel._select_best_datastore({})) + @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' + 'is_host_in_buildup_cluster') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' 'get_profile_id') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' @@ -308,7 +310,7 @@ def test_select_best_datastore_with_empty_datastores(self): '_select_best_datastore') def test_select_datastore( self, select_best_datastore, filter_datastores, get_datastores, - get_profile_id): + get_profile_id, is_buildup): profile_id = mock.sentinel.profile_id get_profile_id.return_value = profile_id @@ -322,6 +324,8 @@ def test_select_datastore( best_datastore = mock.sentinel.best_datastore select_best_datastore.return_value = best_datastore + is_buildup.return_value = False + size_bytes = 1024 req = {self._ds_sel.SIZE_BYTES: size_bytes} aff_ds_types = [ds_sel.DatastoreType.VMFS] @@ -331,7 +335,7 @@ def test_select_datastore( profile_name = mock.sentinel.profile_name req[ds_sel.DatastoreSelector.PROFILE_NAME] = profile_name - hosts = mock.sentinel.hosts + hosts = [mock.sentinel.hosts] self.assertEqual(best_datastore, self._ds_sel.select_datastore(req, hosts)) get_datastores.assert_called_once_with() diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 010d9eeff1e..2ae87a461f7 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -17,6 +17,7 @@ Classes and utility methods for datastore selection. 
""" +from collections.abc import Iterable import random from oslo_log import log as logging @@ -99,6 +100,43 @@ def _filter_by_profile(self, datastores, profile_id): hub_ids = [hub.hubId for hub in hubs] return {k: v for k, v in datastores.items() if k.value in hub_ids} + def is_host_in_buildup_cluster(self, host_ref, cache=None): + host_cluster = self._vops._get_parent(host_ref, + "ClusterComputeResource") + if cache is not None and host_cluster.value in cache: + return cache[host_cluster.value] + + attrs = self._vops.get_cluster_custom_attributes(host_cluster) + LOG.debug("attrs {}".format(attrs)) + + def bool_from_str(bool_str): + if bool_str.lower() == "true": + return True + else: + return False + + result = (attrs and 'buildup' in attrs and + bool_from_str(attrs['buildup']['value'])) + if cache is not None: + cache[host_cluster.value] = result + return result + + def _filter_hosts(self, hosts): + """Filter out any hosts that are in a cluster marked buildup.""" + + valid_hosts = [] + cache = {} + if hosts: + if isinstance(hosts, Iterable): + for host in hosts: + if not self.is_host_in_buildup_cluster(host, cache): + valid_hosts.append(host) + else: + if not self.is_host_in_buildup_cluster(hosts, cache): + valid_hosts.append(hosts) + + return valid_hosts + def _filter_datastores(self, datastores, size_bytes, @@ -285,13 +323,18 @@ def select_datastore(self, req, hosts=None): profile_id = self.get_profile_id(profile_name) datastores = self._get_datastores() + # We don't want to use hosts in buildup + LOG.debug("FILTER hosts start {}".format(hosts)) + valid_hosts = self._filter_hosts(hosts) + LOG.debug("FILTERED hosts valid {}".format(valid_hosts)) datastores = self._filter_datastores(datastores, size_bytes, profile_id, hard_anti_affinity_datastores, hard_affinity_ds_types, - valid_host_refs=hosts) - res = self._select_best_datastore(datastores, valid_host_refs=hosts) + valid_host_refs=valid_hosts) + res = self._select_best_datastore(datastores, + valid_host_refs=valid_hosts) LOG.debug("Selected (host, resourcepool, datastore): %s", res) return res diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 1469554e27b..5ae686d7496 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1885,6 +1885,33 @@ def get_cluster_hosts(self, cluster): return host_refs + def get_cluster_custom_attributes(self, cluster): + retrieve_fields = self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + cluster, + 'availableField') + if retrieve_fields: + custom_fields = {} + for field in retrieve_fields: + for v in field[1]: + custom_fields[v.key] = v.name + + retrieve_result = self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + cluster, + 'customValue') + if retrieve_result: + custom_attributes = {} + for val in retrieve_result: + for i in val[1]: + custom_attributes[custom_fields[i.key]] = { + "value": i.value, 'id': i.key + } + + return custom_attributes + def get_entity_by_inventory_path(self, path): """Returns the managed object identified by the given inventory path. From 66a0db55c105cea691b3d01870ff60ddab1d1860 Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 22 Sep 2021 11:58:18 -0400 Subject: [PATCH 057/149] [SAP] Add vmware extension for migration This is a POC/WIP to add the extension to the cinder API to allow calling os-migrate_volume_by_connector. This will still be an admin API endpoint as it will call migrate_volume underneath it. 
Notes: I have tested this extension on my home devstack and it worked. --- cinder/api/contrib/vmware_migrate.py | 70 +++++++++++++++++++ .../api/schemas/vmware_extension_actions.py | 30 ++++++++ cinder/volume/api.py | 9 ++- 3 files changed, 107 insertions(+), 2 deletions(-) create mode 100644 cinder/api/contrib/vmware_migrate.py create mode 100644 cinder/api/schemas/vmware_extension_actions.py diff --git a/cinder/api/contrib/vmware_migrate.py b/cinder/api/contrib/vmware_migrate.py new file mode 100644 index 00000000000..bd7350b7d41 --- /dev/null +++ b/cinder/api/contrib/vmware_migrate.py @@ -0,0 +1,70 @@ +# Copyright (c) 2021 SAP Corporation +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +"""The Migrate by connector API.""" + + +from oslo_log import log as logging +from six.moves import http_client + +from cinder.api.contrib import admin_actions +from cinder.api import extensions +from cinder.api.openstack import wsgi +from cinder.api.schemas import vmware_extension_actions as vmware_actions +from cinder.api import validation + + +LOG = logging.getLogger(__name__) + + +class VMWareVolumeExtensionsController(admin_actions.VolumeAdminController): + + collection = 'volumes' + + @wsgi.response(http_client.ACCEPTED) + @wsgi.action('os-migrate_volume_by_connector') + @validation.schema(vmware_actions.migrate_volume_by_connector) + def _migrate_volume(self, req, id, body): + """Migrate a volume based on connector. + + This is an SAP VMWare extension that requires + the connector of the vmware vcenter to be provided. + The connector will contain the correct vcenter uuid + so that the scheduler can find the right cinder backend + to migrate the volume. + """ + context = req.environ['cinder.context'] + # Not found exception will be handled at the wsgi level + volume = self._get(context, id) + self.authorize(context, 'migrate_volume', target_obj=volume) + params = body['os-migrate_volume_by_connector'] + connector = params.get('connector', {}) + + self.volume_api.migrate_volume_by_connector( + context, volume, connector) + + +class Vmware_migrate(extensions.ExtensionDescriptor): + """Enable admin actions.""" + + name = "Vmware_migrate" + alias = "os-vmware-admin-actions" + updated = "2021-09-25T00:00:00+00:00" + + def get_controller_extensions(self): + controller = VMWareVolumeExtensionsController() + extension = extensions.ControllerExtension( + self, controller.collection, controller) + return [extension] diff --git a/cinder/api/schemas/vmware_extension_actions.py b/cinder/api/schemas/vmware_extension_actions.py new file mode 100644 index 00000000000..416d6a01e54 --- /dev/null +++ b/cinder/api/schemas/vmware_extension_actions.py @@ -0,0 +1,30 @@ +# Copyright (C) 2021 SAP +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + + +migrate_volume_by_connector = { + 'type': 'object', + 'properties': { + 'os-migrate_volume_by_connector': { + 'type': 'object', + 'properties': { + 'connector': {'type': ['string', 'object', 'null']}, + }, + 'additionalProperties': False, + }, + }, + 'required': ['os-migrate_volume_by_connector'], + 'additionalProperties': False, +} diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 658a8ef06ba..b08518b10ec 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -805,7 +805,12 @@ def detach(self, context, volume, attachment_id): resource=volume) return detach_results - def _migrate_by_connector(self, ctxt, volume, connector): + def migrate_volume_by_connector(self, ctxt, volume, connector): + if not connector: + raise exception.InvalidInput("Must provide a valid Connector") + return self._migrate_by_connector(ctxt, volume, connector, wait=False) + + def _migrate_by_connector(self, ctxt, volume, connector, wait=True): volume_type = {} if volume.volume_type_id: volume_type = volume_types.get_volume_type( @@ -828,7 +833,7 @@ def _migrate_by_connector(self, ctxt, volume, connector): LOG.debug("Invoking migrate_volume to host=%(host).", dest['host']) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, - wait_for_completion=True) + wait_for_completion=wait) volume.refresh() def initialize_connection(self, context, volume, connector): From 95aa93048780231424298f027264a1d9a2cdfe9e Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 8 Oct 2021 09:40:15 -0400 Subject: [PATCH 058/149] [SAP] Removed the automatic call to migration This patch removes the call to migrate a volume when ConnectorRejected is raised. This prevents the automatic migration of a volume during attach. It is now up to the caller to recognize the ConnectorRejected exception and then call the new API extension to migrate volume by connector. 
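A rough caller-side sketch of the new contract follows. It is not part of this patch; the URL, token, microversion and connector values are placeholders, polling for the migration to finish is omitted, and the 406 check assumes the later change in this series that maps ConnectorRejected to HTTP 406 (without it the caller sees a generic error):

    import requests

    CINDER = "http://cinder-api:8776/v3/<project_id>"   # placeholder endpoint
    HEADERS = {"X-Auth-Token": "<token>",               # placeholder token
               "OpenStack-API-Version": "volume 3.27",
               "Content-Type": "application/json"}

    def attach_volume(volume_id, attachment_body, connector):
        # Try to create the attachment as usual.
        url = CINDER + "/attachments"
        resp = requests.post(url, json=attachment_body, headers=HEADERS)
        if resp.status_code == 406:
            # The backend rejected the connector. Migration is no longer
            # triggered automatically, so explicitly ask Cinder to migrate
            # the volume towards this connector, wait for the migration to
            # complete (not shown), then retry the attachment.
            action = {"os-migrate_volume_by_connector":
                      {"connector": connector}}
            requests.post("%s/volumes/%s/action" % (CINDER, volume_id),
                          json=action, headers=HEADERS)
            resp = requests.post(url, json=attachment_body, headers=HEADERS)
        return resp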
--- cinder/opts.py | 1 - .../volume/drivers/vmware/test_vmware_vmdk.py | 11 +++ cinder/tests/unit/volume/test_volume.py | 53 ------------ cinder/volume/api.py | 83 ++++--------------- 4 files changed, 26 insertions(+), 122 deletions(-) diff --git a/cinder/opts.py b/cinder/opts.py index eea37fa7c34..e695ce79797 100644 --- a/cinder/opts.py +++ b/cinder/opts.py @@ -265,7 +265,6 @@ def list_opts(): [cinder_volume_api.volume_host_opt], [cinder_volume_api.volume_same_az_opt], [cinder_volume_api.az_cache_time_opt], - [cinder_volume_api.migrate_on_attach_opt], cinder_volume_driver.volume_opts, cinder_volume_driver.iser_opts, cinder_volume_driver.nvmet_opts, diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 72d0defd81d..365c04c989e 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2222,6 +2222,17 @@ def test_initialize_connection_with_no_instance_and_no_backing(self): def test_initialize_connection_with_no_instance_and_backing(self): self._test_initialize_connection(instance_exists=False) + def test_initialize_connection_connector_rejected(self): + connector = {'ip': '0.0.0.0', + "connection_capabilities": {}} + volume = self._create_volume_obj() + + self._driver._vcenter_instance_uuid_cache = "11234" + + self.assertRaises(cinder_exceptions.ConnectorRejected, + self._driver.initialize_connection, + volume, connector) + @mock.patch.object(VMDK_DRIVER, 'volumeops') def test_terminate_connection(self, vops): volume = self._create_volume_obj(status='restoring-backup') diff --git a/cinder/tests/unit/volume/test_volume.py b/cinder/tests/unit/volume/test_volume.py index 0eb706efccd..9f4c8150200 100644 --- a/cinder/tests/unit/volume/test_volume.py +++ b/cinder/tests/unit/volume/test_volume.py @@ -3350,59 +3350,6 @@ def test_tpool_size(self): self.assertListEqual([], eventlet.tpool._threads) eventlet.tpool._nthreads = 20 - @mock.patch('cinder.volume.volume_types.get_volume_type') - def test_initialize_connection_with_rejected_connector( - self, fake_get_volume_type): - ini_ret = {'ip': '1.2.3.4'} - connector = {'ip': '0.0.0.0'} - volume_type = 'fake-volume-type' - volume = tests_utils.create_volume(self.context) - host_obj = {'host': 'fake-host', - 'cluster_name': 'fake-cluster', - 'capabilities': {}} - - self.override_config('allow_migration_on_attach', True) - fake_get_volume_type.return_value = volume_type - - volume_api = cinder.volume.api.API() - scheduler_rpcapi = mock.Mock() - volume_api.scheduler_rpcapi = scheduler_rpcapi - volume_api.scheduler_rpcapi.find_backend_for_connector.return_value =\ - host_obj - - volume_rpcapi = mock.Mock() - volume_api.volume_rpcapi = volume_rpcapi - - call_times = {volume.id: -1} - - def _initialize_connection_side_effect(context, volume, connector): - call_times[volume.id] += 1 - if call_times[volume.id] == 0: - # First time it rejects the connector - raise exception.ConnectorRejected(reason=None) - if call_times[volume.id] == 1: - # Second time (after migration) it returns the connection data - return ini_ret - - volume_rpcapi.initialize_connection.side_effect =\ - _initialize_connection_side_effect - - conn_result =\ - volume_api.initialize_connection(self.context, volume, connector) - - self.assertEqual(conn_result, ini_ret) - volume_rpcapi.initialize_connection.assert_has_calls([ - mock.call(self.context, volume, connector), - mock.call(self.context, volume, connector) - ]) - 
volume_rpcapi.migrate_volume.assert_called_once_with( - self.context, volume, mock.ANY, force_host_copy=False, - wait_for_completion=True) - backend = volume_rpcapi.migrate_volume.call_args[0][2] - self.assertEqual(backend.host, host_obj['host']) - self.assertEqual(backend.cluster_name, host_obj['cluster_name']) - self.assertEqual(backend.capabilities, host_obj['capabilities']) - class VolumeTestCaseLocks(base.BaseVolumeTestCase): MOCK_TOOZ = False diff --git a/cinder/volume/api.py b/cinder/volume/api.py index b08518b10ec..dacb3a71602 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -83,20 +83,12 @@ help='Cache volume availability zones in ' 'memory for the provided duration in ' 'seconds') -migrate_on_attach_opt = cfg.BoolOpt('allow_migration_on_attach', - default=False, - help="A host might recognise a connector " - "as valid but it can't use it to " - "initialize a connection. This will " - "allow to migrate the volume to a " - "valid host prior to attachment.") CONF = cfg.CONF CONF.register_opt(allow_force_upload_opt) CONF.register_opt(volume_host_opt) CONF.register_opt(volume_same_az_opt) CONF.register_opt(az_cache_time_opt) -CONF.register_opt(migrate_on_attach_opt) CONF.import_opt('glance_core_properties', 'cinder.image.glance') LOG = logging.getLogger(__name__) @@ -808,9 +800,7 @@ def detach(self, context, volume, attachment_id): def migrate_volume_by_connector(self, ctxt, volume, connector): if not connector: raise exception.InvalidInput("Must provide a valid Connector") - return self._migrate_by_connector(ctxt, volume, connector, wait=False) - def _migrate_by_connector(self, ctxt, volume, connector, wait=True): volume_type = {} if volume.volume_type_id: volume_type = volume_types.get_volume_type( @@ -833,7 +823,7 @@ def _migrate_by_connector(self, ctxt, volume, connector, wait=True): LOG.debug("Invoking migrate_volume to host=%(host).", dest['host']) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, - wait_for_completion=wait) + wait_for_completion=False) volume.refresh() def initialize_connection(self, context, volume, connector): @@ -847,23 +837,9 @@ def initialize_connection(self, context, volume, connector): "maintenance mode.") raise exception.InvalidVolume(reason=msg) - def _migrate_and_initialize_connection(): - self._migrate_by_connector(context, volume, connector) - return self.volume_rpcapi.initialize_connection(context, volume, - connector) - init_results = None - try: - init_results = self.volume_rpcapi.initialize_connection(context, - volume, - connector) - except exception.ConnectorRejected: - with excutils.save_and_reraise_exception() as exc_context: - if CONF.allow_migration_on_attach: - LOG.info("The connector was rejected by the volume " - "backend while initializing the connection. 
" - "Attempting to migrate it.") - init_results = _migrate_and_initialize_connection() - exc_context.reraise = False + init_results = self.volume_rpcapi.initialize_connection(context, + volume, + connector) LOG.info("Initialize volume connection completed successfully.", resource=volume) @@ -2234,26 +2210,12 @@ def attachment_create(self, attachment_ref = self._attachment_reserve(ctxt, volume_ref, instance_uuid) - try: - if connector: - connection_info = ( - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id)) - except exception.ConnectorRejected: - with excutils.save_and_reraise_exception() as exc_context: - if CONF.allow_migration_on_attach: - LOG.info("The connector was rejected by the volume " - "backend while updating the attachments. " - "Trying to migrate it.") - exc_context.reraise = False - self._migrate_by_connector(ctxt, volume_ref, connector) - connection_info =\ - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id) + if connector: + connection_info = ( + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id)) attachment_ref.connection_info = connection_info # Use of admin_metadata for RO settings is deprecated @@ -2325,26 +2287,11 @@ def attachment_update(self, ctxt, attachment_ref, connector): '%(vol)s') % {'vol': volume_ref.id} raise exception.InvalidVolume(reason=msg) - connection_info = None - try: - connection_info = ( - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id)) - except exception.ConnectorRejected: - with excutils.save_and_reraise_exception() as exc_context: - if CONF.allow_migration_on_attach: - LOG.info("The connector was rejected by the volume " - "backend while updating the attachments. " - "Trying to migrate it.") - exc_context.reraise = False - self._migrate_by_connector(ctxt, volume_ref, connector) - connection_info =\ - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id) + connection_info = ( + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id)) attachment_ref.connection_info = connection_info attachment_ref.save() return attachment_ref From f76b34a00682d69f9e4db23e745a265570f71279 Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 19 Oct 2021 15:06:09 -0400 Subject: [PATCH 059/149] [SAP] Fixed name of migrate_volume method name This fixes a conflict of the method name for the _migrate_volume (by connector) in the vmware_migrate extension. --- cinder/api/contrib/vmware_migrate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cinder/api/contrib/vmware_migrate.py b/cinder/api/contrib/vmware_migrate.py index bd7350b7d41..b40015d2cdd 100644 --- a/cinder/api/contrib/vmware_migrate.py +++ b/cinder/api/contrib/vmware_migrate.py @@ -36,7 +36,7 @@ class VMWareVolumeExtensionsController(admin_actions.VolumeAdminController): @wsgi.response(http_client.ACCEPTED) @wsgi.action('os-migrate_volume_by_connector') @validation.schema(vmware_actions.migrate_volume_by_connector) - def _migrate_volume(self, req, id, body): + def _migrate_volume_by_connector(self, req, id, body): """Migrate a volume based on connector. 
This is an SAP VMWare extension that requires From f9057824bdc08c476ed4f5bcca558a7961bacf9b Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 11 Mar 2020 21:47:14 +0000 Subject: [PATCH 060/149] [SAP] add support for reporting pools This patch adds the reporting of each datastore that is found in the list of storage_profiles configured, as an individual pools. This will expose each datastore in the profiles as schedulable via volume types, and enable thin provisioning limitations against each individual datastore. Added update_provider_info call to vmware driver. This patch modifies the volume manager's _sync_provider_info to allow it to update the host entries for volumes. The vmware driver adds an update_provider_info() call and loops through all the volumes and updates the host entry to include the pool name at the end of the entry. This is all done at cinder-volume service startup, which enables every volume existing in the cinder db to get an updated host entry, so after the pools are reported, cinder can still manage the existing volumes properly for api calls. If the host entry isn't updated, then Cinder will not be able to correctly take action on the volume as the host won't be running. [SAP] Update stats for each storage pool This patch updates the volume stats reporting for the vmdk driver to include datastores as pools when it's enabled and ensures no pools are reported when disabled. SAP Fix extra capabilities with pools. This patch ensures that extra_capabilities are inserted into each pool capabilities for the driver. Ensure migrate works with pools This patch updates the remote and select_ds_for_volumes to allow passing in a specific cinder_host, which happens during volume migration. Ensure we handle no backing volumes this patch ensures that we handle volumes that are still assigned to a backend_name for the pool during attach time. This is because we have lazy creates enabled and some volumes don't have any backing at the time of attach. 
We try and fallback to the old mechanism of assigning a datastore/pool --- .../unit/volume/drivers/vmware/test_fcd.py | 40 +- .../unit/volume/drivers/vmware/test_remote.py | 4 +- .../drivers/vmware/test_vmware_datastore.py | 5 +- .../volume/drivers/vmware/test_vmware_vmdk.py | 99 ++-- cinder/tests/unit/volume/test_capabilities.py | 4 +- cinder/volume/drivers/vmware/datastore.py | 82 ++- cinder/volume/drivers/vmware/remote.py | 17 +- cinder/volume/drivers/vmware/vmdk.py | 525 +++++++++++++----- cinder/volume/manager.py | 5 + 9 files changed, 560 insertions(+), 221 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index e79a8b37473..0abfab1aee2 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -69,6 +69,7 @@ def setUp(self): self._config.vmware_max_objects_retrieval = self.MAX_OBJECTS self._config.vmware_storage_profile = None self._config.reserved_percentage = self.RESERVED_PERCENTAGE + self._config.vmware_datastores_as_pools = False self._driver = fcd.VMwareVStorageObjectDriver( configuration=self._config) self._driver._vc_version = self.VC_VERSION @@ -88,40 +89,21 @@ def test_do_setup(self, vops, vmdk_do_setup): self.assertTrue(self._driver._storage_policy_enabled) @mock.patch.object(VMDK_DRIVER, 'session') - @mock.patch.object(VMDK_DRIVER, 'volumeops') - @mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries') - def test_get_volume_stats(self, _get_datastore_summaries, vops, - session): - FREE_GB = 7 - TOTAL_GB = 11 - - class ObjMock(object): - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - - _get_datastore_summaries.return_value = \ - ObjMock(objects= [ - ObjMock(propSet = [ - ObjMock(name = "host", - val = ObjMock(DatastoreHostMount = [])), - ObjMock(name = "summary", - val = ObjMock(freeSpace = FREE_GB * units.Gi, - capacity = TOTAL_GB * units.Gi, - accessible = True)) - ]) - ]) - - vops._in_maintenance.return_value = False - + def test_get_volume_stats(self, session): + retr_result_mock = mock.Mock(spec=['objects']) + retr_result_mock.objects = [] + session.vim.RetrievePropertiesEx.return_value = retr_result_mock + session.vim.service_content.about.instanceUuid = 'fake-service' stats = self._driver.get_volume_stats() self.assertEqual('VMware', stats['vendor_name']) self.assertEqual(self._driver.VERSION, stats['driver_version']) self.assertEqual(self._driver.STORAGE_TYPE, stats['storage_protocol']) - self.assertEqual(self.RESERVED_PERCENTAGE, - stats['reserved_percentage']) - self.assertEqual(TOTAL_GB, stats['total_capacity_gb']) - self.assertEqual(FREE_GB, stats['free_capacity_gb']) + self.assertEqual(0, stats['reserved_percentage']) + self.assertEqual(0, stats['total_capacity_gb']) + self.assertEqual(0, stats['free_capacity_gb']) + self.assertEqual(vmdk.LOCATION_DRIVER_NAME + ":fake-service", + stats['location_info']) def _create_volume_dict(self, vol_id=VOL_ID, diff --git a/cinder/tests/unit/volume/drivers/vmware/test_remote.py b/cinder/tests/unit/volume/drivers/vmware/test_remote.py index 4aa4cbd7489..140c88a5b68 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_remote.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_remote.py @@ -40,7 +40,7 @@ def test_select_ds_for_volume(self): self._test_rpc_api('select_ds_for_volume', rpc_method='call', server=self._fake_host, - host=self._fake_host, + cinder_host=self._fake_host, volume=self._fake_volume) def test_move_backing_to_folder(self): @@ -91,7 
+91,7 @@ def test_select_ds_for_volume(self): ret_val = self._service.select_ds_for_volume(self._ctxt, self._fake_volume) self._driver._select_ds_for_volume.assert_called_once_with( - self._fake_volume) + self._fake_volume, cinder_host=None) self.assertEqual({ 'host': fake_host.value, 'resource_pool': fake_rp.value, diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py index caeff9d3f38..771f8ac83a6 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py @@ -298,6 +298,8 @@ def mock_is_usable(mount_info): def test_select_best_datastore_with_empty_datastores(self): self.assertIsNone(self._ds_sel._select_best_datastore({})) + @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' + '_is_host_usable') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' 'is_host_in_buildup_cluster') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' @@ -310,7 +312,7 @@ def test_select_best_datastore_with_empty_datastores(self): '_select_best_datastore') def test_select_datastore( self, select_best_datastore, filter_datastores, get_datastores, - get_profile_id, is_buildup): + get_profile_id, is_buildup, is_usable): profile_id = mock.sentinel.profile_id get_profile_id.return_value = profile_id @@ -325,6 +327,7 @@ def test_select_datastore( select_best_datastore.return_value = best_datastore is_buildup.return_value = False + is_usable.return_value = True size_bytes = 1024 req = {self._ds_sel.SIZE_BYTES: size_bytes} diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 365c04c989e..484ce14ee1b 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -121,6 +121,7 @@ def setUp(self): vmware_storage_profile=[self.STORAGE_PROFILE], vmware_select_random_best_datastore=False, vmware_random_datastore_range=None, + vmware_datastores_as_pools=False, ) self._db = mock.Mock() @@ -181,46 +182,82 @@ def test_get_storage_profile_id( get_profile_id_by_name.assert_called_once_with(session, 'gold') @mock.patch.object(VMDK_DRIVER, 'session') - @mock.patch.object(VMDK_DRIVER, 'volumeops') - @mock.patch.object(VMDK_DRIVER, '_get_datastore_summaries') - def test_get_volume_stats(self, _get_datastore_summaries, vops, - session): + def test_get_volume_stats_no_pools(self, session): retr_result_mock = mock.Mock(spec=['objects']) retr_result_mock.objects = [] session.vim.RetrievePropertiesEx.return_value = retr_result_mock session.vim.service_content.about.instanceUuid = 'fake-service' - FREE_GB = 7 - TOTAL_GB = 11 - - class ObjMock(object): - def __init__(self, **kwargs): - self.__dict__.update(kwargs) - - _get_datastore_summaries.return_value = (ObjMock(objects= [ - ObjMock(propSet = [ - ObjMock(name = "host", - val = ObjMock(DatastoreHostMount = [])), - ObjMock(name = "summary", - val = ObjMock(freeSpace = FREE_GB * units.Gi, - capacity = TOTAL_GB * units.Gi, - accessible = True)) - ]) - ])) - - vops._in_maintenance.return_value = False - # Enable volume stats collection from backend - self._driver.configuration.vmware_enable_volume_stats = True + stats = self._driver.get_volume_stats() + + self.assertEqual('VMware', stats['vendor_name']) + self.assertEqual(self._driver.VERSION, stats['driver_version']) + 
self.assertEqual('vmdk', stats['storage_protocol']) + self.assertEqual(0, stats['reserved_percentage']) + self.assertEqual(0, stats['total_capacity_gb']) + self.assertEqual(0, stats['free_capacity_gb']) + self.assertEqual(vmdk.LOCATION_DRIVER_NAME + ":fake-service", + stats['location_info']) + + def _fake_stats_result(self): + ds_attrs = { + "value": "datastore-85", + } + ds = mock.Mock(**ds_attrs) + + summary_attrs = { + "datastore": ds, + "name": "datastore2", + "url": "ds:///vmfs/volumes/5ed14b0e-ee8de32d-2014-525400b59848/", + "capacity": 10000000000000, + "freeSpace": 5000000000000, + "uncommitted": 5000000000000, + "accessible": True, + "multipleHostAccess": False, + "type": "VMFS", + "maintenanceMode": "normal", + } + summary = mock.Mock(**summary_attrs) + summary.name = "datastore2" + + propset_attrs = {"val": summary} + _propSet = mock.MagicMock(**propset_attrs) + + class props(object): + propSet = [_propSet] + + class result(object): + objects = [props()] + datastores = {"datastore-85": {"summary": summary, + "storage_profile": "Gold"}} + return result(), datastores + + @mock.patch.object(VMDK_DRIVER, '_collect_backend_stats') + @mock.patch.object(VMDK_DRIVER, 'session') + def test_get_volume_stats_pools(self, session, mock_stats): + fake_result, fake_datastore_profiles = self._fake_stats_result() + mock_stats.return_value = (fake_result, fake_datastore_profiles) + self._config.vmware_datastores_as_pools = True + self._driver = vmdk.VMwareVcVmdkDriver(configuration=self._config, + additional_endpoints=[], + db=self._db) + + retr_result_mock = mock.Mock(spec=['objects']) + retr_result_mock.objects = [] + session.vim.RetrievePropertiesEx.return_value = retr_result_mock + session.vim.service_content.about.instanceUuid = 'fake-service' stats = self._driver.get_volume_stats() self.assertEqual('VMware', stats['vendor_name']) self.assertEqual(self._driver.VERSION, stats['driver_version']) self.assertEqual('vmdk', stats['storage_protocol']) - self.assertEqual(self._config.reserved_percentage, - stats['reserved_percentage']) - self.assertEqual(TOTAL_GB, stats['total_capacity_gb']) - self.assertEqual(FREE_GB, stats['free_capacity_gb']) - self.assertFalse(stats['shared_targets']) + self.assertIn('pools', stats) + self.assertEqual(1, len(stats["pools"])) + self.assertEqual(0, stats["pools"][0]['reserved_percentage']) + self.assertEqual(9313, stats["pools"][0]['total_capacity_gb']) + self.assertEqual(4657, stats["pools"][0]['free_capacity_gb']) + self.assertEqual('up', stats["pools"][0]['backend_state']) + self.assertFalse(stats["pools"][0]['Multiattach']) self.assertEqual(vmdk.LOCATION_DRIVER_NAME + ":fake-service", stats['location_info']) @@ -3618,7 +3655,7 @@ def _assertions_for_migration(): mock.sentinel.context, dest_host) r_api.select_ds_for_volume.assert_called_once_with( - mock.sentinel.context, dest_host, volume) + mock.sentinel.context, volume=volume, cinder_host=dest_host) get_moref.assert_has_calls([ mock.call(ds_info['host'], 'HostSystem'), diff --git a/cinder/tests/unit/volume/test_capabilities.py b/cinder/tests/unit/volume/test_capabilities.py index 773ea290972..9c68ed41057 100644 --- a/cinder/tests/unit/volume/test_capabilities.py +++ b/cinder/tests/unit/volume/test_capabilities.py @@ -199,9 +199,9 @@ def test_extra_capabilities(self): self.assertTrue(mock_loads.called) volume_stats = manager.last_capabilities self.assertEqual(fake_capabilities['key1'], - volume_stats['key1']) + volume_stats["pools"][0]['key1']) self.assertEqual(fake_capabilities['key2'], - 
volume_stats['key2']) + volume_stats["pools"][0]['key2']) def test_extra_capabilities_fail(self): with mock.patch.object(jsonutils, 'loads') as mock_loads: diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 2ae87a461f7..5c83b28f13a 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -25,6 +25,7 @@ from oslo_vmware import vim_util from cinder import coordination +from cinder import exception from cinder.volume.drivers.vmware import exceptions as vmdk_exceptions @@ -121,6 +122,22 @@ def bool_from_str(bool_str): cache[host_cluster.value] = result return result + def _is_host_usable(self, host_ref, host_prop_map=None): + if host_prop_map is None: + host_prop_map = {} + props = host_prop_map.get(host_ref.value) + if props is None: + props = self._get_host_properties(host_ref) + host_prop_map[host_ref.value] = props + + runtime = props.get('runtime') + parent = props.get('parent') + if runtime and parent: + return (runtime.connectionState == 'connected' and + not runtime.inMaintenanceMode) + else: + return False + def _filter_hosts(self, hosts): """Filter out any hosts that are in a cluster marked buildup.""" @@ -128,11 +145,15 @@ def _filter_hosts(self, hosts): cache = {} if hosts: if isinstance(hosts, Iterable): + host_prop_map = {} for host in hosts: - if not self.is_host_in_buildup_cluster(host, cache): + if (not self.is_host_in_buildup_cluster(host, cache) + and self._is_host_usable( + host, host_prop_map=host_prop_map)): valid_hosts.append(host) else: - if not self.is_host_in_buildup_cluster(hosts, cache): + if (not self.is_host_in_buildup_cluster(hosts, cache) + and self._is_host_usable(host)): valid_hosts.append(hosts) return valid_hosts @@ -225,6 +246,41 @@ def _get_datastores(self): return datastores + def select_datastore_by_name(self, name): + """Find a datastore by it's name. + + Returns a host_ref and datastore summary. 
+ """ + + resource_pool = None + datastore = None + datastores = self._get_datastores() + for k, v in datastores.items(): + if v['summary'].name == name: + datastore = v + + if not datastore: + # this shouldn't ever happen as the scheduler told us + # to use this named datastore + return (None, None, None) + + summary = datastore['summary'] + # pick a host that's available + hosts = [host['key'] for host in datastore['host']] + hosts = self._filter_hosts(hosts) + if not hosts: + raise exception.InvalidInput( + "No hosts available for datastore '%s'" % name) + + host = random.choice(hosts) + + # host_ref = datastore['host'][0]['key'] + host_props = self._get_host_properties(host) + parent = host_props.get('parent') + + resource_pool = self._get_resource_pool(parent) + return (host, resource_pool, summary) + def _get_host_properties(self, host_ref): retrieve_result = self._session.invoke_api(vim_util, 'get_object_properties', @@ -256,30 +312,17 @@ def _sort_key(ds_props): host_prop_map = {} - def _is_host_usable(host_ref): - props = host_prop_map.get(host_ref.value) - if props is None: - props = self._get_host_properties(host_ref) - host_prop_map[host_ref.value] = props - - runtime = props.get('runtime') - parent = props.get('parent') - if runtime and parent: - return (runtime.connectionState == 'connected' and - not runtime.inMaintenanceMode) - else: - return False - valid_host_refs = valid_host_refs or [] valid_hosts = [host_ref.value for host_ref in valid_host_refs] - def _select_host(host_mounts): + def _select_host(host_mounts, host_prop_map): random.shuffle(host_mounts) for host_mount in host_mounts: if valid_hosts and host_mount.key.value not in valid_hosts: continue if (self._vops._is_usable(host_mount.mountInfo) and - _is_host_usable(host_mount.key)): + self._is_host_usable(host_mount.key, + host_prop_map=host_prop_map)): return host_mount.key sorted_ds_props = sorted(datastores.values(), key=_sort_key) @@ -290,7 +333,8 @@ def _select_host(host_mounts): random.shuffle(sorted_ds_props) for ds_props in sorted_ds_props: - host_ref = _select_host(ds_props['host']) + host_ref = _select_host( + ds_props['host'], host_prop_map=host_prop_map) if host_ref: rp = self._get_resource_pool( host_prop_map[host_ref.value]['parent']) diff --git a/cinder/volume/drivers/vmware/remote.py b/cinder/volume/drivers/vmware/remote.py index 4df7d39e77a..f87fcd28486 100644 --- a/cinder/volume/drivers/vmware/remote.py +++ b/cinder/volume/drivers/vmware/remote.py @@ -41,9 +41,10 @@ def get_service_locator_info(self, ctxt, host): cctxt = self._get_cctxt(host) return cctxt.call(ctxt, 'get_service_locator_info') - def select_ds_for_volume(self, ctxt, host, volume): - cctxt = self._get_cctxt(host) - return cctxt.call(ctxt, 'select_ds_for_volume', volume=volume) + def select_ds_for_volume(self, ctxt, cinder_host, volume): + cctxt = self._get_cctxt(cinder_host) + return cctxt.call(ctxt, 'select_ds_for_volume', volume=volume, + cinder_host=cinder_host) def move_volume_backing_to_folder(self, ctxt, host, volume, folder): cctxt = self._get_cctxt(host) @@ -67,9 +68,15 @@ def __init__(self, driver): def get_service_locator_info(self, ctxt): return self._driver.service_locator_info - def select_ds_for_volume(self, ctxt, volume): + def select_ds_for_volume(self, ctxt, volume, cinder_host=None): + """Select datastore for volume. + + cinder_host is a host@backend_name#pool entry. 
+ host is an vmware host, which is returned from the driver call + to select_ds_for_volume and returned as part of this call + """ (host, rp, folder, summary) = self._driver._select_ds_for_volume( - volume) + volume, cinder_host=cinder_host) profile_id = self._driver._get_storage_profile_id(volume) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index dd178830903..e467e32ec60 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -138,6 +138,7 @@ help='Name of a vCenter compute cluster where volumes ' 'should be created.'), cfg.MultiStrOpt('vmware_storage_profile', + default=[], help='Names of storage profiles to be monitored. Only ' 'used when vmware_enable_volume_stats is True.'), cfg.IntOpt('vmware_connection_pool_size', @@ -165,7 +166,7 @@ help='Regular expression pattern to match the name of ' 'datastores where backend volumes are created.'), cfg.BoolOpt('vmware_enable_volume_stats', - default=False, + default=True, help='If true, this enables the fetching of the volume stats ' 'from the backend. This has potential performance ' 'issues at scale. When False, the driver will not ' @@ -196,6 +197,13 @@ 'datastores, and vmware_random_datastore_range is set to 5 ' 'Then it will filter in 5 datastores prior to randomizing ' 'the datastores to pick from.'), + cfg.BoolOpt('vmware_datastores_as_pools', + default=False, + help='Enable reporting individual datastores as pools. ' + 'This allows the cinder scheduler to pick which datastore ' + 'a volume lives on. This also enables managing capacity ' + 'for each datastore by cinder. ' + ) ] CONF = cfg.CONF @@ -318,7 +326,8 @@ class VMwareVcVmdkDriver(driver.VolumeDriver): # 3.4.2.99.1 - VMware implementation of volume migration # 3.4.2.99.2 - Added soft sharding volume migration, fixed a small issue # in check_for_setup_error where storage_profile not set. - VERSION = '3.4.2.99.2' + # 3.4.2.99.3 - Add support for reporting each datastore as a pool. + VERSION = '3.4.2.99.3' # ThirdPartySystems wiki page CI_WIKI_NAME = "VMware_CI" @@ -347,6 +356,7 @@ def __init__(self, *args, **kwargs): remote_api.VmdkDriverRemoteService(self) ]) self._remote_api = remote_api.VmdkDriverRemoteApi() + self._storage_profiles = [] @staticmethod def get_driver_options(): @@ -390,7 +400,6 @@ def check_for_setup_error(self): % storage_profile) raise exception.InvalidInput(reason=reason) - def _update_volume_stats(self): if self.configuration.safe_get('vmware_enable_volume_stats'): self._stats = self._get_volume_stats() @@ -422,90 +431,24 @@ def _get_connection_capabilities(self): 'vmware_service_instance_uuid:%s' % self._vcenter_instance_uuid] - def _get_volume_stats(self): - """Fetch the stats about the backend. - - This can be slow at scale, but allows - properly provisioning scheduling. 
- """ - backend_name = self.configuration.safe_get('volume_backend_name') - if not backend_name: - backend_name = self.__class__.__name__ - - max_over_subscription_ratio = self.configuration.safe_get( - 'max_over_subscription_ratio') - data = {'volume_backend_name': backend_name, - 'vendor_name': 'VMware', - 'driver_version': self.VERSION, - 'storage_protocol': 'vmdk', - 'reserved_percentage': self.configuration.reserved_percentage, - 'shared_targets': False, - 'thin_provisioning_support': True, - 'thick_provisioning_support': True, - 'max_over_subscription_ratio': max_over_subscription_ratio, - 'connection_capabilities': self._get_connection_capabilities()} - ds_summaries = self._get_datastore_summaries() - available_hosts = self._get_hosts(self._clusters) - global_capacity = 0 - global_free = 0 - if ds_summaries: - with vim_util.WithRetrieval( - self.session.vim, ds_summaries) as objects: - for ds in objects: - ds_props = self._get_object_properties(ds) - summary = ds_props['summary'] - if self._is_datastore_accessible(summary, - ds_props['host'], - available_hosts): - global_capacity += summary.capacity - global_free += summary.freeSpace - - data['total_capacity_gb'] = round(global_capacity / units.Gi) - data['free_capacity_gb'] = round(global_free / units.Gi) - location_info = '%(driver_name)s:%(vcenter)s' % { - 'driver_name': LOCATION_DRIVER_NAME, - 'vcenter': self.session.vim.service_content.about.instanceUuid} - data['location_info'] = location_info - self._stats = data - return data + def _collect_backend_stats(self): + """Build the call and return the results for stats.""" - def _get_datastore_summaries(self): client_factory = self.session.vim.client.factory object_specs = [] - if (self._storage_policy_enabled - and self.configuration.vmware_storage_profile): - # Get all available storage profiles on the vCenter and extract the - # IDs of those that we want to observe - profiles_ids = [] - for profile in pbm.get_all_profiles(self.session): - if profile.name in self.configuration.vmware_storage_profile: - profiles_ids.append(profile.profileId) + result = [] + if (self._storage_policy_enabled and self._storage_profiles): # Get all matching Datastores for each profile - datastores = {} - for profile_id in profiles_ids: - for pbm_hub in pbm.filter_hubs_by_profile(self.session, - None, - profile_id): - if pbm_hub.hubType != "Datastore": - # We are not interested in Datastore Clusters for now - continue - if pbm_hub.hubId not in datastores: - # Reconstruct a managed object reference to datastore - datastores[pbm_hub.hubId] = vim_util.get_moref( - pbm_hub.hubId, "Datastore") - # Build property collector object specs out of them - for datastore_ref in datastores.values(): - object_specs.append( - vim_util.build_object_spec(client_factory, - datastore_ref, - [])) - + LOG.debug("Storage Profile = '%s'", self._storage_profiles) + datastores = self._get_datastores_for_profiles() if not datastores: LOG.warning("No Datastores found for storage profile(s) " "''%s'", ', '.join( self.configuration.safe_get( 'vmware_storage_profile'))) + + return None, datastores else: # Build a catch-all object spec that would reach all datastores object_specs.append( @@ -514,44 +457,103 @@ def _get_datastore_summaries(self): self.session.vim.service_content.rootFolder, [vim_util.build_recursive_traversal_spec(client_factory)])) - # If there are no datastores, we won't have object_specs and will - # fail when trying to get stats - if not object_specs: - return + # If there are no datastores, then object specs are 
empty + # we can't query vcenter with empty object specs, or we'll + # get errors. + if object_specs: + prop_spec = vim_util.build_property_spec( + client_factory, 'Datastore', ['summary']) + filter_spec = vim_util.build_property_filter_spec( + client_factory, prop_spec, object_specs) + options = client_factory.create('ns0:RetrieveOptions') + max_objects = self.configuration.vmware_max_objects_retrieval + options.maxObjects = max_objects + result = self.session.vim.RetrievePropertiesEx( + self.session.vim.service_content.propertyCollector, + specSet=[filter_spec], + options=options) + + return (result, {}) + + @volume_utils.trace + def _get_volume_stats(self): + backend_name = self.configuration.safe_get('volume_backend_name') + if not backend_name: + backend_name = self.__class__.__name__ + + location_info = '%(driver_name)s:%(vcenter)s' % { + 'driver_name': LOCATION_DRIVER_NAME, + 'vcenter': self.session.vim.service_content.about.instanceUuid} + reserved_percentage = self.configuration.reserved_percentage + max_over_subscription_ratio = self.configuration.safe_get( + 'max_over_subscription_ratio') + + data = {'volume_backend_name': backend_name, + 'vendor_name': 'VMware', + 'driver_version': self.VERSION, + 'storage_protocol': 'vmdk', + 'location_info': location_info, + } + + result, datastores = self._collect_backend_stats() + if self.configuration.vmware_datastores_as_pools: + pools = [] + for ds_name in datastores: + datastore = datastores[ds_name] + summary = datastore["summary"] + pool_state = "up" if summary.accessible is True else "down" + pool = {'pool_name': summary.name, + 'total_capacity_gb': round( + summary.capacity / units.Gi), + 'free_capacity_gb': round( + summary.freeSpace / units.Gi), + 'thin_provisioning_support': True, + 'max_over_subscription_ratio': ( + max_over_subscription_ratio), + 'reserved_percentage': reserved_percentage, + 'Multiattach': False, + 'datastore_type': summary.type, + 'location_url': summary.url, + 'location_info': location_info, + 'backend_state': pool_state, + 'storage_profile': datastore["storage_profile"], + 'connection_capabilities': ( + self._get_connection_capabilities(),) + } + pools.append(pool) + data['pools'] = pools + return data + + if (self._storage_policy_enabled and self._storage_profiles): + global_capacity = global_free = 0 + # Pools are disabled, but storage profiles are enabled. 
+ # so we collect all the stats from all the datastores returned + for ds_name in datastores: + datastore = datastores[ds_name] + summary = datastore["summary"] + global_capacity += summary.capacity + global_free += summary.freeSpace - prop_spec = vim_util.build_property_spec(client_factory, 'Datastore', - ['summary', 'host']) - filter_spec = vim_util.build_property_filter_spec(client_factory, - prop_spec, - object_specs) - options = client_factory.create('ns0:RetrieveOptions') - options.maxObjects = self.configuration.vmware_max_objects_retrieval - result = self.session.vim.RetrievePropertiesEx( - self.session.vim.service_content.propertyCollector, - specSet=[filter_spec], - options=options) - return result - - def _get_object_properties(self, obj_content): - props = {} - if hasattr(obj_content, 'propSet'): - prop_set = obj_content.propSet - if prop_set: - props = {prop.name: prop.val for prop in prop_set} - return props - - def _is_datastore_accessible(self, ds_summary, ds_host_mounts, - available_hosts): - # available_hosts empty => vmware_cluster_name not specified => don't - # filter by hosts - cluster_access_to_ds = not available_hosts - for host_mount in ds_host_mounts.DatastoreHostMount: - for avlbl_host in available_hosts: - if avlbl_host.value == host_mount.key.value: - cluster_access_to_ds = True - return (ds_summary.accessible - and not self.volumeops._in_maintenance(ds_summary) - and cluster_access_to_ds) + else: + global_capacity = global_free = 0 + with vim_util.WithRetrieval(self.session.vim, result) as objects: + for ds in objects: + summary = ds.propSet[0].val + global_capacity += summary.capacity + global_free += summary.freeSpace + + data_no_pools = { + 'reserved_percentage': self.configuration.reserved_percentage, + 'total_capacity_gb': round(global_capacity / units.Gi), + 'free_capacity_gb': round(global_free / units.Gi), + 'thin_provisioning_support': True, + 'thick_provisioning_support': True, + 'max_over_subscription_ratio': max_over_subscription_ratio, + 'connection_capabilities': self._get_connection_capabilities(), + } + data.update(data_no_pools) + + return data def _verify_volume_creation(self, volume): """Verify that the volume can be created. @@ -677,6 +679,7 @@ def _get_extra_config(self, volume): return {EXTRA_CONFIG_VOLUME_ID_KEY: volume['id'], volumeops.BACKING_UUID_KEY: volume['id']} + @volume_utils.trace def _create_backing(self, volume, host=None, create_params=None): """Create volume backing under the given host. @@ -688,9 +691,11 @@ def _create_backing(self, volume, host=None, create_params=None): backing VM creation :return: Reference to the created backing """ - create_params = create_params or {} + (host_ref, resource_pool, folder, - summary) = self._select_ds_for_volume(volume, host) + summary) = self._select_ds_for_volume(volume, host) + + create_params = create_params or {} # check if a storage profile needs to be associated with the backing VM profile_id = self._get_storage_profile_id(volume) @@ -735,6 +740,17 @@ def _create_backing(self, volume, host=None, create_params=None): extra_config=extra_config) self.volumeops.update_backing_disk_uuid(backing, volume['id']) + if (self.configuration.vmware_datastores_as_pools and + self._is_pool_outdated_for_volume(volume)): + # TODO(walt) - this writes the volume update to the db. 
:( + # This sucks, but don't have any other way + new_host = self._new_host_for_volume(volume) + LOG.info("Changing volume host from {} to {}".format( + volume.host, new_host + )) + model_update = {'host': new_host} + volume.update(model_update) + volume.save() return backing def _get_hosts(self, clusters): @@ -774,25 +790,108 @@ def _get_dc(self, resource_pool): self._dc_cache[resource_pool.value] = dc return dc - def _select_ds_for_volume(self, volume, host=None, create_params=None): + @volume_utils.trace + def _select_ds_by_name_for_volume(self, datastore_name, volume): + + # we need a host_ref, resource_pool, folder and summary + (host_ref, + resource_pool, + summary) = self.ds_sel.select_datastore_by_name(datastore_name) + + if not summary: + # couldn't find the datastore by name + return (None, None, None, None) + + # Get the host_ref + dc = self._get_dc(resource_pool) + folder = self._get_volume_group_folder(dc, volume['project_id']) + + return (host_ref, resource_pool, folder, summary) + + @volume_utils.trace + def _is_pool_outdated_for_volume(self, volume): + """When datastores as pools is enabled. + + This check determines if the pool name is the same as the + volume_backend_name when vmware_datastores_as_pools is enabled. + + This can happen when lazy create is enabled, vmware_datastores_as_pools + is disabled and a volume is created. There will be no backing for + the volume. Then you enable vmware_datastores_as_pools and the driver + tries to automatically update the host entry for the volume. Since + the volume has no backing, there is no datastore chosen for the + volume, and then the host will still remain as + service@backend_name#backend_name instead of + service@backend_name#pool + + We have to ensure that if we do create the backing finally for + this volume, that we force a cinder db update for the host. + """ + + host_entry = volume_utils.extract_host(volume['host'], 'backend') + backend_name = host_entry.split('@')[1] + datastore_name = volume_utils.extract_host(volume['host'], 'pool') + if self.configuration.vmware_datastores_as_pools: + return backend_name == datastore_name + else: + return False + + @volume_utils.trace + def _select_ds_for_volume(self, volume, host=None, create_params=None, + cinder_host=None): """Select datastore that can accommodate the given volume's backing. + host is a vmware esxi host + cinder_host is a service@backend_name#pool + Returns the selected datastore summary along with a compute host and its resource pool and folder where the volume can be created :return: (host, resource_pool, folder, summary) """ - # Form requirements for datastore selection. - create_params = create_params or {} - size = create_params.get(CREATE_PARAM_DISK_SIZE, volume['size']) + datastore_picked = False + if self.configuration.vmware_datastores_as_pools: + # we pick the datastore from the pool name + if not cinder_host: + cinder_host = volume['host'] + + host_entry = volume_utils.extract_host(cinder_host, 'backend') + host_parts = host_entry.split('@') + datastore_name = volume_utils.extract_host(cinder_host, 'pool') + # we might be a volume that has no backing yet that couldn't + # have had their host entry updated due to lazy create. + # IF so the backend name and pool name are the same, so a + # datastore wasn't picked for this volume. 
+ if datastore_name != host_parts[1]: + (host_ref, resource_pool, + folder, summary) = self._select_ds_by_name_for_volume( + datastore_name, volume) + if summary: + # we were able to use the datastore from the host entry + # so don't use fallback. + datastore_picked = True + else: + raise exception.InvalidInput( + "Couldn't find datastore with name '%s'" % + datastore_name) + else: + LOG.info("Volume backend name and pool name are same. Using " + "Fallback mechanism to pick a datastore.") - req = {} - req[hub.DatastoreSelector.SIZE_BYTES] = size * units.Gi - req[hub.DatastoreSelector.PROFILE_NAME] = self._get_storage_profile( - volume) + if not datastore_picked: + # Form requirements for datastore selection. + create_params = create_params or {} + size = create_params.get(CREATE_PARAM_DISK_SIZE, volume['size']) - (host_ref, resource_pool, summary) = self._select_datastore(req, host) - dc = self._get_dc(resource_pool) - folder = self._get_volume_group_folder(dc, volume['project_id']) + req = {} + req[hub.DatastoreSelector.SIZE_BYTES] = size * units.Gi + req[hub.DatastoreSelector.PROFILE_NAME] = ( + self._get_storage_profile(volume) + ) + + (host_ref, resource_pool, summary) = self._select_datastore(req, + host) + dc = self._get_dc(resource_pool) + folder = self._get_volume_group_folder(dc, volume['project_id']) return (host_ref, resource_pool, folder, summary) @@ -2262,10 +2361,166 @@ def do_setup(self, context): self.volumeops.build_backing_ref_cache() + # Cache the storage profiles, so we don't + # have to fetch them every time. + if self.configuration.vmware_storage_profile: + self._get_storage_profiles() + LOG.info("Successfully setup driver: %(driver)s for server: " "%(ip)s.", {'driver': self.__class__.__name__, 'ip': self.configuration.vmware_host_ip}) + def _get_storage_profiles(self): + """Fetch the list of configured storage profiles we use.""" + + LOG.debug("Profiles = '%s'", self.configuration.vmware_storage_profile) + for profile in pbm.get_all_profiles(self.session): + if profile.name in self.configuration.vmware_storage_profile: + profile_dict = {"name": profile.name, + "id": profile.profileId} + self._storage_profiles.append(profile_dict) + + def _volume_provider_metadata(self, volume, backing=None): + if not backing: + backing = self.volumeops.get_backing(volume.name, volume.id) + + ds = self.volumeops.get_datastore(backing) + summary = self.volumeops.get_summary(ds) + profile = self._get_storage_profile(volume) + vcenter_uuid = ( + self.session.vim.service_content.about.instanceUuid + ) + provider_info = { + 'vmware_vcenter_id': vcenter_uuid, + 'vmware_ds_name': summary.name, + 'vmware_profile_name': profile, + } + + return provider_info + + @volume_utils.trace + def _get_datastores_for_profiles(self): + datastores = {} + for profile in self._storage_profiles: + for h in pbm.filter_hubs_by_profile(self.session, + None, + profile['id']): + if h.hubType != "Datastore": + # We are not interested in Datastore Clusters for now + continue + if h.hubId not in datastores: + # Reconstruct a managed object reference to that + # datastore + ds = vim_util.get_moref(h.hubId, "Datastore") + summary = self.volumeops.get_summary(ds) + datastores[summary.name] = {'summary': summary, + 'storage_profile': profile} + + return datastores + + def _new_host_for_volume(self, volume): + pool_info = volume_utils.extract_host( + volume.host, level='pool', default_pool_name=True) + model = self._volume_provider_metadata(volume) + if pool_info != model['vmware_ds_name']: + host = 
volume_utils.extract_host( + volume.host, level='host') + back = volume_utils.extract_host(volume.host) + backend = back.split('@')[1] + + new_host = '{}@{}#{}'.format( + host, backend, + model['vmware_ds_name'] + ) + return new_host + + @volume_utils.trace + def update_provider_info(self, volumes, snapshots): + """Ensure we have a provider_id set on volumes. + + If there is a provider_id already set then skip, if it is missing then + we will update it based on the volume object. We can always compute + the id if we have the full volume object, but not all driver API's + give us that info. + + We have to save each volume entry if they update their host, otherwise + cinder volume manager doesn't see that volume as part of the host + allocated_capacity calculation, which happens right after this call + completes. + + We don't care about snapshots, they just use the volume's provider_id. + """ + LOG.info("HOST {} : volumes {}".format(self.host, len(volumes))) + if self.configuration.vmware_datastores_as_pools: + LOG.info("vmware_datastores_as_pools is enabled. " + "Checking host entries for volumes and snapshots.") + datastores = self._get_datastores_for_profiles() + ds_keys = datastores.keys() + vol_updates = [] + LOG.info("Process {} volumes".format(len(volumes))) + for vol in volumes: + # make sure we have the correc host info + if vol['status'] in ['in-use', 'available']: + # do we need to update the host? + pool_info = volume_utils.extract_host( + vol.host, level='pool', default_pool_name=True) + + # IF the pool has already been set correctly, then + # no need to make api calls to vcenter to fetch the + # datastore name from the volume backing information. + # this will save time on every startup + if (pool_info not in ds_keys or + pool_info == volume_utils.DEFAULT_POOL_NAME): + LOG.debug("Updating host for volume {}".format(vol.id)) + + try: + new_host = self._new_host_for_volume(vol) + if new_host: + vol.update({'host': new_host}) + vol.save() + except Exception as ex: + LOG.warning("Couldn't update host for {} because " + " {}".format(vol.id, ex)) + else: + LOG.debug("Keeping host for volume {}".format(vol.id)) + + LOG.info("HOST COMPLETE {}".format(self.host)) + return vol_updates, None + else: + # Since pools are not enabled, we should ensure that the datastore + # isn't part of the host. This allows us to go backwards to + # not using datastores as pools. + LOG.info("vmware_datastores_as_pools is disabled.") + + vol_updates = [] + for vol in volumes: + # make sure we have the correc host info + if vol['status'] in ['in-use', 'available']: + # do we need to update the host? + pool_info = volume_utils.extract_host( + vol.host, level='pool') + backend_info = volume_utils.extract_host( + vol.host, level='backend' + ) + backend = backend_info.split("@") + LOG.info("Volume host '{}' backend '{}' pool '{}'".format( + vol.host, backend[1], pool_info)) + + # we need to force the host back to + # host@backend#backend + new_host = "{}@{}#{}".format( + backend[0], + backend[1], + backend[1] + ) + if new_host != vol.host: + LOG.info("Setting host to {}".format(new_host)) + vol.update({'host': new_host}) + vol.save() + + LOG.info("HOST COMPLETE {}".format(self.host)) + return vol_updates, None + def _get_volume_group_folder(self, datacenter, project_id, snapshot=False): """Get inventory folder for organizing volume backings and snapshots. 
@@ -2390,9 +2645,11 @@ def _clone_backing(self, volume, backing, snapshot, clone_type, src_vsize): host = None rp = None folder = None - if clone_type != volumeops.LINKED_CLONE_TYPE: - # Pick a datastore where to create the full clone under any host - (host, rp, folder, summary) = self._select_ds_for_volume(volume) + if not clone_type == volumeops.LINKED_CLONE_TYPE: + # Pick a datastore where to create the full clone under + # any host + (host, rp, folder, summary) = self._select_ds_for_volume( + volume) datastore = summary.datastore extra_config = self._get_extra_config(volume) clone = self.volumeops.clone_backing(volume['name'], backing, @@ -2422,6 +2679,7 @@ def _clone_backing(self, volume, backing, snapshot, clone_type, src_vsize): VMwareVcVmdkDriver._get_disk_type(volume)) LOG.info("Successfully created clone: %s.", clone) + @volume_utils.trace def _create_volume_from_template(self, volume, path): LOG.debug("Creating backing for volume: %(volume_id)s from template " "at path: %(path)s.", @@ -2431,6 +2689,7 @@ def _create_volume_from_template(self, volume, path): # Create temporary backing by cloning the template. tmp_name = uuidutils.generate_uuid() + (host, rp, folder, summary) = self._select_ds_for_volume(volume) datastore = summary.datastore disk_type = VMwareVcVmdkDriver._get_disk_type(volume) @@ -2630,6 +2889,7 @@ def revert_to_snapshot(self, context, volume, snapshot): else: self.volumeops.revert_to_snapshot(backing, snapshot.name) + @volume_utils.trace def migrate_volume(self, context, volume, host): """Migrate a volume to the specified host. @@ -2676,8 +2936,9 @@ def migrate_volume(self, context, volume, host): backing) def _migrate_unattached(self, context, dest_host, volume, backing): - ds_info = self._remote_api.select_ds_for_volume(context, dest_host, - volume) + ds_info = self._remote_api.select_ds_for_volume(context, + cinder_host=dest_host, + volume=volume) service_locator = self._remote_api.get_service_locator_info(context, dest_host) host_ref = vim_util.get_moref(ds_info['host'], 'HostSystem') diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index d9105bfbadc..361f93c8ba8 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2701,6 +2701,11 @@ def get_stats(): if self.extra_capabilities: volume_stats.update(self.extra_capabilities) + if "pools" in volume_stats: + for pool in volume_stats["pools"]: + pool.update(self.extra_capabilities) + else: + volume_stats.update(self.extra_capabilities) if volume_stats: # NOTE(xyang): If driver reports replication_status to be From 2fe210f7855b6e7c9044a9b479774c38ae1b385f Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 20 Oct 2021 09:57:17 -0400 Subject: [PATCH 061/149] [SAP] Change the API return code for ConnectorRejected This patch changes the HTTP response code for a call to attach a volume where the ConnectorRejected is raised. The Response code was 500 generic error. 
The new reponse is HTTP 406 HTTPNotAcceptable --- cinder/api/contrib/volume_actions.py | 4 ++++ cinder/api/v3/attachments.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/cinder/api/contrib/volume_actions.py b/cinder/api/contrib/volume_actions.py index deb326be079..c49779ccd84 100644 --- a/cinder/api/contrib/volume_actions.py +++ b/cinder/api/contrib/volume_actions.py @@ -165,6 +165,10 @@ def _initialize_connection(self, req, id, body): except exception.InvalidInput as err: raise webob.exc.HTTPBadRequest( explanation=err.msg) + except exception.ConnectorRejected: + msg = _("Volume needs to be migrated before attaching to this " + "instance") + raise webob.exc.HTTPNotAcceptable(explanation=msg) except exception.VolumeBackendAPIException: msg = _("Unable to fetch connection information from backend.") raise webob.exc.HTTPInternalServerError(explanation=msg) diff --git a/cinder/api/v3/attachments.py b/cinder/api/v3/attachments.py index f0c4110ccb5..a4301edba9d 100644 --- a/cinder/api/v3/attachments.py +++ b/cinder/api/v3/attachments.py @@ -198,6 +198,12 @@ def create(self, req, body): except (exception.NotAuthorized, exception.InvalidVolume): raise + except exception.ConnectorRejected: + # Don't use err_msg or it will raise the 500 + _msg = _("Volume needs to be migrated before attaching to this " + "instance") + LOG.exception(_msg) + raise webob.exc.HTTPNotAcceptable(explanation=_msg) except exception.CinderException as ex: err_msg = _( "Unable to create attachment for volume (%s).") % ex.msg @@ -252,6 +258,12 @@ def update(self, req, id, body): connector)) except exception.NotAuthorized: raise + except exception.ConnectorRejected: + # Don't use err_msg or it will raise the 500 + _msg = _("Volume needs to be migrated before attaching to this " + "instance") + LOG.exception(_msg) + raise webob.exc.HTTPNotAcceptable(explanation=_msg) except exception.CinderException as ex: err_msg = ( _("Unable to update attachment.(%s).") % ex.msg) From 67f5a974361eb5236d85fcba167bbb1a7eb46a8b Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 3 Nov 2021 15:08:28 -0400 Subject: [PATCH 062/149] [SAP] put volume in maintenance mode during migrate This patch ensures that the os migrate by connector puts the volume in maintenance mode if the lock_volume flag is passed in the API to migrate by connector Change-Id: If8b7f4cfafc5b94878ae985d6081e94b8d62f522 --- cinder/api/contrib/vmware_migrate.py | 6 ++- .../api/schemas/vmware_extension_actions.py | 3 ++ cinder/volume/api.py | 42 ++++++++++++++++++- cinder/volume/drivers/vmware/vmdk.py | 2 +- 4 files changed, 50 insertions(+), 3 deletions(-) diff --git a/cinder/api/contrib/vmware_migrate.py b/cinder/api/contrib/vmware_migrate.py index b40015d2cdd..d4ea467f772 100644 --- a/cinder/api/contrib/vmware_migrate.py +++ b/cinder/api/contrib/vmware_migrate.py @@ -17,6 +17,7 @@ from oslo_log import log as logging +from oslo_utils import strutils from six.moves import http_client from cinder.api.contrib import admin_actions @@ -51,9 +52,12 @@ def _migrate_volume_by_connector(self, req, id, body): self.authorize(context, 'migrate_volume', target_obj=volume) params = body['os-migrate_volume_by_connector'] connector = params.get('connector', {}) + lock_volume = strutils.bool_from_string( + params.get('lock_volume', False), + strict=True) self.volume_api.migrate_volume_by_connector( - context, volume, connector) + context, volume, connector, lock_volume) class Vmware_migrate(extensions.ExtensionDescriptor): diff --git 
a/cinder/api/schemas/vmware_extension_actions.py b/cinder/api/schemas/vmware_extension_actions.py index 416d6a01e54..357647d830d 100644 --- a/cinder/api/schemas/vmware_extension_actions.py +++ b/cinder/api/schemas/vmware_extension_actions.py @@ -13,6 +13,8 @@ # License for the specific language governing permissions and limitations # under the License. +from cinder.api.validation import parameter_types + migrate_volume_by_connector = { 'type': 'object', @@ -21,6 +23,7 @@ 'type': 'object', 'properties': { 'connector': {'type': ['string', 'object', 'null']}, + 'lock_volume': parameter_types.boolean, }, 'additionalProperties': False, }, diff --git a/cinder/volume/api.py b/cinder/volume/api.py index dacb3a71602..d0c66c218d9 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -797,7 +797,8 @@ def detach(self, context, volume, attachment_id): resource=volume) return detach_results - def migrate_volume_by_connector(self, ctxt, volume, connector): + def migrate_volume_by_connector(self, ctxt, volume, connector, + lock_volume): if not connector: raise exception.InvalidInput("Must provide a valid Connector") @@ -820,6 +821,45 @@ def migrate_volume_by_connector(self, ctxt, volume, connector): backend = host_manager.BackendState(host=dest['host'], cluster_name=dest['cluster_name'], capabilities=dest['capabilities']) + + # Build required conditions for conditional update + expected = {'status': ('available', 'reserved'), + 'migration_status': self.AVAILABLE_MIGRATION_STATUS, + 'replication_status': ( + None, + fields.ReplicationStatus.DISABLED, + fields.ReplicationStatus.NOT_CAPABLE), + 'consistencygroup_id': (None, ''), + 'group_id': (None, '')} + + expected['host'] = db.Not(dest['host']) + filters = [~db.volume_has_snapshots_filter()] + + updates = {'migration_status': 'starting', + 'previous_status': volume.model.status} + + # When the migration of an available volume starts, both the status + # and the migration status of the volume will be changed. + # If the admin sets lock_volume flag to True, the volume + # status is changed to 'maintenance', telling users + # that this volume is in maintenance mode, and no action is allowed + # on this volume, e.g. attach, detach, retype, migrate, etc. + if lock_volume: + updates['status'] = db.Case( + [(volume.model.status.in_(('available', 'reserved')), + 'maintenance')], + else_=volume.model.status) + + result = volume.conditional_update(updates, expected, filters) + + if not result: + msg = _('Volume %s status must be available or reserved, must not ' + 'be migrating, have snapshots, be replicated, be part of ' + 'a group and destination host/cluster must be different ' + 'than the current one') % volume.id + LOG.error(msg) + raise exception.InvalidVolume(reason=msg) + LOG.debug("Invoking migrate_volume to host=%(host).", dest['host']) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index e467e32ec60..d639ec86c8b 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2897,7 +2897,7 @@ def migrate_volume(self, context, volume, host): """ false_ret = (False, None) - allowed_statuses = ['available', 'reserved', 'in-use'] + allowed_statuses = ['available', 'reserved', 'in-use', 'maintenance'] if volume['status'] not in allowed_statuses: LOG.debug('Only %s volumes can be migrated using backend ' 'assisted migration. 
Falling back to generic migration.', From 70968e4a8f687b9f170c934f8d541748e03f0eed Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 29 Nov 2021 10:49:26 -0500 Subject: [PATCH 063/149] [SAP] Update connection_capabilities reporting in stats This patch fixes an issue with reporting the connection capabilities in pools enabled stats for vmdk driver. --- cinder/volume/drivers/vmware/vmdk.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index d639ec86c8b..fc5a46524f0 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -496,6 +496,7 @@ def _get_volume_stats(self): } result, datastores = self._collect_backend_stats() + connection_capabilities = self._get_connection_capabilities() if self.configuration.vmware_datastores_as_pools: pools = [] for ds_name in datastores: @@ -517,8 +518,7 @@ def _get_volume_stats(self): 'location_info': location_info, 'backend_state': pool_state, 'storage_profile': datastore["storage_profile"], - 'connection_capabilities': ( - self._get_connection_capabilities(),) + 'connection_capabilities': connection_capabilities, } pools.append(pool) data['pools'] = pools @@ -549,7 +549,7 @@ def _get_volume_stats(self): 'thin_provisioning_support': True, 'thick_provisioning_support': True, 'max_over_subscription_ratio': max_over_subscription_ratio, - 'connection_capabilities': self._get_connection_capabilities(), + 'connection_capabilities': connection_capabilities, } data.update(data_no_pools) From b7b214b4faf03d706d4c7117e2962cffec19d5d6 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Fri, 14 Jan 2022 09:52:59 +0100 Subject: [PATCH 064/149] [SAP] ShardFilter passes everything for find_backend_for_connector When we try to find a backend for a connector, we got called by an admin to migrate a volume towards a connector - a specific vCenter housing a specific VM. In those cases, we don't want to prohibit the migration just because the volume's project doesn't have the appropriate shards set, as the shards should more steer where we put volumes initially, not prohibit the user from attaching a volume if it's necessary on a "stray" instance. Change-Id: Iccf5f5287eda536adafe925c76054f14827c467e --- cinder/scheduler/filters/shard_filter.py | 7 +++++ .../tests/unit/scheduler/test_shard_filter.py | 30 +++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index 67612939b58..7c2b741076c 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -156,6 +156,13 @@ def backend_passes(self, backend_state, filter_properties): LOG.debug('Ignoring snapshot.') return True + if spec.get('operation') == 'find_backend_for_connector': + # We don't care about shards here, as we want to move a volume to + # an instance sitting in a specific vCenter. Shards are only used + # if we don't know where the volume is needed. 
+ LOG.debug('Ignoring find_backend_for_connector scheduling.') + return True + # allow an override of the automatic shard-detection like nova does for # its compute-hosts scheduler_hints = filter_properties.get('scheduler_hints') or {} diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 35251ba7c62..de1ee8f1172 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -161,3 +161,33 @@ def test_scheduler_hints_override_sharding_enabled(self): caps1 = {'vcenter-shard': 'vc-a-1'} host1 = fakes.FakeBackendState('host1', {'capabilities': caps1}) self.assertTrue(self.filt_cls.backend_passes(host1, self.props)) + + def test_noop_for_find_backend_by_connector_with_hint(self): + """Check if we pass any backend + + If the operation we're scheduling for is find_backend_for_connector, + we do not look at the shards but pass through every backend, because + this tries to move a volume towards where a server is during attach and + we always want that to succeed. Shards are supposed to help decision + making when we don't know where the volume will be attached. + """ + caps = {'vcenter-shard': 'vc-a-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} + self.props['request_spec']['operation'] = 'find_backend_for_connector' + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + + def test_noop_for_find_backend_by_connector_without_hint(self): + """Check if we pass any backend + + If the operation we're scheduling for is find_backend_for_connector, + we do not look at the shards but pass through every backend, because + this tries to move a volume towards where a server is during attach and + we always want that to succeed. Shards are supposed to help decision + making when we don't know where the volume will be attached. + """ + self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['vc-a-1'] + caps = {'vcenter-shard': 'vc-a-0'} + host = fakes.FakeBackendState('host1', {'capabilities': caps}) + self.props['request_spec']['operation'] = 'find_backend_for_connector' + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) From c99156461fbac02c0912def213f5e71360d0acc7 Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 21 Jan 2022 10:54:35 -0500 Subject: [PATCH 065/149] [SAP] add SAPLargeVolumeFilter This filter disallows any volumes larger than 2TB to be created on any datastore that has 'vvol' in it's name. [SAP] add SAPLargeVolumeFilter This filter disallows any volumes larger than 2TB to be created on any datastore that has 'vvol' in it's name. --- .../filters/sap_large_volume_filter.py | 39 +++++++++++++++++++ setup.cfg | 1 + 2 files changed, 40 insertions(+) create mode 100644 cinder/scheduler/filters/sap_large_volume_filter.py diff --git a/cinder/scheduler/filters/sap_large_volume_filter.py b/cinder/scheduler/filters/sap_large_volume_filter.py new file mode 100644 index 00000000000..c071c1a8209 --- /dev/null +++ b/cinder/scheduler/filters/sap_large_volume_filter.py @@ -0,0 +1,39 @@ +# Copyright (c) 2020 SAP SE +# +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_config import cfg +from oslo_log import log as logging + +from cinder.scheduler import filters + + +LOG = logging.getLogger(__name__) +CONF = cfg.CONF + + +class SAPLargeVolumeFilter(filters.BaseBackendFilter): + """Filter out volumes from landing on vvol datastores for > 2TB""" + + def backend_passes(self, backend_state, filter_properties): + host = backend_state.host + req_spec = filter_properties.get('request_spec') + volume_size = req_spec["volume"]["size"] + + if 'vvol' in host.lower() and volume_size > 2000: + LOG.info("Cannot allow volumes larger than 2000 GiB on vVol.") + return False + else: + return True diff --git a/setup.cfg b/setup.cfg index a7db385e2f6..ccca21464a0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -46,6 +46,7 @@ cinder.scheduler.filters = SameBackendFilter = cinder.scheduler.filters.affinity_filter:SameBackendFilter InstanceLocalityFilter = cinder.scheduler.filters.instance_locality_filter:InstanceLocalityFilter ShardFilter = cinder.scheduler.filters.shard_filter:ShardFilter + SAPLargeVolumeFilter = cinder.scheduler.filters.sap_large_volume_filter:SAPLargeVolumeFilter cinder.scheduler.weights = AllocatedCapacityWeigher = cinder.scheduler.weights.capacity:AllocatedCapacityWeigher CapacityWeigher = cinder.scheduler.weights.capacity:CapacityWeigher From dd373005ad88dd479eac4609b86fbdc9cc377c7c Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 25 Jan 2022 13:27:29 -0500 Subject: [PATCH 066/149] [SAP] Fix log.debug entry for migrate_by_connector Small issue with the logging text string format. --- cinder/volume/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index d0c66c218d9..a50a4dcf436 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -860,7 +860,7 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, LOG.error(msg) raise exception.InvalidVolume(reason=msg) - LOG.debug("Invoking migrate_volume to host=%(host).", dest['host']) + LOG.debug("Invoking migrate_volume to host=%s", dest['host']) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, wait_for_completion=False) From 982f7bc1abf4dd35c21a2d2f1b2b3ec477483920 Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 26 Jan 2022 10:47:36 -0500 Subject: [PATCH 067/149] [SAP] Remove the tracing from _get_datastores_for_profiles This tracing was useful during development of the datastores as pool, now it's just noise. 
--- cinder/volume/drivers/vmware/vmdk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index fc5a46524f0..c929818d6bd 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2398,7 +2398,6 @@ def _volume_provider_metadata(self, volume, backing=None): return provider_info - @volume_utils.trace def _get_datastores_for_profiles(self): datastores = {} for profile in self._storage_profiles: From ee8868b6de767c70046869c4ba6028b2aaef4ed8 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 1 Feb 2022 14:15:29 +0100 Subject: [PATCH 068/149] [SAP] Handle reserved status in live-migrations As we now lock the volumes, the statu of the volume will change from in-use to reserved. We have to handle that state, and look instead at the attach_status to determine if the volume is attached --- cinder/volume/api.py | 5 +++-- cinder/volume/drivers/vmware/vmdk.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index a50a4dcf436..a6ad8906ac3 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -822,8 +822,9 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, cluster_name=dest['cluster_name'], capabilities=dest['capabilities']) + states = ('available', 'reserved', 'in-use') # Build required conditions for conditional update - expected = {'status': ('available', 'reserved'), + expected = {'status': states, 'migration_status': self.AVAILABLE_MIGRATION_STATUS, 'replication_status': ( None, @@ -846,7 +847,7 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, # on this volume, e.g. attach, detach, retype, migrate, etc. if lock_volume: updates['status'] = db.Case( - [(volume.model.status.in_(('available', 'reserved')), + [(volume.model.status.in_(states), 'maintenance')], else_=volume.model.status) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index c929818d6bd..2af80df3dc9 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2924,7 +2924,7 @@ def migrate_volume(self, context, volume, host): {'volume_name': volume.name, 'dest_host': dest_host}) return (True, None) - if volume['status'] == 'in-use': + if volume['attach_status'] == 'attached': if self._vcenter_instance_uuid != vcenter: return self._migrate_attached_cross_vc(context, dest_host, volume, backing) From 5efb2475f43b6e9ab31ebe0add6f5cb59bd8a457 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Tue, 1 Feb 2022 14:17:47 +0100 Subject: [PATCH 069/149] [SAP] Pass cinder_host for live-migrations If we remotely create a backing for a volume, it is for a live-migration, and we need to override the host in the volume attribute, as it refers to the current location --- .../tests/unit/volume/drivers/vmware/test_remote.py | 5 +++-- .../unit/volume/drivers/vmware/test_vmware_vmdk.py | 6 +++++- cinder/volume/drivers/vmware/remote.py | 12 ++++++++---- cinder/volume/drivers/vmware/vmdk.py | 11 +++++++---- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_remote.py b/cinder/tests/unit/volume/drivers/vmware/test_remote.py index 140c88a5b68..f5a1ba0c964 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_remote.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_remote.py @@ -57,7 +57,8 @@ def test_create_backing(self): server=self._fake_host, host=self._fake_host, volume=self._fake_volume, - 
create_params=None + create_params=None, + cinder_host=self._fake_host, ) @@ -120,4 +121,4 @@ def test_move_volume_backing_to_folder(self, get_moref): def test_create_backing(self): self._service.create_backing(self._ctxt, self._fake_volume) self._driver._create_backing.assert_called_once_with( - self._fake_volume, create_params=None) + self._fake_volume, create_params=None, cinder_host=None) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 484ce14ee1b..7baaf26daa0 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2974,7 +2974,11 @@ def _test_create_backing( ret = self._driver._create_backing(volume, host, create_params) self.assertEqual(backing, ret) - select_ds_for_volume.assert_called_once_with(volume, host) + select_ds_for_volume.assert_called_once_with( + volume, + host, + cinder_host=None, + create_params=create_params) get_storage_profile_id.assert_called_once_with(volume) exp_extra_config = {vmdk.EXTRA_CONFIG_VOLUME_ID_KEY: volume['id'], diff --git a/cinder/volume/drivers/vmware/remote.py b/cinder/volume/drivers/vmware/remote.py index f87fcd28486..30c81462503 100644 --- a/cinder/volume/drivers/vmware/remote.py +++ b/cinder/volume/drivers/vmware/remote.py @@ -51,10 +51,12 @@ def move_volume_backing_to_folder(self, ctxt, host, volume, folder): return cctxt.call(ctxt, 'move_volume_backing_to_folder', volume=volume, folder=folder) - def create_backing(self, ctxt, host, volume, create_params=None): + def create_backing(self, ctxt, host, volume, create_params=None, + cinder_host=None): cctxt = self._get_cctxt(host) return cctxt.call(ctxt, 'create_backing', volume=volume, - create_params=create_params) + create_params=create_params, + cinder_host=cinder_host or host) class VmdkDriverRemoteService(object): @@ -94,6 +96,8 @@ def move_volume_backing_to_folder(self, ctxt, volume, folder): folder_ref = vim_util.get_moref(folder, 'Folder') self._driver.volumeops.move_backing_to_folder(backing, folder_ref) - def create_backing(self, ctxt, volume, create_params=None): + def create_backing(self, ctxt, volume, create_params=None, + cinder_host=None): return self._driver._create_backing(volume, - create_params=create_params) + create_params=create_params, + cinder_host=cinder_host) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 2af80df3dc9..aebfe2d2fad 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -680,7 +680,8 @@ def _get_extra_config(self, volume): volumeops.BACKING_UUID_KEY: volume['id']} @volume_utils.trace - def _create_backing(self, volume, host=None, create_params=None): + def _create_backing(self, volume, host=None, create_params=None, + cinder_host=None): """Create volume backing under the given host. If host is unspecified, any suitable host is selected. @@ -689,13 +690,15 @@ def _create_backing(self, volume, host=None, create_params=None): :param host: Reference of the host :param create_params: Dictionary specifying optional parameters for backing VM creation + :param cinder_host: String of the format host@backend_name#pool. 
:return: Reference to the created backing """ + create_params = create_params or {} (host_ref, resource_pool, folder, - summary) = self._select_ds_for_volume(volume, host) - - create_params = create_params or {} + summary) = self._select_ds_for_volume(volume, host, + create_params=create_params, + cinder_host=cinder_host) # check if a storage profile needs to be associated with the backing VM profile_id = self._get_storage_profile_id(volume) From e32d9581cab594eaa3d0a5041aa42c7d0b6f6bb9 Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Wed, 2 Feb 2022 16:09:45 +0100 Subject: [PATCH 070/149] Delete attachment on exception in create If there is an exception raised in attachment_update, the created attachment db record may persist, unless we delete it --- cinder/volume/api.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index a6ad8906ac3..5d3523992f7 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -2252,11 +2252,16 @@ def attachment_create(self, volume_ref, instance_uuid) if connector: - connection_info = ( - self.volume_rpcapi.attachment_update(ctxt, - volume_ref, - connector, - attachment_ref.id)) + try: + connection_info = ( + self.volume_rpcapi.attachment_update(ctxt, + volume_ref, + connector, + attachment_ref.id)) + except Exception: + with excutils.save_and_reraise_exception(): + self.attachment_delete(ctxt, attachment_ref) + attachment_ref.connection_info = connection_info # Use of admin_metadata for RO settings is deprecated From 1756bcf2347efadb68bc2aa084ff222d8d906b62 Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 3 Feb 2022 11:03:40 -0500 Subject: [PATCH 071/149] [SAP] Fix an issue with SAPLargeVolumeFilter The filter was getting a KeyError during snapshot_create requests. This patch reworks the filter to use the safety like the capacity filter uses for getting the size out of the request spec. --- .../filters/sap_large_volume_filter.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/cinder/scheduler/filters/sap_large_volume_filter.py b/cinder/scheduler/filters/sap_large_volume_filter.py index c071c1a8209..e808bac485b 100644 --- a/cinder/scheduler/filters/sap_large_volume_filter.py +++ b/cinder/scheduler/filters/sap_large_volume_filter.py @@ -29,10 +29,21 @@ class SAPLargeVolumeFilter(filters.BaseBackendFilter): def backend_passes(self, backend_state, filter_properties): host = backend_state.host - req_spec = filter_properties.get('request_spec') - volume_size = req_spec["volume"]["size"] - if 'vvol' in host.lower() and volume_size > 2000: + # if the request is against a non vvol host, we pass. + if 'vvol' not in host.lower(): + return True + + if filter_properties.get('new_size'): + requested_size = int(filter_properties.get('new_size')) + else: + requested_size = int(filter_properties.get('size')) + + # requested_size is 0 means that it's a manage request. + if requested_size == 0: + return True + + if requested_size > 2000: LOG.info("Cannot allow volumes larger than 2000 GiB on vVol.") return False else: From 75bf35708ab68a8a81440aea2b3a9bd8148a8eee Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Mon, 14 Feb 2022 12:26:49 -0500 Subject: [PATCH 072/149] [SAP] Ensure pools specify thick_provisioning_support enabled This patch forces the pool stats to say they support thick provisioning. 
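For illustration, a single per-datastore pool entry, as the scheduler now sees it, could look like the sketch below; the capacity numbers are invented and only the two provisioning flags matter for this change.

    illustrative_pool = {
        'pool_name': 'datastore-001',            # invented name
        'total_capacity_gb': 1024,
        'free_capacity_gb': 512,
        'thin_provisioning_support': True,
        'thick_provisioning_support': True,      # now always reported by the pool
        'max_over_subscription_ratio': 20.0,
        'reserved_percentage': 0,
    }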
--- cinder/volume/drivers/vmware/vmdk.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index aebfe2d2fad..c57f72d84c2 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -509,6 +509,7 @@ def _get_volume_stats(self): 'free_capacity_gb': round( summary.freeSpace / units.Gi), 'thin_provisioning_support': True, + 'thick_provisioning_support': True, 'max_over_subscription_ratio': ( max_over_subscription_ratio), 'reserved_percentage': reserved_percentage, From e192eea65a3ba411d71f16c7541a0a0e6970dbac Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 24 Feb 2022 08:10:56 -0500 Subject: [PATCH 073/149] [SAP] update size check to Gib units This patch updates the 2 T units check to 2048, for Tib vs Tb --- cinder/scheduler/filters/sap_large_volume_filter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cinder/scheduler/filters/sap_large_volume_filter.py b/cinder/scheduler/filters/sap_large_volume_filter.py index e808bac485b..ae51a2ee852 100644 --- a/cinder/scheduler/filters/sap_large_volume_filter.py +++ b/cinder/scheduler/filters/sap_large_volume_filter.py @@ -43,8 +43,8 @@ def backend_passes(self, backend_state, filter_properties): if requested_size == 0: return True - if requested_size > 2000: - LOG.info("Cannot allow volumes larger than 2000 GiB on vVol.") + if requested_size > 2048: + LOG.info("Cannot allow volumes larger than 2048 GiB on vVol.") return False else: return True From 0588cf57fc81b60744b59bcc4633a18b201a2b1c Mon Sep 17 00:00:00 2001 From: Fabian Wiesel Date: Fri, 4 Mar 2022 10:32:06 +0100 Subject: [PATCH 074/149] Vmware: Remove nvp.vm-uuid extraConfig The Nova vmwareapi driver falls back to the extraConfig option value nvp.vm-uuid to identify vms, if it cannot find VMs by the config.instanceUuid attribue. A clone copies the attribute, so when nova retries to delete a vm after initially succeeding, it will find the backing with the same value instead and delete it instead. 
--- cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py | 3 ++- cinder/volume/drivers/vmware/vmdk.py | 5 ++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 7baaf26daa0..b8323f21d33 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -2602,7 +2602,8 @@ def test_create_temp_backing_from_attached_vmdk( vops.clone_backing.assert_called_once_with( tmp_name, instance, None, volumeops.FULL_CLONE_TYPE, datastore, host=host, resource_pool=rp, folder=folder, - device_changes=[dev_change_disk_remove]) + device_changes=[dev_change_disk_remove], + extra_config={'nvp.vm-uuid': ''}) @mock.patch.object(VMDK_DRIVER, '_get_disk_type') @mock.patch.object(VMDK_DRIVER, 'volumeops') diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index c57f72d84c2..9247170426a 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2786,10 +2786,13 @@ def _create_temp_backing_from_attached_vmdk( device_changes.extend( self.volumeops._create_device_change_for_vif_removal(instance)) + # Remove another attribute by which the nova driver identifies VMs + extra_config = {'nvp.vm-uuid': ''} + return self.volumeops.clone_backing( tmp_name, instance, None, volumeops.FULL_CLONE_TYPE, datastore, host=host, resource_pool=rp, folder=folder, - device_changes=device_changes) + device_changes=device_changes, extra_config=extra_config) def _extend_if_needed(self, volume, backing): volume_size = volume.size * units.Gi From afd2c7f669421b90c431d4223c7c5aeeb5700d36 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Mon, 21 Feb 2022 12:34:36 -0500 Subject: [PATCH 075/149] [SAP] Add any custom attributes to pool stats This patch tries to read any custom attributes associated with each datastore and shove those attributes in the pool stats. This will allow us to filter by custom attributes. The end goal is to add a netapp backend fqdn custom attribute so we know where each datastore lives. This will allow cinder to have affinity/anti-affinity to netapps. 
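For illustration, once a vCenter custom attribute named netapp_fqdn is set on a datastore, the reported pool entry carries it roughly as below (values invented), and a scheduler filter can match on it.

    illustrative_pool = {
        'pool_name': 'datastore-001',
        'total_capacity_gb': 1024,
        'free_capacity_gb': 512,
        'custom_attributes': {'netapp_fqdn': 'netapp-01.example.com'},
    }

    # e.g. what an affinity filter would read back:
    fqdn = illustrative_pool.get('custom_attributes', {}).get('netapp_fqdn')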
--- cinder/volume/drivers/vmware/vmdk.py | 27 ++++++++++++++++++++--- cinder/volume/drivers/vmware/volumeops.py | 14 ++++++++++++ 2 files changed, 38 insertions(+), 3 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 9247170426a..fa817bb93d4 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -502,6 +502,7 @@ def _get_volume_stats(self): for ds_name in datastores: datastore = datastores[ds_name] summary = datastore["summary"] + pool_state = "up" if summary.accessible is True else "down" pool = {'pool_name': summary.name, 'total_capacity_gb': round( @@ -521,6 +522,11 @@ def _get_volume_stats(self): 'storage_profile': datastore["storage_profile"], 'connection_capabilities': connection_capabilities, } + + # Add any custom attributes associated with the datastore + if "custom_attributes" in datastore: + pool['custom_attributes'] = datastore['custom_attributes'] + pools.append(pool) data['pools'] = pools return data @@ -2415,10 +2421,25 @@ def _get_datastores_for_profiles(self): # Reconstruct a managed object reference to that # datastore ds = vim_util.get_moref(h.hubId, "Datastore") - summary = self.volumeops.get_summary(ds) + objects = self.volumeops.get_datastore_properties(ds) + summary = objects['summary'] datastores[summary.name] = {'summary': summary, - 'storage_profile': profile} - + 'storage_profile': profile, + 'datastore_object': ds} + if ('availableField' in objects and + 'customValue' in objects): + custom_fields = {} + for junk, field in objects['availableField']: + for v in field: + custom_fields[v.key] = v.name + + custom_attributes = {} + for junk, attr in objects['customValue']: + for v in attr: + field = custom_fields[v.key] + custom_attributes[field] = v.value + datastores[summary.name][ + "custom_attributes"] = custom_attributes return datastores def _new_host_for_volume(self, volume): diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 5ae686d7496..709b7a72239 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -984,6 +984,20 @@ def get_summary(self, datastore): self._session.vim, datastore, 'summary') + def get_datastore_properties(self, datastore): + """Get datastore summary. + + :param datastore: Reference to the datastore + :return: 'summary' property of the datastore + """ + return self._session.invoke_api(vim_util, 'get_object_properties_dict', + self._session.vim, datastore, + properties_to_collect=[ + "summary", + "availableField", + "customValue" + ]) + def _create_relocate_spec_disk_locator(self, datastore, disk_type, disk_device, profile_id=None): """Creates spec for disk type conversion during relocate.""" From b5d2e6355dc459127a35add4f8a6caac95d916b4 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 24 Feb 2022 09:54:00 -0500 Subject: [PATCH 076/149] [SAP] Account for volumes already provisioned to a host This patch updates the volume manager's creation of the allocated_capacity_gb setting for valid volumes that have already been provisioned. The volume manager looks at all non-deleted volumes in the database to account for storage that has been allocated by cinder in the backend. This patch updates that check for volumes that are in other non error states. 
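Roughly, the init-time accounting this change touches amounts to the sketch below (a standalone approximation, not the actual _count_allocated_capacity body): every volume that already landed on a backend adds its size to that pool's allocated_capacity_gb, whatever its current status.

    def count_allocated(volumes, pools):
        """Sketch: accumulate per-pool allocated capacity at service start."""
        for volume in volumes:
            if volume['host']:                  # provisioned on a backend already
                # host format is host@backend#pool; fall back to the full host
                pool = volume['host'].partition('#')[2] or volume['host']
                pools.setdefault(pool, {'allocated_capacity_gb': 0})
                pools[pool]['allocated_capacity_gb'] += volume['size']
        return pools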
--- cinder/volume/manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 361f93c8ba8..a42c42df3bc 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -551,8 +551,8 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: try: for volume in volumes: - # available volume should also be counted into allocated - if volume['status'] in ['in-use', 'available']: + # Account for volumes that have been provisioned already + if volume['host']: # calculate allocated capacity for driver self._count_allocated_capacity(ctxt, volume) From de967b08c3e2748da93f74ca0f9b7b94a5216b10 Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 24 Mar 2022 13:04:30 -0400 Subject: [PATCH 077/149] [SAP] Added sap_affinity_filter.py This patch adds the SAPDifferentBackendFilter and SAPSameBackendFilter classes. These classes are the affinity/anti-affinity classes to check and ensure that the same/different netapp backends are used during volume provisioning requests. The same cinder api is used for passing in the scheduler hint. cinder volume create --name foo --hint same_host= cinder volume create --name foo --hint different_host= If a datastore has a custom attribute set on it with the name of netapp_fqdn then that value is compared in the filters to ensure the same/different datastore is chosen, depending on the scheduler hint passed in. --- .../scheduler/filters/sap_affinity_filter.py | 198 ++++++++++++++++++ setup.cfg | 2 + 2 files changed, 200 insertions(+) create mode 100644 cinder/scheduler/filters/sap_affinity_filter.py diff --git a/cinder/scheduler/filters/sap_affinity_filter.py b/cinder/scheduler/filters/sap_affinity_filter.py new file mode 100644 index 00000000000..8a52bf287e8 --- /dev/null +++ b/cinder/scheduler/filters/sap_affinity_filter.py @@ -0,0 +1,198 @@ +# Copyright 2022, SAP SE +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from oslo_log import log as logging +from oslo_utils import uuidutils + +from cinder.scheduler import filters +from cinder.volume import api as volume +from cinder.volume import volume_utils + + +LOG = logging.getLogger(__name__) + + +class SAPBackendFilter(filters.BaseBackendFilter): + + run_filter_once_per_request = False + + def __init__(self): + super().__init__() + self.volume_api = volume.API() + self.backend_fqdn_lookup = {} + + def _get_volumes(self, context, affinity_uuids, backend_state): + # We don't filter here on host. + filters = {'id': affinity_uuids, 'deleted': False} + return self.volume_api.get_all(context, filters=filters) + + def _affinity_volumes(self, backend_state, filter_properties): + context = filter_properties['context'] + scheduler_hints = filter_properties.get('scheduler_hints') or {} + affinity_uuids = scheduler_hints.get(self.hint_key, []) + + # scheduler hint verification: affinity_uuids can be a list of uuids + # or single uuid. 
The checks here is to make sure every single string + # in the list looks like a uuid, otherwise, this filter will fail to + # pass. Note that the filter does *NOT* ignore string doesn't look + # like a uuid, it is better to fail the request than serving it wrong. + if isinstance(affinity_uuids, list): + for uuid in affinity_uuids: + if uuidutils.is_uuid_like(uuid): + continue + else: + return False + elif uuidutils.is_uuid_like(affinity_uuids): + affinity_uuids = [affinity_uuids] + else: + # Not a list, not a string looks like uuid, don't pass it + # to DB for query to avoid potential risk. + return False + + if affinity_uuids: + return self._get_volumes(context, affinity_uuids, backend_state) + + def filter_all(self, filter_obj_list, filter_properties): + """Yield objects that pass the filter. + + Can be overridden in a subclass, if you need to base filtering + decisions on all objects. Otherwise, one can just override + _filter_one() to filter a single object. + + We override this so we can build a datastore -> backend fqdn lookup + for all the datastores. The filter will use it to test if the + volume's backend fqdn matches the passed in pool's fqdn. + """ + # reset the lookup for this request. It can change between requests. + self.backend_fqdn_lookup = {} + + # First we need to get all of the pools and populate a lookup for + # the datastore backend fqdn custom attributes, if any. + scheduler_hints = filter_properties.get('scheduler_hints') or {} + affinity_uuids = scheduler_hints.get(self.hint_key, []) + # filter_obj_list is a generator, so we need to copy + # each entry for looping again. + obj_list = [] + if affinity_uuids: + # Build the backend_fqdn_lookup, since we will need it this request + objs = list(filter_obj_list) + for obj in objs: + obj_list.append(obj) + caps = obj.capabilities + if ('custom_attributes' in caps and + 'netapp_fqdn' in caps['custom_attributes']): + datastore = volume_utils.extract_host(obj.host, + level='pool') + fqdn = caps['custom_attributes']['netapp_fqdn'] + self.backend_fqdn_lookup[datastore] = fqdn + else: + obj_list = filter_obj_list + + for obj in obj_list: + if self._filter_one(obj, filter_properties): + yield obj + + def _get_backend_fqdn(self, pool_name): + if pool_name in self.backend_fqdn_lookup: + return self.backend_fqdn_lookup[pool_name] + else: + return None + + +class SAPDifferentBackendFilter(SAPBackendFilter): + """Schedule volume on a different back-end from a set of volumes.""" + + hint_key = "different_host" + + def backend_passes(self, backend_state, filter_properties): + volumes = self._affinity_volumes(backend_state, filter_properties) + + # If we got no volumes, then no reason to check + if not volumes: + return True + + # Get the backend fqdn custom attribute for the volume + backend_datastore = volume_utils.extract_host(backend_state.host, + level='pool') + backend_fqdn = self._get_backend_fqdn(backend_datastore) + if not backend_fqdn: + # The datastore being filtered doesn't have a custom fqdn set + # Don't filter it out. 
+ LOG.debug("Datastore {} has no fqdn".format( + backend_datastore + )) + return True + + # extract the datastore from the host entries from + # the volumes (from affinity_uuids), then find the backend associated + # with each of those and then only allow the same netapp to pass + for vol in volumes: + volume_datastore = volume_utils.extract_host(vol.host, + level='pool') + volume_fqdn = self._get_backend_fqdn(volume_datastore) + if volume_fqdn: + if volume_fqdn == backend_fqdn: + LOG.debug("Volume FQDN matches {}".format( + backend_fqdn + ), resource=vol) + return False + + return True + + +class SAPSameBackendFilter(SAPBackendFilter): + """Schedule volume on the same back-end as another volume. + + This also ensures that if a backend has a custom attribute + that specifies the actual Netapp fqdn, then passes when the + datastore matches that fqdn. + + """ + hint_key = "same_host" + + def backend_passes(self, backend_state, filter_properties): + volumes = self._affinity_volumes(backend_state, filter_properties) + + if not volumes: + return True + + # Get the backend fqdn custom attribute for the volume + backend_datastore = volume_utils.extract_host(backend_state.host, + level='pool') + backend_fqdn = self._get_backend_fqdn(backend_datastore) + if not backend_fqdn: + # The datastore being filtered doesn't have a custom fqdn set + # Don't filter it out. + LOG.debug("Datastore {} has no fqdn".format( + backend_datastore + )) + return True + + # If the result is a list of volumes, then we have to + # extract the datastore from the host entries from + # those volumes, then find the netapp associated with + # each of those and then only allow the same netapp to pass + for vol in volumes: + volume_datastore = volume_utils.extract_host(vol.host, + level='pool') + volume_fqdn = self._get_backend_fqdn(volume_datastore) + if volume_fqdn: + if volume_fqdn == backend_fqdn: + LOG.debug("Volume FQDN matches {}".format( + backend_fqdn + ), resource=vol) + return True + + return False diff --git a/setup.cfg b/setup.cfg index ccca21464a0..3048f01ac70 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,6 +47,8 @@ cinder.scheduler.filters = InstanceLocalityFilter = cinder.scheduler.filters.instance_locality_filter:InstanceLocalityFilter ShardFilter = cinder.scheduler.filters.shard_filter:ShardFilter SAPLargeVolumeFilter = cinder.scheduler.filters.sap_large_volume_filter:SAPLargeVolumeFilter + SAPDifferentBackendFilter = cinder.scheduler.filters.sap_affinity_filter:SAPDifferentBackendFilter + SAPSameBackendFilter = cinder.scheduler.filters.sap_affinity_filter:SAPSameBackendFilter cinder.scheduler.weights = AllocatedCapacityWeigher = cinder.scheduler.weights.capacity:AllocatedCapacityWeigher CapacityWeigher = cinder.scheduler.weights.capacity:CapacityWeigher From 4b1a73affadafe74ef9a082fa1f5f70930e354ce Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 1 Apr 2022 10:11:04 -0400 Subject: [PATCH 078/149] [SAP] Fixes to better account for allocated_capacity_gb This patch mirrors an upstream effort to fix issues related to not tracking the allocated_capacity_gb volume stats entry for backends. This value is critical for the cinder scheduler in determining capacity for provisioning requests per backend/pool. 
This patch fixes: migrate_volume delete_group revert_to_snapshot_generic --- .../tests/unit/volume/test_manage_volume.py | 12 +++---- cinder/volume/manager.py | 32 +++++++++++++------ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/cinder/tests/unit/volume/test_manage_volume.py b/cinder/tests/unit/volume/test_manage_volume.py index 5d4cc73f1ae..0a0eab66deb 100644 --- a/cinder/tests/unit/volume/test_manage_volume.py +++ b/cinder/tests/unit/volume/test_manage_volume.py @@ -58,7 +58,7 @@ def test_manage_existing(self): self.manager, '_run_manage_existing_flow_engine', return_value=volume_object) mock_update_volume_stats = self.mock_object( - self.manager, '_update_stats_for_managed') + self.manager, '_update_allocated_capacity') result = self.manager.manage_existing(self.context, volume_object) @@ -75,7 +75,7 @@ def test_manage_existing_with_volume_object(self): self.manager, '_run_manage_existing_flow_engine', return_value=volume_object) mock_update_volume_stats = self.mock_object( - self.manager, '_update_stats_for_managed') + self.manager, '_update_allocated_capacity') result = self.manager.manage_existing( self.context, volume_object) @@ -134,7 +134,7 @@ def test_update_stats_for_managed(self): volume_object = self._stub_volume_object_get(self, host=FAKE_HOST + '#volPool') - self.manager._update_stats_for_managed(volume_object) + self.manager._update_allocated_capacity(volume_object) backend_stats = self.manager.stats['pools'][FAKE_HOST_POOL] self.assertEqual( 1, backend_stats['allocated_capacity_gb']) @@ -146,7 +146,7 @@ def test_update_stats_for_managed_no_pool(self): self.manager.driver.configuration, 'safe_get', return_value=safe_get_backend) - self.manager._update_stats_for_managed(volume_obj) + self.manager._update_allocated_capacity(volume_obj) mock_safe_get.assert_called_once_with('volume_backend_name') backend_stats = self.manager.stats['pools'][safe_get_backend] @@ -157,7 +157,7 @@ def test_update_stats_for_managed_default_backend(self): mock_safe_get = self.mock_object( self.manager.driver.configuration, 'safe_get', return_value=None) - self.manager._update_stats_for_managed(volume_obj) + self.manager._update_allocated_capacity(volume_obj) mock_safe_get.assert_called_once_with('volume_backend_name') pool_stats = self.manager.stats['pools'] @@ -168,7 +168,7 @@ def test_update_stats_key_error(self): self.manager.stats = {} self.assertRaises( - KeyError, self.manager._update_stats_for_managed, + KeyError, self.manager._update_allocated_capacity, self._stub_volume_object_get(self)) @mock.patch('cinder.volume.drivers.lvm.LVMVolumeDriver.' 
diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index a42c42df3bc..8aa18d2ab2d 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -1023,9 +1023,11 @@ def _revert_to_snapshot_generic(self, temp_vol = self.driver._create_temp_volume_from_snapshot( ctxt, volume, snapshot, volume_options=v_options, status=fields.VolumeStatus.IN_USE) + self._update_allocated_capacity(temp_vol) self._copy_volume_data(ctxt, temp_vol, volume) self.driver.delete_volume(temp_vol) temp_vol.destroy() + self._update_allocated_capacity(temp_vol, decrement=True) except Exception: with excutils.save_and_reraise_exception(): LOG.exception( @@ -2635,8 +2637,12 @@ def migrate_volume(self, updates.update(model_update) if new_type_id: updates['volume_type_id'] = new_type_id + original_host = volume.host volume.update(updates) volume.save() + self._update_allocated_capacity(volume, decrement=True, + host=original_host) + self._update_allocated_capacity(volume) except Exception: with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} @@ -2646,7 +2652,11 @@ def migrate_volume(self, volume.save() if not moved: try: + original_host = volume.host self._migrate_volume_generic(ctxt, volume, host, new_type_id) + self._update_allocated_capacity(volume, decrement=True, + host=original_host) + self._update_allocated_capacity(volume) except Exception: with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} @@ -2943,11 +2953,7 @@ def extend_volume(self, 'volume_backend_name') or volume_utils.extract_host( volume.host, 'pool', True) - try: - self.stats['pools'][pool]['allocated_capacity_gb'] += size_increase - except KeyError: - self.stats['pools'][pool] = dict( - allocated_capacity_gb=size_increase) + self._update_allocated_capacity(volume, size=size_increase) self._notify_about_volume_usage( context, volume, "resize.end", @@ -3142,7 +3148,7 @@ def manage_existing(self, ctxt, volume, ref=None) -> ovo_fields.UUIDField: vol_ref = self._run_manage_existing_flow_engine( ctxt, volume, ref) - self._update_stats_for_managed(vol_ref) + self._update_allocated_capacity(vol_ref) LOG.info("Manage existing volume completed successfully.", resource=vol_ref) @@ -3628,10 +3634,12 @@ def _update_volume_from_src(self, self.db.volume_update(context, vol['id'], update) + @volume_utils.trace def _update_allocated_capacity(self, vol, decrement=False, - host: str = None) -> None: + host: str = None, + size=None) -> None: # Update allocated capacity in volume stats host = host or vol['host'] pool = volume_utils.extract_host(host, 'pool') @@ -3642,7 +3650,13 @@ def _update_allocated_capacity(self, 'pool', True) - vol_size = -vol['size'] if decrement else vol['size'] + # if a size was passed in, we use that to increment/decrement + # instead of the size in the volume. 
+ # This is for extend + if size: + vol_size = -size if decrement else size + else: + vol_size = -vol['size'] if decrement else vol['size'] try: self.stats['pools'][pool]['allocated_capacity_gb'] += vol_size except KeyError: @@ -3762,7 +3776,7 @@ def delete_group(self, context, group: objects.Group) -> None: if reservations: QUOTAS.commit(context, reservations, project_id=project_id) - self.stats['allocated_capacity_gb'] -= vol.size + self._update_allocated_capacity(vol, decrement=True) if grpreservations: GROUP_QUOTAS.commit(context, grpreservations, From 09f4228637ce51cf42a2d49a55a1a85677b0edb3 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Fri, 11 Mar 2022 12:42:56 -0500 Subject: [PATCH 079/149] [SAP] Rework Capacitty filter This patch adds the new cinder/utils.py calculate_capacity_factors to provide a detailed and more consistent,accurate view of the various factors in determining virtual free space for a particular backend. It takes into consideration total capacity, free space, thin/thick provisioning in the volume type, thin/thick support in the backend, as well as reserved percentage and max_over_subscription_ratio. Since the vmware driver is configured to allow lazy creation of volumes, the free space reported by the pool/datastore isn't a reliable source of how much is free considering what has been requested from cinder to allocate. This patch calculates what should be free based upon the total available capacity ( total - reserved ) and what cinder has tracked as allocated against that backend. If that calculated free is less than the reported free, then the calculated free is what is reported as virtual_free_space. There is a known issue in cinder with keeping track of the allocated space in 2 places: 1) at startup cinder only considers volumes that are in-use and available. All other volumes in other states aren't used to calculate the allocated space. This has to be fixed. This is fixed here: https://github.com/sapcc/cinder/pull/117 2) The allocated space isn't adjusted during volume migrations. --- cinder/scheduler/filters/capacity_filter.py | 21 +++++++++++++-------- cinder/utils.py | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cinder/scheduler/filters/capacity_filter.py b/cinder/scheduler/filters/capacity_filter.py index bd4cce839a3..f31260aaf6a 100644 --- a/cinder/scheduler/filters/capacity_filter.py +++ b/cinder/scheduler/filters/capacity_filter.py @@ -139,8 +139,8 @@ def backend_passes(self, backend_state, filter_properties): msg_args = {"grouping_name": backend_state.backend_id, "grouping": grouping, "requested": requested_size, - "available": virtual_free_space} - + "available": virtual_free_space, + "provisioning_type": factors["provisioning_type"]} # Only evaluate using max_over_subscription_ratio if # thin_provisioning_support is True. Check if the ratio of # provisioned capacity over total capacity has exceeded over @@ -160,13 +160,14 @@ def backend_passes(self, backend_state, filter_properties): "oversub_ratio": backend_state.max_over_subscription_ratio, "grouping": grouping, "grouping_name": backend_state.backend_id, + "provisioning_type": "thin", } LOG.warning( "Insufficient free space for thin provisioning. 
" "The ratio of provisioned capacity over total capacity " "%(provisioned_ratio).2f has exceeded the maximum over " "subscription ratio %(oversub_ratio).2f on %(grouping)s " - "%(grouping_name)s.", msg_args) + "%(grouping_name)s %(provisioning_type)s.", msg_args) return False else: # Thin provisioning is enabled and projected over-subscription @@ -181,12 +182,15 @@ def backend_passes(self, backend_state, filter_properties): LOG.warning("Insufficient free virtual space " "(%(available)sGB) to accommodate thin " "provisioned %(requested)sGB volume on " - "%(grouping)s %(grouping_name)s.", msg_args) + "%(grouping)s %(grouping_name)s." + " %(provisioning_type)s).", + msg_args) else: LOG.debug("Space information for volume creation " "on %(grouping)s %(grouping_name)s " "(requested / avail): " - "%(requested)s/%(available)s", msg_args) + "%(requested)s/%(available)s" + " %(provisioning_type)s.", msg_args) return res elif thin and backend_state.thin_provisioning_support: LOG.warning("Filtering out %(grouping)s %(grouping_name)s " @@ -202,12 +206,13 @@ def backend_passes(self, backend_state, filter_properties): if virtual_free_space < requested_size: LOG.warning("Insufficient free space for volume creation " "on %(grouping)s %(grouping_name)s (requested / " - "avail): %(requested)s/%(available)s", - msg_args) + "avail): %(requested)s/%(available)s " + "%(provisioning_type)s", msg_args) return False LOG.debug("Space information for volume creation " "on %(grouping)s %(grouping_name)s (requested / avail): " - "%(requested)s/%(available)s", msg_args) + "%(requested)s/%(available)s %(provisioning_type)s.", + msg_args) return True diff --git a/cinder/utils.py b/cinder/utils.py index 751fe077cdc..c36c5dfa5ce 100644 --- a/cinder/utils.py +++ b/cinder/utils.py @@ -751,7 +751,7 @@ def _limit(x): max_over_subscription_ratio if provisioned_type == 'thin' else None ), "total_available_capacity": _limit(total_available_capacity), - "provisioned_capacity": provisioned_capacity, + "provisioned_capacity": _limit(provisioned_capacity), "calculated_free_capacity": _limit(calculated_free), "virtual_free_capacity": _limit(virtual_free), "free_percent": _limit(free_percent), From 9af2de4a8cb7ef70e01dba475d22b8d281789495 Mon Sep 17 00:00:00 2001 From: Hemna Date: Tue, 15 Mar 2022 08:45:14 -0400 Subject: [PATCH 080/149] [SAP] Replace CapacityFilter This patch reworks the CapacityFilter which uses the new utils.calculate_capacity_factors to determine if the virtual free space available. The main issue is that the vmware driver does lazy creates. This causes an issue of over reporting of free space by the backend, because the space hasn't been consumed yet. So the amount of free space is not accurate with respect to how much has been allocated by cinder. This updated calculate capacity factors as well as the SAPCapacityFilter accounts for the virtual free space by using the cinder allocated capacity tracking. If the free space is reported less than what cinder thinks should be available, then the reported free space is used. This relies on an acurate reporting of the allocated capacity by the driver. We know there is an issue with allocated capacity not being reported correctly for migrated volumes, as well as accounting for existing volumes at startup. The startup issue should be solved with this PR: https://github.com/sapcc/cinder/pull/117 Will have to do a folllow up to account for updating the allocated capacity for migrated volumes. 
--- cinder/scheduler/filters/capacity_filter.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cinder/scheduler/filters/capacity_filter.py b/cinder/scheduler/filters/capacity_filter.py index f31260aaf6a..38c3550c5fe 100644 --- a/cinder/scheduler/filters/capacity_filter.py +++ b/cinder/scheduler/filters/capacity_filter.py @@ -140,7 +140,7 @@ def backend_passes(self, backend_state, filter_properties): "grouping": grouping, "requested": requested_size, "available": virtual_free_space, - "provisioning_type": factors["provisioning_type"]} + "provisioning_type": factors["provisioned_type"]} # Only evaluate using max_over_subscription_ratio if # thin_provisioning_support is True. Check if the ratio of # provisioned capacity over total capacity has exceeded over @@ -181,9 +181,9 @@ def backend_passes(self, backend_state, filter_properties): if not res: LOG.warning("Insufficient free virtual space " "(%(available)sGB) to accommodate thin " - "provisioned %(requested)sGB volume on " - "%(grouping)s %(grouping_name)s." - " %(provisioning_type)s).", + "provisioned %(requested)sGB volume on" + " %(grouping)s %(grouping_name)s" + " %(provisioning_type)s.", msg_args) else: LOG.debug("Space information for volume creation " @@ -210,7 +210,7 @@ def backend_passes(self, backend_state, filter_properties): "%(provisioning_type)s", msg_args) return False - LOG.debug("Space information for volume creation " + LOG.debug("[SAP] Space information for volume creation " "on %(grouping)s %(grouping_name)s (requested / avail): " "%(requested)s/%(available)s %(provisioning_type)s.", msg_args) From 3622acded9fb217fb6ec3f1db9c847e310fbea83 Mon Sep 17 00:00:00 2001 From: Hemna Date: Wed, 4 May 2022 16:25:10 -0400 Subject: [PATCH 081/149] [SAP] Update allocated_capacity on remote host This patch calls a new RPCAPI to tell the new remote host that it needs to update it's allocated_capacity_gb after a volume migration is successful. This ensures that migrating between 2 different cinder volume services can update the allocated_capacity_gb after a migration from one host to another. This happens when a volume is moved from one shard to another. 
--- cinder/volume/manager.py | 10 ++++++++-- cinder/volume/rpcapi.py | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 8aa18d2ab2d..d5068c43744 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2609,6 +2609,7 @@ def migrate_volume(self, model_update = None moved = False + rpcapi = volume_rpcapi.VolumeAPI() status_update = None if volume.status in ('retyping', 'maintenance'): @@ -2642,7 +2643,7 @@ def migrate_volume(self, volume.save() self._update_allocated_capacity(volume, decrement=True, host=original_host) - self._update_allocated_capacity(volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume) except Exception: with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} @@ -2656,7 +2657,7 @@ def migrate_volume(self, self._migrate_volume_generic(ctxt, volume, host, new_type_id) self._update_allocated_capacity(volume, decrement=True, host=original_host) - self._update_allocated_capacity(volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume) except Exception: with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} @@ -4307,6 +4308,11 @@ def delete_group_snapshot(self, context, group_snapshot) -> None: "delete.end", snapshots) + @utils.trace + def update_migrated_volume_capacity(self, ctxt, volume): + """Update allocated_capacity_gb for the new migrated volume host.""" + self._update_allocated_capacity(volume) + def update_migrated_volume(self, ctxt, volume, new_volume, volume_status): """Finalize migration process on backend device.""" model_update = None diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index d7db6ac4d55..b89b70603b8 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -310,6 +310,10 @@ def update_migrated_volume(self, ctxt, volume, new_volume, new_volume=new_volume, volume_status=original_volume_status) + def update_migrated_volume_capacity(self, ctxt, volume): + cctxt = self._get_cctxt(volume.service_topic_queue) + cctxt.cast(ctxt, 'update_migrated_volume_capacity', volume=volume) + def freeze_host(self, ctxt, service): """Set backend host to frozen.""" cctxt = self._get_cctxt(service.service_topic_queue) From c9c85f58367a773e4fcddd0545ac205b0a83141a Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 12 May 2022 14:18:56 -0400 Subject: [PATCH 082/149] [SAP] Trap negative values for allocated_capacity_gb This patch looks for updates to allocated_capacity_gb going negative and logs them along w/ the resource and then resets the value to 0. The hope is that seeing these warnings in logs will help find out why they are happening to begin with. 
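Condensed, the guard added below boils down to the sketch that follows (names as in the manager code): log whenever the counter would drop below zero and pin it at zero until the root cause is found.

    new_value = pool_stats['allocated_capacity_gb'] + vol_size   # vol_size can be negative
    if new_value < 0:
        LOG.warning("allocated_capacity_gb for pool %s went negative (%s); resetting to 0",
                    pool, new_value)
        new_value = 0
    pool_stats['allocated_capacity_gb'] = new_value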
--- cinder/volume/manager.py | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index d5068c43744..7a6f2bad09a 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2643,6 +2643,10 @@ def migrate_volume(self, volume.save() self._update_allocated_capacity(volume, decrement=True, host=original_host) + LOG.debug("Update remote allocated_capacity_gb for " + "host %(host)s", + {'host': volume.host}, + resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume) except Exception: with excutils.save_and_reraise_exception(): @@ -2657,6 +2661,10 @@ def migrate_volume(self, self._migrate_volume_generic(ctxt, volume, host, new_type_id) self._update_allocated_capacity(volume, decrement=True, host=original_host) + LOG.debug("Update remote allocated_capacity_gb for " + "host %(host)s", + {'host': volume.host}, + resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume) except Exception: with excutils.save_and_reraise_exception(): @@ -3658,11 +3666,39 @@ def _update_allocated_capacity(self, vol_size = -size if decrement else size else: vol_size = -vol['size'] if decrement else vol['size'] + try: - self.stats['pools'][pool]['allocated_capacity_gb'] += vol_size + curr_size = self.stats['pools'][pool]['allocated_capacity_gb'] except KeyError: self.stats['pools'][pool] = dict( - allocated_capacity_gb=max(vol_size, 0)) + allocated_capacity_gb=0) + curr_size = 0 + + msg = "Decrementing " if decrement else "Incrementing " + msg += ("allocated_capacity_gb host %(host)s (%(curr_size)s) by " + "%(vol_size)s ") + LOG.debug( + msg, + {'host': host, + 'curr_size': self.stats['pools'][pool]['allocated_capacity_gb'], + 'vol_size': vol_size}, resource=vol) + + self.stats['pools'][pool]['allocated_capacity_gb'] += vol_size + + pool_info = self.stats['pools'][pool] + if pool_info['allocated_capacity_gb'] < 0: + # Remove this once we find out why + new_size = pool_info['allocated_capacity_gb'] + LOG.warning("allocated_capacity_gb now=%(new_size)s" + " prev=%(prev_size)s " + "for pool %(pool)s is negative," + "after being altered by %(vol_size)s size. Reset to 0", + {'new_size': new_size, + 'prev_size': curr_size, + 'pool': pool, + 'vol_size': vol_size}, + resource=vol) + self.stats['pools'][pool]['allocated_capacity_gb'] = 0 def delete_group(self, context, group: objects.Group) -> None: """Deletes group and the volumes in the group.""" From 1db5293f695b57382f73f0b70b78d4bf398739ce Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Tue, 10 May 2022 13:59:38 +0300 Subject: [PATCH 083/149] [vmware] Allow pulling images from Swift oslo.vmware has a new option to pass the Swift direct_url of an image to VMware for being downloaded directly from there, instead of cinder-volume proxying between glance and VMware. 
We can feature-toggle this on/off by controlling `allow_pulling_images_from_url`. --- .../volume/drivers/vmware/test_vmware_vmdk.py | 4 +++- cinder/volume/drivers/vmware/vmdk.py | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index b8323f21d33..867c55bfb11 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -122,6 +122,7 @@ def setUp(self): vmware_select_random_best_datastore=False, vmware_random_datastore_range=None, vmware_datastores_as_pools=False, + allow_pulling_images_from_url=False, ) self._db = mock.Mock() @@ -1318,7 +1319,8 @@ def _test_copy_image_to_volume_stream_optimized(self, vm_folder=folder, vm_import_spec=import_spec, image_size=image_size, - http_method='POST') + http_method='POST', + allow_pull_from_url=False) if download_error: self.assertFalse(vops.update_backing_disk_uuid.called) vops.delete_backing.assert_called_once_with(backing) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index fa817bb93d4..7235acfc503 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -203,7 +203,15 @@ 'This allows the cinder scheduler to pick which datastore ' 'a volume lives on. This also enables managing capacity ' 'for each datastore by cinder. ' - ) + ), + cfg.BoolOpt('allow_pulling_images_from_url', + default=True, + help='Allow VMware to pull images directly from Swift. ' + 'By enabling this, images that are stored in Swift will be ' + 'downloaded by VMware from the `direct_url`, instead of the ' + 'cinder-volume container having to proxy the image between ' + 'glance and VMware.' + ), ] CONF = cfg.CONF @@ -1679,8 +1687,12 @@ def _fetch_stream_optimized_image(self, context, volume, image_service, timeout = self.configuration.vmware_image_transfer_timeout_secs host_ip = self.configuration.vmware_host_ip port = self.configuration.vmware_host_port + allow_url = self.configuration.allow_pulling_images_from_url LOG.debug("Fetching glance image: %(id)s to server: %(host)s.", {'id': image_id, 'host': host_ip}) + if allow_url: + LOG.debug("Downloading images directly from URL was enabled " + "by `allow_pulling_images_from_url`") backing = image_transfer.download_stream_optimized_image( context, timeout, @@ -1693,7 +1705,8 @@ def _fetch_stream_optimized_image(self, context, volume, image_service, vm_folder=folder, vm_import_spec=vm_import_spec, image_size=image_size, - http_method='POST') + http_method='POST', + allow_pull_from_url=allow_url) self.volumeops.update_backing_disk_uuid(backing, volume['id']) except (exceptions.VimException, exceptions.VMwareDriverException): From ecda4c7e8b6050ceb65f09e310d397794ba0b2e8 Mon Sep 17 00:00:00 2001 From: Hemna Date: Thu, 21 Apr 2022 14:15:07 -0400 Subject: [PATCH 084/149] [SAP] ensure affinity/anti-affinity is maintained at migration time This patch adds the volume create-time scheduler hints to the volume metadata and uses that metadata at volume migrate time. This ensures that when a volume is migrated via a normal cinder migrate call, the affinity/anti-affinity is still honored. The volume metadata keys scheduler_hint_same_host and scheduler_hint_different_host are not editable or creatable via the API.
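For illustration, the metadata round trip looks roughly like this (the UUID below is a made-up example):

    # At create time the scheduler hint list is flattened into a metadata value.
    hints = {'same_host': ['f051ddd3-b2b4-4522-b087-64cd8fa5c7a3']}
    metadata = {'scheduler_hint_same_host': ','.join(hints['same_host'])}
    # -> {'scheduler_hint_same_host': 'f051ddd3-b2b4-4522-b087-64cd8fa5c7a3'}

    # At migrate time the value is split back into filter_properties for the
    # scheduler.
    filter_properties = {
        'same_host': metadata['scheduler_hint_same_host'].split(','),
    }
    # -> {'same_host': ['f051ddd3-b2b4-4522-b087-64cd8fa5c7a3']}

Note that ','.join() applied to a plain string splits it into single characters, which is why a later patch in this series adds an isinstance() guard before joining.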
--- cinder/volume/api.py | 54 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 5d3523992f7..11343a972ab 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -203,6 +203,32 @@ def _is_encrypted(self, volume_type): return False return specs.get('encryption', {}) is not {} + def _set_scheduler_hints_to_volume_metadata(self, scheduler_hints, + metadata): + if scheduler_hints: + if 'same_host' in scheduler_hints: + hint = ','.join(scheduler_hints["same_host"]) + metadata["scheduler_hint_same_host"] = hint + if "different_host" in scheduler_hints: + hint = ','.join(scheduler_hints["different_host"]) + metadata["scheduler_hint_different_host"] = hint + return metadata + + def _get_scheduler_hints_from_volume(self, volume): + filter_properties = {} + if "scheduler_hint_same_host" in volume.metadata: + LOG.debug("Found a scheduler_hint_same_host in volume %s", + volume.metadata["scheduler_hint_same_host"]) + hint = volume.metadata["scheduler_hint_same_host"] + filter_properties["same_host"] = hint.split(',') + + if "scheduler_hint_different_host" in volume.metadata: + LOG.debug("Found a scheduler_hint_different_host in volume %s", + volume.metadata["scheduler_hint_different_host"]) + hint = volume.metadata["scheduler_hint_different_host"] + filter_properties["different_host"] = hint.split(',') + return filter_properties + def create(self, context, size, name, description, snapshot=None, image_id=None, volume_type=None, metadata=None, availability_zone=None, source_volume=None, @@ -296,6 +322,15 @@ def create(self, context, size, name, description, snapshot=None, if CONF.storage_availability_zone: availability_zones.add(CONF.storage_availability_zone) + # Force the scheduler hints into the volume metadata + if not metadata: + metadata = {} + + metadata = self._set_scheduler_hints_to_volume_metadata( + scheduler_hints, + metadata + ) + utils.check_metadata_properties(metadata) create_what = { @@ -861,10 +896,14 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, LOG.error(msg) raise exception.InvalidVolume(reason=msg) + # Check if there is an affinity/antiaffinity against the volume + filter_properties = self._get_scheduler_hints_from_volume(volume) + LOG.debug("Invoking migrate_volume to host=%s", dest['host']) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, - wait_for_completion=False) + wait_for_completion=False, + filter_properties=filter_properties) volume.refresh() def initialize_connection(self, context, volume, connector): @@ -1221,6 +1260,13 @@ def _update_volume_metadata(self, context, volume, metadata, delete=False, '%s status.') % volume['status'] LOG.info(msg, resource=volume) raise exception.InvalidVolume(reason=msg) + + if ('scheduler_hint_different_host' in metadata or + 'scheduler_hint_same_host' in metadata): + raise exception.InvalidInput("Cannot add/edit volume metadata key " + "scheduler_hint_same_host or " + "scheduler_hint_different_host") + return self.db.volume_metadata_update(context, volume['id'], metadata, delete, meta_type) @@ -1625,6 +1671,9 @@ def migrate_volume(self, context, volume, host, cluster_name, force_copy, LOG.error(msg) raise exception.InvalidVolume(reason=msg) + # Check if there is an affinity/antiaffinity against the volume + filter_props = self._get_scheduler_hints_from_volume(volume) + # Call the scheduler to ensure that the host exists and that it can # accept the volume volume_type = {} @@ 
-1638,7 +1687,8 @@ def migrate_volume(self, context, volume, host, cluster_name, force_copy, volume, cluster_name or host, force_copy, - request_spec) + request_spec, + filter_properties=filter_props) LOG.info("Migrate volume request issued successfully.", resource=volume) From 929cbceafb26ec22bc5c77e7cd715b18b733e199 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 8 Jun 2022 08:50:22 -0400 Subject: [PATCH 085/149] [SAP] Fix migration by connector filter_properties This patch fixes the migration by connector with affinity/anti-affinity filter_properties. The previous code was doing an rpcapi call to the volume manager to migrate the volume with the filter properties. At this point the scheduler has already picked a destination host, so the filter_properties being passed to the volume manager is useless. This patch sends the filter_properties through the scheduler to help it pick the right backend before calling the volume manager to migrate the volume to the new backend. --- cinder/scheduler/filter_scheduler.py | 6 ++++-- cinder/scheduler/manager.py | 6 ++++-- cinder/scheduler/rpcapi.py | 6 ++++-- cinder/tests/unit/scheduler/test_scheduler.py | 2 +- cinder/volume/api.py | 13 +++++++------ 5 files changed, 20 insertions(+), 13 deletions(-) diff --git a/cinder/scheduler/filter_scheduler.py b/cinder/scheduler/filter_scheduler.py index 462c4e357a9..e8e971a25e6 100644 --- a/cinder/scheduler/filter_scheduler.py +++ b/cinder/scheduler/filter_scheduler.py @@ -111,13 +111,15 @@ def schedule_create_volume(self, context, request_spec, filter_properties): filter_properties, allow_reschedule=True) - def find_backend_for_connector(self, context, connector, request_spec): + def find_backend_for_connector(self, context, connector, request_spec, + filter_properties=None): key = 'connection_capabilities' if key not in connector: raise exception.InvalidConnectionCapabilities( reason=_("The connector doesn't contain a %s field.") % key) - weighed_backends = self._get_weighted_candidates(context, request_spec) + weighed_backends = self._get_weighted_candidates(context, request_spec, + filter_properties) if not weighed_backends: raise exception.NoValidBackend(reason=_("No weighed backends " "available")) diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index 65790be8124..6c76d314d44 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -368,12 +368,14 @@ def _retype_volume_set_error(self, context, ex, request_spec, old_reservations) @append_operation_type() - def find_backend_for_connector(self, context, connector, request_spec): + def find_backend_for_connector(self, context, connector, request_spec, + filter_properties=None): self._wait_for_scheduler() backend = self.driver.find_backend_for_connector(context, connector, - request_spec) + request_spec, + filter_properties) return {'host': backend.host, 'cluster_name': backend.cluster_name, 'capabilities': backend.capabilities} diff --git a/cinder/scheduler/rpcapi.py b/cinder/scheduler/rpcapi.py index 02c6a6ed912..9409df3c539 100644 --- a/cinder/scheduler/rpcapi.py +++ b/cinder/scheduler/rpcapi.py @@ -268,7 +268,9 @@ def create_backup(self, ctxt, backup): msg_args = {'backup': backup} return cctxt.cast(ctxt, 'create_backup', **msg_args) - def find_backend_for_connector(self, context, connector, request_spec): + def find_backend_for_connector(self, context, connector, request_spec, + filter_properties=None): cctxt = self._get_cctxt() return cctxt.call(context, 'find_backend_for_connector', - 
connector=connector, request_spec=request_spec) + connector=connector, request_spec=request_spec, + filter_properties=filter_properties) diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py index f33b3bf04df..57141a0e59d 100644 --- a/cinder/tests/unit/scheduler/test_scheduler.py +++ b/cinder/tests/unit/scheduler/test_scheduler.py @@ -638,7 +638,7 @@ def test_find_backend_for_connector(self, _mock_find_backend_for_conector): ret = self.manager.find_backend_for_connector(self.context, connector, request_spec) _mock_find_backend_for_conector.assert_called_once_with( - self.context, connector, request_spec) + self.context, connector, request_spec, None) self.assertEqual(ret, { 'host': backend_ret.host, 'cluster_name': backend_ret.cluster_name, diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 11343a972ab..c20461d3cf9 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -845,9 +845,14 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, 'volume_properties': volume, 'volume_type': volume_type, 'volume_id': volume.id} + + # Check if there is an affinity/antiaffinity against the volume + filter_properties = self._get_scheduler_hints_from_volume(volume) + try: dest = self.scheduler_rpcapi.find_backend_for_connector( - ctxt, connector, request_spec) + ctxt, connector, request_spec, + filter_properties=filter_properties) except exception.NoValidBackend: LOG.error("The connector was rejected by the backend. Could not " "find another backend compatible with the connector %s.", @@ -896,14 +901,10 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, LOG.error(msg) raise exception.InvalidVolume(reason=msg) - # Check if there is an affinity/antiaffinity against the volume - filter_properties = self._get_scheduler_hints_from_volume(volume) - LOG.debug("Invoking migrate_volume to host=%s", dest['host']) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, - wait_for_completion=False, - filter_properties=filter_properties) + wait_for_completion=False) volume.refresh() def initialize_connection(self, context, volume, connector): From 86b77bc54686ce8c7adcf6f19e71b5507e455b0a Mon Sep 17 00:00:00 2001 From: Hemna Date: Mon, 13 Jun 2022 14:55:35 -0400 Subject: [PATCH 086/149] [SAP] fix the metadata for affinity/anti-affinity This fixes a problem of storing the volume uuids in the metadata {'scheduler_hint_same_host': 'f,0,5,1,d,d,d,3,-,b,2,b,4,-,4,5,2,2,-,b,0,8,7,-,6,4,c,d,8,f,a,5,c,7,a,3'} --- cinder/volume/api.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index c20461d3cf9..0ddb0d93bc7 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -207,10 +207,16 @@ def _set_scheduler_hints_to_volume_metadata(self, scheduler_hints, metadata): if scheduler_hints: if 'same_host' in scheduler_hints: - hint = ','.join(scheduler_hints["same_host"]) + if isinstance(scheduler_hints['same_host'], str): + hint = scheduler_hints['same_host'] + else: + hint = ','.join(scheduler_hints["same_host"]) metadata["scheduler_hint_same_host"] = hint if "different_host" in scheduler_hints: - hint = ','.join(scheduler_hints["different_host"]) + if isinstance(scheduler_hints['different_host'], str): + hint = scheduler_hints["different_host"] + else: + hint = ','.join(scheduler_hints["different_host"]) metadata["scheduler_hint_different_host"] = hint return metadata From 5a6d32071caeb6d5d653432521467a3627b913e3 Mon Sep 17 
00:00:00 2001 From: Hemna Date: Mon, 20 Jun 2022 10:33:23 -0400 Subject: [PATCH 087/149] [SAP] Fix for allocated_capacity_gb not being updated This patch puts a fix in for the scheduler's copy of the backend/pool's allocated_capacity_gb while doing a migrate by connected. This patch also reorders the RPC call to update the allocated_capacity_gb inside the volume manager's migrate_volume call. This will ensure: 1) The scheduler's copy of allocated_capacity_gb for a backend/pool is updated for a migrate_by_connector in between the volume manager's get_volume_stats() calls. 2) after the volume manager calls get_volume_stats on a driver the allocated_capacity_gb has been updated from the last migration. These issues were discovered by an overprovisioning of a few datastores in eu-de-2, when a customer created 10+ volumes quickly and attached all of them to a VM. The volumes were all created on 1 shard, and then due to the attach call, they were all migrated to the shard where the nova vm lived, all within a short period of time. --- cinder/scheduler/manager.py | 4 +- cinder/scheduler/rpcapi.py | 3 +- cinder/tests/unit/scheduler/test_scheduler.py | 4 +- cinder/volume/api.py | 2 +- cinder/volume/manager.py | 37 +++++++++++++------ cinder/volume/rpcapi.py | 6 ++- 6 files changed, 38 insertions(+), 18 deletions(-) diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index 6c76d314d44..5c35a6a9ecf 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -369,13 +369,13 @@ def _retype_volume_set_error(self, context, ex, request_spec, @append_operation_type() def find_backend_for_connector(self, context, connector, request_spec, - filter_properties=None): + volume_size, filter_properties=None): self._wait_for_scheduler() - backend = self.driver.find_backend_for_connector(context, connector, request_spec, filter_properties) + backend.consume_from_volume({'size': volume_size}) return {'host': backend.host, 'cluster_name': backend.cluster_name, 'capabilities': backend.capabilities} diff --git a/cinder/scheduler/rpcapi.py b/cinder/scheduler/rpcapi.py index 9409df3c539..12f7b6b5ded 100644 --- a/cinder/scheduler/rpcapi.py +++ b/cinder/scheduler/rpcapi.py @@ -269,8 +269,9 @@ def create_backup(self, ctxt, backup): return cctxt.cast(ctxt, 'create_backup', **msg_args) def find_backend_for_connector(self, context, connector, request_spec, - filter_properties=None): + volume_size, filter_properties=None): cctxt = self._get_cctxt() return cctxt.call(context, 'find_backend_for_connector', connector=connector, request_spec=request_spec, + volume_size=volume_size, filter_properties=filter_properties) diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py index 57141a0e59d..e4c31f33149 100644 --- a/cinder/tests/unit/scheduler/test_scheduler.py +++ b/cinder/tests/unit/scheduler/test_scheduler.py @@ -632,11 +632,13 @@ def test_create_backup_no_service(self, mock_volume_update, def test_find_backend_for_connector(self, _mock_find_backend_for_conector): connector = mock.Mock() request_spec = mock.Mock() + volume_size = mock.Mock() backend_ret = mock.Mock(host='fake-host', cluster_name='fake-cluster', capabilities=[]) _mock_find_backend_for_conector.return_value = backend_ret ret = self.manager.find_backend_for_connector(self.context, - connector, request_spec) + connector, request_spec, + volume_size) _mock_find_backend_for_conector.assert_called_once_with( self.context, connector, request_spec, None) self.assertEqual(ret, { diff 
--git a/cinder/volume/api.py b/cinder/volume/api.py index 0ddb0d93bc7..8c19e037169 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -857,7 +857,7 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, try: dest = self.scheduler_rpcapi.find_backend_for_connector( - ctxt, connector, request_spec, + ctxt, connector, request_spec, volume.size, filter_properties=filter_properties) except exception.NoValidBackend: LOG.error("The connector was rejected by the backend. Could not " diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 7a6f2bad09a..1337720a6a5 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2620,6 +2620,14 @@ def migrate_volume(self, if not force_host_copy and self._can_use_driver_migration(diff): try: LOG.debug("Issue driver.migrate_volume.", resource=volume) + # Update the remote host's allocated_capacity_gb first + # Because the migration can take a while, and the scheduler + # needs to account for the space consumed. + LOG.debug("Update remote allocated_capacity_gb for " + "host %(host)s", + {'host': volume.host}, + resource=volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume) moved, model_update = self.driver.migrate_volume(ctxt, volume, host) @@ -2643,12 +2651,13 @@ def migrate_volume(self, volume.save() self._update_allocated_capacity(volume, decrement=True, host=original_host) - LOG.debug("Update remote allocated_capacity_gb for " - "host %(host)s", - {'host': volume.host}, - resource=volume) - rpcapi.update_migrated_volume_capacity(ctxt, volume) except Exception: + LOG.debug("Decrement remote allocated_capacity_gb for " + "host %(host)s", + {'host': volume.host}, + resource=volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume, + decrement=True) with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} if status_update: @@ -2658,15 +2667,21 @@ def migrate_volume(self, if not moved: try: original_host = volume.host - self._migrate_volume_generic(ctxt, volume, host, new_type_id) - self._update_allocated_capacity(volume, decrement=True, - host=original_host) LOG.debug("Update remote allocated_capacity_gb for " "host %(host)s", {'host': volume.host}, resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume) + self._migrate_volume_generic(ctxt, volume, host, new_type_id) + self._update_allocated_capacity(volume, decrement=True, + host=original_host) except Exception: + LOG.debug("Decrement remote allocated_capacity_gb for " + "host %(host)s", + {'host': volume.host}, + resource=volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume, + decrement=True) with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} if status_update: @@ -4345,9 +4360,9 @@ def delete_group_snapshot(self, context, group_snapshot) -> None: snapshots) @utils.trace - def update_migrated_volume_capacity(self, ctxt, volume): - """Update allocated_capacity_gb for the new migrated volume host.""" - self._update_allocated_capacity(volume) + def update_migrated_volume_capacity(self, ctxt, volume, decrement=False): + """Update allocated_capacity_gb for the migrated volume host.""" + self._update_allocated_capacity(volume, decrement=decrement) def update_migrated_volume(self, ctxt, volume, new_volume, volume_status): """Finalize migration process on backend device.""" diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index b89b70603b8..e533c9e7178 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -310,9 +310,11 @@ def 
update_migrated_volume(self, ctxt, volume, new_volume, new_volume=new_volume, volume_status=original_volume_status) - def update_migrated_volume_capacity(self, ctxt, volume): + def update_migrated_volume_capacity(self, ctxt, volume, decrement=False): cctxt = self._get_cctxt(volume.service_topic_queue) - cctxt.cast(ctxt, 'update_migrated_volume_capacity', volume=volume) + cctxt.cast(ctxt, 'update_migrated_volume_capacity', + volume=volume, + decrement=decrement) def freeze_host(self, ctxt, service): """Set backend host to frozen.""" From 925562e2eed14bcdb483fcbae2f3eecf4cf238fa Mon Sep 17 00:00:00 2001 From: Hemna Date: Fri, 1 Jul 2022 10:23:48 -0400 Subject: [PATCH 088/149] [SAP] Added pool_state tracking. Updated backend_state This patch adds the ability to track if a datastore is available to be used in get_volume_stats. If a datastore is in maintenance mode, then it can't accept volume provisioning requests. So the driver now marks the datastore/pool as 'down'. When the datastore comes back out of maintenance mode, then the pool_state is changed to 'up'. Currently, Cinder doesn't have a built in mechanism for dealing with a pool_state being down, but we can simulate it by adding an extra spec setting in all volume types of pool_state='up'. That way the capability filter will filter out all pools that don't have that capability coming from the driver. This patch also changes where the backend_state is set so that the scheduler can mark the service as down correctly. --- .../unit/volume/drivers/vmware/test_vmware_vmdk.py | 10 ++++++++-- cinder/volume/drivers/vmware/datastore.py | 11 ++++++----- cinder/volume/drivers/vmware/vmdk.py | 13 +++++++++++-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 867c55bfb11..19a26bfa9c8 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -233,15 +233,20 @@ class result(object): "storage_profile": "Gold"}} return result(), datastores + @mock.patch('cinder.volume.drivers.vmware.datastore.' 
+ 'DatastoreSelector.is_datastore_usable') @mock.patch.object(VMDK_DRIVER, '_collect_backend_stats') @mock.patch.object(VMDK_DRIVER, 'session') - def test_get_volume_stats_pools(self, session, mock_stats): + def test_get_volume_stats_pools(self, session, mock_stats, + datastore_usable): fake_result, fake_datastore_profiles = self._fake_stats_result() mock_stats.return_value = (fake_result, fake_datastore_profiles) + datastore_usable.return_value = True self._config.vmware_datastores_as_pools = True self._driver = vmdk.VMwareVcVmdkDriver(configuration=self._config, additional_endpoints=[], db=self._db) + self._driver._ds_sel = mock.MagicMock() retr_result_mock = mock.Mock(spec=['objects']) retr_result_mock.objects = [] @@ -257,7 +262,8 @@ def test_get_volume_stats_pools(self, session, mock_stats): self.assertEqual(0, stats["pools"][0]['reserved_percentage']) self.assertEqual(9313, stats["pools"][0]['total_capacity_gb']) self.assertEqual(4657, stats["pools"][0]['free_capacity_gb']) - self.assertEqual('up', stats["pools"][0]['backend_state']) + self.assertEqual('up', stats["pools"][0]['pool_state']) + self.assertEqual('up', stats["backend_state"]) self.assertFalse(stats["pools"][0]['Multiattach']) self.assertEqual(vmdk.LOCATION_DRIVER_NAME + ":fake-service", stats['location_info']) diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 5c83b28f13a..2064b89da8c 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -158,6 +158,10 @@ def _filter_hosts(self, hosts): return valid_hosts + def is_datastore_usable(self, summary): + return summary.accessible and not self._vops._in_maintenance( + summary) + def _filter_datastores(self, datastores, size_bytes, @@ -175,10 +179,6 @@ def _is_valid_ds_type(summary): (hard_affinity_ds_types is None or ds_type in hard_affinity_ds_types)) - def _is_ds_usable(summary): - return summary.accessible and not self._vops._in_maintenance( - summary) - valid_host_refs = valid_host_refs or [] valid_hosts = [host_ref.value for host_ref in valid_host_refs] @@ -207,7 +207,8 @@ def _is_ds_valid(ds_ref, ds_props): not _is_ds_accessible_to_valid_host(host_mounts)): return False - return _is_valid_ds_type(summary) and _is_ds_usable(summary) + return (_is_valid_ds_type(summary) and + self.is_datastore_usable(summary)) datastores = {k: v for k, v in datastores.items() if _is_ds_valid(k, v)} diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 7235acfc503..59ee42e3d36 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -496,22 +496,30 @@ def _get_volume_stats(self): max_over_subscription_ratio = self.configuration.safe_get( 'max_over_subscription_ratio') + backend_state = 'up' data = {'volume_backend_name': backend_name, 'vendor_name': 'VMware', 'driver_version': self.VERSION, 'storage_protocol': 'vmdk', 'location_info': location_info, + 'backend_state': backend_state, } result, datastores = self._collect_backend_stats() connection_capabilities = self._get_connection_capabilities() + if not datastores: + backend_state = 'down' + data['backend_state'] = backend_state if self.configuration.vmware_datastores_as_pools: pools = [] for ds_name in datastores: datastore = datastores[ds_name] summary = datastore["summary"] - pool_state = "up" if summary.accessible is True else "down" + pool_state = 'down' + if self.ds_sel.is_datastore_usable(summary): + pool_state = 'up' + pool = {'pool_name': 
summary.name, 'total_capacity_gb': round( summary.capacity / units.Gi), @@ -526,9 +534,10 @@ def _get_volume_stats(self): 'datastore_type': summary.type, 'location_url': summary.url, 'location_info': location_info, - 'backend_state': pool_state, 'storage_profile': datastore["storage_profile"], 'connection_capabilities': connection_capabilities, + 'backend_state': backend_state, + 'pool_state': pool_state } # Add any custom attributes associated with the datastore From 088d9c7402869deae2fdcc167482fe97b589f3c8 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 6 Jul 2022 11:39:06 -0400 Subject: [PATCH 089/149] [SAP] update the new remote host allocated_capacity_gb This patch fixes a problem updating the allocated_capacity_gb for the destination cinder host during volume migration. We recently made a change to when we update the allocated_capacity_gb during volume migration, which allowed an over allocation. The remove rpc call to update the destination host was moved to before the call to migrate the volume to ensure that the scheduler had updated stats for the destination, since the migration can take a long time. The problem with this move is that the volume host entry wasn't updated at the time of the call to update the remote destination host. The update was being called on the source host, so there was effectively no change since the src and destination host was the same during both calls to update_allocated_capacity_gb. This patch fixes that. Added a host entry to the rpc call to force the update on that particular host value. --- cinder/volume/manager.py | 21 +++++++++++++-------- cinder/volume/rpcapi.py | 4 +++- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 1337720a6a5..4e002b17328 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2625,9 +2625,10 @@ def migrate_volume(self, # needs to account for the space consumed. 
LOG.debug("Update remote allocated_capacity_gb for " "host %(host)s", - {'host': volume.host}, + {'host': host}, resource=volume) - rpcapi.update_migrated_volume_capacity(ctxt, volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume, + host=host['host']) moved, model_update = self.driver.migrate_volume(ctxt, volume, host) @@ -2654,9 +2655,10 @@ def migrate_volume(self, except Exception: LOG.debug("Decrement remote allocated_capacity_gb for " "host %(host)s", - {'host': volume.host}, + {'host': host['host']}, resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume, + host=host['host'], decrement=True) with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} @@ -2671,16 +2673,18 @@ def migrate_volume(self, "host %(host)s", {'host': volume.host}, resource=volume) - rpcapi.update_migrated_volume_capacity(ctxt, volume) + rpcapi.update_migrated_volume_capacity(ctxt, volume, + host=host['host']) self._migrate_volume_generic(ctxt, volume, host, new_type_id) self._update_allocated_capacity(volume, decrement=True, host=original_host) except Exception: LOG.debug("Decrement remote allocated_capacity_gb for " "host %(host)s", - {'host': volume.host}, + {'host': host['host']}, resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume, + host=host['host'], decrement=True) with excutils.save_and_reraise_exception(): updates = {'migration_status': 'error'} @@ -4359,10 +4363,11 @@ def delete_group_snapshot(self, context, group_snapshot) -> None: "delete.end", snapshots) - @utils.trace - def update_migrated_volume_capacity(self, ctxt, volume, decrement=False): + @volume_utils.trace + def update_migrated_volume_capacity(self, ctxt, volume, host=None, + decrement=False): """Update allocated_capacity_gb for the migrated volume host.""" - self._update_allocated_capacity(volume, decrement=decrement) + self._update_allocated_capacity(volume, host=host, decrement=decrement) def update_migrated_volume(self, ctxt, volume, new_volume, volume_status): """Finalize migration process on backend device.""" diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index e533c9e7178..7ae8c9869dc 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -310,10 +310,12 @@ def update_migrated_volume(self, ctxt, volume, new_volume, new_volume=new_volume, volume_status=original_volume_status) - def update_migrated_volume_capacity(self, ctxt, volume, decrement=False): + def update_migrated_volume_capacity(self, ctxt, volume, host=None, + decrement=False): cctxt = self._get_cctxt(volume.service_topic_queue) cctxt.cast(ctxt, 'update_migrated_volume_capacity', volume=volume, + host=host, decrement=decrement) def freeze_host(self, ctxt, service): From 23d06a934badf71374472253acc00d8239547858 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Thu, 10 Mar 2022 14:25:05 +0100 Subject: [PATCH 090/149] [SAP] Add attached volume migration for vmware This patch adds volume migration for attached vmware volumes to the vmdk driver. 
--- cinder/volume/drivers/vmware/exceptions.py | 5 + cinder/volume/drivers/vmware/vmdk.py | 31 +++++- cinder/volume/drivers/vmware/volumeops.py | 118 +++++++++++++++++++++ 3 files changed, 153 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/exceptions.py b/cinder/volume/drivers/vmware/exceptions.py index fedfadf69e2..f7cc6144191 100644 --- a/cinder/volume/drivers/vmware/exceptions.py +++ b/cinder/volume/drivers/vmware/exceptions.py @@ -65,3 +65,8 @@ class TemplateNotFoundException(exceptions.VMwareDriverException): class SnapshotNotFoundException(exceptions.VMwareDriverException): """Thrown when the backend snapshot cannot be found.""" msg_fmt = _("Snapshot: %(name)s not found.") + + +class StorageMigrationFailed(exceptions.VMwareDriverException): + """Thrown when svmotion fails.""" + msg_fmt = _("SvMotion failed.") diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 59ee42e3d36..d956abfbfec 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2979,11 +2979,40 @@ def migrate_volume(self, context, volume, host): return self._migrate_attached_cross_vc(context, dest_host, volume, backing) else: - raise NotImplementedError() + return self._migrate_attached_same_vc(context, dest_host, + volume, backing) else: return self._migrate_unattached(context, dest_host, volume, backing) + def _migrate_attached_same_vc(self, context, dest_host, volume, backing): + get_vm_by_uuid = self.volumeops.get_backing_by_uuid + # reusing the get_backing_by_uuid to lookup the attacher vm + if volume['multiattach']: + raise NotImplementedError() + attachments = volume.volume_attachment + instance_uuid = attachments[0]['instance_uuid'] + attachedvm = get_vm_by_uuid(instance_uuid) + ds_info = self._remote_api.select_ds_for_volume(context, + cinder_host=dest_host, + volume=volume) + rp_ref = vim_util.get_moref(ds_info['resource_pool'], 'ResourcePool') + ds_ref = vim_util.get_moref(ds_info['datastore'], 'Datastore') + self.volumeops.relocate_one_disk(attachedvm, ds_ref, rp_ref, + volume_id=volume.id, + profile_id=ds_info.get('profile_id')) + new_disk = self.volumeops.get_disk_by_uuid(attachedvm, volume.id) + new_vmdk = new_disk.backing.fileName + # VMware does not update shadowvm backing after svmotion, + # So we need to fall reconfigure_backing_vmdk_path to fix + self.volumeops.reconfigure_backing_vmdk_path(backing, new_vmdk) + self.volumeops.relocate_backing(backing, ds_ref, None, None) + # VMware is locking the vmdk, so there is no posibility + # to update the profile from the backing, but the relocate_backing + # can still move the "skeletion" part of the backing to the new DS + + return (True, None) + def _migrate_unattached(self, context, dest_host, volume, backing): ds_info = self._remote_api.select_ds_for_volume(context, cinder_host=dest_host, diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 709b7a72239..22a406100d9 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1016,6 +1016,41 @@ def _create_relocate_spec_disk_locator(self, datastore, disk_type, return disk_locator + def _get_rspec_for_one_disk(self, datastore, + disk_move_type, disk_type=None, + disk_devices=None, disk_to_move=None, + profile_id=None): + """Return spec for relocating volume backing. 
+ + :param datastore: Reference to the datastore + :param disk_move_type: Disk move type option + :param disk_type: Destination disk type + :param disk_devices: Virtual devices corresponding to the disks + :param disk_to_move: Virtual disk, we want to move to a new ds + :param profile_id: ID of the profile to use (Cross vCenter Vmotion) + :return: Spec for relocation + """ + cf = self._session.vim.client.factory + relocate_spec = cf.create('ns0:VirtualMachineRelocateSpec') + locator = [] + for disk_device in disk_devices: + if disk_device.backing.uuid == disk_to_move.backing.uuid: + spec = self._create_relocate_spec_disk_locator(datastore, + disk_type, + disk_device, + profile_id) + else: + original_ds = disk_device.backing.datastore + spec = self._create_relocate_spec_disk_locator(original_ds, + disk_type, + disk_device, + None) + locator.append(spec) + relocate_spec.disk = locator + relocate_spec.diskMoveType = disk_move_type + LOG.debug("Spec for relocating the backing: %s.", relocate_spec) + return relocate_spec + def _get_relocate_spec(self, datastore, resource_pool, host, disk_move_type, disk_type=None, disk_device=None, profile_id=None, service=None): @@ -1068,6 +1103,55 @@ def _get_service_locator_spec(self, service): return service_locator + def relocate_one_disk( + self, ownervm, datastore, resource_pool, volume_id, + disk_type=None, profile_id=None): + """Relocates one disk of the consumer vm to the target datastore + + :param ownervm: Reference to the attacher of the voume + :param datastore: Reference to the datastore where we move + :param resource_pool: Reference to the resource pool + :param volume_id: ID of the cinder volume + :param disk_type: destination disk type + :param profile_id: Id of the storage profile + """ + rename_vm = self.rename_backing + # reusing existing vm rename function for the customer vm + disk_devices = self._get_disk_devices(ownervm) + disk_to_move = self.get_disk_by_uuid(ownervm, volume_id) + vmdk_path = disk_to_move.backing.fileName + disk_move_type = 'moveAllDiskBackingsAndDisallowSharing' + relocate_spec = self._get_rspec_for_one_disk(datastore, + disk_move_type, + disk_type, + disk_devices, + disk_to_move, + profile_id=profile_id) + original_name = self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + ownervm, 'name') + rename_vm(ownervm, volume_id) + try: + task = self._session.invoke_api(self._session.vim, + 'RelocateVM_Task', + ownervm, spec=relocate_spec) + + LOG.debug("Initiated relocation of volume main vmdk: %s.", + vmdk_path) + self._session.wait_for_task(task) + ds_val = vim_util.get_moref_value(datastore) + rp_val = vim_util.get_moref_value(resource_pool) + LOG.info("Successfully relocated volume main vmdk: %(path)s" + " to datastore: %(ds)s and resource pool: %(rp)s.", + {'path': vmdk_path, + 'ds': ds_val, 'rp': rp_val}) + except Exception as e: + LOG.error("Relocation of main vmdk: %s failed.", vmdk_path) + raise vmdk_exceptions.StorageMigratonFailed(e) + finally: + rename_vm(ownervm, original_name) + def relocate_backing( self, backing, datastore, resource_pool, host, disk_type=None, profile_id=None, service=None): @@ -1489,6 +1573,23 @@ def detach_disk_from_backing(self, backing, disk_device): reconfig_spec.deviceChange = [spec] self._reconfigure_backing(backing, reconfig_spec) + def reconfigure_backing_vmdk_path(self, backing, new_vmdk_path): + """Reconfigures backing VM with a new vmdk file pointer""" + + cf = self._session.vim.client.factory + reconfig_spec = 
cf.create('ns0:VirtualMachineConfigSpec') + disk_device = self._get_disk_device(backing) + disk_spec = cf.create('ns0:VirtualDeviceConfigSpec') + disk_spec.device = disk_device + disk_spec.device.backing.fileName = new_vmdk_path + disk_spec.operation = 'edit' + reconfig_spec.deviceChange = [disk_spec] + self._reconfigure_backing(backing, reconfig_spec) + LOG.debug("Backing VM: %(backing)s reconfigured with new vmdk_path: " + "%(vmdk_path)s.", + {'backing': backing, + 'vmdk_path': new_vmdk_path}) + def rename_backing(self, backing, new_name): """Rename backing VM. @@ -1974,6 +2075,23 @@ def get_disk_device(self, vm, vmdk_path): and backing.fileName == vmdk_path): return disk_device + def get_disk_by_uuid(self, vm, disk_uuid): + """Get the disk device of the VM which corresponds to the given uuid. + + :param vm: VM reference + :param disk_uuid: Uniq uuid of the disk, normaly same as cinder uuid + :return: Matching disk device + """ + disk_devices = self._get_disk_devices(vm) + + for disk_device in disk_devices: + backing = disk_device.backing + if (backing.__class__.__name__ == "VirtualDiskFlatVer2BackingInfo" + and backing.uuid == disk_uuid): + return disk_device + LOG.error("Virtual disk device: %s not found.", disk_uuid) + raise vmdk_exceptions.VirtualDiskNotFoundException() + def mark_backing_as_template(self, backing): LOG.debug("Marking backing: %s as template.", backing) self._session.invoke_api(self._session.vim, 'MarkAsTemplate', backing) From 12cea1a65955badf9bbd492239db290533f5a7c5 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Mon, 11 Jul 2022 10:43:49 -0400 Subject: [PATCH 091/149] [SAP] prevent cross host/shard migration This patch adds an additional check in the volume api to ensure that cinder doesn't allow attached volumes to be migrated to a different host. --- cinder/volume/api.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 8c19e037169..3e82788b824 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -1635,6 +1635,20 @@ def migrate_volume(self, context, volume, host, cluster_name, force_copy, # in a cluster we will do a cluster migration. cluster_name = svc.cluster_name + # Add a check here to ensure that a volume isn't moved + # to another shard if it's attached. + # migrate_by_connector is called for nova live migration + # so this is only called by an admin manually migrating a volume + if volume.status == 'in-use': + # Check if the new host is in a different shard. + src_host = volume_utils.extract_host(volume['host'], 'host') + dst_host = volume_utils.extract_host(host, 'host') + if src_host != dst_host: + msg = ("Cannot migrate an attached volume to a different " + "host/shard") + LOG.error(msg) + raise exception.InvalidHost(reason=msg) + # Build required conditions for conditional update expected = {'status': ('available', 'in-use'), 'migration_status': self.AVAILABLE_MIGRATION_STATUS, From 3de81f6606a2119b5012e704bde883694084b963 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 21 Jul 2022 10:14:11 -0400 Subject: [PATCH 092/149] [SAP] Add option to disable incremental backup This patch adds a new custom config option to silently disable incremental backups. 
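The option is registered without a group, so an operator would enable it in cinder.conf under [DEFAULT], for example:

    [DEFAULT]
    sap_disable_incremental_backup = True

With this set, a request for an incremental backup is still accepted, but it is silently performed as a full backup.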
--- cinder/backup/api.py | 13 +++++++++++++ cinder/opts.py | 1 + 2 files changed, 14 insertions(+) diff --git a/cinder/backup/api.py b/cinder/backup/api.py index 30a036c233c..985488c57c0 100644 --- a/cinder/backup/api.py +++ b/cinder/backup/api.py @@ -49,8 +49,16 @@ help='Backup services use same backend.') ] +sap_backup_opts = [ + cfg.BoolOpt('sap_disable_incremental_backup', + default=False, + help='Silently disable incremental backup.') +] + + CONF = cfg.CONF CONF.register_opts(backup_opts) +CONF.register_opts(sap_backup_opts) LOG = logging.getLogger(__name__) QUOTAS = quota.QUOTAS IMPORT_VOLUME_ID = '00000000-0000-0000-0000-000000000000' @@ -202,6 +210,11 @@ def create(self, context, name, description, volume_id, """Make the RPC call to create a volume backup.""" volume = self.volume_api.get(context, volume_id) context.authorize(policy.CREATE_POLICY, target_obj=volume) + if CONF.sap_disable_incremental_backup: + # This means force incremental to be OFF + incremental = False + LOG.info("Incremental Backups have been silently disabled." + " Will do a full backup.") snapshot = None if snapshot_id: snapshot = self.volume_api.get_snapshot(context, snapshot_id) diff --git a/cinder/opts.py b/cinder/opts.py index e695ce79797..4e056eaeffc 100644 --- a/cinder/opts.py +++ b/cinder/opts.py @@ -221,6 +221,7 @@ def list_opts(): [cinder_api_middleware_auth.use_forwarded_for_opt], cinder_api_views_versions.versions_opts, cinder_backup_api.backup_opts, + cinder_backup_api.sap_backup_opts, cinder_backup_chunkeddriver.backup_opts, cinder_backup_driver.backup_opts, cinder_backup_drivers_ceph.service_opts, From 1b9e1480a7f38844caf0c82e9729eed9bde06f93 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Sun, 17 Jul 2022 15:19:20 +0300 Subject: [PATCH 093/149] Migrate the volume if resize can't happen on current host If the new size of a volume isn't accepted by the host it will be rescheduled and migrated to another one. Volumes that are in a group cannot be rescheduled because migration to another host is not possible in that case. 
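Roughly, the handoff added here works as sketched below (names as in the diff that follows; the scheduler side is simplified):

    # Scheduler side: when the extend cannot fit on the current host, re-run
    # the filters constrained to the same backend and let the migration carry
    # the pending extend along.
    extend_spec = {'new_size': new_size, 'reservations': reservations}
    volume_rpcapi.VolumeAPI().migrate_volume(
        context, volume, backend_state,
        force_host_copy=False, wait_for_completion=False,
        extend_spec=extend_spec)

    # Volume manager side: once the generic migration has succeeded, apply
    # the deferred extend on the destination:
    #     if extend_spec:
    #         rpcapi.extend_volume(ctxt, volume, **extend_spec)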
--- cinder/scheduler/manager.py | 59 ++++++++++++++++--- cinder/tests/unit/scheduler/test_scheduler.py | 26 +++++++- cinder/tests/unit/volume/test_rpcapi.py | 1 + .../unit/volume/test_volume_migration.py | 14 ++++- cinder/volume/api.py | 39 ++---------- cinder/volume/drivers/vmware/vmdk.py | 3 +- cinder/volume/manager.py | 7 ++- cinder/volume/rpcapi.py | 5 +- cinder/volume/volume_utils.py | 34 +++++++++++ 9 files changed, 136 insertions(+), 52 deletions(-) diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index 5c35a6a9ecf..230c1fafdef 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -483,15 +483,56 @@ def _extend_volume_set_error(self, context, ex, request_spec): {'size': new_size - volume.size}) volume_rpcapi.VolumeAPI().extend_volume(context, volume, new_size, reservations) - except exception.NoValidBackend as ex: - QUOTAS.rollback(context, reservations, - project_id=volume.project_id) - _extend_volume_set_error(self, context, ex, request_spec) - self.message_api.create( - context, - message_field.Action.EXTEND_VOLUME, - resource_uuid=volume.id, - exception=ex) + except exception.NoValidBackend: + try: + self._extend_migrate(context, volume, new_size, request_spec, + filter_properties, reservations) + except exception.NoValidBackend as ex: + QUOTAS.rollback(context, reservations, + project_id=volume.project_id) + _extend_volume_set_error(self, context, ex, request_spec) + self.message_api.create( + context, + message_field.Action.EXTEND_VOLUME, + resource_uuid=volume.id, + exception=ex) + + def _extend_migrate(self, context, volume, new_size, request_spec, + filter_properties, reservations): + + if volume.consistencygroup_id or volume.group_id: + raise exception.NoValidBackend( + reason='The volume is in a group and cannot be migrated.') + + scheduler_hints = \ + vol_utils.get_scheduler_hints_from_volume(volume) + filter_properties.update(scheduler_hints) + filter_properties.pop('new_size') + + if not request_spec: + request_spec = {'volume_properties': {'size': new_size}} + else: + request_spec['volume_properties']['size'] = new_size + + if volume['availability_zone']: + request_spec['resource_properties'] = { + 'availability_zone': volume['availability_zone']} + + # SAP + # We have to force the destination host to be on + # the same backend, or it might get migrated + # to another vcenter. 
+ backend = vol_utils.extract_host(volume['host']) + + backend_state = self.driver.backend_passes_filters( + context, backend, request_spec, filter_properties) + + backend_state.consume_from_volume(volume) + + volume_rpcapi.VolumeAPI().migrate_volume( + context, volume, backend_state, + force_host_copy=False, wait_for_completion=False, + extend_spec={'new_size': new_size, 'reservations': reservations}) def _set_volume_state_and_notify(self, method, updates, context, ex, request_spec, msg=None): diff --git a/cinder/tests/unit/scheduler/test_scheduler.py b/cinder/tests/unit/scheduler/test_scheduler.py index e4c31f33149..14eb279d8d4 100644 --- a/cinder/tests/unit/scheduler/test_scheduler.py +++ b/cinder/tests/unit/scheduler/test_scheduler.py @@ -212,9 +212,11 @@ def test_extend_volume_no_valid_host(self, status, mock_create, mock_backend_passes): volume = fake_volume.fake_volume_obj(self.context, **{'size': 1, + 'host': 'fake_host', 'previous_status': status}) no_valid_backend = exception.NoValidBackend(reason='') - mock_backend_passes.side_effect = [no_valid_backend] + mock_backend_passes.side_effect = [no_valid_backend, + no_valid_backend] with mock.patch.object(self.manager, '_set_volume_state_and_notify') as mock_notify: @@ -234,6 +236,28 @@ def test_extend_volume_no_valid_host(self, status, mock_create, resource_uuid=volume.id, exception=no_valid_backend) + @ddt.data('available', 'in-use') + @mock.patch('cinder.volume.rpcapi.VolumeAPI.migrate_volume') + @mock.patch('cinder.scheduler.driver.Scheduler.backend_passes_filters') + def test_extend_volume_migrate_in_group(self, + status, + mock_backend_passes, + mock_migrate): + volume = fake_volume.fake_volume_obj(self.context, + **{'size': 1, + 'group_id': fake.GROUP_ID, + 'previous_status': status}) + no_valid_backend = exception.NoValidBackend(reason='') + mock_backend_passes.side_effect = [no_valid_backend] + + with mock.patch.object(self.manager, + '_set_volume_state_and_notify') as mock_notify: + self.manager.extend_volume(self.context, volume, 2, + 'fake_reservation') + posargs = mock_notify.call_args[0] + self.assertIsInstance(posargs[3], exception.NoValidBackend) + mock_migrate.assert_not_called() + @mock.patch('cinder.quota.QuotaEngine.expire') def test_clean_expired_reservation(self, mock_clean): diff --git a/cinder/tests/unit/volume/test_rpcapi.py b/cinder/tests/unit/volume/test_rpcapi.py index 32169e59655..4022feaa340 100644 --- a/cinder/tests/unit/volume/test_rpcapi.py +++ b/cinder/tests/unit/volume/test_rpcapi.py @@ -303,6 +303,7 @@ def __init__(self): volume=self.fake_volume_obj, dest_backend=dest_backend, force_host_copy=True, + extend_spec=None, expected_kwargs_diff={ 'host': {'host': 'fake_host', 'cluster_name': 'cluster_name', diff --git a/cinder/tests/unit/volume/test_volume_migration.py b/cinder/tests/unit/volume/test_volume_migration.py index cd3136b6506..64c731cad8d 100644 --- a/cinder/tests/unit/volume/test_volume_migration.py +++ b/cinder/tests/unit/volume/test_volume_migration.py @@ -86,7 +86,7 @@ def tearDown(self): super(VolumeMigrationTestCase, self).tearDown() self._clear_patch.stop() - def test_migrate_volume_driver(self): + def test_migrate_volume_driver(self, extend_spec=None): """Test volume migration done by driver.""" # Mock driver and rpc functions self.mock_object(self.volume.driver, 'migrate_volume', @@ -97,7 +97,8 @@ def test_migrate_volume_driver(self): host=CONF.host, migration_status='migrating') host_obj = {'host': 'newhost', 'capabilities': {}} - self.volume.migrate_volume(self.context, volume, 
host_obj, False) + self.volume.migrate_volume(self.context, volume, host_obj, False, + extend_spec=extend_spec) # check volume properties volume = objects.Volume.get_by_id(context.get_admin_context(), @@ -151,6 +152,15 @@ def test_migrate_volume_driver_for_retype_generic(self, mock_can_use, mock_generic.assert_called_once_with(self.context, volume, host_obj, fake.VOLUME_TYPE2_ID) + @mock.patch.object(volume_rpcapi.VolumeAPI, 'extend_volume') + def test_migrate_volume_driver_with_extend(self, fake_extend): + extend_spec = {'new_size': 10, 'reservations': 'fake-rsv'} + self.test_migrate_volume_driver(extend_spec=extend_spec) + self.assertTrue(fake_extend.called) + named_args = fake_extend.call_args[1] + self.assertEqual(10, named_args['new_size']) + self.assertEqual('fake-rsv', named_args['reservations']) + def test_migrate_volume_driver_cross_az(self): """Test volume migration done by driver.""" # Mock driver and rpc functions diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 3e82788b824..a8124dda6a1 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -203,38 +203,6 @@ def _is_encrypted(self, volume_type): return False return specs.get('encryption', {}) is not {} - def _set_scheduler_hints_to_volume_metadata(self, scheduler_hints, - metadata): - if scheduler_hints: - if 'same_host' in scheduler_hints: - if isinstance(scheduler_hints['same_host'], str): - hint = scheduler_hints['same_host'] - else: - hint = ','.join(scheduler_hints["same_host"]) - metadata["scheduler_hint_same_host"] = hint - if "different_host" in scheduler_hints: - if isinstance(scheduler_hints['different_host'], str): - hint = scheduler_hints["different_host"] - else: - hint = ','.join(scheduler_hints["different_host"]) - metadata["scheduler_hint_different_host"] = hint - return metadata - - def _get_scheduler_hints_from_volume(self, volume): - filter_properties = {} - if "scheduler_hint_same_host" in volume.metadata: - LOG.debug("Found a scheduler_hint_same_host in volume %s", - volume.metadata["scheduler_hint_same_host"]) - hint = volume.metadata["scheduler_hint_same_host"] - filter_properties["same_host"] = hint.split(',') - - if "scheduler_hint_different_host" in volume.metadata: - LOG.debug("Found a scheduler_hint_different_host in volume %s", - volume.metadata["scheduler_hint_different_host"]) - hint = volume.metadata["scheduler_hint_different_host"] - filter_properties["different_host"] = hint.split(',') - return filter_properties - def create(self, context, size, name, description, snapshot=None, image_id=None, volume_type=None, metadata=None, availability_zone=None, source_volume=None, @@ -332,7 +300,7 @@ def create(self, context, size, name, description, snapshot=None, if not metadata: metadata = {} - metadata = self._set_scheduler_hints_to_volume_metadata( + metadata = volume_utils.set_scheduler_hints_to_volume_metadata( scheduler_hints, metadata ) @@ -853,7 +821,8 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, 'volume_id': volume.id} # Check if there is an affinity/antiaffinity against the volume - filter_properties = self._get_scheduler_hints_from_volume(volume) + filter_properties = \ + volume_utils.get_scheduler_hints_from_volume(volume) try: dest = self.scheduler_rpcapi.find_backend_for_connector( @@ -1693,7 +1662,7 @@ def migrate_volume(self, context, volume, host, cluster_name, force_copy, raise exception.InvalidVolume(reason=msg) # Check if there is an affinity/antiaffinity against the volume - filter_props = self._get_scheduler_hints_from_volume(volume) + 
filter_props = volume_utils.get_scheduler_hints_from_volume(volume) # Call the scheduler to ensure that the host exists and that it can # accept the volume diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index d956abfbfec..8fe76a32a64 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -2946,7 +2946,8 @@ def migrate_volume(self, context, volume, host): """ false_ret = (False, None) - allowed_statuses = ['available', 'reserved', 'in-use', 'maintenance'] + allowed_statuses = ['available', 'reserved', 'in-use', 'maintenance', + 'extending'] if volume['status'] not in allowed_statuses: LOG.debug('Only %s volumes can be migrated using backend ' 'assisted migration. Falling back to generic migration.', diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 4e002b17328..f01cb89f458 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2595,7 +2595,8 @@ def migrate_volume(self, host, force_host_copy: bool = False, new_type_id=None, - diff=None) -> None: + diff=None, + extend_spec=None) -> None: """Migrate the volume to the specified host (called on source host).""" try: # NOTE(flaper87): Verify the driver is enabled @@ -2612,7 +2613,7 @@ def migrate_volume(self, rpcapi = volume_rpcapi.VolumeAPI() status_update = None - if volume.status in ('retyping', 'maintenance'): + if volume.status in ('retyping', 'maintenance', 'extending'): status_update = {'status': volume.previous_status} volume.migration_status = 'migrating' @@ -2652,6 +2653,8 @@ def migrate_volume(self, volume.save() self._update_allocated_capacity(volume, decrement=True, host=original_host) + if extend_spec: + rpcapi.extend_volume(ctxt, volume, **extend_spec) except Exception: LOG.debug("Decrement remote allocated_capacity_gb for " "host %(host)s", diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index 7ae8c9869dc..10f5a2d21b1 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -261,7 +261,7 @@ def extend_volume(self, ctxt, volume, new_size, reservations): reservations=reservations) def migrate_volume(self, ctxt, volume, dest_backend, force_host_copy, - wait_for_completion=False): + wait_for_completion=False, extend_spec=None): backend_p = {'host': dest_backend.host, 'cluster_name': dest_backend.cluster_name, 'capabilities': dest_backend.capabilities} @@ -274,7 +274,8 @@ def migrate_volume(self, ctxt, volume, dest_backend, force_host_copy, cctxt = self._get_cctxt(volume.service_topic_queue, version) method = 'call' if wait_for_completion else 'cast' getattr(cctxt, method)(ctxt, 'migrate_volume', volume=volume, - host=backend_p, force_host_copy=force_host_copy) + host=backend_p, force_host_copy=force_host_copy, + extend_spec=extend_spec) def migrate_volume_completion(self, ctxt, volume, new_volume, error): cctxt = self._get_cctxt(volume.service_topic_queue) diff --git a/cinder/volume/volume_utils.py b/cinder/volume/volume_utils.py index 588c7e99d6c..27496b2a63e 100644 --- a/cinder/volume/volume_utils.py +++ b/cinder/volume/volume_utils.py @@ -1533,3 +1533,37 @@ def setup_tracing(trace_flags): LOG.warning('Invalid trace flag: %s', invalid_flag) TRACE_METHOD = 'method' in trace_flags TRACE_API = 'api' in trace_flags + + +def get_scheduler_hints_from_volume(volume): + filter_properties = {} + if "scheduler_hint_same_host" in volume.metadata: + LOG.debug("Found a scheduler_hint_same_host in volume %s", + volume.metadata["scheduler_hint_same_host"]) + hint = 
volume.metadata["scheduler_hint_same_host"] + filter_properties["same_host"] = hint.split(',') + + if "scheduler_hint_different_host" in volume.metadata: + LOG.debug("Found a scheduler_hint_different_host in volume %s", + volume.metadata["scheduler_hint_different_host"]) + hint = volume.metadata["scheduler_hint_different_host"] + filter_properties["different_host"] = hint.split(',') + return filter_properties + + +def set_scheduler_hints_to_volume_metadata(scheduler_hints, + metadata): + if scheduler_hints: + if 'same_host' in scheduler_hints: + if isinstance(scheduler_hints['same_host'], str): + hint = scheduler_hints['same_host'] + else: + hint = ','.join(scheduler_hints["same_host"]) + metadata["scheduler_hint_same_host"] = hint + if "different_host" in scheduler_hints: + if isinstance(scheduler_hints['different_host'], str): + hint = scheduler_hints["different_host"] + else: + hint = ','.join(scheduler_hints["different_host"]) + metadata["scheduler_hint_different_host"] = hint + return metadata From 97c656515407cc969c16c2e9af339c85eea09125 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 30 Aug 2022 08:57:56 -0400 Subject: [PATCH 094/149] [SAP] Fix misnamed StorageMigrationFailed call This patch fixes a misspelled name of a vmware exception from StorageMigratonFailed to StorageMigrationFailed --- cinder/volume/drivers/vmware/volumeops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 22a406100d9..b9f9939ae35 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -1148,7 +1148,7 @@ def relocate_one_disk( 'ds': ds_val, 'rp': rp_val}) except Exception as e: LOG.error("Relocation of main vmdk: %s failed.", vmdk_path) - raise vmdk_exceptions.StorageMigratonFailed(e) + raise vmdk_exceptions.StorageMigrationFailed(e) finally: rename_vm(ownervm, original_name) From 332e59b1a076e0e2a476b0192c27fe39fe06c836 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 17 Aug 2022 15:14:32 -0400 Subject: [PATCH 095/149] [SAP] Add new recount_host_stats API This patch adds a custom API to force the volume manager to recount all of it's volumes and snapshots This enables us to update the allocated capacity for a running cinder service. This patch has a shared lock with the _report_driver_status call as well so if the recount_host_stats is working, then the driver stats won't get reported until it is complete. 
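
For reference, the new action is exercised through the existing os-services
update endpoint (PUT /os-services/recount_host_stats, admin-only per the
policy added below) and returns 202 Accepted once the cast to the volume
manager is queued. The sketch below is illustrative only and is not part of
the patch; the endpoint URL, project id, token and backend host name are
placeholders.

    import requests

    CINDER_ENDPOINT = "https://cinder.example.com/v3/<project_id>"  # placeholder
    TOKEN = "<admin-scoped-keystone-token>"                          # placeholder

    resp = requests.put(
        CINDER_ENDPOINT + "/os-services/recount_host_stats",
        headers={"X-Auth-Token": TOKEN,
                 "Content-Type": "application/json"},
        # 'host' is the only key accepted by the request schema added below
        json={"host": "cinder-volume-01@vmware"},                    # placeholder
    )
    print(resp.status_code)  # expect 202 when the recount was scheduled
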
--- cinder/api/contrib/services.py | 11 ++++ cinder/api/schemas/services.py | 9 ++++ cinder/policies/services.py | 11 ++++ cinder/tests/unit/volume/test_volume.py | 11 ++-- cinder/volume/api.py | 5 ++ cinder/volume/manager.py | 70 +++++++++++++++---------- cinder/volume/rpcapi.py | 6 +++ 7 files changed, 91 insertions(+), 32 deletions(-) diff --git a/cinder/api/contrib/services.py b/cinder/api/contrib/services.py index 2d7ddca3100..7a729e5cba8 100644 --- a/cinder/api/contrib/services.py +++ b/cinder/api/contrib/services.py @@ -153,6 +153,15 @@ def _failover(self, req, context, clustered, body): cluster_name, body.get('backend_id')) return webob.Response(status_int=HTTPStatus.ACCEPTED) + @validation.schema(os_services.recount_host_stats) + def _recount_host_stats(self, req, context, body): + """Ask the volume manager to recount allocated capacity for host.""" + cluster_name, host = common.get_cluster_host(req, body, + mv.REPLICATION_CLUSTER) + self._volume_api_proxy(self.volume_api.recount_host_stats, context, + host) + return webob.Response(status_int=http_client.ACCEPTED) + def _log_params_binaries_services(self, context, body): """Get binaries and services referred by given log set/get request.""" query_filters = {'is_up': True} @@ -273,6 +282,8 @@ def update(self, req, id, body): return self._set_log(req, context, body=body) elif support_dynamic_log and id == 'get-log': return self._get_log(req, context, body=body) + elif id == "recount_host_stats": + return self._recount_host_stats(req, context, body=body) else: raise exception.InvalidInput(reason=_("Unknown action")) diff --git a/cinder/api/schemas/services.py b/cinder/api/schemas/services.py index eb094c1c2b5..d03522c61b5 100644 --- a/cinder/api/schemas/services.py +++ b/cinder/api/schemas/services.py @@ -82,3 +82,12 @@ }, 'additionalProperties': False, } + + +recount_host_stats = { + 'type': 'object', + 'properties': { + 'host': parameter_types.hostname, + }, + 'additionalProperties': False, +} diff --git a/cinder/policies/services.py b/cinder/policies/services.py index 7aee0cdc06e..8e44ddaa167 100644 --- a/cinder/policies/services.py +++ b/cinder/policies/services.py @@ -23,6 +23,7 @@ FAILOVER_POLICY = "volume:failover_host" FREEZE_POLICY = "volume:freeze_host" THAW_POLICY = "volume:thaw_host" +RECOUNT_STATS_POLICY = "volume:recount_host_stats" services_policies = [ policy.DocumentedRuleDefault( @@ -76,6 +77,16 @@ 'path': '/os-services/failover_host' } ]), + policy.DocumentedRuleDefault( + name=RECOUNT_STATS_POLICY, + check_str=base.RULE_ADMIN_API, + description="Recount host stats allocated capacity", + operations=[ + { + 'method': 'PUT', + 'path': '/os-services/recount_host_stats' + } + ]), ] diff --git a/cinder/tests/unit/volume/test_volume.py b/cinder/tests/unit/volume/test_volume.py index 9f4c8150200..be516505622 100644 --- a/cinder/tests/unit/volume/test_volume.py +++ b/cinder/tests/unit/volume/test_volume.py @@ -1460,7 +1460,8 @@ def mock_flow_run(*args, **kwargs): # locked self.volume.delete_volume(self.context, dst_vol) - mock_lock.assert_called_with('%s-delete_volume' % dst_vol.id) + mock_lock.assert_any_call('%s-delete_volume' % dst_vol.id) + mock_lock.assert_any_call('volume-stats') # locked self.volume.delete_snapshot(self.context, snapshot_obj) @@ -1468,7 +1469,8 @@ def mock_flow_run(*args, **kwargs): # locked self.volume.delete_volume(self.context, src_vol) - mock_lock.assert_called_with('%s-delete_volume' % src_vol.id) + mock_lock.assert_any_call('%s-delete_volume' % src_vol.id) + 
mock_lock.assert_any_call('volume-stats') self.assertTrue(mock_lvm_create.called) @@ -1513,11 +1515,12 @@ def mock_flow_run(*args, **kwargs): # locked self.volume.delete_volume(self.context, dst_vol) - mock_lock.assert_called_with('%s-delete_volume' % dst_vol_id) + mock_lock.assert_any_call('%s-delete_volume' % dst_vol_id) + mock_lock.assert_any_call('volume-stats') # locked self.volume.delete_volume(self.context, src_vol) - mock_lock.assert_called_with('%s-delete_volume' % src_vol_id) + mock_lock.assert_any_call('%s-delete_volume' % src_vol_id) def _raise_metadata_copy_failure(self, method, dst_vol): # MetadataCopyFailure exception will be raised if DB service is Down diff --git a/cinder/volume/api.py b/cinder/volume/api.py index a8124dda6a1..76135fa471f 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -2145,6 +2145,11 @@ def thaw_host(self, ctxt, host, cluster_name): if not self.volume_rpcapi.thaw_host(ctxt, services[0]): return "Backend reported error during thaw_host operation." + def recount_host_stats(self, ctxt, host): + ctxt.authorize(svr_policy.RECOUNT_STATS_POLICY) + ctxt = ctxt if ctxt.is_admin else ctxt.elevated() + self.volume_rpcapi.recount_host_stats(ctxt, host) + def check_volume_filters(self, filters, strict=False): """Sets the user filter value to accepted format""" booleans = self.db.get_booleans_for_table('volume') diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index f01cb89f458..0e747d46841 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -496,6 +496,44 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: # Initialize backend capabilities list self.driver.init_capabilities() + # collect and count all host volumes and snapshots + volumes_to_migrate = self._count_host_stats(ctxt, export_volumes=True) + + self.driver.set_throttle() + + # at this point the driver is considered initialized. + # NOTE(jdg): Careful though because that doesn't mean + # that an entry exists in the service table + self.driver.set_initialized() + + # Keep the image tmp file clean when init host. + backend_name = volume_utils.extract_host(self.service_topic_queue) + image_utils.cleanup_temporary_file(backend_name) + + # Migrate any ConfKeyManager keys based on fixed_key to the currently + # configured key manager. 
+ self._add_to_threadpool(key_migration.migrate_fixed_key, + volumes=volumes_to_migrate) + + # collect and publish service capabilities + self.publish_service_capabilities(ctxt) + LOG.info("Driver initialization completed successfully.", + resource={'type': 'driver', + 'id': self.driver.__class__.__name__}) + + # Make sure to call CleanableManager to do the cleanup + super(VolumeManager, self).init_host(added_to_cluster=added_to_cluster, + **kwargs) + + def recount_host_stats(self, context): + self._count_host_stats(context, export_volumes=False) + + @coordination.synchronized('volume-stats') + def _count_host_stats(self, context, export_volumes=False): + """Recount the number of volumes and allocated capacity.""" + ctxt = context.elevated() + LOG.info("Recounting Allocated capacity") + # Zero stats self.stats['pools'] = {} self.stats.update({'allocated_capacity_gb': 0}) @@ -524,7 +562,6 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: req_offset: int for req_offset in req_range: - # Retrieve 'req_limit' number of objects starting from # 'req_offset' position volumes, snapshots = [], [] @@ -541,6 +578,7 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: offset=req_offset) else: snapshots = objects.SnapshotList() + # or retrieve all volumes and snapshots per single request else: volumes = self._get_my_volumes(ctxt) @@ -556,6 +594,7 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: # calculate allocated capacity for driver self._count_allocated_capacity(ctxt, volume) + if export_volumes: try: if volume['status'] in ['in-use']: self.driver.ensure_export(ctxt, volume) @@ -565,8 +604,6 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: resource=volume) volume.conditional_update({'status': 'error'}, {'status': 'in-use'}) - # All other cleanups are processed by parent class - - # CleanableManager except Exception: LOG.exception("Error during re-export on driver init.", @@ -579,31 +616,7 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: del volumes del snapshots - self.driver.set_throttle() - - # at this point the driver is considered initialized. - # NOTE(jdg): Careful though because that doesn't mean - # that an entry exists in the service table - self.driver.set_initialized() - - # Keep the image tmp file clean when init host. - backend_name = volume_utils.extract_host(self.service_topic_queue) - image_utils.cleanup_temporary_file(backend_name) - - # Migrate any ConfKeyManager keys based on fixed_key to the currently - # configured key manager. - self._add_to_threadpool(key_migration.migrate_fixed_key, - volumes=volumes_to_migrate) - - # collect and publish service capabilities - self.publish_service_capabilities(ctxt) - LOG.info("Driver initialization completed successfully.", - resource={'type': 'driver', - 'id': self.driver.__class__.__name__}) - - # Make sure to call CleanableManager to do the cleanup - super(VolumeManager, self).init_host(added_to_cluster=added_to_cluster, - **kwargs) + return volumes_to_migrate def init_host_with_rpc(self) -> None: LOG.info("Initializing RPC dependent components of volume " @@ -2813,6 +2826,7 @@ def get_stats(): # queue it to be sent to the Schedulers. 
self.update_service_capabilities(volume_stats) + @coordination.synchronized('volume-stats') def _append_volume_stats(self, vol_stats) -> None: pools = vol_stats.get('pools', None) if pools: diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index 10f5a2d21b1..38842a050d6 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -135,6 +135,7 @@ class VolumeAPI(rpc.RPCAPI): 3.15 - Add revert_to_snapshot method 3.16 - Add no_snapshots to accept_transfer method 3.17 - Make get_backup_device a cast (async) + 3.17 - SAP - Added recount_host_stats (async) """ RPC_API_VERSION = '3.17' @@ -159,6 +160,11 @@ def _get_cctxt(self, host=None, version=None, **kwargs): return super(VolumeAPI, self)._get_cctxt(version=version, **kwargs) + @rpc.assert_min_rpc_version('3.17') + def recount_host_stats(self, ctxt, host): + cctxt = self._get_cctxt(host=host) + cctxt.cast(ctxt, 'recount_host_stats') + def create_volume(self, ctxt, volume, request_spec, filter_properties, allow_reschedule=True): cctxt = self._get_cctxt(volume.service_topic_queue) From 458a424139b557fb4bc48c24046a0607d0edc700 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 22 Sep 2022 15:39:08 -0400 Subject: [PATCH 096/149] [SAP] Add ability to disable updating provider_info This patch defaults to disabling the ability to go through every single volume and ensure the pool is set correctly. This was initially needed in the transition from aggregate info to datastores as pools. This is not really needed now. It will eventually get moved to a utility. --- cinder/volume/drivers/vmware/vmdk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 8fe76a32a64..82f0bf9285a 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -204,6 +204,10 @@ 'a volume lives on. This also enables managing capacity ' 'for each datastore by cinder. ' ), + cfg.BoolOpt('vmware_sap_update_provider_info', + default=False, + help='This prevents the driver from traversing all volumes ' + 'associated with a backend to ensure the pool is correct'), cfg.StrOpt('allow_pulling_images_from_url', default=True, help='Allow VMware to pull images directly from Swift. ' @@ -2497,6 +2501,10 @@ def update_provider_info(self, volumes, snapshots): We don't care about snapshots, they just use the volume's provider_id. """ LOG.info("HOST {} : volumes {}".format(self.host, len(volumes))) + if not self.configuration.vmware_sap_update_provider_info: + LOG.info("Not updating provider information") + return [], None + if self.configuration.vmware_datastores_as_pools: LOG.info("vmware_datastores_as_pools is enabled. " "Checking host entries for volumes and snapshots.") From eb3dee7554c40258a4ab1e07d29f7e5894d52c06 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 22 Sep 2022 08:19:04 -0400 Subject: [PATCH 097/149] [SAP] Fix multiattach reporting for vmdk This patch fixes the reporting of multiattach capability in the vmdk driver. 
Multiattach -> multiattach --- cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py | 2 +- cinder/volume/drivers/vmware/vmdk.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 19a26bfa9c8..0c916d17518 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -264,7 +264,7 @@ def test_get_volume_stats_pools(self, session, mock_stats, self.assertEqual(4657, stats["pools"][0]['free_capacity_gb']) self.assertEqual('up', stats["pools"][0]['pool_state']) self.assertEqual('up', stats["backend_state"]) - self.assertFalse(stats["pools"][0]['Multiattach']) + self.assertFalse(stats["pools"][0]['multiattach']) self.assertEqual(vmdk.LOCATION_DRIVER_NAME + ":fake-service", stats['location_info']) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 82f0bf9285a..e10c261c125 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -534,7 +534,7 @@ def _get_volume_stats(self): 'max_over_subscription_ratio': ( max_over_subscription_ratio), 'reserved_percentage': reserved_percentage, - 'Multiattach': False, + 'multiattach': False, 'datastore_type': summary.type, 'location_url': summary.url, 'location_info': location_info, From 3fd7350db12f7d1ef08f95d4990ad43e11fb5799 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Fri, 23 Sep 2022 16:10:02 -0400 Subject: [PATCH 098/149] [SAP] Fix rpc call to remote host to update host capacity This patch fixes a problem with the volume rcpapi.update_migrate_volume_capacity() It was always pulling the remote host to call from the volume.service_topic_queue, which is always the current host entry, not the new host that the volume is migrating to. This is how we were getting overcommited against remote datastores during volume attachments inducing a migration to another vcenter. The remote wasn't actually getting called to update it's allocated_capacity_gb. --- cinder/volume/manager.py | 4 ++-- cinder/volume/rpcapi.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 0e747d46841..3a6a68568cf 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2639,7 +2639,7 @@ def migrate_volume(self, # needs to account for the space consumed. 
LOG.debug("Update remote allocated_capacity_gb for " "host %(host)s", - {'host': host}, + {'host': host['host']}, resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume, host=host['host']) @@ -4381,7 +4381,7 @@ def delete_group_snapshot(self, context, group_snapshot) -> None: snapshots) @volume_utils.trace - def update_migrated_volume_capacity(self, ctxt, volume, host=None, + def update_migrated_volume_capacity(self, ctxt, volume, host, decrement=False): """Update allocated_capacity_gb for the migrated volume host.""" self._update_allocated_capacity(volume, host=host, decrement=decrement) diff --git a/cinder/volume/rpcapi.py b/cinder/volume/rpcapi.py index 38842a050d6..1d26a834dc5 100644 --- a/cinder/volume/rpcapi.py +++ b/cinder/volume/rpcapi.py @@ -317,9 +317,9 @@ def update_migrated_volume(self, ctxt, volume, new_volume, new_volume=new_volume, volume_status=original_volume_status) - def update_migrated_volume_capacity(self, ctxt, volume, host=None, + def update_migrated_volume_capacity(self, ctxt, volume, host, decrement=False): - cctxt = self._get_cctxt(volume.service_topic_queue) + cctxt = self._get_cctxt(host) cctxt.cast(ctxt, 'update_migrated_volume_capacity', volume=volume, host=host, From 99fa0547317abbb30ba5fc654d6b49c052b369f3 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Mon, 19 Sep 2022 14:18:13 -0400 Subject: [PATCH 099/149] SAP propagate scheduler hints for volumes This patch ensures that when a new volume is create and if it has scheduler hints for affinity/anti-affinity for other volumes, that the volumes get affinity/anti-affinity for the newly created volumes. This ensures affinity and anti-affinity are both ways for volumes. --- cinder/volume/manager.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 3a6a68568cf..4c3da9ffceb 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -714,6 +714,36 @@ def _set_resource_host(self, resource) -> None: resource.host = volume_utils.append_host(self.host, pool) resource.save() + def _propagate_volume_scheduler_hints(self, context, volume): + """Ensure metadata hints are propagated to referenced volumes.""" + hints = volume_utils.get_scheduler_hints_from_volume(volume) + if not hints: + return + + LOG.debug("Found hints for %(volume)s - %(hints)s", + {'volume': volume.id, 'hints': hints}) + for hint_key in hints: + for vol_id in hints[hint_key]: + try: + meta_vol = objects.Volume.get_by_id(context, vol_id) + # Because pep8 length issues + vut = volume_utils + meta_vol_hints = vut.get_scheduler_hints_from_volume( + meta_vol + ) + meta_vol_hints.setdefault(hint_key, []) + if volume.id not in meta_vol_hints[hint_key]: + meta_vol_hints[hint_key].append(volume.id) + + md = vut.set_scheduler_hints_to_volume_metadata( + meta_vol_hints, meta_vol.metadata + ) + meta_vol.metadata = md + meta_vol.save() + except Exception: + LOG.exception("Failed to set scheduler hints.", + resource=meta_vol) + @objects.Volume.set_workers def create_volume(self, context, volume, request_spec=None, filter_properties=None, @@ -822,6 +852,10 @@ def _run_flow() -> None: volume.service_uuid = self.service_uuid volume.save() + # propagate any scheduler hint affinity/anti-affinity metadata to + # other volumes. 
+ self._propagate_volume_scheduler_hints(context, volume) + LOG.info("Created volume successfully.", resource=volume) return volume.id From 35093f57330aab96c4603250b8eea074b28ab8f6 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 7 Sep 2022 15:59:16 -0400 Subject: [PATCH 100/149] [SAP] Mark a pool as down if overcommited This patch will check the capacity factors of a pool and check it's virtual_free_capacity left every time the volume manager gets the volume stats from the backend. If the virtual_free_capacity is <= 0 then the pool is overcommited and will be marked down. The new SAPPoolDownFilter will check against this pool_state and if it's down, then filter the pool out of results for the scheduler. --- .../scheduler/filters/sap_pool_down_filter.py | 36 ++++++++++ cinder/scheduler/host_manager.py | 1 + cinder/volume/drivers/vmware/vmdk.py | 40 +++++++++++- cinder/volume/manager.py | 65 +++++++++++++++++++ setup.cfg | 1 + 5 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 cinder/scheduler/filters/sap_pool_down_filter.py diff --git a/cinder/scheduler/filters/sap_pool_down_filter.py b/cinder/scheduler/filters/sap_pool_down_filter.py new file mode 100644 index 00000000000..8d7fc951c9e --- /dev/null +++ b/cinder/scheduler/filters/sap_pool_down_filter.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020 SAP SE +# +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +from oslo_log import log as logging + +from cinder.scheduler import filters + + +LOG = logging.getLogger(__name__) + + +class SAPPoolDownFilter(filters.BaseBackendFilter): + """Filter out pools that are not marked 'up'.""" + + def backend_passes(self, backend_state, filter_properties): + + if backend_state.pool_state == 'up': + return True + else: + LOG.debug("%(id)s pool state is not 'up'. 
state='%(state)s'", + {'id': backend_state.backend_id, + 'state': backend_state.pool_state}) + return False diff --git a/cinder/scheduler/host_manager.py b/cinder/scheduler/host_manager.py index 876e91dc43f..c1373209eb8 100644 --- a/cinder/scheduler/host_manager.py +++ b/cinder/scheduler/host_manager.py @@ -396,6 +396,7 @@ def update_from_volume_capability(self, capability, service=None): capability, CONF.max_over_subscription_ratio)) self.multiattach = capability.get('multiattach', False) + self.pool_state = capability.get('pool_state', 'up') self.filter_function = capability.get('filter_function', None) self.goodness_function = capability.get('goodness_function', 0) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index e10c261c125..cf3de964695 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -39,6 +39,7 @@ from oslo_vmware import pbm from oslo_vmware import vim_util +from cinder import context from cinder import exception from cinder.i18n import _ from cinder.image import image_utils @@ -339,7 +340,9 @@ class VMwareVcVmdkDriver(driver.VolumeDriver): # 3.4.2.99.2 - Added soft sharding volume migration, fixed a small issue # in check_for_setup_error where storage_profile not set. # 3.4.2.99.3 - Add support for reporting each datastore as a pool. - VERSION = '3.4.2.99.3' + # 3.4.2.99.4 - Default to thick provisioning and report provisioning type + # based on the volume type extra specs if possible. + VERSION = '3.4.2.99.4' # ThirdPartySystems wiki page CI_WIKI_NAME = "VMware_CI" @@ -369,6 +372,7 @@ def __init__(self, *args, **kwargs): ]) self._remote_api = remote_api.VmdkDriverRemoteApi() self._storage_profiles = [] + self._volume_type_by_backend = None @staticmethod def get_driver_options(): @@ -487,12 +491,42 @@ def _collect_backend_stats(self): return (result, {}) + def _get_volume_type_by_backend_name(self, backend_name): + if not self._volume_type_by_backend: + self._volume_type_by_backend = {} + ctxt = context.get_admin_context() + all_types = volume_types.get_all_types(ctxt) + for v_type_name, v_type in all_types.items(): + specs = v_type['extra_specs'] + if 'volume_backend_name' in specs: + self._volume_type_by_backend[backend_name] = v_type + + return self._volume_type_by_backend.get(backend_name, None) + @volume_utils.trace def _get_volume_stats(self): backend_name = self.configuration.safe_get('volume_backend_name') if not backend_name: backend_name = self.__class__.__name__ + # Force the reporting of provisioning support based + # on the volume type setting + v_type_provisioning_type = 'thick' + + # Volume type matches for this backend + v_type = self._get_volume_type_by_backend_name(backend_name) + if v_type and v_type.get('extra_specs', None): + extra_specs = v_type.get('extra_specs') + v_type_provisioning_type = extra_specs.get('provisioning:type', + 'thin') + + if v_type_provisioning_type == 'thin': + thin_provisioning_on = True + thick_provisioning_on = False + else: + thin_provisioning_on = False + thick_provisioning_on = True + location_info = '%(driver_name)s:%(vcenter)s' % { 'driver_name': LOCATION_DRIVER_NAME, 'vcenter': self.session.vim.service_content.about.instanceUuid} @@ -529,8 +563,8 @@ def _get_volume_stats(self): summary.capacity / units.Gi), 'free_capacity_gb': round( summary.freeSpace / units.Gi), - 'thin_provisioning_support': True, - 'thick_provisioning_support': True, + 'thin_provisioning_support': thin_provisioning_on, + 'thick_provisioning_support': 
thick_provisioning_on, 'max_over_subscription_ratio': ( max_over_subscription_ratio), 'reserved_percentage': reserved_percentage, diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 4c3da9ffceb..bba71db62ed 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2860,6 +2860,69 @@ def get_stats(): # queue it to be sent to the Schedulers. self.update_service_capabilities(volume_stats) + def _set_pool_state(self, pool): + """Check the pool stats to decide if the pool should be down. + + When a pool is overcommited already, the pool should not be + available to accept new volumes, so we mark the pool down. + """ + # Lets see if we should mark the pool as down + # if we are overcommited over the allowed amount + thin = pool.get('thin_provisioning_support', None) + thick = pool.get('thick_provisioning_support', None) + if not thin and not thick: + return + + if thin: + max_over_subscription_ratio = pool.get( + 'max_over_subscription_ratio', + self.configuration.max_over_subscription_ratio + ) + thin_factors = utils.calculate_capacity_factors( + pool['total_capacity_gb'], + pool['free_capacity_gb'], + pool['allocated_capacity_gb'], + True, + max_over_subscription_ratio, + pool['reserved_percentage'], + True + ) + if thin_factors['virtual_free_capacity'] <= 0: + # The pool has no free space left or has been + # overcommited past what is allowed. + LOG.error("Pool(%(pool_name)s is overcommited!!", + {'pool_name': pool['pool_name']}) + pool['pool_state'] = 'down' + pool['pool_state_reason'] = ( + 'Volume manager marked pool down for being' + ' allocated beyond what is allowed for thin' + ' provisioning.' + ) + else: + # Thick provisioning won't allow max oversub + # So we force it to 1:1 + max_oversubscription_ratio = 1 + thick_factors = utils.calculate_capacity_factors( + pool['total_capacity_gb'], + pool['free_capacity_gb'], + pool['allocated_capacity_gb'], + False, + max_oversubscription_ratio, + pool['reserved_percentage'], + False + ) + if thick_factors['virtual_free_capacity'] <= 0: + # The pool has no free space left or has been + # overcommited past what is allowed. 
+ LOG.error("Pool(%(pool_name)s is overcommited!!", + {'pool_name': pool['pool_name']}) + pool['pool_state'] = 'down' + pool['pool_state_reason'] = ( + 'Volume manager marked pool down for being' + ' allocated beyond what is allowed for thick' + ' provisioning' + ) + @coordination.synchronized('volume-stats') def _append_volume_stats(self, vol_stats) -> None: pools = vol_stats.get('pools', None) @@ -2874,6 +2937,8 @@ def _append_volume_stats(self, vol_stats) -> None: pool_stats = dict(allocated_capacity_gb=0) pool.update(pool_stats) + self._set_pool_state(pool) + else: raise exception.ProgrammingError( reason='Pools stats reported by the driver are not ' diff --git a/setup.cfg b/setup.cfg index 3048f01ac70..5180138ae30 100644 --- a/setup.cfg +++ b/setup.cfg @@ -49,6 +49,7 @@ cinder.scheduler.filters = SAPLargeVolumeFilter = cinder.scheduler.filters.sap_large_volume_filter:SAPLargeVolumeFilter SAPDifferentBackendFilter = cinder.scheduler.filters.sap_affinity_filter:SAPDifferentBackendFilter SAPSameBackendFilter = cinder.scheduler.filters.sap_affinity_filter:SAPSameBackendFilter + SAPPoolDownFilter = cinder.scheduler.filters.sap_pool_down_filter:SAPPoolDownFilter cinder.scheduler.weights = AllocatedCapacityWeigher = cinder.scheduler.weights.capacity:AllocatedCapacityWeigher CapacityWeigher = cinder.scheduler.weights.capacity:CapacityWeigher From 0a8d8a58725e46aea4a32ae0ea0769412c2edf93 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 16 Aug 2022 14:45:08 -0400 Subject: [PATCH 101/149] [SAP] Mark datastore down if it has specific alert This patch checks datastores to see if they have alerts. If the alert is red or yellow and the alert is due to 'Thin-provisioned volume capacity threshold exceeded' then vcenter will fail to create a volume on that datastore while in that state. This patch marks the datastore as down in that case. --- cinder/volume/drivers/vmware/vmdk.py | 30 +++++++++++++++++++++-- cinder/volume/drivers/vmware/volumeops.py | 12 ++++++++- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index cf3de964695..cb90bb67653 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -342,7 +342,8 @@ class VMwareVcVmdkDriver(driver.VolumeDriver): # 3.4.2.99.3 - Add support for reporting each datastore as a pool. # 3.4.2.99.4 - Default to thick provisioning and report provisioning type # based on the volume type extra specs if possible. - VERSION = '3.4.2.99.4' + # 3.4.2.99.5 - Mark datastore/pool as down if it has red flag alert. + VERSION = '3.4.2.99.5' # ThirdPartySystems wiki page CI_WIKI_NAME = "VMware_CI" @@ -555,8 +556,25 @@ def _get_volume_stats(self): summary = datastore["summary"] pool_state = 'down' + pool_down_reason = 'Datastore not usable' if self.ds_sel.is_datastore_usable(summary): pool_state = 'up' + pool_down_reason = 'up' + + # make sure the datastore isn't in red flag alert! 
+ for alarm in datastore.get('alarms', []): + for alarm_state in alarm: + if alarm_state['overallStatus'] in ('red', 'yellow'): + alarm_info = self.volumeops.get_datastore_alarm( + alarm_state['alarm'] + ) + if ('volume capacity threshold exceeded' in + alarm_info['info.description']): + # vCenter will fail to create volumes here + pool_state = 'down' + pool_down_reason = ( + alarm_info['info.description'] + ) pool = {'pool_name': summary.name, 'total_capacity_gb': round( @@ -575,7 +593,8 @@ def _get_volume_stats(self): 'storage_profile': datastore["storage_profile"], 'connection_capabilities': connection_capabilities, 'backend_state': backend_state, - 'pool_state': pool_state + 'pool_state': pool_state, + 'pool_down_reason': pool_down_reason } # Add any custom attributes associated with the datastore @@ -2500,6 +2519,13 @@ def _get_datastores_for_profiles(self): custom_attributes[field] = v.value datastores[summary.name][ "custom_attributes"] = custom_attributes + + if ('triggeredAlarmState' in objects and + objects['triggeredAlarmState']): + alarms = [] + for (_x, alarm) in objects['triggeredAlarmState']: + alarms.append(alarm) + datastores[summary.name]['alarms'] = alarms return datastores def _new_host_for_volume(self, volume): diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index b9f9939ae35..fc0385bbb5c 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -995,7 +995,17 @@ def get_datastore_properties(self, datastore): properties_to_collect=[ "summary", "availableField", - "customValue" + "customValue", + "triggeredAlarmState" + ]) + + def get_datastore_alarm(self, alarm): + return self._session.invoke_api(vim_util, 'get_object_properties_dict', + self._session.vim, alarm, + properties_to_collect=[ + 'info.name', + 'info.systemName', + 'info.description' ]) def _create_relocate_spec_disk_locator(self, datastore, disk_type, From 7a0054f7ddf44eea3b6404b935523f4ad46949d2 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Fri, 30 Sep 2022 09:40:17 -0400 Subject: [PATCH 102/149] [SAP] change overcommit log to info This patch changes the overcommit error log to info. --- cinder/volume/manager.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index bba71db62ed..79726bcc974 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2890,8 +2890,8 @@ def _set_pool_state(self, pool): if thin_factors['virtual_free_capacity'] <= 0: # The pool has no free space left or has been # overcommited past what is allowed. - LOG.error("Pool(%(pool_name)s is overcommited!!", - {'pool_name': pool['pool_name']}) + LOG.info("Pool(%(pool_name)s is overcommited!!", + {'pool_name': pool['pool_name']}) pool['pool_state'] = 'down' pool['pool_state_reason'] = ( 'Volume manager marked pool down for being' @@ -2914,8 +2914,8 @@ def _set_pool_state(self, pool): if thick_factors['virtual_free_capacity'] <= 0: # The pool has no free space left or has been # overcommited past what is allowed. 
- LOG.error("Pool(%(pool_name)s is overcommited!!", - {'pool_name': pool['pool_name']}) + LOG.info("Pool(%(pool_name)s is overcommited!!", + {'pool_name': pool['pool_name']}) pool['pool_state'] = 'down' pool['pool_state_reason'] = ( 'Volume manager marked pool down for being' From e3ae4c27bed64aace7c508ca48a760a46c79cf08 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Mon, 3 Oct 2022 08:46:56 -0400 Subject: [PATCH 103/149] [SAP] fix default max_oversubscription_ratio to float When the pool stats are pulled from the volume driver and the max oversubscription ratio isn't set yet, the max oversubscription ratio comes from the default config setting, which is a string. Have to force that to a float when using it for calculations. --- cinder/volume/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 79726bcc974..9160419cc4a 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2876,7 +2876,7 @@ def _set_pool_state(self, pool): if thin: max_over_subscription_ratio = pool.get( 'max_over_subscription_ratio', - self.configuration.max_over_subscription_ratio + float(self.configuration.max_over_subscription_ratio) ) thin_factors = utils.calculate_capacity_factors( pool['total_capacity_gb'], From 6dcc5a58c39b29bfacd2845f3026eb522d3208f2 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Mon, 3 Oct 2022 08:46:56 -0400 Subject: [PATCH 104/149] [SAP] fix default max_oversubscription_ratio to float When the pool stats are pulled from the volume driver and the max oversubscription ratio is a string, the volume manager has to force the value to a float. --- cinder/volume/manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 9160419cc4a..53e004a7093 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -2883,7 +2883,7 @@ def _set_pool_state(self, pool): pool['free_capacity_gb'], pool['allocated_capacity_gb'], True, - max_over_subscription_ratio, + float(max_over_subscription_ratio), pool['reserved_percentage'], True ) From e90bf86e828aab95fc195d0553bf523d4102fa49 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 5 Oct 2022 12:09:44 -0400 Subject: [PATCH 105/149] [SAP] Add context to the volume-stats tooz lock This patch adds the cinder host name to the lock for collecting volume-stats to ensure the lock is contextual only inside each host volume process. This will prevent other hosts for waiting for the same lock when they don't need to. 
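
The effect of templating the host into the lock name can be illustrated with
a simplified stand-in for coordination.synchronized() (the real decorator
takes a tooz lock; this sketch only mimics the name substitution):

    import threading
    from functools import wraps

    _locks = {}

    def synchronized(name_template):
        def decorator(fn):
            @wraps(fn)
            def wrapper(self, *args, **kwargs):
                # 'volume-stats-{self.host}' -> 'volume-stats-hostA@vmdk'
                name = name_template.format(self=self)
                lock = _locks.setdefault(name, threading.Lock())
                with lock:
                    return fn(self, *args, **kwargs)
            return wrapper
        return decorator

    class FakeManager(object):
        def __init__(self, host):
            self.host = host

        @synchronized('volume-stats-{self.host}')
        def _append_volume_stats(self):
            return 'held lock volume-stats-%s' % self.host

    # hostA@vmdk and hostB@vmdk now resolve to two different lock names,
    # so one backend's stats collection no longer blocks the other's.
    print(FakeManager('hostA@vmdk')._append_volume_stats())
    print(FakeManager('hostB@vmdk')._append_volume_stats())
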
--- cinder/tests/unit/volume/test_volume.py | 6 +++--- cinder/volume/manager.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cinder/tests/unit/volume/test_volume.py b/cinder/tests/unit/volume/test_volume.py index be516505622..e97546bad2f 100644 --- a/cinder/tests/unit/volume/test_volume.py +++ b/cinder/tests/unit/volume/test_volume.py @@ -1461,7 +1461,7 @@ def mock_flow_run(*args, **kwargs): # locked self.volume.delete_volume(self.context, dst_vol) mock_lock.assert_any_call('%s-delete_volume' % dst_vol.id) - mock_lock.assert_any_call('volume-stats') + mock_lock.assert_any_call('volume-stats-%s' % self.volume.host) # locked self.volume.delete_snapshot(self.context, snapshot_obj) @@ -1470,7 +1470,7 @@ def mock_flow_run(*args, **kwargs): # locked self.volume.delete_volume(self.context, src_vol) mock_lock.assert_any_call('%s-delete_volume' % src_vol.id) - mock_lock.assert_any_call('volume-stats') + mock_lock.assert_any_call('volume-stats-%s' % self.volume.host) self.assertTrue(mock_lvm_create.called) @@ -1516,7 +1516,7 @@ def mock_flow_run(*args, **kwargs): # locked self.volume.delete_volume(self.context, dst_vol) mock_lock.assert_any_call('%s-delete_volume' % dst_vol_id) - mock_lock.assert_any_call('volume-stats') + mock_lock.assert_any_call('volume-stats-%s' % self.volume.host) # locked self.volume.delete_volume(self.context, src_vol) diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 53e004a7093..731725a80ac 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -528,7 +528,7 @@ def _init_host(self, added_to_cluster=None, **kwargs) -> None: def recount_host_stats(self, context): self._count_host_stats(context, export_volumes=False) - @coordination.synchronized('volume-stats') + @coordination.synchronized('volume-stats-{self.host}') def _count_host_stats(self, context, export_volumes=False): """Recount the number of volumes and allocated capacity.""" ctxt = context.elevated() @@ -2923,7 +2923,7 @@ def _set_pool_state(self, pool): ' provisioning' ) - @coordination.synchronized('volume-stats') + @coordination.synchronized('volume-stats-{self.host}') def _append_volume_stats(self, vol_stats) -> None: pools = vol_stats.get('pools', None) if pools: From 06f86fb0c36a048d0167135d99557261ee6b7581 Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Thu, 6 Oct 2022 16:43:50 +0200 Subject: [PATCH 106/149] [SAP] vmware: Add helper to fetch properties for given objects This adds a helper-function _get_properties_for_morefs() to the DatastoreSelector, as we want to be able to fetch certain properties not just for all objects of a type or by making a call per object, but by telling the vCenter to retrieve the properties for the objects we specify. 
Change-Id: I7f1de1b5034e98244e53d4f02dfe9cabdf958cf6 --- cinder/volume/drivers/vmware/datastore.py | 27 +++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 2064b89da8c..87d82915283 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -292,6 +292,33 @@ def _get_host_properties(self, host_ref): if retrieve_result: return self._get_object_properties(retrieve_result[0]) + def _get_properties_for_morefs(self, type_, morefs, properties): + """Fetch properties for the given morefs of type type_ + + :param type_: a ManagedObject type + :param morefs: a list of ManagedObjectReference for the given type_ + :param properties: a list of strings defining the properties to fetch + :returns: a dict of ManagedObjectReference values mapped to a dict of + (property name, property value) + """ + obj_prop_map = {} + + result = \ + self._session.invoke_api( + vim_util, + "get_properties_for_a_collection_of_objects", + self._session.vim, + type_, morefs, + properties) + with vim_util.WithRetrieval(self._session.vim, result) as objects: + for obj in objects: + props = self._get_object_properties(obj) + + obj_prop_map[vim_util.get_moref_value(obj.obj)] = { + prop: props.get(prop) for prop in properties} + + return obj_prop_map + def _get_resource_pool(self, cluster_ref): return self._session.invoke_api(vim_util, 'get_object_property', From 90749273792a0cbb25f9fbd9ae7ccfbeb7dbd2ff Mon Sep 17 00:00:00 2001 From: Johannes Kulik Date: Thu, 6 Oct 2022 17:10:17 +0200 Subject: [PATCH 107/149] [SAP] Optimize vCenter queries in filter_hosts() Instead of querying the vCenter with multiple queries for each and every host and cluster, we try to pre-fetch all the relevant properties and pass them on to the appropriate functions. Those function take the cache as optional argument, so they still work as before when called without it from another source. Additionally, we only fetch "runtime.inMaintenanceMode" and "runtime.connectionState" instead of fetching all of "runtime". This reduces the amount of costly XML-parsing in our cinder-volume pod. Change-Id: I37c0c0e86439e67aea95cd5efaa20377d3121e64 --- .../drivers/vmware/test_vmware_datastore.py | 23 ++-- cinder/volume/drivers/vmware/datastore.py | 122 ++++++++++++------ cinder/volume/drivers/vmware/volumeops.py | 69 ++++++---- 3 files changed, 142 insertions(+), 72 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py index 771f8ac83a6..1cd75e0c851 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_datastore.py @@ -252,10 +252,9 @@ def mock_get_host_properties(host_ref): in_maintenance = False else: in_maintenance = True - runtime = mock.Mock(spec=['connectionState', 'inMaintenanceMode']) - runtime.connectionState = 'connected' - runtime.inMaintenanceMode = in_maintenance - return {'parent': cluster_ref, 'runtime': runtime} + return {'parent': cluster_ref, + 'runtime.connectionState': 'connected', + 'runtime.inMaintenanceMode': in_maintenance} get_host_props.side_effect = mock_get_host_properties @@ -306,13 +305,15 @@ def test_select_best_datastore_with_empty_datastores(self): 'get_profile_id') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' 
'_get_datastores') + @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' + '_filter_hosts') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' '_filter_datastores') @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' '_select_best_datastore') def test_select_datastore( - self, select_best_datastore, filter_datastores, get_datastores, - get_profile_id, is_buildup, is_usable): + self, select_best_datastore, filter_datastores, filter_hosts, + get_datastores, get_profile_id, is_buildup, is_usable): profile_id = mock.sentinel.profile_id get_profile_id.return_value = profile_id @@ -320,6 +321,9 @@ def test_select_datastore( datastores = mock.sentinel.datastores get_datastores.return_value = datastores + filtered_hosts = mock.sentinel.filtered_hosts + filter_hosts.return_value = filtered_hosts + filtered_datastores = mock.sentinel.filtered_datastores filter_datastores.return_value = filtered_datastores @@ -342,11 +346,12 @@ def test_select_datastore( self.assertEqual(best_datastore, self._ds_sel.select_datastore(req, hosts)) get_datastores.assert_called_once_with() + filter_hosts.assert_called_once_with(hosts) filter_datastores.assert_called_once_with( datastores, size_bytes, profile_id, anti_affinity_ds, aff_ds_types, - valid_host_refs=hosts) - select_best_datastore.assert_called_once_with(filtered_datastores, - valid_host_refs=hosts) + valid_host_refs=filtered_hosts) + select_best_datastore.assert_called_once_with( + filtered_datastores, valid_host_refs=filtered_hosts) @mock.patch('cinder.volume.drivers.vmware.datastore.DatastoreSelector.' 'get_profile_id') diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 87d82915283..10fafb06443 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -101,60 +101,106 @@ def _filter_by_profile(self, datastores, profile_id): hub_ids = [hub.hubId for hub in hubs] return {k: v for k, v in datastores.items() if k.value in hub_ids} - def is_host_in_buildup_cluster(self, host_ref, cache=None): - host_cluster = self._vops._get_parent(host_ref, - "ClusterComputeResource") - if cache is not None and host_cluster.value in cache: - return cache[host_cluster.value] + def is_host_in_buildup_cluster(self, host_ref, host_cluster_ref=None, + cluster_cache=None): + """Check if a host is in a cluster marked as in buildup + + :param host_ref: a ManagedObjectReference to HostSystem + :param host_cluster_ref: (optional) ManagedObjectReference to + ClusterComputeResource pointing to the cluster of + the given host. Will be fetched if not given. + :param cluster_cache: (optional) dict from ManagedObjectReference value + to dict (property name, property value) for + ClusterComputeResource objects. Can be set if the + required properties for + get_cluster_custom_attributes() were prefetched + for multiple clusters. 
+ """ + if cluster_cache is None: + cluster_cache = {} + + if host_cluster_ref is None: + host_cluster_ref = self._vops._get_parent(host_ref, + "ClusterComputeResource") - attrs = self._vops.get_cluster_custom_attributes(host_cluster) - LOG.debug("attrs {}".format(attrs)) + host_cluster_value = vim_util.get_moref_value(host_cluster_ref) + + attrs = self._vops.get_cluster_custom_attributes( + host_cluster_ref, props=cluster_cache.get(host_cluster_value)) + LOG.debug("Cluster %s custom attributes: %s", + host_cluster_value, attrs) + + if not attrs or 'buildup' not in attrs: + return False def bool_from_str(bool_str): - if bool_str.lower() == "true": - return True - else: - return False + return bool_str.lower() == "true" - result = (attrs and 'buildup' in attrs and - bool_from_str(attrs['buildup']['value'])) - if cache is not None: - cache[host_cluster.value] = result - return result + return bool_from_str(attrs['buildup']['value']) def _is_host_usable(self, host_ref, host_prop_map=None): + """Check a host's connectionState and inMaintenanceMode properties + + :param host_ref: a ManagedObjectReference to HostSystem + :param host_prop_map: (optional) a dict from ManagedObjectReference + value to a dict (property name, property value). + Can be set if the required properties were + prefetched for multiple hosts. + :return: boolean if the host is usable + """ if host_prop_map is None: host_prop_map = {} + props = host_prop_map.get(host_ref.value) if props is None: props = self._get_host_properties(host_ref) host_prop_map[host_ref.value] = props - runtime = props.get('runtime') - parent = props.get('parent') - if runtime and parent: - return (runtime.connectionState == 'connected' and - not runtime.inMaintenanceMode) - else: + connection_state = props.get('runtime.connectionState') + in_maintenance = props.get('runtime.inMaintenanceMode') + if None in (connection_state, in_maintenance): return False + return (connection_state == 'connected' and + not in_maintenance) + def _filter_hosts(self, hosts): - """Filter out any hosts that are in a cluster marked buildup.""" + """Filter out hosts in buildup cluster or otherwise unusable""" + if not hosts: + return [] + + if isinstance(hosts, Iterable): + # prefetch host properties + host_properties = ['runtime.connectionState', + 'runtime.inMaintenanceMode', 'parent'] + host_prop_map = self._get_properties_for_morefs( + 'HostSystem', hosts, host_properties) + + # prefetch cluster properties + host_cluster_refs = set( + h_props['parent'] for h_props in host_prop_map.values() + if h_props.get('parent')) + cluster_prop_map = self._get_properties_for_morefs( + 'ClusterComputeResource', list(host_cluster_refs), + ['availableField', 'customValue']) + else: + host_prop_map = cluster_prop_map = None + hosts = [hosts] valid_hosts = [] - cache = {} - if hosts: - if isinstance(hosts, Iterable): - host_prop_map = {} - for host in hosts: - if (not self.is_host_in_buildup_cluster(host, cache) - and self._is_host_usable( - host, host_prop_map=host_prop_map)): - valid_hosts.append(host) - else: - if (not self.is_host_in_buildup_cluster(hosts, cache) - and self._is_host_usable(host)): - valid_hosts.append(hosts) + for host in hosts: + host_ref_value = vim_util.get_moref_value(host) + host_props = host_prop_map.get(host_ref_value, {}) + host_cluster_ref = host_props.get('parent') + if self.is_host_in_buildup_cluster( + host, host_cluster_ref=host_cluster_ref, + cluster_cache=cluster_prop_map): + continue + + if not self._is_host_usable(host, host_prop_map=host_prop_map): + 
continue + + valid_hosts.append(host) return valid_hosts @@ -283,11 +329,13 @@ def select_datastore_by_name(self, name): return (host, resource_pool, summary) def _get_host_properties(self, host_ref): + properties = ['runtime.connectionState', 'runtime.inMaintenanceMode', + 'parent'] retrieve_result = self._session.invoke_api(vim_util, 'get_object_properties', self._session.vim, host_ref, - ['runtime', 'parent']) + properties) if retrieve_result: return self._get_object_properties(retrieve_result[0]) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index fc0385bbb5c..823f955b7a5 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2010,32 +2010,49 @@ def get_cluster_hosts(self, cluster): return host_refs - def get_cluster_custom_attributes(self, cluster): - retrieve_fields = self._session.invoke_api(vim_util, - 'get_object_property', - self._session.vim, - cluster, - 'availableField') - if retrieve_fields: - custom_fields = {} - for field in retrieve_fields: - for v in field[1]: - custom_fields[v.key] = v.name - - retrieve_result = self._session.invoke_api(vim_util, - 'get_object_property', - self._session.vim, - cluster, - 'customValue') - if retrieve_result: - custom_attributes = {} - for val in retrieve_result: - for i in val[1]: - custom_attributes[custom_fields[i.key]] = { - "value": i.value, 'id': i.key - } - - return custom_attributes + def get_cluster_custom_attributes(self, cluster, props=None): + """Retrieve custom attributes for the given cluster + + props can be a dictionary of pre-fetched properties for this cluster. + We need availableField and customValue here. If they are missing, we + fetch them. + """ + if props is None: + props = {} + + if 'availableField' not in props: + props['availableField'] = \ + self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + cluster, + 'availableField') + if not props['availableField']: + return + + if 'customValue' not in props: + props['customValue'] = \ + self._session.invoke_api(vim_util, + 'get_object_property', + self._session.vim, + cluster, + 'customValue') + if not props['customValue']: + return + + custom_fields = {} + for field in props['availableField']: + for v in field[1]: + custom_fields[v.key] = v.name + + custom_attributes = {} + for val in props['customValue']: + for i in val[1]: + custom_attributes[custom_fields[i.key]] = { + "value": i.value, 'id': i.key + } + + return custom_attributes def get_entity_by_inventory_path(self, path): """Returns the managed object identified by the given inventory path. From 59c20f59dbf6aad6f0d71fb6f8fb4803657b6d73 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 11 Oct 2022 08:48:53 -0400 Subject: [PATCH 108/149] [SAP] catch TemplateNotFoundException during delete This patch adds an exception catch around the call to delete a snapshot template. If the template isn't found, then most likely it was never created, so 'delete' can be successful. 
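
The behaviour amounts to the usual "missing resource means already deleted"
pattern, sketched standalone below with made-up helper names; the driver
itself catches vmdk_exceptions.TemplateNotFoundException and logs a warning,
as in the hunk that follows.

    class TemplateNotFound(Exception):
        """Stand-in for vmdk_exceptions.TemplateNotFoundException."""

    def _delete_snapshot_template(name, templates):
        if name not in templates:
            raise TemplateNotFound(name)
        del templates[name]

    def delete_snapshot(name, templates):
        try:
            _delete_snapshot_template(name, templates)
        except TemplateNotFound:
            # Template was never created (or is already gone), so there is
            # nothing left to clean up -- log it and treat the delete as done.
            print("Failed to find template for snapshot %s" % name)

    templates = {"snap-1": object()}
    delete_snapshot("snap-1", templates)   # removes the template
    delete_snapshot("snap-1", templates)   # already gone -> warning, still succeeds
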
--- cinder/volume/drivers/vmware/vmdk.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index cb90bb67653..1ad1336c7e2 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -1318,7 +1318,13 @@ def _delete_snapshot(self, snapshot): raise exception.InvalidSnapshot(reason=msg) else: if is_template: - self._delete_snapshot_template_format(snapshot) + try: + self._delete_snapshot_template_format(snapshot) + except vmdk_exceptions.TemplateNotFoundException: + # Just raise a warning and move on like the snap + # was deleted. If it's not there, it's already gone. + LOG.warning("Failed to find template for snapshot %s", + snapshot.id) else: self.volumeops.delete_snapshot(backing, snapshot.name) From c191aa84808006ba7ec3dfe97f5f59cdbd6d14cb Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Mon, 24 Oct 2022 22:11:10 +0300 Subject: [PATCH 109/149] update custom-requirements to wallaby-m3 --- custom-requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/custom-requirements.txt b/custom-requirements.txt index f398d2f90b9..992859509f1 100644 --- a/custom-requirements.txt +++ b/custom-requirements.txt @@ -9,6 +9,6 @@ jaeger-client -e git+https://github.com/sapcc/openstack-watcher-middleware.git#egg=watcher-middleware -e git+https://github.com/sapcc/openstack-audit-middleware.git#egg=audit-middleware -e git+https://github.com/sapcc/openstack-rate-limit-middleware.git#egg=rate-limit-middleware --e git+https://github.com/sapcc/os-brick.git@stable/train-m3#egg=os-brick --e git+https://github.com/sapcc/oslo.vmware.git@stable/train-m3#egg=oslo.vmware +-e git+https://github.com/sapcc/os-brick.git@stable/wallaby-m3#egg=os-brick +-e git+https://github.com/sapcc/oslo.vmware.git@stable/wallaby-m3#egg=oslo.vmware -e git+https://github.com/sapcc/dnspython.git@ccloud#egg=dnspython From df00fe579fc360430c5ea210b26774546915cc68 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Mon, 24 Oct 2022 22:12:26 +0300 Subject: [PATCH 110/149] fix concourse_unit_test_task for wallaby --- concourse_unit_test_task | 9 ++++----- test-requirements.txt | 3 +++ tox.ini | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/concourse_unit_test_task b/concourse_unit_test_task index aed7613082d..c90fcadbe1e 100644 --- a/concourse_unit_test_task +++ b/concourse_unit_test_task @@ -1,9 +1,8 @@ export DEBIAN_FRONTEND=noninteractive && \ -export UPPER_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/train-m3/upper-constraints.txt && \ apt-get update && \ -apt-get install -y build-essential python-pip python-dev python3-dev git libpcre++-dev gettext libpq-dev && \ +apt-get install -y build-essential python3-pip python3-dev git libpcre++-dev gettext libpq-dev && \ pip install -U pip && \ -pip install tox "six>=1.14.0" && \ -git clone -b stable/train-m3 --single-branch https://github.com/sapcc/cinder.git --depth=1 && \ -cd cinder && \ +pip install tox && \ +cd source && \ +export TOX_CONSTRAINTS_FILE=https://raw.githubusercontent.com/sapcc/requirements/stable/wallaby-m3/upper-constraints.txt && \ tox -e pep8,py3 diff --git a/test-requirements.txt b/test-requirements.txt index 591debf04bc..74ad39ed49f 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -23,3 +23,6 @@ bandit==1.6.0 # Apache-2.0 doc8>=0.8.1 # Apache-2.0 mypy>=0.782 # MIT moto>=1.3.15 # Apache-2.0 + 
+git+https://github.com/sapcc/os-brick.git@stable/wallaby-m3#egg=os-brick +git+https://github.com/sapcc/oslo.vmware.git@stable/wallaby-m3#egg=oslo.vmware \ No newline at end of file diff --git a/tox.ini b/tox.ini index 6636861a375..21653e7551e 100644 --- a/tox.ini +++ b/tox.ini @@ -19,7 +19,7 @@ setenv = VIRTUAL_ENV={envdir} OS_TEST_TIMEOUT=60 OS_TEST_PATH=./cinder/tests/unit usedevelop = True -install_command=python -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://releases.openstack.org/constraints/upper/wallaby} {opts} {packages} +install_command=python -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://raw.githubusercontent.com/sapcc/requirements/stable/wallaby-m3/upper-constraints.txt} {opts} {packages} deps = -r{toxinidir}/test-requirements.txt -r{toxinidir}/requirements.txt From 37f6523a3fd75e0aae5b31e8dc744566e228a2ee Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Mon, 24 Oct 2022 22:25:49 +0300 Subject: [PATCH 111/149] remove oslo-vmware and os-brick from requirements.txt they are specified in custom-requirements.txt --- requirements.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 48af461c4bd..592206143c8 100644 --- a/requirements.txt +++ b/requirements.txt @@ -55,8 +55,9 @@ tabulate>=0.8.7 # MIT tenacity>=6.3.1 # Apache-2.0 WebOb>=1.8.6 # MIT oslo.i18n>=5.0.1 # Apache-2.0 -oslo.vmware>=3.8.0 # Apache-2.0 -os-brick>=4.3.1 # Apache-2.0 +# Double requirements with the sapcc/custom-requirements.txt +# oslo.vmware>=3.8.0 # Apache-2.0 +# os-brick>=4.3.1 # Apache-2.0 os-win>=5.4.0 # Apache-2.0 tooz>=2.7.1 # Apache-2.0 google-api-python-client>=1.11.0 # Apache-2.0 From 36b0296e5bff4be6de19bdf8d8171e436b2ab737 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Tue, 1 Nov 2022 09:07:56 +0200 Subject: [PATCH 112/149] fix pep8 and unit for wallaby --- cinder/api/contrib/services.py | 2 +- cinder/backup/chunkeddriver.py | 2 +- .../scheduler/filters/sap_affinity_filter.py | 18 ++++-------- .../unit/backup/drivers/test_backup_handle.py | 3 +- .../tests/unit/scheduler/test_shard_filter.py | 3 +- .../tests/unit/volume/drivers/test_datera.py | 23 +++++---------- .../unit/volume/drivers/vmware/test_remote.py | 5 ++-- .../volume/drivers/vmware/test_vmware_vmdk.py | 15 ---------- .../drivers/vmware/test_vmware_volumeops.py | 1 - .../drivers/netapp/dataontap/client/api.py | 2 +- cinder/volume/drivers/vmware/datastore.py | 4 +-- cinder/volume/drivers/vmware/remote.py | 6 ++-- cinder/volume/drivers/vmware/vmdk.py | 28 +++++++++---------- cinder/volume/manager.py | 4 +-- 14 files changed, 40 insertions(+), 76 deletions(-) diff --git a/cinder/api/contrib/services.py b/cinder/api/contrib/services.py index 7a729e5cba8..d81941ad4f0 100644 --- a/cinder/api/contrib/services.py +++ b/cinder/api/contrib/services.py @@ -160,7 +160,7 @@ def _recount_host_stats(self, req, context, body): mv.REPLICATION_CLUSTER) self._volume_api_proxy(self.volume_api.recount_host_stats, context, host) - return webob.Response(status_int=http_client.ACCEPTED) + return webob.Response(status_int=HTTPStatus.ACCEPTED) def _log_params_binaries_services(self, context, body): """Get binaries and services referred by given log set/get request.""" diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index b3b1ceecb6c..89a58619ac0 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -34,8 +34,8 @@ from oslo_log import log as logging from oslo_service import loopingcall from oslo_utils import excutils -from 
oslo_utils import secretutils from oslo_utils import importutils +from oslo_utils import secretutils from oslo_utils import units from cinder.backup import driver diff --git a/cinder/scheduler/filters/sap_affinity_filter.py b/cinder/scheduler/filters/sap_affinity_filter.py index 8a52bf287e8..5d4e3b75d7a 100644 --- a/cinder/scheduler/filters/sap_affinity_filter.py +++ b/cinder/scheduler/filters/sap_affinity_filter.py @@ -130,9 +130,7 @@ def backend_passes(self, backend_state, filter_properties): if not backend_fqdn: # The datastore being filtered doesn't have a custom fqdn set # Don't filter it out. - LOG.debug("Datastore {} has no fqdn".format( - backend_datastore - )) + LOG.debug("Datastore {} has no fqdn", backend_datastore) return True # extract the datastore from the host entries from @@ -144,9 +142,8 @@ def backend_passes(self, backend_state, filter_properties): volume_fqdn = self._get_backend_fqdn(volume_datastore) if volume_fqdn: if volume_fqdn == backend_fqdn: - LOG.debug("Volume FQDN matches {}".format( - backend_fqdn - ), resource=vol) + LOG.debug("Volume FQDN matches {}", + backend_fqdn, resource=vol) return False return True @@ -175,9 +172,7 @@ def backend_passes(self, backend_state, filter_properties): if not backend_fqdn: # The datastore being filtered doesn't have a custom fqdn set # Don't filter it out. - LOG.debug("Datastore {} has no fqdn".format( - backend_datastore - )) + LOG.debug("Datastore {} has no fqdn", backend_datastore) return True # If the result is a list of volumes, then we have to @@ -190,9 +185,8 @@ def backend_passes(self, backend_state, filter_properties): volume_fqdn = self._get_backend_fqdn(volume_datastore) if volume_fqdn: if volume_fqdn == backend_fqdn: - LOG.debug("Volume FQDN matches {}".format( - backend_fqdn - ), resource=vol) + LOG.debug("Volume FQDN matches {}", + backend_fqdn, resource=vol) return True return False diff --git a/cinder/tests/unit/backup/drivers/test_backup_handle.py b/cinder/tests/unit/backup/drivers/test_backup_handle.py index 36fe0e8d578..dea2088cbe4 100644 --- a/cinder/tests/unit/backup/drivers/test_backup_handle.py +++ b/cinder/tests/unit/backup/drivers/test_backup_handle.py @@ -12,11 +12,10 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. - +from unittest import mock from cinder.backup import chunkeddriver from cinder.tests.unit import test -import mock class BackupRestoreHandleV1TestCase(test.TestCase): diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index de1ee8f1172..5c8f11037f5 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -12,8 +12,7 @@ # License for the specific language governing permissions and limitations # under the License. 
import time - -import mock +from unittest import mock from cinder.tests.unit.scheduler import fakes from cinder.tests.unit.scheduler.test_host_filters \ diff --git a/cinder/tests/unit/volume/drivers/test_datera.py b/cinder/tests/unit/volume/drivers/test_datera.py index de7eb4f5c52..1d28a3200c3 100644 --- a/cinder/tests/unit/volume/drivers/test_datera.py +++ b/cinder/tests/unit/volume/drivers/test_datera.py @@ -20,7 +20,6 @@ from cinder import context from cinder import exception from cinder.tests.unit import test -from cinder import version from cinder.volume import configuration as conf from cinder.volume import volume_types @@ -430,21 +429,13 @@ def test_get_manageable_volumes(self): offset = mock.MagicMock() sort_keys = mock.MagicMock() sort_dirs = mock.MagicMock() - if (version.version_string() >= '15.0.0'): - with mock.patch( - 'cinder.volume.volume_utils.paginate_entries_list') \ - as mpage: - self.driver.get_manageable_volumes( - [testvol], marker, limit, offset, sort_keys, sort_dirs) - mpage.assert_called_once_with( - [v1, v2], marker, limit, offset, sort_keys, sort_dirs) - else: - with mock.patch( - 'cinder.volume.utils.paginate_entries_list') as mpage: - self.driver.get_manageable_volumes( - [testvol], marker, limit, offset, sort_keys, sort_dirs) - mpage.assert_called_once_with( - [v1, v2], marker, limit, offset, sort_keys, sort_dirs) + with mock.patch( + 'cinder.volume.volume_utils.paginate_entries_list') \ + as mpage: + self.driver.get_manageable_volumes( + [testvol], marker, limit, offset, sort_keys, sort_dirs) + mpage.assert_called_once_with( + [v1, v2], marker, limit, offset, sort_keys, sort_dirs) def test_unmanage(self): testvol = _stub_volume() diff --git a/cinder/tests/unit/volume/drivers/vmware/test_remote.py b/cinder/tests/unit/volume/drivers/vmware/test_remote.py index f5a1ba0c964..3942a1685f0 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_remote.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_remote.py @@ -12,13 +12,12 @@ # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. 
+from unittest import mock -from cinder.tests.unit import test from cinder.tests.unit import fake_volume +from cinder.tests.unit import test from cinder.volume.drivers.vmware import remote as vmware_remote -import mock - class VmdkDriverRemoteApiTest(test.RPCAPITestCase): diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 0c916d17518..02b0b5f7089 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -167,21 +167,6 @@ def test_check_for_setup_error_fail(self, get_profile_id_by_name, session): get_profile_id_by_name.assert_called_once_with(session, self.STORAGE_PROFILE) - @mock.patch.object(VMDK_DRIVER, '_get_storage_profile') - @mock.patch('oslo_vmware.pbm.get_profile_id_by_name') - def test_get_storage_profile_id( - self, get_profile_id_by_name, session, get_storage_profile): - get_storage_profile.return_value = 'gold' - profile_id = mock.sentinel.profile_id - get_profile_id_by_name.return_value = mock.Mock(uniqueId=profile_id) - - self._driver._storage_policy_enabled = True - volume = self._create_volume_dict() - self.assertEqual(profile_id, - self._driver._get_storage_profile_id(volume)) - get_storage_profile.assert_called_once_with(volume) - get_profile_id_by_name.assert_called_once_with(session, 'gold') - @mock.patch.object(VMDK_DRIVER, 'session') def test_get_volume_stats_no_pools(self, session): retr_result_mock = mock.Mock(spec=['objects']) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index f1d2631c299..154d05dca1d 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -1043,7 +1043,6 @@ def _test_get_clone_spec( value = mock.sentinel.value extra_config = {key: value, volumeops.BACKING_UUID_KEY: mock.sentinel.uuid} - disks_to_clone = [mock.sentinel.disk_uuid] ret = self.vops._get_clone_spec(datastore, disk_move_type, snapshot, diff --git a/cinder/volume/drivers/netapp/dataontap/client/api.py b/cinder/volume/drivers/netapp/dataontap/client/api.py index 95b3cb63b45..adbe4889d22 100644 --- a/cinder/volume/drivers/netapp/dataontap/client/api.py +++ b/cinder/volume/drivers/netapp/dataontap/client/api.py @@ -20,6 +20,7 @@ Contains classes required to issue API calls to Data ONTAP and OnCommand DFM. 
""" import random +import ssl from eventlet import greenthread from eventlet import semaphore @@ -28,7 +29,6 @@ from oslo_utils import netutils import six from six.moves import urllib -import ssl from cinder import exception from cinder.i18n import _ diff --git a/cinder/volume/drivers/vmware/datastore.py b/cinder/volume/drivers/vmware/datastore.py index 10fafb06443..5bdc3e540ef 100644 --- a/cinder/volume/drivers/vmware/datastore.py +++ b/cinder/volume/drivers/vmware/datastore.py @@ -444,9 +444,9 @@ def select_datastore(self, req, hosts=None): datastores = self._get_datastores() # We don't want to use hosts in buildup - LOG.debug("FILTER hosts start {}".format(hosts)) + LOG.debug("FILTER hosts start {}", hosts) valid_hosts = self._filter_hosts(hosts) - LOG.debug("FILTERED hosts valid {}".format(valid_hosts)) + LOG.debug("FILTERED hosts valid {}", valid_hosts) datastores = self._filter_datastores(datastores, size_bytes, profile_id, diff --git a/cinder/volume/drivers/vmware/remote.py b/cinder/volume/drivers/vmware/remote.py index 30c81462503..5582e551fa0 100644 --- a/cinder/volume/drivers/vmware/remote.py +++ b/cinder/volume/drivers/vmware/remote.py @@ -18,12 +18,12 @@ This is the gateway which allows us gathering VMWare related information from other hosts and perform cross vCenter operations. """ +import oslo_messaging as messaging +from oslo_vmware import vim_util + from cinder import rpc from cinder.volume.rpcapi import VolumeAPI from cinder.volume import volume_utils -from oslo_vmware import vim_util - -import oslo_messaging as messaging class VmdkDriverRemoteApi(rpc.RPCAPI): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 1ad1336c7e2..7a2e56b4ae7 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -23,10 +23,10 @@ """ import math -import OpenSSL import re import ssl +import OpenSSL from oslo_config import cfg from oslo_log import log as logging from oslo_utils import excutils @@ -44,7 +44,6 @@ from cinder.i18n import _ from cinder.image import image_utils from cinder import interface -from cinder import utils from cinder.volume import configuration from cinder.volume import driver from cinder.volume.drivers.vmware import datastore as hub @@ -829,9 +828,8 @@ def _create_backing(self, volume, host=None, create_params=None, # TODO(walt) - this writes the volume update to the db. :( # This sucks, but don't have any other way new_host = self._new_host_for_volume(volume) - LOG.info("Changing volume host from {} to {}".format( - volume.host, new_host - )) + LOG.info("Changing volume host from {} to {}", + volume.host, new_host) model_update = {'host': new_host} volume.update(model_update) volume.save() @@ -2566,7 +2564,7 @@ def update_provider_info(self, volumes, snapshots): We don't care about snapshots, they just use the volume's provider_id. 
""" - LOG.info("HOST {} : volumes {}".format(self.host, len(volumes))) + LOG.info("HOST {} : volumes {}", self.host, len(volumes)) if not self.configuration.vmware_sap_update_provider_info: LOG.info("Not updating provider information") return [], None @@ -2577,7 +2575,7 @@ def update_provider_info(self, volumes, snapshots): datastores = self._get_datastores_for_profiles() ds_keys = datastores.keys() vol_updates = [] - LOG.info("Process {} volumes".format(len(volumes))) + LOG.info("Process {} volumes", len(volumes)) for vol in volumes: # make sure we have the correc host info if vol['status'] in ['in-use', 'available']: @@ -2591,7 +2589,7 @@ def update_provider_info(self, volumes, snapshots): # this will save time on every startup if (pool_info not in ds_keys or pool_info == volume_utils.DEFAULT_POOL_NAME): - LOG.debug("Updating host for volume {}".format(vol.id)) + LOG.debug("Updating host for volume {}", vol.id) try: new_host = self._new_host_for_volume(vol) @@ -2600,11 +2598,11 @@ def update_provider_info(self, volumes, snapshots): vol.save() except Exception as ex: LOG.warning("Couldn't update host for {} because " - " {}".format(vol.id, ex)) + " {}", vol.id, ex) else: - LOG.debug("Keeping host for volume {}".format(vol.id)) + LOG.debug("Keeping host for volume {}", vol.id) - LOG.info("HOST COMPLETE {}".format(self.host)) + LOG.info("HOST COMPLETE {}", self.host) return vol_updates, None else: # Since pools are not enabled, we should ensure that the datastore @@ -2623,8 +2621,8 @@ def update_provider_info(self, volumes, snapshots): vol.host, level='backend' ) backend = backend_info.split("@") - LOG.info("Volume host '{}' backend '{}' pool '{}'".format( - vol.host, backend[1], pool_info)) + LOG.info("Volume host '{}' backend '{}' pool '{}'", + vol.host, backend[1], pool_info) # we need to force the host back to # host@backend#backend @@ -2634,11 +2632,11 @@ def update_provider_info(self, volumes, snapshots): backend[1] ) if new_host != vol.host: - LOG.info("Setting host to {}".format(new_host)) + LOG.info("Setting host to {}", new_host) vol.update({'host': new_host}) vol.save() - LOG.info("HOST COMPLETE {}".format(self.host)) + LOG.info("HOST COMPLETE {}", self.host) return vol_updates, None def _get_volume_group_folder(self, datacenter, project_id, snapshot=False): diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 731725a80ac..72376692f4f 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -4857,7 +4857,7 @@ def get_backup_device(self, ctxt, backup, want_objects=False, try: (backup_device, is_snapshot) = ( self.driver.get_backup_device(ctxt, backup)) - except Exception as ex: + except Exception: if async_call: LOG.exception("Failed to get backup device. 
" "Calling backup continue_backup to cleanup") @@ -4881,7 +4881,7 @@ def get_backup_device(self, ctxt, backup, want_objects=False, if async_call: # we have to use an rpc call back to the backup manager to # continue the backup - LOG.info("Calling backup continue_backup for: {}".format(backup)) + LOG.info("Calling backup continue_backup for: {}", backup) rpcapi = backup_rpcapi.BackupAPI() rpcapi.continue_backup(ctxt, backup, backup_device) else: From 315786c7f9f3f38d6e120020606b3c6f88e4d13b Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 2 Nov 2022 12:45:56 -0400 Subject: [PATCH 113/149] [SAP] Update requirements.txt to include memcached for tooz This patch updates the requirements.txt to include the optional package memacached required for tooz for our deployment --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 592206143c8..cfed27ae498 100644 --- a/requirements.txt +++ b/requirements.txt @@ -59,7 +59,7 @@ oslo.i18n>=5.0.1 # Apache-2.0 # oslo.vmware>=3.8.0 # Apache-2.0 # os-brick>=4.3.1 # Apache-2.0 os-win>=5.4.0 # Apache-2.0 -tooz>=2.7.1 # Apache-2.0 +tooz[memcached]>=2.7.1 # Apache-2.0 google-api-python-client>=1.11.0 # Apache-2.0 castellan>=3.6.0 # Apache-2.0 cryptography>=3.1 # BSD/Apache-2.0 From 19a6b0b04187dd713168cdab20ccaa1fe1408d16 Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Mon, 31 Oct 2022 13:27:15 +0200 Subject: [PATCH 114/149] [SAP] set the correct vmdk_size when restoring backups VMDKs are stream optimized, having different size than the actual volume. We have to set the correct vmdk_size to VmdkWriteHandle so that it can validate the upload progress correctly. --- cinder/backup/chunkeddriver.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 89a58619ac0..050839b10ee 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -780,6 +780,16 @@ def add_backup(self, backup, metadata, requested_backup, volume_id): return def finish_restore(self): + # SAP + # hacking into VmdkWriteHandle to set the correct file size. + # VMDKs are stream optimized, being smaller than the actual + # volume. Setting the correct vmdk_size in VmdkWriteHandle + # allows it to properly validate the upload progress, + # otherwise the restore would fail with 'incomplete transfer'. + if hasattr(self._volume_file, '_vmdk_size'): + file_size = sum([s.length for s in self._segments]) + self._volume_file._vmdk_size = file_size + for segment in self._segments: LOG.debug('restoring object. backup: %(backup_id)s, ' 'container: %(container)s, object name: ' From 78c9875dc12194b048875f626730494a1d3bceaa Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 3 Nov 2022 16:37:09 -0400 Subject: [PATCH 115/149] [SAP] Fix the shard filter for group creation Temptest tests found a bug in the shard filter where creating a cinder group from an existing group or group snapshot always failed with not finding any hosts. This is due to the shard filter not getting much of any information about the request. This patch adds the project_id of the group being created to ensure that the shard filter can work properly. 
--- cinder/group/api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cinder/group/api.py b/cinder/group/api.py index 29c138e2f34..852f9e25d93 100644 --- a/cinder/group/api.py +++ b/cinder/group/api.py @@ -254,7 +254,10 @@ def create_from_src(self, context, name, description=None, # 'volume_properties' as scheduler's filter logic are all designed # based on this attribute. kwargs = {'group_id': group.id, - 'volume_properties': objects.VolumeProperties(size=size)} + 'volume_properties': objects.VolumeProperties( + size=size, + project_id=group.project_id + )} host = group.resource_backend if not host or not self.scheduler_rpcapi.validate_host_capacity( From 33ad0d84c92ff53db11292767ac40a55a42be770 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 17 Nov 2022 15:24:16 -0500 Subject: [PATCH 116/149] Allow snapshots to be independent This patch allows a snapshot creation request go through the scheduler to pick a pool for the snapshot to live on. The backend picked by the scheduler is added to the snapshot metadata. This metadata field '__cinder_internal_backend' is filtered out of requests fetching the snapshot information, so end users will never see it. To enable this ability a new config option is added to the scheduler and is defaulted to be False or disabled. sap_allow_independent_snapshots This allows creating a snapshot from a volume to happen on a completely different pool than the source volume. This patch also alters create volume from snapshot, to allow a volume to be created from a different pool than the source volume that the snapshot was created from. All of this is not how upstream works or allows. Fix snapshot view filtering This patch fixes an issue iterating over the metadata from the snapshot views API. The code is supposed to filter out and remove the __cinder_internal keys in the metadata. This patch looks for the keys to remove first and then deletes them instead of iterating over the metadata array and trying to delete them while iterating over the metadata, which causes a python exception. --- cinder/api/common.py | 2 ++ cinder/api/views/snapshots.py | 7 ++++++ cinder/objects/snapshot.py | 3 +++ cinder/scheduler/manager.py | 23 +++++++++++++++++-- .../volume/drivers/vmware/test_vmware_vmdk.py | 5 ++-- cinder/volume/drivers/vmware/vmdk.py | 23 ++++++++++++++++--- cinder/volume/flows/api/create_volume.py | 8 +++++++ 7 files changed, 64 insertions(+), 7 deletions(-) diff --git a/cinder/api/common.py b/cinder/api/common.py index 4320527862c..f3c53566fa7 100644 --- a/cinder/api/common.py +++ b/cinder/api/common.py @@ -54,6 +54,8 @@ METADATA_TYPES = enum.Enum('METADATA_TYPES', 'user image') +SAP_HIDDEN_METADATA_KEY = "__cinder_internal" + def get_pagination_params(params, max_limit=None): """Return marker, limit, offset tuple from request. 
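As a rough standalone illustration of the convention introduced above (the helper name and sample values are invented; the actual view change follows in the snapshots view diff below), keys carrying the internal prefix are stripped before metadata is returned to end users:

    # Illustrative sketch only, not the view code itself.
    SAP_HIDDEN_METADATA_KEY = "__cinder_internal"

    def strip_internal_keys(metadata):
        hidden = [k for k in metadata
                  if k.startswith(SAP_HIDDEN_METADATA_KEY)]
        for key in hidden:
            del metadata[key]
        return metadata

    meta = {'__cinder_internal_backend': 'host@backend#pool', 'fstype': 'ext4'}
    assert strip_internal_keys(meta) == {'fstype': 'ext4'}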
diff --git a/cinder/api/views/snapshots.py b/cinder/api/views/snapshots.py index 7d562307c4d..467d380a3d0 100644 --- a/cinder/api/views/snapshots.py +++ b/cinder/api/views/snapshots.py @@ -39,6 +39,13 @@ def summary(self, request, snapshot): """Generic, non-detailed view of a snapshot.""" if isinstance(snapshot.metadata, dict): metadata = snapshot.metadata + # SAP we don't show the backend here because it's + # custom for our deployment with independent snaps + # for the vmware vmdk driver + del_key = common.SAP_HIDDEN_METADATA_KEY + delete_keys = [key for key in metadata if key.startswith(del_key)] + for key in delete_keys: + del metadata[key] else: metadata = {} diff --git a/cinder/objects/snapshot.py b/cinder/objects/snapshot.py index 844c51ca5ca..114184a4b55 100644 --- a/cinder/objects/snapshot.py +++ b/cinder/objects/snapshot.py @@ -16,6 +16,7 @@ from oslo_utils import versionutils from oslo_versionedobjects import fields +from cinder.api import common from cinder import db from cinder import exception from cinder.i18n import _ @@ -28,6 +29,8 @@ CONF = cfg.CONF +SAP_HIDDEN_BACKEND_KEY = common.SAP_HIDDEN_METADATA_KEY + "_backend" + @base.CinderObjectRegistry.register class Snapshot(cleanable.CinderCleanableObject, base.CinderObject, diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index 230c1fafdef..84107d76255 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -44,6 +44,7 @@ from cinder.message import message_field from cinder import objects from cinder.objects import fields +from cinder.objects import snapshot as snapshot_obj from cinder import quota from cinder import rpc from cinder.scheduler.flows import create_volume @@ -62,6 +63,10 @@ min=1, help='Maximum time in seconds to wait for the driver to ' 'report as ready'), + cfg.BoolOpt('sap_allow_independent_snapshots', + default=False, + help='Allow cinder to schedule snapshot creations on pools ' + 'other than the source volume pool.'), ] CONF = cfg.CONF @@ -239,19 +244,33 @@ def create_snapshot(self, ctxt, volume, snapshot, backend, """ self._wait_for_scheduler() + if CONF.sap_allow_independent_snapshots: + # We allow snapshots to be created on a pool + # separate from the volume's pool. 
+ backend = vol_utils.extract_host(volume['host']) + try: tgt_backend = self.driver.backend_passes_filters( ctxt, backend, request_spec, filter_properties) tgt_backend.consume_from_volume( {'size': request_spec['volume_properties']['size']}) + LOG.info("Snapshot picked backend_id is '%s'", + tgt_backend.backend_id) except exception.NoValidBackend as ex: self._set_snapshot_state_and_notify('create_snapshot', snapshot, fields.SnapshotStatus.ERROR, ctxt, ex, request_spec) else: - volume_rpcapi.VolumeAPI().create_snapshot(ctxt, volume, - snapshot) + if CONF.sap_allow_independent_snapshots: + # Set this in the metadata of the snap + key = snapshot_obj.SAP_HIDDEN_BACKEND_KEY + if snapshot.metadata: + snapshot.metadata[key] = tgt_backend.backend_id + else: + snapshot.metadata = {key: tgt_backend.backend_id} + snapshot.save() + volume_rpcapi.VolumeAPI().create_snapshot(ctxt, volume, snapshot) def _do_cleanup(self, ctxt, vo_resource): # We can only receive cleanup requests for volumes, but we check anyway diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py index 02b0b5f7089..adeca7d5403 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_vmdk.py @@ -422,7 +422,8 @@ def _create_snapshot_dict(self, 'name': name, 'display_description': description, 'volume_size': volume['size'], - 'provider_location': provider_location + 'provider_location': provider_location, + 'metadata': {}, } @mock.patch.object(VMDK_DRIVER, 'volumeops') @@ -569,7 +570,7 @@ def test_create_snapshot_template( vops.get_backing.assert_called_once_with(snapshot['volume_name'], snapshot['volume']['id']) create_snapshot_template_format.assert_called_once_with( - snapshot, backing) + snapshot, backing, backend=None) @mock.patch.object(VMDK_DRIVER, 'volumeops') def test_get_template_by_inv_path(self, vops): diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 7a2e56b4ae7..dc06c8c309e 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -44,6 +44,7 @@ from cinder.i18n import _ from cinder.image import image_utils from cinder import interface +from cinder.objects import snapshot as snapshot_obj from cinder.volume import configuration from cinder.volume import driver from cinder.volume.drivers.vmware import datastore as hub @@ -1211,10 +1212,21 @@ def _get_snapshot_group_folder(self, volume, backing): return self._get_volume_group_folder( dc, volume.project_id, snapshot=True) - def _create_snapshot_template_format(self, snapshot, backing): + @volume_utils.trace + def _create_snapshot_template_format(self, snapshot, backing, + backend=None): volume = snapshot.volume folder = self._get_snapshot_group_folder(volume, backing) - datastore = self.volumeops.get_datastore(backing) + if backend: + # Create the snapshot on the datastore described in + # backend making this snapshot independent from the volume + datastore_name = volume_utils.extract_host(backend, 'pool') + (host_ref, + resource_pool, + summary) = self.ds_sel.select_datastore_by_name(datastore_name) + datastore = summary.datastore + else: + datastore = self.volumeops.get_datastore(backing) if self._in_use(volume): tmp_backing = self._create_temp_backing_from_attached_vmdk( @@ -1243,6 +1255,11 @@ def _create_snapshot(self, snapshot): :param snapshot: Snapshot object """ + backend = None + key = snapshot_obj.SAP_HIDDEN_BACKEND_KEY + if ('metadata' 
in snapshot and snapshot['metadata'] and + key in snapshot['metadata']): + backend = snapshot['metadata'][key] volume = snapshot['volume'] snapshot_format = self.configuration.vmware_snapshot_format @@ -1265,7 +1282,7 @@ def _create_snapshot(self, snapshot): snapshot['display_description']) else: model_update = self._create_snapshot_template_format( - snapshot, backing) + snapshot, backing, backend=backend) LOG.info("Successfully created snapshot: %s.", snapshot['name']) return model_update diff --git a/cinder/volume/flows/api/create_volume.py b/cinder/volume/flows/api/create_volume.py index a598fa726be..a6a20957d19 100644 --- a/cinder/volume/flows/api/create_volume.py +++ b/cinder/volume/flows/api/create_volume.py @@ -761,6 +761,14 @@ def _cast_create_volume(self, context, request_spec, filter_properties): # service with the desired backend information. snapshot = objects.Snapshot.get_by_id(context, snapshot_id) request_spec['resource_backend'] = snapshot.volume.resource_backend + # SAP only force the same backend, not the same pool + # if we are allowing snapshots to live on pools other than + # the source volume. + if CONF.sap_allow_independent_snapshots: + backend = volume_utils.extract_host( + snapshot.volume.resource_backend + ) + request_spec['resource_backend'] = backend elif source_volid: source_volume_ref = objects.Volume.get_by_id(context, source_volid) request_spec['resource_backend'] = ( From f5c24c69ac4bd40b04d62df29dab3e4596dcfd09 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 6 Dec 2022 10:50:10 -0500 Subject: [PATCH 117/149] Allow independent cloning of volumes This patch adds a new custom sap config option SAP_allow_independent_clone This patch allows a clone of a volume to be scheduled on a different pool than the source volume. 
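Roughly, the effect on scheduling is that only the backend part of the source volume's host string is kept as a constraint. A small sketch, assuming a typical host string of the form host@backend#pool (the example value is invented):

    # Illustrative only: extract_host() defaults to level='backend', so the
    # pool suffix is dropped and the scheduler may place the clone on any
    # pool of the same backend.
    from cinder.volume import volume_utils

    source_backend = 'cinder-volume-01@vmware#datastore-01'
    pinned = volume_utils.extract_host(source_backend)
    assert pinned == 'cinder-volume-01@vmware'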
--- cinder/scheduler/manager.py | 4 ++++ cinder/volume/flows/api/create_volume.py | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/cinder/scheduler/manager.py b/cinder/scheduler/manager.py index 84107d76255..7e810dbdcaf 100644 --- a/cinder/scheduler/manager.py +++ b/cinder/scheduler/manager.py @@ -67,6 +67,10 @@ default=False, help='Allow cinder to schedule snapshot creations on pools ' 'other than the source volume pool.'), + cfg.BoolOpt('sap_allow_independent_clone', + default=False, + help='Allow cinder to schedule a clone volume on a pool ' + 'other than the source volume pool.'), ] CONF = cfg.CONF diff --git a/cinder/volume/flows/api/create_volume.py b/cinder/volume/flows/api/create_volume.py index a6a20957d19..be852d8c2b3 100644 --- a/cinder/volume/flows/api/create_volume.py +++ b/cinder/volume/flows/api/create_volume.py @@ -771,8 +771,13 @@ def _cast_create_volume(self, context, request_spec, filter_properties): request_spec['resource_backend'] = backend elif source_volid: source_volume_ref = objects.Volume.get_by_id(context, source_volid) - request_spec['resource_backend'] = ( - source_volume_ref.resource_backend) + if CONF.sap_allow_independent_clone: + backend = volume_utils.extract_host( + source_volume_ref.resource_backend + ) + else: + backend = source_volume_ref.resource_backend + request_spec['resource_backend'] = backend self.scheduler_rpcapi.create_volume( context, From 4ee8e7912fd20b3b48e1318c96d4671fe33aee48 Mon Sep 17 00:00:00 2001 From: Maurice Escher Date: Wed, 14 Dec 2022 08:27:41 +0100 Subject: [PATCH 118/149] SAPCC: don't log in method called by eventlet.tpool.execute() fixes hanging thread due to https://github.com/eventlet/eventlet/issues/432 which may get fixed for oslo.log in 5.0.1 with https://github.com/openstack/oslo.log/commit/94b9dc32ec1f52a582adbd97fe2847f7c87d6c17 (at the time of writing master in antelope cycle is constraint to 5.0.0) --- cinder/backup/chunkeddriver.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 050839b10ee..54df8338dc5 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -875,7 +875,9 @@ def _clear_reader(self, segment): return self._object_readers[obj_name].close() del self._object_readers[obj_name] - LOG.debug("Cleared reader for object %s", segment.obj['name']) + # See hint about https://github.com/eventlet/eventlet/issues/432 + # at the top why not to log here. May be fixed in oslo.log 5.0.1 + # LOG.debug("Cleared reader for object %s", segment.obj['name']) def add_object(self, metadata_object): """Merges a backup chunk over the self._segments list. From ec9b72037eac73df0ed33f61b56de0561b2139e9 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 5 Jan 2023 14:22:40 -0500 Subject: [PATCH 119/149] Mark pool as down if datastore draining This patch looks for a custom attribute 'cinder_state' on a datastore at get_volume_stats() time. If the custom attribute value is 'drain', then the datastore is marked as 'draining' and cinder shouldn't use it for provisioning new requests. The datastore will be marked as down in the pool stats for that datastore. 
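A minimal sketch of the intended check (the helper name is invented, and the sketch already includes the None guard that a later patch in this series adds):

    def pool_state_from_attrs(custom_attributes):
        # Operators set the vCenter custom attribute 'cinder_state' to
        # 'drain' to stop new provisioning on a datastore.
        pool_state = 'up'
        pool_down_reason = None
        cinder_state = custom_attributes.get('cinder_state')
        if cinder_state and cinder_state.lower() == 'drain':
            pool_state = 'down'
            pool_down_reason = 'Datastore marked as draining'
        return pool_state, pool_down_reason

    assert pool_state_from_attrs({'cinder_state': 'Drain'})[0] == 'down'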
--- cinder/volume/drivers/vmware/vmdk.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index dc06c8c309e..ddeecd5af43 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -576,6 +576,19 @@ def _get_volume_stats(self): alarm_info['info.description'] ) + # Add any custom attributes associated with the datastore + custom_attributes = {} + if "custom_attributes" in datastore: + custom_attributes = datastore['custom_attributes'] + + # A datastore can be marked as draining in vcenter + # in which case we want to mark it down. + if 'cinder_state' in custom_attributes: + cinder_pool_state = custom_attributes['cinder_state'] + if cinder_pool_state.lower() == 'drain': + pool_state = 'down' + pool_down_reason = 'Datastore marked as draining' + pool = {'pool_name': summary.name, 'total_capacity_gb': round( summary.capacity / units.Gi), @@ -594,13 +607,10 @@ def _get_volume_stats(self): 'connection_capabilities': connection_capabilities, 'backend_state': backend_state, 'pool_state': pool_state, - 'pool_down_reason': pool_down_reason + 'pool_down_reason': pool_down_reason, + 'custom_attributes': custom_attributes, } - # Add any custom attributes associated with the datastore - if "custom_attributes" in datastore: - pool['custom_attributes'] = datastore['custom_attributes'] - pools.append(pool) data['pools'] = pools return data From 3aa26a12d1b779280f4c921999bc7aebb0e8b222 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 25 Jan 2023 11:35:40 -0500 Subject: [PATCH 120/149] [SAP] drop existing SAP constraint This patch updates the 141 DB version upgrade to first drop the existing quota_usages table constraint on quota_usages_project_id_key, which happens to be UNIQUE (project_id, resource, deleted). 141 by default adds a unique constraint on the same key as ALTER TABLE quota_usages ADD CONSTRAINT quota_usages_project_id_key UNIQUE (project_id, resource, race_preventer) --- .../versions/141_add_quota_usage_unique_constraint.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/cinder/db/sqlalchemy/migrate_repo/versions/141_add_quota_usage_unique_constraint.py b/cinder/db/sqlalchemy/migrate_repo/versions/141_add_quota_usage_unique_constraint.py index 81ed7bb2d50..b5559b8d689 100644 --- a/cinder/db/sqlalchemy/migrate_repo/versions/141_add_quota_usage_unique_constraint.py +++ b/cinder/db/sqlalchemy/migrate_repo/versions/141_add_quota_usage_unique_constraint.py @@ -31,6 +31,12 @@ def upgrade(migrate_engine): if not hasattr(quota_usages.c, 'race_preventer'): quota_usages.create_column(Column('race_preventer', Boolean, nullable=True)) + # SAP drop the existing constraint + unique_SAP = constraint.UniqueConstraint( + 'project_id', 'resource', 'deleted', + table=quota_usages) + unique_SAP.drop(engine=migrate_engine) + unique = constraint.UniqueConstraint( 'project_id', 'resource', 'race_preventer', table=quota_usages) From 4d647b8a24b4a606a4e8affe6beea15dc00c8c18 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 9 Feb 2023 15:24:00 -0500 Subject: [PATCH 121/149] Fix setting the restore handle size This patch changes how cinder calls the oslo.vmware rw_handle to set the size of the contents of the vmdk file bytes to using a method on the object. 
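A rough sketch of the call site, assuming the oslo.vmware write handle gains a set_size() helper (the wrapper name here is invented):

    # Illustrative only: duck-typing keeps non-VMDK restore paths untouched.
    def maybe_set_vmdk_size(volume_file, segments):
        if hasattr(volume_file, 'set_size'):
            # A stream-optimized VMDK's size is the sum of the backed-up
            # chunks, not the nominal volume size.
            volume_file.set_size(sum(s.length for s in segments))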
This requires PR: https://github.com/sapcc/oslo.vmware/pull/35 --- cinder/backup/chunkeddriver.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/cinder/backup/chunkeddriver.py b/cinder/backup/chunkeddriver.py index 54df8338dc5..9323a6af599 100644 --- a/cinder/backup/chunkeddriver.py +++ b/cinder/backup/chunkeddriver.py @@ -786,9 +786,11 @@ def finish_restore(self): # volume. Setting the correct vmdk_size in VmdkWriteHandle # allows it to properly validate the upload progress, # otherwise the restore would fail with 'incomplete transfer'. - if hasattr(self._volume_file, '_vmdk_size'): + if hasattr(self._volume_file, 'set_size'): file_size = sum([s.length for s in self._segments]) - self._volume_file._vmdk_size = file_size + LOG.debug("Setting vmdk contents size to %s for restore", + file_size) + self._volume_file.set_size(file_size) for segment in self._segments: LOG.debug('restoring object. backup: %(backup_id)s, ' @@ -819,6 +821,12 @@ def finish_restore(self): # Restoring a backup to a volume can take some time. Yield so other # threads can run, allowing for among other things the service # status to be updated + if hasattr(self._volume_file, '_get_progress'): + LOG.debug("progress? %(progress)s '%(volume_file)s'", + { + 'progress': self._volume_file._get_progress(), + 'volume_file': str(self._volume_file) + }) eventlet.sleep(0) def _read_segment(self, segment): From 510f1a6db9ab1fa76973c719e925fcfd6dbce135 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 28 Feb 2023 09:13:20 -0500 Subject: [PATCH 122/149] [SAP] Fix logging formats This patch fixes some formats of strings and calls to LOG. They were working in train, but started failing in Wallaby. The old strings were LOG.debug("foo bar {}", something) They are fixed to LOG.debug("foo bar %s", something) --- cinder/volume/drivers/vmware/vmdk.py | 31 ++++++++++++++++------------ cinder/volume/manager.py | 2 +- 2 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index ddeecd5af43..f15e1d5dd08 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -839,8 +839,8 @@ def _create_backing(self, volume, host=None, create_params=None, # TODO(walt) - this writes the volume update to the db. :( # This sucks, but don't have any other way new_host = self._new_host_for_volume(volume) - LOG.info("Changing volume host from {} to {}", - volume.host, new_host) + LOG.info("Changing volume host from %(old_host)s to %(new_host)s", + {'old_host': volume.host, 'new_host': new_host}) model_update = {'host': new_host} volume.update(model_update) volume.save() @@ -2591,7 +2591,8 @@ def update_provider_info(self, volumes, snapshots): We don't care about snapshots, they just use the volume's provider_id. 
""" - LOG.info("HOST {} : volumes {}", self.host, len(volumes)) + LOG.info("HOST %(host)s : volumes %(volumes_count)s", + {'host': self.host, 'volumes_count': len(volumes)}) if not self.configuration.vmware_sap_update_provider_info: LOG.info("Not updating provider information") return [], None @@ -2602,7 +2603,7 @@ def update_provider_info(self, volumes, snapshots): datastores = self._get_datastores_for_profiles() ds_keys = datastores.keys() vol_updates = [] - LOG.info("Process {} volumes", len(volumes)) + LOG.info("Process %s volumes", len(volumes)) for vol in volumes: # make sure we have the correc host info if vol['status'] in ['in-use', 'available']: @@ -2616,7 +2617,7 @@ def update_provider_info(self, volumes, snapshots): # this will save time on every startup if (pool_info not in ds_keys or pool_info == volume_utils.DEFAULT_POOL_NAME): - LOG.debug("Updating host for volume {}", vol.id) + LOG.debug("Updating host for volume %s", vol.id) try: new_host = self._new_host_for_volume(vol) @@ -2624,12 +2625,13 @@ def update_provider_info(self, volumes, snapshots): vol.update({'host': new_host}) vol.save() except Exception as ex: - LOG.warning("Couldn't update host for {} because " - " {}", vol.id, ex) + LOG.warning("Couldn't update host for %(id)s " + "because %(error)s", + {'id': vol.id, 'error': ex}) else: - LOG.debug("Keeping host for volume {}", vol.id) + LOG.debug("Keeping host for volume %s", vol.id) - LOG.info("HOST COMPLETE {}", self.host) + LOG.info("HOST COMPLETE %s", self.host) return vol_updates, None else: # Since pools are not enabled, we should ensure that the datastore @@ -2648,8 +2650,11 @@ def update_provider_info(self, volumes, snapshots): vol.host, level='backend' ) backend = backend_info.split("@") - LOG.info("Volume host '{}' backend '{}' pool '{}'", - vol.host, backend[1], pool_info) + LOG.info("Volume host '%(host)s' backend '%(backend)s' " + "pool '%(pool)s'", + {'host': vol.host, + 'backend': backend[1], + 'pool': pool_info}) # we need to force the host back to # host@backend#backend @@ -2659,11 +2664,11 @@ def update_provider_info(self, volumes, snapshots): backend[1] ) if new_host != vol.host: - LOG.info("Setting host to {}", new_host) + LOG.info("Setting host to %s", new_host) vol.update({'host': new_host}) vol.save() - LOG.info("HOST COMPLETE {}", self.host) + LOG.info("HOST COMPLETE %s", self.host) return vol_updates, None def _get_volume_group_folder(self, datacenter, project_id, snapshot=False): diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 72376692f4f..00078c45641 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -4881,7 +4881,7 @@ def get_backup_device(self, ctxt, backup, want_objects=False, if async_call: # we have to use an rpc call back to the backup manager to # continue the backup - LOG.info("Calling backup continue_backup for: {}", backup) + LOG.info("Calling backup continue_backup for: %s", backup) rpcapi = backup_rpcapi.BackupAPI() rpcapi.continue_backup(ctxt, backup, backup_device) else: From 9f5f69521ec1886279a4049cbd20750f6928516a Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 28 Feb 2023 10:07:39 -0500 Subject: [PATCH 123/149] [SAP]Get ready for tox 4 Changes: - eliminate whitespace in passenv values - account for stricter allowlist checking - removed skipsdist=True, which in tox 4 appears to prevent cinder from being installed in the testenvs - made 4.0.0 the tox minversion, which means tox will have to update itself until it's available in distros, which in turn means that the default 
install_command needs to be used in the base testenv, which means a new base pyXX testenv is added to include our install_command that includes upper constraints - added install_command to most testenvs since they don't inherit the correct one from the base testenv any more Not strictly necessary for this patch, but I did them anyway: - moved the api-ref and releasenotes testenvs to be closer to the docs testenv - added reno as a dep for the 'venv' testenv, which our contributor docs say should be used to generate a new release note from the reno template, and which has apparently been broken for a while This patch makes tox 4 the default so that we can hopefully catch problems locally before they block the gate. SAP - The change to setup.py to add the py_modules line comes from setuptools >= 61 being automatically installed in the process and it's only overwritten with the version from constraints Change-Id: I75e36fa100925bd486c9d4fdf8a33dd58347ce81 --- setup.py | 1 + tools/check_exec.py | 2 +- tox.ini | 104 +++++++++++++++++++++++++++++++------------- 3 files changed, 76 insertions(+), 31 deletions(-) diff --git a/setup.py b/setup.py index cd35c3c35bf..31d6ada1745 100644 --- a/setup.py +++ b/setup.py @@ -17,4 +17,5 @@ setuptools.setup( setup_requires=['pbr>=2.0.0'], + py_modules=[], pbr=True) diff --git a/tools/check_exec.py b/tools/check_exec.py index 7fe59fb4310..251312935fa 100755 --- a/tools/check_exec.py +++ b/tools/check_exec.py @@ -1,4 +1,4 @@ -#!/usr/bin/python3 +#!/usr/bin/env python3 # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain diff --git a/tox.ini b/tox.ini index 21653e7551e..6c80c80bdce 100644 --- a/tox.ini +++ b/tox.ini @@ -1,28 +1,32 @@ [tox] -minversion = 3.18.0 -requires = - virtualenv>=20.4.2 - tox<4 -skipsdist = True -# python runtimes: https://governance.openstack.org/tc/reference/project-testing-interface.html#tested-runtimes -envlist = py3,compliance,pep8 +minversion = 4.0.0 +# specify virtualenv here to keep local runs consistent with the +# gate (it sets the versions of pip, setuptools, and wheel) +requires = virtualenv>=20.17.1 # this allows tox to infer the base python from the environment name # and override any basepython configured in this file ignore_basepython_conflict=true [testenv] basepython = python3 -setenv = VIRTUAL_ENV={envdir} - PYTHONWARNINGS=default::DeprecationWarning - OS_STDOUT_CAPTURE=1 - OS_STDERR_CAPTURE=1 - OS_TEST_TIMEOUT=60 - OS_TEST_PATH=./cinder/tests/unit -usedevelop = True -install_command=python -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://raw.githubusercontent.com/sapcc/requirements/stable/wallaby-m3/upper-constraints.txt} {opts} {packages} +usedevelop = true +setenv = + VIRTUAL_ENV={envdir} + OS_STDOUT_CAPTURE=1 + OS_STDERR_CAPTURE=1 + OS_TEST_TIMEOUT=60 + OS_TEST_PATH=./cinder/tests/unit + PYTHONDONTWRITEBYTECODE=1 +# TODO(stephenfin): Remove once we bump our upper-constraint to SQLAlchemy 2.0 + SQLALCHEMY_WARN_20=1 -deps = -r{toxinidir}/test-requirements.txt - -r{toxinidir}/requirements.txt +# this environment's install command is used if the 'minversion' or 'requires' +# values declared above in the [tox] section require tox to update itself, so +# we don't define a non-default install_command here + +deps = + -r{toxinidir}/test-requirements.txt + -r{toxinidir}/requirements.txt # By default stestr will set concurrency # to ncpu, to specify something else use @@ -35,20 +39,27 @@ commands = allowlist_externals = find 
-passenv = *_proxy *_PROXY - -[testenv:api-ref] -allowlist_externals = rm -deps = {[testenv:docs]deps} -commands = - rm -rf api-ref/build - sphinx-build -W -b html -d api-ref/build/doctrees api-ref/source api-ref/build/html/ +passenv = + *_proxy + *_PROXY -[testenv:releasenotes] -deps = {[testenv:docs]deps} -commands = sphinx-build -a -E -W -j auto -d releasenotes/build/doctrees -b html releasenotes/source releasenotes/build/html +[testenv:py{3,36,38,39,310,311}] +# NOTE: Do not move the constraints from the install_command into deps, as that +# may result in tox using unconstrained/untested dependencies. +# We use "usedevelop = True" for tox jobs (except bindep), so tox does 2 +# install calls, one for the deps and another for the cinder source code +# as editable (pip -e). +# Without the constraints in the install_command only the first +# installation will honor the upper constraints, and the second install +# for cinder itself will not know about the constraints which can result +# in installing versions we don't want. +# With constraints in the install_command tox will always honor our +# constraints. +install_command = + python -m pip install -c{env:TOX_CONSTRAINTS_FILE:https://raw.githubusercontent.com/sapcc/requirements/stable/wallaby-m3/upper-constraints.txt} {opts} {packages} [testenv:functional] +install_command = {[testenv:py3]install_command} setenv = OS_TEST_PATH = ./cinder/tests/functional @@ -61,6 +72,7 @@ setenv = {[testenv:functional]setenv} [testenv:api-samples] +install_command = {[testenv:functional]install_command} envdir = {toxworkdir}/shared setenv = GENERATE_SAMPLES=True @@ -71,10 +83,14 @@ commands = stestr slowest [testenv:compliance] +install_command = {[testenv:py3]install_command} setenv = OS_TEST_PATH = ./cinder/tests/compliance [testenv:pep8] +allowlist_externals = + {toxinidir}/tools/config/check_uptodate.sh + {toxinidir}/tools/check_exec.py commands = flake8 {posargs} . doc8 @@ -84,11 +100,15 @@ commands = [testenv:fast8] # Use same environment directory as pep8 env to save space and install time envdir = {toxworkdir}/pep8 +install_command = {[testenv:py3]install_command} +allowlist_externals = + {toxinidir}/tools/fast8.sh commands = {toxinidir}/tools/fast8.sh passenv = FAST8_NUM_COMMITS [testenv:pylint] +install_command = {[testenv:py3]install_command} deps = -r{toxinidir}/requirements.txt -r{toxinidir}/test-requirements.txt pylint==2.6.0 @@ -98,6 +118,7 @@ commands = [testenv:cover] # Also do not run test_coverage_ext tests while gathering coverage as those # tests conflict with coverage. 
+install_command = {[testenv:py3]install_command} setenv = {[testenv]setenv} PYTHON=coverage run --source cinder --parallel-mode @@ -108,20 +129,27 @@ commands = coverage xml -o cover/coverage.xml [testenv:genconfig] +install_command = {[testenv:py3]install_command} sitepackages = False envdir = {toxworkdir}/pep8 commands = oslo-config-generator --config-file=tools/config/cinder-config-generator.conf [testenv:genpolicy] +install_command = {[testenv:py3]install_command} commands = oslopolicy-sample-generator --config-file=tools/config/cinder-policy-generator.conf [testenv:genopts] +install_command = {[testenv:py3]install_command} sitepackages = False envdir = {toxworkdir}/pep8 commands = python tools/config/generate_cinder_opts.py [testenv:venv] +install_command = {[testenv:py3]install_command} +deps = + {[testenv]deps} + reno commands = {posargs} [testenv:docs] @@ -134,6 +162,7 @@ commands = allowlist_externals = rm [testenv:pdf-docs] +install_command = {[testenv:docs]install_command} deps = {[testenv:docs]deps} commands = rm -fr doc/source/contributor/api/ @@ -144,12 +173,27 @@ allowlist_externals = make rm +[testenv:api-ref] +install_command = {[testenv:docs]install_command} +allowlist_externals = rm +deps = {[testenv:docs]deps} +commands = + rm -rf api-ref/build + sphinx-build -W -b html -d api-ref/build/doctrees api-ref/source api-ref/build/html/ + +[testenv:releasenotes] +install_command = {[testenv:docs]install_command} +deps = {[testenv:docs]deps} +commands = sphinx-build -a -E -W -j auto -d releasenotes/build/doctrees -b html releasenotes/source releasenotes/build/html + [testenv:gendriverlist] +install_command = {[testenv:py3]install_command} sitepackages = False envdir = {toxworkdir}/venv commands = python {toxinidir}/tools/generate_driver_list.py [testenv:bandit] +install_command = {[testenv:py3]install_command} deps = -r{toxinidir}/test-requirements.txt commands = bandit -r cinder -n5 -x cinder/tests/* -ll @@ -162,8 +206,8 @@ commands = bandit-baseline -r cinder -n5 -x cinder/tests/* -ii -ll # system dependencies are missing, since it's used to tell you what system # dependencies are missing! This also means that bindep must be installed # separately, outside of the requirements files, and develop mode disabled -# explicitly to avoid unnecessarily installing the checked-out repo too (this -# further relies on "tox.skipsdist = True" above). +# explicitly to avoid unnecessarily installing the checked-out repo too +skip_install = True deps = bindep commands = bindep {posargs} usedevelop = False From 50da0750c63366af1dabb5947a3f31d9a9e78bf9 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 16 Mar 2023 09:14:24 -0400 Subject: [PATCH 124/149] [SAP] Fix vmdk stats reporting There is a failure for looking at the cinder_pool_state for a datastore when the state is None (not set). This patch ensures that the custom attribute is set to something before trying to call .lower() on the string. --- cinder/volume/drivers/vmware/vmdk.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index f15e1d5dd08..0eea29262b7 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -585,7 +585,8 @@ def _get_volume_stats(self): # in which case we want to mark it down. 
if 'cinder_state' in custom_attributes: cinder_pool_state = custom_attributes['cinder_state'] - if cinder_pool_state.lower() == 'drain': + if (cinder_pool_state and + cinder_pool_state.lower() == 'drain'): pool_state = 'down' pool_down_reason = 'Datastore marked as draining' From f2d13c9d63cb200b14471fd967643647975d3947 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 23 Feb 2023 10:49:14 -0500 Subject: [PATCH 125/149] [SAP] Update shard filter to filter only on vmware This patch adds a simple check against the pool in the shard filter to only filter if the pool is reporting as a vendor_name of 'VMware', which is true only for vmdk.py and fcd.py drivers. If we deploy a netapp cinder driver, this will pass the shard filter as there is no reason to shard. --- cinder/scheduler/filters/shard_filter.py | 7 ++ .../tests/unit/scheduler/test_shard_filter.py | 68 ++++++++++++++----- 2 files changed, 57 insertions(+), 18 deletions(-) diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index 7c2b741076c..8a0434916f4 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -143,6 +143,13 @@ def _get_shards(self, project_id): return self._PROJECT_SHARD_CACHE.get(project_id) def backend_passes(self, backend_state, filter_properties): + # We only need the shard filter for vmware based pools + if backend_state.vendor_name != 'VMware': + LOG.info( + "Shard Filter ignoring backend %s as it's not vmware based" + " driver", backend_state.backend_id) + return True + spec = filter_properties.get('request_spec', {}) vol = spec.get('volume_properties', {}) project_id = vol.get('project_id', None) diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 5c8f11037f5..9a9a41316d8 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -18,6 +18,8 @@ from cinder.tests.unit.scheduler.test_host_filters \ import BackendFiltersTestCase +VMWARE_VENDOR = 'VMware' + class ShardFilterTestCase(BackendFiltersTestCase): @@ -72,7 +74,9 @@ def set_cache(): def test_shard_project_not_found(self, mock_update_cache): caps = {'vcenter-shard': 'vc-a-1'} self.props['request_spec']['volume_properties']['project_id'] = 'bar' - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertFalse(self.filt_cls.backend_passes(host, self.props)) def test_snapshot(self): @@ -83,7 +87,9 @@ def test_snapshot(self): } } caps = {'vcenter-shard': 'vc-a-1'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertTrue(self.filt_cls.backend_passes(host, snap_props)) def test_snapshot_None(self): @@ -94,44 +100,58 @@ def test_snapshot_None(self): } } caps = {'vcenter-shard': 'vc-a-1'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertFalse(self.filt_cls.backend_passes(host, snap_props)) def test_shard_project_no_shards(self): caps = {'vcenter-shard': 'vc-a-1'} self.filt_cls._PROJECT_SHARD_CACHE['foo'] = [] - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) 
self.assertFalse(self.filt_cls.backend_passes(host, self.props)) def test_backend_without_shard(self): - host = fakes.FakeBackendState('host1', {}) + host = fakes.FakeBackendState('host1', {'vendor_name': VMWARE_VENDOR}) self.assertFalse(self.filt_cls.backend_passes(host, self.props)) def test_backend_shards_dont_match(self): caps = {'vcenter-shard': 'vc-a-1'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertFalse(self.filt_cls.backend_passes(host, self.props)) def test_backend_shards_match(self): caps = {'vcenter-shard': 'vc-b-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertTrue(self.filt_cls.backend_passes(host, self.props)) def test_shard_override_matches(self): caps = {'vcenter-shard': 'vc-a-1'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} self.assertTrue(self.filt_cls.backend_passes(host, self.props)) def test_shard_override_no_match(self): caps = {'vcenter-shard': 'vc-a-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} self.assertFalse(self.filt_cls.backend_passes(host, self.props)) def test_shard_override_no_data(self): caps = {'vcenter-shard': 'vc-a-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': None} self.assertFalse(self.filt_cls.backend_passes(host, self.props)) @@ -139,7 +159,9 @@ def test_sharding_enabled_any_backend_match(self): self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled'] self.props['request_spec']['volume_properties']['project_id'] = 'baz' caps = {'vcenter-shard': 'vc-a-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertTrue(self.filt_cls.backend_passes(host, self.props)) def test_sharding_enabled_and_single_shard_any_backend_match(self): @@ -147,7 +169,9 @@ def test_sharding_enabled_and_single_shard_any_backend_match(self): 'vc-a-1'] self.props['request_spec']['volume_properties']['project_id'] = 'baz' caps = {'vcenter-shard': 'vc-a-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.assertTrue(self.filt_cls.backend_passes(host, self.props)) def test_scheduler_hints_override_sharding_enabled(self): @@ -155,11 +179,15 @@ def test_scheduler_hints_override_sharding_enabled(self): self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} self.props['request_spec']['volume_properties']['project_id'] = 'baz' caps0 = {'vcenter-shard': 'vc-a-0'} - host0 = fakes.FakeBackendState('host0', {'capabilities': caps0}) - self.assertFalse(self.filt_cls.backend_passes(host0, self.props)) + host = fakes.FakeBackendState('host0', + {'capabilities': caps0, + 'vendor_name': VMWARE_VENDOR}) + self.assertFalse(self.filt_cls.backend_passes(host, self.props)) 
caps1 = {'vcenter-shard': 'vc-a-1'} - host1 = fakes.FakeBackendState('host1', {'capabilities': caps1}) - self.assertTrue(self.filt_cls.backend_passes(host1, self.props)) + host = fakes.FakeBackendState('host1', + {'capabilities': caps1, + 'vendor_name': VMWARE_VENDOR}) + self.assertTrue(self.filt_cls.backend_passes(host, self.props)) def test_noop_for_find_backend_by_connector_with_hint(self): """Check if we pass any backend @@ -171,7 +199,9 @@ def test_noop_for_find_backend_by_connector_with_hint(self): making when we don't know where the volume will be attached. """ caps = {'vcenter-shard': 'vc-a-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} self.props['request_spec']['operation'] = 'find_backend_for_connector' self.assertTrue(self.filt_cls.backend_passes(host, self.props)) @@ -187,6 +217,8 @@ def test_noop_for_find_backend_by_connector_without_hint(self): """ self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['vc-a-1'] caps = {'vcenter-shard': 'vc-a-0'} - host = fakes.FakeBackendState('host1', {'capabilities': caps}) + host = fakes.FakeBackendState('host1', + {'capabilities': caps, + 'vendor_name': VMWARE_VENDOR}) self.props['request_spec']['operation'] = 'find_backend_for_connector' self.assertTrue(self.filt_cls.backend_passes(host, self.props)) From 297adb9cef19c2aee6babde399079a99a39136aa Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Wed, 1 Feb 2023 09:25:20 -0500 Subject: [PATCH 126/149] [SAP] Add action_track to format specific logs for easier debugging This patch adds the new action_track for cinder. The purpose of this module is to add a track() method which can be used to log business logic actions being taken on resources (volumes, snaps, etc) so we can then use this action logs to discover our most common failures and help debug/fix them. For now the only supported output is sending the actions into the standard cinder logfile. But we could add another adapter in the trace() method for sending the action logs to a rabbitmq queue, and/or a db table directly. This patch also adds calls throughout cinder code to track specific actions against volumes, such as create, delete, attach, migrate. Updated the test_attachments_manager test to use a magicmock instead of a sentinel as sentinels don't cover unknown attrubutes like magickmock does. --- cinder/action_track.py | 200 +++++++++++++++++ cinder/api/contrib/volume_actions.py | 31 +++ cinder/api/v3/attachments.py | 29 +++ cinder/backup/manager.py | 145 +++++++++--- .../attachments/test_attachments_manager.py | 3 +- cinder/tests/unit/volume/test_connection.py | 5 +- cinder/volume/api.py | 137 ++++++++++-- cinder/volume/manager.py | 206 +++++++++++++++--- 8 files changed, 676 insertions(+), 80 deletions(-) create mode 100644 cinder/action_track.py diff --git a/cinder/action_track.py b/cinder/action_track.py new file mode 100644 index 00000000000..73c34d531fa --- /dev/null +++ b/cinder/action_track.py @@ -0,0 +1,200 @@ +# Copyright 2023 Openstack Foundation. +# All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. 
You may obtain +# a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +# License for the specific language governing permissions and limitations +# under the License. + +import abc +import inspect +import traceback + +import decorator +from oslo_config import cfg +from oslo_log import log as logging +from oslo_utils import excutils + +from cinder import context as cinder_context + + +CONF = cfg.CONF +LOG = logging.getLogger(__name__) + +ACTION_VOLUME_CREATE = "volume_create" +ACTION_VOLUME_DELETE = "volume_delete" +ACTION_VOLUME_RESERVE = "volume_reserve" +ACTION_VOLUME_ATTACH = "volume_attach" +ACTION_VOLUME_EXTEND = "volume_extend" +ACTION_VOLUME_DETACH = "volume_detach" +ACTION_VOLUME_MIGRATE = "volume_migrate" +ACTION_VOLUME_RETYPE = "volume_retype" +ACTION_VOLUME_BACKUP = "volume_backup" +ACTION_BACKUP_RESTORE = "volume_restore" +ACTION_VOLUME_BACKUP_DELETE = "volume_backup_delete" +ACTION_VOLUME_COPY_TO_IMAGE = "volume_copy_to_image" +ACTION_VOLUME_BACKUP_RESET_STATUS = "volume_backup_reset_status" +ACTION_SNAPSHOT_CREATE = "snapshot_create" +ACTION_SNAPSHOT_DELETE = "snapshot_delete" + + +VALID_RESOURCE_NAMES = [ + 'volume', 'backup', 'snapshot' +] +VALID_CONTEXT_NAMES = [ + 'context', 'ctxt' +] + +log_level_map = { + logging.CRITICAL: LOG.error, + logging.ERROR: LOG.error, + logging.WARNING: LOG.warning, + logging.INFO: LOG.info, + logging.DEBUG: LOG.debug, +} + + +class ActionTrack(object, metaclass=abc.ABCMeta): + """Base class for the 'trace' api. + + The purpose of this trace facility is to be able to + keep track of critical parts of operations against resources. + This will create a standardized object/log entry for troubleshooting + actions against resources. + + This is not for performance tracing, but for action/operation tracking. + by default it will simply format a log entry such that the entries are + easy to find with a standard format and information. + """ + + @staticmethod + @abc.abstractmethod + def track(context, action, resource, message, loglevel=logging.INFO): + pass + + @staticmethod + @abc.abstractmethod + def track_with_file_info(context, action, resource, message, + filename, line_number, function, + loglevel=logging.INFO): + pass + + +class LogActionTrack(ActionTrack): + @staticmethod + def _track_with_info(context, action, resource, message, + filename, line_number, function, + loglevel=logging.INFO): + entry = f"ACTION:'{action}' " + if loglevel == logging.ERROR or loglevel == logging.CRITICAL: + # The action failed and this trace is the reason + entry += "FAILED " + + msg = message.replace("\n", "") + + entry += ( + f"MSG:'{msg}' " + f"FILE:{filename}:{line_number}:{function} " + f"RSC:{resource} " + ) + log_func = log_level_map[loglevel] + log_func(entry, resource=resource) + + @staticmethod + def track(context, action, resource, message, loglevel=logging.INFO): + # Do not call this directly. Call action_track.track() instead. 
+ + # We only want the frame of the caller + # we should always be called from the trace() method in this module + # not called directly in this static method + info = list(traceback.walk_stack(None))[1][0] + LogActionTrack._track_with_info(context, action, resource, message, + info.f_code.co_filename, + info.f_lineno, + info.f_code.co_name, + loglevel=loglevel) + + @staticmethod + def track_with_file_info(context, action, resource, message, + filename, line_number, function, + loglevel=logging.INFO): + # Do not call this directly. + # Call action_track.track_with_file_info() instead. + LogActionTrack._track_with_info( + context, action, resource, message, + filename, line_number, function, loglevel=loglevel + ) + + +def track(context, action, resource, message, loglevel=logging.INFO): + """For now we only implement LogActionTrack. + + TODO(waboring): add rabbitmq trace? to send entries to a msg queue + or add: DBtrace to send traces to the DB instead? + """ + LogActionTrack.track(context, action, resource, message, + loglevel=loglevel) + + +def track_with_info(context, action, resource, message, file, line_number, + function, loglevel=logging.INFO): + """For now we only implement LogActionTrack. + + TODO(waboring): add rabbitmq trace? to send entries to a msg queue + or add: DBtrace to send traces to the DB instead? + """ + LogActionTrack.track_with_file_info(context, action, resource, message, + file, line_number, function, + loglevel=loglevel) + + +def track_decorator(action): + """Decorator to automatically handle exceptions raised as failures. + + Place this decorator on a function that you want to mark as an + action failure and the action_track tracing will get called + for the action. + + @track_decorator(action_track.ACTION_VOLUME_ATTACH) + def initialize_connection(....) + If initialize_connection raises an exception then you will get a + action_track.track called with action of ACTION_VOLUME_ATTACH + and a failure. + + """ + @decorator.decorator + def inner(func, *args, **kwargs): + # Find the context and the volume/backup object + resource = None + context = None + call_args = inspect.getcallargs(func, *args, **kwargs) + for key in call_args: + if key in VALID_RESOURCE_NAMES: + resource = call_args[key] + elif (key in VALID_CONTEXT_NAMES and + isinstance(call_args[key], cinder_context.RequestContext)): + context = call_args[key] + + track(context, action, resource, "called") + + try: + return func(*args, **kwargs) + except Exception: + with excutils.save_and_reraise_exception() as exc: + # We only want the frame of the caller + tl = traceback.extract_tb(exc.tb) + i = tl[1] + message = str(exc.value) + track_with_info( + context, action, resource, message, + i.filename, i.lineno, i.name, + loglevel=logging.ERROR + ) + return inner diff --git a/cinder/api/contrib/volume_actions.py b/cinder/api/contrib/volume_actions.py index c49779ccd84..96f3fb68304 100644 --- a/cinder/api/contrib/volume_actions.py +++ b/cinder/api/contrib/volume_actions.py @@ -12,6 +12,7 @@ # License for the specific language governing permissions and limitations # under the License. 
from http import HTTPStatus +import logging from castellan import key_manager from oslo_config import cfg @@ -19,6 +20,7 @@ from oslo_utils import strutils import webob +from cinder import action_track from cinder.api import extensions from cinder.api import microversions as mv from cinder.api.openstack import wsgi @@ -77,11 +79,20 @@ def _attach(self, req, id, body): msg = _("Error attaching volume - %(err_type)s: " "%(err_msg)s") % { 'err_type': error.exc_type, 'err_msg': error.value} + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, msg, loglevel=logging.ERROR + ) raise webob.exc.HTTPBadRequest(explanation=msg) else: # There are also few cases where attach call could fail due to # db or volume driver errors. These errors shouldn't be exposed # to the user and in such cases it should raise 500 error. + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, f"Attach failed because {error}", + loglevel=logging.ERROR + ) raise @wsgi.response(HTTPStatus.ACCEPTED) @@ -159,20 +170,40 @@ def _initialize_connection(self, req, id, body): volume = self.volume_api.get(context, id) connector = body['os-initialize_connection']['connector'] try: + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, "Call initialize_connection", + ) info = self.volume_api.initialize_connection(context, volume, connector) except exception.InvalidInput as err: + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, "Invalid Input", loglevel=logging.ERROR + ) raise webob.exc.HTTPBadRequest( explanation=err.msg) except exception.ConnectorRejected: msg = _("Volume needs to be migrated before attaching to this " "instance") + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, msg + ) raise webob.exc.HTTPNotAcceptable(explanation=msg) except exception.VolumeBackendAPIException: msg = _("Unable to fetch connection information from backend.") + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, msg, loglevel=logging.ERROR + ) raise webob.exc.HTTPInternalServerError(explanation=msg) except messaging.RemoteError as error: + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, f"Failed because {error}", loglevel=logging.ERROR + ) if error.exc_type == 'InvalidInput': raise exception.InvalidInput(reason=error.value) raise diff --git a/cinder/api/v3/attachments.py b/cinder/api/v3/attachments.py index a4301edba9d..d24520e7607 100644 --- a/cinder/api/v3/attachments.py +++ b/cinder/api/v3/attachments.py @@ -16,6 +16,7 @@ from oslo_log import log as logging import webob +from cinder import action_track from cinder.api import api_utils from cinder.api import common from cinder.api import microversions as mv @@ -203,14 +204,26 @@ def create(self, req, body): _msg = _("Volume needs to be migrated before attaching to this " "instance") LOG.exception(_msg) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume_ref, _msg, loglevel=logging.ERROR + ) raise webob.exc.HTTPNotAcceptable(explanation=_msg) except exception.CinderException as ex: err_msg = _( "Unable to create attachment for volume (%s).") % ex.msg LOG.exception(err_msg) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume_ref, err_msg, loglevel=logging.ERROR + ) except Exception: err_msg = _("Unable to create attachment for volume.") LOG.exception(err_msg) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume_ref, err_msg, loglevel=logging.ERROR + ) 
finally: if err_msg: raise webob.exc.HTTPInternalServerError(explanation=err_msg) @@ -263,14 +276,26 @@ def update(self, req, id, body): _msg = _("Volume needs to be migrated before attaching to this " "instance") LOG.exception(_msg) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + attachment_ref, _msg, loglevel=logging.ERROR + ) raise webob.exc.HTTPNotAcceptable(explanation=_msg) except exception.CinderException as ex: err_msg = ( _("Unable to update attachment.(%s).") % ex.msg) LOG.exception(err_msg) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + attachment_ref, err_msg, loglevel=logging.ERROR + ) except Exception: err_msg = _("Unable to update the attachment.") LOG.exception(err_msg) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + attachment_ref, err_msg, loglevel=logging.ERROR + ) finally: if err_msg: raise webob.exc.HTTPInternalServerError(explanation=err_msg) @@ -312,6 +337,10 @@ def complete(self, req, id, body): attachment_ref.save() volume_ref.update({'status': 'in-use', 'attach_status': 'attached'}) volume_ref.save() + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "Volume attachment completed!" + ) volume_utils.notify_about_volume_usage(context, volume_ref, "attach.end") diff --git a/cinder/backup/manager.py b/cinder/backup/manager.py index 5b51440224e..2196f20d237 100644 --- a/cinder/backup/manager.py +++ b/cinder/backup/manager.py @@ -45,6 +45,7 @@ from oslo_utils import importutils from oslo_utils import timeutils +from cinder import action_track from cinder.backup import rpcapi as backup_rpcapi from cinder import context from cinder import exception @@ -58,6 +59,7 @@ from cinder.volume import rpcapi as volume_rpcapi from cinder.volume import volume_utils + LOG = logging.getLogger(__name__) backup_manager_opts = [ @@ -330,6 +332,7 @@ def _cleanup_temp_volumes_snapshots_when_backup_created( if backup.temp_snapshot_id: self._delete_temp_snapshot(ctxt, backup) + @action_track.track_decorator(action_track.ACTION_VOLUME_BACKUP) @utils.limit_operations def create_backup(self, context, backup): """Create volume backups using configured backup service.""" @@ -350,7 +353,10 @@ def create_backup(self, context, backup): 'volume: %(volume_id)s.' % {'backup_id': backup.id, 'volume_id': volume_id}) - LOG.info(log_message) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, log_message + ) self._notify_about_backup_usage(context, backup, "create.start") @@ -434,9 +440,14 @@ def _start_backup(self, context, backup, volume): # This is an async call to the volume manager. We will get a # callback from the volume manager to continue once it's done. + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, "calling volume_rpcapi.get_backup_device()" + ) self.volume_rpcapi.get_backup_device(context, backup, volume) @volume_utils.trace + @action_track.track_decorator(action_track.ACTION_VOLUME_BACKUP) def continue_backup(self, context, backup, backup_device): """This is the callback from the volume manager to continue. 
@@ -463,11 +474,19 @@ def continue_backup(self, context, backup, backup_device): raise exception.BackupOperationError("Failed to get backup " "device from driver.") + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, "_attach_device()" + ) attach_info = self._attach_device(context, backup_device.device_obj, properties, backup_device.is_snapshot) try: + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, "call backup_service.backup()" + ) device_path = attach_info['device']['path'] if (isinstance(device_path, str) and not os.path.isdir(device_path)): @@ -507,9 +526,13 @@ def continue_backup(self, context, backup, backup_device): self._cleanup_temp_volumes_snapshots_when_backup_created( context, backup) - LOG.info("finish backup!") + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, "finish backup!" + ) self._finish_backup(context, backup, volume, updates) + @action_track.track_decorator(action_track.ACTION_VOLUME_BACKUP) @volume_utils.trace def _finish_backup(self, context, backup, volume, updates): volume_id = backup.volume_id @@ -548,6 +571,11 @@ def _finish_backup(self, context, backup, volume, updates): parent_backup.num_dependent_backups += 1 parent_backup.save() LOG.info('Create backup %s. backup: %s.', completion_msg, backup.id) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, + f'Create backup {completion_msg}. backup: {backup.id}.' + ) self._notify_about_backup_usage(context, backup, "create.end") def _is_our_backup(self, backup): @@ -572,12 +600,17 @@ def _is_our_backup(self, backup): return False + @action_track.track_decorator(action_track.ACTION_BACKUP_RESTORE) @utils.limit_operations def restore_backup(self, context, backup, volume_id): """Restore volume backups from configured backup service.""" - LOG.info('Restore backup started, backup: %(backup_id)s ' - 'volume: %(volume_id)s.', - {'backup_id': backup.id, 'volume_id': volume_id}) + msg = ('Restore backup started, backup: %(backup_id)s ' + 'volume: %(volume_id)s.' % + {'backup_id': backup.id, 'volume_id': volume_id}) + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, msg + ) volume = objects.Volume.get_by_id(context, volume_id) self._notify_about_backup_usage(context, backup, "restore.start") @@ -613,13 +646,17 @@ def restore_backup(self, context, backup, volume_id): raise exception.InvalidBackup(reason=err) if volume['size'] > backup['size']: - LOG.info('Volume: %(vol_id)s, size: %(vol_size)d is ' - 'larger than backup: %(backup_id)s, ' - 'size: %(backup_size)d, continuing with restore.', - {'vol_id': volume['id'], - 'vol_size': volume['size'], - 'backup_id': backup['id'], - 'backup_size': backup['size']}) + msg = ('Volume: %(vol_id)s, size: %(vol_size)d is ' + 'larger than backup: %(backup_id)s, ' + 'size: %(backup_size)d, continuing with restore.' 
% + {'vol_id': volume['id'], + 'vol_size': volume['size'], + 'backup_id': backup['id'], + 'backup_size': backup['size']}) + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, msg + ) if not self._is_our_backup(backup): err = _('Restore backup aborted, the backup service currently' @@ -640,6 +677,10 @@ def restore_backup(self, context, backup, volume_id): self._run_restore(context, backup, volume) except exception.BackupRestoreCancel: canceled = True + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, "restore was cancelled", loglevel=logging.ERROR + ) except Exception: with excutils.save_and_reraise_exception(): self.db.volume_update( @@ -672,26 +713,42 @@ def restore_backup(self, context, backup, volume_id): volume.save() backup.status = fields.BackupStatus.AVAILABLE backup.save() - LOG.info('%(result)s restoring backup %(backup_id)s to volume ' - '%(volume_id)s.', - {'result': 'Canceled' if canceled else 'Finished', - 'backup_id': backup.id, - 'volume_id': volume_id}) + msg = ('%(result)s restoring backup %(backup_id)s to volume ' + '%(volume_id)s.' % + {'result': 'Canceled' if canceled else 'Finished', + 'backup_id': backup.id, + 'volume_id': volume_id}) self._notify_about_backup_usage(context, backup, "restore.end") + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, msg + ) def _run_restore(self, context, backup, volume): orig_key_id = volume.encryption_key_id backup_service = self.service(context) + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, "_run_restore called" + ) properties = volume_utils.brick_get_connector_properties() secure_enabled = ( self.volume_rpcapi.secure_file_operations_enabled(context, volume)) + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, "_attach_device" + ) attach_info = self._attach_device(context, volume, properties) # NOTE(geguileo): Not all I/O disk operations properly do greenthread # context switching and may end up blocking the greenthread, so we go # with native threads proxy-wrapping the device file object. + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, "call backup_service.restore" + ) try: device_path = attach_info['device']['path'] open_mode = 'rb+' if os.name == 'nt' else 'wb' @@ -713,9 +770,14 @@ def _run_restore(self, context, backup, volume): except exception.BackupRestoreCancel: raise except Exception: - LOG.exception('Restoring backup %(backup_id)s to volume ' - '%(volume_id)s failed.', {'backup_id': backup.id, - 'volume_id': volume.id}) + msg = ('Restoring backup %(backup_id)s to volume ' + '%(volume_id)s failed.' 
% + {'backup_id': backup.id, 'volume_id': volume.id}) + LOG.exception(msg) + action_track.track( + context, action_track.ACTION_BACKUP_RESTORE, + backup, msg, loglevel=logging.ERROR + ) raise finally: self._detach_device(context, attach_info, volume, properties, @@ -763,6 +825,7 @@ def _run_restore(self, context, backup, volume): 'matches encryption key ID in backup %(backup_id)s.', {'volume_id': volume.id, 'backup_id': backup.id}) + @action_track.track_decorator(action_track.ACTION_VOLUME_BACKUP_DELETE) def delete_backup(self, context, backup): """Delete volume backup from configured backup service.""" LOG.info('Delete backup started, backup: %s.', backup.id) @@ -777,12 +840,20 @@ def delete_backup(self, context, backup): % {'expected_status': expected_status, 'actual_status': actual_status} volume_utils.update_backup_error(backup, err) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP_DELETE, + backup, err, loglevel=logging.ERROR + ) raise exception.InvalidBackup(reason=err) if backup.service and not self.is_working(): err = _('Delete backup is aborted due to backup service is down.') status = fields.BackupStatus.ERROR_DELETING volume_utils.update_backup_error(backup, err, status) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP_DELETE, + backup, err, loglevel=logging.ERROR + ) raise exception.InvalidBackup(reason=err) if not self._is_our_backup(backup): @@ -793,6 +864,10 @@ def delete_backup(self, context, backup): % {'configured_service': self.driver_name, 'backup_service': backup.service} volume_utils.update_backup_error(backup, err) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP_DELETE, + backup, err, loglevel=logging.ERROR + ) raise exception.InvalidBackup(reason=err) if backup.service: @@ -802,6 +877,10 @@ def delete_backup(self, context, backup): except Exception as err: with excutils.save_and_reraise_exception(): volume_utils.update_backup_error(backup, str(err)) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP_DELETE, + backup, str(err), loglevel=logging.ERROR + ) # Get reservations try: @@ -837,7 +916,11 @@ def delete_backup(self, context, backup): QUOTAS.commit(context, reservations, project_id=backup.project_id) - LOG.info('Delete backup finished, backup %s deleted.', backup.id) + msg = ('Delete backup finished, backup %s deleted.' % backup.id) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP_DELETE, + backup, msg + ) self._notify_about_backup_usage(context, backup, "delete.end") def _notify_about_backup_usage(self, @@ -999,6 +1082,9 @@ def import_record(self, LOG.info('Import record id %s metadata from driver ' 'finished.', backup.id) + @action_track.track_decorator( + action_track.ACTION_VOLUME_BACKUP_RESET_STATUS + ) def reset_status(self, context, backup, status): """Reset volume backup status. @@ -1065,15 +1151,24 @@ def _attach_volume(self, context, volume, properties): return self._connect_device(conn) except Exception: with excutils.save_and_reraise_exception(): + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, "Failed volume_rpcapi.initialize_connection", + loglevel=logging.ERROR + ) try: self.volume_rpcapi.terminate_connection(context, volume, properties, force=True) except Exception: - LOG.warning("Failed to terminate the connection " - "of volume %(volume_id)s, but it is " - "acceptable.", - {'volume_id': volume.id}) + msg = ("Failed to terminate the connection " + "of volume %(volume_id)s, but it is " + "acceptable." 
% + {'volume_id': volume.id}) + action_track.track( + context, action_track.ACTION_VOLUME_BACKUP, + volume, msg + ) def _attach_snapshot(self, ctxt, snapshot, properties): """Attach a snapshot.""" diff --git a/cinder/tests/unit/attachments/test_attachments_manager.py b/cinder/tests/unit/attachments/test_attachments_manager.py index f540123b1a2..f545e029c02 100644 --- a/cinder/tests/unit/attachments/test_attachments_manager.py +++ b/cinder/tests/unit/attachments/test_attachments_manager.py @@ -243,12 +243,13 @@ def test_connection_terminate_no_connector_force_false(self): # does not have a connector will not call the driver and return None # if the force flag is False. attachment = mock.MagicMock(connector={}) + volume = mock.MagicMock() with mock.patch.object(self.manager.driver, '_initialized', create=True, new=True): with mock.patch.object(self.manager.driver, 'terminate_connection') as term_conn: has_shared_connection = self.manager._connection_terminate( - self.context, mock.sentinel.volume, attachment) + self.context, volume, attachment) self.assertIsNone(has_shared_connection) term_conn.assert_not_called() diff --git a/cinder/tests/unit/volume/test_connection.py b/cinder/tests/unit/volume/test_connection.py index 0d472d6228b..abb5908fc14 100644 --- a/cinder/tests/unit/volume/test_connection.py +++ b/cinder/tests/unit/volume/test_connection.py @@ -500,8 +500,9 @@ def test_run_attach_detach_volume_for_instance(self, volume_object): self.context, volume_id) - @mock.patch('cinder.volume.manager.LOG', mock.Mock()) - def test_initialize_connection(self): + @mock.patch.object(cinder.action_track.LOG, "log") + @mock.patch.object(cinder.volume.manager.LOG, "log") + def test_initialize_connection(self, _mock_manager, _mock_track): volume = mock.Mock(save=mock.Mock(side_effect=Exception)) with mock.patch.object(self.volume, 'driver') as driver_mock: self.assertRaises(exception.ExportFailure, diff --git a/cinder/volume/api.py b/cinder/volume/api.py index 76135fa471f..89e70b205c8 100644 --- a/cinder/volume/api.py +++ b/cinder/volume/api.py @@ -28,6 +28,7 @@ from oslo_utils import timeutils from oslo_utils import versionutils +from cinder import action_track from cinder.api import common from cinder.common import constants from cinder import context @@ -203,6 +204,7 @@ def _is_encrypted(self, volume_type): return False return specs.get('encryption', {}) is not {} + @action_track.track_decorator(action_track.ACTION_VOLUME_CREATE) def create(self, context, size, name, description, snapshot=None, image_id=None, volume_type=None, metadata=None, availability_zone=None, source_volume=None, @@ -344,6 +346,10 @@ def create(self, context, size, name, description, snapshot=None, create_what, sched_rpcapi, volume_rpcapi) + action_track.track( + context, action_track.ACTION_VOLUME_CREATE, + None, "taskflow get_flow() created" + ) except Exception: msg = _('Failed to create api volume flow.') LOG.exception(msg) @@ -361,8 +367,10 @@ def create(self, context, size, name, description, snapshot=None, if flow_engine.storage.fetch('refresh_az'): self.list_availability_zones(enable_cache=True, refresh_cache=True) - LOG.info("Create volume request issued successfully.", - resource=vref) + action_track.track( + context, action_track.ACTION_VOLUME_CREATE, + vref, "create volume request issued successfully" + ) return vref except exception.InvalidAvailabilityZone: with excutils.save_and_reraise_exception(): @@ -696,6 +704,7 @@ def get_all_snapshots(self, context, search_opts=None, marker=None, LOG.info("Get all snapshots 
completed successfully.") return snapshots + @action_track.track_decorator(action_track.ACTION_VOLUME_RESERVE) def reserve_volume(self, context, volume): context.authorize(vol_action_policy.RESERVE_POLICY, target_obj=volume) expected = {'multiattach': volume.multiattach, @@ -709,11 +718,12 @@ def reserve_volume(self, context, volume): msg = _('Volume status must be %(expected)s to reserve, but the ' 'status is %(current)s.') % {'expected': expected_status, 'current': volume.status} - LOG.error(msg) raise exception.InvalidVolume(reason=msg) - LOG.info("Reserve volume completed successfully.", - resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, "Reserve volume completed successfully." + ) def unreserve_volume(self, context, volume): context.authorize(vol_action_policy.UNRESERVE_POLICY, @@ -763,6 +773,7 @@ def roll_detaching(self, context, volume): LOG.info("Roll detaching of volume completed successfully.", resource=volume) + @action_track.track_decorator(action_track.ACTION_VOLUME_ATTACH) def attach(self, context, volume, instance_uuid, host_name, mountpoint, mode): context.authorize(vol_action_policy.ATTACH_POLICY, @@ -782,16 +793,23 @@ def attach(self, context, volume, instance_uuid, host_name, raise exception.InvalidVolumeAttachMode(mode=mode, volume_id=volume.id) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, "calling volume_rpc_api.attach_volume" + ) attach_results = self.volume_rpcapi.attach_volume(context, volume, instance_uuid, host_name, mountpoint, mode) - LOG.info("Attach volume completed successfully.", - resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, "Attach volume completed successfully." + ) return attach_results + @action_track.track_decorator(action_track.ACTION_VOLUME_DETACH) def detach(self, context, volume, attachment_id): context.authorize(vol_action_policy.DETACH_POLICY, target_obj=volume) @@ -806,6 +824,7 @@ def detach(self, context, volume, attachment_id): resource=volume) return detach_results + @action_track.track_decorator(action_track.ACTION_VOLUME_MIGRATE) def migrate_volume_by_connector(self, ctxt, volume, connector, lock_volume): if not connector: @@ -829,9 +848,11 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, ctxt, connector, request_spec, volume.size, filter_properties=filter_properties) except exception.NoValidBackend: - LOG.error("The connector was rejected by the backend. Could not " - "find another backend compatible with the connector %s.", - connector) + msg = ("The connector was rejected by the backend. 
Could not " + "find another backend compatible with the connector %s.", + connector) + action_track.track(ctxt, action_track.ACTION_VOLUME_MIGRATE, + volume, msg, loglevel=logging.ERROR) return None backend = host_manager.BackendState(host=dest['host'], cluster_name=dest['cluster_name'], @@ -866,6 +887,10 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, 'maintenance')], else_=volume.model.status) + action_track.track( + ctxt, action_track.ACTION_VOLUME_MIGRATE, + volume, "updating volume migration_status to 'starting'" + ) result = volume.conditional_update(updates, expected, filters) if not result: @@ -876,7 +901,10 @@ def migrate_volume_by_connector(self, ctxt, volume, connector, LOG.error(msg) raise exception.InvalidVolume(reason=msg) - LOG.debug("Invoking migrate_volume to host=%s", dest['host']) + action_track.track( + ctxt, action_track.ACTION_VOLUME_MIGRATE, + volume, f"calling volume_rpcapi.migrate_volume to {dest['host']}" + ) self.volume_rpcapi.migrate_volume(ctxt, volume, backend, force_host_copy=False, wait_for_completion=False) @@ -1149,8 +1177,10 @@ def create_snapshot(self, context, result = self._create_snapshot(context, volume, name, description, False, metadata, cgsnapshot_id, group_snapshot_id) - LOG.info("Snapshot create request issued successfully.", - resource=result) + action_track.track( + context, action_track.ACTION_SNAPSHOT_CREATE, + result, "Snapshot create request issued successfully." + ) return result def create_snapshot_force(self, context, @@ -1158,8 +1188,10 @@ def create_snapshot_force(self, context, description, metadata=None): result = self._create_snapshot(context, volume, name, description, True, metadata) - LOG.info("Snapshot force create request issued successfully.", - resource=result) + action_track.track( + context, action_track.ACTION_SNAPSHOT_CREATE, + result, "Snapshot force create request issued successfully." + ) return result def delete_snapshot(self, context, snapshot, force=False, @@ -1190,8 +1222,10 @@ def delete_snapshot(self, context, snapshot, force=False, raise exception.InvalidSnapshot(reason=msg) self.volume_rpcapi.delete_snapshot(context, snapshot, unmanage_only) - LOG.info("Snapshot delete request issued successfully.", - resource=snapshot) + action_track.track( + context, action_track.ACTION_SNAPSHOT_DELETE, + snapshot, "Snapshot delete request issued successfully." 
+ ) def update_snapshot(self, context, snapshot, fields): context.authorize(snapshot_policy.UPDATE_POLICY, @@ -2220,6 +2254,10 @@ def _attachment_reserve(self, ctxt, vref, instance_uuid=None): # NOTE(jdg): Reserved is a special case, we're avoiding allowing # creation of other new reserves/attachments while in this state # so we avoid contention issues with shared connections + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + vref, "called", + ) # Multiattach of bootable volumes is a special case with it's own # policy, check that here right off the bat @@ -2266,6 +2304,10 @@ def _attachment_reserve(self, ctxt, vref, instance_uuid=None): {'vol_id': vref.id, 'statuses': utils.build_or_str(expected['status']), 'current': vref.status}) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + vref, msg, loglevel=logging.ERROR + ) raise exception.InvalidVolume(reason=msg) values = {'volume_id': vref.id, @@ -2273,6 +2315,10 @@ def _attachment_reserve(self, ctxt, vref, instance_uuid=None): 'attach_status': 'reserved', 'instance_uuid': instance_uuid} db_ref = self.db.volume_attach(ctxt.elevated(), values) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + vref, "attach_status changed to reserved", + ) return objects.VolumeAttachment.get_by_id(ctxt, db_ref['id']) def attachment_create(self, @@ -2284,6 +2330,10 @@ def attachment_create(self, """Create an attachment record for the specified volume.""" ctxt.authorize(attachment_policy.CREATE_POLICY, target_obj=volume_ref) connection_info = {} + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "attachment_create called" + ) if "error" in volume_ref.status: msg = ('Volume attachments can not be created if the volume ' 'is in an error state. ' @@ -2291,13 +2341,20 @@ def attachment_create(self, '%(volume_status)s ') % { 'volume_id': volume_ref.id, 'volume_status': volume_ref.status} - LOG.error(msg) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, msg, loglevel=logging.ERROR + ) raise exception.InvalidVolume(reason=msg) attachment_ref = self._attachment_reserve(ctxt, volume_ref, instance_uuid) if connector: try: + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "calling volume_rpc_api.attachment_update" + ) connection_info = ( self.volume_rpcapi.attachment_update(ctxt, volume_ref, @@ -2305,6 +2362,12 @@ def attachment_create(self, attachment_ref.id)) except Exception: with excutils.save_and_reraise_exception(): + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, + "attachment_update failed, deleting attachment record", + loglevel=logging.ERROR + ) self.attachment_delete(ctxt, attachment_ref) attachment_ref.connection_info = connection_info @@ -2326,6 +2389,10 @@ def attachment_create(self, attachment_ref.attach_mode = attach_mode attachment_ref.save() + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "attachment_create done" + ) return attachment_ref @coordination.synchronized( @@ -2348,6 +2415,10 @@ def attachment_update(self, ctxt, attachment_ref, connector): ctxt.authorize(attachment_policy.UPDATE_POLICY, target_obj=attachment_ref) volume_ref = objects.Volume.get_by_id(ctxt, attachment_ref.volume_id) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "called" + ) if "error" in volume_ref.status: msg = ('Volume attachments can not be updated if the volume ' 'is in an error state. 
The Volume %(volume_id)s ' @@ -2355,6 +2426,10 @@ def attachment_update(self, ctxt, attachment_ref, connector): 'volume_id': volume_ref.id, 'volume_status': volume_ref.status} LOG.error(msg) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, msg, loglevel=logging.ERROR + ) raise exception.InvalidVolume(reason=msg) if (len(volume_ref.volume_attachment) > 1 and @@ -2376,8 +2451,16 @@ def attachment_update(self, ctxt, attachment_ref, connector): msg = _('duplicate connectors detected on volume ' '%(vol)s') % {'vol': volume_ref.id} - + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, msg, loglevel=logging.ERROR + ) raise exception.InvalidVolume(reason=msg) + + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "calling volume_rpcapi.attachment_update" + ) connection_info = ( self.volume_rpcapi.attachment_update(ctxt, volume_ref, @@ -2385,12 +2468,20 @@ def attachment_update(self, ctxt, attachment_ref, connector): attachment_ref.id)) attachment_ref.connection_info = connection_info attachment_ref.save() + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume_ref, "Complete" + ) return attachment_ref def attachment_delete(self, ctxt, attachment): ctxt.authorize(attachment_policy.DELETE_POLICY, target_obj=attachment) volume = objects.Volume.get_by_id(ctxt, attachment.volume_id) + action_track.track( + ctxt, action_track.ACTION_VOLUME_DETACH, + volume, "called" + ) if attachment.attach_status == fields.VolumeAttachStatus.RESERVED: volume_utils.notify_about_volume_usage(ctxt, @@ -2398,6 +2489,10 @@ def attachment_delete(self, ctxt, attachment): else: # Generate the detach.start notification on the volume service to # include the host doing the operation. + action_track.track( + ctxt, action_track.ACTION_VOLUME_DETACH, + volume, "call volume_rpcapi.attachment_delete" + ) self.volume_rpcapi.attachment_delete(ctxt, attachment.id, volume) # Trigger attachments lazy load (missing since volume was loaded in the @@ -2412,6 +2507,10 @@ def attachment_delete(self, ctxt, attachment): # #1937084, and doing the notification there with the finish here leads # to bug #1916980. volume_utils.notify_about_volume_usage(ctxt, volume, "detach.end") + action_track.track( + ctxt, action_track.ACTION_VOLUME_DETACH, + volume, "Completed" + ) return volume.volume_attachment diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 00078c45641..49e3e53da9e 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -55,6 +55,7 @@ import requests from taskflow import exceptions as tfe +from cinder import action_track from cinder.backup import rpcapi as backup_rpcapi from cinder.common import constants from cinder import compute @@ -744,6 +745,7 @@ def _propagate_volume_scheduler_hints(self, context, volume): LOG.exception("Failed to set scheduler hints.", resource=meta_vol) + @action_track.track_decorator(action_track.ACTION_VOLUME_CREATE) @objects.Volume.set_workers def create_volume(self, context, volume, request_spec=None, filter_properties=None, @@ -856,7 +858,10 @@ def _run_flow() -> None: # other volumes. 
self._propagate_volume_scheduler_hints(context, volume) - LOG.info("Created volume successfully.", resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_CREATE, + volume, "Created Volume successfully" + ) return volume.id def _check_is_our_resource(self, resource) -> None: @@ -872,6 +877,7 @@ def _check_is_our_resource(self, resource) -> None: raise exception.Invalid(msg) @coordination.synchronized('{volume.id}-{f_name}') + @action_track.track_decorator(action_track.ACTION_VOLUME_DELETE) @objects.Volume.set_workers def delete_volume(self, context: context.RequestContext, @@ -899,7 +905,11 @@ def delete_volume(self, except exception.VolumeNotFound: # NOTE(thingee): It could be possible for a volume to # be deleted when resuming deletes from init_host(). - LOG.debug("Attempted delete of non-existent volume: %s", volume.id) + action_track.track( + context, action_track.ACTION_VOLUME_DELETE, + volume, + "Attempted delete of non-existent volume: %s" % volume.id, + ) return if context.project_id != volume.project_id: @@ -977,8 +987,10 @@ def delete_volume(self, else: self.driver.delete_volume(volume) except exception.VolumeIsBusy: - LOG.error("Unable to delete busy volume.", - resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_DELETE, + volume, "Unable to delete busy volume.", loglevel=logging.ERROR + ) # If this is a destination volume, we have to clear the database # record to avoid user confusion. self._clear_db(is_migrating_dest, volume, 'available') @@ -1033,7 +1045,10 @@ def delete_volume(self, msg = "Deleted volume successfully." if unmanage_only: msg = "Unmanaged volume successfully." - LOG.info(msg, resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_DELETE, + volume, msg + ) def _clear_db(self, is_migrating_dest, volume_ref, status) -> None: # This method is called when driver.unmanage() or @@ -1206,6 +1221,7 @@ def revert_to_snapshot(self, context, volume, snapshot) -> None: self._notify_about_volume_usage(context, volume, "revert.end") self._notify_about_snapshot_usage(context, snapshot, "revert.end") + @action_track.track_decorator(action_track.ACTION_SNAPSHOT_CREATE) @objects.Snapshot.set_workers def create_snapshot(self, context, snapshot) -> ovo_fields.UUIDField: """Creates and exports the snapshot.""" @@ -1259,6 +1275,11 @@ def create_snapshot(self, context, snapshot) -> ovo_fields.UUIDField: resource=snapshot) snapshot.status = fields.SnapshotStatus.ERROR snapshot.save() + action_track.track( + context, action_track.ACTION_SNAPSHOT_CREATE, + snapshot, "Failed updating snapshot metadata", + loglevel=logging.ERROR + ) self.message_api.create( context, action=message_field.Action.SNAPSHOT_CREATE, @@ -1278,11 +1299,14 @@ def create_snapshot(self, context, snapshot) -> ovo_fields.UUIDField: snapshot.save() self._notify_about_snapshot_usage(context, snapshot, "create.end") - LOG.info("Create snapshot completed successfully", - resource=snapshot) + action_track.track( + context, action_track.ACTION_SNAPSHOT_CREATE, + snapshot, "Create snapshot completed successfully", + ) return snapshot.id @coordination.synchronized('{snapshot.id}-{f_name}') + @action_track.track_decorator(action_track.ACTION_SNAPSHOT_DELETE) def delete_snapshot(self, context: context.RequestContext, snapshot: objects.Snapshot, @@ -1312,8 +1336,11 @@ def delete_snapshot(self, else: self.driver.delete_snapshot(snapshot) except exception.SnapshotIsBusy as busy_error: - LOG.error("Delete snapshot failed, due to snapshot busy.", - 
resource=snapshot) + action_track.track( + context, action_track.ACTION_SNAPSHOT_DELETE, + snapshot, "Delete snapshot failed, due to snapshot busy.", + loglevel=logging.ERROR + ) snapshot.status = fields.SnapshotStatus.AVAILABLE snapshot.save() self.message_api.create( @@ -1369,8 +1396,13 @@ def delete_snapshot(self, if unmanage_only: msg = "Unmanage snapshot completed successfully." LOG.info(msg, resource=snapshot) + action_track.track( + context, action_track.ACTION_SNAPSHOT_DELETE, + snapshot, msg, + ) @coordination.synchronized('{volume_id}') + @action_track.track_decorator(action_track.ACTION_VOLUME_ATTACH) def attach_volume(self, context, volume_id, instance_uuid, host_name, mountpoint, mode, volume=None) -> objects.VolumeAttachment: @@ -1434,12 +1466,15 @@ def attach_volume(self, context, volume_id, instance_uuid, host_name, # and the volume status updated. utils.require_driver_initialized(self.driver) - LOG.info('Attaching volume %(volume_id)s to instance ' - '%(instance)s at mountpoint %(mount)s on host ' - '%(host)s.', - {'volume_id': volume_id, 'instance': instance_uuid, - 'mount': mountpoint, 'host': host_name_sanitized}, - resource=volume) + msg = ('Attaching volume %(volume_id)s to instance ' + '%(instance)s at mountpoint %(mount)s on host ' + '%(host)s.' % + {'volume_id': volume_id, 'instance': instance_uuid, + 'mount': mountpoint, 'host': host_name_sanitized}) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, msg + ) self.driver.attach_volume(context, volume, instance_uuid, @@ -1463,11 +1498,14 @@ def attach_volume(self, context, volume_id, instance_uuid, host_name, mode) self._notify_about_volume_usage(context, volume, "attach.end") - LOG.info("Attach volume completed successfully.", - resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, "Attach volume completed successfully.", + ) return attachment @coordination.synchronized('{volume_id}-{f_name}') + @action_track.track_decorator(action_track.ACTION_VOLUME_DETACH) def detach_volume(self, context, volume_id, attachment_id=None, volume=None) -> None: """Updates db to show volume is detached.""" @@ -1733,6 +1771,7 @@ def _clone_image_volume_and_add_location(self, ctx, volume, image_service, False) return True + @action_track.track_decorator(action_track.ACTION_VOLUME_COPY_TO_IMAGE) def copy_volume_to_image(self, context: context.RequestContext, volume_id: str, @@ -1878,6 +1917,7 @@ def _parse_connection_options(self, context, volume: objects.Volume, return conn_info + @action_track.track_decorator(action_track.ACTION_VOLUME_ATTACH) def initialize_connection(self, context, volume: objects.Volume, @@ -2040,6 +2080,7 @@ def initialize_connection_snapshot(self, resource=snapshot) return conn + @action_track.track_decorator(action_track.ACTION_VOLUME_DETACH) def terminate_connection(self, context, volume_id: ovo_fields.UUIDField, @@ -2636,6 +2677,7 @@ def _can_use_driver_migration(self, diff): extra_specs.pop('RESKEY:availability_zones', None) return not extra_specs + @action_track.track_decorator(action_track.ACTION_VOLUME_MIGRATE) def migrate_volume(self, ctxt: context.RequestContext, volume, @@ -2677,6 +2719,10 @@ def migrate_volume(self, resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume, host=host['host']) + action_track.track( + context, action_track.ACTION_VOLUME_MIGRATE, + volume, "calling driver migrate_volume" + ) moved, model_update = self.driver.migrate_volume(ctxt, volume, host) @@ -2725,6 +2771,10 @@ def migrate_volume(self, 
resource=volume) rpcapi.update_migrated_volume_capacity(ctxt, volume, host=host['host']) + action_track.track( + context, action_track.ACTION_VOLUME_MIGRATE, + volume, "Call Generic migrate volume" + ) self._migrate_volume_generic(ctxt, volume, host, new_type_id) self._update_allocated_capacity(volume, decrement=True, host=original_host) @@ -2737,13 +2787,20 @@ def migrate_volume(self, host=host['host'], decrement=True) with excutils.save_and_reraise_exception(): + action_track.track( + context, action_track.ACTION_VOLUME_MIGRATE, + volume, "Failed generic migration", + loglevel=logging.ERROR + ) updates = {'migration_status': 'error'} if status_update: updates.update(status_update) volume.update(updates) volume.save() - LOG.info("Migrate volume completed successfully.", - resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_MIGRATE, + volume, "Migrate volume completed successfully." + ) def _report_driver_status(self, context: context.RequestContext) -> None: # It's possible during live db migration that the self.service_uuid @@ -3032,6 +3089,7 @@ def _notify_about_group_snapshot_usage(self, context, snapshot, event_suffix, extra_usage_info=extra_usage_info, host=self.host) + @action_track.track_decorator(action_track.ACTION_VOLUME_EXTEND) def extend_volume(self, context, volume: objects.Volume, @@ -3112,6 +3170,7 @@ def _is_our_backend(self, host: str, cluster_name: str): volume_utils.hosts_are_equivalent(self.driver.cluster_name, cluster_name))) + @action_track.track_decorator(action_track.ACTION_VOLUME_RETYPE) def retype(self, context: context.RequestContext, volume: objects.Volume, @@ -4903,19 +4962,33 @@ def _connection_create(self, try: self.driver.validate_connector(connector) except exception.InvalidConnectorException as err: + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + volume, str(err), loglevel=logging.ERROR + ) raise exception.InvalidInput(reason=str(err)) except Exception as err: err_msg = (_("Validate volume connection failed " "(error: %(err)s).") % {'err': err}) - LOG.error(err_msg, resource=volume) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, err_msg, loglevel=logging.ERROR + ) raise exception.VolumeBackendAPIException(data=err_msg) try: + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, "call create_export" + ) model_update = self.driver.create_export(ctxt.elevated(), volume, connector) except exception.CinderException as ex: err_msg = (_("Create export for volume failed (%s).") % ex.msg) - LOG.exception(err_msg, resource=volume) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, err_msg, loglevel=logging.ERROR + ) raise exception.VolumeBackendAPIException(data=err_msg) try: @@ -4924,18 +4997,34 @@ def _connection_create(self, volume.save() except exception.CinderException as ex: LOG.exception("Model update failed.", resource=volume) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, f"Model update failed {str(ex)}", + loglevel=logging.ERROR + ) raise exception.ExportFailure(reason=str(ex)) try: + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, "call driver initialize_connection" + ) conn_info = self.driver.initialize_connection(volume, connector) except exception.ConnectorRejected: with excutils.save_and_reraise_exception(): - LOG.info("The connector was rejected by the volume driver.") + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, "ConnectorRejected. 
Volume needs to be migrated" + ) except Exception as err: err_msg = (_("Driver initialize connection failed " "(error: %(err)s).") % {'err': err}) LOG.exception(err_msg, resource=volume) self.driver.remove_export(ctxt.elevated(), volume) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, err_msg, loglevel=logging.ERROR + ) raise exception.VolumeBackendAPIException(data=err_msg) conn_info = self._parse_connection_options(ctxt, volume, conn_info) @@ -4951,9 +5040,13 @@ def _connection_create(self, self.db.volume_attachment_update(ctxt, attachment.id, values) connection_info['attachment_id'] = attachment.id - LOG.debug("Connection info returned from driver %(connection_info)s", - {'connection_info': + msg = ("Connection info returned from driver %(connection_info)s" % + {'connection_info': strutils.mask_dict_password(connection_info)}) + action_track.track( + ctxt, action_track.ACTION_VOLUME_ATTACH, + volume, msg + ) return connection_info def attachment_update(self, @@ -4977,6 +5070,10 @@ def attachment_update(self, self._notify_about_volume_usage(context, vref, 'attach.start') attachment_ref = objects.VolumeAttachment.get_by_id(context, attachment_id) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + vref, "called" + ) # Check to see if a mode parameter was set during attachment-create; # this seems kinda wonky, but it's how we're keeping back compatability @@ -4994,6 +5091,10 @@ def attachment_update(self, try: utils.require_driver_initialized(self.driver) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + vref, "calling driver attach_volume" + ) self.driver.attach_volume(context, vref, attachment_ref.instance_uuid, @@ -5004,6 +5105,11 @@ def attachment_update(self, context, message_field.Action.UPDATE_ATTACHMENT, resource_uuid=vref.id, exception=err) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + vref, f"driver attach_volume failed {str(err)}", + loglevel=logging.ERROR + ) with excutils.save_and_reraise_exception(): self.db.volume_attachment_update( context, attachment_ref.id, @@ -5019,8 +5125,10 @@ def attachment_update(self, False) vref.refresh() attachment_ref.refresh() - LOG.info("attachment_update completed successfully.", - resource=vref) + action_track.track( + context, action_track.ACTION_VOLUME_ATTACH, + vref, "attachment_update completed_successfully" + ) return connection_info def _connection_terminate(self, @@ -5033,6 +5141,10 @@ def _connection_terminate(self, Exits early if the attachment does not have a connector and returns None to indicate shared connections are irrelevant. """ + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + volume, "called" + ) utils.require_driver_initialized(self.driver) connector = attachment.connector if not connector and not force: @@ -5042,11 +5154,19 @@ def _connection_terminate(self, # so if we don't have a connector we can't terminate a connection # that was never actually made to the storage backend, so just # log a message and exit. - LOG.debug('No connector for attachment %s; skipping storage ' - 'backend terminate_connection call.', attachment.id) + msg = ('No connector for attachment %s; skipping storage ' + 'backend terminate_connection call.' % attachment.id) + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + volume, msg + ) # None indicates we don't know and don't care. 
return None try: + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + volume, "Call driver.terminate_connection" + ) shared_connections = self.driver.terminate_connection(volume, connector, force=force) @@ -5057,9 +5177,16 @@ def _connection_terminate(self, err_msg = (_('Terminate volume connection failed: %(err)s') % {'err': err}) LOG.exception(err_msg, resource=volume) + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + volume, err_msg, loglevel=logging.ERROR + ) raise exception.VolumeBackendAPIException(data=err_msg) - LOG.info("Terminate volume connection completed successfully.", - resource=volume) + + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + volume, "Terminate volume connection completed successfully." + ) # NOTE(jdg): Return True/False if there are other outstanding # attachments that share this connection. If True should signify # caller to preserve the actual host connection (work should be @@ -5082,6 +5209,10 @@ def attachment_delete(self, NOTE if the attachment reference is None, we remove all existing attachments for the specified volume object. """ + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + vref, "called" + ) attachment_ref = objects.VolumeAttachment.get_by_id(context, attachment_id) if not attachment_ref: @@ -5100,15 +5231,24 @@ def _do_attachment_delete(self, vref, attachment) try: - LOG.debug('Deleting attachment %(attachment_id)s.', - {'attachment_id': attachment.id}, - resource=vref) + msg = ('Deleting attachment %(attachment_id)s.' % + {'attachment_id': attachment.id}) + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + vref, msg + ) self.driver.detach_volume(context, vref, attachment) if has_shared_connection is not None and not has_shared_connection: self.driver.remove_export(context.elevated(), vref) except Exception: # Failures on detach_volume and remove_export are not considered # failures in terms of detaching the volume. + action_track.track( + context, action_track.ACTION_VOLUME_DETACH, + vref, + 'Failed during driver.detach_volume or driver.remove_export', + loglevel=logging.ERROR + ) pass # Replication group API (Tiramisu) From ad2c75d2cfcf75c8f4025afc1487debb43620065 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Tue, 9 May 2023 09:53:20 -0400 Subject: [PATCH 127/149] [SAP] ensure snapshot clones accounted for This patch adds the ability in the volume manger to check if a backend has clones as snapshots and account for the space consumed on the pool. All of our snapshots are clones and the volume manager doesn't account for the space consumed on the pool/datastore. Only the scheduler accounts for that space consumed on the pool currently. With this patch the volume manager will check all snapshots in the DB and properly account for the space used on the pool. This is to help prevent overcommit on a pool. 
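
For illustration, a minimal standalone sketch of the accounting idea
(this is not the manager code in the diff below; the pool-name helper,
the stats layout and the snapshot dictionary fields are simplified
assumptions made only for this example):

    from collections import defaultdict

    def extract_pool(host):
        # 'host@backend#pool' -> 'pool' (fallback: the whole string)
        return host.split('#', 1)[1] if host and '#' in host else host

    class PoolStats:
        def __init__(self):
            self.pools = defaultdict(lambda: {'allocated_capacity_gb': 0})
            self.allocated_capacity_gb = 0

        def add(self, pool, size_gb):
            self.pools[pool]['allocated_capacity_gb'] += size_gb
            self.allocated_capacity_gb += size_gb

        def count_startup(self, volumes, snapshots, snapshots_are_clones):
            # Volumes always consume space on their pool.
            for vol in volumes:
                self.add(extract_pool(vol['host']), vol['size'])
            # Clone-based snapshots consume full space as well, so count
            # them too; COW snapshots are left to the backend's own
            # free-space reporting.
            if snapshots_are_clones:
                for snap in snapshots:
                    # Prefer a host recorded on the snapshot itself (it may
                    # live on a different pool), else the source volume's
                    # host.
                    host = snap.get('host') or snap['volume_host']
                    self.add(extract_pool(host), snap['volume_size'])

    stats = PoolStats()
    stats.count_startup(
        volumes=[{'host': 'vc@vmdk#ds-1', 'size': 10}],
        snapshots=[{'host': None, 'volume_host': 'vc@vmdk#ds-1',
                    'volume_size': 10}],
        snapshots_are_clones=True)
    # With clone snapshots, ds-1 now reports 20 GiB allocated instead of 10.

The real change below follows the same shape: a shared
_update_pool_allocated_capacity() helper, and a startup pass over the
snapshots in the DB that only runs when the driver reports the
'snapshot_type' == 'clone' capability.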
--- .../unit/volume/drivers/vmware/test_fcd.py | 1 + cinder/volume/drivers/vmware/vmdk.py | 22 +++++++ cinder/volume/manager.py | 66 +++++++++++++++---- 3 files changed, 78 insertions(+), 11 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index 0abfab1aee2..2581e554f33 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -70,6 +70,7 @@ def setUp(self): self._config.vmware_storage_profile = None self._config.reserved_percentage = self.RESERVED_PERCENTAGE self._config.vmware_datastores_as_pools = False + self._config.vmware_snapshot_format = "COW" self._driver = fcd.VMwareVStorageObjectDriver( configuration=self._config) self._driver._vc_version = self.VC_VERSION diff --git a/cinder/volume/drivers/vmware/vmdk.py b/cinder/volume/drivers/vmware/vmdk.py index 0eea29262b7..14a442f7b1a 100644 --- a/cinder/volume/drivers/vmware/vmdk.py +++ b/cinder/volume/drivers/vmware/vmdk.py @@ -417,6 +417,21 @@ def check_for_setup_error(self): % storage_profile) raise exception.InvalidInput(reason=reason) + def _init_vendor_properties(self): + """Set some vmware specific properties.""" + + properties = {} + vendor_prefix = "vmware" + self._set_property( + properties, + f"{vendor_prefix}:snapshot_type", + "Snapshot type", + _("Specifies Type of snapshot"), + "string", + enum=["snapshot", "clone"]) + + return properties, vendor_prefix + def _update_volume_stats(self): if self.configuration.safe_get('vmware_enable_volume_stats'): self._stats = self._get_volume_stats() @@ -535,6 +550,12 @@ def _get_volume_stats(self): max_over_subscription_ratio = self.configuration.safe_get( 'max_over_subscription_ratio') + snapshot_format = self.configuration.vmware_snapshot_format + if snapshot_format == 'COW': + snapshot_type = 'snapshot' + else: + snapshot_type = 'clone' + backend_state = 'up' data = {'volume_backend_name': backend_name, 'vendor_name': 'VMware', @@ -542,6 +563,7 @@ def _get_volume_stats(self): 'storage_protocol': 'vmdk', 'location_info': location_info, 'backend_state': backend_state, + 'snapshot_type': snapshot_type } result, datastores = self._collect_backend_stats() diff --git a/cinder/volume/manager.py b/cinder/volume/manager.py index 49e3e53da9e..aea2e10d9aa 100644 --- a/cinder/volume/manager.py +++ b/cinder/volume/manager.py @@ -328,6 +328,20 @@ def __init__(self, volume_driver=None, service_name: str = None, {'host': self.host}) self.image_volume_cache = None + def _update_pool_allocated_capacity(self, pool, size): + try: + pool_stat = self.stats['pools'][pool] + except KeyError: + # First volume in the pool + self.stats['pools'][pool] = dict( + allocated_capacity_gb=0) + pool_stat = self.stats['pools'][pool] + pool_sum = pool_stat['allocated_capacity_gb'] + pool_sum += size + + self.stats['pools'][pool]['allocated_capacity_gb'] = pool_sum + self.stats['allocated_capacity_gb'] += size + def _count_allocated_capacity(self, ctxt: context.RequestContext, volume: objects.Volume) -> None: pool = volume_utils.extract_host(volume['host'], 'pool') @@ -356,18 +370,29 @@ def _count_allocated_capacity(self, ctxt: context.RequestContext, pool = (self.driver.configuration.safe_get( 'volume_backend_name') or volume_utils.extract_host( volume['host'], 'pool', True)) - try: - pool_stat = self.stats['pools'][pool] - except KeyError: - # First volume in the pool - self.stats['pools'][pool] = dict( - allocated_capacity_gb=0) - pool_stat = self.stats['pools'][pool] - 
pool_sum = pool_stat['allocated_capacity_gb']
-            pool_sum += volume['size']
-            self.stats['pools'][pool]['allocated_capacity_gb'] = pool_sum
-            self.stats['allocated_capacity_gb'] += volume['size']
+            self._update_pool_allocated_capacity(pool, volume['size'])
+
+    def _update_snapshot_allocated_capacity(self, ctxt: context.RequestContext,
+                                            snapshot: objects.Snapshot,
+                                            host: ty.Optional[str]) -> None:
+        """Use the size of the snapshot to adjust the allocated capacity.
+
+        This updates the pool stats allocated_capacity for the pool that owns
+        the snapshot. The snapshot lives either in its own independent pool
+        or in the same pool as the source volume.
+
+        The scheduler updates the pool accounting at snapshot creation time;
+        this ensures the stats are adjusted at volume service start time too.
+        """
+        size = snapshot.volume_size
+        if not host:
+            # get the source volume to find the host
+            volume = snapshot.volume
+            host = volume.host
+
+        pool = volume_utils.extract_host(host, 'pool')
+        self._update_pool_allocated_capacity(pool, size)
 
     def _set_voldb_empty_at_startup_indicator(
             self,
@@ -611,6 +636,25 @@ def _count_host_stats(self, context, export_volumes=False):
                           resource=volume)
                 return
 
+            # SAP
+            # Account for the creation of snapshots on each pool
+            # this is only valid for vmware backends that are configured
+            # for clone based snapshots. This only counts snapshots against
+            # the snapshot/source volume's pool if the snapshot is a clone
+            # of the source volume.
+            if self.driver.capabilities.get('snapshot_type') == 'clone':
+                try:
+                    for snapshot in snapshots:
+                        host = None
+                        key = objects.snapshot.SAP_HIDDEN_BACKEND_KEY
+                        if snapshot.metadata:
+                            host = snapshot.metadata.get(key)
+                        self._update_snapshot_allocated_capacity(
+                            ctxt, snapshot, host=host
+                        )
+                except Exception:
+                    LOG.exception("Error during snapshot calculation")
+
             if len(volumes):
                 volumes_to_migrate.append(volumes, ctxt)

From 47931985ec25d000df53e7cd2abaece47b994861 Mon Sep 17 00:00:00 2001
From: Walter Boring IV
Date: Fri, 26 May 2023 14:10:41 -0400
Subject: [PATCH 128/149] SAP: allow volume migrations with snapshots

This patch removes the volume_has_snapshots_filter in the conditional
update when the backend that owns the volume has clones as snapshots.
This effectively makes all vmware driver based volumes migratable within
a shard even when the volume has snapshots.
---
 cinder/volume/api.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/cinder/volume/api.py b/cinder/volume/api.py
index 89e70b205c8..f0f63f8fc91 100644
--- a/cinder/volume/api.py
+++ b/cinder/volume/api.py
@@ -1669,7 +1669,22 @@ def migrate_volume(self, context, volume, host, cluster_name, force_copy,
         else:
             expected['host'] = db.Not(host)
 
-        filters = [~db.volume_has_snapshots_filter()]
+        # SAP: If the backend's snapshots are clones, then we don't
+        # have to filter out volumes that have snapshots. We can find out
+        # if a backend has clones for snapshots by fetching the backend
+        # capabilities and looking for snapshot_type=='clone'
+        caps = self.volume_rpcapi.get_capabilities(
+            context, volume.service_topic_queue, discover=False)
+
+        # SAP(walter): If snapshots are clones, then we can migrate a
+        # volume to another pool on the same shard. So we can ignore
+        # filtering on the volume having snapshots
+        if caps.get('snapshot_type') == 'clone':
+            # This has to be (), not [] as the default filters param in the
+            # conditional_update is ()
+            filters = ()
+        else:
+            filters = [~db.volume_has_snapshots_filter()]
 
         updates = {'migration_status': 'starting',
                    'previous_status': volume.model.status}
@@ -1688,10 +1703,13 @@ def migrate_volume(self, context, volume, host, cluster_name, force_copy,
 
         result = volume.conditional_update(updates, expected, filters)
         if not result:
+            snaps_str = ""
+            if filters:
+                snaps_str = " have snapshots,"
             msg = _('Volume %s status must be available or in-use, must not '
-                    'be migrating, have snapshots, be replicated, be part of '
+                    'be migrating,%s be replicated, be part of '
                     'a group and destination host/cluster must be different '
-                    'than the current one') % volume.id
+                    'than the current one') % (volume.id, snaps_str)
             LOG.error(msg)
             raise exception.InvalidVolume(reason=msg)

From 19ac4298b29a320dccc03ef2872451ab3f4594df Mon Sep 17 00:00:00 2001
From: Walter Boring IV
Date: Mon, 12 Jun 2023 11:53:44 -0400
Subject: [PATCH 129/149] [SAP] Use snapshot hidden backend for new volumes

When creating a volume from a snapshot, the hidden
__cinder_internal_backend metadata on the snapshot will be used as the
host for the volume creation. We do this instead of falling back to the
snapshot's source volume host, which can be different from what it was
when the snapshot was created, because the source volume can now be
migrated. This ensures that the new volume created from the snapshot can
copy the bits from the snapshot (clone).
---
 cinder/volume/flows/api/create_volume.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/cinder/volume/flows/api/create_volume.py b/cinder/volume/flows/api/create_volume.py
index be852d8c2b3..3a14238293e 100644
--- a/cinder/volume/flows/api/create_volume.py
+++ b/cinder/volume/flows/api/create_volume.py
@@ -21,6 +21,7 @@
 from cinder.i18n import _
 from cinder import objects
 from cinder.objects import fields
+from cinder.objects import snapshot as snapshot_obj
 from cinder.policies import volumes as policy
 from cinder import quota
 from cinder import quota_utils
@@ -765,9 +766,21 @@ def _cast_create_volume(self, context, request_spec, filter_properties):
             # if we are allowing snapshots to live on pools other than
             # the source volume.
             if CONF.sap_allow_independent_snapshots:
-                backend = volume_utils.extract_host(
-                    snapshot.volume.resource_backend
-                )
+                # First see if the host was saved in metadata, because the
+                # source volume is allowed to be migrated off of its original
+                # shard.
+                # If not, then use the volume's host entry.
+                snap_host_key = snapshot_obj.SAP_HIDDEN_BACKEND_KEY
+                snap_host = snapshot.metadata.get(snap_host_key)
+                if snap_host:
+                    # we need to use the host entry saved in the snapshot
+                    # metadata as the source volume can be migrated to a
+                    # different shard.
+                    backend = volume_utils.extract_host(snap_host)
+                else:
+                    backend = volume_utils.extract_host(
+                        snapshot.volume.resource_backend
+                    )
             request_spec['resource_backend'] = backend
         elif source_volid:
             source_volume_ref = objects.Volume.get_by_id(context, source_volid)

From b44fcab7f886d4596b0edc0f955f0fb1008971cd Mon Sep 17 00:00:00 2001
From: Csaba Seres
Date: Fri, 7 Jul 2023 12:08:07 +0200
Subject: [PATCH 130/149] Change FCD driver to use volume.name attribute for
 directory naming, also use it for temp directory.
This change will help ops to find fcd objects on the datastore, as there is no longer any searchable object in vcenter.... --- cinder/volume/drivers/vmware/fcd.py | 6 +++--- cinder/volume/drivers/vmware/volumeops.py | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/cinder/volume/drivers/vmware/fcd.py b/cinder/volume/drivers/vmware/fcd.py index b77c14b8406..0573ebd4355 100644 --- a/cinder/volume/drivers/vmware/fcd.py +++ b/cinder/volume/drivers/vmware/fcd.py @@ -91,7 +91,7 @@ def _select_ds_fcd(self, volume): (_host_ref, _resource_pool, summary) = self._select_datastore(req) return summary.datastore - def _get_temp_image_folder(self, size_bytes, preallocated=False): + def _get_temp_image_folder(self, name, size_bytes, preallocated=False): req = {} req[hub.DatastoreSelector.SIZE_BYTES] = size_bytes @@ -102,7 +102,7 @@ def _get_temp_image_folder(self, size_bytes, preallocated=False): (host_ref, _resource_pool, summary) = self._select_datastore(req) - folder_path = vmdk.TMP_IMAGES_DATASTORE_FOLDER_PATH + folder_path = name + '/' dc_ref = self.volumeops.get_dc(host_ref) self.volumeops.create_datastore_folder( summary.name, folder_path, dc_ref) @@ -201,7 +201,7 @@ def copy_image_to_volume(self, context, volume, image_service, image_id): size_bytes = metadata['size'] dc_ref, summary, folder_path = self._get_temp_image_folder( - volume.size * units.Gi) + volume.name, volume.size * units.Gi) disk_name = volume.id if disk_type in [vmdk.ImageDiskType.SPARSE, vmdk.ImageDiskType.STREAM_OPTIMIZED]: diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 823f955b7a5..931d4f9ed5b 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2123,13 +2123,15 @@ def mark_backing_as_template(self, backing): LOG.debug("Marking backing: %s as template.", backing) self._session.invoke_api(self._session.vim, 'MarkAsTemplate', backing) - def _create_fcd_backing_spec(self, disk_type, ds_ref): + def _create_fcd_backing_spec(self, disk_type, ds_ref, path=None): backing_spec = self._session.vim.client.factory.create( 'ns0:VslmCreateSpecDiskFileBackingSpec') if disk_type == VirtualDiskType.PREALLOCATED: disk_type = 'lazyZeroedThick' backing_spec.provisioningType = disk_type backing_spec.datastore = ds_ref + if path: + backing_spec.path = path + '/' return backing_spec def _create_profile_spec(self, cf, profile_id): @@ -2142,7 +2144,16 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb spec.name = name - spec.backingSpec = self._create_fcd_backing_spec(disk_type, ds_ref) + spec.backingSpec = self._create_fcd_backing_spec(disk_type, ds_ref, name) + hosts = self.get_connected_hosts(ds_ref) + host_ref = vim_util.get_moref(hosts[0], 'HostSystem') + dc_ref = self.get_dc(host_ref) + ds_name = self._session.invoke_api(vim_util, 'get_object_property', + self._session.vim, ds_ref, + 'name') + + self.create_datastore_folder(ds_name, name, dc_ref) + if profile_id: profile_spec = self._create_profile_spec(cf, profile_id) @@ -2177,7 +2188,8 @@ def clone_fcd( spec = cf.create('ns0:VslmCloneSpec') spec.name = name spec.backingSpec = self._create_fcd_backing_spec(disk_type, - dest_ds_ref) + dest_ds_ref, + name) if profile_id: profile_spec = self._create_profile_spec(cf, profile_id) From aee5ac6f88ae3d175355e8c2cff0db7d4a4fe597 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Fri, 7 Jul 2023 12:29:14 +0200 Subject: 
[PATCH 131/149] Fix pep8 issues --- cinder/volume/drivers/vmware/volumeops.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 931d4f9ed5b..615497c0544 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2131,7 +2131,7 @@ def _create_fcd_backing_spec(self, disk_type, ds_ref, path=None): backing_spec.provisioningType = disk_type backing_spec.datastore = ds_ref if path: - backing_spec.path = path + '/' + backing_spec.path = path + '/' return backing_spec def _create_profile_spec(self, cf, profile_id): @@ -2144,16 +2144,17 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb spec.name = name - spec.backingSpec = self._create_fcd_backing_spec(disk_type, ds_ref, name) + spec.backingSpec = self._create_fcd_backing_spec(disk_type, + ds_ref, + name) hosts = self.get_connected_hosts(ds_ref) host_ref = vim_util.get_moref(hosts[0], 'HostSystem') dc_ref = self.get_dc(host_ref) ds_name = self._session.invoke_api(vim_util, 'get_object_property', - self._session.vim, ds_ref, - 'name') - - self.create_datastore_folder(ds_name, name, dc_ref) + self._session.vim, ds_ref, + 'name') + self.create_datastore_folder(ds_name, name, dc_ref) if profile_id: profile_spec = self._create_profile_spec(cf, profile_id) From f8e48270a4bf21bb74cd3def3dbc991385b61cf1 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Fri, 7 Jul 2023 12:38:26 +0200 Subject: [PATCH 132/149] Pep8 --- cinder/volume/drivers/vmware/volumeops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 615497c0544..d5f31cf5d09 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2144,8 +2144,8 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb spec.name = name - spec.backingSpec = self._create_fcd_backing_spec(disk_type, - ds_ref, + spec.backingSpec = self._create_fcd_backing_spec(disk_type, + ds_ref, name) hosts = self.get_connected_hosts(ds_ref) host_ref = vim_util.get_moref(hosts[0], 'HostSystem') From d0092e644fba661f7bbe91cb4595825c3fb269e0 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Mon, 10 Jul 2023 14:59:33 +0200 Subject: [PATCH 133/149] Fix unit tests --- .../unit/volume/drivers/vmware/test_fcd.py | 9 +++-- .../drivers/vmware/test_vmware_volumeops.py | 38 ++++++++++++++----- cinder/volume/drivers/vmware/volumeops.py | 15 +++----- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index 2581e554f33..841ff40973d 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -161,15 +161,16 @@ def _test_get_temp_image_folder( host = mock.sentinel.host summary = mock.Mock() summary.name = 'ds-1' + volume = self._create_volume_obj() select_datastore.return_value = (host, mock.ANY, summary) dc_ref = mock.sentinel.dc_ref vops.get_dc.return_value = dc_ref size_bytes = units.Gi - ret = self._driver._get_temp_image_folder(size_bytes, preallocated) + ret = self._driver._get_temp_image_folder(volume.name, size_bytes, preallocated) self.assertEqual( - (dc_ref, 
summary, vmdk.TMP_IMAGES_DATASTORE_FOLDER_PATH), ret) + (dc_ref, summary, volume.name + '/'), ret) exp_req = {hub.DatastoreSelector.SIZE_BYTES: size_bytes} if preallocated: exp_req[hub.DatastoreSelector.HARD_AFFINITY_DS_TYPE] = ( @@ -179,7 +180,7 @@ def _test_get_temp_image_folder( select_datastore.assert_called_once_with(exp_req) vops.get_dc.assert_called_once_with(host) vops.create_datastore_folder.assert_called_once_with( - summary.name, vmdk.TMP_IMAGES_DATASTORE_FOLDER_PATH, dc_ref) + summary.name, volume.name + '/', dc_ref) def test_get_temp_image_folder(self): self._test_get_temp_image_folder() @@ -357,7 +358,7 @@ def test_copy_image_to_volume(self, self._context, volume, image_service, image_id) self.assertEqual({'provider_location': provider_location}, ret) - get_temp_image_folder.assert_called_once_with(volume.size * units.Gi) + get_temp_image_folder.assert_called_once_with(volume.name, volume.size * units.Gi) if disk_type == vmdk.ImageDiskType.PREALLOCATED: create_disk_from_preallocated_image.assert_called_once_with( self._context, image_service, image_id, image_meta['size'], diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 154d05dca1d..4a7af21ccdc 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -32,7 +32,6 @@ class VolumeOpsTestCase(test.TestCase): """Unit tests for volumeops module.""" MAX_OBJECTS = 100 - def setUp(self): super(VolumeOpsTestCase, self).setUp() self.session = mock.MagicMock() @@ -1958,27 +1957,39 @@ def test_create_fcd_backing_spec(self, disk_type): def test_create_fcd(self, create_profile_spec, create_fcd_backing_spec): spec = mock.Mock() self.session.vim.client.factory.create.return_value = spec - backing_spec = mock.sentinel.backing_spec create_fcd_backing_spec.return_value = backing_spec - profile_spec = mock.sentinel.profile_spec create_profile_spec.return_value = profile_spec - task = mock.sentinel.task - self.session.invoke_api.return_value = task - task_info = mock.Mock() fcd_id = mock.sentinel.fcd_id task_info.result.config.id.id = fcd_id self.session.wait_for_task.return_value = task_info - name = mock.sentinel.name size_mb = 1024 ds_ref_val = mock.sentinel.ds_ref_val + dc = mock.Mock(spec=object) + dc._type = 'Datacenter' ds_ref = mock.Mock(value=ds_ref_val) + ds_ref._type = 'Datastore' + ds_ref.parent = dc + self.session.invoke_api.return_value = dc disk_type = mock.sentinel.disk_type profile_id = mock.sentinel.profile_id + + def mock_invoke_api(vim_util, method, vim, + the_object=None, arg=None, + name=None, datacenter=None, + spec=None): + if arg == "parent": + return the_object.parent + if arg == "name": + return mock.sentinel.name + if method == "CreateDisk_Task": + return task + self.session.invoke_api.side_effect = mock_invoke_api + ret = self.vops.create_fcd( name, size_mb, ds_ref, disk_type, profile_id=profile_id) @@ -1986,14 +1997,21 @@ def test_create_fcd(self, create_profile_spec, create_fcd_backing_spec): self.assertEqual(ds_ref_val, ret.ds_ref_val) self.session.vim.client.factory.create.assert_called_once_with( 'ns0:VslmCreateSpec') - create_fcd_backing_spec.assert_called_once_with(disk_type, ds_ref) + create_fcd_backing_spec.assert_called_once_with(disk_type, ds_ref, name) self.assertEqual(1024, spec.capacityInMB) self.assertEqual(name, spec.name) self.assertEqual(backing_spec, spec.backingSpec) self.assertEqual([profile_spec], 
spec.profile) create_profile_spec.assert_called_once_with( self.session.vim.client.factory, profile_id) - self.session.invoke_api.assert_called_once_with( + ds_folder_path = "[%s] %s" % (mock.sentinel.name, name) + self.session.invoke_api.assert_any_call( + self.session.vim, + 'MakeDirectory', + self.session.vim.service_content.fileManager, + name = ds_folder_path, + datacenter = dc) + self.session.invoke_api.assert_any_call( self.session.vim, 'CreateDisk_Task', self.session.vim.service_content.vStorageObjectManager, @@ -2059,7 +2077,7 @@ def test_clone_fcd(self, create_profile_spec, create_fcd_backing_spec): self.assertEqual(dest_ds_ref_val, ret.ds_ref_val) self.session.vim.client.factory.create.assert_called_once_with( 'ns0:VslmCloneSpec') - create_fcd_backing_spec.assert_called_once_with(disk_type, dest_ds_ref) + create_fcd_backing_spec.assert_called_once_with(disk_type, dest_ds_ref, name) self.assertEqual(name, spec.name) self.assertEqual(backing_spec, spec.backingSpec) self.assertEqual([profile_spec], spec.profile) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index d5f31cf5d09..864a382c97a 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2131,7 +2131,7 @@ def _create_fcd_backing_spec(self, disk_type, ds_ref, path=None): backing_spec.provisioningType = disk_type backing_spec.datastore = ds_ref if path: - backing_spec.path = path + '/' + backing_spec.path = path + '/' return backing_spec def _create_profile_spec(self, cf, profile_id): @@ -2144,16 +2144,11 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb spec.name = name - spec.backingSpec = self._create_fcd_backing_spec(disk_type, - ds_ref, - name) - hosts = self.get_connected_hosts(ds_ref) - host_ref = vim_util.get_moref(hosts[0], 'HostSystem') - dc_ref = self.get_dc(host_ref) + spec.backingSpec = self._create_fcd_backing_spec(disk_type, ds_ref, name) + dc_ref = self.get_dc(ds_ref) ds_name = self._session.invoke_api(vim_util, 'get_object_property', - self._session.vim, ds_ref, - 'name') - + self._session.vim, ds_ref, + 'name') self.create_datastore_folder(ds_name, name, dc_ref) if profile_id: From 53cf6d2f633a6e4786f7f25e1cd5277d4845366a Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Mon, 10 Jul 2023 15:20:16 +0200 Subject: [PATCH 134/149] Fix pep8 again --- .../volume/drivers/vmware/test_vmware_volumeops.py | 11 +++++++---- cinder/volume/drivers/vmware/volumeops.py | 9 +++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 4a7af21ccdc..307db905d03 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -32,6 +32,7 @@ class VolumeOpsTestCase(test.TestCase): """Unit tests for volumeops module.""" MAX_OBJECTS = 100 + def setUp(self): super(VolumeOpsTestCase, self).setUp() self.session = mock.MagicMock() @@ -1978,9 +1979,9 @@ def test_create_fcd(self, create_profile_spec, create_fcd_backing_spec): disk_type = mock.sentinel.disk_type profile_id = mock.sentinel.profile_id - def mock_invoke_api(vim_util, method, vim, - the_object=None, arg=None, - name=None, datacenter=None, + def mock_invoke_api(vim_util, method, vim, + the_object=None, arg=None, + name=None, datacenter=None, spec=None): 
if arg == "parent": return the_object.parent @@ -1997,7 +1998,9 @@ def mock_invoke_api(vim_util, method, vim, self.assertEqual(ds_ref_val, ret.ds_ref_val) self.session.vim.client.factory.create.assert_called_once_with( 'ns0:VslmCreateSpec') - create_fcd_backing_spec.assert_called_once_with(disk_type, ds_ref, name) + create_fcd_backing_spec.assert_called_once_with(disk_type, + ds_ref, + name) self.assertEqual(1024, spec.capacityInMB) self.assertEqual(name, spec.name) self.assertEqual(backing_spec, spec.backingSpec) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 864a382c97a..ccb14895d51 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2131,7 +2131,7 @@ def _create_fcd_backing_spec(self, disk_type, ds_ref, path=None): backing_spec.provisioningType = disk_type backing_spec.datastore = ds_ref if path: - backing_spec.path = path + '/' + backing_spec.path = path + '/' return backing_spec def _create_profile_spec(self, cf, profile_id): @@ -2144,11 +2144,12 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb spec.name = name - spec.backingSpec = self._create_fcd_backing_spec(disk_type, ds_ref, name) + spec.backingSpec = self._create_fcd_backing_spec(disk_type, + ds_ref, name) dc_ref = self.get_dc(ds_ref) ds_name = self._session.invoke_api(vim_util, 'get_object_property', - self._session.vim, ds_ref, - 'name') + self._session.vim, ds_ref, + 'name') self.create_datastore_folder(ds_name, name, dc_ref) if profile_id: From 682845f547ac4136c0dc6df66867a7eaef2af62b Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Mon, 10 Jul 2023 15:31:01 +0200 Subject: [PATCH 135/149] pep8 --- cinder/tests/unit/volume/drivers/vmware/test_fcd.py | 7 +++++-- .../unit/volume/drivers/vmware/test_vmware_volumeops.py | 4 +++- cinder/volume/drivers/vmware/volumeops.py | 2 +- 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index 841ff40973d..eb45ee1e715 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -168,7 +168,9 @@ def _test_get_temp_image_folder( vops.get_dc.return_value = dc_ref size_bytes = units.Gi - ret = self._driver._get_temp_image_folder(volume.name, size_bytes, preallocated) + ret = self._driver._get_temp_image_folder(volume.name, + size_bytes, + preallocated) self.assertEqual( (dc_ref, summary, volume.name + '/'), ret) exp_req = {hub.DatastoreSelector.SIZE_BYTES: size_bytes} @@ -358,7 +360,8 @@ def test_copy_image_to_volume(self, self._context, volume, image_service, image_id) self.assertEqual({'provider_location': provider_location}, ret) - get_temp_image_folder.assert_called_once_with(volume.name, volume.size * units.Gi) + get_temp_image_folder.assert_called_once_with(volume.name, + volume.size * units.Gi) if disk_type == vmdk.ImageDiskType.PREALLOCATED: create_disk_from_preallocated_image.assert_called_once_with( self._context, image_service, image_id, image_meta['size'], diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index 307db905d03..eb7720a97a0 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -2080,7 +2080,9 @@ def 
test_clone_fcd(self, create_profile_spec, create_fcd_backing_spec): self.assertEqual(dest_ds_ref_val, ret.ds_ref_val) self.session.vim.client.factory.create.assert_called_once_with( 'ns0:VslmCloneSpec') - create_fcd_backing_spec.assert_called_once_with(disk_type, dest_ds_ref, name) + create_fcd_backing_spec.assert_called_once_with(disk_type, + dest_ds_ref, + name) self.assertEqual(name, spec.name) self.assertEqual(backing_spec, spec.backingSpec) self.assertEqual([profile_spec], spec.profile) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index ccb14895d51..a044fb5e4dc 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2144,7 +2144,7 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb spec.name = name - spec.backingSpec = self._create_fcd_backing_spec(disk_type, + spec.backingSpec = self._create_fcd_backing_spec(disk_type, ds_ref, name) dc_ref = self.get_dc(ds_ref) ds_name = self._session.invoke_api(vim_util, 'get_object_property', From dbc8e74817c1a33d40cc4ca6f097afc53ac9ca52 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Tue, 11 Jul 2023 16:19:53 +0200 Subject: [PATCH 136/149] Fix volume creation from snapshot --- cinder/volume/drivers/vmware/volumeops.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index a044fb5e4dc..aecd8b880ad 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2300,6 +2300,13 @@ def create_fcd_from_snapshot(self, fcd_snap_loc, name, profile_id=None): vstorage_mgr = self._session.vim.service_content.vStorageObjectManager cf = self._session.vim.client.factory + ds_ref = fcd_snap_loc.fcd_loc.ds_ref() + dc_ref = self.get_dc(ds_ref) + ds_name = self._session.invoke_api(vim_util, 'get_object_property', + self._session.vim, ds_ref, + 'name') + self.create_datastore_folder(ds_name, name, dc_ref) + if profile_id: profile = [self._create_profile_spec(cf, profile_id)] else: @@ -2312,7 +2319,8 @@ def create_fcd_from_snapshot(self, fcd_snap_loc, name, profile_id=None): datastore=fcd_snap_loc.fcd_loc.ds_ref(), snapshotId=fcd_snap_loc.id(cf), name=name, - profile=profile) + profile=profile, + path=name + '/') task_info = self._session.wait_for_task(task) fcd_loc = FcdLocation.create(task_info.result.config.id, fcd_snap_loc.fcd_loc.ds_ref()) From b4e1acfc54fadda55f01ac3c9114ba901710ee4b Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Thu, 13 Jul 2023 11:43:00 +0200 Subject: [PATCH 137/149] Modify driver to update vmdk uuid to cinder volume id --- cinder/volume/drivers/vmware/fcd.py | 11 ++++-- cinder/volume/drivers/vmware/volumeops.py | 41 ++++++++++++++++++++--- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/cinder/volume/drivers/vmware/fcd.py b/cinder/volume/drivers/vmware/fcd.py index 0573ebd4355..4ae414e7aba 100644 --- a/cinder/volume/drivers/vmware/fcd.py +++ b/cinder/volume/drivers/vmware/fcd.py @@ -130,8 +130,8 @@ def create_volume(self, volume): ds_ref = self._select_ds_fcd(volume) profile_id = self._get_storage_profile_id(volume) fcd_loc = self.volumeops.create_fcd( - volume.name, volume.size * units.Ki, ds_ref, disk_type, - profile_id=profile_id) + volume.id, volume.name, volume.size * units.Ki, ds_ref, + disk_type, profile_id=profile_id) return {'provider_location': 
fcd_loc.provider_location()} def _delete_fcd(self, provider_loc): @@ -228,6 +228,8 @@ def copy_image_to_volume(self, context, volume, image_service, image_id): profile_id = self._get_storage_profile_id(volume) if profile_id: self.volumeops.update_fcd_policy(fcd_loc, profile_id) + + self.volumeops.update_fcd_vmdk_uuid(summary.datastore, vmdk_path, volume.id) return {'provider_location': fcd_loc.provider_location()} @@ -344,6 +346,7 @@ def _extend_if_needed(self, fcd_loc, cur_size, new_size): self.volumeops.extend_fcd(fcd_loc, new_size * units.Ki) def _create_volume_from_fcd(self, provider_loc, cur_size, volume): + cf = self._session.vim.client.factory ds_ref = self._select_ds_fcd(volume) disk_type = self._get_disk_type(volume) profile_id = self._get_storage_profile_id(volume) @@ -351,6 +354,8 @@ def _create_volume_from_fcd(self, provider_loc, cur_size, volume): provider_loc, volume.name, ds_ref, disk_type=disk_type, profile_id=profile_id) self._extend_if_needed(cloned_fcd_loc, cur_size, volume.size) + vmdk_path = self.volumeops.get_vmdk_path_for_fcd(ds_ref, cloned_fcd_loc.id(cf)) + self.volumeops.update_fcd_vmdk_uuid(ds_ref, vmdk_path, volume.id) return {'provider_location': cloned_fcd_loc.provider_location()} def create_volume_from_snapshot(self, volume, snapshot): @@ -365,7 +370,7 @@ def create_volume_from_snapshot(self, volume, snapshot): if fcd_snap_loc: profile_id = self._get_storage_profile_id(volume) fcd_loc = self.volumeops.create_fcd_from_snapshot( - fcd_snap_loc, volume.name, profile_id=profile_id) + fcd_snap_loc, volume.name, volume.id, profile_id=profile_id) self._extend_if_needed(fcd_loc, snapshot.volume_size, volume.size) return {'provider_location': fcd_loc.provider_location()} else: diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index aecd8b880ad..cacc555aa36 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2134,12 +2134,38 @@ def _create_fcd_backing_spec(self, disk_type, ds_ref, path=None): backing_spec.path = path + '/' return backing_spec + def get_vmdk_path_for_fcd(self, ds_ref, disk_id): + vstorage_mgr = self._session.vim.service_content.vStorageObjectManager + fcd_obj = self._session.invoke_api( + self._session.vim, + 'RetrieveVStorageObject', + vstorage_mgr, + id=disk_id, + datastore=ds_ref) + vmdk_path = fcd_obj.config.backing.filePath + return vmdk_path + + def update_fcd_vmdk_uuid(self, ds_ref, vmdk_path, cinder_uuid ): + def cinder_uuid_to_vmwhex(cinder_uuid): + t = iter(cinder_uuid.replace('-','')) + hextext = ' '.join(a+b for a,b in zip(t, t)) + return hextext[:23] + '-' + hextext[24:] + + virtual_dmgr = self._session.vim.service_content.virtualDiskManager + self._session.invoke_api( + self._session.vim, + 'SetVirtualDiskUuid', + virtual_dmgr, + name=vmdk_path, + datacenter=self.get_dc(ds_ref), + uuid=cinder_uuid_to_vmwhex(cinder_uuid)) + def _create_profile_spec(self, cf, profile_id): profile_spec = cf.create('ns0:VirtualMachineDefinedProfileSpec') profile_spec.profileId = profile_id return profile_spec - def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): + def create_fcd(self, cinder_uuid, name, size_mb, ds_ref, disk_type, profile_id=None): cf = self._session.vim.client.factory spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb @@ -2164,10 +2190,13 @@ def create_fcd(self, name, size_mb, ds_ref, disk_type, profile_id=None): vstorage_mgr, spec=spec) task_info = self._session.wait_for_task(task) - fcd_loc = 
FcdLocation.create(task_info.result.config.id, ds_ref) + fcd_obj = task_info.result + fcd_loc = FcdLocation.create(fcd_obj.config.id, ds_ref) + vmdk_path = fcd_obj.config.backing.filePath + self.update_fcd_vmdk_uuid(ds_ref, vmdk_path, cinder_uuid ) LOG.debug("Created fcd: %s.", fcd_loc) return fcd_loc - + def delete_fcd(self, fcd_location): cf = self._session.vim.client.factory vstorage_mgr = self._session.vim.service_content.vStorageObjectManager @@ -2294,7 +2323,8 @@ def delete_fcd_snapshot(self, fcd_snap_loc): snapshotId=fcd_snap_loc.id(cf)) self._session.wait_for_task(task) - def create_fcd_from_snapshot(self, fcd_snap_loc, name, profile_id=None): + def create_fcd_from_snapshot(self, fcd_snap_loc, name, + cinder_uuid, profile_id=None): LOG.debug("Creating fcd with name: %(name)s from fcd snapshot: " "%(snap)s.", {'name': name, 'snap': fcd_snap_loc}) @@ -2322,6 +2352,9 @@ def create_fcd_from_snapshot(self, fcd_snap_loc, name, profile_id=None): profile=profile, path=name + '/') task_info = self._session.wait_for_task(task) + self.update_fcd_vmdk_uuid(fcd_snap_loc.fcd_loc.ds_ref(), + task_info.result.config.backing.filePath, + cinder_uuid) fcd_loc = FcdLocation.create(task_info.result.config.id, fcd_snap_loc.fcd_loc.ds_ref()) From 331125b1d5b9b0f6d5a8920b6ef592b8cb33a242 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Wed, 19 Jul 2023 11:20:24 +0200 Subject: [PATCH 138/149] Fix unittests --- .../unit/volume/drivers/vmware/test_fcd.py | 19 ++-- .../drivers/vmware/test_vmware_volumeops.py | 101 +++++++++++++----- cinder/volume/drivers/vmware/fcd.py | 17 ++- cinder/volume/drivers/vmware/volumeops.py | 29 +++-- 4 files changed, 109 insertions(+), 57 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py index eb45ee1e715..28d78c8e8db 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_fcd.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_fcd.py @@ -229,7 +229,8 @@ def test_create_volume(self, vops, get_storage_profile_id, get_disk_type, select_ds_fcd.assert_called_once_with(volume) get_disk_type.assert_called_once_with(volume) vops.create_fcd.assert_called_once_with( - volume.name, volume.size * units.Ki, ds_ref, disk_type, + volume.id, volume.name, volume.size * units.Ki, + ds_ref, disk_type, profile_id=profile_id) @mock.patch.object(volumeops.FcdLocation, 'from_provider_location') @@ -474,8 +475,8 @@ def test_clone_fcd(self, vops, from_provider_loc): name = mock.sentinel.name dest_ds_ref = mock.sentinel.dest_ds_ref disk_type = mock.sentinel.disk_type - ret = self._driver._clone_fcd( - provider_loc, name, dest_ds_ref, disk_type) + ret = self._driver._clone_fcd(provider_loc, + name, dest_ds_ref, disk_type) self.assertEqual(dest_fcd_loc, ret) from_provider_loc.assert_called_once_with(provider_loc) vops.clone_fcd.assert_called_once_with( @@ -586,18 +587,14 @@ def test_create_volume_from_fcd( get_disk_type, select_ds_fcd): ds_ref = mock.sentinel.ds_ref select_ds_fcd.return_value = ds_ref - disk_type = mock.sentinel.disk_type get_disk_type.return_value = disk_type - profile_id = mock.sentinel.profile_id get_storage_profile_id.return_value = profile_id - cloned_fcd_loc = mock.Mock() dest_provider_loc = mock.sentinel.dest_provider_loc cloned_fcd_loc.provider_location.return_value = dest_provider_loc clone_fcd.return_value = cloned_fcd_loc - provider_loc = mock.sentinel.provider_loc cur_size = 1 volume = self._create_volume_obj() @@ -606,9 +603,9 @@ def test_create_volume_from_fcd( 
self.assertEqual({'provider_location': dest_provider_loc}, ret) select_ds_fcd.test_assert_called_once_with(volume) get_disk_type.test_assert_called_once_with(volume) - clone_fcd.assert_called_once_with( - provider_loc, volume.name, ds_ref, disk_type=disk_type, - profile_id=profile_id) + clone_fcd.assert_called_once_with(provider_loc, volume, + ds_ref, disk_type=disk_type, + profile_id=profile_id) extend_if_needed.assert_called_once_with( cloned_fcd_loc, cur_size, volume.size) @@ -643,7 +640,7 @@ def _test_create_volume_from_snapshot( if use_fcd_snapshot: self.assertEqual({'provider_location': provider_loc}, ret) vops.create_fcd_from_snapshot.assert_called_once_with( - fcd_snap_loc, volume.name, profile_id=profile_id) + fcd_snap_loc, volume.name, volume.id, profile_id=profile_id) extend_if_needed.assert_called_once_with( fcd_loc, snapshot.volume_size, volume.size) else: diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index eb7720a97a0..d7c25f3f2e6 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -1958,8 +1958,10 @@ def test_create_fcd_backing_spec(self, disk_type): def test_create_fcd(self, create_profile_spec, create_fcd_backing_spec): spec = mock.Mock() self.session.vim.client.factory.create.return_value = spec + backing_spec = mock.sentinel.backing_spec create_fcd_backing_spec.return_value = backing_spec + profile_spec = mock.sentinel.profile_spec create_profile_spec.return_value = profile_spec task = mock.sentinel.task @@ -1967,33 +1969,37 @@ def test_create_fcd(self, create_profile_spec, create_fcd_backing_spec): fcd_id = mock.sentinel.fcd_id task_info.result.config.id.id = fcd_id self.session.wait_for_task.return_value = task_info - name = mock.sentinel.name + + name = "volume-405d6afd-43be-4ce0-9e5f-fd49559e2763" size_mb = 1024 ds_ref_val = mock.sentinel.ds_ref_val + ds_ref_val.name = 'ds-1' dc = mock.Mock(spec=object) dc._type = 'Datacenter' ds_ref = mock.Mock(value=ds_ref_val) ds_ref._type = 'Datastore' ds_ref.parent = dc - self.session.invoke_api.return_value = dc + ds_ref.name = 'ds-1' + cinder_uuid = '405d6afd-43be-4ce0-9e5f-fd49559e2763' disk_type = mock.sentinel.disk_type profile_id = mock.sentinel.profile_id def mock_invoke_api(vim_util, method, vim, the_object=None, arg=None, name=None, datacenter=None, - spec=None): + spec=None, **kwargs): if arg == "parent": return the_object.parent if arg == "name": - return mock.sentinel.name + return the_object.name if method == "CreateDisk_Task": return task + if method == "MakeDirectory": + return None self.session.invoke_api.side_effect = mock_invoke_api - - ret = self.vops.create_fcd( - name, size_mb, ds_ref, disk_type, profile_id=profile_id) - + ret = self.vops.create_fcd(cinder_uuid, name, size_mb, + ds_ref, disk_type, + profile_id=profile_id) self.assertEqual(fcd_id, ret.fcd_id) self.assertEqual(ds_ref_val, ret.ds_ref_val) self.session.vim.client.factory.create.assert_called_once_with( @@ -2007,13 +2013,13 @@ def mock_invoke_api(vim_util, method, vim, self.assertEqual([profile_spec], spec.profile) create_profile_spec.assert_called_once_with( self.session.vim.client.factory, profile_id) - ds_folder_path = "[%s] %s" % (mock.sentinel.name, name) + ds_folder_path = "[ds-1] %s" % name self.session.invoke_api.assert_any_call( self.session.vim, 'MakeDirectory', self.session.vim.service_content.fileManager, - name = ds_folder_path, - 
datacenter = dc) + name=ds_folder_path, + datacenter=dc) self.session.invoke_api.assert_any_call( self.session.vim, 'CreateDisk_Task', @@ -2053,10 +2059,20 @@ def test_clone_fcd(self, create_profile_spec, create_fcd_backing_spec): profile_spec = mock.sentinel.profile_spec create_profile_spec.return_value = profile_spec - task = mock.sentinel.task - self.session.invoke_api.return_value = task + def mock_invoke_api(vim_util, method, vim, + the_object=None, arg=None, + name=None, datacenter=None, + spec=None, **kwargs): + + if arg == "parent": + return dc + if arg == "name": + return the_object.name + if method == "CloneVStorageObject_Task": + return task + self.session.invoke_api.side_effect = mock_invoke_api task_info = mock.Mock() fcd_id = mock.sentinel.fcd_id task_info.result.config.id.id = fcd_id @@ -2065,16 +2081,25 @@ def test_clone_fcd(self, create_profile_spec, create_fcd_backing_spec): fcd_location = mock.Mock() fcd_id = mock.sentinel.fcd_id fcd_location.id.return_value = fcd_id - ds_ref = mock.sentinel.ds_ref - fcd_location.ds_ref.return_value = ds_ref + ds_ref_val = mock.sentinel.ds_ref_val + ds_ref_val.name = 'ds-1' + dc = mock.Mock(spec=object) + dc._type = 'Datacenter' + ds_ref = mock.Mock(value=ds_ref_val) + ds_ref._type = 'Datastore' + ds_ref.parent = dc + ds_ref.name = 'ds-1' - name = mock.sentinel.name + fcd_location.ds_ref.return_value = ds_ref + volume = mock.sentinel.name + volume.id = "9b3f6f1b-03a9-4f1e-aaff-ae15122b6ccf" + volume.name = "volume-9b3f6f1b-03a9-4f1e-aaff-ae15122b6ccf" dest_ds_ref_val = mock.sentinel.dest_ds_ref_val dest_ds_ref = mock.Mock(value=dest_ds_ref_val) disk_type = mock.sentinel.disk_type profile_id = mock.sentinel.profile_id - ret = self.vops.clone_fcd( - name, fcd_location, dest_ds_ref, disk_type, profile_id=profile_id) + ret = self.vops.clone_fcd(volume, fcd_location, dest_ds_ref, + disk_type, profile_id=profile_id) self.assertEqual(fcd_id, ret.fcd_id) self.assertEqual(dest_ds_ref_val, ret.ds_ref_val) @@ -2082,13 +2107,13 @@ def test_clone_fcd(self, create_profile_spec, create_fcd_backing_spec): 'ns0:VslmCloneSpec') create_fcd_backing_spec.assert_called_once_with(disk_type, dest_ds_ref, - name) - self.assertEqual(name, spec.name) + volume.name) + self.assertEqual(volume.name, spec.name) self.assertEqual(backing_spec, spec.backingSpec) self.assertEqual([profile_spec], spec.profile) create_profile_spec.assert_called_once_with( self.session.vim.client.factory, profile_id) - self.session.invoke_api.assert_called_once_with( + self.session.invoke_api.assert_any_call( self.session.vim, 'CloneVStorageObject_Task', self.session.vim.service_content.vStorageObjectManager, @@ -2211,7 +2236,7 @@ def test_create_fcd_snapshot(self): self.assertEqual(fcd_snap_id, ret.snap_id) self.assertEqual(fcd_location, ret.fcd_loc) - self.session.invoke_api.assert_called_once_with( + self.session.invoke_api.assert_any_call( self.session.vim, 'VStorageObjectCreateSnapshot_Task', self.session.vim.service_content.vStorageObjectManager, @@ -2247,7 +2272,6 @@ def test_delete_fcd_snapshot(self): '_create_profile_spec') def test_create_fcd_from_snapshot(self, create_profile_spec): task = mock.sentinel.task - self.session.invoke_api.return_value = task profile_spec = mock.sentinel.profile_spec create_profile_spec.return_value = profile_spec @@ -2257,26 +2281,48 @@ def test_create_fcd_from_snapshot(self, create_profile_spec): task_info.result.config.id.id = fcd_id self.session.wait_for_task.return_value = task_info + def mock_invoke_api(vim_util, method, vim, + the_object=None, 
arg=None, + name=None, datacenter=None, + spec=None, **kwargs): + if arg == "parent": + return the_object.parent + if arg == "name": + return the_object.name + if method == "CreateDiskFromSnapshot_Task": + return task + if method == "MakeDirectory": + return None + self.session.invoke_api.side_effect = mock_invoke_api fcd_location = mock.Mock() fcd_id = mock.sentinel.fcd_id + dc = mock.Mock(spec=object) + dc._type = 'Datacenter' fcd_location.id.return_value = fcd_id ds_ref_val = mock.sentinel.ds_ref_val + ds_ref_val.name = 'ds-1' + ds_ref_val.parent = dc + ds_ref_val._type = 'Datastore' ds_ref = mock.Mock(value=ds_ref_val) + ds_ref._type = 'Datastore' + ds_ref.parent = dc + ds_ref.name = 'ds-1' fcd_location.ds_ref.return_value = ds_ref fcd_snap_id = mock.sentinel.fcd_snap_id fcd_snap_loc = mock.Mock(fcd_loc=fcd_location) fcd_snap_loc.id.return_value = fcd_snap_id - name = mock.sentinel.name + name = "volume-9b3f6f1b-03a9-4f1e-aaff-ae15122b6ccf" + cinder_uuid = "9b3f6f1b-03a9-4f1e-aaff-ae15122b6ccf" profile_id = mock.sentinel.profile_id ret = self.vops.create_fcd_from_snapshot( - fcd_snap_loc, name, profile_id=profile_id) + fcd_snap_loc, name, cinder_uuid, profile_id=profile_id) self.assertEqual(fcd_id, ret.fcd_id) self.assertEqual(ds_ref_val, ret.ds_ref_val) create_profile_spec.assert_called_once_with( self.session.vim.client.factory, profile_id) - self.session.invoke_api.assert_called_once_with( + self.session.invoke_api.assert_any_call( self.session.vim, 'CreateDiskFromSnapshot_Task', self.session.vim.service_content.vStorageObjectManager, @@ -2284,7 +2330,8 @@ def test_create_fcd_from_snapshot(self, create_profile_spec): datastore=ds_ref, snapshotId=fcd_snap_id, name=name, - profile=[profile_spec]) + profile=[profile_spec], + path=name+'/') self.session.wait_for_task.assert_called_once_with(task) @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' diff --git a/cinder/volume/drivers/vmware/fcd.py b/cinder/volume/drivers/vmware/fcd.py index 4ae414e7aba..60f1ff384b8 100644 --- a/cinder/volume/drivers/vmware/fcd.py +++ b/cinder/volume/drivers/vmware/fcd.py @@ -228,8 +228,9 @@ def copy_image_to_volume(self, context, volume, image_service, image_id): profile_id = self._get_storage_profile_id(volume) if profile_id: self.volumeops.update_fcd_policy(fcd_loc, profile_id) - - self.volumeops.update_fcd_vmdk_uuid(summary.datastore, vmdk_path, volume.id) + + self.volumeops.update_fcd_vmdk_uuid(summary.datastore, + vmdk_path, volume.id) return {'provider_location': fcd_loc.provider_location()} @@ -300,12 +301,13 @@ def extend_volume(self, volume, new_size): volume.provider_location) self.volumeops.extend_fcd(fcd_loc, new_size * units.Ki) - def _clone_fcd(self, provider_loc, name, dest_ds_ref, + def _clone_fcd(self, provider_loc, volume, dest_ds_ref, disk_type=vops.VirtualDiskType.THIN, profile_id=None): fcd_loc = vops.FcdLocation.from_provider_location(provider_loc) - return self.volumeops.clone_fcd( - name, fcd_loc, dest_ds_ref, disk_type, profile_id=profile_id) + return self.volumeops.clone_fcd(volume, + fcd_loc, dest_ds_ref, + disk_type, profile_id=profile_id) def create_snapshot(self, snapshot): """Creates a snapshot. 
@@ -346,16 +348,13 @@ def _extend_if_needed(self, fcd_loc, cur_size, new_size): self.volumeops.extend_fcd(fcd_loc, new_size * units.Ki) def _create_volume_from_fcd(self, provider_loc, cur_size, volume): - cf = self._session.vim.client.factory ds_ref = self._select_ds_fcd(volume) disk_type = self._get_disk_type(volume) profile_id = self._get_storage_profile_id(volume) cloned_fcd_loc = self._clone_fcd( - provider_loc, volume.name, ds_ref, disk_type=disk_type, + provider_loc, volume, ds_ref, disk_type=disk_type, profile_id=profile_id) self._extend_if_needed(cloned_fcd_loc, cur_size, volume.size) - vmdk_path = self.volumeops.get_vmdk_path_for_fcd(ds_ref, cloned_fcd_loc.id(cf)) - self.volumeops.update_fcd_vmdk_uuid(ds_ref, vmdk_path, volume.id) return {'provider_location': cloned_fcd_loc.provider_location()} def create_volume_from_snapshot(self, volume, snapshot): diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index cacc555aa36..88836fa754a 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2144,11 +2144,11 @@ def get_vmdk_path_for_fcd(self, ds_ref, disk_id): datastore=ds_ref) vmdk_path = fcd_obj.config.backing.filePath return vmdk_path - - def update_fcd_vmdk_uuid(self, ds_ref, vmdk_path, cinder_uuid ): + + def update_fcd_vmdk_uuid(self, ds_ref, vmdk_path, cinder_uuid): def cinder_uuid_to_vmwhex(cinder_uuid): - t = iter(cinder_uuid.replace('-','')) - hextext = ' '.join(a+b for a,b in zip(t, t)) + t = iter(cinder_uuid.replace('-', '')) + hextext = ' '.join(a+b for a, b in zip(t, t)) return hextext[:23] + '-' + hextext[24:] virtual_dmgr = self._session.vim.service_content.virtualDiskManager @@ -2165,7 +2165,8 @@ def _create_profile_spec(self, cf, profile_id): profile_spec.profileId = profile_id return profile_spec - def create_fcd(self, cinder_uuid, name, size_mb, ds_ref, disk_type, profile_id=None): + def create_fcd(self, cinder_uuid, name, size_mb, + ds_ref, disk_type, profile_id=None): cf = self._session.vim.client.factory spec = cf.create('ns0:VslmCreateSpec') spec.capacityInMB = size_mb @@ -2193,10 +2194,10 @@ def create_fcd(self, cinder_uuid, name, size_mb, ds_ref, disk_type, profile_id=N fcd_obj = task_info.result fcd_loc = FcdLocation.create(fcd_obj.config.id, ds_ref) vmdk_path = fcd_obj.config.backing.filePath - self.update_fcd_vmdk_uuid(ds_ref, vmdk_path, cinder_uuid ) + self.update_fcd_vmdk_uuid(ds_ref, vmdk_path, cinder_uuid) LOG.debug("Created fcd: %s.", fcd_loc) return fcd_loc - + def delete_fcd(self, fcd_location): cf = self._session.vim.client.factory vstorage_mgr = self._session.vim.service_content.vStorageObjectManager @@ -2209,13 +2210,19 @@ def delete_fcd(self, fcd_location): self._session.wait_for_task(task) def clone_fcd( - self, name, fcd_location, dest_ds_ref, disk_type, profile_id=None): + self, volume, fcd_location, dest_ds_ref, + disk_type, profile_id=None): cf = self._session.vim.client.factory spec = cf.create('ns0:VslmCloneSpec') - spec.name = name + spec.name = volume.name + dc_ref = self.get_dc(dest_ds_ref) + ds_name = self._session.invoke_api(vim_util, 'get_object_property', + self._session.vim, dest_ds_ref, + 'name') + self.create_datastore_folder(ds_name, volume.name, dc_ref) spec.backingSpec = self._create_fcd_backing_spec(disk_type, dest_ds_ref, - name) + volume.name) if profile_id: profile_spec = self._create_profile_spec(cf, profile_id) @@ -2234,6 +2241,8 @@ def clone_fcd( datastore=fcd_location.ds_ref(), spec=spec) task_info = 
self._session.wait_for_task(task) + vmdk_path = task_info.result.config.backing.filePath + self.update_fcd_vmdk_uuid(dest_ds_ref, vmdk_path, volume.id) dest_fcd_loc = FcdLocation.create(task_info.result.config.id, dest_ds_ref) LOG.debug("Clone fcd: %s.", dest_fcd_loc) From f92a1aee74d00d3c0d7315e81c4ed7d489363779 Mon Sep 17 00:00:00 2001 From: Csaba Seres Date: Wed, 19 Jul 2023 11:28:58 +0200 Subject: [PATCH 139/149] Fix pep8 --- .../tests/unit/volume/drivers/vmware/test_vmware_volumeops.py | 2 +- cinder/volume/drivers/vmware/volumeops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py index d7c25f3f2e6..d9904173643 100644 --- a/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py +++ b/cinder/tests/unit/volume/drivers/vmware/test_vmware_volumeops.py @@ -2331,7 +2331,7 @@ def mock_invoke_api(vim_util, method, vim, snapshotId=fcd_snap_id, name=name, profile=[profile_spec], - path=name+'/') + path=name + '/') self.session.wait_for_task.assert_called_once_with(task) @mock.patch('cinder.volume.drivers.vmware.volumeops.VMwareVolumeOps.' diff --git a/cinder/volume/drivers/vmware/volumeops.py b/cinder/volume/drivers/vmware/volumeops.py index 88836fa754a..b8f68e559ee 100644 --- a/cinder/volume/drivers/vmware/volumeops.py +++ b/cinder/volume/drivers/vmware/volumeops.py @@ -2148,7 +2148,7 @@ def get_vmdk_path_for_fcd(self, ds_ref, disk_id): def update_fcd_vmdk_uuid(self, ds_ref, vmdk_path, cinder_uuid): def cinder_uuid_to_vmwhex(cinder_uuid): t = iter(cinder_uuid.replace('-', '')) - hextext = ' '.join(a+b for a, b in zip(t, t)) + hextext = ' '.join(a + b for a, b in zip(t, t)) return hextext[:23] + '-' + hextext[24:] virtual_dmgr = self._session.vim.service_content.virtualDiskManager From 63e996c7e1f87fb3ca2378c80214f673166533f3 Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 24 Aug 2023 16:27:35 -0400 Subject: [PATCH 140/149] [SAP] Fix vmware fcd driver delete_volume on failure This patch catches the VIMException when an fcd volume isn't found during a delete_volume. This is considered a success. --- cinder/volume/drivers/vmware/fcd.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/cinder/volume/drivers/vmware/fcd.py b/cinder/volume/drivers/vmware/fcd.py index 60f1ff384b8..a6ca812cf0a 100644 --- a/cinder/volume/drivers/vmware/fcd.py +++ b/cinder/volume/drivers/vmware/fcd.py @@ -23,6 +23,7 @@ from oslo_log import log as logging from oslo_utils import units from oslo_utils import versionutils +from oslo_vmware import exceptions as vexc from oslo_vmware import image_transfer from oslo_vmware.objects import datastore from oslo_vmware import vim_util @@ -147,7 +148,15 @@ def delete_volume(self, volume): LOG.warning("FCD provider location is empty for volume %s", volume.id) else: - self._delete_fcd(volume.provider_location) + try: + self._delete_fcd(volume.provider_location) + except vexc.VimException as ex: + if "could not be found" in str(ex): + LOG.warning("FCD deletion failed for %s not found. " + "delete_volume is considered successful.", + volume.id) + else: + raise ex def initialize_connection(self, volume, connector, initiator_data=None): """Allow connection to connector and return connection info. 
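For illustration only (not part of the patch series): the delete_volume hunk above makes FCD deletion idempotent. A standalone sketch of the same pattern follows, assuming only oslo_vmware's VimException and the "could not be found" message check that the patch itself relies on; delete_fn, provider_location and log are hypothetical parameters.

    from oslo_vmware import exceptions as vexc

    def delete_fcd_idempotently(delete_fn, provider_location, log):
        # Treat an already-missing FCD as a successful delete so that
        # retried or racing deletions do not raise an error.
        try:
            delete_fn(provider_location)
        except vexc.VimException as ex:
            if "could not be found" in str(ex):
                log.warning("FCD %s not found; treating delete as success.",
                            provider_location)
            else:
                raise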
From f8d0868cc7ec4c28b7842881767c681ef1eb581e Mon Sep 17 00:00:00 2001 From: Walter Boring IV Date: Thu, 31 Aug 2023 16:17:33 -0400 Subject: [PATCH 141/149] [SAP] Add envlist to tox.ini The envlist was missing from the tox.ini, which prevented running 'tox' without an -e option correctly. The upper-constraints were not being applied to the tox environment runtime, which allowed SQLAlchemy 2.0.x to be installed and then the tests would fail instantly. --- tox.ini | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tox.ini b/tox.ini index 6c80c80bdce..2a270aedeaf 100644 --- a/tox.ini +++ b/tox.ini @@ -6,6 +6,8 @@ requires = virtualenv>=20.17.1 # this allows tox to infer the base python from the environment name # and override any basepython configured in this file ignore_basepython_conflict=true +# python runtimes: https://governance.openstack.org/tc/reference/project-testing-interface.html#tested-runtimes +envlist = py3,compliance,pep8 [testenv] basepython = python3 From 1798b5a5368b81445bb63f53662176987e9ee55e Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Tue, 13 Jun 2023 13:54:37 +0300 Subject: [PATCH 142/149] K8S volumes to be scheduled in the same shard Volumes that are created by a K8S cluster will get scheduled in the same shard (vCenter). This identifies the K8S cluster by looking at the metadata [1] set by the CSI driver when creating the volumes. [1] cinder.csi.openstack.org/cluster --- cinder/objects/volume.py | 13 +++ cinder/scheduler/base_filter.py | 6 ++ cinder/scheduler/filters/shard_filter.py | 80 ++++++++++++++++- cinder/tests/unit/objects/test_volume.py | 21 +++++ .../tests/unit/scheduler/test_base_filter.py | 23 +++++ .../tests/unit/scheduler/test_shard_filter.py | 88 +++++++++++++++---- 6 files changed, 211 insertions(+), 20 deletions(-) diff --git a/cinder/objects/volume.py b/cinder/objects/volume.py index a23c65e102f..ec902d97210 100644 --- a/cinder/objects/volume.py +++ b/cinder/objects/volume.py @@ -692,3 +692,16 @@ def get_all_active_by_window(cls, context, begin, end): expected_attrs = cls._get_expected_attrs(context) return base.obj_make_list(context, cls(context), objects.Volume, volumes, expected_attrs=expected_attrs) + + @classmethod + def get_all_by_metadata(cls, context, project_id, metadata, marker=None, + limit=None, sort_keys=None, sort_dirs=None, + filters=None, offset=None): + query_filters = {'metadata': metadata} + if filters: + query_filters.update(filters) + volumes = db.volume_get_all_by_project( + context, project_id, marker, limit, sort_keys=sort_keys, + sort_dirs=sort_dirs, filters=query_filters, offset=offset) + return base.obj_make_list(context, cls(context), objects.Volume, + volumes) diff --git a/cinder/scheduler/base_filter.py b/cinder/scheduler/base_filter.py index 07180667a10..f41939a1af4 100644 --- a/cinder/scheduler/base_filter.py +++ b/cinder/scheduler/base_filter.py @@ -92,6 +92,7 @@ def get_filtered_objects(self, filter_classes, objs, each resource. 
""" list_objs = list(objs) + all_objs = list(objs) LOG.debug("Starting with %d host(s)", len(list_objs)) # The 'part_filter_results' list just tracks the number of hosts # before and after the filter, unless the filter returns zero @@ -105,6 +106,11 @@ def get_filtered_objects(self, filter_classes, objs, start_count = len(list_objs) filter_class = filter_cls() + # SAP + # All available backends are needed in ShardFilter + if hasattr(filter_class, 'all_backend_states'): + setattr(filter_class, 'all_backend_states', all_objs) + if filter_class.run_filter_for_index(index): objs = filter_class.filter_all(list_objs, filter_properties) if objs is None: diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index 8a0434916f4..a0fa958d5f4 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -19,9 +19,12 @@ from oslo_config import cfg from oslo_log import log as logging +from cinder import context as cinder_context +from cinder.objects.volume import VolumeList from cinder.scheduler import filters from cinder.service_auth import SERVICE_USER_GROUP from cinder import utils as cinder_utils +from cinder.volume import volume_utils LOG = logging.getLogger(__name__) @@ -40,6 +43,8 @@ service_type='identity') CONF.register_opts(keystone_opts, group=KEYSTONE_GROUP) +CSI_CLUSTER_METADATA_KEY = 'cinder.csi.openstack.org/cluster' + class ShardFilter(filters.BaseBackendFilter): """Filters backends by shard of the project @@ -61,6 +66,9 @@ class ShardFilter(filters.BaseBackendFilter): _CAPABILITY_NAME = 'vcenter-shard' _ALL_SHARDS = "sharding_enabled" + # To be populated by the host manager + all_backend_states = [] + def _get_keystone_adapter(self): """Return a keystone adapter @@ -142,14 +150,78 @@ def _get_shards(self, project_id): return self._PROJECT_SHARD_CACHE.get(project_id) - def backend_passes(self, backend_state, filter_properties): + def _is_vmware(self, backend_state): # We only need the shard filter for vmware based pools if backend_state.vendor_name != 'VMware': LOG.info( - "Shard Filter ignoring backend %s as it's not vmware based" - " driver", backend_state.backend_id) - return True + "Shard Filter ignoring backend %s as it's not " + "vmware based driver", backend_state.backend_id) + return False + return True + def filter_all(self, filter_obj_list, filter_properties): + backends = self._filter_by_k8s_cluster(filter_obj_list, + filter_properties) + + return [b for b in backends if + self._backend_passes(b, filter_properties)] + + def _filter_by_k8s_cluster(self, backends, filter_properties): + spec = filter_properties.get('request_spec', {}) + vol_props = spec.get('volume_properties', {}) + project_id = vol_props.get('project_id', None) + metadata = vol_props.get('metadata', {}) + + if (not metadata or not project_id + or spec.get('snapshot_id')): + return backends + + cluster_name = metadata.get(CSI_CLUSTER_METADATA_KEY) + if not cluster_name: + return backends + + props = spec.get('resource_properties', {}) + availability_zone = props.get('availability_zone') + query_filters = None + if availability_zone: + query_filters = {'availability_zone': availability_zone} + + query_metadata = {CSI_CLUSTER_METADATA_KEY: cluster_name} + k8s_volumes = VolumeList.get_all_by_metadata( + cinder_context.get_admin_context(), + project_id, query_metadata, + filters=query_filters) + + if not k8s_volumes: + return backends + + k8s_hosts = set(volume_utils.extract_host(v.host, 'host') + for v in k8s_volumes + if v.id != 
spec.get('volume_id') and v.host) + if not k8s_hosts: + return backends + + def _backend_shards(backend_state): + cap = backend_state.capabilities.get(self._CAPABILITY_NAME) + return cap.split(',') if cap else [] + + hosts_shards_map = { + volume_utils.extract_host(bs.host, 'host'): + _backend_shards(bs) + for bs in self.all_backend_states} + + k8s_shards = set() + for host in k8s_hosts: + shards = hosts_shards_map[host] + k8s_shards.update(shards) + + return [ + b for b in backends if + (not self._is_vmware(b) or + set(_backend_shards(b)) & k8s_shards) + ] + + def _backend_passes(self, backend_state, filter_properties): spec = filter_properties.get('request_spec', {}) vol = spec.get('volume_properties', {}) project_id = vol.get('project_id', None) diff --git a/cinder/tests/unit/objects/test_volume.py b/cinder/tests/unit/objects/test_volume.py index bb604b1fd1c..69771fd7d07 100644 --- a/cinder/tests/unit/objects/test_volume.py +++ b/cinder/tests/unit/objects/test_volume.py @@ -694,3 +694,24 @@ def test_populate_consistencygroup(self, mock_db_grp_create): volume.populate_consistencygroup() self.assertEqual(volume.group_id, volume.consistencygroup_id) self.assertEqual(volume.group.id, volume.consistencygroup.id) + + @mock.patch('cinder.db.volume_get_all_by_project') + def test_get_by_metadata(self, get_all_by_project): + db_volume = fake_volume.fake_db_volume() + get_all_by_project.return_value = [db_volume] + + volumes = objects.VolumeList.get_all_by_metadata( + self.context, mock.sentinel.project_id, mock.sentinel.metadata, + mock.sentinel.marker, mock.sentinel.limit, + mock.sentinel.sorted_keys, mock.sentinel.sorted_dirs) + + self.assertEqual(1, len(volumes)) + TestVolume._compare(self, db_volume, volumes[0]) + + get_all_by_project.assert_called_once_with( + self.context, mock.sentinel.project_id, + mock.sentinel.marker, mock.sentinel.limit, + sort_keys=mock.sentinel.sorted_keys, + sort_dirs=mock.sentinel.sorted_dirs, + filters={'metadata': mock.sentinel.metadata}, + offset=None) diff --git a/cinder/tests/unit/scheduler/test_base_filter.py b/cinder/tests/unit/scheduler/test_base_filter.py index 47525699cb5..74e78491585 100644 --- a/cinder/tests/unit/scheduler/test_base_filter.py +++ b/cinder/tests/unit/scheduler/test_base_filter.py @@ -90,6 +90,15 @@ class FakeFilter5(BaseFakeFilter): pass +class FakeFilterAllBackends(BaseFakeFilter): + """Derives from BaseFakeFilter but has no entry point. + + Should not be included. 
+ """ + all_backend_states = None + pass + + class FilterA(base_filter.BaseFilter): def filter_all(self, list_objs, filter_properties): # return all but the first object @@ -172,3 +181,17 @@ def test_get_filtered_objects_with_filter_run_once(self): result = self._get_filtered_objects(filter_classes, index=2) self.assertEqual(filter_objs_expected, result) self.assertEqual(1, fake5_filter_all.call_count) + + @mock.patch.object(FakeFilterAllBackends, 'all_backend_states', + new_callable=mock.PropertyMock) + @mock.patch.object(FakeFilterAllBackends, 'filter_all') + def test_get_filtered_objects_with_all_backend_states(self, filter_all, + all_backends): + filter_objs_expected = [1, 2, 3, 4] + filter_classes = [FakeFilterAllBackends] + filter_all.return_value = filter_objs_expected + self._get_filtered_objects(filter_classes) + all_backends.assert_has_calls([ + mock.call(), + mock.call(filter_objs_expected) + ]) diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 9a9a41316d8..6380c0c534f 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ b/cinder/tests/unit/scheduler/test_shard_filter.py @@ -14,6 +14,9 @@ import time from unittest import mock +from cinder import context +from cinder.tests.unit import fake_constants +from cinder.tests.unit import fake_volume from cinder.tests.unit.scheduler import fakes from cinder.tests.unit.scheduler.test_host_filters \ import BackendFiltersTestCase @@ -37,6 +40,8 @@ def setUp(self): } } } + self.context = context.RequestContext(fake_constants.USER_ID, + fake_constants.PROJECT_ID) @mock.patch('cinder.scheduler.filters.shard_filter.' 'ShardFilter._update_cache') @@ -77,7 +82,7 @@ def test_shard_project_not_found(self, mock_update_cache): host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) def test_snapshot(self): snap_props = { @@ -90,7 +95,7 @@ def test_snapshot(self): host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertTrue(self.filt_cls.backend_passes(host, snap_props)) + self.backend_passes(host, snap_props) def test_snapshot_None(self): snap_props = { @@ -103,7 +108,7 @@ def test_snapshot_None(self): host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertFalse(self.filt_cls.backend_passes(host, snap_props)) + self.backend_no_pass(host, snap_props) def test_shard_project_no_shards(self): caps = {'vcenter-shard': 'vc-a-1'} @@ -111,25 +116,25 @@ def test_shard_project_no_shards(self): host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) def test_backend_without_shard(self): host = fakes.FakeBackendState('host1', {'vendor_name': VMWARE_VENDOR}) - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) def test_backend_shards_dont_match(self): caps = {'vcenter-shard': 'vc-a-1'} host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) def test_backend_shards_match(self): caps = {'vcenter-shard': 'vc-b-0'} host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - 
self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) def test_shard_override_matches(self): caps = {'vcenter-shard': 'vc-a-1'} @@ -137,7 +142,7 @@ def test_shard_override_matches(self): {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} - self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) def test_shard_override_no_match(self): caps = {'vcenter-shard': 'vc-a-0'} @@ -145,7 +150,7 @@ def test_shard_override_no_match(self): {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) def test_shard_override_no_data(self): caps = {'vcenter-shard': 'vc-a-0'} @@ -153,7 +158,7 @@ def test_shard_override_no_data(self): {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': None} - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) def test_sharding_enabled_any_backend_match(self): self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled'] @@ -162,7 +167,7 @@ def test_sharding_enabled_any_backend_match(self): host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) def test_sharding_enabled_and_single_shard_any_backend_match(self): self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled', @@ -172,7 +177,7 @@ def test_sharding_enabled_and_single_shard_any_backend_match(self): host = fakes.FakeBackendState('host1', {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) - self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) def test_scheduler_hints_override_sharding_enabled(self): self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled'] @@ -182,12 +187,12 @@ def test_scheduler_hints_override_sharding_enabled(self): host = fakes.FakeBackendState('host0', {'capabilities': caps0, 'vendor_name': VMWARE_VENDOR}) - self.assertFalse(self.filt_cls.backend_passes(host, self.props)) + self.backend_no_pass(host, self.props) caps1 = {'vcenter-shard': 'vc-a-1'} host = fakes.FakeBackendState('host1', {'capabilities': caps1, 'vendor_name': VMWARE_VENDOR}) - self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) def test_noop_for_find_backend_by_connector_with_hint(self): """Check if we pass any backend @@ -204,7 +209,7 @@ def test_noop_for_find_backend_by_connector_with_hint(self): 'vendor_name': VMWARE_VENDOR}) self.props['scheduler_hints'] = {'vcenter-shard': 'vc-a-1'} self.props['request_spec']['operation'] = 'find_backend_for_connector' - self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) def test_noop_for_find_backend_by_connector_without_hint(self): """Check if we pass any backend @@ -221,4 +226,55 @@ def test_noop_for_find_backend_by_connector_without_hint(self): {'capabilities': caps, 'vendor_name': VMWARE_VENDOR}) self.props['request_spec']['operation'] = 'find_backend_for_connector' - self.assertTrue(self.filt_cls.backend_passes(host, self.props)) + self.backend_passes(host, self.props) + + @mock.patch('cinder.context.get_admin_context') + 
@mock.patch('cinder.objects.volume.VolumeList.get_all_by_metadata') + def test_same_shard_for_k8s_volumes(self, mock_get_all, + mock_get_context): + all_backends = [ + fakes.FakeBackendState( + 'volume-vc-a-0@backend#pool1', + {'capabilities': {'vcenter-shard': 'vc-a-0'}, + 'vendor_name': VMWARE_VENDOR}), + fakes.FakeBackendState( + 'volume-vc-a-1@backend#pool2', + {'capabilities': {'vcenter-shard': 'vc-a-1'}, + 'vendor_name': VMWARE_VENDOR}), + ] + mock_get_context.return_value = self.context + fake_meta = { + 'cinder.csi.openstack.org/cluster': 'cluster-1', + } + mock_get_all.return_value = [ + fake_volume.fake_volume_obj(self.context, metadata=fake_meta, + host='volume-vc-a-1@backend#pool3') + ] + self.filt_cls.all_backend_states = all_backends + self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled', + 'vc-a-1'] + filter_props = dict(self.props) + filter_props['request_spec']['volume_properties'].update({ + 'project_id': 'baz', + 'metadata': fake_meta + }) + filter_props['request_spec']['resource_properties'] = { + 'availability_zone': 'az-1' + } + + filtered = self.filt_cls.filter_all(all_backends, filter_props) + + mock_get_all.assert_called_once_with( + self.context, 'baz', fake_meta, filters={ + 'availability_zone': 'az-1' + }) + self.assertEqual(len(filtered), 1) + self.assertEqual('volume-vc-a-1@backend#pool2', filtered[0].host) + + def backend_passes(self, backend, filter_properties): + filtered = self.filt_cls.filter_all([backend], filter_properties) + self.assertEqual(backend, filtered[0]) + + def backend_no_pass(self, backend, filter_properties): + filtered = self.filt_cls.filter_all([backend], filter_properties) + self.assertEqual(0, len(filtered)) From b1a78036fb1372305937ea738dd962a1ef99b81e Mon Sep 17 00:00:00 2001 From: Marius Leustean Date: Mon, 3 Jul 2023 15:33:06 +0300 Subject: [PATCH 143/149] Refactored using low-level DB query on volume_metadata --- cinder/db/api.py | 6 +++ cinder/db/sqlalchemy/api.py | 32 ++++++++++++ cinder/objects/volume.py | 13 ----- cinder/scheduler/base_filter.py | 6 --- cinder/scheduler/filters/shard_filter.py | 52 ++++++------------- cinder/tests/unit/objects/test_volume.py | 21 -------- .../tests/unit/scheduler/test_base_filter.py | 23 -------- .../tests/unit/scheduler/test_shard_filter.py | 18 +++---- 8 files changed, 61 insertions(+), 110 deletions(-) diff --git a/cinder/db/api.py b/cinder/db/api.py index fa792a1cc13..4e378a6206e 100644 --- a/cinder/db/api.py +++ b/cinder/db/api.py @@ -281,6 +281,12 @@ def volume_get_all(context, marker=None, limit=None, sort_keys=None, offset=offset) +def get_host_by_volume_metadata(key, value, filters=None): + """Returns the host with the most volumes matching volume metadata.""" + return IMPL.get_host_by_volume_metadata(key, value, + filters=filters) + + def calculate_resource_count(context, resource_type, filters): return IMPL.calculate_resource_count(context, resource_type, filters) diff --git a/cinder/db/sqlalchemy/api.py b/cinder/db/sqlalchemy/api.py index 57b98a73c7e..a88d7ee88be 100644 --- a/cinder/db/sqlalchemy/api.py +++ b/cinder/db/sqlalchemy/api.py @@ -2176,6 +2176,38 @@ def volume_get_all(context, marker=None, limit=None, sort_keys=None, return query.all() +def get_host_by_volume_metadata(meta_key, meta_value, filters=None): + session = get_session() + count_label = func.count().label("n") + query = session.query( + func.substring_index(models.Volume.host, '@', 1).label("h"), + count_label + ).join( + models.VolumeMetadata, + models.VolumeMetadata.volume_id == models.Volume.id + 
).filter( + models.VolumeMetadata.key == meta_key, + models.VolumeMetadata.value == meta_value, + models.Volume.deleted == 0, + models.Volume.host.isnot(None) + ) + + if filters: + az = filters.get('availability_zone') + if az: + query = query.filter( + models.Volume.availability_zone == az) + + query = query.group_by("h")\ + .order_by(desc(count_label)).limit(1) + + with session.begin(): + result = query.first() + if result: + return result[0] + return None + + @require_context def get_volume_summary(context, project_only, filters=None): """Retrieves all volumes summary. diff --git a/cinder/objects/volume.py b/cinder/objects/volume.py index ec902d97210..a23c65e102f 100644 --- a/cinder/objects/volume.py +++ b/cinder/objects/volume.py @@ -692,16 +692,3 @@ def get_all_active_by_window(cls, context, begin, end): expected_attrs = cls._get_expected_attrs(context) return base.obj_make_list(context, cls(context), objects.Volume, volumes, expected_attrs=expected_attrs) - - @classmethod - def get_all_by_metadata(cls, context, project_id, metadata, marker=None, - limit=None, sort_keys=None, sort_dirs=None, - filters=None, offset=None): - query_filters = {'metadata': metadata} - if filters: - query_filters.update(filters) - volumes = db.volume_get_all_by_project( - context, project_id, marker, limit, sort_keys=sort_keys, - sort_dirs=sort_dirs, filters=query_filters, offset=offset) - return base.obj_make_list(context, cls(context), objects.Volume, - volumes) diff --git a/cinder/scheduler/base_filter.py b/cinder/scheduler/base_filter.py index f41939a1af4..07180667a10 100644 --- a/cinder/scheduler/base_filter.py +++ b/cinder/scheduler/base_filter.py @@ -92,7 +92,6 @@ def get_filtered_objects(self, filter_classes, objs, each resource. """ list_objs = list(objs) - all_objs = list(objs) LOG.debug("Starting with %d host(s)", len(list_objs)) # The 'part_filter_results' list just tracks the number of hosts # before and after the filter, unless the filter returns zero @@ -106,11 +105,6 @@ def get_filtered_objects(self, filter_classes, objs, start_count = len(list_objs) filter_class = filter_cls() - # SAP - # All available backends are needed in ShardFilter - if hasattr(filter_class, 'all_backend_states'): - setattr(filter_class, 'all_backend_states', all_objs) - if filter_class.run_filter_for_index(index): objs = filter_class.filter_all(list_objs, filter_properties) if objs is None: diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index a0fa958d5f4..7452dd7b7fc 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -19,8 +19,7 @@ from oslo_config import cfg from oslo_log import log as logging -from cinder import context as cinder_context -from cinder.objects.volume import VolumeList +from cinder import db from cinder.scheduler import filters from cinder.service_auth import SERVICE_USER_GROUP from cinder import utils as cinder_utils @@ -66,9 +65,6 @@ class ShardFilter(filters.BaseBackendFilter): _CAPABILITY_NAME = 'vcenter-shard' _ALL_SHARDS = "sharding_enabled" - # To be populated by the host manager - all_backend_states = [] - def _get_keystone_adapter(self): """Return a keystone adapter @@ -151,11 +147,7 @@ def _get_shards(self, project_id): return self._PROJECT_SHARD_CACHE.get(project_id) def _is_vmware(self, backend_state): - # We only need the shard filter for vmware based pools if backend_state.vendor_name != 'VMware': - LOG.info( - "Shard Filter ignoring backend %s as it's not " - "vmware based driver", 
backend_state.backend_id) return False return True @@ -172,8 +164,10 @@ def _filter_by_k8s_cluster(self, backends, filter_properties): project_id = vol_props.get('project_id', None) metadata = vol_props.get('metadata', {}) + is_vmware = any(self._is_vmware(b) for b in backends) if (not metadata or not project_id - or spec.get('snapshot_id')): + or spec.get('snapshot_id') + or not is_vmware): return backends cluster_name = metadata.get(CSI_CLUSTER_METADATA_KEY) @@ -186,42 +180,28 @@ def _filter_by_k8s_cluster(self, backends, filter_properties): if availability_zone: query_filters = {'availability_zone': availability_zone} - query_metadata = {CSI_CLUSTER_METADATA_KEY: cluster_name} - k8s_volumes = VolumeList.get_all_by_metadata( - cinder_context.get_admin_context(), - project_id, query_metadata, + k8s_host = db.get_host_by_volume_metadata( + key=CSI_CLUSTER_METADATA_KEY, + value=cluster_name, filters=query_filters) - if not k8s_volumes: - return backends - - k8s_hosts = set(volume_utils.extract_host(v.host, 'host') - for v in k8s_volumes - if v.id != spec.get('volume_id') and v.host) - if not k8s_hosts: + if not k8s_host: return backends - def _backend_shards(backend_state): - cap = backend_state.capabilities.get(self._CAPABILITY_NAME) - return cap.split(',') if cap else [] - - hosts_shards_map = { - volume_utils.extract_host(bs.host, 'host'): - _backend_shards(bs) - for bs in self.all_backend_states} - - k8s_shards = set() - for host in k8s_hosts: - shards = hosts_shards_map[host] - k8s_shards.update(shards) - return [ b for b in backends if (not self._is_vmware(b) or - set(_backend_shards(b)) & k8s_shards) + volume_utils.extract_host(b.host, 'host') == k8s_host) ] def _backend_passes(self, backend_state, filter_properties): + # We only need the shard filter for vmware based pools + if not self._is_vmware(backend_state): + LOG.info( + "Shard Filter ignoring backend %s as it's not " + "vmware based driver", backend_state.backend_id) + return True + spec = filter_properties.get('request_spec', {}) vol = spec.get('volume_properties', {}) project_id = vol.get('project_id', None) diff --git a/cinder/tests/unit/objects/test_volume.py b/cinder/tests/unit/objects/test_volume.py index 69771fd7d07..bb604b1fd1c 100644 --- a/cinder/tests/unit/objects/test_volume.py +++ b/cinder/tests/unit/objects/test_volume.py @@ -694,24 +694,3 @@ def test_populate_consistencygroup(self, mock_db_grp_create): volume.populate_consistencygroup() self.assertEqual(volume.group_id, volume.consistencygroup_id) self.assertEqual(volume.group.id, volume.consistencygroup.id) - - @mock.patch('cinder.db.volume_get_all_by_project') - def test_get_by_metadata(self, get_all_by_project): - db_volume = fake_volume.fake_db_volume() - get_all_by_project.return_value = [db_volume] - - volumes = objects.VolumeList.get_all_by_metadata( - self.context, mock.sentinel.project_id, mock.sentinel.metadata, - mock.sentinel.marker, mock.sentinel.limit, - mock.sentinel.sorted_keys, mock.sentinel.sorted_dirs) - - self.assertEqual(1, len(volumes)) - TestVolume._compare(self, db_volume, volumes[0]) - - get_all_by_project.assert_called_once_with( - self.context, mock.sentinel.project_id, - mock.sentinel.marker, mock.sentinel.limit, - sort_keys=mock.sentinel.sorted_keys, - sort_dirs=mock.sentinel.sorted_dirs, - filters={'metadata': mock.sentinel.metadata}, - offset=None) diff --git a/cinder/tests/unit/scheduler/test_base_filter.py b/cinder/tests/unit/scheduler/test_base_filter.py index 74e78491585..47525699cb5 100644 --- 
a/cinder/tests/unit/scheduler/test_base_filter.py
+++ b/cinder/tests/unit/scheduler/test_base_filter.py
@@ -90,15 +90,6 @@ class FakeFilter5(BaseFakeFilter):
     pass


-class FakeFilterAllBackends(BaseFakeFilter):
-    """Derives from BaseFakeFilter but has no entry point.
-
-    Should not be included.
-    """
-    all_backend_states = None
-    pass
-
-
 class FilterA(base_filter.BaseFilter):
     def filter_all(self, list_objs, filter_properties):
         # return all but the first object
@@ -181,17 +172,3 @@ def test_get_filtered_objects_with_filter_run_once(self):
         result = self._get_filtered_objects(filter_classes, index=2)
         self.assertEqual(filter_objs_expected, result)
         self.assertEqual(1, fake5_filter_all.call_count)
-
-    @mock.patch.object(FakeFilterAllBackends, 'all_backend_states',
-                       new_callable=mock.PropertyMock)
-    @mock.patch.object(FakeFilterAllBackends, 'filter_all')
-    def test_get_filtered_objects_with_all_backend_states(self, filter_all,
-                                                          all_backends):
-        filter_objs_expected = [1, 2, 3, 4]
-        filter_classes = [FakeFilterAllBackends]
-        filter_all.return_value = filter_objs_expected
-        self._get_filtered_objects(filter_classes)
-        all_backends.assert_has_calls([
-            mock.call(),
-            mock.call(filter_objs_expected)
-        ])
diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py
index 6380c0c534f..34cc79c3bc4 100644
--- a/cinder/tests/unit/scheduler/test_shard_filter.py
+++ b/cinder/tests/unit/scheduler/test_shard_filter.py
@@ -16,7 +16,6 @@

 from cinder import context
 from cinder.tests.unit import fake_constants
-from cinder.tests.unit import fake_volume
 from cinder.tests.unit.scheduler import fakes
 from cinder.tests.unit.scheduler.test_host_filters \
     import BackendFiltersTestCase
@@ -229,9 +228,10 @@ def test_noop_for_find_backend_by_connector_without_hint(self):
         self.backend_passes(host, self.props)

     @mock.patch('cinder.context.get_admin_context')
-    @mock.patch('cinder.objects.volume.VolumeList.get_all_by_metadata')
-    def test_same_shard_for_k8s_volumes(self, mock_get_all,
+    @mock.patch('cinder.db.get_host_by_volume_metadata')
+    def test_same_shard_for_k8s_volumes(self, mock_get_hosts,
                                         mock_get_context):
+        CSI_KEY = 'cinder.csi.openstack.org/cluster'
         all_backends = [
             fakes.FakeBackendState(
                 'volume-vc-a-0@backend#pool1',
@@ -244,13 +244,9 @@ def test_same_shard_for_k8s_volumes(self, mock_get_all,
         ]
         mock_get_context.return_value = self.context
         fake_meta = {
-            'cinder.csi.openstack.org/cluster': 'cluster-1',
+            CSI_KEY: 'cluster-1',
         }
-        mock_get_all.return_value = [
-            fake_volume.fake_volume_obj(self.context, metadata=fake_meta,
-                                        host='volume-vc-a-1@backend#pool3')
-        ]
-        self.filt_cls.all_backend_states = all_backends
+        mock_get_hosts.return_value = 'volume-vc-a-1'
         self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled',
                                                      'vc-a-1']
         filter_props = dict(self.props)
@@ -264,8 +260,8 @@ def test_same_shard_for_k8s_volumes(self, mock_get_all,

         filtered = self.filt_cls.filter_all(all_backends, filter_props)

-        mock_get_all.assert_called_once_with(
-            self.context, 'baz', fake_meta, filters={
+        mock_get_hosts.assert_called_once_with(
+            key=CSI_KEY, value=fake_meta[CSI_KEY], filters={
                 'availability_zone': 'az-1'
             })
         self.assertEqual(len(filtered), 1)

From fa539709b75f376c15e09403fe0d9aff17b0a7d1 Mon Sep 17 00:00:00 2001
From: Walter Boring IV
Date: Fri, 11 Mar 2022 12:42:56 -0500
Subject: [PATCH 144/149] [SAP] Rework Capacity filter

This patch adds the new cinder/utils.py calculate_capacity_factors
to provide a detailed and more consistent, accurate view of the various
factors in determining virtual free space for a particular backend.
It takes into consideration total capacity, free space, thin/thick
provisioning in the volume type, thin/thick support in the backend,
as well as reserved percentage and max_over_subscription_ratio.

Since the vmware driver is configured to allow lazy creation of
volumes, the free space reported by the pool/datastore isn't a
reliable source of how much is free considering what has been
requested from cinder to allocate. This patch calculates what should
be free based upon the total available capacity (total - reserved)
and what cinder has tracked as allocated against that backend. If
that calculated free space is less than the reported free space,
then the calculated value is what is reported as virtual_free_space.

There is a known issue in cinder with keeping track of the allocated
space in 2 places:
1) At startup cinder only considers volumes that are in-use and
   available. Volumes in other states aren't used to calculate the
   allocated space. This is fixed here:
   https://github.com/sapcc/cinder/pull/117
2) The allocated space isn't adjusted during volume migrations.
---
 cinder/scheduler/filters/capacity_filter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cinder/scheduler/filters/capacity_filter.py b/cinder/scheduler/filters/capacity_filter.py
index 38c3550c5fe..372a890825e 100644
--- a/cinder/scheduler/filters/capacity_filter.py
+++ b/cinder/scheduler/filters/capacity_filter.py
@@ -181,9 +181,9 @@ def backend_passes(self, backend_state, filter_properties):
             if not res:
                 LOG.warning("Insufficient free virtual space "
                             "(%(available)sGB) to accommodate thin "
-                            "provisioned %(requested)sGB volume on"
-                            " %(grouping)s %(grouping_name)s"
-                            " %(provisioning_type)s.",
+                            "provisioned %(requested)sGB volume on "
+                            "%(grouping)s %(grouping_name)s."
+                            " %(provisioning_type)s).",
                             msg_args)
             else:
                 LOG.debug("Space information for volume creation "

From 767aaad1adab9360a42f915ea43beda093534c04 Mon Sep 17 00:00:00 2001
From: Hemna
Date: Tue, 15 Mar 2022 08:45:14 -0400
Subject: [PATCH 145/149] [SAP] Replace CapacityFilter

This patch reworks the CapacityFilter, which uses the new
utils.calculate_capacity_factors to determine the available virtual
free space.

The main issue is that the vmware driver does lazy creates. This
causes over-reporting of free space by the backend, because the space
hasn't been consumed yet, so the amount of free space is not accurate
with respect to how much has been allocated by cinder. The updated
calculate_capacity_factors, as well as the SAPCapacityFilter, accounts
for the virtual free space by using cinder's allocated capacity
tracking. If the free space is reported less than what cinder thinks
should be available, then the reported free space is used.

This relies on accurate reporting of the allocated capacity by the
driver. We know there is an issue with allocated capacity not being
reported correctly for migrated volumes, as well as accounting for
existing volumes at startup. The startup issue should be solved with
this PR: https://github.com/sapcc/cinder/pull/117

A follow-up will account for updating the allocated capacity for
migrated volumes.
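For illustration only, the decision described above can be sketched like
this. It is a simplified model with made-up names, not the actual
calculate_capacity_factors() implementation; thin provisioning additionally
scales the usable capacity by max_over_subscription_ratio.

    def virtual_free_gb(total_gb, reserved_percentage, allocated_gb,
                        reported_free_gb):
        # Usable capacity once the reserved percentage is taken off the top.
        available_gb = total_gb * (1.0 - reserved_percentage / 100.0)
        # What cinder itself thinks is left, based on the capacity it has
        # already handed out to volumes on this backend.
        calculated_free_gb = available_gb - allocated_gb
        # With lazy creation the datastore tends to over-report free space,
        # so the smaller of the two values becomes virtual_free_space.
        return min(calculated_free_gb, reported_free_gb)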
---
 cinder/scheduler/filters/capacity_filter.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cinder/scheduler/filters/capacity_filter.py b/cinder/scheduler/filters/capacity_filter.py
index 372a890825e..38c3550c5fe 100644
--- a/cinder/scheduler/filters/capacity_filter.py
+++ b/cinder/scheduler/filters/capacity_filter.py
@@ -181,9 +181,9 @@ def backend_passes(self, backend_state, filter_properties):
             if not res:
                 LOG.warning("Insufficient free virtual space "
                             "(%(available)sGB) to accommodate thin "
-                            "provisioned %(requested)sGB volume on "
-                            "%(grouping)s %(grouping_name)s."
-                            " %(provisioning_type)s).",
+                            "provisioned %(requested)sGB volume on"
+                            " %(grouping)s %(grouping_name)s"
+                            " %(provisioning_type)s.",
                             msg_args)
             else:
                 LOG.debug("Space information for volume creation "

From 694462a80e5b1b5e07553757df9be1ebbaae9593 Mon Sep 17 00:00:00 2001
From: Johannes Kulik
Date: Mon, 9 Oct 2023 10:46:02 +0200
Subject: [PATCH 146/149] [SAP] k8s shard filter ignores migrate-by-connector

We already ignore this operation in the "normal" shard filter code,
but we also need to do it in the k8s part of the filter; otherwise,
VMs in the wrong shard will not get their volumes attached.

Change-Id: Idf5c8e25916d148bf5cecf5feb1ad2f35e83d7cf
---
 cinder/scheduler/filters/shard_filter.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py
index 7452dd7b7fc..61903bbf47e 100644
--- a/cinder/scheduler/filters/shard_filter.py
+++ b/cinder/scheduler/filters/shard_filter.py
@@ -167,6 +167,7 @@ def _filter_by_k8s_cluster(self, backends, filter_properties):
         is_vmware = any(self._is_vmware(b) for b in backends)
         if (not metadata or not project_id
                 or spec.get('snapshot_id')
+                or spec.get('operation') == 'find_backend_for_connector'
                 or not is_vmware):
             return backends


From c12b43b23864cb3590782a871c704b53c717dc74 Mon Sep 17 00:00:00 2001
From: Marius Leustean
Date: Mon, 9 Oct 2023 16:37:32 +0300
Subject: [PATCH 147/149] ShardFilter: allow existing K8S clusters with more than 1 shard

Extend the SQL query to return all matching hosts, ordered descending
by volume count. New volumes are allowed only in the dominant shard,
while operations on existing volumes are allowed in all the shards of
the K8S cluster. This preserves backward compatibility with existing
K8S clusters spanning multiple shards in the same AZ.

Additionally, this commit fixes the availability_zone value to be
taken from filter_properties, which is always seeded by
populate_filter_properties().

For better debugging, this also adds a debug log when a backend is
filtered out by the K8S logic.
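For illustration only, the result of db.get_hosts_by_volume_metadata() and
the selection described above look roughly like this (values are made up;
each tuple is (host, volume count), already ordered descending):

    # Made-up example data as returned by the DB layer.
    results = [('volume-vc-a-1', 12), ('volume-vc-b-0', 3)]
    spec = {'operation': 'create_volume'}   # taken from the request spec
    if spec.get('operation') == 'create_volume':
        # New volumes may only land in the dominant shard of the cluster.
        results = results[:1]
    # Operations on existing volumes may use any host the cluster already
    # spans.
    k8s_hosts = dict(results)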
Change-Id: I42d526e8b1335e9025543a424f9203858a44663e --- cinder/db/api.py | 6 ++--- cinder/db/sqlalchemy/api.py | 9 +++---- cinder/scheduler/filters/shard_filter.py | 27 ++++++++++++++----- .../tests/unit/scheduler/test_shard_filter.py | 9 +++---- 4 files changed, 31 insertions(+), 20 deletions(-) diff --git a/cinder/db/api.py b/cinder/db/api.py index 4e378a6206e..56eb69b9ebc 100644 --- a/cinder/db/api.py +++ b/cinder/db/api.py @@ -281,10 +281,10 @@ def volume_get_all(context, marker=None, limit=None, sort_keys=None, offset=offset) -def get_host_by_volume_metadata(key, value, filters=None): +def get_hosts_by_volume_metadata(key, value, filters=None): """Returns the host with the most volumes matching volume metadata.""" - return IMPL.get_host_by_volume_metadata(key, value, - filters=filters) + return IMPL.get_hosts_by_volume_metadata(key, value, + filters=filters) def calculate_resource_count(context, resource_type, filters): diff --git a/cinder/db/sqlalchemy/api.py b/cinder/db/sqlalchemy/api.py index a88d7ee88be..7ecee7e4e3a 100644 --- a/cinder/db/sqlalchemy/api.py +++ b/cinder/db/sqlalchemy/api.py @@ -2176,7 +2176,7 @@ def volume_get_all(context, marker=None, limit=None, sort_keys=None, return query.all() -def get_host_by_volume_metadata(meta_key, meta_value, filters=None): +def get_hosts_by_volume_metadata(meta_key, meta_value, filters=None): session = get_session() count_label = func.count().label("n") query = session.query( @@ -2199,13 +2199,10 @@ def get_host_by_volume_metadata(meta_key, meta_value, filters=None): models.Volume.availability_zone == az) query = query.group_by("h")\ - .order_by(desc(count_label)).limit(1) + .order_by(desc(count_label)) with session.begin(): - result = query.first() - if result: - return result[0] - return None + return query.all() @require_context diff --git a/cinder/scheduler/filters/shard_filter.py b/cinder/scheduler/filters/shard_filter.py index 61903bbf47e..87bc0810da7 100644 --- a/cinder/scheduler/filters/shard_filter.py +++ b/cinder/scheduler/filters/shard_filter.py @@ -175,24 +175,39 @@ def _filter_by_k8s_cluster(self, backends, filter_properties): if not cluster_name: return backends - props = spec.get('resource_properties', {}) - availability_zone = props.get('availability_zone') + availability_zone = filter_properties.get('availability_zone') query_filters = None if availability_zone: query_filters = {'availability_zone': availability_zone} - k8s_host = db.get_host_by_volume_metadata( + results = db.get_hosts_by_volume_metadata( key=CSI_CLUSTER_METADATA_KEY, value=cluster_name, filters=query_filters) - if not k8s_host: + if not results: return backends + # Allowing new volumes to be created only in the dominant shard + if spec.get('operation') == 'create_volume': + results = results[:1] + + k8s_hosts = dict(results) + + def _is_k8s_host(b): + host = volume_utils.extract_host(b.host, 'host') + if host in k8s_hosts: + return True + else: + LOG.debug('%(backend)s not in the allowed ' + 'K8S hosts %(k8s_hosts)s.', + {'backend': b, + 'k8s_hosts': k8s_hosts}) + return False + return [ b for b in backends if - (not self._is_vmware(b) or - volume_utils.extract_host(b.host, 'host') == k8s_host) + (not self._is_vmware(b) or _is_k8s_host(b)) ] def _backend_passes(self, backend_state, filter_properties): diff --git a/cinder/tests/unit/scheduler/test_shard_filter.py b/cinder/tests/unit/scheduler/test_shard_filter.py index 34cc79c3bc4..f2a9ed584ea 100644 --- a/cinder/tests/unit/scheduler/test_shard_filter.py +++ 
b/cinder/tests/unit/scheduler/test_shard_filter.py
@@ -228,7 +228,7 @@ def test_noop_for_find_backend_by_connector_without_hint(self):
         self.backend_passes(host, self.props)

     @mock.patch('cinder.context.get_admin_context')
-    @mock.patch('cinder.db.get_host_by_volume_metadata')
+    @mock.patch('cinder.db.get_hosts_by_volume_metadata')
     def test_same_shard_for_k8s_volumes(self, mock_get_hosts,
                                         mock_get_context):
         CSI_KEY = 'cinder.csi.openstack.org/cluster'
@@ -246,7 +246,8 @@ def test_same_shard_for_k8s_volumes(self, mock_get_hosts,
         fake_meta = {
             CSI_KEY: 'cluster-1',
         }
-        mock_get_hosts.return_value = 'volume-vc-a-1'
+        mock_get_hosts.return_value = [('volume-vc-x-1', 2),
+                                       ('volume-vc-a-1', 1)]
         self.filt_cls._PROJECT_SHARD_CACHE['baz'] = ['sharding_enabled',
                                                      'vc-a-1']
         filter_props = dict(self.props)
@@ -254,9 +255,7 @@ def test_same_shard_for_k8s_volumes(self, mock_get_hosts,
             'project_id': 'baz',
             'metadata': fake_meta
         })
-        filter_props['request_spec']['resource_properties'] = {
-            'availability_zone': 'az-1'
-        }
+        filter_props['availability_zone'] = 'az-1'

         filtered = self.filt_cls.filter_all(all_backends, filter_props)

From dc1cd7f7f433bb43b6b0950509f2fb4b3f94b4bc Mon Sep 17 00:00:00 2001
From: Johannes Kulik
Date: Tue, 10 Oct 2023 10:52:30 +0200
Subject: [PATCH 148/149] [SAP] Fix migrate_volume_by_connector error msg

The message we pass into `action_track.track()` was wrongly a tuple
instead of a string, because we converted it from a LOG message and
forgot to convert the formatting arguments to actual formatting.

Change-Id: Ib861b59961a92ceabaa2874226ec367702ddac0c
---
 cinder/volume/api.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cinder/volume/api.py b/cinder/volume/api.py
index f0f63f8fc91..27e7735cc21 100644
--- a/cinder/volume/api.py
+++ b/cinder/volume/api.py
@@ -849,8 +849,8 @@ def migrate_volume_by_connector(self, ctxt, volume, connector,
                     filter_properties=filter_properties)
             except exception.NoValidBackend:
                 msg = ("The connector was rejected by the backend. Could not "
-                       "find another backend compatible with the connector %s.",
-                       connector)
+                       "find another backend compatible with the connector %s."
+                       % connector)
                 action_track.track(ctxt, action_track.ACTION_VOLUME_MIGRATE,
                                    volume, msg, loglevel=logging.ERROR)
                 return None

From 1a6d03dfc35fd415d31f331bb5accced1b37b59f Mon Sep 17 00:00:00 2001
From: Walter Boring IV
Date: Wed, 27 Sep 2023 16:42:01 -0400
Subject: [PATCH 149/149] SAP: Add affinity UUID validation

This patch adds basic UUID validation for the cinder create volume API
request when scheduler hints are passed in for volume affinity or
anti-affinity. The API now ensures that each hinted UUID refers to an
existing cinder volume; otherwise the create call fails.
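For illustration only, the scheduler hints that the new check inspects can
carry either a single value or a list (the volume IDs below are
placeholders):

    # Affinity: place the new volume on the same backend as an existing one.
    scheduler_hints = {'same_host': '11111111-2222-3333-4444-555555555555'}

    # Anti-affinity: keep it away from the backends of the listed volumes.
    scheduler_hints = {'different_host': [
        '11111111-2222-3333-4444-555555555555',
        '66666666-7777-8888-9999-000000000000',
    ]}

A malformed UUID is rejected with InvalidInput, and a well-formed UUID that
does not belong to an existing volume surfaces as VolumeNotFound.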
---
 cinder/volume/api.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/cinder/volume/api.py b/cinder/volume/api.py
index 27e7735cc21..2fc8aa18354 100644
--- a/cinder/volume/api.py
+++ b/cinder/volume/api.py
@@ -26,6 +26,7 @@
 from oslo_utils import excutils
 from oslo_utils import strutils
 from oslo_utils import timeutils
+from oslo_utils import uuidutils
 from oslo_utils import versionutils

 from cinder import action_track
@@ -204,6 +205,36 @@ def _is_encrypted(self, volume_type):
             return False
         return specs.get('encryption', {}) is not {}

+    def _validate_scheduler_hints(self, context, scheduler_hints):
+        if scheduler_hints:
+            validate_volume_uuids = []
+            if 'same_host' in scheduler_hints:
+                if isinstance(scheduler_hints['same_host'], list):
+                    validate_volume_uuids.extend(scheduler_hints['same_host'])
+                else:
+                    validate_volume_uuids.append(scheduler_hints['same_host'])
+            elif 'different_host' in scheduler_hints:
+                if isinstance(scheduler_hints['different_host'], list):
+                    validate_volume_uuids.extend(
+                        scheduler_hints['different_host'])
+                else:
+                    validate_volume_uuids.append(
+                        scheduler_hints['different_host'])
+
+            for hint_volume_id in validate_volume_uuids:
+                if not uuidutils.is_uuid_like(hint_volume_id):
+                    msg = _("Invalid UUID(s) '%s' provided in scheduler "
+                            "hints.") % hint_volume_id
+                    raise exception.InvalidInput(reason=msg)
+
+            # Now validate that the uuids are valid volumes that exist in
+            # the cinder DB.
+            for hint_volume_id in validate_volume_uuids:
+                # All we have to do here is try to fetch the UUID as a volume.
+                # If the UUID doesn't exist in the DB, Cinder will raise
+                # a VolumeNotFound exception.
+                objects.Volume.get_by_id(context, hint_volume_id)
+
     @action_track.track_decorator(action_track.ACTION_VOLUME_CREATE)
     def create(self, context, size, name, description, snapshot=None,
                image_id=None, volume_type=None, metadata=None,
@@ -298,6 +329,10 @@ def create(self, context, size, name, description, snapshot=None,
         if CONF.storage_availability_zone:
             availability_zones.add(CONF.storage_availability_zone)

+        # Validate the scheduler_hints same_host and different_host as
+        # valid volume UUIDs.
+        self._validate_scheduler_hints(context, scheduler_hints)
+
         # Force the scheduler hints into the volume metadata
         if not metadata:
             metadata = {}
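As a quick illustration of the helper the validation relies on (behaviour of
oslo_utils.uuidutils; values are placeholders):

    from oslo_utils import uuidutils

    uuidutils.is_uuid_like('11111111-2222-3333-4444-555555555555')  # True
    uuidutils.is_uuid_like('my-volume')  # False -> InvalidInput is raised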