Skip to content

Commit c56a8f7

Browse files
committed
fixes for snapshot logic
This commit will disappear. Just saving my fixes for the snapshot logic. Signed-off-by: Babis Chalios <[email protected]>
1 parent d695d39 commit c56a8f7

File tree

5 files changed

+218
-46
lines changed

5 files changed

+218
-46
lines changed

src/vmm/src/device_manager/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ impl<'a> Persist<'a> for DeviceManager {
361361
vm_resources: constructor_args.vm_resources,
362362
instance_id: constructor_args.instance_id,
363363
restored_from_file: constructor_args.restored_from_file,
364+
event_manager: constructor_args.event_manager,
364365
};
365366
let pci_devices = PciDevices::restore(pci_ctor_args, &state.pci_state)?;
366367

src/vmm/src/device_manager/pci_mngr.rs

Lines changed: 109 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@ use std::collections::HashMap;
55
use std::fmt::Debug;
66
use std::sync::{Arc, Mutex};
77

8-
use event_manager::MutEventSubscriber;
8+
use event_manager::{MutEventSubscriber, SubscriberOps};
99
use kvm_ioctls::{IoEventAddress, NoDatamatch};
10-
use log::{debug, error, warn};
10+
use log::{debug, error, info, warn};
1111
use pci::{PciBarRegionType, PciBdf, PciDevice, PciDeviceError, PciRootError};
1212
use serde::{Deserialize, Serialize};
1313
use vm_device::BusError;
1414

1515
use super::persist::{MmdsVersionState, SharedDeviceType};
16-
use crate::Vm;
1716
use crate::devices::pci::PciSegment;
1817
use crate::devices::virtio::balloon::Balloon;
1918
use crate::devices::virtio::balloon::persist::{BalloonConstructorArgs, BalloonState};
@@ -36,6 +35,7 @@ use crate::resources::VmResources;
3635
use crate::snapshot::Persist;
3736
use crate::vstate::memory::GuestMemoryMmap;
3837
use crate::vstate::vm::{InterruptError, MsiVectorGroup};
38+
use crate::{EventManager, Vm};
3939

4040
#[derive(Debug, Default)]
4141
pub struct PciDevices {
@@ -180,7 +180,8 @@ impl PciDevices {
180180
device: Arc<Mutex<T>>,
181181
device_id: &String,
182182
pci_device_bdf: PciBdf,
183-
transport_state: &VirtioPciDeviceState,
183+
transport_state: VirtioPciDeviceState,
184+
event_manager: &mut EventManager,
184185
) -> Result<(), PciManagerError> {
185186
// We should only be reaching this point if PCI is enabled
186187
let pci_segment = self.pci_segment.as_ref().unwrap();
@@ -192,11 +193,11 @@ impl PciDevices {
192193
let virtio_device = Arc::new(Mutex::new(VirtioPciDevice::new_from_state(
193194
device_id.to_string(),
194195
vm.guest_memory().clone(),
195-
device,
196+
device.clone(),
196197
pci_device_bdf.into(),
197198
msi_vector_group,
198199
true,
199-
transport_state.clone(),
200+
transport_state,
200201
)?));
201202

202203
pci_segment
@@ -252,8 +253,87 @@ impl PciDevices {
252253
vm.fd().register_ioevent(queue_evt, &io_addr, NoDatamatch)?;
253254
}
254255

256+
event_manager.add_subscriber(device);
255257
Ok(())
256258
}
259+
260+
/// Artificially kick devices as if they had external events.
261+
pub fn kick_devices(&self) {
262+
info!("Artificially kick PCI devices.");
263+
// We only kick virtio devices for now.
264+
for (id, device) in &self.virtio_devices {
265+
let virtio_device = device.lock().expect("Poisoned lock").virtio_device();
266+
let mut virtio_locked = virtio_device.lock().expect("Poisoned lock");
267+
match virtio_locked.device_type() {
268+
TYPE_BALLOON => {
269+
let balloon = virtio_locked
270+
.as_mut_any()
271+
.downcast_mut::<Balloon>()
272+
.unwrap();
273+
// If device is activated, kick the balloon queue(s) to make up for any
274+
// pending or in-flight epoll events we may not have captured in the snapshot.
275+
// Stats queue doesn't need kicking as it is notified via a `timer_fd`.
276+
if balloon.is_activated() {
277+
info!("kick balloon {}.", id);
278+
balloon.process_virtio_queues();
279+
}
280+
}
281+
TYPE_BLOCK => {
282+
// We only care about kicking virtio block.
283+
// If the device is a vhost-user-block, there is nothing for us to do.
284+
if let Some(block) = virtio_locked.as_mut_any().downcast_mut::<Block>() {
285+
// If device is activated, kick the block queue(s) to make up for any
286+
// pending or in-flight epoll events we may not have captured in
287+
// snapshot. No need to kick Ratelimiters
288+
// because they are restored 'unblocked' so
289+
// any inflight `timer_fd` events can be safely discarded.
290+
if block.is_activated() {
291+
info!("kick block {}.", id);
292+
block.process_virtio_queues();
293+
}
294+
}
295+
}
296+
TYPE_NET => {
297+
let net = virtio_locked.as_mut_any().downcast_mut::<Net>().unwrap();
298+
// If device is activated, kick the net queue(s) to make up for any
299+
// pending or in-flight epoll events we may not have captured in the snapshot.
300+
// No need to kick Ratelimiters because they are restored 'unblocked' so
301+
// any inflight `timer_fd` events can be safely discarded.
302+
if net.is_activated() {
303+
info!("kick net {}.", id);
304+
net.process_virtio_queues();
305+
}
306+
}
307+
TYPE_VSOCK => {
308+
// Vsock has a complicated protocol that isn't resilient to any packet loss,
309+
// so for Vsock we don't support connection persistence through snapshot.
310+
// Any in-flight packets or events are simply lost.
311+
// Vsock is restored 'empty'.
312+
// The only reason we still `kick` it is to make the guest process the
313+
// `TRANSPORT_RESET_EVENT` event we sent during snapshot creation.
314+
let vsock = virtio_locked
315+
.as_mut_any()
316+
.downcast_mut::<Vsock<VsockUnixBackend>>()
317+
.unwrap();
318+
if vsock.is_activated() {
319+
info!("kick vsock {id}.");
320+
vsock.signal_used_queue(0).unwrap();
321+
}
322+
}
323+
TYPE_RNG => {
324+
let entropy = virtio_locked
325+
.as_mut_any()
326+
.downcast_mut::<Entropy>()
327+
.unwrap();
328+
if entropy.is_activated() {
329+
info!("kick entropy {id}.");
330+
entropy.process_virtio_queues();
331+
}
332+
}
333+
_ => (),
334+
}
335+
}
336+
}
257337
}
258338

259339
#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -286,13 +366,25 @@ pub struct PciDevicesState {
286366
pub entropy_device: Option<VirtioDeviceState<EntropyState>>,
287367
}
288368

289-
#[derive(Debug)]
290369
pub struct PciDevicesConstructorArgs<'a> {
291370
pub vm: Arc<Vm>,
292371
pub mem: &'a GuestMemoryMmap,
293372
pub vm_resources: &'a mut VmResources,
294373
pub instance_id: &'a str,
295374
pub restored_from_file: bool,
375+
pub event_manager: &'a mut EventManager,
376+
}
377+
378+
impl<'a> Debug for PciDevicesConstructorArgs<'a> {
379+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
380+
f.debug_struct("PciDevicesConstructorArgs")
381+
.field("vm", &self.vm)
382+
.field("mem", &self.mem)
383+
.field("vm_resources", &self.vm_resources)
384+
.field("instance_id", &self.instance_id)
385+
.field("restored_from_file", &self.restored_from_file)
386+
.finish()
387+
}
296388
}
297389

298390
impl<'a> Persist<'a> for PciDevices {
@@ -461,7 +553,8 @@ impl<'a> Persist<'a> for PciDevices {
461553
device,
462554
&balloon_state.device_id,
463555
balloon_state.pci_device_bdf.into(),
464-
&balloon_state.transport_state,
556+
balloon_state.transport_state.clone(),
557+
constructor_args.event_manager,
465558
)
466559
.unwrap()
467560
}
@@ -486,7 +579,8 @@ impl<'a> Persist<'a> for PciDevices {
486579
device,
487580
&block_state.device_id,
488581
block_state.pci_device_bdf.into(),
489-
&block_state.transport_state,
582+
block_state.transport_state.clone(),
583+
constructor_args.event_manager,
490584
)
491585
.unwrap()
492586
}
@@ -536,7 +630,8 @@ impl<'a> Persist<'a> for PciDevices {
536630
device,
537631
&net_state.device_id,
538632
net_state.pci_device_bdf.into(),
539-
&net_state.transport_state,
633+
net_state.transport_state.clone(),
634+
constructor_args.event_manager,
540635
)
541636
.unwrap()
542637
}
@@ -569,7 +664,8 @@ impl<'a> Persist<'a> for PciDevices {
569664
device,
570665
&vsock_state.device_id,
571666
vsock_state.pci_device_bdf.into(),
572-
&vsock_state.transport_state,
667+
vsock_state.transport_state.clone(),
668+
constructor_args.event_manager,
573669
)
574670
.unwrap()
575671
}
@@ -592,7 +688,8 @@ impl<'a> Persist<'a> for PciDevices {
592688
device,
593689
&entropy_state.device_id,
594690
entropy_state.pci_device_bdf.into(),
595-
&entropy_state.transport_state,
691+
entropy_state.transport_state.clone(),
692+
constructor_args.event_manager,
596693
)
597694
.unwrap()
598695
}

src/vmm/src/devices/virtio/transport/pci/device.rs

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ pub struct VirtioPciDeviceState {
301301
pub pci_configuration_state: PciConfigurationState,
302302
pub pci_dev_state: VirtioPciCommonConfigState,
303303
pub msix_state: MsixConfigState,
304-
pub msi_vector_group: Vec<(u32, (u32, bool))>,
304+
pub msi_vector_group: Vec<(u32, u32)>,
305305
pub bar_configuration: Vec<PciBarConfiguration>,
306306
}
307307

@@ -432,8 +432,7 @@ impl VirtioPciDevice {
432432
msix_config: VIRTQ_MSI_NO_VECTOR,
433433
msix_queues: vec![VIRTQ_MSI_NO_VECTOR; num_queues],
434434
});
435-
let (device_activated, interrupt_status, cap_pci_cfg_info) =
436-
(false, 0, VirtioPciCfgCapInfo::default());
435+
let cap_pci_cfg_info = VirtioPciCfgCapInfo::default();
437436

438437
// Dropping the MutexGuard to unlock the VirtioDevice. This is required
439438
// in the context of a restore given the device might require some
@@ -448,8 +447,8 @@ impl VirtioPciDevice {
448447
msix_config: Some(msix_config),
449448
msix_num,
450449
device,
451-
device_activated: Arc::new(AtomicBool::new(device_activated)),
452-
interrupt_status: Arc::new(AtomicUsize::new(interrupt_status)),
450+
device_activated: Arc::new(AtomicBool::new(false)),
451+
interrupt_status: Arc::new(AtomicUsize::new(0)),
453452
virtio_interrupt: None,
454453
memory,
455454
settings_bar: 0,
@@ -524,14 +523,10 @@ impl VirtioPciDevice {
524523
);
525524

526525
let common_config = VirtioPciCommonConfig::new(state.pci_dev_state);
527-
let (device_activated, interrupt_status, cap_pci_cfg_info) = (
528-
state.device_activated,
529-
state.interrupt_status,
530-
VirtioPciCfgCapInfo {
531-
offset: state.cap_pci_cfg_offset,
532-
cap: *VirtioPciCfgCap::from_slice(&state.cap_pci_cfg).unwrap(),
533-
},
534-
);
526+
let cap_pci_cfg_info = VirtioPciCfgCapInfo {
527+
offset: state.cap_pci_cfg_offset,
528+
cap: *VirtioPciCfgCap::from_slice(&state.cap_pci_cfg).unwrap(),
529+
};
535530

536531
// Dropping the MutexGuard to unlock the VirtioDevice. This is required
537532
// in the context of a restore given the device might require some
@@ -546,8 +541,8 @@ impl VirtioPciDevice {
546541
msix_config: Some(msix_config),
547542
msix_num,
548543
device,
549-
device_activated: Arc::new(AtomicBool::new(device_activated)),
550-
interrupt_status: Arc::new(AtomicUsize::new(interrupt_status)),
544+
device_activated: Arc::new(AtomicBool::new(state.device_activated)),
545+
interrupt_status: Arc::new(AtomicUsize::new(state.interrupt_status)),
551546
virtio_interrupt: None,
552547
memory: memory.clone(),
553548
settings_bar: 0,
@@ -1093,6 +1088,7 @@ impl PciDevice for VirtioPciDevice {
10931088
Arc::clone(self.virtio_interrupt.as_ref().unwrap()),
10941089
)
10951090
.unwrap_or_else(|err| error!("Error activating device: {err:?}"));
1091+
self.device_activated.store(true, Ordering::SeqCst);
10961092
} else {
10971093
debug!("Device doesn't need activation");
10981094
}

src/vmm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ impl Vmm {
390390
/// Sends a resume command to the vCPUs.
391391
pub fn resume_vm(&mut self) -> Result<(), VmmError> {
392392
self.device_manager.mmio_devices.kick_devices();
393+
self.device_manager.pci_devices.kick_devices();
393394

394395
// Send the events.
395396
self.vcpus_handles

0 commit comments

Comments
 (0)