Skip to content

Commit 14e6e33

Browse files
bchaliosShadowCurse
andcommitted
virtio: allow IoVecBufferMut to hold multiple DescriptorChain objects
Allow IoVecBufferMut objects to store multiple DescriptorChain objects, so that we can describe guest memory meant to be used for receiving data (for example memory used for network RX) as a single (sparse) memory region. This will allow us to always keep track all the available memory we have for performing RX and use `readv` for copying memory from the TAP device inside guest memory avoiding the extra copy. In the future, it will also facilitate the implementation of mergeable buffers for the RX path of the network device. Co-authored-by: Egor Lazarchuk <[email protected]> Signed-off-by: Babis Chalios <[email protected]>
1 parent 1e4c632 commit 14e6e33

File tree

4 files changed

+186
-43
lines changed

4 files changed

+186
-43
lines changed

src/vmm/src/devices/virtio/iovec.rs

+170-35
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44
use std::io::ErrorKind;
55

66
use libc::{c_void, iovec, size_t};
7+
#[cfg(not(kani))]
78
use smallvec::SmallVec;
89
use vm_memory::bitmap::Bitmap;
910
use vm_memory::{
1011
GuestMemory, GuestMemoryError, ReadVolatile, VolatileMemoryError, VolatileSlice, WriteVolatile,
1112
};
1213

14+
use super::iov_deque::{IovDeque, IovDequeError};
1315
use crate::devices::virtio::queue::DescriptorChain;
1416
use crate::vstate::memory::GuestMemoryMmap;
1517

@@ -23,6 +25,8 @@ pub enum IoVecError {
2325
OverflowedDescriptor,
2426
/// Guest memory error: {0}
2527
GuestMemory(#[from] GuestMemoryError),
28+
/// Error with underlying `IovDeque`: {0}
29+
IovDeque(#[from] IovDequeError),
2630
}
2731

2832
// Using SmallVec in the kani proofs causes kani to use unbounded amounts of memory
@@ -214,64 +218,135 @@ impl IoVecBuffer {
214218
}
215219
}
216220

221+
#[derive(Debug)]
222+
pub struct ParsedDescriptorChain {
223+
pub head_index: u16,
224+
pub length: u32,
225+
pub nr_iovecs: u16,
226+
}
227+
217228
/// This is essentially a wrapper of a `Vec<libc::iovec>` which can be passed to `libc::readv`.
218229
///
219230
/// It describes a write-only buffer passed to us by the guest that is scattered across multiple
220231
/// memory regions. Additionally, this wrapper provides methods that allow reading arbitrary ranges
221232
/// of data from that buffer.
222-
#[derive(Debug, Default, Clone)]
233+
#[derive(Debug)]
223234
pub struct IoVecBufferMut {
224235
// container of the memory regions included in this IO vector
225-
vecs: IoVecVec,
236+
vecs: IovDeque,
226237
// Total length of the IoVecBufferMut
227-
len: u32,
238+
len: usize,
228239
}
229240

230241
impl IoVecBufferMut {
231-
/// Create an `IoVecBuffer` from a `DescriptorChain`
242+
/// Parse a `DescriptorChain` object and append the memory regions it describes in the
243+
/// underlying ring buffer.
232244
///
233245
/// # Safety
234246
///
235247
/// The descriptor chain cannot be referencing the same memory location as another chain
236-
pub unsafe fn load_descriptor_chain(
248+
unsafe fn parse_descriptor(
237249
&mut self,
238250
mem: &GuestMemoryMmap,
239251
head: DescriptorChain,
240-
) -> Result<(), IoVecError> {
241-
self.clear();
242-
252+
) -> Result<ParsedDescriptorChain, IoVecError> {
253+
let head_index = head.index;
243254
let mut next_descriptor = Some(head);
255+
let mut length = 0u32;
256+
let mut nr_iovecs = 0u16;
244257
while let Some(desc) = next_descriptor {
245258
if !desc.is_write_only() {
259+
self.vecs.pop_front(nr_iovecs);
246260
return Err(IoVecError::ReadOnlyDescriptor);
247261
}
248262

249263
// We use get_slice instead of `get_host_address` here in order to have the whole
250264
// range of the descriptor chain checked, i.e. [addr, addr + len) is a valid memory
251265
// region in the GuestMemoryMmap.
252-
let slice = mem.get_slice(desc.addr, desc.len as usize)?;
266+
let slice = mem.get_slice(desc.addr, desc.len as usize).map_err(|err| {
267+
self.vecs.pop_front(nr_iovecs);
268+
err
269+
})?;
253270

254271
// We need to mark the area of guest memory that will be mutated through this
255272
// IoVecBufferMut as dirty ahead of time, as we loose access to all
256273
// vm-memory related information after converting down to iovecs.
257274
slice.bitmap().mark_dirty(0, desc.len as usize);
258275

259276
let iov_base = slice.ptr_guard_mut().as_ptr().cast::<c_void>();
260-
self.vecs.push(iovec {
277+
self.vecs.push_back(iovec {
261278
iov_base,
262279
iov_len: desc.len as size_t,
263280
});
264-
self.len = self
265-
.len
281+
nr_iovecs += 1;
282+
length = length
266283
.checked_add(desc.len)
267-
.ok_or(IoVecError::OverflowedDescriptor)?;
284+
.ok_or(IoVecError::OverflowedDescriptor)
285+
.map_err(|err| {
286+
self.vecs.pop_front(nr_iovecs);
287+
err
288+
})?;
268289

269290
next_descriptor = desc.next_descriptor();
270291
}
271292

293+
self.len = self.len.checked_add(length as usize).ok_or_else(|| {
294+
self.vecs.pop_front(nr_iovecs);
295+
IoVecError::OverflowedDescriptor
296+
})?;
297+
298+
Ok(ParsedDescriptorChain {
299+
head_index,
300+
length,
301+
nr_iovecs,
302+
})
303+
}
304+
305+
/// Create an empty `IoVecBufferMut`.
306+
pub fn new() -> Result<Self, IovDequeError> {
307+
let vecs = IovDeque::new()?;
308+
Ok(Self { vecs, len: 0 })
309+
}
310+
311+
/// Create an `IoVecBufferMut` from a `DescriptorChain`
312+
///
313+
/// This will clear any previous `iovec` objects in the buffer and load the new
314+
/// [`DescriptorChain`].
315+
///
316+
/// # Safety
317+
///
318+
/// The descriptor chain cannot be referencing the same memory location as another chain
319+
pub unsafe fn load_descriptor_chain(
320+
&mut self,
321+
mem: &GuestMemoryMmap,
322+
head: DescriptorChain,
323+
) -> Result<(), IoVecError> {
324+
self.clear();
325+
let _ = self.parse_descriptor(mem, head)?;
272326
Ok(())
273327
}
274328

329+
/// Append a `DescriptorChain` in this `IoVecBufferMut`
330+
///
331+
/// # Safety
332+
///
333+
/// The descriptor chain cannot be referencing the same memory location as another chain
334+
pub unsafe fn append_descriptor_chain(
335+
&mut self,
336+
mem: &GuestMemoryMmap,
337+
head: DescriptorChain,
338+
) -> Result<ParsedDescriptorChain, IoVecError> {
339+
self.parse_descriptor(mem, head)
340+
}
341+
342+
/// Drop memory from the `IoVecBufferMut`
343+
///
344+
/// This will drop memory described by the `IoVecBufferMut` from the beginning.
345+
pub fn drop_descriptor_chain(&mut self, parse_descriptor: &ParsedDescriptorChain) {
346+
self.vecs.pop_front(parse_descriptor.nr_iovecs);
347+
self.len -= parse_descriptor.length as usize;
348+
}
349+
275350
/// Create an `IoVecBuffer` from a `DescriptorChain`
276351
///
277352
/// # Safety
@@ -281,20 +356,29 @@ impl IoVecBufferMut {
281356
mem: &GuestMemoryMmap,
282357
head: DescriptorChain,
283358
) -> Result<Self, IoVecError> {
284-
let mut new_buffer = Self::default();
359+
let mut new_buffer = Self::new()?;
285360
new_buffer.load_descriptor_chain(mem, head)?;
286361
Ok(new_buffer)
287362
}
288363

289364
/// Get the total length of the memory regions covered by this `IoVecBuffer`
290-
pub(crate) fn len(&self) -> u32 {
365+
///
366+
/// In contrast to the equivalent [`IoVecBuffer::len()`] which returns `u32`, this one returns
367+
/// `usize` since the buffer can contain multiple `DescriptorChain` objects, so we don't have
368+
/// the limit that the length of a buffer is limited by `u32`.
369+
pub(crate) fn len(&self) -> usize {
291370
self.len
292371
}
293372

373+
/// Returns a pointer to the memory keeping the `iovec` structs
374+
pub fn as_iovec_mut_slice(&mut self) -> &mut [iovec] {
375+
self.vecs.as_mut_slice()
376+
}
377+
294378
/// Clears the `iovec` array
295379
pub fn clear(&mut self) {
296380
self.vecs.clear();
297-
self.len = 0u32;
381+
self.len = 0;
298382
}
299383

300384
/// Writes a number of bytes into the `IoVecBufferMut` starting at a given offset.
@@ -313,7 +397,7 @@ impl IoVecBufferMut {
313397
mut buf: &[u8],
314398
offset: usize,
315399
) -> Result<(), VolatileMemoryError> {
316-
if offset < self.len() as usize {
400+
if offset < self.len() {
317401
let expected = buf.len();
318402
let bytes_written = self.write_volatile_at(&mut buf, offset, expected)?;
319403

@@ -342,7 +426,7 @@ impl IoVecBufferMut {
342426
) -> Result<usize, VolatileMemoryError> {
343427
let mut total_bytes_read = 0;
344428

345-
for iov in &self.vecs {
429+
for iov in self.vecs.as_slice() {
346430
if len == 0 {
347431
break;
348432
}
@@ -391,6 +475,7 @@ mod tests {
391475
use vm_memory::VolatileMemoryError;
392476

393477
use super::{IoVecBuffer, IoVecBufferMut};
478+
use crate::devices::virtio::iov_deque::IovDeque;
394479
use crate::devices::virtio::queue::{Queue, VIRTQ_DESC_F_NEXT, VIRTQ_DESC_F_WRITE};
395480
use crate::devices::virtio::test_utils::VirtQueue;
396481
use crate::test_utils::multi_region_mem;
@@ -429,17 +514,36 @@ mod tests {
429514

430515
impl From<&mut [u8]> for IoVecBufferMut {
431516
fn from(buf: &mut [u8]) -> Self {
517+
let mut vecs = IovDeque::new().unwrap();
518+
vecs.push_back(iovec {
519+
iov_base: buf.as_mut_ptr().cast::<c_void>(),
520+
iov_len: buf.len(),
521+
});
522+
432523
Self {
433-
vecs: vec![iovec {
434-
iov_base: buf.as_mut_ptr().cast::<c_void>(),
435-
iov_len: buf.len(),
436-
}]
437-
.into(),
438-
len: buf.len().try_into().unwrap(),
524+
vecs,
525+
len: buf.len(),
439526
}
440527
}
441528
}
442529

530+
impl From<Vec<&mut [u8]>> for IoVecBufferMut {
531+
fn from(buffer: Vec<&mut [u8]>) -> Self {
532+
let mut len = 0usize;
533+
let mut vecs = IovDeque::new().unwrap();
534+
for slice in buffer {
535+
len += slice.len();
536+
537+
vecs.push_back(iovec {
538+
iov_base: slice.as_ptr() as *mut c_void,
539+
iov_len: slice.len(),
540+
});
541+
}
542+
543+
Self { vecs, len }
544+
}
545+
}
546+
443547
fn default_mem() -> GuestMemoryMmap {
444548
multi_region_mem(&[
445549
(GuestAddress(0), 0x10000),
@@ -528,8 +632,19 @@ mod tests {
528632
let head = q.pop().unwrap();
529633

530634
// SAFETY: This descriptor chain is only loaded once in this test
531-
let iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
635+
let mut iovec = unsafe { IoVecBufferMut::from_descriptor_chain(&mem, head).unwrap() };
532636
assert_eq!(iovec.len(), 4 * 64);
637+
638+
// We are creating a new queue where we can get descriptors from. Probably, this is not
639+
// something that we will ever want to do, as `IoVecBufferMut`s are typically
640+
// (concpetually) associated with a single `Queue`. We just do this here to be able to test
641+
// the appending logic.
642+
let (mut q, _) = write_only_chain(&mem);
643+
let head = q.pop().unwrap();
644+
// SAFETY: it is actually unsafe, but we just want to check the length of the
645+
// `IoVecBufferMut` after appending.
646+
let _ = unsafe { iovec.append_descriptor_chain(&mem, head).unwrap() };
647+
assert_eq!(iovec.len(), 8 * 64);
533648
}
534649

535650
#[test]
@@ -687,6 +802,7 @@ mod verification {
687802
use vm_memory::VolatileSlice;
688803

689804
use super::{IoVecBuffer, IoVecBufferMut, IoVecVec};
805+
use crate::devices::virtio::iov_deque::IovDeque;
690806

691807
// Maximum memory size to use for our buffers. For the time being 1KB.
692808
const GUEST_MEMORY_SIZE: usize = 1 << 10;
@@ -702,10 +818,10 @@ mod verification {
702818
let mut vecs: Vec<iovec> = Vec::with_capacity(nr_descs);
703819
let mut len = 0u32;
704820
for _ in 0..nr_descs {
705-
// The `IoVecBuffer(Mut)` constructors ensure that the memory region described by every
821+
// The `IoVecBuffer` constructors ensure that the memory region described by every
706822
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
707823
// mmap. The assumption, here, that the last address is within the memory object's
708-
// bound substitutes these checks that `IoVecBuffer(Mut)::new() performs.`
824+
// bound substitutes these checks that `IoVecBuffer::new() performs.`
709825
let addr: usize = kani::any();
710826
let iov_len: usize =
711827
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
@@ -728,6 +844,26 @@ mod verification {
728844
}
729845
}
730846

847+
fn create_iovecs_mut(mem: *mut u8, size: usize, nr_descs: usize) -> (IovDeque, u32) {
848+
let mut vecs = IovDeque::new().unwrap();
849+
let mut len = 0u32;
850+
for _ in 0..nr_descs {
851+
// The `IoVecBufferMut` constructors ensure that the memory region described by every
852+
// `Descriptor` in the chain is a valid, i.e. it is memory with then guest's memory
853+
// mmap. The assumption, here, that the last address is within the memory object's
854+
// bound substitutes these checks that `IoVecBufferMut::new() performs.`
855+
let addr: usize = kani::any();
856+
let iov_len: usize =
857+
kani::any_where(|&len| matches!(addr.checked_add(len), Some(x) if x <= size));
858+
let iov_base = unsafe { mem.offset(addr.try_into().unwrap()) } as *mut c_void;
859+
860+
vecs.push_back(iovec { iov_base, iov_len });
861+
len += u32::try_from(iov_len).unwrap();
862+
}
863+
864+
(vecs, len)
865+
}
866+
731867
impl IoVecBufferMut {
732868
fn any_of_length(nr_descs: usize) -> Self {
733869
// We only write into `IoVecBufferMut` objects, so we can simply create a guest memory
@@ -739,8 +875,11 @@ mod verification {
739875
))
740876
};
741877

742-
let (vecs, len) = create_iovecs(mem, GUEST_MEMORY_SIZE, nr_descs);
743-
Self { vecs, len }
878+
let (vecs, len) = create_iovecs_mut(mem, GUEST_MEMORY_SIZE, nr_descs);
879+
Self {
880+
vecs,
881+
len: len.try_into().unwrap(),
882+
}
744883
}
745884
}
746885

@@ -820,7 +959,7 @@ mod verification {
820959
let mut iov_mut = IoVecBufferMut::any_of_length(nr_descs);
821960

822961
let mut buf = kani::vec::any_vec::<u8, GUEST_MEMORY_SIZE>();
823-
let offset: u32 = kani::any();
962+
let offset: usize = kani::any();
824963

825964
// We can't really check the contents that the operation here writes into
826965
// `IoVecBufferMut`, because our `IoVecBufferMut` being completely arbitrary
@@ -835,11 +974,7 @@ mod verification {
835974
// Ok(...)
836975
assert_eq!(
837976
iov_mut
838-
.write_volatile_at(
839-
&mut KaniBuffer(&mut buf),
840-
offset as usize,
841-
GUEST_MEMORY_SIZE
842-
)
977+
.write_volatile_at(&mut KaniBuffer(&mut buf), offset, GUEST_MEMORY_SIZE)
843978
.unwrap(),
844979
buf.len().min(iov_mut.len().saturating_sub(offset) as usize)
845980
);

0 commit comments

Comments
 (0)