Skip to content

Null terminated slices for exec #358

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
198 changes: 197 additions & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub mod unistd;

use libc::c_char;
use std::{ptr, result};
use std::ffi::{CStr, OsStr};
use std::ffi::{CStr, CString, OsStr};
use std::path::{Path, PathBuf};
use std::os::unix::ffi::OsStrExt;
use std::fmt;
Expand Down Expand Up @@ -196,6 +196,17 @@ impl NixPath for CStr {
}
}

impl NixPath for CString {
fn len(&self) -> usize {
self.to_bytes().len()
}

fn with_nix_path<T, F>(&self, f: F) -> Result<T>
where F: FnOnce(&CStr) -> T {
self.as_c_str().with_nix_path(f)
}
}

impl NixPath for [u8] {
fn len(&self) -> usize {
self.len()
Expand Down Expand Up @@ -257,3 +268,188 @@ impl<'a, NP: ?Sized + NixPath> NixPath for Option<&'a NP> {
}
}
}

/*
*
* ===== Null terminated slices for exec =====
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove this comment header.

*
*/

use std::ops::{Deref, DerefMut};
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please move this use statements to the top of the file.

use std::mem::transmute;
use std::iter;

/// A conversion trait that may borrow or allocate memory depending on the input.
/// Used to convert between terminated slices and `Vec`s.
pub trait IntoRef<'a, T: ?Sized> {
type Target: 'a + AsRef<T> + Deref<Target=T>;

fn into_ref(self) -> Self::Target;
}

/// A slice of references terminated by `None`. Used by API calls that accept
/// null-terminated arrays such as the `exec` family of functions.
pub struct TerminatedSlice<T> {
inner: [Option<T>],
}

impl<T> TerminatedSlice<T> {
/// Instantiate a `TerminatedSlice` from a slice ending in `None`. Returns
/// `None` if the provided slice is not properly terminated.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this return a Result? This seems like you're using Optipn to return an error

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, I can create an empty error type for this.

pub fn from_slice(slice: &[Option<T>]) -> Option<&Self> {
if slice.last().map(Option::is_none).unwrap_or(false) {
Some(unsafe { Self::from_slice_unchecked(slice) })
} else {
None
}
}

/// Instantiate a `TerminatedSlice` from a mutable slice ending in `None`.
/// Returns `None` if the provided slice is not properly terminated.
pub fn from_slice_mut(slice: &mut [Option<T>]) -> Option<&mut Self> {
if slice.last().map(Option::is_none).unwrap_or(false) {
Some(unsafe { Self::from_slice_mut_unchecked(slice) })
} else {
None
}
}

/// Instantiate a `TerminatedSlice` from a slice ending in `None`.
///
/// ## Unsafety
///
/// This assumes that the slice is properly terminated, and can cause
/// undefined behaviour if that invariant is not upheld.
pub unsafe fn from_slice_unchecked(slice: &[Option<T>]) -> &Self {
transmute(slice)
}

/// Instantiate a `TerminatedSlice` from a mutable slice ending in `None`.
///
/// ## Unsafety
///
/// This assumes that the slice is properly terminated, and can cause
/// undefined behaviour if that invariant is not upheld.
pub unsafe fn from_slice_mut_unchecked(slice: &mut [Option<T>]) -> &mut Self {
transmute(slice)
}
}

impl<'a, U: Sized> TerminatedSlice<&'a U> {
pub fn as_ptr(&self) -> *const *const U {
self.inner.as_ptr() as *const _
}
}

impl<T> Deref for TerminatedSlice<T> {
type Target = [Option<T>];

fn deref(&self) -> &Self::Target {
&self.inner[..self.inner.len() - 1]
}
}

impl<T> DerefMut for TerminatedSlice<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
let len = self.inner.len();
&mut self.inner[..len - 1]
}
}

impl<T> AsRef<TerminatedSlice<T>> for TerminatedSlice<T> {
fn as_ref(&self) -> &Self {
self
}
}

/// Owned variant of `TerminatedSlice`.
pub struct TerminatedVec<T> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should link to TerminatedSlicr if we're only going to put the documentation on that type.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hm, should probably duplicate it anyway. Linking to the type is good too though, what's the syntax for that look like?

inner: Vec<Option<T>>,
}

impl<T> TerminatedVec<T> {
/// Instantiates a `TerminatedVec` from a `None` terminated `Vec`. Returns
/// `None` if the provided `Vec` is not properly terminated.
pub fn from_vec(vec: Vec<Option<T>>) -> Option<Self> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Again, this should return a Result type

if vec.last().map(Option::is_none).unwrap_or(false) {
Some(unsafe { Self::from_vec_unchecked(vec) })
} else {
None
}
}

/// Instantiates a `TerminatedVec` from a `None` terminated `Vec`.
///
/// ## Unsafety
///
/// This assumes that the `Vec` is properly terminated, and can cause
/// undefined behaviour if that invariant is not upheld.
pub unsafe fn from_vec_unchecked(vec: Vec<Option<T>>) -> Self {
TerminatedVec {
inner: vec,
}
}

/// Consume `self` to return the inner wrapped `Vec`.
pub fn into_inner(self) -> Vec<Option<T>> {
self.inner
}
}

impl<'a> TerminatedVec<&'a c_char> {
fn terminate<T: AsRef<CStr> + 'a, I: IntoIterator<Item=T>>(iter: I) -> Self {
fn cstr_char<'a, S: AsRef<CStr> + 'a>(s: S) -> &'a c_char {
unsafe {
&*s.as_ref().as_ptr()
}
}

let terminated = iter.into_iter()
.map(cstr_char)
.map(Some).chain(iter::once(None)).collect();

unsafe {
TerminatedVec::from_vec_unchecked(terminated)
}
}
}

impl<T> Deref for TerminatedVec<T> {
type Target = TerminatedSlice<T>;

fn deref(&self) -> &Self::Target {
unsafe {
TerminatedSlice::from_slice_unchecked(&self.inner)
}
}
}

impl<T> DerefMut for TerminatedVec<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
unsafe {
TerminatedSlice::from_slice_mut_unchecked(&mut self.inner)
}
}
}

impl<T> AsRef<TerminatedSlice<T>> for TerminatedVec<T> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean to put this impl up with the Slice implementation code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure what you mean by this. I tend to follow a type definition with the traits that it impls or provides/enables. You mean this should be moved before the definition of TerminatedVec?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, disregard this comment.

fn as_ref(&self) -> &TerminatedSlice<T> {
self
}
}

impl<'a, T: 'a> IntoRef<'a, TerminatedSlice<&'a T>> for &'a TerminatedSlice<&'a T> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean to put this and the followng impl up with the Slice implementation code?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay I see this one should probably be moved up to follow the TerminatedSlice definition, although the ones above and below it are more related to TerminatedVec.

type Target = &'a TerminatedSlice<&'a T>;

fn into_ref(self) -> Self::Target {
self
}
}

impl<'a, T: AsRef<CStr> + 'a, I: IntoIterator<Item=T>> IntoRef<'a, TerminatedSlice<&'a c_char>> for I {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this something that makes sense to have globally-defined? Or can this be integrated into the function definition instead?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This impl is pretty central to allowing exec to accept either:

  • &TerminatedSlice (usually actually a TerminatedVec created by terminate())
  • &[CStr] and similar (specifically any IntoIterator where Item: AsRef<CStr>)

... particularly the latter, which enables the old behaviour of allocate-at-call-site. One slightly less global approach might be to move the IntoRef trait and its impls into the unistd module and possibly give it a name that isn't as generic sounding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another option would be to simply remove the old style and require the use of TerminatedVec::terminate_cstr any time exec is called, whether the Vec is being preallocated or not.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a big fan of reducing options and improving consistency. This is why I've been asking for code examples, as I'm not familiar with how people use this API regularly. I want to make sure we know how people use it regularly and our API is easy for the 90% case and possible for the last 10% case. Or something along those lines.

All the examples I see involve a hard-coded array. And in those situations, I think a term_vec!() macro makes a lot of sense; this would look like a regular vec! call and avoid all of the wrapping in Option. But I don't know how people build up these arrays programmatically or if that's ever done in practice. Is that a common operation? I'd think that is handled nicely by the API provided here, but I don't know exactly.

Copy link
Contributor Author

@arcnmx arcnmx Dec 13, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, examples are going to use a hard-coded array because they're just examples. Real world usage varies of course, and can be hard-coded in simple cases, or generated from a config file or argv like in the use case that I originally wrote these changes for.

Most uses for exec I can imagine would use data coming from a dynamic source rather than being hard-coded. The convenient no-preallocation variant mostly comes into play when using exec in a program that doesn't fork and just intends to replace the whole process - I'm not sure how common that is, I guess if you're writing a tool like env(1) or something. A single-threaded program that does fork applies too, although that's not an assumption one should make if writing a library that doesn't know what kind of program might be using it.

type Target = TerminatedVec<&'a c_char>;

fn into_ref(self) -> Self::Target {
TerminatedVec::terminate(self)
}
}
56 changes: 26 additions & 30 deletions src/unistd.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
//! Safe wrappers around functions found in libc "unistd.h" header

use errno::{self, Errno};
use {Error, Result, NixPath};
use {Error, Result, NixPath, IntoRef, TerminatedSlice};
use fcntl::{fcntl, FdFlag, OFlag};
use fcntl::FcntlArg::F_SETFD;
use libc::{self, c_char, c_void, c_int, c_long, c_uint, size_t, pid_t, off_t,
Expand Down Expand Up @@ -552,25 +552,19 @@ pub fn chown<P: ?Sized + NixPath>(path: &P, owner: Option<Uid>, group: Option<Gi
Errno::result(res).map(drop)
}

fn to_exec_array(args: &[CString]) -> Vec<*const c_char> {
let mut args_p: Vec<*const c_char> = args.iter().map(|s| s.as_ptr()).collect();
args_p.push(ptr::null());
args_p
}

/// Replace the current process image with a new one (see
/// [exec(3)](http://pubs.opengroup.org/onlinepubs/9699919799/functions/exec.html)).
///
/// See the `::nix::unistd::execve` system call for additional details. `execv`
/// performs the same action but does not allow for customization of the
/// environment for the new process.
#[inline]
pub fn execv(path: &CString, argv: &[CString]) -> Result<Void> {
let args_p = to_exec_array(argv);
pub fn execv<'a, P: ?Sized + NixPath, A: IntoRef<'a, TerminatedSlice<&'a c_char>>>(path: &P, argv: A) -> Result<Void> {
let args_p = argv.into_ref();

unsafe {
libc::execv(path.as_ptr(), args_p.as_ptr())
};
try!(path.with_nix_path(|cstr| {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use the question mark operator instead of try!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure thing! Note that this is inconsistent with the rest of the file, should I just convert them all while I'm at it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to convert any code outside of what you're creating/modifying.

unsafe { libc::execv(cstr.as_ptr(), args_p.as_ptr()) }
}));

Err(Error::Sys(Errno::last()))
}
Expand All @@ -589,13 +583,13 @@ pub fn execv(path: &CString, argv: &[CString]) -> Result<Void> {
/// in the `args` list is an argument to the new process. Each element in the
/// `env` list should be a string in the form "key=value".
#[inline]
pub fn execve(path: &CString, args: &[CString], env: &[CString]) -> Result<Void> {
let args_p = to_exec_array(args);
let env_p = to_exec_array(env);
pub fn execve<'a, 'e, P: ?Sized + NixPath, A: IntoRef<'a, TerminatedSlice<&'a c_char>>, E: IntoRef<'e, TerminatedSlice<&'e c_char>>>(path: &P, args: A, env: E) -> Result<Void> {
let args_p = args.into_ref();
let env_p = env.into_ref();

unsafe {
libc::execve(path.as_ptr(), args_p.as_ptr(), env_p.as_ptr())
};
try!(path.with_nix_path(|cstr| {
unsafe { libc::execve(cstr.as_ptr(), args_p.as_ptr(), env_p.as_ptr()) }
}));

Err(Error::Sys(Errno::last()))
}
Expand All @@ -610,12 +604,14 @@ pub fn execve(path: &CString, args: &[CString], env: &[CString]) -> Result<Void>
/// would not work if "bash" was specified for the path argument, but `execvp`
/// would assuming that a bash executable was on the system `PATH`.
#[inline]
pub fn execvp(filename: &CString, args: &[CString]) -> Result<Void> {
let args_p = to_exec_array(args);
pub fn execvp<'a, P: ?Sized + NixPath, A: IntoRef<'a, TerminatedSlice<&'a c_char>>>(filename: &P, args: A) -> Result<Void> {
let args_p = args.into_ref();

unsafe {
libc::execvp(filename.as_ptr(), args_p.as_ptr())
};
try!(filename.with_nix_path(|cstr| {
unsafe {
libc::execvp(cstr.as_ptr(), args_p.as_ptr())
}
}));

Err(Error::Sys(Errno::last()))
}
Expand All @@ -633,9 +629,9 @@ pub fn execvp(filename: &CString, args: &[CString]) -> Result<Void> {
#[cfg(any(target_os = "android", target_os = "dragonfly", target_os = "freebsd",
target_os = "netbsd", target_os = "openbsd", target_os = "linux"))]
#[inline]
pub fn fexecve(fd: RawFd, args: &[CString], env: &[CString]) -> Result<Void> {
let args_p = to_exec_array(args);
let env_p = to_exec_array(env);
pub fn fexecve<'a, 'e, A: IntoRef<'a, TerminatedSlice<&'a c_char>>, E: IntoRef<'e, TerminatedSlice<&'e c_char>>>(fd: RawFd, args: A, env: E) -> Result<Void> {
let args_p = args.into_ref();
let env_p = env.into_ref();

unsafe {
libc::fexecve(fd, args_p.as_ptr(), env_p.as_ptr())
Expand All @@ -656,10 +652,10 @@ pub fn fexecve(fd: RawFd, args: &[CString], env: &[CString]) -> Result<Void> {
/// is referenced as a file descriptor to the base directory plus a path.
#[cfg(any(target_os = "android", target_os = "linux"))]
#[inline]
pub fn execveat(dirfd: RawFd, pathname: &CString, args: &[CString],
env: &[CString], flags: super::fcntl::AtFlags) -> Result<Void> {
let args_p = to_exec_array(args);
let env_p = to_exec_array(env);
pub fn execveat<'a, 'e, A: IntoRef<'a, TerminatedSlice<&'a c_char>>, E: IntoRef<'e, TerminatedSlice<&'e c_char>>>(dirfd: RawFd, pathname: &CString, args: A,
env: E, flags: super::fcntl::AtFlags) -> Result<Void> {
let args_p = args.into_ref();
let env_p = env.into_ref();

unsafe {
libc::syscall(libc::SYS_execveat, dirfd, pathname.as_ptr(),
Expand Down