From f5aa30a292da0370801e3aae49d7b7b27bcb0939 Mon Sep 17 00:00:00 2001 From: Chayim Refael Friedman Date: Mon, 19 Aug 2024 21:01:23 +0300 Subject: [PATCH] Use vectorcall (where possible) when calling Python functions This works without any changes to user code. The way it works is by creating a methods on `IntoPy` to call functions, and specializing them for tuples. This currently supports only non-kwargs for methods, and kwargs with somewhat slow approach (converting from PyDict) for functions. This can be improved, but that will require additional API. We may consider adding more impls IntoPy> that specialize (for example, for arrays and `Vec`), but this i a good start. --- newsfragments/4456.changed.md | 1 + pyo3-ffi/src/cpython/abstract_.rs | 2 +- src/conversion.rs | 136 +++++++++++++++++++++++++++++- src/types/any.rs | 46 +++++----- src/types/tuple.rs | 113 ++++++++++++++++++++++++- 5 files changed, 270 insertions(+), 28 deletions(-) create mode 100644 newsfragments/4456.changed.md diff --git a/newsfragments/4456.changed.md b/newsfragments/4456.changed.md new file mode 100644 index 00000000000..094dece12a5 --- /dev/null +++ b/newsfragments/4456.changed.md @@ -0,0 +1 @@ +Improve performance of calls to Python by using the vectorcall calling convention where possible. \ No newline at end of file diff --git a/pyo3-ffi/src/cpython/abstract_.rs b/pyo3-ffi/src/cpython/abstract_.rs index 34525cec16f..83295e58f61 100644 --- a/pyo3-ffi/src/cpython/abstract_.rs +++ b/pyo3-ffi/src/cpython/abstract_.rs @@ -40,7 +40,7 @@ extern "C" { } #[cfg(Py_3_8)] -const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t = +pub const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t = 1 << (8 * std::mem::size_of::() as size_t - 1); #[cfg(Py_3_8)] diff --git a/src/conversion.rs b/src/conversion.rs index d909382bdb9..625cdfc0b2b 100644 --- a/src/conversion.rs +++ b/src/conversion.rs @@ -1,10 +1,11 @@ //! Defines conversions between Rust and Python types. use crate::err::PyResult; +use crate::ffi_ptr_ext::FfiPtrExt; #[cfg(feature = "experimental-inspect")] use crate::inspect::types::TypeInfo; use crate::pyclass::boolean_struct::False; use crate::types::any::PyAnyMethods; -use crate::types::PyTuple; +use crate::types::{PyDict, PyString, PyTuple}; use crate::{ ffi, Borrowed, Bound, BoundObject, Py, PyAny, PyClass, PyErr, PyObject, PyRef, PyRefMut, Python, }; @@ -172,6 +173,93 @@ pub trait IntoPy: Sized { fn type_output() -> TypeInfo { TypeInfo::Any } + + // The following methods are helpers to use the vectorcall API where possible. + // They are overridden on tuples to perform a vectorcall. + // Be careful when you're implementing these: they can never refer to `Bound` call methods, + // as those refer to these methods, so this will create an infinite recursion. + #[doc(hidden)] + #[inline] + fn __py_call_vectorcall1<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + _: private::Token, + ) -> PyResult> + where + Self: IntoPy>, + { + #[inline] + fn inner<'py>( + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + args: Bound<'py, PyTuple>, + ) -> PyResult> { + unsafe { + ffi::PyObject_Call(function.as_ptr(), args.as_ptr(), std::ptr::null_mut()) + .assume_owned_or_err(py) + } + } + inner( + py, + function, + >>::into_py(self, py).into_bound(py), + ) + } + + #[doc(hidden)] + #[inline] + fn __py_call_vectorcall<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + kwargs: Option>, + _: private::Token, + ) -> PyResult> + where + Self: IntoPy>, + { + #[inline] + fn inner<'py>( + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + args: Bound<'py, PyTuple>, + kwargs: Option>, + ) -> PyResult> { + unsafe { + ffi::PyObject_Call( + function.as_ptr(), + args.as_ptr(), + kwargs.map_or_else(std::ptr::null_mut, |kwargs| kwargs.as_ptr()), + ) + .assume_owned_or_err(py) + } + } + inner( + py, + function, + >>::into_py(self, py).into_bound(py), + kwargs, + ) + } + + #[doc(hidden)] + #[inline] + fn __py_call_method_vectorcall1<'py>( + self, + _py: Python<'py>, + object: Borrowed<'_, 'py, PyAny>, + method_name: Bound<'py, PyString>, + _: private::Token, + ) -> PyResult> + where + Self: IntoPy>, + { + // Don't `self.into_py()`! This will lose the optimization of vectorcall. + object + .getattr(method_name) + .and_then(|method| method.call1(self)) + } } /// Defines a conversion from a Rust type to a Python object, which may fail. @@ -502,6 +590,52 @@ impl IntoPy> for () { fn into_py(self, py: Python<'_>) -> Py { PyTuple::empty(py).unbind() } + + #[inline] + fn __py_call_vectorcall1<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + _: private::Token, + ) -> PyResult> { + unsafe { ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py) } + } + + #[inline] + fn __py_call_vectorcall<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + kwargs: Option>, + _: private::Token, + ) -> PyResult> { + unsafe { + match kwargs { + Some(kwargs) => ffi::PyObject_Call( + function.as_ptr(), + PyTuple::empty(py).as_ptr(), + kwargs.as_ptr(), + ) + .assume_owned_or_err(py), + None => ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py), + } + } + } + + #[inline] + #[allow(clippy::used_underscore_binding)] + fn __py_call_method_vectorcall1<'py>( + self, + py: Python<'py>, + object: Borrowed<'_, 'py, PyAny>, + method_name: Bound<'py, PyString>, + _: private::Token, + ) -> PyResult> { + unsafe { + ffi::compat::PyObject_CallMethodNoArgs(object.as_ptr(), method_name.as_ptr()) + .assume_owned_or_err(py) + } + } } impl<'py> IntoPyObject<'py> for () { diff --git a/src/types/any.rs b/src/types/any.rs index c3eb163b064..f9ff2038be5 100644 --- a/src/types/any.rs +++ b/src/types/any.rs @@ -1,5 +1,5 @@ use crate::class::basic::CompareOp; -use crate::conversion::{AsPyPointer, FromPyObjectBound, IntoPy, ToPyObject}; +use crate::conversion::{private, AsPyPointer, FromPyObjectBound, IntoPy, ToPyObject}; use crate::err::{DowncastError, DowncastIntoError, PyErr, PyResult}; use crate::exceptions::{PyAttributeError, PyTypeError}; use crate::ffi_ptr_ext::FfiPtrExt; @@ -1160,33 +1160,24 @@ impl<'py> PyAnyMethods<'py> for Bound<'py, PyAny> { args: impl IntoPy>, kwargs: Option<&Bound<'_, PyDict>>, ) -> PyResult> { - fn inner<'py>( - any: &Bound<'py, PyAny>, - args: Bound<'_, PyTuple>, - kwargs: Option<&Bound<'_, PyDict>>, - ) -> PyResult> { - unsafe { - ffi::PyObject_Call( - any.as_ptr(), - args.as_ptr(), - kwargs.map_or(std::ptr::null_mut(), |dict| dict.as_ptr()), - ) - .assume_owned_or_err(any.py()) - } - } - - let py = self.py(); - inner(self, args.into_py(py).into_bound(py), kwargs) + args.__py_call_vectorcall( + self.py(), + self.as_borrowed(), + kwargs.map(Bound::as_borrowed), + private::Token, + ) } + #[inline] fn call0(&self) -> PyResult> { unsafe { ffi::compat::PyObject_CallNoArgs(self.as_ptr()).assume_owned_or_err(self.py()) } } fn call1(&self, args: impl IntoPy>) -> PyResult> { - self.call(args, None) + args.__py_call_vectorcall1(self.py(), self.as_borrowed(), private::Token) } + #[inline] fn call_method( &self, name: N, @@ -1197,10 +1188,16 @@ impl<'py> PyAnyMethods<'py> for Bound<'py, PyAny> { N: IntoPy>, A: IntoPy>, { - self.getattr(name) - .and_then(|method| method.call(args, kwargs)) + // Don't `args.into_py()`! This will lose the optimization of vectorcall. + match kwargs { + Some(_) => self + .getattr(name) + .and_then(|method| method.call(args, kwargs)), + None => self.call_method1(name, args), + } } + #[inline] fn call_method0(&self, name: N) -> PyResult> where N: IntoPy>, @@ -1218,7 +1215,12 @@ impl<'py> PyAnyMethods<'py> for Bound<'py, PyAny> { N: IntoPy>, A: IntoPy>, { - self.call_method(name, args, None) + args.__py_call_method_vectorcall1( + self.py(), + self.as_borrowed(), + name.into_py(self.py()).into_bound(self.py()), + private::Token, + ) } fn is_truthy(&self) -> PyResult { diff --git a/src/types/tuple.rs b/src/types/tuple.rs index 44a9d15e836..8c274a3ff5d 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -1,13 +1,15 @@ use std::iter::FusedIterator; -use crate::conversion::IntoPyObject; +use crate::conversion::{private, IntoPyObject}; use crate::ffi::{self, Py_ssize_t}; use crate::ffi_ptr_ext::FfiPtrExt; #[cfg(feature = "experimental-inspect")] use crate::inspect::types::TypeInfo; use crate::instance::Borrowed; use crate::internal_tricks::get_ssize_index; -use crate::types::{any::PyAnyMethods, sequence::PySequenceMethods, PyList, PySequence}; +use crate::types::{ + any::PyAnyMethods, sequence::PySequenceMethods, PyDict, PyList, PySequence, PyString, +}; use crate::{ exceptions, Bound, BoundObject, FromPyObject, IntoPy, Py, PyAny, PyErr, PyObject, PyResult, Python, ToPyObject, @@ -522,7 +524,7 @@ macro_rules! tuple_conversion ({$length:expr,$(($refN:ident, $n:tt, $T:ident)),+ } #[cfg(feature = "experimental-inspect")] -fn type_output() -> TypeInfo { + fn type_output() -> TypeInfo { TypeInfo::Tuple(Some(vec![$( $T::type_output() ),+])) } } @@ -550,6 +552,109 @@ fn type_output() -> TypeInfo { fn type_output() -> TypeInfo { TypeInfo::Tuple(Some(vec![$( $T::type_output() ),+])) } + + #[inline] + fn __py_call_vectorcall1<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + _: private::Token, + ) -> PyResult> { + cfg_if::cfg_if! { + if #[cfg(all(Py_3_9, not(any(PyPy, GraalPy, Py_LIMITED_API))))] { + // We need this to drop the arguments correctly. + let args_bound = [$(self.$n.into_py(py).into_bound(py),)*]; + if $length == 1 { + unsafe { + ffi::PyObject_CallOneArg(function.as_ptr(), args_bound[0].as_ptr()).assume_owned_or_err(py) + } + } else { + // Prepend one null argument for `PY_VECTORCALL_ARGUMENTS_OFFSET`. + let mut args = [std::ptr::null_mut(), $(args_bound[$n].as_ptr()),*]; + unsafe { + ffi::PyObject_Vectorcall( + function.as_ptr(), + args.as_mut_ptr().add(1), + $length + ffi::PY_VECTORCALL_ARGUMENTS_OFFSET, + std::ptr::null_mut(), + ) + .assume_owned_or_err(py) + } + } + } else { + function.call1(>>::into_py(self, py).into_bound(py)) + } + } + } + + #[inline] + fn __py_call_vectorcall<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + kwargs: Option>, + _: private::Token, + ) -> PyResult> { + cfg_if::cfg_if! { + if #[cfg(all(Py_3_9, not(any(PyPy, GraalPy, Py_LIMITED_API))))] { + // We need this to drop the arguments correctly. + let args_bound = [$(self.$n.into_py(py).into_bound(py),)*]; + // Prepend one null argument for `PY_VECTORCALL_ARGUMENTS_OFFSET`. + let mut args = [std::ptr::null_mut(), $(args_bound[$n].as_ptr()),*]; + unsafe { + ffi::PyObject_VectorcallDict( + function.as_ptr(), + args.as_mut_ptr().add(1), + $length + ffi::PY_VECTORCALL_ARGUMENTS_OFFSET, + kwargs.map_or_else(std::ptr::null_mut, |kwargs| kwargs.as_ptr()), + ) + .assume_owned_or_err(py) + } + } else { + function.call(>>::into_py(self, py).into_bound(py), kwargs.as_deref()) + } + } + } + + #[inline] + fn __py_call_method_vectorcall1<'py>( + self, + py: Python<'py>, + object: Borrowed<'_, 'py, PyAny>, + method_name: Bound<'py, PyString>, + _: private::Token, + ) -> PyResult> { + cfg_if::cfg_if! { + if #[cfg(all(Py_3_9, not(any(PyPy, GraalPy, Py_LIMITED_API))))] { + // We need this to drop the arguments correctly. + let args_bound = [$(self.$n.into_py(py).into_bound(py),)*]; + if $length == 1 { + unsafe { + ffi::PyObject_CallMethodOneArg( + object.as_ptr(), + method_name.as_ptr(), + args_bound[0].as_ptr(), + ) + .assume_owned_or_err(py) + } + } else { + let mut args = [object.as_ptr(), $(args_bound[$n].as_ptr()),*]; + unsafe { + ffi::PyObject_VectorcallMethod( + method_name.as_ptr(), + args.as_mut_ptr(), + // +1 for the receiver. + 1 + $length + ffi::PY_VECTORCALL_ARGUMENTS_OFFSET, + std::ptr::null_mut(), + ) + .assume_owned_or_err(py) + } + } + } else { + object.call_method1(method_name, >>::into_py(self, py).into_bound(py)) + } + } + } } impl<'py, $($T: FromPyObject<'py>),+> FromPyObject<'py> for ($($T,)+) { @@ -568,7 +673,7 @@ fn type_output() -> TypeInfo { } #[cfg(feature = "experimental-inspect")] -fn type_input() -> TypeInfo { + fn type_input() -> TypeInfo { TypeInfo::Tuple(Some(vec![$( $T::type_input() ),+])) } }