diff --git a/newsfragments/4456.changed.md b/newsfragments/4456.changed.md new file mode 100644 index 00000000000..094dece12a5 --- /dev/null +++ b/newsfragments/4456.changed.md @@ -0,0 +1 @@ +Improve performance of calls to Python by using the vectorcall calling convention where possible. \ No newline at end of file diff --git a/pyo3-benches/benches/bench_call.rs b/pyo3-benches/benches/bench_call.rs index 8470c8768d3..55ebd67e8de 100644 --- a/pyo3-benches/benches/bench_call.rs +++ b/pyo3-benches/benches/bench_call.rs @@ -3,6 +3,7 @@ use std::hint::black_box; use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion}; use pyo3::prelude::*; +use pyo3::types::IntoPyDict; macro_rules! test_module { ($py:ident, $code:literal) => { @@ -25,6 +26,62 @@ fn bench_call_0(b: &mut Bencher<'_>) { }) } +fn bench_call_1(b: &mut Bencher<'_>) { + Python::with_gil(|py| { + let module = test_module!(py, "def foo(a, b, c): pass"); + + let foo_module = &module.getattr("foo").unwrap(); + let args = ( + <_ as IntoPy>::into_py(1, py).into_bound(py), + <_ as IntoPy>::into_py("s", py).into_bound(py), + <_ as IntoPy>::into_py(1.23, py).into_bound(py), + ); + + b.iter(|| { + for _ in 0..1000 { + black_box(foo_module).call1(args.clone()).unwrap(); + } + }); + }) +} + +fn bench_call(b: &mut Bencher<'_>) { + Python::with_gil(|py| { + let module = test_module!(py, "def foo(a, b, c, d, e): pass"); + + let foo_module = &module.getattr("foo").unwrap(); + let args = ( + <_ as IntoPy>::into_py(1, py).into_bound(py), + <_ as IntoPy>::into_py("s", py).into_bound(py), + <_ as IntoPy>::into_py(1.23, py).into_bound(py), + ); + let kwargs = [("d", 1), ("e", 42)].into_py_dict(py); + + b.iter(|| { + for _ in 0..1000 { + black_box(foo_module) + .call(args.clone(), Some(&kwargs)) + .unwrap(); + } + }); + }) +} + +fn bench_call_one_arg(b: &mut Bencher<'_>) { + Python::with_gil(|py| { + let module = test_module!(py, "def foo(a): pass"); + + let foo_module = &module.getattr("foo").unwrap(); + let arg = <_ as IntoPy>::into_py(1, py).into_bound(py); + + b.iter(|| { + for _ in 0..1000 { + black_box(foo_module).call1((arg.clone(),)).unwrap(); + } + }); + }) +} + fn bench_call_method_0(b: &mut Bencher<'_>) { Python::with_gil(|py| { let module = test_module!( @@ -46,9 +103,96 @@ class Foo: }) } +fn bench_call_method_1(b: &mut Bencher<'_>) { + Python::with_gil(|py| { + let module = test_module!( + py, + " +class Foo: + def foo(self, a, b, c): + pass +" + ); + + let foo_module = &module.getattr("Foo").unwrap().call0().unwrap(); + let args = ( + <_ as IntoPy>::into_py(1, py).into_bound(py), + <_ as IntoPy>::into_py("s", py).into_bound(py), + <_ as IntoPy>::into_py(1.23, py).into_bound(py), + ); + + b.iter(|| { + for _ in 0..1000 { + black_box(foo_module) + .call_method1("foo", args.clone()) + .unwrap(); + } + }); + }) +} + +fn bench_call_method(b: &mut Bencher<'_>) { + Python::with_gil(|py| { + let module = test_module!( + py, + " +class Foo: + def foo(self, a, b, c, d, e): + pass +" + ); + + let foo_module = &module.getattr("Foo").unwrap().call0().unwrap(); + let args = ( + <_ as IntoPy>::into_py(1, py).into_bound(py), + <_ as IntoPy>::into_py("s", py).into_bound(py), + <_ as IntoPy>::into_py(1.23, py).into_bound(py), + ); + let kwargs = [("d", 1), ("e", 42)].into_py_dict(py); + + b.iter(|| { + for _ in 0..1000 { + black_box(foo_module) + .call_method("foo", args.clone(), Some(&kwargs)) + .unwrap(); + } + }); + }) +} + +fn bench_call_method_one_arg(b: &mut Bencher<'_>) { + Python::with_gil(|py| { + let module = test_module!( + py, + " +class Foo: + def foo(self, a): + pass +" + ); + + let foo_module = &module.getattr("Foo").unwrap().call0().unwrap(); + let arg = <_ as IntoPy>::into_py(1, py).into_bound(py); + + b.iter(|| { + for _ in 0..1000 { + black_box(foo_module) + .call_method1("foo", (arg.clone(),)) + .unwrap(); + } + }); + }) +} + fn criterion_benchmark(c: &mut Criterion) { c.bench_function("call_0", bench_call_0); + c.bench_function("call_1", bench_call_1); + c.bench_function("call", bench_call); + c.bench_function("call_one_arg", bench_call_one_arg); c.bench_function("call_method_0", bench_call_method_0); + c.bench_function("call_method_1", bench_call_method_1); + c.bench_function("call_method", bench_call_method); + c.bench_function("call_method_one_arg", bench_call_method_one_arg); } criterion_group!(benches, criterion_benchmark); diff --git a/pyo3-ffi/src/cpython/abstract_.rs b/pyo3-ffi/src/cpython/abstract_.rs index 34525cec16f..83295e58f61 100644 --- a/pyo3-ffi/src/cpython/abstract_.rs +++ b/pyo3-ffi/src/cpython/abstract_.rs @@ -40,7 +40,7 @@ extern "C" { } #[cfg(Py_3_8)] -const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t = +pub const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t = 1 << (8 * std::mem::size_of::() as size_t - 1); #[cfg(Py_3_8)] diff --git a/src/conversion.rs b/src/conversion.rs index 6a089e186bc..ec0b7e2462b 100644 --- a/src/conversion.rs +++ b/src/conversion.rs @@ -1,10 +1,11 @@ //! Defines conversions between Rust and Python types. use crate::err::PyResult; +use crate::ffi_ptr_ext::FfiPtrExt; #[cfg(feature = "experimental-inspect")] use crate::inspect::types::TypeInfo; use crate::pyclass::boolean_struct::False; use crate::types::any::PyAnyMethods; -use crate::types::PyTuple; +use crate::types::{PyDict, PyString, PyTuple}; use crate::{ffi, Borrowed, Bound, Py, PyAny, PyClass, PyObject, PyRef, PyRefMut, Python}; #[cfg(feature = "gil-refs")] use { @@ -176,6 +177,97 @@ pub trait IntoPy: Sized { fn type_output() -> TypeInfo { TypeInfo::Any } + + // The following methods are helpers to use the vectorcall API where possible. + // They are overridden on tuples to perform a vectorcall. + // Be careful when you're implementing these: they can never refer to `Bound` call methods, + // as those refer to these methods, so this will create an infinite recursion. + #[doc(hidden)] + #[inline] + fn __py_call_vectorcall1<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + _: private::Token, + ) -> PyResult> + where + Self: IntoPy>, + { + #[inline] + fn inner<'py>( + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + args: Bound<'py, PyTuple>, + ) -> PyResult> { + unsafe { + ffi::PyObject_Call(function.as_ptr(), args.as_ptr(), std::ptr::null_mut()) + .assume_owned_or_err(py) + } + } + inner( + py, + function, + >>::into_py(self, py).into_bound(py), + ) + } + + #[doc(hidden)] + #[inline] + fn __py_call_vectorcall<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + kwargs: Option>, + _: private::Token, + ) -> PyResult> + where + Self: IntoPy>, + { + #[inline] + fn inner<'py>( + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + args: Bound<'py, PyTuple>, + kwargs: Option>, + ) -> PyResult> { + unsafe { + ffi::PyObject_Call( + function.as_ptr(), + args.as_ptr(), + kwargs.map_or_else(std::ptr::null_mut, |kwargs| kwargs.as_ptr()), + ) + .assume_owned_or_err(py) + } + } + inner( + py, + function, + >>::into_py(self, py).into_bound(py), + kwargs, + ) + } + + #[doc(hidden)] + #[inline] + fn __py_call_method_vectorcall1<'py>( + self, + _py: Python<'py>, + object: Borrowed<'_, 'py, PyAny>, + method_name: Borrowed<'_, 'py, PyString>, + _: private::Token, + ) -> PyResult> + where + Self: IntoPy>, + { + // Don't `self.into_py()`! This will lose the optimization of vectorcall. + object + .getattr(method_name.to_owned()) + .and_then(|method| method.call1(self)) + } +} + +pub(crate) mod private { + pub struct Token; } /// Extract a type from a Python object. @@ -515,6 +607,52 @@ impl IntoPy> for () { fn into_py(self, py: Python<'_>) -> Py { PyTuple::empty_bound(py).unbind() } + + #[inline] + fn __py_call_vectorcall1<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + _: private::Token, + ) -> PyResult> { + unsafe { ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py) } + } + + #[inline] + fn __py_call_vectorcall<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + kwargs: Option>, + _: private::Token, + ) -> PyResult> { + unsafe { + match kwargs { + Some(kwargs) => ffi::PyObject_Call( + function.as_ptr(), + PyTuple::empty_bound(py).as_ptr(), + kwargs.as_ptr(), + ) + .assume_owned_or_err(py), + None => ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py), + } + } + } + + #[inline] + #[allow(clippy::used_underscore_binding)] + fn __py_call_method_vectorcall1<'py>( + self, + py: Python<'py>, + object: Borrowed<'_, 'py, PyAny>, + method_name: Borrowed<'_, 'py, PyString>, + _: private::Token, + ) -> PyResult> { + unsafe { + ffi::compat::PyObject_CallMethodNoArgs(object.as_ptr(), method_name.as_ptr()) + .assume_owned_or_err(py) + } + } } /// Raw level conversion between `*mut ffi::PyObject` and PyO3 types. diff --git a/src/instance.rs b/src/instance.rs index 06e55c00b0f..aeca0f2d6a8 100644 --- a/src/instance.rs +++ b/src/instance.rs @@ -1502,19 +1502,25 @@ impl Py { /// Calls the object. /// /// This is equivalent to the Python expression `self(*args, **kwargs)`. - pub fn call_bound( + pub fn call_bound( &self, py: Python<'_>, - args: impl IntoPy>, + args: N, kwargs: Option<&Bound<'_, PyDict>>, - ) -> PyResult { + ) -> PyResult + where + N: IntoPy>, + { self.bind(py).as_any().call(args, kwargs).map(Bound::unbind) } /// Calls the object with only positional arguments. /// /// This is equivalent to the Python expression `self(*args)`. - pub fn call1(&self, py: Python<'_>, args: impl IntoPy>) -> PyResult { + pub fn call1(&self, py: Python<'_>, args: N) -> PyResult + where + N: IntoPy>, + { self.bind(py).as_any().call1(args).map(Bound::unbind) } diff --git a/src/types/any.rs b/src/types/any.rs index 335986c60b9..50ffebb34db 100644 --- a/src/types/any.rs +++ b/src/types/any.rs @@ -1,5 +1,5 @@ use crate::class::basic::CompareOp; -use crate::conversion::{AsPyPointer, FromPyObjectBound, IntoPy, ToPyObject}; +use crate::conversion::{private, AsPyPointer, FromPyObjectBound, IntoPy, ToPyObject}; use crate::err::{DowncastError, DowncastIntoError, PyErr, PyResult}; use crate::exceptions::{PyAttributeError, PyTypeError}; use crate::ffi_ptr_ext::FfiPtrExt; @@ -1308,11 +1308,9 @@ pub trait PyAnyMethods<'py>: crate::sealed::Sealed { /// }) /// # } /// ``` - fn call( - &self, - args: impl IntoPy>, - kwargs: Option<&Bound<'_, PyDict>>, - ) -> PyResult>; + fn call(&self, args: A, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult> + where + A: IntoPy>; /// Calls the object without arguments. /// @@ -1363,7 +1361,9 @@ pub trait PyAnyMethods<'py>: crate::sealed::Sealed { /// }) /// # } /// ``` - fn call1(&self, args: impl IntoPy>) -> PyResult>; + fn call1(&self, args: A) -> PyResult> + where + A: IntoPy>; /// Calls a method on the object. /// @@ -2027,38 +2027,31 @@ impl<'py> PyAnyMethods<'py> for Bound<'py, PyAny> { unsafe { ffi::PyCallable_Check(self.as_ptr()) != 0 } } - fn call( - &self, - args: impl IntoPy>, - kwargs: Option<&Bound<'_, PyDict>>, - ) -> PyResult> { - fn inner<'py>( - any: &Bound<'py, PyAny>, - args: Bound<'_, PyTuple>, - kwargs: Option<&Bound<'_, PyDict>>, - ) -> PyResult> { - unsafe { - ffi::PyObject_Call( - any.as_ptr(), - args.as_ptr(), - kwargs.map_or(std::ptr::null_mut(), |dict| dict.as_ptr()), - ) - .assume_owned_or_err(any.py()) - } - } - - let py = self.py(); - inner(self, args.into_py(py).into_bound(py), kwargs) + fn call(&self, args: A, kwargs: Option<&Bound<'_, PyDict>>) -> PyResult> + where + A: IntoPy>, + { + args.__py_call_vectorcall( + self.py(), + self.as_borrowed(), + kwargs.map(Bound::as_borrowed), + private::Token, + ) } + #[inline] fn call0(&self) -> PyResult> { unsafe { ffi::compat::PyObject_CallNoArgs(self.as_ptr()).assume_owned_or_err(self.py()) } } - fn call1(&self, args: impl IntoPy>) -> PyResult> { - self.call(args, None) + fn call1(&self, args: A) -> PyResult> + where + A: IntoPy>, + { + args.__py_call_vectorcall1(self.py(), self.as_borrowed(), private::Token) } + #[inline] fn call_method( &self, name: N, @@ -2069,10 +2062,16 @@ impl<'py> PyAnyMethods<'py> for Bound<'py, PyAny> { N: IntoPy>, A: IntoPy>, { - self.getattr(name) - .and_then(|method| method.call(args, kwargs)) + // Don't `args.into_py()`! This will lose the optimization of vectorcall. + match kwargs { + Some(_) => self + .getattr(name) + .and_then(|method| method.call(args, kwargs)), + None => self.call_method1(name, args), + } } + #[inline] fn call_method0(&self, name: N) -> PyResult> where N: IntoPy>, @@ -2090,7 +2089,12 @@ impl<'py> PyAnyMethods<'py> for Bound<'py, PyAny> { N: IntoPy>, A: IntoPy>, { - self.call_method(name, args, None) + args.__py_call_method_vectorcall1( + self.py(), + self.as_borrowed(), + name.into_py(self.py()).bind_borrowed(self.py()), + private::Token, + ) } fn is_truthy(&self) -> PyResult { diff --git a/src/types/tuple.rs b/src/types/tuple.rs index aacc1efe431..20f53943aba 100644 --- a/src/types/tuple.rs +++ b/src/types/tuple.rs @@ -1,12 +1,15 @@ use std::iter::FusedIterator; +use crate::conversion::private; use crate::ffi::{self, Py_ssize_t}; use crate::ffi_ptr_ext::FfiPtrExt; #[cfg(feature = "experimental-inspect")] use crate::inspect::types::TypeInfo; use crate::instance::Borrowed; use crate::internal_tricks::get_ssize_index; -use crate::types::{any::PyAnyMethods, sequence::PySequenceMethods, PyList, PySequence}; +use crate::types::{ + any::PyAnyMethods, sequence::PySequenceMethods, PyDict, PyList, PySequence, PyString, +}; #[cfg(feature = "gil-refs")] use crate::PyNativeType; use crate::{ @@ -682,7 +685,7 @@ macro_rules! tuple_conversion ({$length:expr,$(($refN:ident, $n:tt, $T:ident)),+ } #[cfg(feature = "experimental-inspect")] -fn type_output() -> TypeInfo { + fn type_output() -> TypeInfo { TypeInfo::Tuple(Some(vec![$( $T::type_output() ),+])) } } @@ -696,6 +699,109 @@ fn type_output() -> TypeInfo { fn type_output() -> TypeInfo { TypeInfo::Tuple(Some(vec![$( $T::type_output() ),+])) } + + #[inline] + fn __py_call_vectorcall1<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + _: private::Token, + ) -> PyResult> { + cfg_if::cfg_if! { + if #[cfg(all(Py_3_9, not(any(PyPy, GraalPy, Py_LIMITED_API))))] { + // We need this to drop the arguments correctly. + let args_bound = [$(self.$n.into_py(py).into_bound(py),)*]; + if $length == 1 { + unsafe { + ffi::PyObject_CallOneArg(function.as_ptr(), args_bound[0].as_ptr()).assume_owned_or_err(py) + } + } else { + // Prepend one null argument for `PY_VECTORCALL_ARGUMENTS_OFFSET`. + let mut args = [std::ptr::null_mut(), $(args_bound[$n].as_ptr()),*]; + unsafe { + ffi::PyObject_Vectorcall( + function.as_ptr(), + args.as_mut_ptr().add(1), + $length + ffi::PY_VECTORCALL_ARGUMENTS_OFFSET, + std::ptr::null_mut(), + ) + .assume_owned_or_err(py) + } + } + } else { + function.call1(>>::into_py(self, py).into_bound(py)) + } + } + } + + #[inline] + fn __py_call_vectorcall<'py>( + self, + py: Python<'py>, + function: Borrowed<'_, 'py, PyAny>, + kwargs: Option>, + _: private::Token, + ) -> PyResult> { + cfg_if::cfg_if! { + if #[cfg(all(Py_3_9, not(any(PyPy, GraalPy, Py_LIMITED_API))))] { + // We need this to drop the arguments correctly. + let args_bound = [$(self.$n.into_py(py).into_bound(py),)*]; + // Prepend one null argument for `PY_VECTORCALL_ARGUMENTS_OFFSET`. + let mut args = [std::ptr::null_mut(), $(args_bound[$n].as_ptr()),*]; + unsafe { + ffi::PyObject_VectorcallDict( + function.as_ptr(), + args.as_mut_ptr().add(1), + $length + ffi::PY_VECTORCALL_ARGUMENTS_OFFSET, + kwargs.map_or_else(std::ptr::null_mut, |kwargs| kwargs.as_ptr()), + ) + .assume_owned_or_err(py) + } + } else { + function.call(>>::into_py(self, py).into_bound(py), kwargs.as_deref()) + } + } + } + + #[inline] + fn __py_call_method_vectorcall1<'py>( + self, + py: Python<'py>, + object: Borrowed<'_, 'py, PyAny>, + method_name: Borrowed<'_, 'py, PyString>, + _: private::Token, + ) -> PyResult> { + cfg_if::cfg_if! { + if #[cfg(all(Py_3_9, not(any(PyPy, GraalPy, Py_LIMITED_API))))] { + // We need this to drop the arguments correctly. + let args_bound = [$(self.$n.into_py(py).into_bound(py),)*]; + if $length == 1 { + unsafe { + ffi::PyObject_CallMethodOneArg( + object.as_ptr(), + method_name.as_ptr(), + args_bound[0].as_ptr(), + ) + .assume_owned_or_err(py) + } + } else { + let mut args = [object.as_ptr(), $(args_bound[$n].as_ptr()),*]; + unsafe { + ffi::PyObject_VectorcallMethod( + method_name.as_ptr(), + args.as_mut_ptr(), + // +1 for the receiver. + 1 + $length + ffi::PY_VECTORCALL_ARGUMENTS_OFFSET, + std::ptr::null_mut(), + ) + .assume_owned_or_err(py) + } + } + } else { + object.call_method1(method_name.to_owned(), >>::into_py(self, py).into_bound(py)) + } + } + } } impl<'py, $($T: FromPyObject<'py>),+> FromPyObject<'py> for ($($T,)+) { @@ -714,7 +820,7 @@ fn type_output() -> TypeInfo { } #[cfg(feature = "experimental-inspect")] -fn type_input() -> TypeInfo { + fn type_input() -> TypeInfo { TypeInfo::Tuple(Some(vec![$( $T::type_input() ),+])) } }