PyO3 · davidhewitt · Aug 25, 2024 · Aug 19, 2024 · Aug 22, 2024 · Aug 24, 2024
diff --git a/newsfragments/4456.changed.md b/newsfragments/4456.changed.md
@@ -0,0 +1 @@
+Improve performance of calls to Python by using the vectorcall calling convention where possible.
diff --git a/pyo3-benches/benches/bench_call.rs b/pyo3-benches/benches/bench_call.rs
@@ -2,8 +2,9 @@ use std::hint::black_box;
 
 use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion};
 
-use pyo3::prelude::*;
 use pyo3::ffi::c_str;
+use pyo3::prelude::*;
+use pyo3::types::IntoPyDict;
 
 macro_rules! test_module {
     ($py:ident, $code:literal) => {
@@ -26,6 +27,62 @@ fn bench_call_0(b: &mut Bencher<'_>) {
     })
 }
 
+fn bench_call_1(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        let module = test_module!(py, "def foo(a, b, c): pass");
+
+        let foo_module = &module.getattr("foo").unwrap();
+        let args = (
+            <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
+        );
+
+        b.iter(|| {
+            for _ in 0..1000 {
+                black_box(foo_module).call1(args.clone()).unwrap();
+            }
+        });
+    })
+}
+
+fn bench_call(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        let module = test_module!(py, "def foo(a, b, c, d, e): pass");
+
+        let foo_module = &module.getattr("foo").unwrap();
+        let args = (
+            <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
+        );
+        let kwargs = [("d", 1), ("e", 42)].into_py_dict(py);
+
+        b.iter(|| {
+            for _ in 0..1000 {
+                black_box(foo_module)
+                    .call(args.clone(), Some(&kwargs))
+                    .unwrap();
+            }
+        });
+    })
+}
+
+fn bench_call_one_arg(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        let module = test_module!(py, "def foo(a): pass");
+
+        let foo_module = &module.getattr("foo").unwrap();
+        let arg = <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py);
+
+        b.iter(|| {
+            for _ in 0..1000 {
+                black_box(foo_module).call1((arg.clone(),)).unwrap();
+            }
+        });
+    })
+}
+
 fn bench_call_method_0(b: &mut Bencher<'_>) {
     Python::with_gil(|py| {
         let module = test_module!(
@@ -47,9 +104,96 @@ class Foo:
     })
 }
 
+fn bench_call_method_1(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        let module = test_module!(
+            py,
+            "
+class Foo:
+    def foo(self, a, b, c):
+        pass
+"
+        );
+
+        let foo_module = &module.getattr("Foo").unwrap().call0().unwrap();
+        let args = (
+            <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
+        );
+
+        b.iter(|| {
+            for _ in 0..1000 {
+                black_box(foo_module)
+                    .call_method1("foo", args.clone())
+                    .unwrap();
+            }
+        });
+    })
+}
+
+fn bench_call_method(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        let module = test_module!(
+            py,
+            "
+class Foo:
+    def foo(self, a, b, c, d, e):
+        pass
+"
+        );
+
+        let foo_module = &module.getattr("Foo").unwrap().call0().unwrap();
+        let args = (
+            <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
+            <_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
+        );
+        let kwargs = [("d", 1), ("e", 42)].into_py_dict(py);
+
+        b.iter(|| {
+            for _ in 0..1000 {
+                black_box(foo_module)
+                    .call_method("foo", args.clone(), Some(&kwargs))
+                    .unwrap();
+            }
+        });
+    })
+}
+
+fn bench_call_method_one_arg(b: &mut Bencher<'_>) {
+    Python::with_gil(|py| {
+        let module = test_module!(
+            py,
+            "
+class Foo:
+    def foo(self, a):
+        pass
+"
+        );
+
+        let foo_module = &module.getattr("Foo").unwrap().call0().unwrap();
+        let arg = <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py);
+
+        b.iter(|| {
+            for _ in 0..1000 {
+                black_box(foo_module)
+                    .call_method1("foo", (arg.clone(),))
+                    .unwrap();
+            }
+        });
+    })
+}
+
 fn criterion_benchmark(c: &mut Criterion) {
     c.bench_function("call_0", bench_call_0);
+    c.bench_function("call_1", bench_call_1);
+    c.bench_function("call", bench_call);
+    c.bench_function("call_one_arg", bench_call_one_arg);
     c.bench_function("call_method_0", bench_call_method_0);
+    c.bench_function("call_method_1", bench_call_method_1);
+    c.bench_function("call_method", bench_call_method);
+    c.bench_function("call_method_one_arg", bench_call_method_one_arg);
 }
 
 criterion_group!(benches, criterion_benchmark);

diff --git a/pyo3-ffi/src/cpython/abstract_.rs b/pyo3-ffi/src/cpython/abstract_.rs
@@ -40,7 +40,7 @@ extern "C" {
 }
 
 #[cfg(Py_3_8)]
-const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t =
+pub const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t =
     1 << (8 * std::mem::size_of::<size_t>() as size_t - 1);
 
 #[cfg(Py_3_8)]

diff --git a/src/conversion.rs b/src/conversion.rs
@@ -1,10 +1,11 @@
 //! Defines conversions between Rust and Python types.
 use crate::err::PyResult;
+use crate::ffi_ptr_ext::FfiPtrExt;
 #[cfg(feature = "experimental-inspect")]
 use crate::inspect::types::TypeInfo;
 use crate::pyclass::boolean_struct::False;
 use crate::types::any::PyAnyMethods;
-use crate::types::PyTuple;
+use crate::types::{PyDict, PyString, PyTuple};
 use crate::{
     ffi, Borrowed, Bound, BoundObject, Py, PyAny, PyClass, PyErr, PyObject, PyRef, PyRefMut, Python,
 };
@@ -172,6 +173,93 @@ pub trait IntoPy<T>: Sized {
     fn type_output() -> TypeInfo {
         TypeInfo::Any
     }
+
+    // The following methods are helpers that use the vectorcall API where possible.
+    // They are overridden for tuples so that the call is performed as a vectorcall.
+    // Be careful when implementing these: an implementation must never call the `Bound`
+    // call method that dispatches to it, as that would cause infinite recursion.
+    #[doc(hidden)]
+    #[inline]
+    fn __py_call_vectorcall1<'py>(
+        self,
+        py: Python<'py>,
+        function: Borrowed<'_, 'py, PyAny>,
+        _: private::Token,
+    ) -> PyResult<Bound<'py, PyAny>>
+    where
+        Self: IntoPy<Py<PyTuple>>,
+    {
+        #[inline]
+        fn inner<'py>(
+            py: Python<'py>,
+            function: Borrowed<'_, 'py, PyAny>,
+            args: Bound<'py, PyTuple>,
+        ) -> PyResult<Bound<'py, PyAny>> {
+            unsafe {
+                ffi::PyObject_Call(function.as_ptr(), args.as_ptr(), std::ptr::null_mut())
+                    .assume_owned_or_err(py)
+            }
+        }
+        inner(
+            py,
+            function,
+            <Self as IntoPy<Py<PyTuple>>>::into_py(self, py).into_bound(py),
+        )
+    }
+
+    #[doc(hidden)]
+    #[inline]
+    fn __py_call_vectorcall<'py>(
+        self,
+        py: Python<'py>,
+        function: Borrowed<'_, 'py, PyAny>,
+        kwargs: Option<Borrowed<'_, '_, PyDict>>,
+        _: private::Token,
+    ) -> PyResult<Bound<'py, PyAny>>
+    where
+        Self: IntoPy<Py<PyTuple>>,
+    {
+        #[inline]
+        fn inner<'py>(
+            py: Python<'py>,
+            function: Borrowed<'_, 'py, PyAny>,
+            args: Bound<'py, PyTuple>,
+            kwargs: Option<Borrowed<'_, '_, PyDict>>,
+        ) -> PyResult<Bound<'py, PyAny>> {
+            unsafe {
+                ffi::PyObject_Call(
+                    function.as_ptr(),
+                    args.as_ptr(),
+                    kwargs.map_or_else(std::ptr::null_mut, |kwargs| kwargs.as_ptr()),
+                )
+                .assume_owned_or_err(py)
+            }
+        }
+        inner(
+            py,
+            function,
+            <Self as IntoPy<Py<PyTuple>>>::into_py(self, py).into_bound(py),
+            kwargs,
+        )
+    }
+
+    #[doc(hidden)]
+    #[inline]
+    fn __py_call_method_vectorcall1<'py>(
+        self,
+        _py: Python<'py>,
+        object: Borrowed<'_, 'py, PyAny>,
+        method_name: Borrowed<'_, 'py, PyString>,
+        _: private::Token,
+    ) -> PyResult<Bound<'py, PyAny>>
+    where
+        Self: IntoPy<Py<PyTuple>>,
+    {
+        // Don't call `self.into_py()` here: that would lose the vectorcall optimization.
+        object
+            .getattr(method_name)
+            .and_then(|method| method.call1(self))
+    }
 }
 
 /// Defines a conversion from a Rust type to a Python object, which may fail.
@@ -502,6 +590,52 @@ impl IntoPy<Py<PyTuple>> for () {
     fn into_py(self, py: Python<'_>) -> Py<PyTuple> {
         PyTuple::empty(py).unbind()
     }
+
+    #[inline]
+    fn __py_call_vectorcall1<'py>(
+        self,
+        py: Python<'py>,
+        function: Borrowed<'_, 'py, PyAny>,
+        _: private::Token,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        unsafe { ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py) }
+    }
+
+    #[inline]
+    fn __py_call_vectorcall<'py>(
+        self,
+        py: Python<'py>,
+        function: Borrowed<'_, 'py, PyAny>,
+        kwargs: Option<Borrowed<'_, '_, PyDict>>,
+        _: private::Token,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        unsafe {
+            match kwargs {
+                Some(kwargs) => ffi::PyObject_Call(
+                    function.as_ptr(),
+                    PyTuple::empty(py).as_ptr(),
+                    kwargs.as_ptr(),
+                )
+                .assume_owned_or_err(py),
+                None => ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py),
+            }
+        }
+    }
+
+    #[inline]
+    #[allow(clippy::used_underscore_binding)]
+    fn __py_call_method_vectorcall1<'py>(
+        self,
+        py: Python<'py>,
+        object: Borrowed<'_, 'py, PyAny>,
+        method_name: Borrowed<'_, 'py, PyString>,
+        _: private::Token,
+    ) -> PyResult<Bound<'py, PyAny>> {
+        unsafe {
+            ffi::compat::PyObject_CallMethodNoArgs(object.as_ptr(), method_name.as_ptr())
+                .assume_owned_or_err(py)
+        }
+    }
 }
 
 impl<'py> IntoPyObject<'py> for () {

diff --git a/src/conversions/chrono.rs b/src/conversions/chrono.rs
@@ -813,7 +813,7 @@ fn timezone_utc(py: Python<'_>) -> Bound<'_, PyAny> {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::types::PyTuple;
+    use crate::{types::PyTuple, BoundObject};
     use std::{cmp::Ordering, panic};
 
     #[test]
@@ -1333,7 +1333,12 @@ mod tests {
             .unwrap()
             .getattr(name)
             .unwrap()
-            .call1(args)
+            .call1(
+                args.into_pyobject(py)
+                    .map_err(Into::into)
+                    .unwrap()
+                    .into_bound(),
+            )
             .unwrap()
     }
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Improve performance of calls to Python by using the vectorcall calling convention where possible.