Skip to content

Commit 2e891d0

Browse files
Use vectorcall (where possible) when calling Python functions (#4456)
* Use vectorcall (where possible) when calling Python functions. This works without any changes to user code. It works by adding methods to `IntoPy` that call functions, and specializing them for tuples. This currently supports only calls without kwargs for methods, and supports kwargs for functions via a somewhat slow approach (converting from a PyDict). This can be improved, but that will require additional API. We may consider adding more impls of IntoPy<Py<PyTuple>> that specialize (for example, for arrays and `Vec`), but this is a good start. * Add vectorcall benchmarks * Fix Clippy (elide a lifetime) --------- Co-authored-by: David Hewitt <[email protected]>
1 parent 8446937 commit 2e891d0

File tree

8 files changed

+439
-62
lines changed

8 files changed

+439
-62
lines changed

newsfragments/4456.changed.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Improve performance of calls to Python by using the vectorcall calling convention where possible.

pyo3-benches/benches/bench_call.rs

Lines changed: 145 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ use std::hint::black_box;
22

33
use codspeed_criterion_compat::{criterion_group, criterion_main, Bencher, Criterion};
44

5-
use pyo3::prelude::*;
65
use pyo3::ffi::c_str;
6+
use pyo3::prelude::*;
7+
use pyo3::types::IntoPyDict;
78

89
macro_rules! test_module {
910
($py:ident, $code:literal) => {
@@ -26,6 +27,62 @@ fn bench_call_0(b: &mut Bencher<'_>) {
2627
})
2728
}
2829

30+
fn bench_call_1(b: &mut Bencher<'_>) {
31+
Python::with_gil(|py| {
32+
let module = test_module!(py, "def foo(a, b, c): pass");
33+
34+
let foo_module = &module.getattr("foo").unwrap();
35+
let args = (
36+
<_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
37+
<_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
38+
<_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
39+
);
40+
41+
b.iter(|| {
42+
for _ in 0..1000 {
43+
black_box(foo_module).call1(args.clone()).unwrap();
44+
}
45+
});
46+
})
47+
}
48+
49+
fn bench_call(b: &mut Bencher<'_>) {
50+
Python::with_gil(|py| {
51+
let module = test_module!(py, "def foo(a, b, c, d, e): pass");
52+
53+
let foo_module = &module.getattr("foo").unwrap();
54+
let args = (
55+
<_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
56+
<_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
57+
<_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
58+
);
59+
let kwargs = [("d", 1), ("e", 42)].into_py_dict(py);
60+
61+
b.iter(|| {
62+
for _ in 0..1000 {
63+
black_box(foo_module)
64+
.call(args.clone(), Some(&kwargs))
65+
.unwrap();
66+
}
67+
});
68+
})
69+
}
70+
71+
fn bench_call_one_arg(b: &mut Bencher<'_>) {
72+
Python::with_gil(|py| {
73+
let module = test_module!(py, "def foo(a): pass");
74+
75+
let foo_module = &module.getattr("foo").unwrap();
76+
let arg = <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py);
77+
78+
b.iter(|| {
79+
for _ in 0..1000 {
80+
black_box(foo_module).call1((arg.clone(),)).unwrap();
81+
}
82+
});
83+
})
84+
}
85+
2986
fn bench_call_method_0(b: &mut Bencher<'_>) {
3087
Python::with_gil(|py| {
3188
let module = test_module!(
@@ -47,9 +104,96 @@ class Foo:
47104
})
48105
}
49106

107+
fn bench_call_method_1(b: &mut Bencher<'_>) {
108+
Python::with_gil(|py| {
109+
let module = test_module!(
110+
py,
111+
"
112+
class Foo:
113+
def foo(self, a, b, c):
114+
pass
115+
"
116+
);
117+
118+
let foo_module = &module.getattr("Foo").unwrap().call0().unwrap();
119+
let args = (
120+
<_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
121+
<_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
122+
<_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
123+
);
124+
125+
b.iter(|| {
126+
for _ in 0..1000 {
127+
black_box(foo_module)
128+
.call_method1("foo", args.clone())
129+
.unwrap();
130+
}
131+
});
132+
})
133+
}
134+
135+
fn bench_call_method(b: &mut Bencher<'_>) {
136+
Python::with_gil(|py| {
137+
let module = test_module!(
138+
py,
139+
"
140+
class Foo:
141+
def foo(self, a, b, c, d, e):
142+
pass
143+
"
144+
);
145+
146+
let foo_module = &module.getattr("Foo").unwrap().call0().unwrap();
147+
let args = (
148+
<_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py),
149+
<_ as IntoPy<PyObject>>::into_py("s", py).into_bound(py),
150+
<_ as IntoPy<PyObject>>::into_py(1.23, py).into_bound(py),
151+
);
152+
let kwargs = [("d", 1), ("e", 42)].into_py_dict(py);
153+
154+
b.iter(|| {
155+
for _ in 0..1000 {
156+
black_box(foo_module)
157+
.call_method("foo", args.clone(), Some(&kwargs))
158+
.unwrap();
159+
}
160+
});
161+
})
162+
}
163+
164+
fn bench_call_method_one_arg(b: &mut Bencher<'_>) {
165+
Python::with_gil(|py| {
166+
let module = test_module!(
167+
py,
168+
"
169+
class Foo:
170+
def foo(self, a):
171+
pass
172+
"
173+
);
174+
175+
let foo_module = &module.getattr("Foo").unwrap().call0().unwrap();
176+
let arg = <_ as IntoPy<PyObject>>::into_py(1, py).into_bound(py);
177+
178+
b.iter(|| {
179+
for _ in 0..1000 {
180+
black_box(foo_module)
181+
.call_method1("foo", (arg.clone(),))
182+
.unwrap();
183+
}
184+
});
185+
})
186+
}
187+
50188
fn criterion_benchmark(c: &mut Criterion) {
51189
c.bench_function("call_0", bench_call_0);
190+
c.bench_function("call_1", bench_call_1);
191+
c.bench_function("call", bench_call);
192+
c.bench_function("call_one_arg", bench_call_one_arg);
52193
c.bench_function("call_method_0", bench_call_method_0);
194+
c.bench_function("call_method_1", bench_call_method_1);
195+
c.bench_function("call_method", bench_call_method);
196+
c.bench_function("call_method_one_arg", bench_call_method_one_arg);
53197
}
54198

55199
criterion_group!(benches, criterion_benchmark);

pyo3-ffi/src/cpython/abstract_.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ extern "C" {
4040
}
4141

4242
#[cfg(Py_3_8)]
43-
const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t =
43+
pub const PY_VECTORCALL_ARGUMENTS_OFFSET: size_t =
4444
1 << (8 * std::mem::size_of::<size_t>() as size_t - 1);
4545

4646
#[cfg(Py_3_8)]

src/conversion.rs

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,11 @@
11
//! Defines conversions between Rust and Python types.
22
use crate::err::PyResult;
3+
use crate::ffi_ptr_ext::FfiPtrExt;
34
#[cfg(feature = "experimental-inspect")]
45
use crate::inspect::types::TypeInfo;
56
use crate::pyclass::boolean_struct::False;
67
use crate::types::any::PyAnyMethods;
7-
use crate::types::PyTuple;
8+
use crate::types::{PyDict, PyString, PyTuple};
89
use crate::{
910
ffi, Borrowed, Bound, BoundObject, Py, PyAny, PyClass, PyErr, PyObject, PyRef, PyRefMut, Python,
1011
};
@@ -172,6 +173,93 @@ pub trait IntoPy<T>: Sized {
172173
fn type_output() -> TypeInfo {
173174
TypeInfo::Any
174175
}
176+
177+
// The following methods are helpers to use the vectorcall API where possible.
178+
// They are overridden on tuples to perform a vectorcall.
179+
// Be careful when you're implementing these: they can never refer to `Bound` call methods,
180+
// as those refer to these methods, so this will create an infinite recursion.
181+
#[doc(hidden)]
182+
#[inline]
183+
fn __py_call_vectorcall1<'py>(
184+
self,
185+
py: Python<'py>,
186+
function: Borrowed<'_, 'py, PyAny>,
187+
_: private::Token,
188+
) -> PyResult<Bound<'py, PyAny>>
189+
where
190+
Self: IntoPy<Py<PyTuple>>,
191+
{
192+
#[inline]
193+
fn inner<'py>(
194+
py: Python<'py>,
195+
function: Borrowed<'_, 'py, PyAny>,
196+
args: Bound<'py, PyTuple>,
197+
) -> PyResult<Bound<'py, PyAny>> {
198+
unsafe {
199+
ffi::PyObject_Call(function.as_ptr(), args.as_ptr(), std::ptr::null_mut())
200+
.assume_owned_or_err(py)
201+
}
202+
}
203+
inner(
204+
py,
205+
function,
206+
<Self as IntoPy<Py<PyTuple>>>::into_py(self, py).into_bound(py),
207+
)
208+
}
209+
210+
#[doc(hidden)]
211+
#[inline]
212+
fn __py_call_vectorcall<'py>(
213+
self,
214+
py: Python<'py>,
215+
function: Borrowed<'_, 'py, PyAny>,
216+
kwargs: Option<Borrowed<'_, '_, PyDict>>,
217+
_: private::Token,
218+
) -> PyResult<Bound<'py, PyAny>>
219+
where
220+
Self: IntoPy<Py<PyTuple>>,
221+
{
222+
#[inline]
223+
fn inner<'py>(
224+
py: Python<'py>,
225+
function: Borrowed<'_, 'py, PyAny>,
226+
args: Bound<'py, PyTuple>,
227+
kwargs: Option<Borrowed<'_, '_, PyDict>>,
228+
) -> PyResult<Bound<'py, PyAny>> {
229+
unsafe {
230+
ffi::PyObject_Call(
231+
function.as_ptr(),
232+
args.as_ptr(),
233+
kwargs.map_or_else(std::ptr::null_mut, |kwargs| kwargs.as_ptr()),
234+
)
235+
.assume_owned_or_err(py)
236+
}
237+
}
238+
inner(
239+
py,
240+
function,
241+
<Self as IntoPy<Py<PyTuple>>>::into_py(self, py).into_bound(py),
242+
kwargs,
243+
)
244+
}
245+
246+
#[doc(hidden)]
247+
#[inline]
248+
fn __py_call_method_vectorcall1<'py>(
249+
self,
250+
_py: Python<'py>,
251+
object: Borrowed<'_, 'py, PyAny>,
252+
method_name: Borrowed<'_, 'py, PyString>,
253+
_: private::Token,
254+
) -> PyResult<Bound<'py, PyAny>>
255+
where
256+
Self: IntoPy<Py<PyTuple>>,
257+
{
258+
// Don't `self.into_py()`! This will lose the optimization of vectorcall.
259+
object
260+
.getattr(method_name)
261+
.and_then(|method| method.call1(self))
262+
}
175263
}
176264

177265
/// Defines a conversion from a Rust type to a Python object, which may fail.
@@ -502,6 +590,52 @@ impl IntoPy<Py<PyTuple>> for () {
502590
fn into_py(self, py: Python<'_>) -> Py<PyTuple> {
503591
PyTuple::empty(py).unbind()
504592
}
593+
594+
#[inline]
595+
fn __py_call_vectorcall1<'py>(
596+
self,
597+
py: Python<'py>,
598+
function: Borrowed<'_, 'py, PyAny>,
599+
_: private::Token,
600+
) -> PyResult<Bound<'py, PyAny>> {
601+
unsafe { ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py) }
602+
}
603+
604+
#[inline]
605+
fn __py_call_vectorcall<'py>(
606+
self,
607+
py: Python<'py>,
608+
function: Borrowed<'_, 'py, PyAny>,
609+
kwargs: Option<Borrowed<'_, '_, PyDict>>,
610+
_: private::Token,
611+
) -> PyResult<Bound<'py, PyAny>> {
612+
unsafe {
613+
match kwargs {
614+
Some(kwargs) => ffi::PyObject_Call(
615+
function.as_ptr(),
616+
PyTuple::empty(py).as_ptr(),
617+
kwargs.as_ptr(),
618+
)
619+
.assume_owned_or_err(py),
620+
None => ffi::compat::PyObject_CallNoArgs(function.as_ptr()).assume_owned_or_err(py),
621+
}
622+
}
623+
}
624+
625+
#[inline]
626+
#[allow(clippy::used_underscore_binding)]
627+
fn __py_call_method_vectorcall1<'py>(
628+
self,
629+
py: Python<'py>,
630+
object: Borrowed<'_, 'py, PyAny>,
631+
method_name: Borrowed<'_, 'py, PyString>,
632+
_: private::Token,
633+
) -> PyResult<Bound<'py, PyAny>> {
634+
unsafe {
635+
ffi::compat::PyObject_CallMethodNoArgs(object.as_ptr(), method_name.as_ptr())
636+
.assume_owned_or_err(py)
637+
}
638+
}
505639
}
506640

507641
impl<'py> IntoPyObject<'py> for () {

src/conversions/chrono.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,7 @@ fn timezone_utc(py: Python<'_>) -> Bound<'_, PyAny> {
813813
#[cfg(test)]
814814
mod tests {
815815
use super::*;
816-
use crate::types::PyTuple;
816+
use crate::{types::PyTuple, BoundObject};
817817
use std::{cmp::Ordering, panic};
818818

819819
#[test]
@@ -1333,7 +1333,12 @@ mod tests {
13331333
.unwrap()
13341334
.getattr(name)
13351335
.unwrap()
1336-
.call1(args)
1336+
.call1(
1337+
args.into_pyobject(py)
1338+
.map_err(Into::into)
1339+
.unwrap()
1340+
.into_bound(),
1341+
)
13371342
.unwrap()
13381343
}
13391344

0 commit comments

Comments
 (0)