Skip to content

Commit fcedb00

Browse files
NobodyXuosiewiczthomcc
authored
Optimization: Vendor jobserver impl and rm thread spawning in parallel compile_objects (#889)
* Impl vendored jobserver implementation It supports non-blocking `try_acquire` and is much simpler than the one provided by `jobserver` Signed-off-by: Jiahao XU <[email protected]> * Convert parallel `compile_objects` to use future instead of threads Also fixed compilation errors in mod `job_token` Signed-off-by: Jiahao XU <[email protected]> * Optimize parallel `compile_objects` Remove use of mpsc since the future is executed on one single thread only. Signed-off-by: Jiahao XU <[email protected]> * Fix `job_token`: Remove mpsc and make sure tokens are released The mpsc is stored in a global variable and Rust never calls `Drop::drop` on global variables, so they are never released. This commit removes the mpsc and replaces that with an `AtomicBool` for the implicit token to fix this, also dramatically simplifies the code. Signed-off-by: Jiahao XU <[email protected]> * Optimize `job_token`: Make `JobToken` zero-sized Signed-off-by: Jiahao XU <[email protected]> * Fix `windows::JobServerClient::try_acquire` impl Return `Ok(None)` instead of `Err()` if no token is ready. Signed-off-by: Jiahao XU <[email protected]> * Fix `unix::JobServerClient::from_pipe`: Accept more fd access modes `O_RDWR` is a valid access mode for both read and write end of the pipe. 
Signed-off-by: Jiahao XU <[email protected]> * Rm unnecessary `'static` bound in parameter of `job_token` Signed-off-by: Jiahao XU <[email protected]> * Optimize parallel `compile_objects`: Sleep/yield if no progress is made Signed-off-by: Jiahao XU <[email protected]> * Fix windows implementation: Match all return value explicitly Signed-off-by: Jiahao XU <[email protected]> * Use Result::ok() in job_token.rs Co-authored-by: Piotr Osiewicz <[email protected]> * Fix grammar in comments Co-authored-by: Piotr Osiewicz <[email protected]> * simplify job_token impl Co-authored-by: Piotr Osiewicz <[email protected]> * Add more comment explaining the design choice Signed-off-by: Jiahao XU <[email protected]> * Refactor: Extract new mod `async_executor` Signed-off-by: Jiahao XU <[email protected]> * Update src/job_token/unix.rs Co-authored-by: Thom Chiovoloni <[email protected]> * Remove outdated comment Signed-off-by: Jiahao XU <[email protected]> * Do not check for `--jobserver-fds` on windows Since the manual specifies that only `--jobserver-auth` will be used and windows does not have the concept of fds anyway. Signed-off-by: Jiahao XU <[email protected]> * Accept ASCII only in windows `JobServerClient::open` impl Signed-off-by: Jiahao XU <[email protected]> * Use acquire and release ordering for atomic operation in `JobServer` Signed-off-by: Jiahao XU <[email protected]> * Add a TODO for use of `NUM_JOBS` Signed-off-by: Jiahao XU <[email protected]> * Simplify windows jobserver `WAIT_ABANDONED` errmsg Signed-off-by: Jiahao XU <[email protected]> --------- Signed-off-by: Jiahao XU <[email protected]> Co-authored-by: Piotr Osiewicz <[email protected]> Co-authored-by: Thom Chiovoloni <[email protected]>
1 parent bd25128 commit fcedb00

File tree

8 files changed

+644
-206
lines changed

8 files changed

+644
-206
lines changed

Cargo.toml

+1-4
Original file line numberDiff line numberDiff line change
@@ -18,16 +18,13 @@ exclude = ["/.github"]
1818
edition = "2018"
1919
rust-version = "1.53"
2020

21-
[dependencies]
22-
jobserver = { version = "0.1.16", optional = true }
23-
2421
[target.'cfg(unix)'.dependencies]
2522
# Don't turn on the feature "std" for this, see https://github.com/rust-lang/cargo/issues/4866
2623
# which is still an issue with `resolver = "1"`.
2724
libc = { version = "0.2.62", default-features = false }
2825

2926
[features]
30-
parallel = ["jobserver"]
27+
parallel = []
3128

3229
[dev-dependencies]
3330
tempfile = "3"

gen-windows-sys-binding/windows_sys.list

+13
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ Windows.Win32.Foundation.SysFreeString
66
Windows.Win32.Foundation.SysStringLen
77
Windows.Win32.Foundation.S_FALSE
88
Windows.Win32.Foundation.S_OK
9+
Windows.Win32.Foundation.FALSE
10+
Windows.Win32.Foundation.HANDLE
11+
Windows.Win32.Foundation.WAIT_OBJECT_0
12+
Windows.Win32.Foundation.WAIT_TIMEOUT
13+
Windows.Win32.Foundation.WAIT_FAILED
14+
Windows.Win32.Foundation.WAIT_ABANDONED
915

1016
Windows.Win32.System.Com.SAFEARRAY
1117
Windows.Win32.System.Com.SAFEARRAYBOUND
@@ -25,3 +31,10 @@ Windows.Win32.System.Registry.HKEY_LOCAL_MACHINE
2531
Windows.Win32.System.Registry.KEY_READ
2632
Windows.Win32.System.Registry.KEY_WOW64_32KEY
2733
Windows.Win32.System.Registry.REG_SZ
34+
35+
Windows.Win32.System.Threading.ReleaseSemaphore
36+
Windows.Win32.System.Threading.WaitForSingleObject
37+
Windows.Win32.System.Threading.SEMAPHORE_MODIFY_STATE
38+
Windows.Win32.System.Threading.THREAD_SYNCHRONIZE
39+
40+
Windows.Win32.System.WindowsProgramming.OpenSemaphoreA

src/async_executor.rs

+118
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
use std::{
2+
cell::Cell,
3+
future::Future,
4+
pin::Pin,
5+
ptr,
6+
task::{Context, Poll, RawWaker, RawWakerVTable, Waker},
7+
thread,
8+
time::Duration,
9+
};
10+
11+
use crate::Error;
12+
13+
/// Vtable for a waker that carries no data and performs no action.
///
/// Every entry ignores its data pointer: cloning hands back another copy of
/// [`NOOP_RAW_WAKER`], while `wake`, `wake_by_ref` and `drop` are empty because
/// nothing is ever allocated for the waker.
const NOOP_WAKER_VTABLE: RawWakerVTable = {
    // Cloning yields a fresh no-op raw waker.
    fn clone_noop(_data: *const ()) -> RawWaker {
        NOOP_RAW_WAKER
    }

    // Shared by `wake`, `wake_by_ref` and `drop`: there is nothing to do.
    fn ignore(_data: *const ()) {}

    RawWakerVTable::new(clone_noop, ignore, ignore, ignore)
};

/// Raw waker with a null data pointer, backed by [`NOOP_WAKER_VTABLE`].
const NOOP_RAW_WAKER: RawWaker = RawWaker::new(ptr::null(), &NOOP_WAKER_VTABLE);
24+
25+
#[derive(Default)]
26+
pub(super) struct YieldOnce(bool);
27+
28+
impl Future for YieldOnce {
29+
type Output = ();
30+
31+
fn poll(self: Pin<&mut Self>, _cx: &mut Context<'_>) -> Poll<()> {
32+
let flag = &mut std::pin::Pin::into_inner(self).0;
33+
if !*flag {
34+
*flag = true;
35+
Poll::Pending
36+
} else {
37+
Poll::Ready(())
38+
}
39+
}
40+
}
41+
42+
/// Execute the futures and return when they are all done.
43+
///
44+
/// Here we use our own homebrew async executor since cc is used in the build
45+
/// script of many popular projects, pulling in additional dependencies would
46+
/// significantly slow down its compilation.
47+
pub(super) fn block_on<Fut1, Fut2>(
48+
mut fut1: Fut1,
49+
mut fut2: Fut2,
50+
has_made_progress: &Cell<bool>,
51+
) -> Result<(), Error>
52+
where
53+
Fut1: Future<Output = Result<(), Error>>,
54+
Fut2: Future<Output = Result<(), Error>>,
55+
{
56+
// Shadows the future so that it can never be moved and is guaranteed
57+
// to be pinned.
58+
//
59+
// The same trick used in `pin!` macro.
60+
//
61+
// TODO: Once MSRV is bumped to 1.68, replace this with `std::pin::pin!`
62+
let mut fut1 = Some(unsafe { Pin::new_unchecked(&mut fut1) });
63+
let mut fut2 = Some(unsafe { Pin::new_unchecked(&mut fut2) });
64+
65+
// TODO: Once `Waker::noop` stablised and our MSRV is bumped to the version
66+
// which it is stablised, replace this wth `Waker::noop`.
67+
let waker = unsafe { Waker::from_raw(NOOP_RAW_WAKER) };
68+
let mut context = Context::from_waker(&waker);
69+
70+
let mut backoff_cnt = 0;
71+
72+
loop {
73+
has_made_progress.set(false);
74+
75+
if let Some(fut) = fut2.as_mut() {
76+
if let Poll::Ready(res) = fut.as_mut().poll(&mut context) {
77+
fut2 = None;
78+
res?;
79+
}
80+
}
81+
82+
if let Some(fut) = fut1.as_mut() {
83+
if let Poll::Ready(res) = fut.as_mut().poll(&mut context) {
84+
fut1 = None;
85+
res?;
86+
}
87+
}
88+
89+
if fut1.is_none() && fut2.is_none() {
90+
return Ok(());
91+
}
92+
93+
if !has_made_progress.get() {
94+
if backoff_cnt > 3 {
95+
// We have yielded at least three times without making'
96+
// any progress, so we will sleep for a while.
97+
let duration = Duration::from_millis(100 * (backoff_cnt - 3).min(10));
98+
thread::sleep(duration);
99+
} else {
100+
// Given that we spawned a lot of compilation tasks, it is unlikely
101+
// that OS cannot find other ready task to execute.
102+
//
103+
// If all of them are done, then we will yield them and spawn more,
104+
// or simply return.
105+
//
106+
// Thus this will not be turned into a busy-wait loop and it will not
107+
// waste CPU resource.
108+
thread::yield_now();
109+
}
110+
}
111+
112+
backoff_cnt = if has_made_progress.get() {
113+
0
114+
} else {
115+
backoff_cnt + 1
116+
};
117+
}
118+
}

0 commit comments

Comments
 (0)