Skip to content

Commit 3d56b33

Browse files
committed
Auto merge of #47507 - alexcrichton:rerun-bat-scripts, r=michaelwoerister
rustc: Lower link args to `@`-files on Windows more When spawning a linker rustc has historically been known to blow OS limits for the command line being too large, notably on Windows. This is especially true of incremental compilation where there can be dozens of object files per compilation. The compiler currently has logic for detecting a failure to spawn and instead passing arguments via a file instead, but this failure detection only triggers if a process actually fails to spawn. Unfortunately on Windows we've got something else to worry about which is `cmd.exe`. The compiler may be running a linker through `cmd.exe` where `cmd.exe` has a limit of 8192 on the command line vs 32k on `CreateProcess`. Moreso rustc actually succeeds in spawning `cmd.exe` today, it's just that after it's running `cmd.exe` fails to spawn its child, which rustc doesn't currently detect. Consequently this commit updates the logic for the spawning the linker on Windows to instead have a heuristic to see if we need to pass arguments via a file. This heuristic is an overly pessimistic and "inaccurate" calculation which just calls `len` on a bunch of `OsString` instances (where `len` is not precisely the length in u16 elements). This number, when exceeding the 6k threshold, will force rustc to always pass arguments through a file. This strategy should avoid us trying to parse the output on Windows of the linker to see if it successfully spawned yet failed to actually sub-spawn the linker. We may just be passing arguments through files a little more commonly now... The motivation for this commit was a recent bug in Gecko [1] when beta testing, notably when incremental compilation was enabled it blew out the limit on `cmd.exe`. This commit will also fix #46999 as well though as emscripten uses a bat script as well (and we're blowing the limit there). [1]: https://bugzilla.mozilla.org/show_bug.cgi?id=1430886 Closes #46999
2 parents 15a1e28 + 3ca320c commit 3d56b33

File tree

5 files changed

+181
-32
lines changed

5 files changed

+181
-32
lines changed

src/librustc_trans/back/command.rs

+60-12
Original file line numberDiff line numberDiff line change
@@ -14,22 +14,34 @@
1414
use std::ffi::{OsStr, OsString};
1515
use std::fmt;
1616
use std::io;
17+
use std::mem;
1718
use std::process::{self, Output, Child};
1819

20+
#[derive(Clone)]
1921
pub struct Command {
20-
program: OsString,
22+
program: Program,
2123
args: Vec<OsString>,
2224
env: Vec<(OsString, OsString)>,
2325
}
2426

27+
#[derive(Clone)]
28+
enum Program {
29+
Normal(OsString),
30+
CmdBatScript(OsString),
31+
}
32+
2533
impl Command {
2634
pub fn new<P: AsRef<OsStr>>(program: P) -> Command {
27-
Command::_new(program.as_ref())
35+
Command::_new(Program::Normal(program.as_ref().to_owned()))
36+
}
37+
38+
pub fn bat_script<P: AsRef<OsStr>>(program: P) -> Command {
39+
Command::_new(Program::CmdBatScript(program.as_ref().to_owned()))
2840
}
2941

30-
fn _new(program: &OsStr) -> Command {
42+
fn _new(program: Program) -> Command {
3143
Command {
32-
program: program.to_owned(),
44+
program,
3345
args: Vec::new(),
3446
env: Vec::new(),
3547
}
@@ -86,24 +98,60 @@ impl Command {
8698
}
8799

88100
pub fn command(&self) -> process::Command {
89-
let mut ret = process::Command::new(&self.program);
101+
let mut ret = match self.program {
102+
Program::Normal(ref p) => process::Command::new(p),
103+
Program::CmdBatScript(ref p) => {
104+
let mut c = process::Command::new("cmd");
105+
c.arg("/c").arg(p);
106+
c
107+
}
108+
};
90109
ret.args(&self.args);
91110
ret.envs(self.env.clone());
92111
return ret
93112
}
94113

95114
// extensions
96115

97-
pub fn get_program(&self) -> &OsStr {
98-
&self.program
116+
pub fn take_args(&mut self) -> Vec<OsString> {
117+
mem::replace(&mut self.args, Vec::new())
99118
}
100119

101-
pub fn get_args(&self) -> &[OsString] {
102-
&self.args
103-
}
120+
/// Returns a `true` if we're pretty sure that this'll blow OS spawn limits,
121+
/// or `false` if we should attempt to spawn and see what the OS says.
122+
pub fn very_likely_to_exceed_some_spawn_limit(&self) -> bool {
123+
// We mostly only care about Windows in this method, on Unix the limits
124+
// can be gargantuan anyway so we're pretty unlikely to hit them
125+
if cfg!(unix) {
126+
return false
127+
}
104128

105-
pub fn get_env(&self) -> &[(OsString, OsString)] {
106-
&self.env
129+
// Ok so on Windows to spawn a process is 32,768 characters in its
130+
// command line [1]. Unfortunately we don't actually have access to that
131+
// as it's calculated just before spawning. Instead we perform a
132+
// poor-man's guess as to how long our command line will be. We're
133+
// assuming here that we don't have to escape every character...
134+
//
135+
// Turns out though that `cmd.exe` has even smaller limits, 8192
136+
// characters [2]. Linkers can often be batch scripts (for example
137+
// Emscripten, Gecko's current build system) which means that we're
138+
// running through batch scripts. These linkers often just forward
139+
// arguments elsewhere (and maybe tack on more), so if we blow 8192
140+
// bytes we'll typically cause them to blow as well.
141+
//
142+
// Basically as a result just perform an inflated estimate of what our
143+
// command line will look like and test if it's > 8192 (we actually
144+
// test against 6k to artificially inflate our estimate). If all else
145+
// fails we'll fall back to the normal unix logic of testing the OS
146+
// error code if we fail to spawn and automatically re-spawning the
147+
// linker with smaller arguments.
148+
//
149+
// [1]: https://msdn.microsoft.com/en-us/library/windows/desktop/ms682425(v=vs.85).aspx
150+
// [2]: https://blogs.msdn.microsoft.com/oldnewthing/20031210-00/?p=41553
151+
152+
let estimated_command_line_len =
153+
self.args.iter().map(|a| a.len()).sum::<usize>();
154+
estimated_command_line_len > 1024 * 6
107155
}
108156
}
109157

src/librustc_trans/back/link.rs

+18-20
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,8 @@ use std::char;
3636
use std::env;
3737
use std::ffi::OsString;
3838
use std::fmt;
39-
use std::fs::{self, File};
40-
use std::io::{self, Write, BufWriter};
39+
use std::fs;
40+
use std::io;
4141
use std::path::{Path, PathBuf};
4242
use std::process::{Output, Stdio};
4343
use std::str;
@@ -71,9 +71,7 @@ pub fn get_linker(sess: &Session) -> (PathBuf, Command, Vec<(OsString, OsString)
7171
let cmd = |linker: &Path| {
7272
if let Some(linker) = linker.to_str() {
7373
if cfg!(windows) && linker.ends_with(".bat") {
74-
let mut cmd = Command::new("cmd");
75-
cmd.arg("/c").arg(linker);
76-
return cmd
74+
return Command::bat_script(linker)
7775
}
7876
}
7977
Command::new(linker)
@@ -758,26 +756,26 @@ fn exec_linker(sess: &Session, cmd: &mut Command, tmpdir: &Path)
758756
// that contains all the arguments. The theory is that this is then
759757
// accepted on all linkers and the linker will read all its options out of
760758
// there instead of looking at the command line.
761-
match cmd.command().stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() {
762-
Ok(child) => return child.wait_with_output(),
763-
Err(ref e) if command_line_too_big(e) => {}
764-
Err(e) => return Err(e)
759+
if !cmd.very_likely_to_exceed_some_spawn_limit() {
760+
match cmd.command().stdout(Stdio::piped()).stderr(Stdio::piped()).spawn() {
761+
Ok(child) => return child.wait_with_output(),
762+
Err(ref e) if command_line_too_big(e) => {}
763+
Err(e) => return Err(e)
764+
}
765765
}
766766

767-
let file = tmpdir.join("linker-arguments");
768-
let mut cmd2 = Command::new(cmd.get_program());
769-
cmd2.arg(format!("@{}", file.display()));
770-
for &(ref k, ref v) in cmd.get_env() {
771-
cmd2.env(k, v);
772-
}
773-
let mut f = BufWriter::new(File::create(&file)?);
774-
for arg in cmd.get_args() {
775-
writeln!(f, "{}", Escape {
767+
let mut cmd2 = cmd.clone();
768+
let mut args = String::new();
769+
for arg in cmd2.take_args() {
770+
args.push_str(&Escape {
776771
arg: arg.to_str().unwrap(),
777772
is_like_msvc: sess.target.target.options.is_like_msvc,
778-
})?;
773+
}.to_string());
774+
args.push_str("\n");
779775
}
780-
f.into_inner()?;
776+
let file = tmpdir.join("linker-arguments");
777+
fs::write(&file, args.as_bytes())?;
778+
cmd2.arg(format!("@{}", file.display()));
781779
return cmd2.output();
782780

783781
#[cfg(unix)]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
-include ../tools.mk
2+
3+
all:
4+
$(RUSTC) foo.rs -g
5+
cp foo.bat $(TMPDIR)/
6+
OUT_DIR="$(TMPDIR)" RUSTC="$(RUSTC_ORIGINAL)" $(call RUN,foo)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
%MY_LINKER% %*
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// Copyright 2018 The Rust Project Developers. See the COPYRIGHT
2+
// file at the top-level directory of this distribution and at
3+
// http://rust-lang.org/COPYRIGHT.
4+
//
5+
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6+
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7+
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8+
// option. This file may not be copied, modified, or distributed
9+
// except according to those terms.
10+
11+
// Like the `long-linker-command-lines` test this test attempts to blow
12+
// a command line limit for running the linker. Unlike that test, however,
13+
// this test is testing `cmd.exe` specifically rather than the OS.
14+
//
15+
// Unfortunately `cmd.exe` has a 8192 limit which is relatively small
16+
// in the grand scheme of things and anyone sripting rustc's linker
17+
// is probably using a `*.bat` script and is likely to hit this limit.
18+
//
19+
// This test uses a `foo.bat` script as the linker which just simply
20+
// delegates back to this program. The compiler should use a lower
21+
// limit for arguments before passing everything via `@`, which
22+
// means that everything should still succeed here.
23+
24+
use std::env;
25+
use std::fs::{self, File};
26+
use std::io::{BufWriter, Write, Read};
27+
use std::path::PathBuf;
28+
use std::process::Command;
29+
30+
fn main() {
31+
if !cfg!(windows) {
32+
return
33+
}
34+
35+
let tmpdir = PathBuf::from(env::var_os("OUT_DIR").unwrap());
36+
let ok = tmpdir.join("ok");
37+
let not_ok = tmpdir.join("not_ok");
38+
if env::var("YOU_ARE_A_LINKER").is_ok() {
39+
match env::args().find(|a| a.contains("@")) {
40+
Some(file) => { fs::copy(&file[1..], &ok).unwrap(); }
41+
None => { File::create(&not_ok).unwrap(); }
42+
}
43+
return
44+
}
45+
46+
let rustc = env::var_os("RUSTC").unwrap_or("rustc".into());
47+
let me = env::current_exe().unwrap();
48+
let bat = me.parent()
49+
.unwrap()
50+
.join("foo.bat");
51+
let bat_linker = format!("linker={}", bat.display());
52+
for i in (1..).map(|i| i * 10) {
53+
println!("attempt: {}", i);
54+
55+
let file = tmpdir.join("bar.rs");
56+
let mut f = BufWriter::new(File::create(&file).unwrap());
57+
let mut lib_name = String::new();
58+
for _ in 0..i {
59+
lib_name.push_str("foo");
60+
}
61+
for j in 0..i {
62+
writeln!(f, "#[link(name = \"{}{}\")]", lib_name, j).unwrap();
63+
}
64+
writeln!(f, "extern {{}}\nfn main() {{}}").unwrap();
65+
f.into_inner().unwrap();
66+
67+
drop(fs::remove_file(&ok));
68+
drop(fs::remove_file(&not_ok));
69+
let status = Command::new(&rustc)
70+
.arg(&file)
71+
.arg("-C").arg(&bat_linker)
72+
.arg("--out-dir").arg(&tmpdir)
73+
.env("YOU_ARE_A_LINKER", "1")
74+
.env("MY_LINKER", &me)
75+
.status()
76+
.unwrap();
77+
78+
if !status.success() {
79+
panic!("rustc didn't succeed: {}", status);
80+
}
81+
82+
if !ok.exists() {
83+
assert!(not_ok.exists());
84+
continue
85+
}
86+
87+
let mut contents = String::new();
88+
File::open(&ok).unwrap().read_to_string(&mut contents).unwrap();
89+
90+
for j in 0..i {
91+
assert!(contents.contains(&format!("{}{}", lib_name, j)));
92+
}
93+
94+
break
95+
}
96+
}

0 commit comments

Comments
 (0)