diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2e83bbf643fe..efe4157aae21 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -125,9 +125,6 @@ jobs: # which then uses log commands to actually set them. EXTRA_VARIABLES: ${{ toJson(matrix.env) }} - - name: setup upstream remote - run: src/ci/scripts/setup-upstream-remote.sh - - name: ensure the channel matches the target branch run: src/ci/scripts/verify-channel.sh diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock index 17ee4d610f95..d415668f54a7 100644 --- a/src/bootstrap/Cargo.lock +++ b/src/bootstrap/Cargo.lock @@ -56,6 +56,7 @@ dependencies = [ "sha2", "sysinfo", "tar", + "tempfile", "termcolor", "toml", "tracing", @@ -225,14 +226,20 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.9" +version = "0.3.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d" dependencies = [ "libc", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] +[[package]] +name = "fastrand" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" + [[package]] name = "fd-lock" version = "4.0.2" @@ -240,7 +247,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" dependencies = [ "cfg-if", - "rustix", + "rustix 0.38.40", "windows-sys 0.52.0", ] @@ -266,6 +273,18 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0" +dependencies = [ + "cfg-if", + "libc", + "r-efi", + "wasi", +] + [[package]] name = "globset" version = "0.4.15" @@ -334,9 +353,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.167" +version = "0.2.171" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc" +checksum = "c19937216e9d3aa9956d9bb8dfc0b0c8beb6058fc4f7a4dc4d850edf86a237d6" [[package]] name = "libredox" @@ -355,6 +374,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "linux-raw-sys" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe7db12097d22ec582439daf8618b8fdd1a7bef6270e9af3b1ebcd30893cf413" + [[package]] name = "log" version = "0.4.22" @@ -486,6 +511,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" + [[package]] name = "redox_syscall" version = "0.5.7" @@ -548,10 +579,23 @@ dependencies = [ "bitflags", "errno", "libc", - "linux-raw-sys", + "linux-raw-sys 0.4.14", "windows-sys 0.52.0", ] +[[package]] +name = "rustix" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7178faa4b75a30e269c71e61c353ce2748cf3d76f0c44c393f4e60abf49b825" +dependencies = [ + "bitflags", + "errno", + "libc", + "linux-raw-sys 0.9.3", + "windows-sys 0.59.0", +] + [[package]] name = "ryu" version = "1.0.18" @@ -678,6 +722,19 @@ dependencies = [ "xattr", ] +[[package]] +name = "tempfile" +version = "3.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488960f40a3fd53d72c2a29a58722561dee8afdd175bd88e3db4677d7b2ba600" +dependencies = [ + "fastrand", + "getrandom", + "once_cell", + "rustix 1.0.2", + "windows-sys 0.59.0", +] + [[package]] name = "termcolor" version = "1.4.1" @@ -824,6 +881,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "wasi" +version = "0.14.2+wasi-0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3" +dependencies = [ + "wit-bindgen-rt", +] + [[package]] name = "winapi" version = "0.3.9" @@ -990,6 +1056,15 @@ version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" +[[package]] +name = "wit-bindgen-rt" +version = "0.39.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" +dependencies = [ + "bitflags", +] + [[package]] name = "xattr" version = "1.3.1" @@ -997,8 +1072,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" dependencies = [ "libc", - "linux-raw-sys", - "rustix", + "linux-raw-sys 0.4.14", + "rustix 0.38.40", ] [[package]] diff --git a/src/bootstrap/Cargo.toml b/src/bootstrap/Cargo.toml index 23aa87a74075..712a9b04c1a1 100644 --- a/src/bootstrap/Cargo.toml +++ b/src/bootstrap/Cargo.toml @@ -83,6 +83,7 @@ features = [ [dev-dependencies] pretty_assertions = "1.4" +tempfile = "3.15.0" # We care a lot about bootstrap's compile times, so don't include debuginfo for # dependencies, only bootstrap itself. diff --git a/src/bootstrap/src/core/build_steps/compile.rs b/src/bootstrap/src/core/build_steps/compile.rs index 68fa42ee9e65..2e5865e50969 100644 --- a/src/bootstrap/src/core/build_steps/compile.rs +++ b/src/bootstrap/src/core/build_steps/compile.rs @@ -111,14 +111,10 @@ impl Step for Std { // the `rust.download-rustc=true` option. let force_recompile = builder.rust_info().is_managed_git_subrepository() && builder.download_rustc() - && builder.config.last_modified_commit(&["library"], "download-rustc", true).is_none(); + && builder.config.has_changes_from_upstream(&["library"]); trace!("is managed git repo: {}", builder.rust_info().is_managed_git_subrepository()); trace!("download_rustc: {}", builder.download_rustc()); - trace!( - "last modified commit: {:?}", - builder.config.last_modified_commit(&["library"], "download-rustc", true) - ); trace!(force_recompile); run.builder.ensure(Std { diff --git a/src/bootstrap/src/core/build_steps/gcc.rs b/src/bootstrap/src/core/build_steps/gcc.rs index 48bb5cb8e876..ee8bd8286dae 100644 --- a/src/bootstrap/src/core/build_steps/gcc.rs +++ b/src/bootstrap/src/core/build_steps/gcc.rs @@ -96,6 +96,8 @@ pub enum GccBuildStatus { /// Returns a path to the libgccjit.so file. #[cfg(not(test))] fn try_download_gcc(builder: &Builder<'_>, target: TargetSelection) -> Option { + use build_helper::git::PathFreshness; + // Try to download GCC from CI if configured and available if !matches!(builder.config.gcc_ci_mode, crate::core::config::GccCiMode::DownloadFromCi) { return None; @@ -104,18 +106,40 @@ fn try_download_gcc(builder: &Builder<'_>, target: TargetSelection) -> Option { + // Download from upstream CI + let root = ci_gcc_root(&builder.config); + let gcc_stamp = BuildStamp::new(&root).with_prefix("gcc").add_stamp(&upstream); + if !gcc_stamp.is_up_to_date() && !builder.config.dry_run() { + builder.config.download_ci_gcc(&upstream, &root); + t!(gcc_stamp.write()); + } + + let libgccjit = root.join("lib").join("libgccjit.so"); + create_lib_alias(builder, &libgccjit); + Some(libgccjit) + } + PathFreshness::HasLocalModifications { .. } => { + // We have local modifications, rebuild GCC. + eprintln!("Found local GCC modifications, GCC will *not* be downloaded"); + None + } + PathFreshness::MissingUpstream => { + eprintln!("error: could not find commit hash for downloading GCC"); + eprintln!("HELP: maybe your repository history is too shallow?"); + eprintln!("HELP: consider disabling `download-ci-gcc`"); + eprintln!("HELP: or fetch enough history to include one upstream commit"); + None + } } - - let libgccjit = root.join("lib").join("libgccjit.so"); - create_lib_alias(builder, &libgccjit); - Some(libgccjit) } #[cfg(test)] @@ -264,31 +288,16 @@ fn ci_gcc_root(config: &crate::Config) -> PathBuf { config.out.join(config.build).join("ci-gcc") } -/// This retrieves the GCC sha we *want* to use, according to git history. +/// Detect whether GCC sources have been modified locally or not. #[cfg(not(test))] -fn detect_gcc_sha(config: &crate::Config, is_git: bool) -> String { - use build_helper::git::get_closest_merge_commit; - - let gcc_sha = if is_git { - get_closest_merge_commit( - Some(&config.src), - &config.git_config(), - &["src/gcc", "src/bootstrap/download-ci-gcc-stamp"], - ) - .unwrap() +fn detect_gcc_freshness(config: &crate::Config, is_git: bool) -> build_helper::git::PathFreshness { + use build_helper::git::PathFreshness; + + if is_git { + config.check_path_modifications(&["src/gcc", "src/bootstrap/download-ci-gcc-stamp"]) } else if let Some(info) = crate::utils::channel::read_commit_info_file(&config.src) { - info.sha.trim().to_owned() + PathFreshness::LastModifiedUpstream { upstream: info.sha.trim().to_owned() } } else { - "".to_owned() - }; - - if gcc_sha.is_empty() { - eprintln!("error: could not find commit hash for downloading GCC"); - eprintln!("HELP: maybe your repository history is too shallow?"); - eprintln!("HELP: consider disabling `download-ci-gcc`"); - eprintln!("HELP: or fetch enough history to include one upstream commit"); - panic!(); + PathFreshness::MissingUpstream } - - gcc_sha } diff --git a/src/bootstrap/src/core/build_steps/llvm.rs b/src/bootstrap/src/core/build_steps/llvm.rs index 6f6839ad15b0..86af956535e5 100644 --- a/src/bootstrap/src/core/build_steps/llvm.rs +++ b/src/bootstrap/src/core/build_steps/llvm.rs @@ -14,7 +14,7 @@ use std::path::{Path, PathBuf}; use std::sync::OnceLock; use std::{env, fs}; -use build_helper::git::get_closest_merge_commit; +use build_helper::git::PathFreshness; #[cfg(feature = "tracing")] use tracing::instrument; @@ -181,26 +181,15 @@ pub const LLVM_INVALIDATION_PATHS: &[&str] = &[ "src/version", ]; -/// This retrieves the LLVM sha we *want* to use, according to git history. -pub(crate) fn detect_llvm_sha(config: &Config, is_git: bool) -> String { - let llvm_sha = if is_git { - get_closest_merge_commit(Some(&config.src), &config.git_config(), LLVM_INVALIDATION_PATHS) - .unwrap() +/// Detect whether LLVM sources have been modified locally or not. +pub(crate) fn detect_llvm_freshness(config: &Config, is_git: bool) -> PathFreshness { + if is_git { + config.check_path_modifications(LLVM_INVALIDATION_PATHS) } else if let Some(info) = crate::utils::channel::read_commit_info_file(&config.src) { - info.sha.trim().to_owned() + PathFreshness::LastModifiedUpstream { upstream: info.sha.trim().to_owned() } } else { - "".to_owned() - }; - - if llvm_sha.is_empty() { - eprintln!("error: could not find commit hash for downloading LLVM"); - eprintln!("HELP: maybe your repository history is too shallow?"); - eprintln!("HELP: consider disabling `download-ci-llvm`"); - eprintln!("HELP: or fetch enough history to include one upstream commit"); - panic!(); + PathFreshness::MissingUpstream } - - llvm_sha } /// Returns whether the CI-found LLVM is currently usable. diff --git a/src/bootstrap/src/core/build_steps/tool.rs b/src/bootstrap/src/core/build_steps/tool.rs index 3426da51a808..1549d4713af2 100644 --- a/src/bootstrap/src/core/build_steps/tool.rs +++ b/src/bootstrap/src/core/build_steps/tool.rs @@ -696,8 +696,7 @@ impl Step for Rustdoc { let files_to_track = &["src/librustdoc", "src/tools/rustdoc"]; // Check if unchanged - if builder.config.last_modified_commit(files_to_track, "download-rustc", true).is_some() - { + if !builder.config.has_changes_from_upstream(files_to_track) { let precompiled_rustdoc = builder .config .ci_rustc_dir() diff --git a/src/bootstrap/src/core/builder/tests.rs b/src/bootstrap/src/core/builder/tests.rs index 5de824ebab23..8e2c6fc52cd0 100644 --- a/src/bootstrap/src/core/builder/tests.rs +++ b/src/bootstrap/src/core/builder/tests.rs @@ -1,11 +1,14 @@ +use std::env::VarError; use std::{panic, thread}; +use build_helper::stage0_parser::parse_stage0_file; use llvm::prebuilt_llvm_config; use super::*; use crate::Flags; use crate::core::build_steps::doc::DocumentationFormat; use crate::core::config::Config; +use crate::utils::tests::git::{GitCtx, git_test}; static TEST_TRIPLE_1: &str = "i686-unknown-haiku"; static TEST_TRIPLE_2: &str = "i686-unknown-hurd-gnu"; @@ -239,42 +242,80 @@ fn alias_and_path_for_library() { } #[test] -fn ci_rustc_if_unchanged_logic() { - let config = Config::parse_inner( - Flags::parse(&[ - "build".to_owned(), - "--dry-run".to_owned(), - "--set=rust.download-rustc='if-unchanged'".to_owned(), - ]), - |&_| Ok(Default::default()), - ); - - let build = Build::new(config.clone()); - let builder = Builder::new(&build); - - if config.out.exists() { - fs::remove_dir_all(&config.out).unwrap(); - } +fn ci_rustc_if_unchanged_invalidate_on_compiler_changes() { + git_test(|ctx| { + prepare_rustc_checkout(ctx); + ctx.create_upstream_merge(&["compiler/bar"]); + // This change should invalidate download-ci-rustc + ctx.create_nonupstream_merge(&["compiler/foo"]); + + let config = parse_config_download_rustc_at(ctx.get_path(), "if-unchanged", true); + assert_eq!(config.download_rustc_commit, None); + }); +} - builder.run_step_descriptions(&Builder::get_step_descriptions(config.cmd.kind()), &[]); +#[test] +fn ci_rustc_if_unchanged_invalidate_on_library_changes_in_ci() { + git_test(|ctx| { + prepare_rustc_checkout(ctx); + ctx.create_upstream_merge(&["compiler/bar"]); + // This change should invalidate download-ci-rustc + ctx.create_nonupstream_merge(&["library/foo"]); + + let config = parse_config_download_rustc_at(ctx.get_path(), "if-unchanged", true); + assert_eq!(config.download_rustc_commit, None); + }); +} - // Make sure "if-unchanged" logic doesn't try to use CI rustc while there are changes - // in compiler and/or library. - if config.download_rustc_commit.is_some() { - let mut paths = vec!["compiler"]; +#[test] +fn ci_rustc_if_unchanged_do_not_invalidate_on_library_changes_outside_ci() { + git_test(|ctx| { + prepare_rustc_checkout(ctx); + let sha = ctx.create_upstream_merge(&["compiler/bar"]); + // This change should not invalidate download-ci-rustc + ctx.create_nonupstream_merge(&["library/foo"]); + + let config = parse_config_download_rustc_at(ctx.get_path(), "if-unchanged", false); + assert_eq!(config.download_rustc_commit, Some(sha)); + }); +} - // Handle library tree the same way as in `Config::download_ci_rustc_commit`. - if builder.config.is_running_on_ci { - paths.push("library"); - } +#[test] +fn ci_rustc_if_unchanged_do_not_invalidate_on_tool_changes() { + git_test(|ctx| { + prepare_rustc_checkout(ctx); + let sha = ctx.create_upstream_merge(&["compiler/bar"]); + // This change should not invalidate download-ci-rustc + ctx.create_nonupstream_merge(&["src/tools/foo"]); + + let config = parse_config_download_rustc_at(ctx.get_path(), "if-unchanged", true); + assert_eq!(config.download_rustc_commit, Some(sha)); + }); +} - let has_changes = config.last_modified_commit(&paths, "download-rustc", true).is_none(); +/// Prepares the given directory so that it looks like a rustc checkout. +/// Also configures `GitCtx` to use the correct merge bot e-mail for upstream merge commits. +fn prepare_rustc_checkout(ctx: &mut GitCtx) { + ctx.merge_bot_email = + format!("Merge bot <{}>", parse_stage0_file().config.git_merge_commit_email); + ctx.write("src/ci/channel", "nightly"); + ctx.commit(); +} - assert!( - !has_changes, - "CI-rustc can't be used with 'if-unchanged' while there are changes in compiler and/or library." - ); - } +/// Parses a Config directory from `path`, with the given value of `download_rustc`. +fn parse_config_download_rustc_at(path: &Path, download_rustc: &str, ci: bool) -> Config { + Config::parse_inner( + Flags::parse(&[ + "build".to_owned(), + "--dry-run".to_owned(), + "--ci".to_owned(), + if ci { "true" } else { "false" }.to_owned(), + format!("--set=rust.download-rustc='{download_rustc}'"), + "--src".to_owned(), + path.to_str().unwrap().to_owned(), + ]), + |&_| Ok(Default::default()), + ) } mod defaults { diff --git a/src/bootstrap/src/core/config/config.rs b/src/bootstrap/src/core/config/config.rs index 43b62789536d..419976c83b1d 100644 --- a/src/bootstrap/src/core/config/config.rs +++ b/src/bootstrap/src/core/config/config.rs @@ -11,12 +11,12 @@ use std::io::IsTerminal; use std::path::{Path, PathBuf, absolute}; use std::process::Command; use std::str::FromStr; -use std::sync::OnceLock; +use std::sync::{Arc, Mutex, OnceLock}; use std::{cmp, env, fs}; use build_helper::ci::CiEnv; use build_helper::exit; -use build_helper::git::{GitConfig, get_closest_merge_commit, output_result}; +use build_helper::git::{GitConfig, PathFreshness, check_path_modifications, output_result}; use serde::{Deserialize, Deserializer}; use serde_derive::Deserialize; #[cfg(feature = "tracing")] @@ -422,6 +422,9 @@ pub struct Config { pub compiletest_use_stage0_libtest: bool, pub is_running_on_ci: bool, + + /// Cache for determining path modifications + pub path_modification_cache: Arc, PathFreshness>>>, } #[derive(Clone, Debug, Default)] @@ -3193,19 +3196,30 @@ impl Config { let commit = if self.rust_info.is_managed_git_subrepository() { // Look for a version to compare to based on the current commit. // Only commits merged by bors will have CI artifacts. - match self.last_modified_commit(&allowed_paths, "download-rustc", if_unchanged) { - Some(commit) => commit, - None => { + let freshness = self.check_path_modifications(&allowed_paths); + self.verbose(|| { + eprintln!("rustc freshness: {freshness:?}"); + }); + match freshness { + PathFreshness::LastModifiedUpstream { upstream } => upstream, + PathFreshness::HasLocalModifications { upstream } => { if if_unchanged { return None; } - println!("ERROR: could not find commit hash for downloading rustc"); - println!("HELP: maybe your repository history is too shallow?"); - println!( - "HELP: consider setting `rust.download-rustc=false` in bootstrap.toml" - ); - println!("HELP: or fetch enough history to include one upstream commit"); - crate::exit!(1); + + if self.is_running_on_ci { + eprintln!("CI rustc commit matches with HEAD and we are in CI."); + eprintln!( + "`rustc.download-ci` functionality will be skipped as artifacts are not available." + ); + return None; + } + + upstream + } + PathFreshness::MissingUpstream => { + eprintln!("No upstream commit found"); + return None; } } } else { @@ -3214,19 +3228,6 @@ impl Config { .expect("git-commit-info is missing in the project root") }; - if self.is_running_on_ci && { - let head_sha = - output(helpers::git(Some(&self.src)).arg("rev-parse").arg("HEAD").as_command_mut()); - let head_sha = head_sha.trim(); - commit == head_sha - } { - eprintln!("CI rustc commit matches with HEAD and we are in CI."); - eprintln!( - "`rustc.download-ci` functionality will be skipped as artifacts are not available." - ); - return None; - } - if debug_assertions_requested { eprintln!( "WARN: `rust.debug-assertions = true` will prevent downloading CI rustc as alt CI \ @@ -3264,9 +3265,7 @@ impl Config { self.update_submodule("src/llvm-project"); // Check for untracked changes in `src/llvm-project` and other important places. - let has_changes = self - .last_modified_commit(LLVM_INVALIDATION_PATHS, "download-ci-llvm", true) - .is_none(); + let has_changes = self.has_changes_from_upstream(LLVM_INVALIDATION_PATHS); // Return false if there are untracked changes, otherwise check if CI LLVM is available. if has_changes { false } else { llvm::is_ci_llvm_available_for_target(self, asserts) } @@ -3297,51 +3296,30 @@ impl Config { } } - /// Returns the last commit in which any of `modified_paths` were changed, - /// or `None` if there are untracked changes in the working directory and `if_unchanged` is true. - pub fn last_modified_commit( - &self, - modified_paths: &[&str], - option_name: &str, - if_unchanged: bool, - ) -> Option { - assert!( - self.rust_info.is_managed_git_subrepository(), - "Can't run `Config::last_modified_commit` on a non-git source." - ); - - // Look for a version to compare to based on the current commit. - // Only commits merged by bors will have CI artifacts. - let commit = get_closest_merge_commit(Some(&self.src), &self.git_config(), &[]).unwrap(); - if commit.is_empty() { - println!("error: could not find commit hash for downloading components from CI"); - println!("help: maybe your repository history is too shallow?"); - println!("help: consider disabling `{option_name}`"); - println!("help: or fetch enough history to include one upstream commit"); - crate::exit!(1); - } - - // Warn if there were changes to the compiler or standard library since the ancestor commit. - let mut git = helpers::git(Some(&self.src)); - git.args(["diff-index", "--quiet", &commit, "--"]).args(modified_paths); - - let has_changes = !t!(git.as_command_mut().status()).success(); - if has_changes { - if if_unchanged { - if self.is_verbose() { - println!( - "warning: saw changes to one of {modified_paths:?} since {commit}; \ - ignoring `{option_name}`" - ); - } - return None; - } - println!( - "warning: `{option_name}` is enabled, but there are changes to one of {modified_paths:?}" - ); + /// Returns true if any of the `paths` have been modified locally. + pub fn has_changes_from_upstream(&self, paths: &[&'static str]) -> bool { + match self.check_path_modifications(paths) { + PathFreshness::LastModifiedUpstream { .. } => false, + PathFreshness::HasLocalModifications { .. } | PathFreshness::MissingUpstream => true, } + } - Some(commit.to_string()) + /// Checks whether any of the given paths have been modified w.r.t. upstream. + pub fn check_path_modifications(&self, paths: &[&'static str]) -> PathFreshness { + // Checking path modifications through git can be relatively expensive (>100ms). + // We do not assume that the sources would change during bootstrap's execution, + // so we can cache the results here. + // Note that we do not use a static variable for the cache, because it would cause problems + // in tests that create separate `Config` instsances. + self.path_modification_cache + .lock() + .unwrap() + .entry(paths.to_vec()) + .or_insert_with(|| { + check_path_modifications(&self.src, &self.git_config(), paths, CiEnv::current()) + .unwrap() + }) + .clone() } /// Checks if the given target is the same as the host target. @@ -3379,6 +3357,10 @@ impl Config { _ => !self.is_system_llvm(target), } } + + pub fn ci_env(&self) -> CiEnv { + if self.is_running_on_ci { CiEnv::GitHubActions } else { CiEnv::None } + } } /// Compares the current `Llvm` options against those in the CI LLVM builder and detects any incompatible options. diff --git a/src/bootstrap/src/core/config/tests.rs b/src/bootstrap/src/core/config/tests.rs index c8a12c9072c9..96ac8a6d52fa 100644 --- a/src/bootstrap/src/core/config/tests.rs +++ b/src/bootstrap/src/core/config/tests.rs @@ -5,6 +5,7 @@ use std::path::{Path, PathBuf}; use std::{env, fs}; use build_helper::ci::CiEnv; +use build_helper::git::PathFreshness; use clap::CommandFactory; use serde::Deserialize; @@ -15,6 +16,7 @@ use crate::core::build_steps::clippy::{LintConfig, get_clippy_rules_in_order}; use crate::core::build_steps::llvm; use crate::core::build_steps::llvm::LLVM_INVALIDATION_PATHS; use crate::core::config::{LldMode, Target, TargetSelection, TomlConfig}; +use crate::utils::tests::git::git_test; pub(crate) fn parse(config: &str) -> Config { Config::parse_inner( @@ -51,9 +53,7 @@ fn download_ci_llvm() { let if_unchanged_config = parse("llvm.download-ci-llvm = \"if-unchanged\""); if if_unchanged_config.llvm_from_ci && if_unchanged_config.is_running_on_ci { - let has_changes = if_unchanged_config - .last_modified_commit(LLVM_INVALIDATION_PATHS, "download-ci-llvm", true) - .is_none(); + let has_changes = if_unchanged_config.has_changes_from_upstream(LLVM_INVALIDATION_PATHS); assert!( !has_changes, @@ -746,3 +746,242 @@ fn test_include_precedence_over_profile() { // override profile settings, so we expect this to be "dev" here. assert_eq!(config.channel, "dev"); } + +#[test] +fn test_pr_ci_unchanged_anywhere() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_nonupstream_merge(&["b"]); + let src = ctx.check_modifications(&["c"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::LastModifiedUpstream { upstream: sha }); + }); +} + +#[test] +fn test_pr_ci_changed_in_pr() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_nonupstream_merge(&["b"]); + let src = ctx.check_modifications(&["b"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::HasLocalModifications { upstream: sha }); + }); +} + +#[test] +fn test_auto_ci_unchanged_anywhere_select_parent() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_upstream_merge(&["b"]); + let src = ctx.check_modifications(&["c"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::LastModifiedUpstream { upstream: sha }); + }); +} + +#[test] +fn test_auto_ci_changed_in_pr() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_upstream_merge(&["b", "c"]); + let src = ctx.check_modifications(&["c", "d"], CiEnv::GitHubActions); + assert_eq!(src, PathFreshness::HasLocalModifications { upstream: sha }); + }); +} + +#[test] +fn test_local_uncommitted_modifications() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_branch("feature"); + ctx.modify("a"); + + assert_eq!( + ctx.check_modifications(&["a", "d"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: sha } + ); + }); +} + +#[test] +fn test_local_committed_modifications() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a"]); + ctx.create_upstream_merge(&["b", "c"]); + ctx.create_branch("feature"); + ctx.modify("x"); + ctx.commit(); + ctx.modify("a"); + ctx.commit(); + + assert_eq!( + ctx.check_modifications(&["a", "d"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: sha } + ); + }); +} + +#[test] +fn test_local_committed_modifications_subdirectory() { + git_test(|ctx| { + let sha = ctx.create_upstream_merge(&["a/b/c"]); + ctx.create_upstream_merge(&["b", "c"]); + ctx.create_branch("feature"); + ctx.modify("a/b/d"); + ctx.commit(); + + assert_eq!( + ctx.check_modifications(&["a/b"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: sha } + ); + }); +} + +#[test] +fn test_local_changes_in_head_upstream() { + git_test(|ctx| { + // We want to resolve to the upstream commit that made modifications to a, + // even if it is currently HEAD + let sha = ctx.create_upstream_merge(&["a"]); + assert_eq!( + ctx.check_modifications(&["a", "d"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: sha } + ); + }); +} + +#[test] +fn test_local_changes_in_previous_upstream() { + git_test(|ctx| { + // We want to resolve to this commit, which modified a + let sha = ctx.create_upstream_merge(&["a", "e"]); + // Not to this commit, which is the latest upstream commit + ctx.create_upstream_merge(&["b", "c"]); + ctx.create_branch("feature"); + ctx.modify("d"); + ctx.commit(); + + assert_eq!( + ctx.check_modifications(&["a"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: sha } + ); + }); +} + +#[test] +fn test_local_no_upstream_commit_with_changes() { + git_test(|ctx| { + ctx.create_upstream_merge(&["a", "e"]); + ctx.create_upstream_merge(&["a", "e"]); + // We want to fall back to this commit, because there are no commits + // that modified `x`. + let sha = ctx.create_upstream_merge(&["a", "e"]); + ctx.create_branch("feature"); + ctx.modify("d"); + ctx.commit(); + assert_eq!( + ctx.check_modifications(&["x"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: sha } + ); + }); +} + +#[test] +fn test_local_no_upstream_commit() { + git_test(|ctx| { + let src = ctx.check_modifications(&["c", "d"], CiEnv::None); + assert_eq!(src, PathFreshness::MissingUpstream); + }); +} + +#[test] +fn test_local_changes_negative_path() { + git_test(|ctx| { + let upstream = ctx.create_upstream_merge(&["a"]); + ctx.create_branch("feature"); + ctx.modify("b"); + ctx.modify("d"); + ctx.commit(); + + assert_eq!( + ctx.check_modifications(&[":!b", ":!d"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: upstream.clone() } + ); + assert_eq!( + ctx.check_modifications(&[":!c"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream: upstream.clone() } + ); + assert_eq!( + ctx.check_modifications(&[":!d", ":!x"], CiEnv::None), + PathFreshness::HasLocalModifications { upstream } + ); + }); +} + +#[test] +fn test_local_changes_subtree_that_used_bors() { + // Here we simulate a very specific situation related to subtrees. + // When you have merge commits locally, we should ignore them w.r.t. the artifact download + // logic. + // The upstream search code currently uses a simple heuristic: + // - Find commits by bors (or in general an author with the merge commit e-mail) + // - Find the newest such commit + // This should make it work even for subtrees that: + // - Used bors in the past (so they have bors merge commits in their history). + // - Use Josh to merge rustc into the subtree, in a way that the rustc history is the second + // parent, not the first one. + // + // In addition, when searching for modified files, we cannot simply start from HEAD, because + // in this situation git wouldn't find the right commit. + // + // This test checks that this specific scenario will resolve to the right rustc commit, both + // when finding a modified file and when finding a non-existent file (which essentially means + // that we just lookup the most recent upstream commit). + // + // See https://github.com/rust-lang/rust/issues/101907#issuecomment-2697671282 for more details. + git_test(|ctx| { + ctx.create_upstream_merge(&["a"]); + + // Start unrelated subtree history + ctx.run_git(&["switch", "--orphan", "subtree"]); + ctx.modify("bar"); + ctx.commit(); + // Now we need to emulate old bors commits in the subtree. + // Git only has a resolution of one second, which is a problem, since our git logic orders + // merge commits by their date. + // To avoid sleeping in the test, we modify the commit date to be forcefully in the past. + ctx.create_upstream_merge(&["subtree/a"]); + ctx.run_git(&["commit", "--amend", "--date", "Wed Feb 16 14:00 2011 +0100", "--no-edit"]); + + // Merge the subtree history into rustc + ctx.switch_to_branch("main"); + ctx.run_git(&["merge", "subtree", "--allow-unrelated"]); + + // Create a rustc commit that modifies a path that we're interested in (`x`) + let upstream_1 = ctx.create_upstream_merge(&["x"]); + // Create another bors commit + let upstream_2 = ctx.create_upstream_merge(&["a"]); + + ctx.switch_to_branch("subtree"); + + // Create a subtree branch + ctx.create_branch("subtree-pr"); + ctx.modify("baz"); + ctx.commit(); + // We merge rustc into this branch (simulating a "subtree pull") + ctx.merge("main", "committer "); + + // And then merge that branch into the subtree (simulating a situation right before a + // "subtree push") + ctx.switch_to_branch("subtree"); + ctx.merge("subtree-pr", "committer "); + + // And we want to check that we resolve to the right commits. + assert_eq!( + ctx.check_modifications(&["x"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: upstream_1 } + ); + assert_eq!( + ctx.check_modifications(&["nonexistent"], CiEnv::None), + PathFreshness::LastModifiedUpstream { upstream: upstream_2 } + ); + }); +} diff --git a/src/bootstrap/src/core/download.rs b/src/bootstrap/src/core/download.rs index 4e4b948a8fd6..b95d07356c1b 100644 --- a/src/bootstrap/src/core/download.rs +++ b/src/bootstrap/src/core/download.rs @@ -720,8 +720,9 @@ download-rustc = false #[cfg(not(test))] pub(crate) fn maybe_download_ci_llvm(&self) { use build_helper::exit; + use build_helper::git::PathFreshness; - use crate::core::build_steps::llvm::detect_llvm_sha; + use crate::core::build_steps::llvm::detect_llvm_freshness; use crate::core::config::check_incompatible_options_for_ci_llvm; if !self.llvm_from_ci { @@ -729,7 +730,22 @@ download-rustc = false } let llvm_root = self.ci_llvm_root(); - let llvm_sha = detect_llvm_sha(self, self.rust_info.is_managed_git_subrepository()); + let llvm_freshness = + detect_llvm_freshness(self, self.rust_info.is_managed_git_subrepository()); + self.verbose(|| { + eprintln!("LLVM freshness: {llvm_freshness:?}"); + }); + let llvm_sha = match llvm_freshness { + PathFreshness::LastModifiedUpstream { upstream } => upstream, + PathFreshness::HasLocalModifications { upstream } => upstream, + PathFreshness::MissingUpstream => { + eprintln!("error: could not find commit hash for downloading LLVM"); + eprintln!("HELP: maybe your repository history is too shallow?"); + eprintln!("HELP: consider disabling `download-ci-llvm`"); + eprintln!("HELP: or fetch enough history to include one upstream commit"); + crate::exit!(1); + } + }; let stamp_key = format!("{}{}", llvm_sha, self.llvm_assertions); let llvm_stamp = BuildStamp::new(&llvm_root).with_prefix("llvm").add_stamp(stamp_key); if !llvm_stamp.is_up_to_date() && !self.dry_run() { diff --git a/src/bootstrap/src/utils/mod.rs b/src/bootstrap/src/utils/mod.rs index caef8ce3088a..169fcec303e9 100644 --- a/src/bootstrap/src/utils/mod.rs +++ b/src/bootstrap/src/utils/mod.rs @@ -20,4 +20,4 @@ pub(crate) mod tracing; pub(crate) mod metrics; #[cfg(test)] -mod tests; +pub(crate) mod tests; diff --git a/src/bootstrap/src/utils/tests/git.rs b/src/bootstrap/src/utils/tests/git.rs new file mode 100644 index 000000000000..99e0793af466 --- /dev/null +++ b/src/bootstrap/src/utils/tests/git.rs @@ -0,0 +1,152 @@ +use std::ffi::OsStr; +use std::fs::OpenOptions; +use std::path::Path; +use std::process::Command; + +use build_helper::ci::CiEnv; +use build_helper::git::{GitConfig, PathFreshness, check_path_modifications}; + +pub struct GitCtx { + dir: tempfile::TempDir, + pub git_repo: String, + pub nightly_branch: String, + pub merge_bot_email: String, +} + +impl GitCtx { + fn new() -> Self { + let dir = tempfile::TempDir::new().unwrap(); + let ctx = Self { + dir, + git_repo: "rust-lang/rust".to_string(), + nightly_branch: "nightly".to_string(), + merge_bot_email: "Merge bot ".to_string(), + }; + ctx.run_git(&["init"]); + ctx.run_git(&["config", "user.name", "Tester"]); + ctx.run_git(&["config", "user.email", "tester@rust-lang.org"]); + ctx.modify("README.md"); + ctx.commit(); + ctx.run_git(&["branch", "-m", "main"]); + ctx + } + + pub fn get_path(&self) -> &Path { + self.dir.path() + } + + pub fn check_modifications(&self, target_paths: &[&str], ci_env: CiEnv) -> PathFreshness { + check_path_modifications(self.dir.path(), &self.git_config(), target_paths, ci_env).unwrap() + } + + pub fn create_upstream_merge(&self, modified_files: &[&str]) -> String { + self.create_branch_and_merge("previous-pr", modified_files, &self.merge_bot_email) + } + + pub fn create_nonupstream_merge(&self, modified_files: &[&str]) -> String { + self.create_branch_and_merge("pr", modified_files, "Tester ") + } + + pub fn create_branch_and_merge( + &self, + branch: &str, + modified_files: &[&str], + author: &str, + ) -> String { + let current_branch = self.get_current_branch(); + + self.create_branch(branch); + for file in modified_files { + self.modify(file); + } + self.commit(); + self.switch_to_branch(¤t_branch); + self.merge(branch, author); + self.run_git(&["branch", "-d", branch]); + self.get_current_commit() + } + + pub fn get_current_commit(&self) -> String { + self.run_git(&["rev-parse", "HEAD"]) + } + + pub fn get_current_branch(&self) -> String { + self.run_git(&["rev-parse", "--abbrev-ref", "HEAD"]) + } + + pub fn merge(&self, branch: &str, author: &str) { + self.run_git(&["merge", "--no-commit", "--no-ff", branch]); + self.run_git(&[ + "commit".to_string(), + "-m".to_string(), + format!("Merge of {branch} into {}", self.get_current_branch()), + "--author".to_string(), + author.to_string(), + ]); + } + + pub fn modify(&self, path: &str) { + self.write(path, "line"); + } + + pub fn write(&self, path: &str, data: &str) { + use std::io::Write; + + let path = self.dir.path().join(path); + std::fs::create_dir_all(&path.parent().unwrap()).unwrap(); + + let mut file = OpenOptions::new().create(true).append(true).open(path).unwrap(); + writeln!(file, "{data}").unwrap(); + } + + pub fn commit(&self) -> String { + self.run_git(&["add", "."]); + self.run_git(&["commit", "-m", "commit message"]); + self.get_current_commit() + } + + pub fn switch_to_branch(&self, name: &str) { + self.run_git(&["switch", name]); + } + + /// Creates a branch and switches to it. + pub fn create_branch(&self, name: &str) { + self.run_git(&["checkout", "-b", name]); + } + + pub fn run_git>(&self, args: &[S]) -> String { + let mut cmd = self.git_cmd(); + cmd.args(args); + eprintln!("Running {cmd:?}"); + let output = cmd.output().unwrap(); + let stdout = String::from_utf8(output.stdout).unwrap().trim().to_string(); + let stderr = String::from_utf8(output.stderr).unwrap().trim().to_string(); + if !output.status.success() { + panic!("Git command `{cmd:?}` failed\nStdout\n{stdout}\nStderr\n{stderr}"); + } + stdout + } + + fn git_cmd(&self) -> Command { + let mut cmd = Command::new("git"); + cmd.current_dir(&self.dir); + cmd + } + + fn git_config(&self) -> GitConfig<'_> { + GitConfig { + git_repository: &self.git_repo, + nightly_branch: &self.nightly_branch, + git_merge_commit_email: &self.merge_bot_email, + } + } +} + +/// Run an end-to-end test that allows testing git logic. +pub fn git_test(test_fn: F) +where + F: FnOnce(&mut GitCtx), +{ + let mut ctx = GitCtx::new(); + test_fn(&mut ctx); +} diff --git a/src/bootstrap/src/utils/tests/mod.rs b/src/bootstrap/src/utils/tests/mod.rs index 0791f7a6e207..73d55db994cc 100644 --- a/src/bootstrap/src/utils/tests/mod.rs +++ b/src/bootstrap/src/utils/tests/mod.rs @@ -1 +1,2 @@ +pub mod git; mod shared_helpers_tests; diff --git a/src/build_helper/src/git.rs b/src/build_helper/src/git.rs index f5347c308241..8d53a83ea317 100644 --- a/src/build_helper/src/git.rs +++ b/src/build_helper/src/git.rs @@ -3,6 +3,7 @@ use std::process::{Command, Stdio}; use crate::ci::CiEnv; +#[derive(Debug)] pub struct GitConfig<'a> { pub git_repository: &'a str, pub nightly_branch: &'a str, @@ -27,147 +28,234 @@ pub fn output_result(cmd: &mut Command) -> Result { String::from_utf8(output.stdout).map_err(|err| format!("{err:?}")) } -/// Finds the remote for rust-lang/rust. -/// For example for these remotes it will return `upstream`. -/// ```text -/// origin https://github.com/pietroalbani/rust.git (fetch) -/// origin https://github.com/pietroalbani/rust.git (push) -/// upstream https://github.com/rust-lang/rust (fetch) -/// upstream https://github.com/rust-lang/rust (push) -/// ``` -pub fn get_rust_lang_rust_remote( +/// Represents the result of checking whether a set of paths +/// have been modified locally or not. +#[derive(PartialEq, Debug, Clone)] +pub enum PathFreshness { + /// Artifacts should be downloaded from this upstream commit, + /// there are no local modifications. + LastModifiedUpstream { upstream: String }, + /// There are local modifications to a certain set of paths. + /// "Local" essentially means "not-upstream" here. + /// `upstream` is the latest upstream merge commit that made modifications to the + /// set of paths. + HasLocalModifications { upstream: String }, + /// No upstream commit was found. + /// This should not happen in most reasonable circumstances, but one never knows. + MissingUpstream, +} + +/// This function figures out if a set of paths was last modified upstream or +/// if there are some local modifications made to them. +/// It can be used to figure out if we should download artifacts from CI or rather +/// build them locally. +/// +/// The function assumes that at least a single upstream bors merge commit is in the +/// local git history. +/// +/// `target_paths` should be a non-empty slice of paths (git `pathspec`s) relative to `git_dir` +/// whose modifications would invalidate the artifact. +/// Each pathspec can also be a negative match, i.e. `:!foo`. This matches changes outside +/// the `foo` directory. +/// See +/// for how git `pathspec` works. +/// +/// The function behaves differently in CI and outside CI. +/// +/// - Outside CI, we want to find out if `target_paths` were modified in some local commit on +/// top of the latest upstream commit that is available in local git history. +/// If not, we try to find the most recent upstream commit (which we assume are commits +/// made by bors) that modified `target_paths`. +/// We don't want to simply take the latest master commit to avoid changing the output of +/// this function frequently after rebasing on the latest master branch even if `target_paths` +/// were not modified upstream in the meantime. In that case we would be redownloading CI +/// artifacts unnecessarily. +/// +/// - In CI, we use a shallow clone of depth 2, i.e., we fetch only a single parent commit +/// (which will be the most recent bors merge commit) and do not have access +/// to the full git history. Luckily, we only need to distinguish between two situations: +/// 1) The current PR made modifications to `target_paths`. +/// In that case, a build is typically necessary. +/// 2) The current PR did not make modifications to `target_paths`. +/// In that case we simply take the latest upstream commit, because on CI there is no need to avoid +/// redownloading. +pub fn check_path_modifications( + git_dir: &Path, config: &GitConfig<'_>, - git_dir: Option<&Path>, -) -> Result { - let mut git = Command::new("git"); - if let Some(git_dir) = git_dir { - git.current_dir(git_dir); + target_paths: &[&str], + ci_env: CiEnv, +) -> Result { + assert!(!target_paths.is_empty()); + for path in target_paths { + assert!(Path::new(path.trim_start_matches(":!")).is_relative()); } - git.args(["config", "--local", "--get-regex", "remote\\..*\\.url"]); - let stdout = output_result(&mut git)?; - let rust_lang_remote = stdout - .lines() - .find(|remote| remote.contains(config.git_repository)) - .ok_or_else(|| format!("{} remote not found", config.git_repository))?; + let upstream_sha = if matches!(ci_env, CiEnv::GitHubActions) { + // Here the situation is different for PR CI and try/auto CI. + // For PR CI, we have the following history: + // + // 1-N PR commits + // upstream merge commit made by bors + // + // For try/auto CI, we have the following history: + // <**non-upstream** merge commit made by bors> + // 1-N PR commits + // upstream merge commit made by bors + // + // But on both cases, HEAD should be a merge commit. + // So if HEAD contains modifications of `target_paths`, our PR has modified + // them. If not, we can use the only available upstream commit for downloading + // artifacts. - let remote_name = - rust_lang_remote.split('.').nth(1).ok_or_else(|| "remote name not found".to_owned())?; - Ok(remote_name.into()) -} + // Do not include HEAD, as it is never an upstream commit + // If we do not find an upstream commit in CI, something is seriously wrong. + Some( + get_closest_upstream_commit(Some(git_dir), config, ci_env)? + .expect("No upstream commit was found on CI"), + ) + } else { + // Outside CI, we want to find the most recent upstream commit that + // modified the set of paths, to have an upstream reference that does not change + // unnecessarily often. + // However, if such commit is not found, we can fall back to the latest upstream commit + let upstream_with_modifications = + get_latest_upstream_commit_that_modified_files(git_dir, config, target_paths)?; + match upstream_with_modifications { + Some(sha) => Some(sha), + None => get_closest_upstream_commit(Some(git_dir), config, ci_env)?, + } + }; -pub fn rev_exists(rev: &str, git_dir: Option<&Path>) -> Result { - let mut git = Command::new("git"); - if let Some(git_dir) = git_dir { - git.current_dir(git_dir); - } - git.args(["rev-parse", rev]); - let output = git.output().map_err(|err| format!("{err:?}"))?; - - match output.status.code() { - Some(0) => Ok(true), - Some(128) => Ok(false), - None => Err(format!( - "git didn't exit properly: {}", - String::from_utf8(output.stderr).map_err(|err| format!("{err:?}"))? - )), - Some(code) => Err(format!( - "git command exited with status code: {code}: {}", - String::from_utf8(output.stderr).map_err(|err| format!("{err:?}"))? - )), + let Some(upstream_sha) = upstream_sha else { + return Ok(PathFreshness::MissingUpstream); + }; + + // For local environments, we want to find out if something has changed + // from the latest upstream commit. + // However, that should be equivalent to checking if something has changed + // from the latest upstream commit *that modified `target_paths`*, and + // with this approach we do not need to invoke git an additional time. + if has_changed_since(git_dir, &upstream_sha, target_paths) { + Ok(PathFreshness::HasLocalModifications { upstream: upstream_sha }) + } else { + Ok(PathFreshness::LastModifiedUpstream { upstream: upstream_sha }) } } -/// Returns the master branch from which we can take diffs to see changes. -/// This will usually be rust-lang/rust master, but sometimes this might not exist. -/// This could be because the user is updating their forked master branch using the GitHub UI -/// and therefore doesn't need an upstream master branch checked out. -/// We will then fall back to origin/master in the hope that at least this exists. -pub fn updated_master_branch( - config: &GitConfig<'_>, - git_dir: Option<&Path>, -) -> Result { - let upstream_remote = get_rust_lang_rust_remote(config, git_dir)?; - let branch = config.nightly_branch; - for upstream_master in [format!("{upstream_remote}/{branch}"), format!("origin/{branch}")] { - if rev_exists(&upstream_master, git_dir)? { - return Ok(upstream_master); - } - } +/// Returns true if any of the passed `paths` have changed since the `base` commit. +pub fn has_changed_since(git_dir: &Path, base: &str, paths: &[&str]) -> bool { + let mut git = Command::new("git"); + git.current_dir(git_dir); + + git.args(["diff-index", "--quiet", base, "--"]).args(paths); - Err("Cannot find any suitable upstream master branch".to_owned()) + // Exit code 0 => no changes + // Exit code 1 => some changes were detected + !git.status().expect("cannot run git diff-index").success() } -/// Finds the nearest merge commit by comparing the local `HEAD` with the upstream branch's state. -/// To work correctly, the upstream remote must be properly configured using `git remote add `. -/// In most cases `get_closest_merge_commit` is the function you are looking for as it doesn't require remote -/// to be configured. -fn git_upstream_merge_base( - config: &GitConfig<'_>, - git_dir: Option<&Path>, -) -> Result { - let updated_master = updated_master_branch(config, git_dir)?; +/// Returns the latest upstream commit that modified `target_paths`, or `None` if no such commit +/// was found. +fn get_latest_upstream_commit_that_modified_files( + git_dir: &Path, + git_config: &GitConfig<'_>, + target_paths: &[&str], +) -> Result, String> { let mut git = Command::new("git"); - if let Some(git_dir) = git_dir { - git.current_dir(git_dir); + git.current_dir(git_dir); + + // In theory, we could just use + // `git rev-list --first-parent HEAD --author= -- ` + // to find the latest upstream commit that modified ``. + // However, this does not work if you are in a subtree sync branch that contains merge commits + // which have the subtree history as their first parent, and the rustc history as second parent: + // `--first-parent` will just walk up the subtree history and never see a single rustc commit. + // We thus have to take a two-pronged approach. First lookup the most recent upstream commit + // by *date* (this should work even in a subtree sync branch), and then start the lookup for + // modified paths starting from that commit. + // + // See https://github.com/rust-lang/rust/pull/138591#discussion_r2037081858 for more details. + let upstream = get_closest_upstream_commit(Some(git_dir), git_config, CiEnv::None)? + .unwrap_or_else(|| "HEAD".to_string()); + + git.args([ + "rev-list", + "--first-parent", + "-n1", + &upstream, + "--author", + git_config.git_merge_commit_email, + ]); + + if !target_paths.is_empty() { + git.arg("--").args(target_paths); } - Ok(output_result(git.arg("merge-base").arg(&updated_master).arg("HEAD"))?.trim().to_owned()) + let output = output_result(&mut git)?.trim().to_owned(); + if output.is_empty() { Ok(None) } else { Ok(Some(output)) } } -/// Searches for the nearest merge commit in the repository. +/// Returns the most recent (ordered chronologically) commit found in the local history that +/// should exist upstream. We identify upstream commits by the e-mail of the commit +/// author. /// -/// **In CI** finds the nearest merge commit that *also exists upstream*. -/// -/// It looks for the most recent commit made by the merge bot by matching the author's email -/// address with the merge bot's email. -pub fn get_closest_merge_commit( +/// If we are in CI, we simply return our first parent. +fn get_closest_upstream_commit( git_dir: Option<&Path>, config: &GitConfig<'_>, - target_paths: &[&str], -) -> Result { + env: CiEnv, +) -> Result, String> { + let base = match env { + CiEnv::None => "HEAD", + CiEnv::GitHubActions => { + // On CI, we should always have a non-upstream merge commit at the tip, + // and our first parent should be the most recently merged upstream commit. + // We thus simply return our first parent. + return resolve_commit_sha(git_dir, "HEAD^1").map(Some); + } + }; + let mut git = Command::new("git"); if let Some(git_dir) = git_dir { git.current_dir(git_dir); } - let channel = include_str!("../../ci/channel").trim(); - - let merge_base = { - if CiEnv::is_ci() && - // FIXME: When running on rust-lang managed CI and it's not a nightly build, - // `git_upstream_merge_base` fails with an error message similar to this: - // ``` - // called `Result::unwrap()` on an `Err` value: "command did not execute successfully: - // cd \"/checkout\" && \"git\" \"merge-base\" \"origin/master\" \"HEAD\"\nexpected success, got: exit status: 1\n" - // ``` - // Investigate and resolve this issue instead of skipping it like this. - (channel == "nightly" || !CiEnv::is_rust_lang_managed_ci_job()) - { - git_upstream_merge_base(config, git_dir).unwrap() - } else { - // For non-CI environments, ignore rust-lang/rust upstream as it usually gets - // outdated very quickly. - "HEAD".to_string() - } - }; - + // We do not use `--first-parent`, because we can be in a situation (outside CI) where we have + // a subtree merge that actually has the main rustc history as its second parent. + // Using `--first-parent` would recurse into the history of the subtree, which could have some + // old bors commits that are not relevant to us. + // With `--author-date-order`, git recurses into all parent subtrees, and returns the most + // chronologically recent bors commit. + // Here we assume that none of our subtrees use bors anymore, and that all their old bors + // commits are way older than recent rustc bors commits! git.args([ "rev-list", + "--author-date-order", &format!("--author={}", config.git_merge_commit_email), "-n1", - "--first-parent", - &merge_base, + &base, ]); - if !target_paths.is_empty() { - git.arg("--").args(target_paths); + let output = output_result(&mut git)?.trim().to_owned(); + if output.is_empty() { Ok(None) } else { Ok(Some(output)) } +} + +/// Resolve the commit SHA of `commit_ref`. +fn resolve_commit_sha(git_dir: Option<&Path>, commit_ref: &str) -> Result { + let mut git = Command::new("git"); + + if let Some(git_dir) = git_dir { + git.current_dir(git_dir); } + git.args(["rev-parse", commit_ref]); + Ok(output_result(&mut git)?.trim().to_owned()) } -/// Returns the files that have been modified in the current branch compared to the last merge. +/// Returns the files that have been modified in the current branch compared to the master branch. +/// This includes committed changes, uncommitted changes, and changes that are not even staged. +/// /// The `extensions` parameter can be used to filter the files by their extension. /// Does not include removed files. /// If `extensions` is empty, all files will be returned. @@ -176,7 +264,9 @@ pub fn get_git_modified_files( git_dir: Option<&Path>, extensions: &[&str], ) -> Result, String> { - let merge_base = get_closest_merge_commit(git_dir, config, &[])?; + let Some(merge_base) = get_closest_upstream_commit(git_dir, config, CiEnv::None)? else { + return Err("No upstream commit was found".to_string()); + }; let mut git = Command::new("git"); if let Some(git_dir) = git_dir { @@ -204,13 +294,7 @@ pub fn get_git_modified_files( } /// Returns the files that haven't been added to git yet. -pub fn get_git_untracked_files( - config: &GitConfig<'_>, - git_dir: Option<&Path>, -) -> Result>, String> { - let Ok(_updated_master) = updated_master_branch(config, git_dir) else { - return Ok(None); - }; +pub fn get_git_untracked_files(git_dir: Option<&Path>) -> Result>, String> { let mut git = Command::new("git"); if let Some(git_dir) = git_dir { git.current_dir(git_dir); diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-llvm-19/Dockerfile b/src/ci/docker/host-x86_64/x86_64-gnu-llvm-19/Dockerfile index be235f648b52..c09be047c6a8 100644 --- a/src/ci/docker/host-x86_64/x86_64-gnu-llvm-19/Dockerfile +++ b/src/ci/docker/host-x86_64/x86_64-gnu-llvm-19/Dockerfile @@ -59,11 +59,10 @@ COPY scripts/shared.sh /scripts/ ARG SCRIPT_ARG -COPY scripts/add_dummy_commit.sh /tmp/ COPY scripts/x86_64-gnu-llvm.sh /tmp/ COPY scripts/x86_64-gnu-llvm2.sh /tmp/ COPY scripts/x86_64-gnu-llvm3.sh /tmp/ COPY scripts/stage_2_test_set1.sh /tmp/ COPY scripts/stage_2_test_set2.sh /tmp/ -ENV SCRIPT "/tmp/add_dummy_commit.sh && /tmp/${SCRIPT_ARG}" +ENV SCRIPT "/tmp/${SCRIPT_ARG}" diff --git a/src/ci/docker/host-x86_64/x86_64-gnu-llvm-20/Dockerfile b/src/ci/docker/host-x86_64/x86_64-gnu-llvm-20/Dockerfile index 408b87125e0c..83a3bfb37a54 100644 --- a/src/ci/docker/host-x86_64/x86_64-gnu-llvm-20/Dockerfile +++ b/src/ci/docker/host-x86_64/x86_64-gnu-llvm-20/Dockerfile @@ -59,11 +59,10 @@ COPY scripts/shared.sh /scripts/ ARG SCRIPT_ARG -COPY scripts/add_dummy_commit.sh /tmp/ COPY scripts/x86_64-gnu-llvm.sh /tmp/ COPY scripts/x86_64-gnu-llvm2.sh /tmp/ COPY scripts/x86_64-gnu-llvm3.sh /tmp/ COPY scripts/stage_2_test_set1.sh /tmp/ COPY scripts/stage_2_test_set2.sh /tmp/ -ENV SCRIPT "/tmp/add_dummy_commit.sh && /tmp/${SCRIPT_ARG}" +ENV SCRIPT "/tmp/${SCRIPT_ARG}" diff --git a/src/ci/docker/scripts/add_dummy_commit.sh b/src/ci/docker/scripts/add_dummy_commit.sh deleted file mode 100755 index 029e4ae141f8..000000000000 --- a/src/ci/docker/scripts/add_dummy_commit.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -set -ex - -if [ "$READ_ONLY_SRC" = "0" ]; then - # `core::builder::tests::ci_rustc_if_unchanged_logic` bootstrap test ensures that - # "download-rustc=if-unchanged" logic don't use CI rustc while there are changes on - # compiler and/or library. Here we are adding a dummy commit on compiler and running - # that test to make sure we never download CI rustc with a change on the compiler tree. - echo "" >> ../compiler/rustc/src/main.rs - git config --global user.email "dummy@dummy.com" - git config --global user.name "dummy" - git add ../compiler/rustc/src/main.rs - git commit -m "test commit for rust.download-rustc=if-unchanged logic" - DISABLE_CI_RUSTC_IF_INCOMPATIBLE=0 ../x.py test bootstrap \ - -- core::builder::tests::ci_rustc_if_unchanged_logic - # Revert the dummy commit - git reset --hard HEAD~1 -fi diff --git a/src/ci/docker/scripts/x86_64-gnu-llvm3.sh b/src/ci/docker/scripts/x86_64-gnu-llvm3.sh index d1bf2dab1e2d..17eb2cea59ac 100755 --- a/src/ci/docker/scripts/x86_64-gnu-llvm3.sh +++ b/src/ci/docker/scripts/x86_64-gnu-llvm3.sh @@ -2,8 +2,6 @@ set -ex -/tmp/add_dummy_commit.sh - ##### Test stage 1 ##### ../x.py --stage 1 test --skip src/tools/tidy diff --git a/src/ci/github-actions/jobs.yml b/src/ci/github-actions/jobs.yml index cb2bec5a9dfa..950a75721c49 100644 --- a/src/ci/github-actions/jobs.yml +++ b/src/ci/github-actions/jobs.yml @@ -108,8 +108,6 @@ pr: - name: x86_64-gnu-llvm-19 env: ENABLE_GCC_CODEGEN: "1" - # We are adding (temporarily) a dummy commit on the compiler - READ_ONLY_SRC: "0" DOCKER_SCRIPT: x86_64-gnu-llvm.sh <<: *job-linux-16c - name: x86_64-gnu-tools diff --git a/src/ci/scripts/setup-upstream-remote.sh b/src/ci/scripts/setup-upstream-remote.sh deleted file mode 100755 index 52b4c98a8901..000000000000 --- a/src/ci/scripts/setup-upstream-remote.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -# In CI environments, bootstrap is forced to use the remote upstream based -# on "git_repository" and "nightly_branch" values from src/stage0 file. -# This script configures the remote as it may not exist by default. - -set -euo pipefail -IFS=$'\n\t' - -ci_dir=$(cd $(dirname $0) && pwd)/.. -source "$ci_dir/shared.sh" - -git_repository=$(parse_stage0_file_by_key "git_repository") -nightly_branch=$(parse_stage0_file_by_key "nightly_branch") - -# Configure "rust-lang/rust" upstream remote only when it's not origin. -if [ -z "$(git config remote.origin.url | grep $git_repository)" ]; then - echo "Configuring https://github.com/$git_repository remote as upstream." - git remote add upstream "https://github.com/$git_repository" - REMOTE_NAME="upstream" -else - REMOTE_NAME="origin" -fi - -git fetch $REMOTE_NAME $nightly_branch diff --git a/src/tools/compiletest/src/lib.rs b/src/tools/compiletest/src/lib.rs index 4bbd4ab4790d..b3b9299ea078 100644 --- a/src/tools/compiletest/src/lib.rs +++ b/src/tools/compiletest/src/lib.rs @@ -745,7 +745,7 @@ fn modified_tests(config: &Config, dir: &Utf8Path) -> Result, S &vec!["rs", "stderr", "fixed"], )?; // Add new test cases to the list, it will be convenient in daily development. - let untracked_files = get_git_untracked_files(&config.git_config(), None)?.unwrap_or(vec![]); + let untracked_files = get_git_untracked_files(Some(dir.as_std_path()))?.unwrap_or(vec![]); let all_paths = [&files[..], &untracked_files[..]].concat(); let full_paths = {