Auto merge of #80262 - Mark-Simulacrum:pgo-rustc, r=pietroalbini

Utilize PGO for rustc linux dist builds

This implements support for applying PGO to the rustc compilation step (not
standard library or any tooling, including rustdoc). Expanding PGO to more tools
is not terribly difficult but will involve more work and greater CI time
commitment.

For the same reason of avoiding greater implementation time commitment,
implementing for platforms outside of x86_64-unknown-linux-gnu is skipped.
In practice it should be quite simple to extend over time to more platforms. The
initial implementation is intentionally minimal here to avoid too much work
investment before we start seeing wins for a subset of Rust users.

The choice of workloads to profile here is somewhat arbitrary, but the general
rationale was to aim for a small set that largely avoided time regressions on
perf.rust-lang.org's full suite of crates. The set chosen is libcore, cargo (and
its dependencies), and a few ad-hoc stress tests from perf.rlo. The stress tests
are arguably the most controversial, but they benefit those cases (avoiding
regressions) and do not really remove wins from other benchmarks.

The primary next step after this PR lands is to implement support for PGO in
LLVM. It is unclear whether we can afford a full LLVM rebuild in CI, though, so
the approach taken there may need to be more staggered. rustc-only PGO seems
well affordable on linux at least, giving us up to 20% wall time wins on some
crates for 15 minutes of extra CI time (1 hour with this PR, up from 45 minutes).

The PGO data is uploaded so that others can reuse it, either when attempting to
reproduce the CI build or, potentially in the future, on other platforms where
an off-by-one strategy is used for dist builds at minimal performance cost.

r? `@michaelwoerister` (but tell me if you don't want to / don't feel comfortable approving and we can find others)
This commit is contained in:
bors 2020-12-23 12:54:56 +00:00
commit 3ffea60dd5
8 changed files with 176 additions and 4 deletions

View File

@ -471,6 +471,7 @@ impl<'a> Builder<'a> {
dist::RustDev,
dist::Extended,
dist::BuildManifest,
dist::ReproducibleArtifacts,
),
Kind::Install => describe!(
install::Docs,

View File

@ -501,6 +501,41 @@ impl Step for Rustc {
let mut cargo = builder.cargo(compiler, Mode::Rustc, SourceType::InTree, target, "build");
rustc_cargo(builder, &mut cargo, target);
if builder.config.rust_profile_use.is_some()
&& builder.config.rust_profile_generate.is_some()
{
panic!("Cannot use and generate PGO profiles at the same time");
}
let is_collecting = if let Some(path) = &builder.config.rust_profile_generate {
if compiler.stage == 1 {
cargo.rustflag(&format!("-Cprofile-generate={}", path));
// Apparently necessary to avoid overflowing the counters during
// a Cargo build profile
cargo.rustflag("-Cllvm-args=-vp-counters-per-site=4");
true
} else {
false
}
} else if let Some(path) = &builder.config.rust_profile_use {
if compiler.stage == 1 {
cargo.rustflag(&format!("-Cprofile-use={}", path));
cargo.rustflag("-Cllvm-args=-pgo-warn-missing-function");
true
} else {
false
}
} else {
false
};
if is_collecting {
// Ensure paths to Rust sources are relative, not absolute.
cargo.rustflag(&format!(
"-Cllvm-args=-static-func-strip-dirname-prefix={}",
builder.config.src.components().count()
));
}
builder.info(&format!(
"Building stage{} compiler artifacts ({} -> {})",
compiler.stage, &compiler.host, target
@ -752,7 +787,7 @@ fn copy_codegen_backends_to_sysroot(
// Here we're looking for the output dylib of the `CodegenBackend` step and
// we're copying that into the `codegen-backends` folder.
let dst = builder.sysroot_codegen_backends(target_compiler);
t!(fs::create_dir_all(&dst));
t!(fs::create_dir_all(&dst), dst);
if builder.config.dry_run {
return;

View File

@ -134,6 +134,8 @@ pub struct Config {
pub rust_thin_lto_import_instr_limit: Option<u32>,
pub rust_remap_debuginfo: bool,
pub rust_new_symbol_mangling: bool,
pub rust_profile_use: Option<String>,
pub rust_profile_generate: Option<String>,
pub build: TargetSelection,
pub hosts: Vec<TargetSelection>,
@ -496,6 +498,8 @@ struct Rust {
llvm_libunwind: Option<String>,
control_flow_guard: Option<bool>,
new_symbol_mangling: Option<bool>,
profile_generate: Option<String>,
profile_use: Option<String>,
}
/// TOML representation of how each build target is configured.
@ -874,6 +878,11 @@ impl Config {
config.rust_codegen_units = rust.codegen_units.map(threads_from_config);
config.rust_codegen_units_std = rust.codegen_units_std.map(threads_from_config);
config.rust_profile_use = flags.rust_profile_use.or(rust.profile_use);
config.rust_profile_generate = flags.rust_profile_generate.or(rust.profile_generate);
} else {
config.rust_profile_use = flags.rust_profile_use;
config.rust_profile_generate = flags.rust_profile_generate;
}
if let Some(t) = toml.target {

View File

@ -2648,3 +2648,72 @@ impl Step for BuildManifest {
distdir(builder).join(format!("{}-{}.tar.gz", name, self.target.triple))
}
}
/// Tarball containing artifacts necessary to reproduce the build of rustc.
///
/// Currently this is the PGO profile data, i.e. the file configured through
/// `rust_profile_use`. When no such profile is configured the step produces
/// no tarball at all.
///
/// Should not be considered stable by end users.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct ReproducibleArtifacts {
/// Target the tarball is produced for; its triple becomes part of the
/// package (and therefore tarball) name.
pub target: TargetSelection,
}
impl Step for ReproducibleArtifacts {
    /// Location of the generated tarball, or `None` if nothing was packaged.
    type Output = Option<PathBuf>;
    const DEFAULT: bool = true;
    const ONLY_HOSTS: bool = true;

    /// The step is selected through the `reproducible` path.
    fn should_run(run: ShouldRun<'_>) -> ShouldRun<'_> {
        run.path("reproducible")
    }

    /// Schedules the step for the target currently being processed.
    fn make_run(run: RunConfig<'_>) {
        let step = ReproducibleArtifacts { target: run.target };
        run.builder.ensure(step);
    }

    /// Packages the PGO profile named by `rust_profile_use` into a
    /// `reproducible-artifacts` tarball inside the dist directory.
    ///
    /// Returns `None` without invoking the installer when no profile is
    /// configured; otherwise returns the path of the `.tar.gz` that was
    /// requested from the installer.
    fn run(self, builder: &Builder<'_>) -> Self::Output {
        let pkg = pkgname(builder, "reproducible-artifacts");
        let scratch = tmpdir(builder);

        // Image directory: start from a clean slate (a missing directory is
        // fine, hence the ignored result) and place the profile data in it.
        let image_dir = scratch.join("reproducible-artifacts-image");
        let _ = fs::remove_dir_all(&image_dir);
        match &builder.config.rust_profile_use {
            Some(profile) => builder.install(std::path::Path::new(profile), &image_dir, 0o644),
            // Without a profile there is nothing worth shipping.
            None => return None,
        }

        // Overlay directory: version stamp plus the usual legal/readme files.
        let overlay_dir = scratch.join("reproducible-artifacts-overlay");
        let _ = fs::remove_dir_all(&overlay_dir);
        builder.create_dir(&overlay_dir);
        builder.create(&overlay_dir.join("version"), &builder.rust_version());
        let overlay_files = ["COPYRIGHT", "LICENSE-APACHE", "LICENSE-MIT", "README.md"];
        for doc in overlay_files.iter() {
            builder.install(&builder.src.join(doc), &overlay_dir, 0o644);
        }

        // The package name doubles as the stem of the resulting tarball.
        let package = format!("{}-{}", pkg, self.target.triple);

        // Hand everything over to rust-installer to produce the tarball.
        let mut installer = rust_installer(builder);
        installer
            .arg("generate")
            .arg("--product-name=Rust")
            .arg("--rel-manifest-dir=rustlib")
            .arg("--success-message=reproducible-artifacts installed.")
            .arg("--image-dir")
            .arg(&image_dir)
            .arg("--work-dir")
            .arg(&tmpdir(builder))
            .arg("--output-dir")
            .arg(&distdir(builder))
            .arg("--non-installed-overlay")
            .arg(&overlay_dir)
            .arg(format!("--package-name={}", package))
            .arg("--legacy-manifest-dirs=rustlib,cargo")
            .arg("--component-name=reproducible-artifacts");
        builder.run(&mut installer);

        Some(distdir(builder).join(format!("{}.tar.gz", package)))
    }
}

View File

@ -68,6 +68,9 @@ pub struct Flags {
pub deny_warnings: Option<bool>,
pub llvm_skip_rebuild: Option<bool>,
pub rust_profile_use: Option<String>,
pub rust_profile_generate: Option<String>,
}
pub enum Subcommand {
@ -219,6 +222,8 @@ To learn more about a subcommand, run `./x.py <subcommand> -h`",
VALUE overrides the skip-rebuild option in config.toml.",
"VALUE",
);
// PGO controls for the rustc build. The values are paths: they end up in
// `-Cprofile-generate=<path>` and `-Cprofile-use=<path>` respectively, so the
// help text must not reuse the `--error-format` wording.
opts.optopt("", "rust-profile-generate", "generate PGO profile with rustc build", "PROFILE");
opts.optopt("", "rust-profile-use", "use PGO profile for rustc build", "PROFILE");
// We can't use getopt to parse the options until we have completed specifying which
// options are valid, but under the current implementation, some options are conditional on
@ -674,6 +679,8 @@ Arguments:
color: matches
.opt_get_default("color", Color::Auto)
.expect("`color` should be `always`, `never`, or `auto`"),
rust_profile_use: matches.opt_str("rust-profile-use"),
rust_profile_generate: matches.opt_str("rust-profile-generate"),
}
}
}

View File

@ -85,6 +85,8 @@ ENV CC=clang CXX=clang++
COPY scripts/sccache.sh /scripts/
RUN sh /scripts/sccache.sh
ENV PGO_HOST=x86_64-unknown-linux-gnu
ENV HOSTS=x86_64-unknown-linux-gnu
ENV RUST_CONFIGURE_ARGS \
@ -98,9 +100,10 @@ ENV RUST_CONFIGURE_ARGS \
--set llvm.thin-lto=true \
--set llvm.ninja=false \
--set rust.jemalloc
ENV SCRIPT python2.7 ../x.py dist --host $HOSTS --target $HOSTS \
--include-default-paths \
src/tools/build-manifest
ENV SCRIPT ../src/ci/pgo.sh python2.7 ../x.py dist \
--host $HOSTS --target $HOSTS \
--include-default-paths \
src/tools/build-manifest
ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_LINKER=clang
# This is the only builder which will create source tarballs

47
src/ci/pgo.sh Executable file
View File

@ -0,0 +1,47 @@
#!/bin/bash
# Drives a profile-guided-optimization (PGO) build of rustc:
#   1. build an instrumented compiler that writes raw profiles to /tmp/rustc-pgo,
#   2. run that compiler over a handful of workloads to gather profile data,
#   3. merge the raw profiles into /tmp/rustc-pgo.profdata,
#   4. run the command passed as this script's arguments with
#      --rust-profile-use=/tmp/rustc-pgo.profdata appended.
#
# PGO_HOST must be set to the host triple to build and profile for.

set -euxo pipefail

rm -rf /tmp/rustc-pgo

python2.7 ../x.py build --target="$PGO_HOST" --host="$PGO_HOST" \
    --stage 2 library/std --rust-profile-generate=/tmp/rustc-pgo

# Workload: libcore.
"./build/$PGO_HOST/stage2/bin/rustc" --edition=2018 \
    --crate-type=lib ../library/core/src/lib.rs

# Download and build a single-file stress test benchmark on perf.rust-lang.org.
function pgo_perf_benchmark {
    local PERF=e095f5021bf01cf3800f50b3a9f14a9683eb3e4e
    local github_prefix="https://raw.githubusercontent.com/rust-lang/rustc-perf/$PERF"
    local name="$1"
    # --fail makes curl exit non-zero on an HTTP error instead of saving the
    # error page as the benchmark source.
    curl --fail -o "/tmp/$name.rs" "$github_prefix/collector/benchmarks/$name/src/lib.rs"
    "./build/$PGO_HOST/stage2/bin/rustc" --edition=2018 --crate-type=lib "/tmp/$name.rs"
}

pgo_perf_benchmark externs
pgo_perf_benchmark ctfe-stress-4

cp -pri ../src/tools/cargo /tmp/cargo

# Build cargo (with some flags)
function pgo_cargo {
    # "$@" keeps every forwarded argument intact (no re-splitting or globbing).
    RUSTC="./build/$PGO_HOST/stage2/bin/rustc" \
        "./build/$PGO_HOST/stage0/bin/cargo" "$@" \
        --manifest-path /tmp/cargo/Cargo.toml
}

# Build a couple different variants of Cargo
CARGO_INCREMENTAL=1 pgo_cargo check
echo 'pub fn barbarbar() {}' >> /tmp/cargo/src/cargo/lib.rs
CARGO_INCREMENTAL=1 pgo_cargo check
touch /tmp/cargo/src/cargo/lib.rs
CARGO_INCREMENTAL=1 pgo_cargo check
pgo_cargo build --release

# Merge the profile data we gathered
"./build/$PGO_HOST/llvm/bin/llvm-profdata" \
    merge -o /tmp/rustc-pgo.profdata /tmp/rustc-pgo

# This produces the actual final set of artifacts.
"$@" --rust-profile-use=/tmp/rustc-pgo.profdata

View File

@ -299,6 +299,7 @@ impl Builder {
let mut package = |name, targets| self.package(name, &mut manifest.pkg, targets);
package("rustc", HOSTS);
package("rustc-dev", HOSTS);
package("reproducible-artifacts", HOSTS);
package("rustc-docs", HOSTS);
package("cargo", HOSTS);
package("rust-mingw", MINGW);