mirror of https://github.com/rust-lang/rust.git
Auto merge of #123246 - Kobzol:tarball-reproducible, r=Mark-Simulacrum
Make source tarball generation more reproducible

This PR makes several changes to source tarball generation (`x dist rustc-src`) in order to make it more reproducible (in light of the recent "xz backdoor"...). I want to follow up on it by adding a separate CI workflow for generating the tarball.

After this PR, running this locally produces identical checksums:

```bash
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
$ ./x dist rustc-src
$ sha256sum build/dist/rustc-1.79.0-src.tar.gz
```

r? `@Mark-Simulacrum`
This commit is contained in:
commit
a8cfc83801
|
@ -995,9 +995,9 @@ impl Step for PlainSourceTarball {
|
||||||
if builder.rust_info().is_managed_git_subrepository()
|
if builder.rust_info().is_managed_git_subrepository()
|
||||||
|| builder.rust_info().is_from_tarball()
|
|| builder.rust_info().is_from_tarball()
|
||||||
{
|
{
|
||||||
if builder.rust_info().is_managed_git_subrepository() {
|
// Ensure we have all submodules from src and other directories checked out.
|
||||||
// Ensure we have the submodules checked out.
|
for submodule in builder.get_all_submodules() {
|
||||||
builder.update_submodule(Path::new("src/tools/cargo"));
|
builder.update_submodule(Path::new(submodule));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Vendor all Cargo dependencies
|
// Vendor all Cargo dependencies
|
||||||
|
@ -1028,6 +1028,20 @@ impl Step for PlainSourceTarball {
|
||||||
builder.create(&cargo_config_dir.join("config.toml"), &config);
|
builder.create(&cargo_config_dir.join("config.toml"), &config);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Delete extraneous directories
|
||||||
|
// FIXME: if we're managed by git, we should probably instead ask git if the given path
|
||||||
|
// is managed by it?
|
||||||
|
for entry in walkdir::WalkDir::new(tarball.image_dir())
|
||||||
|
.follow_links(true)
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|e| e.ok())
|
||||||
|
{
|
||||||
|
if entry.path().is_dir() && entry.path().file_name() == Some(OsStr::new("__pycache__"))
|
||||||
|
{
|
||||||
|
t!(fs::remove_dir_all(entry.path()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
tarball.bare()
|
tarball.bare()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -554,29 +554,7 @@ impl<'a> ShouldRun<'a> {
|
||||||
///
|
///
|
||||||
/// [`path`]: ShouldRun::path
|
/// [`path`]: ShouldRun::path
|
||||||
pub fn paths(mut self, paths: &[&str]) -> Self {
|
pub fn paths(mut self, paths: &[&str]) -> Self {
|
||||||
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
|
let submodules_paths = self.builder.get_all_submodules();
|
||||||
|
|
||||||
let init_submodules_paths = |src: &PathBuf| {
|
|
||||||
let file = File::open(src.join(".gitmodules")).unwrap();
|
|
||||||
|
|
||||||
let mut submodules_paths = vec![];
|
|
||||||
for line in BufReader::new(file).lines() {
|
|
||||||
if let Ok(line) = line {
|
|
||||||
let line = line.trim();
|
|
||||||
|
|
||||||
if line.starts_with("path") {
|
|
||||||
let actual_path =
|
|
||||||
line.split(' ').last().expect("Couldn't get value of path");
|
|
||||||
submodules_paths.push(actual_path.to_owned());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
submodules_paths
|
|
||||||
};
|
|
||||||
|
|
||||||
let submodules_paths =
|
|
||||||
SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.builder.src));
|
|
||||||
|
|
||||||
self.paths.insert(PathSet::Set(
|
self.paths.insert(PathSet::Set(
|
||||||
paths
|
paths
|
||||||
|
@ -2151,6 +2129,37 @@ impl<'a> Builder<'a> {
|
||||||
out
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return paths of all submodules managed by git.
|
||||||
|
/// If the current checkout is not managed by git, returns an empty slice.
|
||||||
|
pub fn get_all_submodules(&self) -> &[String] {
|
||||||
|
if !self.rust_info().is_managed_git_subrepository() {
|
||||||
|
return &[];
|
||||||
|
}
|
||||||
|
|
||||||
|
static SUBMODULES_PATHS: OnceLock<Vec<String>> = OnceLock::new();
|
||||||
|
|
||||||
|
let init_submodules_paths = |src: &PathBuf| {
|
||||||
|
let file = File::open(src.join(".gitmodules")).unwrap();
|
||||||
|
|
||||||
|
let mut submodules_paths = vec![];
|
||||||
|
for line in BufReader::new(file).lines() {
|
||||||
|
if let Ok(line) = line {
|
||||||
|
let line = line.trim();
|
||||||
|
|
||||||
|
if line.starts_with("path") {
|
||||||
|
let actual_path =
|
||||||
|
line.split(' ').last().expect("Couldn't get value of path");
|
||||||
|
submodules_paths.push(actual_path.to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
submodules_paths
|
||||||
|
};
|
||||||
|
|
||||||
|
&SUBMODULES_PATHS.get_or_init(|| init_submodules_paths(&self.src))
|
||||||
|
}
|
||||||
|
|
||||||
/// Ensure that a given step is built *only if it's supposed to be built by default*, returning
|
/// Ensure that a given step is built *only if it's supposed to be built by default*, returning
|
||||||
/// its output. This will cache the step, so it's safe (and good!) to call this as often as
|
/// its output. This will cache the step, so it's safe (and good!) to call this as often as
|
||||||
/// needed to ensure that all dependencies are built.
|
/// needed to ensure that all dependencies are built.
|
||||||
|
|
|
@ -2,7 +2,7 @@ use anyhow::{bail, Context, Result};
|
||||||
use std::fs::{read_link, symlink_metadata};
|
use std::fs::{read_link, symlink_metadata};
|
||||||
use std::io::{BufWriter, Write};
|
use std::io::{BufWriter, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use tar::{Builder, Header};
|
use tar::{Builder, Header, HeaderMode};
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
@ -53,14 +53,19 @@ impl Tarballer {
|
||||||
// Sort files by their suffix, to group files with the same name from
|
// Sort files by their suffix, to group files with the same name from
|
||||||
// different locations (likely identical) and files with the same
|
// different locations (likely identical) and files with the same
|
||||||
// extension (likely containing similar data).
|
// extension (likely containing similar data).
|
||||||
let (dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
|
// Sorting of file and directory paths also helps with the reproducibility
|
||||||
|
// of the resulting archive.
|
||||||
|
let (mut dirs, mut files) = get_recursive_paths(&self.work_dir, &self.input)
|
||||||
.context("failed to collect file paths")?;
|
.context("failed to collect file paths")?;
|
||||||
|
dirs.sort();
|
||||||
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));
|
files.sort_by(|a, b| a.bytes().rev().cmp(b.bytes().rev()));
|
||||||
|
|
||||||
// Write the tar into both encoded files. We write all directories
|
// Write the tar into both encoded files. We write all directories
|
||||||
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
|
// first, so files may be directly created. (See rust-lang/rustup.rs#1092.)
|
||||||
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
|
let buf = BufWriter::with_capacity(1024 * 1024, encoder);
|
||||||
let mut builder = Builder::new(buf);
|
let mut builder = Builder::new(buf);
|
||||||
|
// Make uid, gid and mtime deterministic to improve reproducibility
|
||||||
|
builder.mode(HeaderMode::Deterministic);
|
||||||
|
|
||||||
let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
|
let pool = rayon::ThreadPoolBuilder::new().num_threads(2).build().unwrap();
|
||||||
pool.install(move || {
|
pool.install(move || {
|
||||||
|
@ -91,7 +96,8 @@ impl Tarballer {
|
||||||
fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
|
fn append_path<W: Write>(builder: &mut Builder<W>, src: &Path, path: &String) -> Result<()> {
|
||||||
let stat = symlink_metadata(src)?;
|
let stat = symlink_metadata(src)?;
|
||||||
let mut header = Header::new_gnu();
|
let mut header = Header::new_gnu();
|
||||||
header.set_metadata(&stat);
|
header.set_metadata_in_mode(&stat, HeaderMode::Deterministic);
|
||||||
|
|
||||||
if stat.file_type().is_symlink() {
|
if stat.file_type().is_symlink() {
|
||||||
let link = read_link(src)?;
|
let link = read_link(src)?;
|
||||||
builder.append_link(&mut header, path, &link)?;
|
builder.append_link(&mut header, path, &link)?;
|
||||||
|
|
Loading…
Reference in New Issue