Skip to content

Commit 717eb6c

Browse files
committed
Auto merge of #78409 - pietroalbini:build-manifest-checksum-cache, r=Mark-Simulacrum
Add checksums cache to build-manifest During the release process we're currently calculating the SHA256 of each file three times: 1. In `build-manifest`, to fill the `hash = "f00"` keys of the manifests. 2. In `promote-release`, to generate the `.sha256` files. 3. In `promote-release`, to generate the `.asc` GPG signatures. Calculations 1. and 2. could be merged into a single one if there were a way for `build-manifest` to pass the checksums it generated over to `promote-release`. Unfortunately, calculation 3. can't be merged as GPG requires extra metadata to be hashed. This PR adds support for merging 1. and 2. by creating the `BUILD_MANIFEST_CHECKSUM_CACHE` environment variable, which points to a JSON file storing a cache of all the calculated checksums. `build-manifest` will load it at startup and avoid generating existing checksums, and it will dump its internal checksums cache into it when it exits successfully. This PR also allows running `build-manifest` multiple times without the need to wait for checksums to be calculated in the following invocations. The speedup will allow us to work towards a fix for rust-lang/promote-release#15 without impacting the release process duration nor our storage costs. This PR can be reviewed commit-by-commit. r? `@Mark-Simulacrum`
2 parents 2eb4fc8 + c2f4bbd commit 717eb6c

File tree

2 files changed

+106
-50
lines changed

2 files changed

+106
-50
lines changed
+97
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
use crate::manifest::{FileHash, Manifest};
2+
use rayon::prelude::*;
3+
use sha2::{Digest, Sha256};
4+
use std::collections::{HashMap, HashSet};
5+
use std::error::Error;
6+
use std::fs::File;
7+
use std::io::BufReader;
8+
use std::path::{Path, PathBuf};
9+
use std::sync::Mutex;
10+
use std::time::Instant;
11+
12+
/// In-memory store of file checksums, optionally backed by a JSON cache file on disk.
pub(crate) struct Checksums {
    /// Location of the on-disk cache; `None` when no cache file is configured.
    cache_path: Option<PathBuf>,
    /// Checksums known so far, keyed by file path.
    ///
    /// Wrapped in a `Mutex` so that it can be filled through a shared reference
    /// while files are hashed in parallel.
    collected: Mutex<HashMap<PathBuf, String>>,
}
16+
17+
impl Checksums {
18+
pub(crate) fn new() -> Result<Self, Box<dyn Error>> {
19+
let cache_path = std::env::var_os("BUILD_MANIFEST_CHECKSUM_CACHE").map(PathBuf::from);
20+
21+
let mut collected = HashMap::new();
22+
if let Some(path) = &cache_path {
23+
if path.is_file() {
24+
collected = serde_json::from_slice(&std::fs::read(path)?)?;
25+
}
26+
}
27+
28+
Ok(Checksums { cache_path, collected: Mutex::new(collected) })
29+
}
30+
31+
pub(crate) fn store_cache(&self) -> Result<(), Box<dyn Error>> {
32+
if let Some(path) = &self.cache_path {
33+
std::fs::write(path, &serde_json::to_vec(&self.collected)?)?;
34+
}
35+
Ok(())
36+
}
37+
38+
pub(crate) fn fill_missing_checksums(&mut self, manifest: &mut Manifest) {
39+
let need_checksums = self.find_missing_checksums(manifest);
40+
if !need_checksums.is_empty() {
41+
self.collect_checksums(&need_checksums);
42+
}
43+
self.replace_checksums(manifest);
44+
}
45+
46+
fn find_missing_checksums(&mut self, manifest: &mut Manifest) -> HashSet<PathBuf> {
47+
let collected = self.collected.lock().unwrap();
48+
let mut need_checksums = HashSet::new();
49+
crate::manifest::visit_file_hashes(manifest, |file_hash| {
50+
if let FileHash::Missing(path) = file_hash {
51+
let path = std::fs::canonicalize(path).unwrap();
52+
if !collected.contains_key(&path) {
53+
need_checksums.insert(path);
54+
}
55+
}
56+
});
57+
need_checksums
58+
}
59+
60+
fn replace_checksums(&mut self, manifest: &mut Manifest) {
61+
let collected = self.collected.lock().unwrap();
62+
crate::manifest::visit_file_hashes(manifest, |file_hash| {
63+
if let FileHash::Missing(path) = file_hash {
64+
let path = std::fs::canonicalize(path).unwrap();
65+
match collected.get(&path) {
66+
Some(hash) => *file_hash = FileHash::Present(hash.clone()),
67+
None => panic!("missing hash for file {}", path.display()),
68+
}
69+
}
70+
});
71+
}
72+
73+
fn collect_checksums(&mut self, files: &HashSet<PathBuf>) {
74+
let collection_start = Instant::now();
75+
println!(
76+
"collecting hashes for {} tarballs across {} threads",
77+
files.len(),
78+
rayon::current_num_threads().min(files.len()),
79+
);
80+
81+
files.par_iter().for_each(|path| match hash(path) {
82+
Ok(hash) => {
83+
self.collected.lock().unwrap().insert(path.clone(), hash);
84+
}
85+
Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
86+
});
87+
88+
println!("collected {} hashes in {:.2?}", files.len(), collection_start.elapsed());
89+
}
90+
}
91+
92+
fn hash(path: &Path) -> Result<String, Box<dyn Error>> {
93+
let mut file = BufReader::new(File::open(path)?);
94+
let mut sha256 = Sha256::default();
95+
std::io::copy(&mut file, &mut sha256)?;
96+
Ok(hex::encode(sha256.finalize()))
97+
}

src/tools/build-manifest/src/main.rs

+9-50
Original file line numberDiff line numberDiff line change
@@ -4,22 +4,19 @@
44
//! via `x.py dist hash-and-sign`; the cmdline arguments are set up
55
//! by rustbuild (in `src/bootstrap/dist.rs`).
66
7+
mod checksum;
78
mod manifest;
89
mod versions;
910

10-
use crate::manifest::{Component, FileHash, Manifest, Package, Rename, Target};
11+
use crate::checksum::Checksums;
12+
use crate::manifest::{Component, Manifest, Package, Rename, Target};
1113
use crate::versions::{PkgType, Versions};
12-
use rayon::prelude::*;
13-
use sha2::Digest;
1414
use std::collections::{BTreeMap, HashMap, HashSet};
1515
use std::env;
16-
use std::error::Error;
1716
use std::fs::{self, File};
18-
use std::io::{self, BufReader, Read, Write};
17+
use std::io::{self, Read, Write};
1918
use std::path::{Path, PathBuf};
2019
use std::process::{Command, Stdio};
21-
use std::sync::Mutex;
22-
use std::time::Instant;
2320

2421
static HOSTS: &[&str] = &[
2522
"aarch64-apple-darwin",
@@ -186,6 +183,7 @@ macro_rules! t {
186183

187184
struct Builder {
188185
versions: Versions,
186+
checksums: Checksums,
189187
shipped_files: HashSet<String>,
190188

191189
input: PathBuf,
@@ -240,6 +238,7 @@ fn main() {
240238

241239
Builder {
242240
versions: Versions::new(&channel, &input).unwrap(),
241+
checksums: t!(Checksums::new()),
243242
shipped_files: HashSet::new(),
244243

245244
input,
@@ -276,6 +275,8 @@ impl Builder {
276275
if let Some(path) = std::env::var_os("BUILD_MANIFEST_SHIPPED_FILES_PATH") {
277276
self.write_shipped_files(&Path::new(&path));
278277
}
278+
279+
t!(self.checksums.store_cache());
279280
}
280281

281282
/// If a tool does not pass its tests, don't ship it.
@@ -321,7 +322,7 @@ impl Builder {
321322
self.add_renames_to(&mut manifest);
322323
manifest.pkg.insert("rust".to_string(), self.rust_package(&manifest));
323324

324-
self.fill_missing_hashes(&mut manifest);
325+
self.checksums.fill_missing_checksums(&mut manifest);
325326

326327
manifest
327328
}
@@ -595,41 +596,6 @@ impl Builder {
595596
assert!(t!(child.wait()).success());
596597
}
597598

598-
fn fill_missing_hashes(&self, manifest: &mut Manifest) {
599-
// First collect all files that need hashes
600-
let mut need_hashes = HashSet::new();
601-
crate::manifest::visit_file_hashes(manifest, |file_hash| {
602-
if let FileHash::Missing(path) = file_hash {
603-
need_hashes.insert(path.clone());
604-
}
605-
});
606-
607-
let collected = Mutex::new(HashMap::new());
608-
let collection_start = Instant::now();
609-
println!(
610-
"collecting hashes for {} tarballs across {} threads",
611-
need_hashes.len(),
612-
rayon::current_num_threads().min(need_hashes.len()),
613-
);
614-
need_hashes.par_iter().for_each(|path| match fetch_hash(path) {
615-
Ok(hash) => {
616-
collected.lock().unwrap().insert(path, hash);
617-
}
618-
Err(err) => eprintln!("error while fetching the hash for {}: {}", path.display(), err),
619-
});
620-
let collected = collected.into_inner().unwrap();
621-
println!("collected {} hashes in {:.2?}", collected.len(), collection_start.elapsed());
622-
623-
crate::manifest::visit_file_hashes(manifest, |file_hash| {
624-
if let FileHash::Missing(path) = file_hash {
625-
match collected.get(path) {
626-
Some(hash) => *file_hash = FileHash::Present(hash.clone()),
627-
None => panic!("missing hash for file {}", path.display()),
628-
}
629-
}
630-
})
631-
}
632-
633599
fn write_channel_files(&mut self, channel_name: &str, manifest: &Manifest) {
634600
self.write(&toml::to_string(&manifest).unwrap(), channel_name, ".toml");
635601
self.write(&manifest.date, channel_name, "-date.txt");
@@ -660,10 +626,3 @@ impl Builder {
660626
t!(std::fs::write(path, content.as_bytes()));
661627
}
662628
}
663-
664-
fn fetch_hash(path: &Path) -> Result<String, Box<dyn Error>> {
665-
let mut file = BufReader::new(File::open(path)?);
666-
let mut sha256 = sha2::Sha256::default();
667-
std::io::copy(&mut file, &mut sha256)?;
668-
Ok(hex::encode(sha256.finalize()))
669-
}

0 commit comments

Comments
 (0)