Skip to content

Commit 6dadfc0

Browse files
committed
Auto merge of #93717 - pietroalbini:pa-ci-profiler, r=Mark-Simulacrum
Add build metrics to rustbuild This PR adds a new module of rustbuild, `ci_profiler`, whose job is to gather as much information as possible about the CI build as possible and store it in a JSON file uploaded to `ci-artifacts`. Right now for each step it collects: * Type name and debug representation of the `Step` object. * Duration of the step (excluding child steps). * Systemwide CPU stats for the duration of the step (both single core and all cores). * Which child steps were executed. This is capable of replacing both the scripts to collect CPU stats and the `[TIMING]` lines in build logs (not yet removed, until we port our tooling to use the CI profiler). The format is also extensible to be able in the future to collect more information. r? `@Mark-Simulacrum`
2 parents 43874a2 + 70cdd7e commit 6dadfc0

10 files changed

+268
-4
lines changed

Cargo.lock

+16
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,7 @@ dependencies = [
223223
"pretty_assertions 0.7.2",
224224
"serde",
225225
"serde_json",
226+
"sysinfo",
226227
"tar",
227228
"toml",
228229
"winapi",
@@ -5126,6 +5127,21 @@ dependencies = [
51265127
"unicode-xid",
51275128
]
51285129

5130+
[[package]]
5131+
name = "sysinfo"
5132+
version = "0.24.1"
5133+
source = "registry+https://github.com/rust-lang/crates.io-index"
5134+
checksum = "a6a8e71535da31837213ac114531d31def75d7aebd133264e420a3451fa7f703"
5135+
dependencies = [
5136+
"cfg-if 1.0.0",
5137+
"core-foundation-sys",
5138+
"libc",
5139+
"ntapi",
5140+
"once_cell",
5141+
"rayon",
5142+
"winapi",
5143+
]
5144+
51295145
[[package]]
51305146
name = "tar"
51315147
version = "0.4.37"

config.toml.example

+6
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,12 @@ changelog-seen = 2
324324
# a Nix toolchain on non-NixOS distributions.
325325
#patch-binaries-for-nix = false
326326

327+
# Collect information and statistics about the current build and writes it to
328+
# disk. Enabling this or not has no impact on the resulting build output. The
329+
# schema of the file generated by the build metrics feature is unstable, and
330+
# this is not intended to be used during local development.
331+
#metrics = false
332+
327333
# =============================================================================
328334
# General install configuration options
329335
# =============================================================================

src/bootstrap/Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,9 @@ opener = "0.5"
4949
once_cell = "1.7.2"
5050
xz2 = "0.1"
5151

52+
# Dependencies needed by the build-metrics feature
53+
sysinfo = { version = "0.24.1", optional = true }
54+
5255
[target.'cfg(windows)'.dependencies.winapi]
5356
version = "0.3"
5457
features = [
@@ -64,3 +67,6 @@ features = [
6467

6568
[dev-dependencies]
6669
pretty_assertions = "0.7"
70+
71+
[features]
72+
build-metrics = ["sysinfo"]

src/bootstrap/bootstrap.py

+3
Original file line numberDiff line numberDiff line change
@@ -837,6 +837,9 @@ def build_bootstrap(self):
837837
args.append("--locked")
838838
if self.use_vendored_sources:
839839
args.append("--frozen")
840+
if self.get_toml("metrics", "build"):
841+
args.append("--features")
842+
args.append("build-metrics")
840843
run(args, env=env, verbose=self.verbose)
841844

842845
def build_triple(self):

src/bootstrap/builder.rs

+6
Original file line numberDiff line numberDiff line change
@@ -2010,6 +2010,9 @@ impl<'a> Builder<'a> {
20102010
stack.push(Box::new(step.clone()));
20112011
}
20122012

2013+
#[cfg(feature = "build-metrics")]
2014+
self.metrics.enter_step(&step);
2015+
20132016
let (out, dur) = {
20142017
let start = Instant::now();
20152018
let zero = Duration::new(0, 0);
@@ -2033,6 +2036,9 @@ impl<'a> Builder<'a> {
20332036
);
20342037
}
20352038

2039+
#[cfg(feature = "build-metrics")]
2040+
self.metrics.exit_step();
2041+
20362042
{
20372043
let mut stack = self.stack.borrow_mut();
20382044
let cur_step = stack.pop().expect("step stack empty");

src/bootstrap/config.rs

+1
Original file line numberDiff line numberDiff line change
@@ -550,6 +550,7 @@ define_config! {
550550
dist_stage: Option<u32> = "dist-stage",
551551
bench_stage: Option<u32> = "bench-stage",
552552
patch_binaries_for_nix: Option<bool> = "patch-binaries-for-nix",
553+
metrics: Option<bool> = "metrics",
553554
}
554555
}
555556

src/bootstrap/lib.rs

+12
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,9 @@ mod tool;
150150
mod toolstate;
151151
pub mod util;
152152

153+
#[cfg(feature = "build-metrics")]
154+
mod metrics;
155+
153156
#[cfg(windows)]
154157
mod job;
155158

@@ -312,6 +315,9 @@ pub struct Build {
312315
prerelease_version: Cell<Option<u32>>,
313316
tool_artifacts:
314317
RefCell<HashMap<TargetSelection, HashMap<String, (&'static str, PathBuf, Vec<String>)>>>,
318+
319+
#[cfg(feature = "build-metrics")]
320+
metrics: metrics::BuildMetrics,
315321
}
316322

317323
#[derive(Debug)]
@@ -501,6 +507,9 @@ impl Build {
501507
delayed_failures: RefCell::new(Vec::new()),
502508
prerelease_version: Cell::new(None),
503509
tool_artifacts: Default::default(),
510+
511+
#[cfg(feature = "build-metrics")]
512+
metrics: metrics::BuildMetrics::init(),
504513
};
505514

506515
build.verbose("finding compilers");
@@ -695,6 +704,9 @@ impl Build {
695704
}
696705
process::exit(1);
697706
}
707+
708+
#[cfg(feature = "build-metrics")]
709+
self.metrics.persist(self);
698710
}
699711

700712
/// Clear out `dir` if `input` is newer.

src/bootstrap/metrics.rs

+208
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
//! This module is responsible for collecting metrics profiling information for the current build
2+
//! and dumping it to disk as JSON, to aid investigations on build and CI performance.
3+
//!
4+
//! As this module requires additional dependencies not present during local builds, it's cfg'd
5+
//! away whenever the `build.metrics` config option is not set to `true`.
6+
7+
use crate::builder::Step;
8+
use crate::util::t;
9+
use crate::Build;
10+
use serde::{Deserialize, Serialize};
11+
use std::cell::RefCell;
12+
use std::fs::File;
13+
use std::io::BufWriter;
14+
use std::time::{Duration, Instant};
15+
use sysinfo::{CpuExt, System, SystemExt};
16+
17+
pub(crate) struct BuildMetrics {
18+
state: RefCell<MetricsState>,
19+
}
20+
21+
impl BuildMetrics {
22+
pub(crate) fn init() -> Self {
23+
let state = RefCell::new(MetricsState {
24+
finished_steps: Vec::new(),
25+
running_steps: Vec::new(),
26+
27+
system_info: System::new(),
28+
timer_start: None,
29+
invocation_timer_start: Instant::now(),
30+
});
31+
32+
BuildMetrics { state }
33+
}
34+
35+
pub(crate) fn enter_step<S: Step>(&self, step: &S) {
36+
let mut state = self.state.borrow_mut();
37+
38+
// Consider all the stats gathered so far as the parent's.
39+
if !state.running_steps.is_empty() {
40+
self.collect_stats(&mut *state);
41+
}
42+
43+
state.system_info.refresh_cpu();
44+
state.timer_start = Some(Instant::now());
45+
46+
state.running_steps.push(StepMetrics {
47+
type_: std::any::type_name::<S>().into(),
48+
debug_repr: format!("{step:?}"),
49+
50+
cpu_usage_time_sec: 0.0,
51+
duration_excluding_children_sec: Duration::ZERO,
52+
53+
children: Vec::new(),
54+
});
55+
}
56+
57+
pub(crate) fn exit_step(&self) {
58+
let mut state = self.state.borrow_mut();
59+
60+
self.collect_stats(&mut *state);
61+
62+
let step = state.running_steps.pop().unwrap();
63+
if state.running_steps.is_empty() {
64+
state.finished_steps.push(step);
65+
state.timer_start = None;
66+
} else {
67+
state.running_steps.last_mut().unwrap().children.push(step);
68+
69+
// Start collecting again for the parent step.
70+
state.system_info.refresh_cpu();
71+
state.timer_start = Some(Instant::now());
72+
}
73+
}
74+
75+
fn collect_stats(&self, state: &mut MetricsState) {
76+
let step = state.running_steps.last_mut().unwrap();
77+
78+
let elapsed = state.timer_start.unwrap().elapsed();
79+
step.duration_excluding_children_sec += elapsed;
80+
81+
state.system_info.refresh_cpu();
82+
let cpu = state.system_info.cpus().iter().map(|p| p.cpu_usage()).sum::<f32>();
83+
step.cpu_usage_time_sec += cpu as f64 / 100.0 * elapsed.as_secs_f64();
84+
}
85+
86+
pub(crate) fn persist(&self, build: &Build) {
87+
let mut state = self.state.borrow_mut();
88+
assert!(state.running_steps.is_empty(), "steps are still executing");
89+
90+
let dest = build.out.join("metrics.json");
91+
92+
let mut system = System::new();
93+
system.refresh_cpu();
94+
system.refresh_memory();
95+
96+
let system_stats = JsonInvocationSystemStats {
97+
cpu_threads_count: system.cpus().len(),
98+
cpu_model: system.cpus()[0].brand().into(),
99+
100+
memory_total_bytes: system.total_memory() * 1024,
101+
};
102+
let steps = std::mem::take(&mut state.finished_steps);
103+
104+
// Some of our CI builds consist of multiple independent CI invocations. Ensure all the
105+
// previous invocations are still present in the resulting file.
106+
let mut invocations = match std::fs::read(&dest) {
107+
Ok(contents) => t!(serde_json::from_slice::<JsonRoot>(&contents)).invocations,
108+
Err(err) => {
109+
if err.kind() != std::io::ErrorKind::NotFound {
110+
panic!("failed to open existing metrics file at {}: {err}", dest.display());
111+
}
112+
Vec::new()
113+
}
114+
};
115+
invocations.push(JsonInvocation {
116+
duration_including_children_sec: state.invocation_timer_start.elapsed().as_secs_f64(),
117+
children: steps.into_iter().map(|step| self.prepare_json_step(step)).collect(),
118+
});
119+
120+
let json = JsonRoot { system_stats, invocations };
121+
122+
t!(std::fs::create_dir_all(dest.parent().unwrap()));
123+
let mut file = BufWriter::new(t!(File::create(&dest)));
124+
t!(serde_json::to_writer(&mut file, &json));
125+
}
126+
127+
fn prepare_json_step(&self, step: StepMetrics) -> JsonNode {
128+
JsonNode::RustbuildStep {
129+
type_: step.type_,
130+
debug_repr: step.debug_repr,
131+
132+
duration_excluding_children_sec: step.duration_excluding_children_sec.as_secs_f64(),
133+
system_stats: JsonStepSystemStats {
134+
cpu_utilization_percent: step.cpu_usage_time_sec * 100.0
135+
/ step.duration_excluding_children_sec.as_secs_f64(),
136+
},
137+
138+
children: step
139+
.children
140+
.into_iter()
141+
.map(|child| self.prepare_json_step(child))
142+
.collect(),
143+
}
144+
}
145+
}
146+
147+
struct MetricsState {
148+
finished_steps: Vec<StepMetrics>,
149+
running_steps: Vec<StepMetrics>,
150+
151+
system_info: System,
152+
timer_start: Option<Instant>,
153+
invocation_timer_start: Instant,
154+
}
155+
156+
struct StepMetrics {
157+
type_: String,
158+
debug_repr: String,
159+
160+
cpu_usage_time_sec: f64,
161+
duration_excluding_children_sec: Duration,
162+
163+
children: Vec<StepMetrics>,
164+
}
165+
166+
#[derive(Serialize, Deserialize)]
167+
#[serde(rename_all = "snake_case")]
168+
struct JsonRoot {
169+
system_stats: JsonInvocationSystemStats,
170+
invocations: Vec<JsonInvocation>,
171+
}
172+
173+
#[derive(Serialize, Deserialize)]
174+
#[serde(rename_all = "snake_case")]
175+
struct JsonInvocation {
176+
duration_including_children_sec: f64,
177+
children: Vec<JsonNode>,
178+
}
179+
180+
#[derive(Serialize, Deserialize)]
181+
#[serde(tag = "kind", rename_all = "snake_case")]
182+
enum JsonNode {
183+
RustbuildStep {
184+
#[serde(rename = "type")]
185+
type_: String,
186+
debug_repr: String,
187+
188+
duration_excluding_children_sec: f64,
189+
system_stats: JsonStepSystemStats,
190+
191+
children: Vec<JsonNode>,
192+
},
193+
}
194+
195+
#[derive(Serialize, Deserialize)]
196+
#[serde(rename_all = "snake_case")]
197+
struct JsonInvocationSystemStats {
198+
cpu_threads_count: usize,
199+
cpu_model: String,
200+
201+
memory_total_bytes: u64,
202+
}
203+
204+
#[derive(Serialize, Deserialize)]
205+
#[serde(rename_all = "snake_case")]
206+
struct JsonStepSystemStats {
207+
cpu_utilization_percent: f64,
208+
}

src/ci/run.sh

+1
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ fi
4545

4646
if ! isCI || isCiBranch auto || isCiBranch beta || isCiBranch try; then
4747
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set build.print-step-timings --enable-verbose-tests"
48+
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --set build.metrics"
4849
fi
4950

5051
RUST_CONFIGURE_ARGS="$RUST_CONFIGURE_ARGS --enable-sccache"

src/ci/scripts/upload-artifacts.sh

+9-4
Original file line numberDiff line numberDiff line change
@@ -10,19 +10,24 @@ source "$(cd "$(dirname "$0")" && pwd)/../shared.sh"
1010

1111
upload_dir="$(mktemp -d)"
1212

13+
build_dir=build
14+
if isLinux; then
15+
build_dir=obj/build
16+
fi
17+
1318
# Release tarballs produced by a dist builder.
1419
if [[ "${DEPLOY-0}" -eq "1" ]] || [[ "${DEPLOY_ALT-0}" -eq "1" ]]; then
15-
dist_dir=build/dist
16-
if isLinux; then
17-
dist_dir=obj/build/dist
18-
fi
20+
dist_dir="${build_dir}/dist"
1921
rm -rf "${dist_dir}/doc"
2022
cp -r "${dist_dir}"/* "${upload_dir}"
2123
fi
2224

2325
# CPU usage statistics.
2426
cp cpu-usage.csv "${upload_dir}/cpu-${CI_JOB_NAME}.csv"
2527

28+
# Build metrics generated by x.py.
29+
cp "${build_dir}/metrics.json" "${upload_dir}/metrics-${CI_JOB_NAME}.json"
30+
2631
# Toolstate data.
2732
if [[ -n "${DEPLOY_TOOLSTATES_JSON+x}" ]]; then
2833
cp /tmp/toolstate/toolstates.json "${upload_dir}/${DEPLOY_TOOLSTATES_JSON}"

0 commit comments

Comments
 (0)