Skip to content

Commit 91dc1e1

Browse files
committed
Auto merge of #8192 - alexcrichton:lto-optimizations, r=ehuss
Don't force rustc to do codegen for LTO builds This commit updates Cargo's implementation of LTO builds to do less work and hopefully be faster when doing a cold build. Additionally this should save space on disk! The general idea is that the compiler does not need object files if it's only going to perform LTO with some artifacts. In this case all rustc needs to do is load bitcode from dependencies. This means that if you're doing an LTO build, generating object code for intermediate dependencies is just wasted time! Here Cargo is updated with more intrusive knowledge about LTO. Cargo will now analyze the dependency graph to figure out which crates are being compiled with LTO, and then it will figure out which dependencies only need to have bitcode in them. Pure-bitcode artifacts are emitted with the `-Clinker-plugin-lto` flag. Some artifacts are still used in multiple scenarios (such as those shared between build scripts and final artifacts), so those are not compiled with `-Clinker-plugin-lto` since the linker is not guaranteed to know how to perform LTO. This functionality was recently implemented in rust-lang/rust#71528 where rustc is now capable of reading bitcode from `-Clinker-plugin-lto` rlibs. Previously rustc would only read its own format of bitcode, but this has now been extended! This support is now on nightly, hence this PR.
2 parents 893db8c + e221925 commit 91dc1e1

File tree

6 files changed

+410
-19
lines changed

6 files changed

+410
-19
lines changed

src/cargo/core/compiler/build_context/target_info.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ pub struct TargetInfo {
4040
pub rustflags: Vec<String>,
4141
/// Extra flags to pass to `rustdoc`, see `env_args`.
4242
pub rustdocflags: Vec<String>,
43-
/// Remove this when it hits stable (1.44)
43+
/// Remove this when it hits stable (1.45)
4444
pub supports_embed_bitcode: Option<bool>,
4545
}
4646

src/cargo/core/compiler/context/mod.rs

+8
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use super::custom_build::{self, BuildDeps, BuildScriptOutputs, BuildScripts};
1616
use super::fingerprint::Fingerprint;
1717
use super::job_queue::JobQueue;
1818
use super::layout::Layout;
19+
use super::lto::Lto;
1920
use super::unit_graph::UnitDep;
2021
use super::{BuildContext, Compilation, CompileKind, CompileMode, Executor, FileFlavor};
2122

@@ -72,6 +73,11 @@ pub struct Context<'a, 'cfg> {
7273
/// jobserver clients for each Unit (which eventually becomes a rustc
7374
/// process).
7475
pub rustc_clients: HashMap<Unit, Client>,
76+
77+
/// Map of the LTO-status of each unit. This indicates what sort of
78+
/// compilation is happening (only object, only bitcode, both, etc), and is
79+
/// precalculated early on.
80+
pub lto: HashMap<Unit, Lto>,
7581
}
7682

7783
impl<'a, 'cfg> Context<'a, 'cfg> {
@@ -111,6 +117,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> {
111117
rmeta_required: HashSet::new(),
112118
rustc_clients: HashMap::new(),
113119
pipelining,
120+
lto: HashMap::new(),
114121
})
115122
}
116123

@@ -123,6 +130,7 @@ impl<'a, 'cfg> Context<'a, 'cfg> {
123130
self.prepare_units()?;
124131
self.prepare()?;
125132
custom_build::build_map(&mut self)?;
133+
super::lto::generate(&mut self)?;
126134
self.check_collistions()?;
127135

128136
for unit in &self.bcx.roots {

src/cargo/core/compiler/fingerprint.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@
7171
//! -C incremental=… flag | ✓ |
7272
//! mtime of sources | ✓[^3] |
7373
//! RUSTFLAGS/RUSTDOCFLAGS | ✓ |
74+
//! LTO flags | ✓ |
7475
//! is_std | | ✓
7576
//!
7677
//! [^1]: Build script and bin dependencies are not included.
@@ -1241,7 +1242,12 @@ fn calculate_normal(cx: &mut Context<'_, '_>, unit: &Unit) -> CargoResult<Finger
12411242
}
12421243
.to_vec();
12431244

1244-
let profile_hash = util::hash_u64((&unit.profile, unit.mode, cx.bcx.extra_args_for(unit)));
1245+
let profile_hash = util::hash_u64((
1246+
&unit.profile,
1247+
unit.mode,
1248+
cx.bcx.extra_args_for(unit),
1249+
cx.lto[unit],
1250+
));
12451251
// Include metadata since it is exposed as environment variables.
12461252
let m = unit.pkg.manifest().metadata();
12471253
let metadata = util::hash_u64((&m.authors, &m.description, &m.homepage, &m.repository));

src/cargo/core/compiler/lto.rs

+116
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
use crate::core::compiler::{Context, Unit};
2+
use crate::core::interning::InternedString;
3+
use crate::core::profiles;
4+
use crate::util::errors::CargoResult;
5+
use std::collections::hash_map::{Entry, HashMap};
6+
7+
/// Possible ways to run rustc and request various parts of LTO.
8+
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
9+
pub enum Lto {
10+
/// LTO is run for this rustc, and it's `-Clto=foo` where `foo` is optional.
11+
Run(Option<InternedString>),
12+
13+
/// This rustc invocation only needs to produce bitcode, there's no need to
14+
/// produce object files, so we can pass `-Clinker-plugin-lto`
15+
OnlyBitcode,
16+
17+
/// This rustc invocation needs to embed bitcode in object files. This means
18+
/// that object files may be used for a normal link, and the crate may be
19+
/// loaded for LTO later, so both are required.
20+
EmbedBitcode,
21+
22+
/// Nothing related to LTO is required of this compilation.
23+
None,
24+
}
25+
26+
pub fn generate(cx: &mut Context<'_, '_>) -> CargoResult<()> {
27+
let mut map = HashMap::new();
28+
for unit in cx.bcx.roots.iter() {
29+
calculate(cx, &mut map, unit, false)?;
30+
}
31+
cx.lto = map;
32+
Ok(())
33+
}
34+
35+
fn calculate(
36+
cx: &Context<'_, '_>,
37+
map: &mut HashMap<Unit, Lto>,
38+
unit: &Unit,
39+
require_bitcode: bool,
40+
) -> CargoResult<()> {
41+
let (lto, require_bitcode_for_deps) = if unit.target.for_host() {
42+
// Disable LTO for host builds since we only really want to perform LTO
43+
// for the final binary, and LTO on plugins/build scripts/proc macros is
44+
// largely not desired.
45+
(Lto::None, false)
46+
} else if unit.target.can_lto() {
47+
// Otherwise if this target can perform LTO then we're going to read the
48+
// LTO value out of the profile.
49+
assert!(!require_bitcode); // can't depend on binaries/staticlib/etc
50+
match unit.profile.lto {
51+
profiles::Lto::Named(s) => match s.as_str() {
52+
"n" | "no" | "off" => (Lto::Run(Some(s)), false),
53+
_ => (Lto::Run(Some(s)), true),
54+
},
55+
profiles::Lto::Bool(true) => (Lto::Run(None), true),
56+
profiles::Lto::Bool(false) => (Lto::None, false),
57+
}
58+
} else if require_bitcode {
59+
// Otherwise we're a dependency of something, an rlib. This means that
60+
// if our parent required bitcode of some kind then we need to generate
61+
// bitcode.
62+
(Lto::OnlyBitcode, true)
63+
} else {
64+
(Lto::None, false)
65+
};
66+
67+
match map.entry(unit.clone()) {
68+
// If we haven't seen this unit before then insert our value and keep
69+
// going.
70+
Entry::Vacant(v) => {
71+
v.insert(lto);
72+
}
73+
74+
Entry::Occupied(mut v) => {
75+
let result = match (lto, v.get()) {
76+
// Targets which execute LTO cannot be depended on, so these
77+
// units should only show up once in the dependency graph, so we
78+
// should never hit this case.
79+
(Lto::Run(_), _) | (_, Lto::Run(_)) => {
80+
unreachable!("lto-able targets shouldn't show up twice")
81+
}
82+
83+
// If we calculated the same thing as before then we can bail
84+
// out quickly.
85+
(Lto::OnlyBitcode, Lto::OnlyBitcode) | (Lto::None, Lto::None) => return Ok(()),
86+
87+
// This is where the trickiness happens. This unit needs
88+
// bitcode and the previously calculated value for this unit
89+
// says it didn't need bitcode (or vice versa). This means that
90+
// we're a shared dependency between some targets which require
91+
// LTO and some which don't. This means that instead of being
92+
// either only-objects or only-bitcode we have to embed both in
93+
// rlibs (used for different compilations), so we switch to
94+
// embedding bitcode.
95+
(Lto::OnlyBitcode, Lto::None)
96+
| (Lto::OnlyBitcode, Lto::EmbedBitcode)
97+
| (Lto::None, Lto::OnlyBitcode)
98+
| (Lto::None, Lto::EmbedBitcode) => Lto::EmbedBitcode,
99+
100+
// Currently this variant is never calculated above, so no need
101+
// to handle this case.
102+
(Lto::EmbedBitcode, _) => unreachable!(),
103+
};
104+
// No need to recurse if we calculated the same value as before.
105+
if result == *v.get() {
106+
return Ok(());
107+
}
108+
v.insert(result);
109+
}
110+
}
111+
112+
for dep in cx.unit_deps(unit) {
113+
calculate(cx, map, &dep.unit, require_bitcode_for_deps)?;
114+
}
115+
Ok(())
116+
}

src/cargo/core/compiler/mod.rs

+24-17
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ mod job;
1010
mod job_queue;
1111
mod layout;
1212
mod links;
13+
mod lto;
1314
mod output_depinfo;
1415
pub mod standard_lib;
1516
mod timings;
@@ -42,7 +43,7 @@ use self::output_depinfo::output_depinfo;
4243
use self::unit_graph::UnitDep;
4344
pub use crate::core::compiler::unit::{Unit, UnitInterner};
4445
use crate::core::manifest::TargetSourcePath;
45-
use crate::core::profiles::{Lto, PanicStrategy, Profile};
46+
use crate::core::profiles::{PanicStrategy, Profile};
4647
use crate::core::{Edition, Feature, InternedString, PackageId, Target};
4748
use crate::util::errors::{self, CargoResult, CargoResultExt, ProcessError, VerboseError};
4849
use crate::util::machine_message::Message;
@@ -740,7 +741,6 @@ fn build_base_args(
740741
let bcx = cx.bcx;
741742
let Profile {
742743
ref opt_level,
743-
ref lto,
744744
codegen_units,
745745
debuginfo,
746746
debug_assertions,
@@ -793,24 +793,31 @@ fn build_base_args(
793793
cmd.arg("-C").arg(format!("panic={}", panic));
794794
}
795795

796-
// Disable LTO for host builds as prefer_dynamic and it are mutually
797-
// exclusive.
798-
let lto_possible = unit.target.can_lto() && !unit.target.for_host();
799-
match lto {
800-
Lto::Bool(true) => {
801-
if lto_possible {
802-
cmd.args(&["-C", "lto"]);
803-
}
796+
match cx.lto[&unit] {
797+
lto::Lto::Run(None) => {
798+
cmd.arg("-C").arg("lto");
799+
}
800+
lto::Lto::Run(Some(s)) => {
801+
cmd.arg("-C").arg(format!("lto={}", s));
804802
}
805-
Lto::Named(s) => {
806-
if lto_possible {
807-
cmd.arg("-C").arg(format!("lto={}", s));
803+
lto::Lto::EmbedBitcode => {} // this is rustc's default
804+
lto::Lto::OnlyBitcode => {
805+
// Note that this compiler flag, like the one below, is just an
806+
// optimization in terms of build time. If we don't pass it then
807+
// both object code and bitcode will show up. This is largely just
808+
// compat until the feature lands on stable and we can remove the
809+
// conditional branch.
810+
if cx
811+
.bcx
812+
.target_data
813+
.info(CompileKind::Host)
814+
.supports_embed_bitcode
815+
.unwrap()
816+
{
817+
cmd.arg("-Clinker-plugin-lto");
808818
}
809819
}
810-
// If LTO isn't being enabled then there's no need for bitcode to be
811-
// present in the intermediate artifacts, so shave off some build time
812-
// by removing it.
813-
Lto::Bool(false) => {
820+
lto::Lto::None => {
814821
if cx
815822
.bcx
816823
.target_data

0 commit comments

Comments
 (0)