Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

save LTO import info and check it when trying to reuse build products #67020

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 114 additions & 10 deletions src/librustc_codegen_llvm/back/lto.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,24 @@ use rustc::hir::def_id::LOCAL_CRATE;
use rustc::middle::exported_symbols::SymbolExportLevel;
use rustc::session::config::{self, Lto};
use rustc::util::common::time_ext;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::{FxHashSet, FxHashMap};
use rustc_codegen_ssa::{RLIB_BYTECODE_EXTENSION, ModuleCodegen, ModuleKind};
use log::{info, debug};

use std::ffi::{CStr, CString};
use std::fs::File;
use std::io;
use std::mem;
use std::path::Path;
use std::ptr;
use std::slice;
use std::sync::Arc;

/// We keep track of past LTO imports that were used to produce the current set
/// of compiled object files that we might choose to reuse during this
/// compilation session.
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-imports.bin";

pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
match crate_type {
config::CrateType::Executable |
Expand Down Expand Up @@ -472,13 +481,26 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,

info!("thin LTO data created");

let import_map = if cgcx.incr_comp_session_dir.is_some() {
ThinLTOImports::from_thin_lto_data(data)
let (import_map_path, prev_import_map, curr_import_map) =
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir
{
let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
// If previous imports have been deleted, or we get an IO error
// reading the file storing them, then we'll just use `None` as the
// prev_import_map, which will force the code to be recompiled.
let prev = if path.exists() {
ThinLTOImports::load_from_file(&path).ok()
} else {
None
};
let curr = ThinLTOImports::from_thin_lto_data(data);
(Some(path), prev, curr)
} else {
// If we don't compile incrementally, we don't need to load the
// import data from LLVM.
assert!(green_modules.is_empty());
ThinLTOImports::default()
let curr = ThinLTOImports::default();
(None, None, curr)
};
info!("thin LTO import map loaded");

Expand All @@ -502,18 +524,36 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
for (module_index, module_name) in shared.module_names.iter().enumerate() {
let module_name = module_name_to_str(module_name);

// If the module hasn't changed and none of the modules it imports
// from has changed, we can re-use the post-ThinLTO version of the
// module.
if green_modules.contains_key(module_name) {
let imports_all_green = import_map.modules_imported_by(module_name)
// If (1.) the module hasn't changed, and (2.) none of the modules
// it imports from has changed, *and* (3.) the import-set itself has
// not changed from the previous compile when it was last
// ThinLTO'ed, then we can re-use the post-ThinLTO version of the
// module. Otherwise, freshly perform LTO optimization.
//
// This strategy means we can always save the computed imports as
// canon: when we reuse the post-ThinLTO version, condition (3.)
// ensures that the curent import set is the same as the previous
// one. (And of course, when we don't reuse the post-ThinLTO
// version, the current import set *is* the correct one, since we
// are doing the ThinLTO in this current compilation cycle.)
//
// See rust-lang/rust#59535.
if let (Some(prev_import_map), true) =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Interesting pattern :)

(prev_import_map.as_ref(), green_modules.contains_key(module_name))
{
assert!(cgcx.incr_comp_session_dir.is_some());

let prev_imports = prev_import_map.modules_imported_by(module_name);
let curr_imports = curr_import_map.modules_imported_by(module_name);
let imports_all_green = curr_imports
.iter()
.all(|imported_module| green_modules.contains_key(imported_module));

if imports_all_green {
if imports_all_green && equivalent_as_sets(prev_imports, curr_imports) {
let work_product = green_modules[module_name].clone();
copy_jobs.push(work_product);
info!(" - {}: re-used", module_name);
assert!(cgcx.incr_comp_session_dir.is_some());
cgcx.cgu_reuse_tracker.set_actual_reuse(module_name,
CguReuse::PostLto);
continue
Expand All @@ -527,10 +567,33 @@ fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
}));
}

// Save the curent ThinLTO import information for the next compilation
// session, overwriting the previous serialized imports (if any).
if let Some(path) = import_map_path {
if let Err(err) = curr_import_map.save_to_file(&path) {
let msg = format!("Error while writing ThinLTO import data: {}", err);
return Err(write::llvm_err(&diag_handler, &msg));
}
}

Ok((opt_jobs, copy_jobs))
}
}

/// Given two slices, each with no repeat elements. returns true if and only if
/// the two slices have the same contents when considered as sets (i.e. when
/// element order is disregarded).
fn equivalent_as_sets(a: &[String], b: &[String]) -> bool {
// cheap path: unequal lengths means cannot possibly be set equivalent.
if a.len() != b.len() { return false; }
// fast path: before building new things, check if inputs are equivalent as is.
if a == b { return true; }
// slow path: general set comparison.
let a: FxHashSet<&str> = a.iter().map(|s| s.as_str()).collect();
let b: FxHashSet<&str> = b.iter().map(|s| s.as_str()).collect();
a == b
}

pub(crate) fn run_pass_manager(cgcx: &CodegenContext<LlvmCodegenBackend>,
module: &ModuleCodegen<ModuleLlvm>,
config: &ModuleConfig,
Expand Down Expand Up @@ -832,6 +895,47 @@ impl ThinLTOImports {
self.imports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
}

fn save_to_file(&self, path: &Path) -> io::Result<()> {
use std::io::Write;
let file = File::create(path)?;
let mut writer = io::BufWriter::new(file);
for (importing_module_name, imported_modules) in &self.imports {
writeln!(writer, "{}", importing_module_name)?;
for imported_module in imported_modules {
writeln!(writer, " {}", imported_module)?;
}
writeln!(writer)?;
}
Ok(())
}

fn load_from_file(path: &Path) -> io::Result<ThinLTOImports> {
use std::io::BufRead;
let mut imports = FxHashMap::default();
let mut current_module = None;
let mut current_imports = vec![];
let file = File::open(path)?;
for line in io::BufReader::new(file).lines() {
let line = line?;
if line.is_empty() {
let importing_module = current_module
.take()
.expect("Importing module not set");
imports.insert(importing_module,
mem::replace(&mut current_imports, vec![]));
} else if line.starts_with(" ") {
// Space marks an imported module
assert_ne!(current_module, None);
current_imports.push(line.trim().to_string());
} else {
// Otherwise, beginning of a new module (must be start or follow empty line)
assert_eq!(current_module, None);
current_module = Some(line.trim().to_string());
}
}
Ok(ThinLTOImports { imports })
}

/// Loads the ThinLTO import map from ThinLTOData.
unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports {
unsafe extern "C" fn imported_module_callback(payload: *mut libc::c_void,
Expand Down
62 changes: 62 additions & 0 deletions src/test/incremental/thinlto/cgu_invalidated_when_import_added.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// revisions: cfail1 cfail2
// compile-flags: -O -Zhuman-readable-cgu-names -Cllvm-args=-import-instr-limit=10
// build-pass

// rust-lang/rust#59535:
//
// This is analgous to cgu_invalidated_when_import_removed.rs, but it covers
// the other direction:
//
// We start with a call-graph like `[A] -> [B -> D] [C]` (where the letters are
// functions and the modules are enclosed in `[]`), and add a new call `D <- C`,
// yielding the new call-graph: `[A] -> [B -> D] <- [C]`
//
// The effect of this is that the compiler previously classfied `D` as internal
// and the import-set of `[A]` to be just `B`. But after adding the `D <- C` call,
// `D` is no longer classified as internal, and the import-set of `[A]` becomes
// both `B` and `D`.
//
// We check this case because an early proposed pull request included an
// assertion that the import-sets monotonically decreased over time, a claim
// which this test case proves to be false.

fn main() {
foo::foo();
bar::baz();
}

mod foo {

// In cfail1, ThinLTO decides that foo() does not get inlined into main, and
// instead bar() gets inlined into foo().
// In cfail2, foo() gets inlined into main.
pub fn foo(){
bar()
}

// This function needs to be big so that it does not get inlined by ThinLTO
// but *does* get inlined into foo() when it is declared `internal` in
// cfail1 (alone).
pub fn bar(){
println!("quux1");
println!("quux2");
println!("quux3");
println!("quux4");
println!("quux5");
println!("quux6");
println!("quux7");
println!("quux8");
println!("quux9");
}
}

mod bar {

#[inline(never)]
pub fn baz() {
#[cfg(cfail2)]
{
crate::foo::bar();
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// revisions: cfail1 cfail2
// compile-flags: -O -Zhuman-readable-cgu-names -Cllvm-args=-import-instr-limit=10
// build-pass

// rust-lang/rust#59535:
//
// Consider a call-graph like `[A] -> [B -> D] <- [C]` (where the letters are
// functions and the modules are enclosed in `[]`)
//
// In our specific instance, the earlier compilations were inlining the call
// to`B` into `A`; thus `A` ended up with a external reference to the symbol `D`
// in its object code, to be resolved at subsequent link time. The LTO import
// information provided by LLVM for those runs reflected that information: it
// explicitly says during those runs, `B` definition and `D` declaration were
// imported into `[A]`.
//
// The change between incremental builds was that the call `D <- C` was removed.
//
// That change, coupled with other decisions within `rustc`, made the compiler
// decide to make `D` an internal symbol (since it was no longer accessed from
// other codegen units, this makes sense locally). And then the definition of
// `D` was inlined into `B` and `D` itself was eliminated entirely.
//
// The current LTO import information reported that `B` alone is imported into
// `[A]` for the *current compilation*. So when the Rust compiler surveyed the
// dependence graph, it determined that nothing `[A]` imports changed since the
// last build (and `[A]` itself has not changed either), so it chooses to reuse
// the object code generated during the previous compilation.
//
// But that previous object code has an unresolved reference to `D`, and that
// causes a link time failure!

fn main() {
foo::foo();
bar::baz();
}

mod foo {

// In cfail1, foo() gets inlined into main.
// In cfail2, ThinLTO decides that foo() does not get inlined into main, and
// instead bar() gets inlined into foo(). But faulty logic in our incr.
// ThinLTO implementation thought that `main()` is unchanged and thus reused
// the object file still containing a call to the now non-existant bar().
pub fn foo(){
bar()
}

// This function needs to be big so that it does not get inlined by ThinLTO
// but *does* get inlined into foo() once it is declared `internal` in
// cfail2.
pub fn bar(){
println!("quux1");
println!("quux2");
println!("quux3");
println!("quux4");
println!("quux5");
println!("quux6");
println!("quux7");
println!("quux8");
println!("quux9");
}
}

mod bar {

#[inline(never)]
pub fn baz() {
#[cfg(cfail1)]
{
crate::foo::bar();
}
}
}