diff --git a/src/doc/rustc/src/codegen-options/index.md b/src/doc/rustc/src/codegen-options/index.md index 0dc81378e05b2..8dc6257ce2e58 100644 --- a/src/doc/rustc/src/codegen-options/index.md +++ b/src/doc/rustc/src/codegen-options/index.md @@ -257,9 +257,8 @@ them in parallel. Increasing parallelism may speed up compile times, but may also produce slower code. Setting this to 1 may improve the performance of generated code, but may be slower to compile. -The default, if not specified, is 16. This flag is ignored if -[incremental](#incremental) is enabled, in which case an internal heuristic is -used to split the crate. +The default, if not specified, is 16 for non-incremental builds. For +incremental builds the default is 256 which allows caching to be more granular. ## remark diff --git a/src/libcore/convert/mod.rs b/src/libcore/convert/mod.rs index 47ab8715cfa14..eef9ee7cb0093 100644 --- a/src/libcore/convert/mod.rs +++ b/src/libcore/convert/mod.rs @@ -41,6 +41,7 @@ #![stable(feature = "rust1", since = "1.0.0")] use crate::fmt; +use crate::hash::{Hash, Hasher}; mod num; @@ -746,3 +747,10 @@ impl From for Infallible { x } } + +#[stable(feature = "convert_infallible_hash", since = "1.44.0")] +impl Hash for Infallible { + fn hash(&self, _: &mut H) { + match *self {} + } +} diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs index ba01e370aaef5..9068c0541a428 100644 --- a/src/librustc_mir/monomorphize/partitioning.rs +++ b/src/librustc_mir/monomorphize/partitioning.rs @@ -107,19 +107,11 @@ use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; use rustc_middle::ty::print::characteristic_def_id_of_type; use rustc_middle::ty::query::Providers; use rustc_middle::ty::{self, DefIdTree, InstanceDef, TyCtxt}; -use rustc_span::symbol::Symbol; +use rustc_span::symbol::{Symbol, SymbolStr}; use crate::monomorphize::collector::InliningMap; use crate::monomorphize::collector::{self, MonoItemCollectionMode}; -pub enum PartitioningStrategy { - /// Generates one codegen unit per source-level module. - PerModule, - - /// Partition the whole crate into a fixed number of codegen units. - FixedUnitCount(usize), -} - // Anything we can't find a proper codegen unit for goes into this. fn fallback_cgu_name(name_builder: &mut CodegenUnitNameBuilder<'_>) -> Symbol { name_builder.build_cgu_name(LOCAL_CRATE, &["fallback"], Some("cgu")) @@ -128,7 +120,7 @@ fn fallback_cgu_name(name_builder: &mut CodegenUnitNameBuilder<'_>) -> Symbol { pub fn partition<'tcx, I>( tcx: TyCtxt<'tcx>, mono_items: I, - strategy: PartitioningStrategy, + max_cgu_count: usize, inlining_map: &InliningMap<'tcx>, ) -> Vec> where @@ -148,11 +140,10 @@ where debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter()); - // If the partitioning should produce a fixed count of codegen units, merge - // until that count is reached. - if let PartitioningStrategy::FixedUnitCount(count) = strategy { + // Merge until we have at most `max_cgu_count` codegen units. + { let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus"); - merge_codegen_units(tcx, &mut initial_partitioning, count); + merge_codegen_units(tcx, &mut initial_partitioning, max_cgu_count); debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter()); } @@ -480,6 +471,10 @@ fn merge_codegen_units<'tcx>( // the stable sort below will keep everything nice and deterministic. codegen_units.sort_by_cached_key(|cgu| cgu.name().as_str()); + // This map keeps track of what got merged into what. + let mut cgu_contents: FxHashMap> = + codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name().as_str()])).collect(); + // Merge the two smallest codegen units until the target size is reached. while codegen_units.len() > target_cgu_count { // Sort small cgus to the back @@ -487,20 +482,67 @@ fn merge_codegen_units<'tcx>( let mut smallest = codegen_units.pop().unwrap(); let second_smallest = codegen_units.last_mut().unwrap(); + // Move the mono-items from `smallest` to `second_smallest` second_smallest.modify_size_estimate(smallest.size_estimate()); for (k, v) in smallest.items_mut().drain() { second_smallest.items_mut().insert(k, v); } + + // Record that `second_smallest` now contains all the stuff that was in + // `smallest` before. + let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap(); + cgu_contents.get_mut(&second_smallest.name()).unwrap().extend(consumed_cgu_names.drain(..)); + debug!( - "CodegenUnit {} merged in to CodegenUnit {}", + "CodegenUnit {} merged into CodegenUnit {}", smallest.name(), second_smallest.name() ); } let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx); - for (index, cgu) in codegen_units.iter_mut().enumerate() { - cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index)); + + if tcx.sess.opts.incremental.is_some() { + // If we are doing incremental compilation, we want CGU names to + // reflect the path of the source level module they correspond to. + // For CGUs that contain the code of multiple modules because of the + // merging done above, we use a concatenation of the names of + // all contained CGUs. + let new_cgu_names: FxHashMap = cgu_contents + .into_iter() + // This `filter` makes sure we only update the name of CGUs that + // were actually modified by merging. + .filter(|(_, cgu_contents)| cgu_contents.len() > 1) + .map(|(current_cgu_name, cgu_contents)| { + let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| &s[..]).collect(); + + // Sort the names, so things are deterministic and easy to + // predict. + cgu_contents.sort(); + + (current_cgu_name, cgu_contents.join("--")) + }) + .collect(); + + for cgu in codegen_units.iter_mut() { + if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) { + if tcx.sess.opts.debugging_opts.human_readable_cgu_names { + cgu.set_name(Symbol::intern(&new_cgu_name)); + } else { + // If we don't require CGU names to be human-readable, we + // use a fixed length hash of the composite CGU name + // instead. + let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name); + cgu.set_name(Symbol::intern(&new_cgu_name)); + } + } + } + } else { + // If we are compiling non-incrementally we just generate simple CGU + // names containing an index. + for (index, cgu) in codegen_units.iter_mut().enumerate() { + cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index)); + } } } @@ -879,13 +921,7 @@ fn collect_and_partition_mono_items( let (codegen_units, _) = tcx.sess.time("partition_and_assert_distinct_symbols", || { sync::join( || { - let strategy = if tcx.sess.opts.incremental.is_some() { - PartitioningStrategy::PerModule - } else { - PartitioningStrategy::FixedUnitCount(tcx.sess.codegen_units()) - }; - - partition(tcx, items.iter().cloned(), strategy, &inlining_map) + partition(tcx, items.iter().cloned(), tcx.sess.codegen_units(), &inlining_map) .into_iter() .map(Arc::new) .collect::>() diff --git a/src/librustc_session/session.rs b/src/librustc_session/session.rs index b3d75143c5639..7f8b55d9d768d 100644 --- a/src/librustc_session/session.rs +++ b/src/librustc_session/session.rs @@ -758,6 +758,13 @@ impl Session { return n as usize; } + // If incremental compilation is turned on, we default to a high number + // codegen units in order to reduce the "collateral damage" small + // changes cause. + if self.opts.incremental.is_some() { + return 256; + } + // Why is 16 codegen units the default all the time? // // The main reason for enabling multiple codegen units by default is to diff --git a/src/libstd/sys/cloudabi/thread.rs b/src/libstd/sys/cloudabi/thread.rs index 3afcae7ae7516..abc15b18e321a 100644 --- a/src/libstd/sys/cloudabi/thread.rs +++ b/src/libstd/sys/cloudabi/thread.rs @@ -4,8 +4,8 @@ use crate::io; use crate::mem; use crate::ptr; use crate::sys::cloudabi::abi; +use crate::sys::stack_overflow; use crate::sys::time::checked_dur2intervals; -use crate::sys_common::thread::*; use crate::time::Duration; pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024; @@ -22,7 +22,7 @@ unsafe impl Sync for Thread {} impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements pub unsafe fn new(stack: usize, p: Box) -> io::Result { - let p = box p; + let p = Box::into_raw(box p); let mut native: libc::pthread_t = mem::zeroed(); let mut attr: libc::pthread_attr_t = mem::zeroed(); assert_eq!(libc::pthread_attr_init(&mut attr), 0); @@ -30,19 +30,28 @@ impl Thread { let stack_size = cmp::max(stack, min_stack_size(&attr)); assert_eq!(libc::pthread_attr_setstacksize(&mut attr, stack_size), 0); - let ret = libc::pthread_create(&mut native, &attr, thread_start, &*p as *const _ as *mut _); + let ret = libc::pthread_create(&mut native, &attr, thread_start, p as *mut _); + // Note: if the thread creation fails and this assert fails, then p will + // be leaked. However, an alternative design could cause double-free + // which is clearly worse. assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); return if ret != 0 { + // The thread failed to start and as a result p was not consumed. Therefore, it is + // safe to reconstruct the box so that it gets deallocated. + drop(Box::from_raw(p)); Err(io::Error::from_raw_os_error(ret)) } else { - mem::forget(p); // ownership passed to pthread_create Ok(Thread { id: native }) }; extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { unsafe { - start_thread(main as *mut u8); + // Next, set up our stack overflow handler which may get triggered if we run + // out of stack. + let _handler = stack_overflow::Handler::new(); + // Finally, let's run some code. + Box::from_raw(main as *mut Box)(); } ptr::null_mut() } diff --git a/src/libstd/sys/hermit/thread.rs b/src/libstd/sys/hermit/thread.rs index c3c29c93826de..4f20a6453fc27 100644 --- a/src/libstd/sys/hermit/thread.rs +++ b/src/libstd/sys/hermit/thread.rs @@ -5,11 +5,10 @@ use crate::fmt; use crate::io; use crate::mem; use crate::sys::hermit::abi; +use crate::sys::stack_overflow; use crate::time::Duration; use core::u32; -use crate::sys_common::thread::*; - pub type Tid = abi::Tid; /// Priority of a task @@ -49,26 +48,32 @@ impl Thread { p: Box, core_id: isize, ) -> io::Result { - let p = box p; + let p = Box::into_raw(box p); let mut tid: Tid = u32::MAX; let ret = abi::spawn( &mut tid as *mut Tid, thread_start, - &*p as *const _ as *const u8 as usize, + p as *mut u8 as usize, Priority::into(NORMAL_PRIO), core_id, ); - return if ret == 0 { - mem::forget(p); // ownership passed to pthread_create - Ok(Thread { tid: tid }) - } else { + return if ret != 0 { + // The thread failed to start and as a result p was not consumed. Therefore, it is + // safe to reconstruct the box so that it gets deallocated. + drop(Box::from_raw(p)); Err(io::Error::new(io::ErrorKind::Other, "Unable to create thread!")) + } else { + Ok(Thread { tid: tid }) }; extern "C" fn thread_start(main: usize) { unsafe { - start_thread(main as *mut u8); + // Next, set up our stack overflow handler which may get triggered if we run + // out of stack. + let _handler = stack_overflow::Handler::new(); + // Finally, let's run some code. + Box::from_raw(main as *mut Box)(); } } } diff --git a/src/libstd/sys/unix/thread.rs b/src/libstd/sys/unix/thread.rs index 674d4c7113801..aab5a92a7ad2a 100644 --- a/src/libstd/sys/unix/thread.rs +++ b/src/libstd/sys/unix/thread.rs @@ -3,11 +3,9 @@ use crate::ffi::CStr; use crate::io; use crate::mem; use crate::ptr; -use crate::sys::os; +use crate::sys::{os, stack_overflow}; use crate::time::Duration; -use crate::sys_common::thread::*; - #[cfg(not(target_os = "l4re"))] pub const DEFAULT_MIN_STACK_SIZE: usize = 2 * 1024 * 1024; #[cfg(target_os = "l4re")] @@ -43,7 +41,7 @@ unsafe fn pthread_attr_setstacksize( impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements pub unsafe fn new(stack: usize, p: Box) -> io::Result { - let p = box p; + let p = Box::into_raw(box p); let mut native: libc::pthread_t = mem::zeroed(); let mut attr: libc::pthread_attr_t = mem::zeroed(); assert_eq!(libc::pthread_attr_init(&mut attr), 0); @@ -65,19 +63,28 @@ impl Thread { } }; - let ret = libc::pthread_create(&mut native, &attr, thread_start, &*p as *const _ as *mut _); + let ret = libc::pthread_create(&mut native, &attr, thread_start, p as *mut _); + // Note: if the thread creation fails and this assert fails, then p will + // be leaked. However, an alternative design could cause double-free + // which is clearly worse. assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); return if ret != 0 { + // The thread failed to start and as a result p was not consumed. Therefore, it is + // safe to reconstruct the box so that it gets deallocated. + drop(Box::from_raw(p)); Err(io::Error::from_raw_os_error(ret)) } else { - mem::forget(p); // ownership passed to pthread_create Ok(Thread { id: native }) }; extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { unsafe { - start_thread(main as *mut u8); + // Next, set up our stack overflow handler which may get triggered if we run + // out of stack. + let _handler = stack_overflow::Handler::new(); + // Finally, let's run some code. + Box::from_raw(main as *mut Box)(); } ptr::null_mut() } diff --git a/src/libstd/sys/vxworks/thread.rs b/src/libstd/sys/vxworks/thread.rs index e0d104b5f3ec9..4d0196e4b4de5 100644 --- a/src/libstd/sys/vxworks/thread.rs +++ b/src/libstd/sys/vxworks/thread.rs @@ -3,11 +3,9 @@ use crate::ffi::CStr; use crate::io; use crate::mem; use crate::ptr; -use crate::sys::os; +use crate::sys::{os, stack_overflow}; use crate::time::Duration; -use crate::sys_common::thread::*; - pub const DEFAULT_MIN_STACK_SIZE: usize = 0x40000; // 256K pub struct Thread { @@ -31,7 +29,7 @@ unsafe fn pthread_attr_setstacksize( impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements pub unsafe fn new(stack: usize, p: Box) -> io::Result { - let p = box p; + let p = Box::into_raw(box p); let mut native: libc::pthread_t = mem::zeroed(); let mut attr: libc::pthread_attr_t = mem::zeroed(); assert_eq!(libc::pthread_attr_init(&mut attr), 0); @@ -53,19 +51,28 @@ impl Thread { } }; - let ret = libc::pthread_create(&mut native, &attr, thread_start, &*p as *const _ as *mut _); + let ret = libc::pthread_create(&mut native, &attr, thread_start, p as *mut _); + // Note: if the thread creation fails and this assert fails, then p will + // be leaked. However, an alternative design could cause double-free + // which is clearly worse. assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); return if ret != 0 { + // The thread failed to start and as a result p was not consumed. Therefore, it is + // safe to reconstruct the box so that it gets deallocated. + drop(Box::from_raw(p)); Err(io::Error::from_raw_os_error(ret)) } else { - mem::forget(p); // ownership passed to pthread_create Ok(Thread { id: native }) }; extern "C" fn thread_start(main: *mut libc::c_void) -> *mut libc::c_void { unsafe { - start_thread(main as *mut u8); + // Next, set up our stack overflow handler which may get triggered if we run + // out of stack. + let _handler = stack_overflow::Handler::new(); + // Finally, let's run some code. + Box::from_raw(main as *mut Box)(); } ptr::null_mut() } diff --git a/src/libstd/sys/windows/thread.rs b/src/libstd/sys/windows/thread.rs index c828243a59b11..38839ea5e90ed 100644 --- a/src/libstd/sys/windows/thread.rs +++ b/src/libstd/sys/windows/thread.rs @@ -1,10 +1,9 @@ use crate::ffi::CStr; use crate::io; -use crate::mem; use crate::ptr; use crate::sys::c; use crate::sys::handle::Handle; -use crate::sys_common::thread::*; +use crate::sys::stack_overflow; use crate::time::Duration; use libc::c_void; @@ -20,7 +19,7 @@ pub struct Thread { impl Thread { // unsafe: see thread::Builder::spawn_unchecked for safety requirements pub unsafe fn new(stack: usize, p: Box) -> io::Result { - let p = box p; + let p = Box::into_raw(box p); // FIXME On UNIX, we guard against stack sizes that are too small but // that's because pthreads enforces that stacks are at least @@ -34,21 +33,27 @@ impl Thread { ptr::null_mut(), stack_size, thread_start, - &*p as *const _ as *mut _, + p as *mut _, c::STACK_SIZE_PARAM_IS_A_RESERVATION, ptr::null_mut(), ); return if ret as usize == 0 { + // The thread failed to start and as a result p was not consumed. Therefore, it is + // safe to reconstruct the box so that it gets deallocated. + drop(Box::from_raw(p)); Err(io::Error::last_os_error()) } else { - mem::forget(p); // ownership passed to CreateThread Ok(Thread { handle: Handle::new(ret) }) }; extern "system" fn thread_start(main: *mut c_void) -> c::DWORD { unsafe { - start_thread(main as *mut u8); + // Next, set up our stack overflow handler which may get triggered if we run + // out of stack. + let _handler = stack_overflow::Handler::new(); + // Finally, let's run some code. + Box::from_raw(main as *mut Box)(); } 0 } diff --git a/src/libstd/sys_common/thread.rs b/src/libstd/sys_common/thread.rs index 6ab0d5cbe9c96..f3a8bef8f718f 100644 --- a/src/libstd/sys_common/thread.rs +++ b/src/libstd/sys_common/thread.rs @@ -1,18 +1,7 @@ use crate::env; use crate::sync::atomic::{self, Ordering}; -use crate::sys::stack_overflow; use crate::sys::thread as imp; -#[allow(dead_code)] -pub unsafe fn start_thread(main: *mut u8) { - // Next, set up our stack overflow handler which may get triggered if we run - // out of stack. - let _handler = stack_overflow::Handler::new(); - - // Finally, let's run some code. - Box::from_raw(main as *mut Box)() -} - pub fn min_stack() -> usize { static MIN: atomic::AtomicUsize = atomic::AtomicUsize::new(0); match MIN.load(Ordering::SeqCst) { diff --git a/src/test/codegen-units/partitioning/incremental-merging.rs b/src/test/codegen-units/partitioning/incremental-merging.rs new file mode 100644 index 0000000000000..ca2df19096e4d --- /dev/null +++ b/src/test/codegen-units/partitioning/incremental-merging.rs @@ -0,0 +1,42 @@ +// ignore-tidy-linelength +// We specify -C incremental here because we want to test the partitioning for +// incremental compilation +// compile-flags:-Zprint-mono-items=lazy -Cincremental=tmp/partitioning-tests/incremental-merging +// compile-flags:-Ccodegen-units=3 + +#![crate_type = "rlib"] + +// This test makes sure that merging of CGUs works together with incremental +// compilation but at the same time does not modify names of CGUs that were not +// affected by merging. +// +// We expect CGUs `aaa` and `bbb` to be merged (because they are the smallest), +// while `ccc` and `ddd` are supposed to stay untouched. + +pub mod aaa { + //~ MONO_ITEM fn incremental_merging::aaa[0]::foo[0] @@ incremental_merging-aaa--incremental_merging-bbb[External] + pub fn foo(a: u64) -> u64 { + a + 1 + } +} + +pub mod bbb { + //~ MONO_ITEM fn incremental_merging::bbb[0]::foo[0] @@ incremental_merging-aaa--incremental_merging-bbb[External] + pub fn foo(a: u64, b: u64) -> u64 { + a + b + 1 + } +} + +pub mod ccc { + //~ MONO_ITEM fn incremental_merging::ccc[0]::foo[0] @@ incremental_merging-ccc[External] + pub fn foo(a: u64, b: u64, c: u64) -> u64 { + a + b + c + 1 + } +} + +pub mod ddd { + //~ MONO_ITEM fn incremental_merging::ddd[0]::foo[0] @@ incremental_merging-ddd[External] + pub fn foo(a: u64, b: u64, c: u64, d: u64) -> u64 { + a + b + c + d + 1 + } +} diff --git a/src/tools/compiletest/src/runtest.rs b/src/tools/compiletest/src/runtest.rs index 8a291a3611efd..04d34fd0d4a59 100644 --- a/src/tools/compiletest/src/runtest.rs +++ b/src/tools/compiletest/src/runtest.rs @@ -2513,7 +2513,7 @@ impl<'test> TestCx<'test> { .filter(|s| !s.is_empty()) .map(|s| { if cgu_has_crate_disambiguator { - remove_crate_disambiguator_from_cgu(s) + remove_crate_disambiguators_from_set_of_cgu_names(s) } else { s.to_string() } @@ -2563,6 +2563,16 @@ impl<'test> TestCx<'test> { new_name } + + // The name of merged CGUs is constructed as the names of the original + // CGUs joined with "--". This function splits such composite CGU names + // and handles each component individually. + fn remove_crate_disambiguators_from_set_of_cgu_names(cgus: &str) -> String { + cgus.split("--") + .map(|cgu| remove_crate_disambiguator_from_cgu(cgu)) + .collect::>() + .join("--") + } } fn init_incremental_test(&self) {