Auto merge of #61779 - Zoxc:sharded, r=oli-obk

bors · bors · commit 3fe8285d3a87 · 2019-07-20T18:03:14.000Z
Use sharded maps for interning Cuts down runtime from 5.5s to 3.8s for non-incremental `syntex_syntax` check builds with 16 threads / 8 cores. r? @eddyb
diff --git a/src/librustc/ty/context.rs b/src/librustc/ty/context.rs
@@ -46,14 +46,14 @@ use crate::util::common::ErrorReported;
 use crate::util::nodemap::{DefIdMap, DefIdSet, ItemLocalMap, ItemLocalSet};
 use crate::util::nodemap::{FxHashMap, FxHashSet};
 use errors::DiagnosticBuilder;
-use rustc_data_structures::interner::HashInterner;
 use smallvec::SmallVec;
 use rustc_data_structures::stable_hasher::{HashStable, hash_stable_hashmap,
                                            StableHasher, StableHasherResult,
                                            StableVec};
 use arena::SyncDroplessArena;
 use rustc_data_structures::indexed_vec::{Idx, IndexVec};
 use rustc_data_structures::sync::{Lrc, Lock, WorkerLocal};
+use rustc_data_structures::sharded::ShardedHashMap;
 use std::any::Any;
 use std::borrow::Borrow;
 use std::cmp::Ordering;
@@ -88,7 +88,7 @@ impl AllArenas {
     }
 }
 
-type InternedSet<'tcx, T> = Lock<FxHashMap<Interned<'tcx, T>, ()>>;
+type InternedSet<'tcx, T> = ShardedHashMap<Interned<'tcx, T>, ()>;
 
 pub struct CtxtInterners<'tcx> {
     /// The arena that types, regions, etc are allocated from
@@ -135,7 +135,7 @@ impl<'tcx> CtxtInterners<'tcx> {
     fn intern_ty(&self,
         st: TyKind<'tcx>
     ) -> Ty<'tcx> {
-        self.type_.borrow_mut().intern(st, |st| {
+        self.type_.intern(st, |st| {
             let flags = super::flags::FlagComputation::for_sty(&st);
 
             let ty_struct = TyS {
@@ -924,7 +924,7 @@ impl<'tcx> CommonTypes<'tcx> {
 impl<'tcx> CommonLifetimes<'tcx> {
     fn new(interners: &CtxtInterners<'tcx>) -> CommonLifetimes<'tcx> {
         let mk = |r| {
-            interners.region.borrow_mut().intern(r, |r| {
+            interners.region.intern(r, |r| {
                 Interned(interners.arena.alloc(r))
             }).0
         };
@@ -940,7 +940,7 @@ impl<'tcx> CommonLifetimes<'tcx> {
 impl<'tcx> CommonConsts<'tcx> {
     fn new(interners: &CtxtInterners<'tcx>, types: &CommonTypes<'tcx>) -> CommonConsts<'tcx> {
         let mk_const = |c| {
-            interners.const_.borrow_mut().intern(c, |c| {
+            interners.const_.intern(c, |c| {
                 Interned(interners.arena.alloc(c))
             }).0
         };
@@ -1053,14 +1053,14 @@ pub struct GlobalCtxt<'tcx> {
     /// Data layout specification for the current target.
     pub data_layout: TargetDataLayout,
 
-    stability_interner: Lock<FxHashMap<&'tcx attr::Stability, ()>>,
+    stability_interner: ShardedHashMap<&'tcx attr::Stability, ()>,
 
     /// Stores the value of constants (and deduplicates the actual memory)
-    allocation_interner: Lock<FxHashMap<&'tcx Allocation, ()>>,
+    allocation_interner: ShardedHashMap<&'tcx Allocation, ()>,
 
     pub alloc_map: Lock<interpret::AllocMap<'tcx>>,
 
-    layout_interner: Lock<FxHashMap<&'tcx LayoutDetails, ()>>,
+    layout_interner: ShardedHashMap<&'tcx LayoutDetails, ()>,
 
     /// A general purpose channel to throw data out the back towards LLVM worker
     /// threads.
@@ -1103,7 +1103,7 @@ impl<'tcx> TyCtxt<'tcx> {
     }
 
     pub fn intern_const_alloc(self, alloc: Allocation) -> &'tcx Allocation {
-        self.allocation_interner.borrow_mut().intern(alloc, |alloc| {
+        self.allocation_interner.intern(alloc, |alloc| {
             self.arena.alloc(alloc)
         })
     }
@@ -1117,13 +1117,13 @@ impl<'tcx> TyCtxt<'tcx> {
     }
 
     pub fn intern_stability(self, stab: attr::Stability) -> &'tcx attr::Stability {
-        self.stability_interner.borrow_mut().intern(stab, |stab| {
+        self.stability_interner.intern(stab, |stab| {
             self.arena.alloc(stab)
         })
     }
 
     pub fn intern_layout(self, layout: LayoutDetails) -> &'tcx LayoutDetails {
-        self.layout_interner.borrow_mut().intern(layout, |layout| {
+        self.layout_interner.intern(layout, |layout| {
             self.arena.alloc(layout)
         })
     }
@@ -2023,7 +2023,9 @@ macro_rules! sty_debug_print {
                 };
                 $(let mut $variant = total;)*
 
-                for &Interned(t) in tcx.interners.type_.borrow().keys() {
+                let shards = tcx.interners.type_.lock_shards();
+                let types = shards.iter().flat_map(|shard| shard.keys());
+                for &Interned(t) in types {
                     let variant = match t.sty {
                         ty::Bool | ty::Char | ty::Int(..) | ty::Uint(..) |
                             ty::Float(..) | ty::Str | ty::Never => continue,
@@ -2074,11 +2076,11 @@ impl<'tcx> TyCtxt<'tcx> {
             Generator, GeneratorWitness, Dynamic, Closure, Tuple, Bound,
             Param, Infer, UnnormalizedProjection, Projection, Opaque, Foreign);
 
-        println!("InternalSubsts interner: #{}", self.interners.substs.borrow().len());
-        println!("Region interner: #{}", self.interners.region.borrow().len());
-        println!("Stability interner: #{}", self.stability_interner.borrow().len());
-        println!("Allocation interner: #{}", self.allocation_interner.borrow().len());
-        println!("Layout interner: #{}", self.layout_interner.borrow().len());
+        println!("InternalSubsts interner: #{}", self.interners.substs.len());
+        println!("Region interner: #{}", self.interners.region.len());
+        println!("Stability interner: #{}", self.stability_interner.len());
+        println!("Allocation interner: #{}", self.allocation_interner.len());
+        println!("Layout interner: #{}", self.layout_interner.len());
     }
 }
 
@@ -2207,7 +2209,7 @@ macro_rules! intern_method {
             pub fn $method(self, v: $alloc) -> &$lt_tcx $ty {
                 let key = ($alloc_to_key)(&v);
 
-                self.interners.$name.borrow_mut().intern_ref(key, || {
+                self.interners.$name.intern_ref(key, || {
                     Interned($alloc_method(&self.interners.arena, v))
 
                 }).0
diff --git a/src/librustc_data_structures/interner.rs b/src/librustc_data_structures/interner.rs
diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs
@@ -77,7 +77,6 @@ pub mod flock;
 pub mod fx;
 pub mod graph;
 pub mod indexed_vec;
-pub mod interner;
 pub mod jobserver;
 pub mod obligation_forest;
 pub mod owning_ref;
@@ -89,6 +88,7 @@ pub use ena::snapshot_vec;
 pub mod sorted_map;
 #[macro_use] pub mod stable_hasher;
 pub mod sync;
+pub mod sharded;
 pub mod tiny_list;
 pub mod thin_vec;
 pub mod transitive_relation;
diff --git a/src/librustc_data_structures/sharded.rs b/src/librustc_data_structures/sharded.rs
@@ -0,0 +1,128 @@
+use std::hash::{Hasher, Hash};
+use std::mem;
+use std::borrow::Borrow;
+use std::collections::hash_map::RawEntryMut;
+use crate::fx::{FxHasher, FxHashMap};
+use crate::sync::{Lock, LockGuard};
+
+#[derive(Clone, Default)]
+#[cfg_attr(parallel_compiler, repr(align(64)))]
+struct CacheAligned<T>(T);
+
+#[cfg(parallel_compiler)]
+// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
+// but this should be tested on higher core count CPUs. How the `Sharded` type gets used
+// may also affect the ideal nunber of shards.
+const SHARD_BITS: usize = 5;
+
+#[cfg(not(parallel_compiler))]
+const SHARD_BITS: usize = 0;
+
+const SHARDS: usize = 1 << SHARD_BITS;
+
+/// An array of cache-line aligned inner locked structures with convenience methods.
+#[derive(Clone)]
+pub struct Sharded<T> {
+    shards: [CacheAligned<Lock<T>>; SHARDS],
+}
+
+impl<T: Default> Default for Sharded<T> {
+    #[inline]
+    fn default() -> Self {
+        let mut shards: mem::MaybeUninit<[CacheAligned<Lock<T>>; SHARDS]> =
+            mem::MaybeUninit::uninit();
+        let first = shards.as_mut_ptr() as *mut CacheAligned<Lock<T>>;
+        unsafe {
+            for i in 0..SHARDS {
+                first.add(i).write(CacheAligned(Lock::new(T::default())));
+            }
+            Sharded {
+                shards: shards.assume_init(),
+            }
+        }
+    }
+}
+
+impl<T> Sharded<T> {
+    #[inline]
+    pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> {
+        if SHARDS == 1 {
+            &self.shards[0].0
+        } else {
+            self.get_shard_by_hash(make_hash(val))
+        }
+    }
+
+    #[inline]
+    pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> {
+        let hash_len = mem::size_of::<usize>();
+        // Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits.
+        // hashbrown also uses the lowest bits, so we can't use those
+        let bits = (hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize;
+        let i = bits % SHARDS;
+        &self.shards[i].0
+    }
+
+    pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> {
+        (0..SHARDS).map(|i| self.shards[i].0.lock()).collect()
+    }
+
+    pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> {
+        (0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect()
+    }
+}
+
+pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>;
+
+impl<K: Eq + Hash, V> ShardedHashMap<K, V> {
+    pub fn len(&self) -> usize {
+        self.lock_shards().iter().map(|shard| shard.len()).sum()
+    }
+}
+
+impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> {
+    #[inline]
+    pub fn intern_ref<Q: ?Sized>(&self, value: &Q, make: impl FnOnce() -> K) -> K
+        where K: Borrow<Q>,
+              Q: Hash + Eq
+    {
+        let hash = make_hash(value);
+        let mut shard = self.get_shard_by_hash(hash).lock();
+        let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value);
+
+        match entry {
+            RawEntryMut::Occupied(e) => *e.key(),
+            RawEntryMut::Vacant(e) => {
+                let v = make();
+                e.insert_hashed_nocheck(hash, v, ());
+                v
+            }
+        }
+    }
+
+    #[inline]
+    pub fn intern<Q>(&self, value: Q, make: impl FnOnce(Q) -> K) -> K
+        where K: Borrow<Q>,
+              Q: Hash + Eq
+    {
+        let hash = make_hash(&value);
+        let mut shard = self.get_shard_by_hash(hash).lock();
+        let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value);
+
+        match entry {
+            RawEntryMut::Occupied(e) => *e.key(),
+            RawEntryMut::Vacant(e) => {
+                let v = make(value);
+                e.insert_hashed_nocheck(hash, v, ());
+                v
+            }
+        }
+    }
+}
+
+#[inline]
+fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 {
+    let mut state = FxHasher::default();
+    val.hash(&mut state);
+    state.finish()
+}