-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use sharded maps for interning #61779
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
use std::hash::{Hasher, Hash}; | ||
use std::mem; | ||
use std::borrow::Borrow; | ||
use std::collections::hash_map::RawEntryMut; | ||
use crate::fx::{FxHasher, FxHashMap}; | ||
use crate::sync::{Lock, LockGuard}; | ||
|
||
/// Thin wrapper around a `T`.
///
/// When the `parallel_compiler` cfg is enabled the wrapper is given an
/// alignment of 64 bytes (presumably one cache line, so that neighbouring
/// elements of an array do not share a line); otherwise it has the natural
/// layout of a one-field tuple struct.
#[cfg_attr(parallel_compiler, repr(align(64)))]
#[derive(Clone, Default)]
struct CacheAligned<T>(T);
|
||
/// Base-2 logarithm of the number of shards when building the parallel compiler.
///
/// 32 shards is sufficient to reduce contention on an 8-core Ryzen 7 1700,
/// but this should be tested on higher core count CPUs. How the `Sharded` type
/// gets used may also affect the ideal number of shards.
#[cfg(parallel_compiler)]
const SHARD_BITS: usize = 5;

/// Base-2 logarithm of the number of shards for the non-parallel compiler:
/// zero bits, i.e. a single shard.
#[cfg(not(parallel_compiler))]
const SHARD_BITS: usize = 0;

/// Total number of shards; always a power of two derived from `SHARD_BITS`.
const SHARDS: usize = 1 << SHARD_BITS;
|
||
/// An array of cache-line aligned inner locked structures with convenience methods. | ||
#[derive(Clone)] | ||
pub struct Sharded<T> { | ||
shards: [CacheAligned<Lock<T>>; SHARDS], | ||
} | ||
|
||
impl<T: Default> Default for Sharded<T> { | ||
#[inline] | ||
fn default() -> Self { | ||
let mut shards: mem::MaybeUninit<[CacheAligned<Lock<T>>; SHARDS]> = | ||
mem::MaybeUninit::uninit(); | ||
let first = shards.as_mut_ptr() as *mut CacheAligned<Lock<T>>; | ||
unsafe { | ||
for i in 0..SHARDS { | ||
first.add(i).write(CacheAligned(Lock::new(T::default()))); | ||
} | ||
Sharded { | ||
shards: shards.assume_init(), | ||
} | ||
} | ||
} | ||
} | ||
|
||
impl<T> Sharded<T> { | ||
#[inline] | ||
pub fn get_shard_by_value<K: Hash + ?Sized>(&self, val: &K) -> &Lock<T> { | ||
if SHARDS == 1 { | ||
&self.shards[0].0 | ||
} else { | ||
self.get_shard_by_hash(make_hash(val)) | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn get_shard_by_hash(&self, hash: u64) -> &Lock<T> { | ||
oli-obk marked this conversation as resolved.
Show resolved
Hide resolved
|
||
let hash_len = mem::size_of::<usize>(); | ||
// Ignore the top 7 bits as hashbrown uses these and get the next SHARD_BITS highest bits. | ||
// hashbrown also uses the lowest bits, so we can't use those | ||
let bits = (hash >> (hash_len * 8 - 7 - SHARD_BITS)) as usize; | ||
let i = bits % SHARDS; | ||
&self.shards[i].0 | ||
} | ||
|
||
pub fn lock_shards(&self) -> Vec<LockGuard<'_, T>> { | ||
(0..SHARDS).map(|i| self.shards[i].0.lock()).collect() | ||
} | ||
|
||
pub fn try_lock_shards(&self) -> Option<Vec<LockGuard<'_, T>>> { | ||
(0..SHARDS).map(|i| self.shards[i].0.try_lock()).collect() | ||
} | ||
} | ||
|
||
pub type ShardedHashMap<K, V> = Sharded<FxHashMap<K, V>>; | ||
|
||
impl<K: Eq + Hash, V> ShardedHashMap<K, V> { | ||
pub fn len(&self) -> usize { | ||
self.lock_shards().iter().map(|shard| shard.len()).sum() | ||
} | ||
} | ||
|
||
impl<K: Eq + Hash + Copy> ShardedHashMap<K, ()> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Absent intent to actually use this type as a map, you might as well just make this ShardedHashSet? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It doesn't have set operations, so I'd find that misleading. |
||
#[inline] | ||
pub fn intern_ref<Q: ?Sized>(&self, value: &Q, make: impl FnOnce() -> K) -> K | ||
where K: Borrow<Q>, | ||
Q: Hash + Eq | ||
{ | ||
let hash = make_hash(value); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth putting a comment here explaining that we can't use the map's hasher because we need the hash to find the map? Also you could arguably do something silly like make a HashMap::default here just so this code is easier to change but... meh? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The hash map's hasher is never used, so there isn't really a reason to access it. |
||
let mut shard = self.get_shard_by_hash(hash).lock(); | ||
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, value); | ||
|
||
match entry { | ||
RawEntryMut::Occupied(e) => *e.key(), | ||
RawEntryMut::Vacant(e) => { | ||
let v = make(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's a bit off to call this v when it's a key (same for other function) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's also the interned value we return =P |
||
e.insert_hashed_nocheck(hash, v, ()); | ||
v | ||
} | ||
} | ||
} | ||
|
||
#[inline] | ||
pub fn intern<Q>(&self, value: Q, make: impl FnOnce(Q) -> K) -> K | ||
where K: Borrow<Q>, | ||
Q: Hash + Eq | ||
{ | ||
let hash = make_hash(&value); | ||
let mut shard = self.get_shard_by_hash(hash).lock(); | ||
let entry = shard.raw_entry_mut().from_key_hashed_nocheck(hash, &value); | ||
|
||
match entry { | ||
RawEntryMut::Occupied(e) => *e.key(), | ||
RawEntryMut::Vacant(e) => { | ||
let v = make(value); | ||
e.insert_hashed_nocheck(hash, v, ()); | ||
v | ||
} | ||
} | ||
} | ||
} | ||
|
||
#[inline] | ||
fn make_hash<K: Hash + ?Sized>(val: &K) -> u64 { | ||
let mut state = FxHasher::default(); | ||
val.hash(&mut state); | ||
state.finish() | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some comment on the magic number would be nice
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added a comment here.