Commit c435821

Cache conscious hashmap table
1 parent a7bfb1a commit c435821

File tree

1 file changed

src/libstd/collections/hash/table.rs (+68 −88)
```diff
@@ -24,10 +24,10 @@ use self::BucketState::*;
 const EMPTY_BUCKET: u64 = 0;
 
 /// The raw hashtable, providing safe-ish access to the unzipped and highly
-/// optimized arrays of hashes, keys, and values.
+/// optimized arrays of hashes, and key-value pairs.
 ///
-/// This design uses less memory and is a lot faster than the naive
-/// `Vec<Option<u64, K, V>>`, because we don't pay for the overhead of an
+/// This design is a lot faster than the naive
+/// `Vec<Option<(u64, K, V)>>`, because we don't pay for the overhead of an
 /// option on every element, and we get a generally more cache-aware design.
 ///
 /// Essential invariants of this structure:
```
```diff
@@ -48,17 +48,19 @@ const EMPTY_BUCKET: u64 = 0;
 ///   which will likely map to the same bucket, while not being confused
 ///   with "empty".
 ///
-///   - All three "arrays represented by pointers" are the same length:
+///   - Both "arrays represented by pointers" are the same length:
 ///     `capacity`. This is set at creation and never changes. The arrays
-///     are unzipped to save space (we don't have to pay for the padding
-///     between odd sized elements, such as in a map from u64 to u8), and
-///     be more cache aware (scanning through 8 hashes brings in at most
-///     2 cache lines, since they're all right beside each other).
+///     are unzipped and are more cache aware (scanning through 8 hashes
+///     brings in at most 2 cache lines, since they're all right beside each
+///     other). This layout may waste space in padding such as in a map from
+///     u64 to u8, but is a more cache conscious layout as the key-value pairs
+///     are only very shortly probed and the desired value will be in the same
+///     or next cache line.
 ///
 /// You can kind of think of this module/data structure as a safe wrapper
 /// around just the "table" part of the hashtable. It enforces some
 /// invariants at the type level and employs some performance trickery,
-/// but in general is just a tricked out `Vec<Option<u64, K, V>>`.
+/// but in general is just a tricked out `Vec<Option<(u64, K, V)>>`.
 pub struct RawTable<K, V> {
     capacity: usize,
     size: usize,
```
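As a rough mental model of what this doc comment describes, here is a hypothetical sketch with plain `Vec`s standing in for the two raw in-place arrays (`ToyTable` and `find` are illustrative names, not code from this patch). Probing scans the dense hash array first and touches the pair array only when a hash matches, which is exactly where having key and value adjacent pays off:

```rust
// Hypothetical model of the unzipped layout; not the stdlib code.
struct ToyTable<K, V> {
    hashes: Vec<u64>,   // dense: a 64-byte cache line holds 8 hashes
    pairs: Vec<(K, V)>, // key and value adjacent: a hit costs ~1 more line
}

impl<K: PartialEq, V> ToyTable<K, V> {
    fn find(&self, hash: u64, key: &K) -> Option<&V> {
        // Scan the hash array; only a match dereferences into the pairs.
        for (i, &h) in self.hashes.iter().enumerate() {
            if h == hash && self.pairs[i].0 == *key {
                return Some(&self.pairs[i].1);
            }
        }
        None
    }
}

fn main() {
    let table = ToyTable { hashes: vec![7], pairs: vec![("a", 1)] };
    assert_eq!(table.find(7, &"a"), Some(&1));
}
```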
```diff
@@ -74,10 +76,8 @@ unsafe impl<K: Sync, V: Sync> Sync for RawTable<K, V> {}
 
 struct RawBucket<K, V> {
     hash: *mut u64,
-
     // We use *const to ensure covariance with respect to K and V
-    key: *const K,
-    val: *const V,
+    pair: *const (K, V),
     _marker: marker::PhantomData<(K, V)>,
 }
```
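The covariance comment on `pair` is easy to gloss over. A minimal standalone demo of what it buys (illustrative types, not from the patch): a struct holding `*const T` remains covariant in `T`, so an instantiation with a longer lifetime coerces to a shorter one; with `*mut T` the struct would be invariant and the call below would not compile.

```rust
// Covariance demo: *const T keeps Bucket covariant in T.
struct Bucket<T> {
    ptr: *const T,
}

// Ties the bucket's lifetime parameter to that of `_s`.
fn pair_with<'a>(b: Bucket<&'a str>, _s: &'a str) -> Bucket<&'a str> {
    b
}

fn main() {
    let b: Bucket<&'static str> = Bucket { ptr: std::ptr::null() };
    let local = String::from("x");
    // Covariance lets Bucket<&'static str> shrink to Bucket<&'a str>,
    // where 'a is the lifetime of `local`. With `ptr: *mut T` this
    // coercion would be rejected.
    let _short = pair_with(b, &local);
}
```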

```diff
@@ -181,8 +181,7 @@ impl<K, V> RawBucket<K, V> {
     unsafe fn offset(self, count: isize) -> RawBucket<K, V> {
         RawBucket {
             hash: self.hash.offset(count),
-            key: self.key.offset(count),
-            val: self.val.offset(count),
+            pair: self.pair.offset(count),
             _marker: marker::PhantomData,
         }
     }
```
```diff
@@ -370,8 +369,7 @@ impl<K, V, M> EmptyBucket<K, V, M>
     pub fn put(mut self, hash: SafeHash, key: K, value: V) -> FullBucket<K, V, M> {
         unsafe {
             *self.raw.hash = hash.inspect();
-            ptr::write(self.raw.key as *mut K, key);
-            ptr::write(self.raw.val as *mut V, value);
+            ptr::write(self.raw.pair as *mut (K, V), (key, value));
 
             self.table.borrow_table_mut().size += 1;
         }
```
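`put` now initializes the whole slot with one `ptr::write` of a tuple instead of two separate writes. A self-contained sketch of the idiom, with `MaybeUninit` standing in for the table's raw, uninitialized buffer (an assumption of this example, not how the patch allocates):

```rust
use std::mem::MaybeUninit;
use std::ptr;

fn main() {
    // One uninitialized (K, V) slot; key and value land adjacently
    // in a single write, as in the new `put`.
    let mut slot: MaybeUninit<(u32, String)> = MaybeUninit::uninit();
    let pair = unsafe {
        ptr::write(slot.as_mut_ptr(), (1, String::from("one")));
        slot.assume_init()
    };
    assert_eq!(pair, (1, String::from("one")));
}
```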
```diff
@@ -430,7 +428,7 @@ impl<K, V, M: Deref<Target = RawTable<K, V>>> FullBucket<K, V, M> {
 
     /// Gets references to the key and value at a given index.
     pub fn read(&self) -> (&K, &V) {
-        unsafe { (&*self.raw.key, &*self.raw.val) }
+        unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) }
     }
 }
```

```diff
@@ -447,13 +445,14 @@ impl<'t, K, V> FullBucket<K, V, &'t mut RawTable<K, V>> {
 
         unsafe {
             *self.raw.hash = EMPTY_BUCKET;
+            let (k, v) = ptr::read(self.raw.pair);
             (EmptyBucket {
                 raw: self.raw,
                 idx: self.idx,
                 table: self.table,
             },
-             ptr::read(self.raw.key),
-             ptr::read(self.raw.val))
+             k,
+             v)
         }
     }
 }
```
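Conversely, `take` now moves both fields out with a single `ptr::read` of the pair before the bucket is handed back as empty. A minimal sketch of moving out of raw storage this way (`ManuallyDrop` stands in for the table's ownership of the bytes; illustrative, not the patch's code):

```rust
use std::mem::ManuallyDrop;
use std::ptr;

fn main() {
    let slot = ManuallyDrop::new((1u32, String::from("one")));
    // Read the whole pair in one shot; the source must not be used
    // (or dropped) afterwards, which ManuallyDrop guarantees here.
    let (k, v) = unsafe { ptr::read(&*slot as *const (u32, String)) };
    assert_eq!((k, v.as_str()), (1, "one"));
}
```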
```diff
@@ -466,8 +465,7 @@ impl<K, V, M> FullBucket<K, V, M>
     pub fn replace(&mut self, h: SafeHash, k: K, v: V) -> (SafeHash, K, V) {
         unsafe {
             let old_hash = ptr::replace(self.raw.hash as *mut SafeHash, h);
-            let old_key = ptr::replace(self.raw.key as *mut K, k);
-            let old_val = ptr::replace(self.raw.val as *mut V, v);
+            let (old_key, old_val) = ptr::replace(self.raw.pair as *mut (K, V), (k, v));
 
             (old_hash, old_key, old_val)
         }
```
```diff
@@ -479,7 +477,8 @@ impl<K, V, M> FullBucket<K, V, M>
 {
     /// Gets mutable references to the key and value at a given index.
     pub fn read_mut(&mut self) -> (&mut K, &mut V) {
-        unsafe { (&mut *(self.raw.key as *mut K), &mut *(self.raw.val as *mut V)) }
+        let pair_mut = self.raw.pair as *mut (K, V);
+        unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) }
     }
 }
```
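Handing out `&mut` to both tuple fields through one raw pointer is fine because `.0` and `.1` never alias. A standalone sketch of the projection `read_mut` uses (toy values of my own, not table internals):

```rust
fn main() {
    let mut pair: (u32, String) = (1, String::from("one"));
    let pair_mut = &mut pair as *mut (u32, String);
    // Two disjoint mutable borrows derived from the same raw pointer,
    // mirroring `(&mut (*pair_mut).0, &mut (*pair_mut).1)` in the patch.
    let (k, v) = unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) };
    *k += 1;
    v.push('!');
    assert_eq!(pair, (2, String::from("one!")));
}
```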

```diff
@@ -492,7 +491,7 @@ impl<'t, K, V, M> FullBucket<K, V, M>
     /// in exchange for this, the returned references have a longer lifetime
     /// than the references returned by `read()`.
     pub fn into_refs(self) -> (&'t K, &'t V) {
-        unsafe { (&*self.raw.key, &*self.raw.val) }
+        unsafe { (&(*self.raw.pair).0, &(*self.raw.pair).1) }
     }
 }
```

```diff
@@ -502,7 +501,8 @@ impl<'t, K, V, M> FullBucket<K, V, M>
     /// This works similarly to `into_refs`, exchanging a bucket state
     /// for mutable references into the table.
     pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
-        unsafe { (&mut *(self.raw.key as *mut K), &mut *(self.raw.val as *mut V)) }
+        let pair_mut = self.raw.pair as *mut (K, V);
+        unsafe { (&mut (*pair_mut).0, &mut (*pair_mut).1) }
     }
 }
```

```diff
@@ -517,8 +517,7 @@ impl<K, V, M> GapThenFull<K, V, M>
     pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
         unsafe {
             *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
-            ptr::copy_nonoverlapping(self.full.raw.key, self.gap.raw.key as *mut K, 1);
-            ptr::copy_nonoverlapping(self.full.raw.val, self.gap.raw.val as *mut V, 1);
+            ptr::copy_nonoverlapping(self.full.raw.pair, self.gap.raw.pair as *mut (K, V), 1);
         }
 
         let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full;
```
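During backward-shift deletion, the whole pair now moves with one `ptr::copy_nonoverlapping` instead of two. A sketch of the same call over an ordinary array (toy `(u64, u8)` slots; the real code moves possibly non-`Copy` values it owns, which is part of why it stays `unsafe`):

```rust
use std::ptr;

fn main() {
    let mut slots: [(u64, u8); 2] = [(0, 0), (42, 7)];
    let (gap, full) = slots.split_at_mut(1);
    unsafe {
        // Move one (K, V) from the full bucket into the gap, in one call.
        ptr::copy_nonoverlapping(full.as_ptr(), gap.as_mut_ptr(), 1);
    }
    assert_eq!(slots[0], (42, 7));
}
```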
```diff
@@ -560,49 +559,42 @@ fn test_rounding() {
     assert_eq!(round_up_to_next(5, 4), 8);
 }
 
-// Returns a tuple of (key_offset, val_offset),
+// Returns a tuple of (pairs_offset, end_of_pairs_offset),
 // from the start of a mallocated array.
 #[inline]
 fn calculate_offsets(hashes_size: usize,
-                     keys_size: usize,
-                     keys_align: usize,
-                     vals_align: usize)
+                     pairs_size: usize,
+                     pairs_align: usize)
                      -> (usize, usize, bool) {
-    let keys_offset = round_up_to_next(hashes_size, keys_align);
-    let (end_of_keys, oflo) = keys_offset.overflowing_add(keys_size);
-
-    let vals_offset = round_up_to_next(end_of_keys, vals_align);
+    let pairs_offset = round_up_to_next(hashes_size, pairs_align);
+    let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size);
 
-    (keys_offset, vals_offset, oflo)
+    (pairs_offset, end_of_pairs, oflo)
 }
 
 // Returns a tuple of (minimum required malloc alignment, hash_offset,
 // array_size), from the start of a mallocated array.
 fn calculate_allocation(hash_size: usize,
                         hash_align: usize,
-                        keys_size: usize,
-                        keys_align: usize,
-                        vals_size: usize,
-                        vals_align: usize)
+                        pairs_size: usize,
+                        pairs_align: usize)
                         -> (usize, usize, usize, bool) {
     let hash_offset = 0;
-    let (_, vals_offset, oflo) = calculate_offsets(hash_size, keys_size, keys_align, vals_align);
-    let (end_of_vals, oflo2) = vals_offset.overflowing_add(vals_size);
+    let (_, end_of_pairs, oflo) = calculate_offsets(hash_size, pairs_size, pairs_align);
 
-    let align = cmp::max(hash_align, cmp::max(keys_align, vals_align));
+    let align = cmp::max(hash_align, pairs_align);
 
-    (align, hash_offset, end_of_vals, oflo || oflo2)
+    (align, hash_offset, end_of_pairs, oflo)
 }
 
 #[test]
 fn test_offset_calculation() {
-    assert_eq!(calculate_allocation(128, 8, 15, 1, 4, 4),
-               (8, 0, 148, false));
-    assert_eq!(calculate_allocation(3, 1, 2, 1, 1, 1), (1, 0, 6, false));
-    assert_eq!(calculate_allocation(6, 2, 12, 4, 24, 8), (8, 0, 48, false));
-    assert_eq!(calculate_offsets(128, 15, 1, 4), (128, 144, false));
-    assert_eq!(calculate_offsets(3, 2, 1, 1), (3, 5, false));
-    assert_eq!(calculate_offsets(6, 12, 4, 8), (8, 24, false));
+    assert_eq!(calculate_allocation(128, 8, 16, 8), (8, 0, 144, false));
+    assert_eq!(calculate_allocation(3, 1, 2, 1), (1, 0, 5, false));
+    assert_eq!(calculate_allocation(6, 2, 12, 4), (4, 0, 20, false));
+    assert_eq!(calculate_offsets(128, 15, 4), (128, 143, false));
+    assert_eq!(calculate_offsets(3, 2, 4), (4, 6, false));
+    assert_eq!(calculate_offsets(6, 12, 4), (8, 20, false));
}
 
 impl<K, V> RawTable<K, V> {
```
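To see the arithmetic concretely, here is a standalone re-derivation of the new offset math with one of the updated test cases worked through (`round_up_to_next` reconstructed to match the helper the `test_rounding` cases above exercise; this is a sketch, not the stdlib source):

```rust
fn round_up_to_next(unrounded: usize, target_alignment: usize) -> usize {
    assert!(target_alignment.is_power_of_two());
    (unrounded + target_alignment - 1) & !(target_alignment - 1)
}

fn calculate_offsets(hashes_size: usize,
                     pairs_size: usize,
                     pairs_align: usize)
                     -> (usize, usize, bool) {
    // Pad the hash array out to the pairs' alignment, then add their size.
    let pairs_offset = round_up_to_next(hashes_size, pairs_align);
    let (end_of_pairs, oflo) = pairs_offset.overflowing_add(pairs_size);
    (pairs_offset, end_of_pairs, oflo)
}

fn main() {
    // 6 bytes of hashes, pairs aligned to 4 -> pairs start at offset 8;
    // 12 bytes of pairs -> the region ends at 20. Matches the new test.
    assert_eq!(calculate_offsets(6, 12, 4), (8, 20, false));
}
```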
```diff
@@ -620,39 +612,31 @@ impl<K, V> RawTable<K, V> {
 
         // No need for `checked_mul` before a more restrictive check performed
         // later in this method.
-        let hashes_size = capacity * size_of::<u64>();
-        let keys_size = capacity * size_of::<K>();
-        let vals_size = capacity * size_of::<V>();
+        let hashes_size = capacity.wrapping_mul(size_of::<u64>());
+        let pairs_size = capacity.wrapping_mul(size_of::<(K, V)>());
 
-        // Allocating hashmaps is a little tricky. We need to allocate three
+        // Allocating hashmaps is a little tricky. We need to allocate two
         // arrays, but since we know their sizes and alignments up front,
         // we just allocate a single array, and then have the subarrays
         // point into it.
         //
         // This is great in theory, but in practice getting the alignment
         // right is a little subtle. Therefore, calculating offsets has been
         // factored out into a different function.
-        let (malloc_alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size,
-                                                                               align_of::<u64>(),
-                                                                               keys_size,
-                                                                               align_of::<K>(),
-                                                                               vals_size,
-                                                                               align_of::<V>());
-
+        let (alignment, hash_offset, size, oflo) = calculate_allocation(hashes_size,
+                                                                        align_of::<u64>(),
+                                                                        pairs_size,
+                                                                        align_of::<(K, V)>());
         assert!(!oflo, "capacity overflow");
 
         // One check for overflow that covers calculation and rounding of size.
-        let size_of_bucket = size_of::<u64>()
-            .checked_add(size_of::<K>())
-            .unwrap()
-            .checked_add(size_of::<V>())
-            .unwrap();
+        let size_of_bucket = size_of::<u64>().checked_add(size_of::<(K, V)>()).unwrap();
         assert!(size >=
                 capacity.checked_mul(size_of_bucket)
                     .expect("capacity overflow"),
                 "capacity overflow");
 
-        let buffer = allocate(size, malloc_alignment);
+        let buffer = allocate(size, alignment);
         if buffer.is_null() {
             ::alloc::oom()
         }
```
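The u64-to-u8 example from the doc comment can be checked directly. This snippet just prints the sizes involved (layout figures are target-dependent; on a typical 64-bit platform `(u64, u8)` pads out to 16 bytes, which is the space cost the comment concedes in exchange for key/value adjacency):

```rust
use std::mem::{align_of, size_of};

fn main() {
    // New layout: one u64 hash plus one (K, V) pair per bucket.
    println!("size_of::<(u64, u8)>()  = {}", size_of::<(u64, u8)>());
    println!("align_of::<(u64, u8)>() = {}", align_of::<(u64, u8)>());
    // The old layout stored 8 + 8 + 1 bytes across three arrays with no
    // per-bucket padding; the new one trades those bytes for locality.
    let per_bucket = size_of::<u64>() + size_of::<(u64, u8)>();
    println!("bytes per bucket (new)  = {}", per_bucket);
}
```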
```diff
@@ -669,17 +653,16 @@ impl<K, V> RawTable<K, V> {
 
     fn first_bucket_raw(&self) -> RawBucket<K, V> {
         let hashes_size = self.capacity * size_of::<u64>();
-        let keys_size = self.capacity * size_of::<K>();
+        let pairs_size = self.capacity * size_of::<(K, V)>();
 
-        let buffer = *self.hashes as *const u8;
-        let (keys_offset, vals_offset, oflo) =
-            calculate_offsets(hashes_size, keys_size, align_of::<K>(), align_of::<V>());
+        let buffer = *self.hashes as *mut u8;
+        let (pairs_offset, _, oflo) =
+            calculate_offsets(hashes_size, pairs_size, align_of::<(K, V)>());
         debug_assert!(!oflo, "capacity overflow");
         unsafe {
             RawBucket {
                 hash: *self.hashes,
-                key: buffer.offset(keys_offset as isize) as *const K,
-                val: buffer.offset(vals_offset as isize) as *const V,
+                pair: buffer.offset(pairs_offset as isize) as *const _,
                 _marker: marker::PhantomData,
             }
         }
```
```diff
@@ -844,7 +827,7 @@ impl<'a, K, V> Iterator for RevMoveBuckets<'a, K, V> {
 
             if *self.raw.hash != EMPTY_BUCKET {
                 self.elems_left -= 1;
-                return Some((ptr::read(self.raw.key), ptr::read(self.raw.val)));
+                return Some(ptr::read(self.raw.pair));
             }
         }
     }
```
```diff
@@ -909,7 +892,7 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> {
     fn next(&mut self) -> Option<(&'a K, &'a V)> {
         self.iter.next().map(|bucket| {
             self.elems_left -= 1;
-            unsafe { (&*bucket.key, &*bucket.val) }
+            unsafe { (&(*bucket.pair).0, &(*bucket.pair).1) }
         })
     }
```

```diff
@@ -929,7 +912,8 @@ impl<'a, K, V> Iterator for IterMut<'a, K, V> {
     fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
         self.iter.next().map(|bucket| {
             self.elems_left -= 1;
-            unsafe { (&*bucket.key, &mut *(bucket.val as *mut V)) }
+            let pair_mut = bucket.pair as *mut (K, V);
+            unsafe { (&(*pair_mut).0, &mut (*pair_mut).1) }
         })
     }
```

```diff
@@ -950,7 +934,8 @@ impl<K, V> Iterator for IntoIter<K, V> {
         self.iter.next().map(|bucket| {
             self.table.size -= 1;
             unsafe {
-                (SafeHash { hash: *bucket.hash }, ptr::read(bucket.key), ptr::read(bucket.val))
+                let (k, v) = ptr::read(bucket.pair);
+                (SafeHash { hash: *bucket.hash }, k, v)
             }
         })
     }
```
```diff
@@ -974,9 +959,8 @@ impl<'a, K, V> Iterator for Drain<'a, K, V> {
         self.iter.next().map(|bucket| {
             unsafe {
                 (**self.table).size -= 1;
-                (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) },
-                 ptr::read(bucket.key),
-                 ptr::read(bucket.val))
+                let (k, v) = ptr::read(bucket.pair);
+                (SafeHash { hash: ptr::replace(bucket.hash, EMPTY_BUCKET) }, k, v)
             }
         })
     }
```
```diff
@@ -1015,8 +999,7 @@ impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
                             (full.hash(), k.clone(), v.clone())
                         };
                         *new_buckets.raw.hash = h.inspect();
-                        ptr::write(new_buckets.raw.key as *mut K, k);
-                        ptr::write(new_buckets.raw.val as *mut V, v);
+                        ptr::write(new_buckets.raw.pair as *mut (K, V), (k, v));
                     }
                     Empty(..) => {
                         *new_buckets.raw.hash = EMPTY_BUCKET;
```
```diff
@@ -1054,14 +1037,11 @@ impl<K, V> Drop for RawTable<K, V> {
         }
 
         let hashes_size = self.capacity * size_of::<u64>();
-        let keys_size = self.capacity * size_of::<K>();
-        let vals_size = self.capacity * size_of::<V>();
+        let pairs_size = self.capacity * size_of::<(K, V)>();
         let (align, _, size, oflo) = calculate_allocation(hashes_size,
                                                           align_of::<u64>(),
-                                                          keys_size,
-                                                          align_of::<K>(),
-                                                          vals_size,
-                                                          align_of::<V>());
+                                                          pairs_size,
+                                                          align_of::<(K, V)>());
 
         debug_assert!(!oflo, "should be impossible");
```
