Skip to content

Commit cd6f1e2

Browse files
RReverser authored and BurntSushi committed on Sep 11, 2018
compile: use SparseSet-like structure for SuffixCache
For simple regexes, allocating and filling 1000 SuffixCache elements showed up quite high in the profile as part of Compiler::new, so I decided to optimise it by using ideas similar to a sparse set, but specialised for a hashmap-like structure, instead of a flat array with versioning. In my performance comparisons this gives an 8-10% improvement for simple regexes, where the overhead of Compiler::new is most noticeable.
1 parent a0e72bc commit cd6f1e2

File tree

1 file changed

+22
-22
lines changed

1 file changed

+22
-22
lines changed
 

‎src/compile.rs

+22-22
Original file line number | Diff line number | Diff line change
@@ -944,20 +944,19 @@ impl<'a, 'b> CompileClass<'a, 'b> {
944944
///
945945
/// Note that a HashMap could be trivially used for this, but we don't need its
946946
/// overhead. Some small bounded space (LRU style) is more than enough.
947+
///
948+
/// This uses a similar idea to [`SparseSet`](../sparse/struct.SparseSet.html),
949+
/// except it uses hashes as original indices and then compares full keys for
950+
/// validation against the `dense` array.
947951
struct SuffixCache {
948-
table: Vec<SuffixCacheEntry>,
949-
// Every time the cache is cleared, we increment the version number instead
950-
// of actually zeroing memory. Since we store a copy of the current version
951-
// in every element, all we need to do is make sure to invalidate any stale
952-
// entries upon access. This saves quite a bit of time!
953-
version: usize,
952+
sparse: Box<[usize]>,
953+
dense: Vec<SuffixCacheEntry>,
954954
}
955955

956956
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
957957
struct SuffixCacheEntry {
958958
key: SuffixCacheKey,
959959
pc: InstPtr,
960-
version: usize,
961960
}
962961

963962
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)]
@@ -970,28 +969,29 @@ struct SuffixCacheKey {
970969
impl SuffixCache {
971970
fn new(size: usize) -> Self {
972971
SuffixCache {
973-
table: vec![SuffixCacheEntry::default(); size],
974-
version: 0,
972+
sparse: vec![0usize; size].into(),
973+
dense: Vec::with_capacity(size),
975974
}
976975
}
977976

978977
fn get(&mut self, key: SuffixCacheKey, pc: InstPtr) -> Option<InstPtr> {
979-
let h = self.hash(&key);
980-
let e = self.table[h];
981-
if e.key == key && e.version == self.version {
982-
Some(e.pc)
983-
} else {
984-
self.table[h] = SuffixCacheEntry {
985-
key: key,
986-
pc: pc,
987-
version: self.version,
988-
};
989-
None
978+
let hash = self.hash(&key);
979+
let pos = &mut self.sparse[hash];
980+
if let Some(entry) = self.dense.get(*pos) {
981+
if entry.key == key {
982+
return Some(entry.pc);
983+
}
990984
}
985+
*pos = self.dense.len();
986+
self.dense.push(SuffixCacheEntry {
987+
key: key,
988+
pc: pc,
989+
});
990+
None
991991
}
992992

993993
fn clear(&mut self) {
994-
self.version += 1;
994+
self.dense.clear();
995995
}
996996

997997
fn hash(&self, suffix: &SuffixCacheKey) -> usize {
@@ -1002,7 +1002,7 @@ impl SuffixCache {
10021002
h = (h ^ (suffix.from_inst as u64)).wrapping_mul(FNV_PRIME);
10031003
h = (h ^ (suffix.start as u64)).wrapping_mul(FNV_PRIME);
10041004
h = (h ^ (suffix.end as u64)).wrapping_mul(FNV_PRIME);
1005-
(h as usize) % self.table.len()
1005+
(h as usize) % self.sparse.len()
10061006
}
10071007
}
10081008

0 commit comments

Comments
 (0)
Please sign in to comment.