Commit 43c47db

Auto merge of #98097 - lqd:const-alloc-hash, r=oli-obk
ctfe: limit hashing of big const allocations when interning

Const allocations are only hashed for interning. However, they can be large, making the hashing expensive, especially since it uses `FxHash`: it's better suited to short keys than to potentially big buffers like the actual bytes of the allocation and the associated 1/8th-sized `InitMask`. We can partially hash these fields when they're large, hashing the length plus the head and tail of these buffers, to limit possible collisions while avoiding most of the hashing work.

r? `@ghost`
2 parents 949a64a + 61dc080 commit 43c47db
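A minimal, self-contained sketch of the approach described above, applied to a plain byte buffer rather than rustc's `Allocation` (the `PartialHashed` wrapper and its constants are illustrative only; the actual change is in the diff below):

use std::hash::{Hash, Hasher};

const MAX_BYTES_TO_HASH: usize = 64;
const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH;

/// Illustrative wrapper: hash only the length plus the head and tail of a large buffer,
/// and fall back to hashing the whole buffer when it is small.
struct PartialHashed<'a>(&'a [u8]);

impl Hash for PartialHashed<'_> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        let len = self.0.len();
        if len > MAX_HASHED_BUFFER_LEN {
            // Large buffer: the length helps disambiguate buffers sharing a head and tail.
            len.hash(state);
            self.0[..MAX_BYTES_TO_HASH].hash(state);
            self.0[len - MAX_BYTES_TO_HASH..].hash(state);
        } else {
            // Small buffer: hash all of it, as a derived impl would.
            self.0.hash(state);
        }
    }
}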

1 file changed: +75 -2 lines changed

compiler/rustc_middle/src/mir/interpret/allocation.rs

@@ -3,6 +3,7 @@
 use std::borrow::Cow;
 use std::convert::{TryFrom, TryInto};
 use std::fmt;
+use std::hash;
 use std::iter;
 use std::ops::{Deref, Range};
 use std::ptr;
@@ -25,7 +26,9 @@ use crate::ty;
 /// Its public API is rather low-level, working directly with allocation offsets and a custom error
 /// type to account for the lack of an AllocId on this level. The Miri/CTFE core engine `memory`
 /// module provides higher-level access.
-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
+// Note: for performance reasons when interning, some of the `Allocation` fields can be partially
+// hashed (see the `Hash` impl below for more details), so the impl is not derived.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
 #[derive(HashStable)]
 pub struct Allocation<Tag = AllocId, Extra = ()> {
     /// The actual bytes of the allocation.
@@ -49,6 +52,46 @@ pub struct Allocation<Tag = AllocId, Extra = ()> {
     pub extra: Extra,
 }
 
+/// This is the maximum size we will hash at a time, when interning an `Allocation` and its
+/// `InitMask`. Note that we hash that amount of bytes twice: at the start and at the end of a
+/// buffer. Used when these two structures are large: we only partially hash the larger fields in
+/// that situation. See the comment at the top of their respective `Hash` impl for more details.
+const MAX_BYTES_TO_HASH: usize = 64;
+
+/// This is the maximum size (in bytes) for which a buffer will be fully hashed, when interning.
+/// Otherwise, it will be partially hashed in 2 slices, requiring at least 2 `MAX_BYTES_TO_HASH`
+/// bytes.
+const MAX_HASHED_BUFFER_LEN: usize = 2 * MAX_BYTES_TO_HASH;
+
+// Const allocations are only hashed for interning. However, they can be large, making the hashing
+// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially
+// big buffers like the actual bytes of the allocation. We can partially hash some fields when
+// they're large.
+impl hash::Hash for Allocation {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        // Partially hash the `bytes` buffer when it is large. To limit collisions with common
+        // prefixes and suffixes, we hash the length and some slices of the buffer.
+        let byte_count = self.bytes.len();
+        if byte_count > MAX_HASHED_BUFFER_LEN {
+            // Hash the buffer's length.
+            byte_count.hash(state);
+
+            // And its head and tail.
+            self.bytes[..MAX_BYTES_TO_HASH].hash(state);
+            self.bytes[byte_count - MAX_BYTES_TO_HASH..].hash(state);
+        } else {
+            self.bytes.hash(state);
+        }
+
+        // Hash the other fields as usual.
+        self.relocations.hash(state);
+        self.init_mask.hash(state);
+        self.align.hash(state);
+        self.mutability.hash(state);
+        self.extra.hash(state);
+    }
+}
+
 /// Interned types generally have an `Outer` type and an `Inner` type, where
 /// `Outer` is a newtype around `Interned<Inner>`, and all the operations are
 /// done on `Outer`, because all occurrences are interned. E.g. `Ty` is an
@@ -665,13 +708,43 @@ type Block = u64;
 
 /// A bitmask where each bit refers to the byte with the same index. If the bit is `true`, the byte
 /// is initialized. If it is `false` the byte is uninitialized.
-#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, Hash, TyEncodable, TyDecodable)]
+// Note: for performance reasons when interning, some of the `InitMask` fields can be partially
+// hashed (see the `Hash` impl below for more details), so the impl is not derived.
+#[derive(Clone, Debug, Eq, PartialEq, PartialOrd, Ord, TyEncodable, TyDecodable)]
 #[derive(HashStable)]
 pub struct InitMask {
     blocks: Vec<Block>,
     len: Size,
 }
 
+// Const allocations are only hashed for interning. However, they can be large, making the hashing
+// expensive especially since it uses `FxHash`: it's better suited to short keys, not potentially
+// big buffers like the allocation's init mask. We can partially hash some fields when they're
+// large.
+impl hash::Hash for InitMask {
+    fn hash<H: hash::Hasher>(&self, state: &mut H) {
+        const MAX_BLOCKS_TO_HASH: usize = MAX_BYTES_TO_HASH / std::mem::size_of::<Block>();
+        const MAX_BLOCKS_LEN: usize = MAX_HASHED_BUFFER_LEN / std::mem::size_of::<Block>();
+
+        // Partially hash the `blocks` buffer when it is large. To limit collisions with common
+        // prefixes and suffixes, we hash the length and some slices of the buffer.
+        let block_count = self.blocks.len();
+        if block_count > MAX_BLOCKS_LEN {
+            // Hash the buffer's length.
+            block_count.hash(state);
+
+            // And its head and tail.
+            self.blocks[..MAX_BLOCKS_TO_HASH].hash(state);
+            self.blocks[block_count - MAX_BLOCKS_TO_HASH..].hash(state);
+        } else {
+            self.blocks.hash(state);
+        }
+
+        // Hash the other fields as usual.
+        self.len.hash(state);
+    }
+}
+
 impl InitMask {
     pub const BLOCK_SIZE: u64 = 64;
 
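A rough illustration (not part of the patch) of why partial hashing stays sound for interning: the interner still falls back to full `Eq` comparison on hash collisions, so two buffers that share length, head, and tail but differ in the middle hash identically yet remain distinct entries. The `fingerprint` helper below is hypothetical and simply mirrors the head-and-tail scheme above:

use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// Hypothetical helper: hash the length plus 64-byte head and tail of buffers over 128 bytes.
fn fingerprint(buf: &[u8]) -> u64 {
    let mut h = DefaultHasher::new();
    let len = buf.len();
    if len > 128 {
        len.hash(&mut h);
        buf[..64].hash(&mut h);
        buf[len - 64..].hash(&mut h);
    } else {
        buf.hash(&mut h);
    }
    h.finish()
}

fn main() {
    let mut a = vec![0u8; 4096];
    let b = vec![0u8; 4096];
    a[2048] = 1; // differs only in the middle, outside the hashed head and tail
    assert_eq!(fingerprint(&a), fingerprint(&b)); // same hash bucket...
    assert_ne!(a, b); // ...but equality still tells the allocations apart when interning
}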