Commit d4a32d5
Auto merge of #59887 - eddyb:safer-metadata, r=Zoxc
rustc_metadata: more safely read/write the index positions. This is a small part of a larger refactor that I want to benchmark independently. The final code would be even cleaner than this, so this is sort of a "worst case" test.
2 parents: cfeb917 + f51e6c7
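
For context, the change replaces the `Vec<u32>` position tables (previously read through the unsafe `Unaligned<u32>` view removed at the end of the diff) with flat `Vec<u8>` tables of little-endian words. A minimal standalone sketch of that byte-table round trip (simplified names, not the commit's actual code):

    // Store each u32 position as 4 little-endian bytes at entry `i`.
    fn write_u32_at(table: &mut [u8], i: usize, value: u32) {
        table[i * 4..][..4].copy_from_slice(&value.to_le_bytes());
    }

    fn read_u32_at(table: &[u8], i: usize) -> u32 {
        let mut bytes = [0u8; 4];
        bytes.copy_from_slice(&table[i * 4..][..4]);
        u32::from_le_bytes(bytes)
    }

    fn main() {
        // 3 entries, initialized to the u32::MAX "unfilled" sentinel.
        let mut table = vec![0xff_u8; 3 * 4];
        write_u32_at(&mut table, 1, 42);
        assert_eq!(read_u32_at(&table, 1), 42);
        assert_eq!(read_u32_at(&table, 0), u32::MAX);
    }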

File tree

1 file changed: src/librustc_metadata/index.rs (+80 −45)
@@ -2,10 +2,71 @@ use crate::schema::*;
 
 use rustc::hir::def_id::{DefId, DefIndex, DefIndexAddressSpace};
 use rustc_serialize::opaque::Encoder;
-use std::slice;
 use std::u32;
 use log::debug;
 
+/// Helper trait, for encoding to, and decoding from, a fixed number of bytes.
+pub trait FixedSizeEncoding {
+    const BYTE_LEN: usize;
+
+    // FIXME(eddyb) convert to and from `[u8; Self::BYTE_LEN]` instead,
+    // once that starts being allowed by the compiler (i.e. lazy normalization).
+    fn from_bytes(b: &[u8]) -> Self;
+    fn write_to_bytes(self, b: &mut [u8]);
+
+    // FIXME(eddyb) make these generic functions, or at least defaults here.
+    // (same problem as above, needs `[u8; Self::BYTE_LEN]`)
+    // For now, a macro (`fixed_size_encoding_byte_len_and_defaults`) is used.
+    fn read_from_bytes_at(b: &[u8], i: usize) -> Self;
+    fn write_to_bytes_at(self, b: &mut [u8], i: usize);
+}
+
+// HACK(eddyb) this shouldn't be needed (see comments on the methods above).
+macro_rules! fixed_size_encoding_byte_len_and_defaults {
+    ($byte_len:expr) => {
+        const BYTE_LEN: usize = $byte_len;
+        fn read_from_bytes_at(b: &[u8], i: usize) -> Self {
+            const BYTE_LEN: usize = $byte_len;
+            // HACK(eddyb) ideally this would be done with fully safe code,
+            // but slicing `[u8]` with `i * N..` is optimized worse, due to the
+            // possibility of `i * N` overflowing, than indexing `[[u8; N]]`.
+            let b = unsafe {
+                std::slice::from_raw_parts(
+                    b.as_ptr() as *const [u8; BYTE_LEN],
+                    b.len() / BYTE_LEN,
+                )
+            };
+            Self::from_bytes(&b[i])
+        }
+        fn write_to_bytes_at(self, b: &mut [u8], i: usize) {
+            const BYTE_LEN: usize = $byte_len;
+            // HACK(eddyb) ideally this would be done with fully safe code,
+            // see similar comment in `read_from_bytes_at` for why it can't yet.
+            let b = unsafe {
+                std::slice::from_raw_parts_mut(
+                    b.as_mut_ptr() as *mut [u8; BYTE_LEN],
+                    b.len() / BYTE_LEN,
+                )
+            };
+            self.write_to_bytes(&mut b[i]);
+        }
+    }
+}
+
+impl FixedSizeEncoding for u32 {
+    fixed_size_encoding_byte_len_and_defaults!(4);
+
+    fn from_bytes(b: &[u8]) -> Self {
+        let mut bytes = [0; Self::BYTE_LEN];
+        bytes.copy_from_slice(&b[..Self::BYTE_LEN]);
+        Self::from_le_bytes(bytes)
+    }
+
+    fn write_to_bytes(self, b: &mut [u8]) {
+        b[..Self::BYTE_LEN].copy_from_slice(&self.to_le_bytes());
+    }
+}
+
 /// While we are generating the metadata, we also track the position
 /// of each DefIndex. It is not required that all definitions appear
 /// in the metadata, nor that they are serialized in order, and
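
The HACK comments in `read_from_bytes_at`/`write_to_bytes_at` refer to the fully safe formulation below (a sketch specialized to `u32`, for illustration only, not code from the commit): because `i * 4` can overflow `usize`, the compiler cannot eliminate the bounds checks as it can when indexing a `&[[u8; 4]]` view, which is why the commit keeps the raw-pointer cast.

    // Safe but worse-optimized equivalent of `u32::read_from_bytes_at`:
    // the `i * 4` offset can overflow, so the bounds checks stay.
    fn read_from_bytes_at_safe(b: &[u8], i: usize) -> u32 {
        let mut bytes = [0u8; 4];
        bytes.copy_from_slice(&b[i * 4..][..4]);
        u32::from_le_bytes(bytes)
    }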
@@ -14,14 +75,14 @@ use log::debug;
 /// appropriate spot by calling `record_position`. We should never
 /// visit the same index twice.
 pub struct Index {
-    positions: [Vec<u32>; 2]
+    positions: [Vec<u8>; 2]
 }
 
 impl Index {
     pub fn new((max_index_lo, max_index_hi): (usize, usize)) -> Index {
         Index {
-            positions: [vec![u32::MAX; max_index_lo],
-                        vec![u32::MAX; max_index_hi]],
+            positions: [vec![0xff; max_index_lo * 4],
+                        vec![0xff; max_index_hi * 4]],
        }
    }
 
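The new initializer relies on `u32::MAX` being all `0xff` bytes, so `vec![0xff; n * 4]` builds the same sentinel-filled table that `vec![u32::MAX; n]` did; for instance:

    fn main() {
        // Four 0xff bytes decode (in any byte order) to the u32::MAX sentinel.
        assert_eq!(u32::from_le_bytes([0xff; 4]), u32::MAX);
    }
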
@@ -36,26 +97,27 @@ impl Index {
         let space_index = item.address_space().index();
         let array_index = item.as_array_index();
 
-        assert!(self.positions[space_index][array_index] == u32::MAX,
+        let positions = &mut self.positions[space_index];
+        assert!(u32::read_from_bytes_at(positions, array_index) == u32::MAX,
                 "recorded position for item {:?} twice, first at {:?} and now at {:?}",
                 item,
-                self.positions[space_index][array_index],
+                u32::read_from_bytes_at(positions, array_index),
                 position);
 
-        self.positions[space_index][array_index] = position.to_le();
+        position.write_to_bytes_at(positions, array_index)
     }
 
     pub fn write_index(&self, buf: &mut Encoder) -> LazySeq<Index> {
         let pos = buf.position();
 
         // First we write the length of the lower range ...
-        buf.emit_raw_bytes(words_to_bytes(&[(self.positions[0].len() as u32).to_le()]));
+        buf.emit_raw_bytes(&(self.positions[0].len() as u32 / 4).to_le_bytes());
         // ... then the values in the lower range ...
-        buf.emit_raw_bytes(words_to_bytes(&self.positions[0][..]));
+        buf.emit_raw_bytes(&self.positions[0]);
         // ... then the values in the higher range.
-        buf.emit_raw_bytes(words_to_bytes(&self.positions[1][..]));
+        buf.emit_raw_bytes(&self.positions[1]);
         LazySeq::with_position_and_length(pos as usize,
-            self.positions[0].len() + self.positions[1].len() + 1)
+            (self.positions[0].len() + self.positions[1].len()) / 4 + 1)
     }
 }
 
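`write_index` now emits the index as raw bytes: a little-endian `u32` count of lo-range entries, then the lo position table, then the hi position table. A sketch of a reader for that layout (hypothetical helper, not part of the commit; assumes `bytes` covers the whole payload):

    // Split an index payload into its lo and hi position tables,
    // per the layout written by `write_index` above.
    fn split_index(bytes: &[u8]) -> (&[u8], &[u8]) {
        let mut count = [0u8; 4];
        count.copy_from_slice(&bytes[..4]);
        let lo_count = u32::from_le_bytes(count) as usize;
        bytes[4..].split_at(lo_count * 4)
    }
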
@@ -64,24 +126,21 @@ impl<'tcx> LazySeq<Index> {
     /// DefIndex (if any).
     #[inline(never)]
     pub fn lookup(&self, bytes: &[u8], def_index: DefIndex) -> Option<Lazy<Entry<'tcx>>> {
-        let words = &bytes_to_words(&bytes[self.position..])[..self.len];
-
-        debug!("Index::lookup: index={:?} words.len={:?}",
+        let bytes = &bytes[self.position..];
+        debug!("Index::lookup: index={:?} len={:?}",
               def_index,
-              words.len());
+              self.len);
 
-        let positions = match def_index.address_space() {
-            DefIndexAddressSpace::Low => &words[1..],
+        let i = def_index.as_array_index() + match def_index.address_space() {
+            DefIndexAddressSpace::Low => 0,
             DefIndexAddressSpace::High => {
                 // This is a DefIndex in the higher range, so find out where
                 // that starts:
-                let lo_count = u32::from_le(words[0].get()) as usize;
-                &words[lo_count + 1 .. ]
+                u32::read_from_bytes_at(bytes, 0) as usize
             }
         };
 
-        let array_index = def_index.as_array_index();
-        let position = u32::from_le(positions[array_index].get());
+        let position = u32::read_from_bytes_at(bytes, 1 + i);
         if position == u32::MAX {
             debug!("Index::lookup: position=u32::MAX");
             None
@@ -91,27 +150,3 @@ impl<'tcx> LazySeq<Index> {
         }
     }
 }
-
-#[repr(packed)]
-#[derive(Copy)]
-struct Unaligned<T>(T);
-
-// The derived Clone impl is unsafe for this packed struct since it needs to pass a reference to
-// the field to `T::clone`, but this reference may not be properly aligned.
-impl<T: Copy> Clone for Unaligned<T> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<T> Unaligned<T> {
-    fn get(self) -> T { self.0 }
-}
-
-fn bytes_to_words(b: &[u8]) -> &[Unaligned<u32>] {
-    unsafe { slice::from_raw_parts(b.as_ptr() as *const Unaligned<u32>, b.len() / 4) }
-}
-
-fn words_to_bytes(w: &[u32]) -> &[u8] {
-    unsafe { slice::from_raw_parts(w.as_ptr() as *const u8, w.len() * 4) }
-}

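With the flat byte table, `lookup` reduces to one index computation: entry 0 holds the lo-range count, entries 1 through lo_count hold lo-space positions, and hi-space positions follow. A worked example of that arithmetic (values assumed for illustration, not from the commit):

    // Mirrors the `1 + i` computation in `lookup` above.
    fn entry_index(array_index: usize, lo_count: usize, is_high: bool) -> usize {
        1 + array_index + if is_high { lo_count } else { 0 }
    }

    fn main() {
        // A hi-space DefIndex with array index 2, after 10 lo-space entries,
        // lives at entry 13, i.e. bytes 52..56 of the index payload.
        assert_eq!(entry_index(2, 10, true), 13);
    }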