Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rustc_metadata: more safely read/write the index positions. #59887

Merged
merged 1 commit into from
Apr 27, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 80 additions & 45 deletions src/librustc_metadata/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,71 @@ use crate::schema::*;

use rustc::hir::def_id::{DefId, DefIndex, DefIndexAddressSpace};
use rustc_serialize::opaque::Encoder;
use std::slice;
use std::u32;
use log::debug;

/// Helper trait, for encoding to, and decoding from, a fixed number of bytes.
pub trait FixedSizeEncoding {
const BYTE_LEN: usize;

// FIXME(eddyb) convert to and from `[u8; Self::BYTE_LEN]` instead,
// once that starts being allowed by the compiler (i.e. lazy normalization).
fn from_bytes(b: &[u8]) -> Self;
fn write_to_bytes(self, b: &mut [u8]);

// FIXME(eddyb) make these generic functions, or at least defaults here.
// (same problem as above, needs `[u8; Self::BYTE_LEN]`)
// For now, a macro (`fixed_size_encoding_byte_len_and_defaults`) is used.
fn read_from_bytes_at(b: &[u8], i: usize) -> Self;
fn write_to_bytes_at(self, b: &mut [u8], i: usize);
}

// HACK(eddyb) this shouldn't be needed (see comments on the methods above).
macro_rules! fixed_size_encoding_byte_len_and_defaults {
($byte_len:expr) => {
const BYTE_LEN: usize = $byte_len;
fn read_from_bytes_at(b: &[u8], i: usize) -> Self {
const BYTE_LEN: usize = $byte_len;
// HACK(eddyb) ideally this would be done with fully safe code,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably could split the unsafe code into a function which casts [u8] to [[u8; N]], just to make it a bit clearer what is going on.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I... would do that, if I could write such a function generically.

// but slicing `[u8]` with `i * N..` is optimized worse, due to the
// possibility of `i * N` overflowing, than indexing `[[u8; N]]`.
let b = unsafe {
std::slice::from_raw_parts(
b.as_ptr() as *const [u8; BYTE_LEN],
b.len() / BYTE_LEN,
)
};
Self::from_bytes(&b[i])
}
fn write_to_bytes_at(self, b: &mut [u8], i: usize) {
const BYTE_LEN: usize = $byte_len;
// HACK(eddyb) ideally this would be done with fully safe code,
// see similar comment in `read_from_bytes_at` for why it can't yet.
let b = unsafe {
std::slice::from_raw_parts_mut(
b.as_mut_ptr() as *mut [u8; BYTE_LEN],
b.len() / BYTE_LEN,
)
};
self.write_to_bytes(&mut b[i]);
}
}
}

impl FixedSizeEncoding for u32 {
fixed_size_encoding_byte_len_and_defaults!(4);

fn from_bytes(b: &[u8]) -> Self {
let mut bytes = [0; Self::BYTE_LEN];
bytes.copy_from_slice(&b[..Self::BYTE_LEN]);
Self::from_le_bytes(bytes)
}

fn write_to_bytes(self, b: &mut [u8]) {
b[..Self::BYTE_LEN].copy_from_slice(&self.to_le_bytes());
}
}

/// While we are generating the metadata, we also track the position
/// of each DefIndex. It is not required that all definitions appear
/// in the metadata, nor that they are serialized in order, and
Expand All @@ -14,14 +75,14 @@ use log::debug;
/// appropriate spot by calling `record_position`. We should never
/// visit the same index twice.
pub struct Index {
positions: [Vec<u32>; 2]
positions: [Vec<u8>; 2]
}

impl Index {
pub fn new((max_index_lo, max_index_hi): (usize, usize)) -> Index {
Index {
positions: [vec![u32::MAX; max_index_lo],
vec![u32::MAX; max_index_hi]],
positions: [vec![0xff; max_index_lo * 4],
vec![0xff; max_index_hi * 4]],
}
}

Expand All @@ -36,26 +97,27 @@ impl Index {
let space_index = item.address_space().index();
let array_index = item.as_array_index();

assert!(self.positions[space_index][array_index] == u32::MAX,
let positions = &mut self.positions[space_index];
assert!(u32::read_from_bytes_at(positions, array_index) == u32::MAX,
"recorded position for item {:?} twice, first at {:?} and now at {:?}",
item,
self.positions[space_index][array_index],
u32::read_from_bytes_at(positions, array_index),
position);

self.positions[space_index][array_index] = position.to_le();
position.write_to_bytes_at(positions, array_index)
}

pub fn write_index(&self, buf: &mut Encoder) -> LazySeq<Index> {
let pos = buf.position();

// First we write the length of the lower range ...
buf.emit_raw_bytes(words_to_bytes(&[(self.positions[0].len() as u32).to_le()]));
buf.emit_raw_bytes(&(self.positions[0].len() as u32 / 4).to_le_bytes());
// ... then the values in the lower range ...
buf.emit_raw_bytes(words_to_bytes(&self.positions[0][..]));
buf.emit_raw_bytes(&self.positions[0]);
// ... then the values in the higher range.
buf.emit_raw_bytes(words_to_bytes(&self.positions[1][..]));
buf.emit_raw_bytes(&self.positions[1]);
LazySeq::with_position_and_length(pos as usize,
self.positions[0].len() + self.positions[1].len() + 1)
(self.positions[0].len() + self.positions[1].len()) / 4 + 1)
}
}

Expand All @@ -64,24 +126,21 @@ impl<'tcx> LazySeq<Index> {
/// DefIndex (if any).
#[inline(never)]
pub fn lookup(&self, bytes: &[u8], def_index: DefIndex) -> Option<Lazy<Entry<'tcx>>> {
let words = &bytes_to_words(&bytes[self.position..])[..self.len];

debug!("Index::lookup: index={:?} words.len={:?}",
let bytes = &bytes[self.position..];
debug!("Index::lookup: index={:?} len={:?}",
def_index,
words.len());
self.len);

let positions = match def_index.address_space() {
DefIndexAddressSpace::Low => &words[1..],
let i = def_index.as_array_index() + match def_index.address_space() {
DefIndexAddressSpace::Low => 0,
DefIndexAddressSpace::High => {
// This is a DefIndex in the higher range, so find out where
// that starts:
let lo_count = u32::from_le(words[0].get()) as usize;
&words[lo_count + 1 .. ]
u32::read_from_bytes_at(bytes, 0) as usize
}
};

let array_index = def_index.as_array_index();
let position = u32::from_le(positions[array_index].get());
let position = u32::read_from_bytes_at(bytes, 1 + i);
if position == u32::MAX {
debug!("Index::lookup: position=u32::MAX");
None
Expand All @@ -91,27 +150,3 @@ impl<'tcx> LazySeq<Index> {
}
}
}

#[repr(packed)]
#[derive(Copy)]
struct Unaligned<T>(T);

// The derived Clone impl is unsafe for this packed struct since it needs to pass a reference to
// the field to `T::clone`, but this reference may not be properly aligned.
impl<T: Copy> Clone for Unaligned<T> {
fn clone(&self) -> Self {
*self
}
}

impl<T> Unaligned<T> {
fn get(self) -> T { self.0 }
}

fn bytes_to_words(b: &[u8]) -> &[Unaligned<u32>] {
unsafe { slice::from_raw_parts(b.as_ptr() as *const Unaligned<u32>, b.len() / 4) }
}

fn words_to_bytes(w: &[u32]) -> &[u8] {
unsafe { slice::from_raw_parts(w.as_ptr() as *const u8, w.len() * 4) }
}