Skip to content

Commit fddbee6

Browse files
authored
Rollup merge of #68941 - Aaron1011:fix/imported-span, r=petrochenkov
Properly handle Spans that reference imported SourceFiles Previously, metadata encoding used DUMMY_SP to represent any spans that referenced an 'imported' SourceFile - e.g. a SourceFile from an upstream dependency. This currently has no visible consequences, since these kinds of spans don't currently seem to be emitted anywhere. However, there's no reason that we couldn't start using such spans in diagnostics. This PR changes how we encode and decode spans in crate metadata. We encode spans in one of two ways: * 'Local' spans, which reference non-imported SourceFiles, are encoded exactly as before. * 'Foreign' spans, which reference imported SourceFiles, are encoded with the CrateNum of their 'originating' crate. Additionally, their 'lo' and 'high' values are rebased on top of the 'originating' crate, which allows them to be used with the SourceMap data encoded for that crate. To support this change, I've also made the following modifications: * `DefId` and related structs are now moved to `rustc_span`. This allows us to use a `CrateNum` inside `SourceFile`. `CrateNum` has special handling during deserialization (it gets remapped to be the proper `CrateNum` from the point of view of the current compilation session), so using a `CrateNum` instead of a plain integer 'workaround type' helps to simplify deserialization. * The `ExternalSource` enum is renamed to `ExternalSourceKind`. There is now a struct called `ExternalSource`, which holds an `ExternalSourceKind` along with the original line number information for the file. This is used during `Span` serialization to rebase spans onto their 'owning' crate.
2 parents 57e1da5 + 5e28561 commit fddbee6

File tree

11 files changed

+234
-52
lines changed

11 files changed

+234
-52
lines changed

src/librustc/hir/map/collector.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
1010
use rustc_data_structures::svh::Svh;
1111
use rustc_hir as hir;
1212
use rustc_hir::def_id::CRATE_DEF_INDEX;
13-
use rustc_hir::def_id::{CrateNum, DefIndex, LOCAL_CRATE};
13+
use rustc_hir::def_id::{DefIndex, LOCAL_CRATE};
1414
use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
1515
use rustc_hir::*;
1616
use rustc_index::vec::{Idx, IndexVec};
@@ -175,7 +175,7 @@ impl<'a, 'hir> NodeCollector<'a, 'hir> {
175175
.source_map
176176
.files()
177177
.iter()
178-
.filter(|source_file| CrateNum::from_u32(source_file.crate_of_origin) == LOCAL_CRATE)
178+
.filter(|source_file| source_file.cnum == LOCAL_CRATE)
179179
.map(|source_file| source_file.name_hash)
180180
.collect();
181181

src/librustc/ich/impls_syntax.rs

+3-5
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ use crate::ich::StableHashingContext;
55

66
use rustc_ast::ast;
77
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
8-
use rustc_hir::def_id::{CrateNum, DefId, CRATE_DEF_INDEX};
98
use rustc_span::SourceFile;
109

1110
use smallvec::SmallVec;
@@ -59,7 +58,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
5958
name_hash,
6059
name_was_remapped,
6160
unmapped_path: _,
62-
crate_of_origin,
61+
cnum,
6362
// Do not hash the source as it is not encoded
6463
src: _,
6564
src_hash,
@@ -75,9 +74,6 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
7574
(name_hash as u64).hash_stable(hcx, hasher);
7675
name_was_remapped.hash_stable(hcx, hasher);
7776

78-
DefId { krate: CrateNum::from_u32(crate_of_origin), index: CRATE_DEF_INDEX }
79-
.hash_stable(hcx, hasher);
80-
8177
src_hash.hash_stable(hcx, hasher);
8278

8379
// We only hash the relative position within this source_file
@@ -101,6 +97,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
10197
for &char_pos in normalized_pos.iter() {
10298
stable_normalized_pos(char_pos, start_pos).hash_stable(hcx, hasher);
10399
}
100+
101+
cnum.hash_stable(hcx, hasher);
104102
}
105103
}
106104

src/librustc_metadata/rmeta/decoder.rs

+85-6
Original file line numberDiff line numberDiff line change
@@ -386,7 +386,7 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
386386
return Ok(DUMMY_SP);
387387
}
388388

389-
debug_assert_eq!(tag, TAG_VALID_SPAN);
389+
debug_assert!(tag == TAG_VALID_SPAN_LOCAL || tag == TAG_VALID_SPAN_FOREIGN);
390390

391391
let lo = BytePos::decode(self)?;
392392
let len = BytePos::decode(self)?;
@@ -398,7 +398,68 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
398398
bug!("Cannot decode Span without Session.")
399399
};
400400

401-
let imported_source_files = self.cdata().imported_source_files(&sess.source_map());
401+
// There are two possibilities here:
402+
// 1. This is a 'local span', which is located inside a `SourceFile`
403+
// that came from this crate. In this case, we use the source map data
404+
// encoded in this crate. This branch should be taken nearly all of the time.
405+
// 2. This is a 'foreign span', which is located inside a `SourceFile`
406+
// that came from a *different* crate (some crate upstream of the one
407+
// whose metadata we're looking at). For example, consider this dependency graph:
408+
//
409+
// A -> B -> C
410+
//
411+
// Suppose that we're currently compiling crate A, and start deserializing
412+
// metadata from crate B. When we deserialize a Span from crate B's metadata,
413+
// there are two posibilites:
414+
//
415+
// 1. The span references a file from crate B. This makes it a 'local' span,
416+
// which means that we can use crate B's serialized source map information.
417+
// 2. The span references a file from crate C. This makes it a 'foreign' span,
418+
// which means we need to use Crate *C* (not crate B) to determine the source
419+
// map information. We only record source map information for a file in the
420+
// crate that 'owns' it, so deserializing a Span may require us to look at
421+
// a transitive dependency.
422+
//
423+
// When we encode a foreign span, we adjust its 'lo' and 'high' values
424+
// to be based on the *foreign* crate (e.g. crate C), not the crate
425+
// we are writing metadata for (e.g. crate B). This allows us to
426+
// treat the 'local' and 'foreign' cases almost identically during deserialization:
427+
// we can call `imported_source_files` for the proper crate, and binary search
428+
// through the returned slice using our span.
429+
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
430+
self.cdata().imported_source_files(sess.source_map())
431+
} else {
432+
// FIXME: We don't decode dependencies of proc-macros.
433+
// Remove this once #69976 is merged
434+
if self.cdata().root.is_proc_macro_crate() {
435+
debug!(
436+
"SpecializedDecoder<Span>::specialized_decode: skipping span for proc-macro crate {:?}",
437+
self.cdata().cnum
438+
);
439+
// Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
440+
// since we don't have `cnum_map` populated.
441+
// This advances the decoder position so that we can continue
442+
// to read metadata.
443+
let _ = u32::decode(self)?;
444+
return Ok(DUMMY_SP);
445+
}
446+
// tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
447+
let cnum = CrateNum::decode(self)?;
448+
debug!(
449+
"SpecializedDecoder<Span>::specialized_decode: loading source files from cnum {:?}",
450+
cnum
451+
);
452+
453+
// Decoding 'foreign' spans should be rare enough that it's
454+
// not worth it to maintain a per-CrateNum cache for `last_source_file_index`.
455+
// We just set it to 0, to ensure that we don't try to access something out
456+
// of bounds for our initial 'guess'
457+
self.last_source_file_index = 0;
458+
459+
let foreign_data = self.cdata().cstore.get_crate_data(cnum);
460+
foreign_data.imported_source_files(sess.source_map())
461+
};
462+
402463
let source_file = {
403464
// Optimize for the case that most spans within a translated item
404465
// originate from the same source_file.
@@ -412,16 +473,32 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
412473
.binary_search_by_key(&lo, |source_file| source_file.original_start_pos)
413474
.unwrap_or_else(|index| index - 1);
414475

415-
self.last_source_file_index = index;
476+
// Don't try to cache the index for foreign spans,
477+
// as this would require a map from CrateNums to indices
478+
if tag == TAG_VALID_SPAN_LOCAL {
479+
self.last_source_file_index = index;
480+
}
416481
&imported_source_files[index]
417482
}
418483
};
419484

420485
// Make sure our binary search above is correct.
421-
debug_assert!(lo >= source_file.original_start_pos && lo <= source_file.original_end_pos);
486+
debug_assert!(
487+
lo >= source_file.original_start_pos && lo <= source_file.original_end_pos,
488+
"Bad binary search: lo={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
489+
lo,
490+
source_file.original_start_pos,
491+
source_file.original_end_pos
492+
);
422493

423494
// Make sure we correctly filtered out invalid spans during encoding
424-
debug_assert!(hi >= source_file.original_start_pos && hi <= source_file.original_end_pos);
495+
debug_assert!(
496+
hi >= source_file.original_start_pos && hi <= source_file.original_end_pos,
497+
"Bad binary search: hi={:?} source_file.original_start_pos={:?} source_file.original_end_pos={:?}",
498+
hi,
499+
source_file.original_start_pos,
500+
source_file.original_end_pos
501+
);
425502

426503
let lo =
427504
(lo + source_file.translated_source_file.start_pos) - source_file.original_start_pos;
@@ -1425,14 +1502,16 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
14251502
let local_version = local_source_map.new_imported_source_file(
14261503
name,
14271504
name_was_remapped,
1428-
self.cnum.as_u32(),
14291505
src_hash,
14301506
name_hash,
14311507
source_length,
1508+
self.cnum,
14321509
lines,
14331510
multibyte_chars,
14341511
non_narrow_chars,
14351512
normalized_pos,
1513+
start_pos,
1514+
end_pos,
14361515
);
14371516
debug!(
14381517
"CrateMetaData::imported_source_files alloc \

src/librustc_metadata/rmeta/encoder.rs

+48-13
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
use crate::rmeta::table::FixedSizeEncoding;
22
use crate::rmeta::*;
33

4+
use log::{debug, trace};
45
use rustc::hir::map::definitions::DefPathTable;
56
use rustc::hir::map::Map;
67
use rustc::middle::cstore::{EncodedMetadata, ForeignModule, LinkagePreference, NativeLibrary};
@@ -29,9 +30,7 @@ use rustc_serialize::{opaque, Encodable, Encoder, SpecializedEncoder};
2930
use rustc_session::config::{self, CrateType};
3031
use rustc_span::source_map::Spanned;
3132
use rustc_span::symbol::{kw, sym, Ident, Symbol};
32-
use rustc_span::{self, FileName, SourceFile, Span};
33-
34-
use log::{debug, trace};
33+
use rustc_span::{self, ExternalSource, FileName, SourceFile, Span};
3534
use std::hash::Hash;
3635
use std::num::NonZeroUsize;
3736
use std::path::Path;
@@ -165,20 +164,56 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
165164
return TAG_INVALID_SPAN.encode(self);
166165
}
167166

168-
// HACK(eddyb) there's no way to indicate which crate a Span is coming
169-
// from right now, so decoding would fail to find the SourceFile if
170-
// it's not local to the crate the Span is found in.
171-
if self.source_file_cache.is_imported() {
172-
return TAG_INVALID_SPAN.encode(self);
173-
}
167+
// There are two possible cases here:
168+
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
169+
// crate we are writing metadata for. When the metadata for *this* crate gets
170+
// deserialized, the deserializer will need to know which crate it originally came
171+
// from. We use `TAG_VALID_SPAN_FOREIGN` to indicate that a `CrateNum` should
172+
// be deserialized after the rest of the span data, which tells the deserializer
173+
// which crate contains the source map information.
174+
// 2. This span comes from our own crate. No special hamdling is needed - we just
175+
// write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
176+
// our own source map information.
177+
let (tag, lo, hi) = if self.source_file_cache.is_imported() {
178+
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
179+
// (the crate that 'owns' the file it references. These rebased 'lo' and 'hi' values
180+
// are relative to the source map information for the 'foreign' crate whose CrateNum
181+
// we write into the metadata. This allows `imported_source_files` to binary
182+
// search through the 'foreign' crate's source map information, using the
183+
// deserialized 'lo' and 'hi' values directly.
184+
//
185+
// All of this logic ensures that the final result of deserialization is a 'normal'
186+
// Span that can be used without any additional trouble.
187+
let external_start_pos = {
188+
// Introduce a new scope so that we drop the 'lock()' temporary
189+
match &*self.source_file_cache.external_src.lock() {
190+
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
191+
src => panic!("Unexpected external source {:?}", src),
192+
}
193+
};
194+
let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos;
195+
let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos;
174196

175-
TAG_VALID_SPAN.encode(self)?;
176-
span.lo.encode(self)?;
197+
(TAG_VALID_SPAN_FOREIGN, lo, hi)
198+
} else {
199+
(TAG_VALID_SPAN_LOCAL, span.lo, span.hi)
200+
};
201+
202+
tag.encode(self)?;
203+
lo.encode(self)?;
177204

178205
// Encode length which is usually less than span.hi and profits more
179206
// from the variable-length integer encoding that we use.
180-
let len = span.hi - span.lo;
181-
len.encode(self)
207+
let len = hi - lo;
208+
len.encode(self)?;
209+
210+
if tag == TAG_VALID_SPAN_FOREIGN {
211+
// This needs to be two lines to avoid holding the `self.source_file_cache`
212+
// while calling `cnum.encode(self)`
213+
let cnum = self.source_file_cache.cnum;
214+
cnum.encode(self)?;
215+
}
216+
Ok(())
182217

183218
// Don't encode the expansion context.
184219
}

src/librustc_metadata/rmeta/mod.rs

+3-2
Original file line numberDiff line numberDiff line change
@@ -405,5 +405,6 @@ struct GeneratorData<'tcx> {
405405
}
406406

407407
// Tags used for encoding Spans:
408-
const TAG_VALID_SPAN: u8 = 0;
409-
const TAG_INVALID_SPAN: u8 = 1;
408+
const TAG_VALID_SPAN_LOCAL: u8 = 0;
409+
const TAG_VALID_SPAN_FOREIGN: u8 = 1;
410+
const TAG_INVALID_SPAN: u8 = 2;

0 commit comments

Comments
 (0)