Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove special code-path for handling unknown tokens #63017

Merged
merged 3 commits into from
Aug 6, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/librustc/ich/impls_syntax.rs
Original file line number Diff line number Diff line change
@@ -363,7 +363,8 @@ impl<'a> HashStable<StableHashingContext<'a>> for token::TokenKind {
}

token::DocComment(val) |
token::Shebang(val) => val.hash_stable(hcx, hasher),
token::Shebang(val) |
token::Unknown(val) => val.hash_stable(hcx, hasher),
}
}
}
29 changes: 15 additions & 14 deletions src/librustdoc/html/highlight.rs
Original file line number Diff line number Diff line change
@@ -44,7 +44,7 @@ pub fn render_with_highlighting(

let mut highlighted_source = vec![];
if classifier.write_source(&mut highlighted_source).is_err() {
Err(classifier.lexer.buffer_fatal_errors())
Err(())
} else {
Ok(String::from_utf8_lossy(&highlighted_source).into_owned())
}
@@ -59,14 +59,9 @@ pub fn render_with_highlighting(
}
write_footer(&mut out).unwrap();
}
Err(errors) => {
// If errors are encountered while trying to highlight, cancel the errors and just emit
// the unhighlighted source. The errors will have already been reported in the
// `check-code-block-syntax` pass.
for mut error in errors {
error.cancel();
}

Err(()) => {
// If errors are encountered while trying to highlight, just emit
// the unhighlighted source.
write!(out, "<pre><code>{}</code></pre>", src).unwrap();
}
}
@@ -192,14 +187,20 @@ impl<'a> Classifier<'a> {
if let Some(token) = self.peek_token.take() {
return Ok(token);
}
self.lexer.try_next_token().map_err(|()| HighlightError::LexError)
let token = self.lexer.next_token();
if let token::Unknown(..) = &token.kind {
return Err(HighlightError::LexError);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is HighlightError::LexError necessary?
Can't the highlighter treat the token as whitespace and continue, similarly to the parser?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mainly to minimize the diff in code and tests. I think the rustdoc side needs a different approach altogether, to avoid duplicating lexer errors in two passes. But rustdoc can be improved separately.

}
Ok(token)
}

fn peek(&mut self) -> Result<&Token, HighlightError> {
if self.peek_token.is_none() {
self.peek_token = Some(
self.lexer.try_next_token().map_err(|()| HighlightError::LexError)?
);
let token = self.lexer.next_token();
if let token::Unknown(..) = &token.kind {
return Err(HighlightError::LexError);
}
self.peek_token = Some(token);
}
Ok(self.peek_token.as_ref().unwrap())
}
@@ -237,7 +238,7 @@ impl<'a> Classifier<'a> {
return Ok(());
},

token::Whitespace => Class::None,
token::Whitespace | token::Unknown(..) => Class::None,
token::Comment => Class::Comment,
token::DocComment(..) => Class::DocComment,

32 changes: 9 additions & 23 deletions src/librustdoc/passes/check_code_block_syntax.rs
Original file line number Diff line number Diff line change
@@ -32,24 +32,20 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
dox[code_block.code].to_owned(),
);

let errors = {
let has_errors = {
let mut has_errors = false;
let mut lexer = Lexer::new(&sess, source_file, None);
while let Ok(token::Token { kind, .. }) = lexer.try_next_token() {
if kind == token::Eof {
break;
loop {
match lexer.next_token().kind {
token::Eof => break,
token::Unknown(..) => has_errors = true,
_ => (),
}
}

let errors = lexer.buffer_fatal_errors();

if !errors.is_empty() {
Err(errors)
} else {
Ok(())
}
has_errors
};

if let Err(errors) = errors {
if has_errors {
let mut diag = if let Some(sp) =
super::source_span_for_markdown_range(self.cx, &dox, &code_block.range, &item.attrs)
{
@@ -58,11 +54,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
.sess()
.struct_span_warn(sp, "could not parse code block as Rust code");

for mut err in errors {
diag.note(&format!("error from rustc: {}", err.message()));
err.cancel();
}

if code_block.syntax.is_none() && code_block.is_fenced {
let sp = sp.from_inner(InnerSpan::new(0, 3));
diag.span_suggestion(
@@ -82,11 +73,6 @@ impl<'a, 'tcx> SyntaxChecker<'a, 'tcx> {
"doc comment contains an invalid Rust code block",
);

for mut err in errors {
// Don't bother reporting the error, because we can't show where it happened.
err.cancel();
}

if code_block.syntax.is_none() && code_block.is_fenced {
diag.help("mark blocks that do not contain Rust code as text: ```text");
}
2 changes: 1 addition & 1 deletion src/libsyntax/ext/proc_macro_server.rs
Original file line number Diff line number Diff line change
@@ -184,7 +184,7 @@ impl FromInternal<(TreeAndJoint, &'_ ParseSess, &'_ mut Vec<Self>)>
}

OpenDelim(..) | CloseDelim(..) => unreachable!(),
Whitespace | Comment | Shebang(..) | Eof => unreachable!(),
Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => unreachable!(),
}
}
}
73 changes: 13 additions & 60 deletions src/libsyntax/parse/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -3,7 +3,7 @@ use crate::parse::token::{self, Token, TokenKind};
use crate::symbol::{sym, Symbol};
use crate::parse::unescape_error_reporting::{emit_unescape_error, push_escaped_char};

use errors::{FatalError, Diagnostic, DiagnosticBuilder};
use errors::{FatalError, DiagnosticBuilder};
use syntax_pos::{BytePos, Pos, Span, NO_EXPANSION};
use rustc_lexer::Base;
use rustc_lexer::unescape;
@@ -39,7 +39,6 @@ pub struct StringReader<'a> {
pos: BytePos,
/// Stop reading src at this index.
end_src_index: usize,
fatal_errs: Vec<DiagnosticBuilder<'a>>,
/// Source text to tokenize.
src: Lrc<String>,
override_span: Option<Span>,
@@ -62,7 +61,6 @@ impl<'a> StringReader<'a> {
pos: source_file.start_pos,
end_src_index: src.len(),
src,
fatal_errs: Vec::new(),
override_span,
}
}
@@ -89,29 +87,17 @@ impl<'a> StringReader<'a> {
self.override_span.unwrap_or_else(|| Span::new(lo, hi, NO_EXPANSION))
}

fn unwrap_or_abort(&mut self, res: Result<Token, ()>) -> Token {
match res {
Ok(tok) => tok,
Err(_) => {
self.emit_fatal_errors();
FatalError.raise();
}
}
}

/// Returns the next token, including trivia like whitespace or comments.
///
/// `Err(())` means that some errors were encountered, which can be
/// retrieved using `buffer_fatal_errors`.
pub fn try_next_token(&mut self) -> Result<Token, ()> {
assert!(self.fatal_errs.is_empty());

pub fn next_token(&mut self) -> Token {
let start_src_index = self.src_index(self.pos);
let text: &str = &self.src[start_src_index..self.end_src_index];

if text.is_empty() {
let span = self.mk_sp(self.pos, self.pos);
return Ok(Token::new(token::Eof, span));
return Token::new(token::Eof, span);
}

{
@@ -125,7 +111,7 @@ impl<'a> StringReader<'a> {
let kind = token::Shebang(sym);

let span = self.mk_sp(start, self.pos);
return Ok(Token::new(kind, span));
return Token::new(kind, span);
}
}
}
@@ -139,39 +125,10 @@ impl<'a> StringReader<'a> {

// This could use `?`, but that makes code significantly (10-20%) slower.
// https://github.com/rust-lang/rust/issues/37939
let kind = match self.cook_lexer_token(token.kind, start) {
Ok(it) => it,
Err(err) => return Err(self.fatal_errs.push(err)),
};
let kind = self.cook_lexer_token(token.kind, start);

let span = self.mk_sp(start, self.pos);
Ok(Token::new(kind, span))
}

/// Returns the next token, including trivia like whitespace or comments.
///
/// Aborts in case of an error.
pub fn next_token(&mut self) -> Token {
let res = self.try_next_token();
self.unwrap_or_abort(res)
}

fn emit_fatal_errors(&mut self) {
for err in &mut self.fatal_errs {
err.emit();
}

self.fatal_errs.clear();
}

pub fn buffer_fatal_errors(&mut self) -> Vec<Diagnostic> {
let mut buffer = Vec::new();

for err in self.fatal_errs.drain(..) {
err.buffer(&mut buffer);
}

buffer
Token::new(kind, span)
}

/// Report a fatal lexical error with a given span.
@@ -218,8 +175,8 @@ impl<'a> StringReader<'a> {
&self,
token: rustc_lexer::TokenKind,
start: BytePos,
) -> Result<TokenKind, DiagnosticBuilder<'a>> {
let kind = match token {
) -> TokenKind {
match token {
rustc_lexer::TokenKind::LineComment => {
let string = self.str_from(start);
// comments with only more "/"s are not doc comments
@@ -396,16 +353,12 @@ impl<'a> StringReader<'a> {
// this should be inside `rustc_lexer`. However, we should first remove compound
// tokens like `<<` from `rustc_lexer`, and then add fancier error recovery to it,
// as there will be less overall work to do this way.
return match unicode_chars::check_for_substitution(self, start, c, &mut err) {
Some(token) => {
err.emit();
Ok(token)
}
None => Err(err),
}
let token = unicode_chars::check_for_substitution(self, start, c, &mut err)
.unwrap_or_else(|| token::Unknown(self.symbol_from(start)));
err.emit();
token
}
};
Ok(kind)
}
}

fn cook_lexer_literal(
2 changes: 1 addition & 1 deletion src/libsyntax/parse/lexer/tokentrees.rs
Original file line number Diff line number Diff line change
@@ -217,7 +217,7 @@ impl<'a> TokenTreesReader<'a> {
loop {
let token = self.string_reader.next_token();
match token.kind {
token::Whitespace | token::Comment | token::Shebang(_) => {
token::Whitespace | token::Comment | token::Shebang(_) | token::Unknown(_) => {
self.joint_to_prev = NonJoint;
}
_ => {
4 changes: 3 additions & 1 deletion src/libsyntax/parse/token.rs
Original file line number Diff line number Diff line change
@@ -255,6 +255,8 @@ pub enum TokenKind {
/// A comment.
Comment,
Shebang(ast::Name),
/// A completely invalid token which should be skipped.
Unknown(ast::Name),

Eof,
}
@@ -603,7 +605,7 @@ impl Token {
DotDotEq | Comma | Semi | ModSep | RArrow | LArrow | FatArrow | Pound | Dollar |
Question | OpenDelim(..) | CloseDelim(..) |
Literal(..) | Ident(..) | Lifetime(..) | Interpolated(..) | DocComment(..) |
Whitespace | Comment | Shebang(..) | Eof => return None,
Whitespace | Comment | Shebang(..) | Unknown(..) | Eof => return None,
};

Some(Token::new(kind, self.span.to(joint.span)))
1 change: 1 addition & 0 deletions src/libsyntax/print/pprust.rs
Original file line number Diff line number Diff line change
@@ -288,6 +288,7 @@ fn token_kind_to_string_ext(tok: &TokenKind, convert_dollar_crate: Option<Span>)
token::Whitespace => " ".to_string(),
token::Comment => "/* */".to_string(),
token::Shebang(s) => format!("/* shebang: {}*/", s),
token::Unknown(s) => s.to_string(),

token::Interpolated(ref nt) => nonterminal_to_string(nt),
}
Loading