Skip to content

Commit ebe1305

Browse files
committed
Remove the bogus special case from Parser::look_ahead.
The general case at the bottom of `look_ahead` is slow, because it clones the token cursor. Above it there is a special case for performance that is hit most of the time and avoids the cloning.

Unfortunately, its behaviour differs from the general case in two ways.
- When within a pair of delimiters, if you look any distance past the closing delimiter you get the closing delimiter instead of what comes after the closing delimiter.
- It uses `tree_cursor.look_ahead(dist - 1)` which totally confuses tokens with token trees. This means that only the first token in a token tree will be seen. E.g. in a sequence like `{ a }` the `a` and `}` will be skipped over. Bad!

It's likely that these differences weren't noticed before now because the use of `look_ahead` in the parser is limited to small distances and relatively few contexts.

Removing the special case causes slowdowns of up to 2% on a range of benchmarks. The next commit will add a new, correct special case to regain that lost performance.
1 parent dad9557 commit ebe1305

File tree

2 files changed

+28
-56
lines changed

2 files changed

+28
-56
lines changed

compiler/rustc_parse/src/parser/mod.rs

+2-35
Original file line numberDiff line numberDiff line change
@@ -1118,41 +1118,8 @@ impl<'a> Parser<'a> {
11181118
return looker(&self.token);
11191119
}
11201120

1121-
if let Some(&(_, span, _, delim)) = self.token_cursor.stack.last()
1122-
&& delim != Delimiter::Invisible
1123-
{
1124-
// We are not in the outermost token stream, and the token stream
1125-
// we are in has non-skipped delimiters. Look for skipped
1126-
// delimiters in the lookahead range.
1127-
let tree_cursor = &self.token_cursor.tree_cursor;
1128-
let all_normal = (0..dist).all(|i| {
1129-
let token = tree_cursor.look_ahead(i);
1130-
!matches!(token, Some(TokenTree::Delimited(.., Delimiter::Invisible, _)))
1131-
});
1132-
if all_normal {
1133-
// There were no skipped delimiters. Do lookahead by plain indexing.
1134-
return match tree_cursor.look_ahead(dist - 1) {
1135-
Some(tree) => {
1136-
// Indexing stayed within the current token stream.
1137-
match tree {
1138-
TokenTree::Token(token, _) => looker(token),
1139-
TokenTree::Delimited(dspan, _, delim, _) => {
1140-
looker(&Token::new(token::OpenDelim(*delim), dspan.open))
1141-
}
1142-
}
1143-
}
1144-
None => {
1145-
// Indexing went past the end of the current token
1146-
// stream. Use the close delimiter, no matter how far
1147-
// ahead `dist` went.
1148-
looker(&Token::new(token::CloseDelim(delim), span.close))
1149-
}
1150-
};
1151-
}
1152-
}
1153-
1154-
// We are in a more complex case. Just clone the token cursor and use
1155-
// `next`, skipping delimiters as necessary. Slow but simple.
1121+
// Just clone the token cursor and use `next`, skipping delimiters as
1122+
// necessary. Slow but simple.
11561123
let mut cursor = self.token_cursor.clone();
11571124
let mut i = 0;
11581125
let mut token = Token::dummy();

compiler/rustc_parse/src/parser/tests.rs

+26-21
Original file line numberDiff line numberDiff line change
@@ -1424,12 +1424,15 @@ fn look_ahead() {
14241424
look!(p, 1, token::Colon);
14251425
look!(p, 2, token::Ident(sym::u32, raw_no));
14261426
look!(p, 3, token::CloseDelim(Delimiter::Parenthesis));
1427-
// FIXME(nnethercote) If we lookahead any distance past a close delim
1428-
// we currently return that close delim.
1429-
look!(p, 4, token::CloseDelim(Delimiter::Parenthesis));
1430-
look!(p, 5, token::CloseDelim(Delimiter::Parenthesis));
1431-
look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
1432-
look!(p, 100, token::CloseDelim(Delimiter::Parenthesis));
1427+
look!(p, 4, token::OpenDelim(Delimiter::Brace));
1428+
look!(p, 5, token::Ident(sym_x, raw_no));
1429+
look!(p, 6, token::CloseDelim(Delimiter::Brace));
1430+
look!(p, 7, token::Ident(kw::Struct, raw_no));
1431+
look!(p, 8, token::Ident(sym_S, raw_no));
1432+
look!(p, 9, token::Semi);
1433+
look!(p, 10, token::Eof);
1434+
look!(p, 11, token::Eof);
1435+
look!(p, 100, token::Eof);
14331436

14341437
// Move forward to the `;`.
14351438
for _ in 0..9 {
@@ -1454,12 +1457,13 @@ fn look_ahead() {
14541457
});
14551458
}
14561459

1457-
/// FIXME(nnethercote) Currently there is some buggy behaviour when using
1458-
/// `look_ahead` not within the outermost token stream, as this test shows.
1460+
/// There used to be some buggy behaviour when using `look_ahead` not within
1461+
/// the outermost token stream, which this test covers.
14591462
#[test]
14601463
fn look_ahead_non_outermost_stream() {
14611464
create_default_session_globals_then(|| {
14621465
let sym_f = Symbol::intern("f");
1466+
let sym_x = Symbol::intern("x");
14631467
#[allow(non_snake_case)]
14641468
let sym_S = Symbol::intern("S");
14651469
let raw_no = IdentIsRaw::No;
@@ -1475,20 +1479,21 @@ fn look_ahead_non_outermost_stream() {
14751479
look!(p, 0, token::Ident(kw::Fn, raw_no));
14761480
look!(p, 1, token::Ident(sym_f, raw_no));
14771481
look!(p, 2, token::OpenDelim(Delimiter::Parenthesis));
1478-
// FIXME(nnethercote) The current code incorrectly skips the `x: u32)`
1479-
// to the next token tree.
1480-
look!(p, 3, token::OpenDelim(Delimiter::Brace));
1481-
// FIXME(nnethercote) The current code incorrectly skips the `x }`
1482-
// to the next token tree.
1483-
look!(p, 4, token::Ident(kw::Struct, raw_no));
1484-
look!(p, 5, token::Ident(sym_S, raw_no));
1485-
look!(p, 6, token::Semi);
1486-
// FIXME(nnethercote) If we lookahead any distance past a close delim
1487-
// we currently return that close delim.
1488-
look!(p, 7, token::CloseDelim(Delimiter::Brace));
1489-
look!(p, 8, token::CloseDelim(Delimiter::Brace));
1482+
look!(p, 3, token::Ident(sym_x, raw_no));
1483+
look!(p, 4, token::Colon);
1484+
look!(p, 5, token::Ident(sym::u32, raw_no));
1485+
look!(p, 6, token::CloseDelim(Delimiter::Parenthesis));
1486+
look!(p, 7, token::OpenDelim(Delimiter::Brace));
1487+
look!(p, 8, token::Ident(sym_x, raw_no));
14901488
look!(p, 9, token::CloseDelim(Delimiter::Brace));
1491-
look!(p, 100, token::CloseDelim(Delimiter::Brace));
1489+
look!(p, 10, token::Ident(kw::Struct, raw_no));
1490+
look!(p, 11, token::Ident(sym_S, raw_no));
1491+
look!(p, 12, token::Semi);
1492+
look!(p, 13, token::CloseDelim(Delimiter::Brace));
1493+
// Any lookahead past the end of the token stream returns `Eof`.
1494+
look!(p, 14, token::Eof);
1495+
look!(p, 15, token::Eof);
1496+
look!(p, 100, token::Eof);
14921497
});
14931498
}
14941499

0 commit comments

Comments
 (0)