Skip to content

Commit 109ae5a

Browse files
authored
Unrolled build for rust-lang#133070
Rollup merge of rust-lang#133070 - nnethercote:lexer-tweaks, r=chenyukang. Lexer tweaks: some cleanups and small performance improvements. r? @chenyukang
2 parents dff3e7c + 16a39bb commit 109ae5a

File tree

5 files changed

+121
-125
lines changed

5 files changed

+121
-125
lines changed

compiler/rustc_lexer/src/lib.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -566,19 +566,19 @@ impl Cursor<'_> {
566566

567567
fn c_or_byte_string(
568568
&mut self,
569-
mk_kind: impl FnOnce(bool) -> LiteralKind,
570-
mk_kind_raw: impl FnOnce(Option<u8>) -> LiteralKind,
569+
mk_kind: fn(bool) -> LiteralKind,
570+
mk_kind_raw: fn(Option<u8>) -> LiteralKind,
571571
single_quoted: Option<fn(bool) -> LiteralKind>,
572572
) -> TokenKind {
573573
match (self.first(), self.second(), single_quoted) {
574-
('\'', _, Some(mk_kind)) => {
574+
('\'', _, Some(single_quoted)) => {
575575
self.bump();
576576
let terminated = self.single_quoted_string();
577577
let suffix_start = self.pos_within_token();
578578
if terminated {
579579
self.eat_literal_suffix();
580580
}
581-
let kind = mk_kind(terminated);
581+
let kind = single_quoted(terminated);
582582
Literal { kind, suffix_start }
583583
}
584584
('"', _, _) => {

compiler/rustc_lexer/src/tests.rs

+31-41
Original file line numberDiff line numberDiff line change
@@ -77,61 +77,51 @@ fn test_too_many_hashes() {
7777
check_raw_str(&s2, Err(RawStrError::TooManyDelimiters { found: u32::from(max_count) + 1 }));
7878
}
7979

80+
// https://github.com/rust-lang/rust/issues/70528
8081
#[test]
8182
fn test_valid_shebang() {
82-
// https://github.com/rust-lang/rust/issues/70528
83-
let input = "#!/usr/bin/rustrun\nlet x = 5;";
84-
assert_eq!(strip_shebang(input), Some(18));
85-
}
83+
let input = "#!/bin/bash";
84+
assert_eq!(strip_shebang(input), Some(input.len()));
8685

87-
#[test]
88-
fn test_invalid_shebang_valid_rust_syntax() {
89-
// https://github.com/rust-lang/rust/issues/70528
90-
let input = "#! [bad_attribute]";
86+
let input = "#![attribute]";
9187
assert_eq!(strip_shebang(input), None);
92-
}
9388

94-
#[test]
95-
fn test_shebang_second_line() {
96-
// Because shebangs are interpreted by the kernel, they must be on the first line
97-
let input = "\n#!/bin/bash";
89+
let input = "#! /bin/bash";
90+
assert_eq!(strip_shebang(input), Some(input.len()));
91+
92+
let input = "#! [attribute]";
9893
assert_eq!(strip_shebang(input), None);
99-
}
10094

101-
#[test]
102-
fn test_shebang_space() {
103-
let input = "#! /bin/bash";
95+
let input = "#! /* blah */ /bin/bash";
10496
assert_eq!(strip_shebang(input), Some(input.len()));
105-
}
10697

107-
#[test]
108-
fn test_shebang_empty_shebang() {
109-
let input = "#! \n[attribute(foo)]";
98+
let input = "#! /* blah */ [attribute]";
11099
assert_eq!(strip_shebang(input), None);
111-
}
112100

113-
#[test]
114-
fn test_invalid_shebang_comment() {
115-
let input = "#!//bin/ami/a/comment\n[";
116-
assert_eq!(strip_shebang(input), None)
117-
}
101+
let input = "#! // blah\n/bin/bash";
102+
assert_eq!(strip_shebang(input), Some(10)); // strip up to the newline
118103

119-
#[test]
120-
fn test_invalid_shebang_another_comment() {
121-
let input = "#!/*bin/ami/a/comment*/\n[attribute";
122-
assert_eq!(strip_shebang(input), None)
123-
}
104+
let input = "#! // blah\n[attribute]";
105+
assert_eq!(strip_shebang(input), None);
124106

125-
#[test]
126-
fn test_shebang_valid_rust_after() {
127-
let input = "#!/*bin/ami/a/comment*/\npub fn main() {}";
128-
assert_eq!(strip_shebang(input), Some(23))
129-
}
107+
let input = "#! /* blah\nblah\nblah */ /bin/bash";
108+
assert_eq!(strip_shebang(input), Some(10));
130109

131-
#[test]
132-
fn test_shebang_followed_by_attrib() {
133-
let input = "#!/bin/rust-scripts\n#![allow_unused(true)]";
134-
assert_eq!(strip_shebang(input), Some(19));
110+
let input = "#! /* blah\nblah\nblah */ [attribute]";
111+
assert_eq!(strip_shebang(input), None);
112+
113+
let input = "#!\n/bin/sh";
114+
assert_eq!(strip_shebang(input), Some(2));
115+
116+
let input = "#!\n[attribute]";
117+
assert_eq!(strip_shebang(input), None);
118+
119+
// Because shebangs are interpreted by the kernel, they must be on the first line
120+
let input = "\n#!/bin/bash";
121+
assert_eq!(strip_shebang(input), None);
122+
123+
let input = "\n#![attribute]";
124+
assert_eq!(strip_shebang(input), None);
135125
}
136126

137127
fn check_lexing(src: &str, expect: Expect) {

compiler/rustc_parse/src/lexer/mod.rs

+44-31
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use rustc_span::symbol::Symbol;
1818
use rustc_span::{BytePos, Pos, Span};
1919
use tracing::debug;
2020

21+
use crate::lexer::diagnostics::TokenTreeDiagInfo;
2122
use crate::lexer::unicode_chars::UNICODE_ARRAY;
2223
use crate::{errors, make_unclosed_delims_error};
2324

@@ -56,7 +57,7 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
5657
}
5758

5859
let cursor = Cursor::new(src);
59-
let string_reader = StringReader {
60+
let mut lexer = Lexer {
6061
psess,
6162
start_pos,
6263
pos: start_pos,
@@ -65,34 +66,31 @@ pub(crate) fn lex_token_trees<'psess, 'src>(
6566
override_span,
6667
nbsp_is_whitespace: false,
6768
last_lifetime: None,
69+
token: Token::dummy(),
70+
diag_info: TokenTreeDiagInfo::default(),
6871
};
69-
let (stream, res, unmatched_delims) =
70-
tokentrees::TokenTreesReader::lex_all_token_trees(string_reader);
71-
match res {
72-
Ok(()) if unmatched_delims.is_empty() => Ok(stream),
73-
_ => {
74-
// Return error if there are unmatched delimiters or unclosed delimiters.
75-
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
76-
// because the delimiter mismatch is more likely to be the root cause of error
77-
78-
let mut buffer = Vec::with_capacity(1);
79-
for unmatched in unmatched_delims {
80-
if let Some(err) = make_unclosed_delims_error(unmatched, psess) {
81-
buffer.push(err);
82-
}
83-
}
84-
if let Err(errs) = res {
85-
// Add unclosing delimiter or diff marker errors
86-
for err in errs {
87-
buffer.push(err);
88-
}
89-
}
90-
Err(buffer)
72+
let (_open_spacing, stream, res) = lexer.lex_token_trees(/* is_delimited */ false);
73+
let unmatched_delims = lexer.diag_info.unmatched_delims;
74+
75+
if res.is_ok() && unmatched_delims.is_empty() {
76+
Ok(stream)
77+
} else {
78+
// Return error if there are unmatched delimiters or unclosed delimiters.
79+
// We emit delimiter mismatch errors first, then emit the unclosing delimiter mismatch
80+
// because the delimiter mismatch is more likely to be the root cause of error
81+
let mut buffer: Vec<_> = unmatched_delims
82+
.into_iter()
83+
.filter_map(|unmatched_delim| make_unclosed_delims_error(unmatched_delim, psess))
84+
.collect();
85+
if let Err(errs) = res {
86+
// Add unclosing delimiter or diff marker errors
87+
buffer.extend(errs);
9188
}
89+
Err(buffer)
9290
}
9391
}
9492

95-
struct StringReader<'psess, 'src> {
93+
struct Lexer<'psess, 'src> {
9694
psess: &'psess ParseSess,
9795
/// Initial position, read-only.
9896
start_pos: BytePos,
@@ -111,9 +109,14 @@ struct StringReader<'psess, 'src> {
111109
/// Track the `Span` for the leading `'` of the last lifetime. Used for
112110
/// diagnostics to detect possible typo where `"` was meant.
113111
last_lifetime: Option<Span>,
112+
113+
/// The current token.
114+
token: Token,
115+
116+
diag_info: TokenTreeDiagInfo,
114117
}
115118

116-
impl<'psess, 'src> StringReader<'psess, 'src> {
119+
impl<'psess, 'src> Lexer<'psess, 'src> {
117120
fn dcx(&self) -> DiagCtxtHandle<'psess> {
118121
self.psess.dcx()
119122
}
@@ -124,7 +127,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
124127

125128
/// Returns the next token, paired with a bool indicating if the token was
126129
/// preceded by whitespace.
127-
fn next_token(&mut self) -> (Token, bool) {
130+
fn next_token_from_cursor(&mut self) -> (Token, bool) {
128131
let mut preceded_by_whitespace = false;
129132
let mut swallow_next_invalid = 0;
130133
// Skip trivial (whitespace & comments) tokens
@@ -231,7 +234,8 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
231234
.push(span);
232235
token::Ident(sym, IdentIsRaw::No)
233236
}
234-
// split up (raw) c string literals to an ident and a string literal when edition < 2021.
237+
// split up (raw) c string literals to an ident and a string literal when edition <
238+
// 2021.
235239
rustc_lexer::TokenKind::Literal {
236240
kind: kind @ (LiteralKind::CStr { .. } | LiteralKind::RawCStr { .. }),
237241
suffix_start: _,
@@ -252,7 +256,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
252256
let prefix_span = self.mk_sp(start, lit_start);
253257
return (Token::new(self.ident(start), prefix_span), preceded_by_whitespace);
254258
}
255-
rustc_lexer::TokenKind::GuardedStrPrefix => self.maybe_report_guarded_str(start, str_before),
259+
rustc_lexer::TokenKind::GuardedStrPrefix => {
260+
self.maybe_report_guarded_str(start, str_before)
261+
}
256262
rustc_lexer::TokenKind::Literal { kind, suffix_start } => {
257263
let suffix_start = start + BytePos(suffix_start);
258264
let (kind, symbol) = self.cook_lexer_literal(start, suffix_start, kind);
@@ -296,13 +302,20 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
296302
if prefix_span.at_least_rust_2021() {
297303
let span = self.mk_sp(start, self.pos);
298304

299-
let lifetime_name_without_tick = Symbol::intern(&self.str_from(ident_start));
305+
let lifetime_name_without_tick =
306+
Symbol::intern(&self.str_from(ident_start));
300307
if !lifetime_name_without_tick.can_be_raw() {
301-
self.dcx().emit_err(errors::CannotBeRawLifetime { span, ident: lifetime_name_without_tick });
308+
self.dcx().emit_err(
309+
errors::CannotBeRawLifetime {
310+
span,
311+
ident: lifetime_name_without_tick
312+
}
313+
);
302314
}
303315

304316
// Put the `'` back onto the lifetime name.
305-
let mut lifetime_name = String::with_capacity(lifetime_name_without_tick.as_str().len() + 1);
317+
let mut lifetime_name =
318+
String::with_capacity(lifetime_name_without_tick.as_str().len() + 1);
306319
lifetime_name.push('\'');
307320
lifetime_name += lifetime_name_without_tick.as_str();
308321
let sym = Symbol::intern(&lifetime_name);

0 commit comments

Comments
 (0)