Skip to content

Commit 92debb1

Browse files
committed
Properly handle emojis as literal prefix in macros
Do not accept the following:

```rust
macro_rules! lexes {($($_:tt)*) => {}}

lexes!(🐛"foo");
```

Before, invalid emoji identifiers were gated during parsing instead of lexing in all cases, but this didn't account for macro expansion of literal prefixes.

Fix #123696.
1 parent e78913b commit 92debb1

File tree

4 files changed

+31
-2
lines changed

4 files changed

+31
-2
lines changed

compiler/rustc_lexer/src/lib.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ pub enum TokenKind {
8888
/// tokens.
8989
UnknownPrefix,
9090

91+
/// Similar to the above, but *always* an error on every edition. This is used
92+
/// for emoji identifier recovery, as those are not meant to be ever accepted.
93+
InvalidPrefix,
94+
9195
/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
9296
/// suffix, but may be present here on string and float literals. Users of
9397
/// this type will need to check for and reject that case.
@@ -528,7 +532,7 @@ impl Cursor<'_> {
528532
// Known prefixes must have been handled earlier. So if
529533
// we see a prefix here, it is definitely an unknown prefix.
530534
match self.first() {
531-
'#' | '"' | '\'' => UnknownPrefix,
535+
'#' | '"' | '\'' => InvalidPrefix,
532536
_ => InvalidIdent,
533537
}
534538
}

compiler/rustc_parse/src/lexer/mod.rs

+4-1
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
205205
self.ident(start)
206206
}
207207
rustc_lexer::TokenKind::InvalidIdent
208+
| rustc_lexer::TokenKind::InvalidPrefix
208209
// Do not recover an identifier with emoji if the codepoint is a confusable
209210
// with a recoverable substitution token, like `➖`.
210211
if !UNICODE_ARRAY
@@ -302,7 +303,9 @@ impl<'psess, 'src> StringReader<'psess, 'src> {
302303
rustc_lexer::TokenKind::Caret => token::BinOp(token::Caret),
303304
rustc_lexer::TokenKind::Percent => token::BinOp(token::Percent),
304305

305-
rustc_lexer::TokenKind::Unknown | rustc_lexer::TokenKind::InvalidIdent => {
306+
rustc_lexer::TokenKind::Unknown
307+
| rustc_lexer::TokenKind::InvalidIdent
308+
| rustc_lexer::TokenKind::InvalidPrefix => {
306309
// Don't emit diagnostics for sequences of the same invalid token
307310
if swallow_next_invalid > 0 {
308311
swallow_next_invalid -= 1;
+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
macro_rules! lexes {($($_:tt)*) => {}}
2+
3+
lexes!(🐛#); //~ ERROR identifiers cannot contain emoji
4+
lexes!(🐛"foo");
5+
lexes!(🐛'q');
6+
lexes!(🐛'q);
7+
8+
fn main() {}
+14
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
error: identifiers cannot contain emoji: `🐛`
2+
--> $DIR/emoji-literal-prefix.rs:3:8
3+
|
4+
LL | lexes!(🐛#);
5+
| ^^
6+
LL | lexes!(🐛"foo");
7+
| ^^
8+
LL | lexes!(🐛'q');
9+
| ^^
10+
LL | lexes!(🐛'q);
11+
| ^^
12+
13+
error: aborting due to 1 previous error
14+

0 commit comments

Comments
 (0)