Skip to content

Commit e8cb1a4

Browse files
committed
Auto merge of #85359 - lrh2000:reserved-prefixes, r=nikomatsakis
Reserve prefixed identifiers and literals (RFC 3101)

This PR denies any identifiers immediately followed by one of three tokens `"`, `'` or `#`, which is stricter than the requirements of RFC 3101 but may be necessary according to the discussion at [Zulip].

[Zulip]: https://rust-lang.zulipchat.com/#narrow/stream/268952-edition-2021/topic/reserved.20prefixes/near/238470099

The tracking issue #84599 says we'll add a feature gate named `reserved_prefixes`, but I don't think I can do this because it is impossible for the lexer to know whether a feature is enabled or not. I guess determining the behavior by the edition information should be enough.

Fixes #84599
2 parents a4f832b + f6dd137 commit e8cb1a4

16 files changed

+518
-9
lines changed

compiler/rustc_lexer/src/lib.rs

+17-5
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,13 @@ pub enum TokenKind {
6666
Ident,
6767
/// "r#ident"
6868
RawIdent,
69+
/// An unknown prefix like `foo#`, `foo'`, `foo"`. Note that only the
70+
/// prefix (`foo`) is included in the token, not the separator (which is
71+
/// lexed as its own distinct token). In Rust 2021 and later, reserved
72+
/// prefixes are reported as errors; in earlier editions, they result in an
73+
/// (allowed by default) lint, and are treated as regular identifier
74+
/// tokens.
75+
UnknownPrefix,
6976
/// "12_u8", "1.0e-40", "b"123"". See `LiteralKind` for more details.
7077
Literal { kind: LiteralKind, suffix_start: usize },
7178
/// "'a"
@@ -323,7 +330,7 @@ impl Cursor<'_> {
323330
let kind = RawStr { n_hashes, err };
324331
Literal { kind, suffix_start }
325332
}
326-
_ => self.ident(),
333+
_ => self.ident_or_unknown_prefix(),
327334
},
328335

329336
// Byte literal, byte string literal, raw byte string literal or identifier.
@@ -358,12 +365,12 @@ impl Cursor<'_> {
358365
let kind = RawByteStr { n_hashes, err };
359366
Literal { kind, suffix_start }
360367
}
361-
_ => self.ident(),
368+
_ => self.ident_or_unknown_prefix(),
362369
},
363370

364371
// Identifier (this should be checked after other variant that can
365372
// start as identifier).
366-
c if is_id_start(c) => self.ident(),
373+
c if is_id_start(c) => self.ident_or_unknown_prefix(),
367374

368375
// Numeric literal.
369376
c @ '0'..='9' => {
@@ -487,11 +494,16 @@ impl Cursor<'_> {
487494
RawIdent
488495
}
489496

490-
fn ident(&mut self) -> TokenKind {
497+
fn ident_or_unknown_prefix(&mut self) -> TokenKind {
491498
debug_assert!(is_id_start(self.prev()));
492499
// Start is already eaten, eat the rest of identifier.
493500
self.eat_while(is_id_continue);
494-
Ident
501+
// Known prefixes must have been handled earlier. So if
502+
// we see a prefix here, it is definitely an unknown prefix.
503+
match self.first() {
504+
'#' | '"' | '\'' => UnknownPrefix,
505+
_ => Ident,
506+
}
495507
}
496508

497509
fn number(&mut self, first_digit: char) -> LiteralKind {

compiler/rustc_lint/src/context.rs

+9
Original file line numberDiff line numberDiff line change
@@ -723,6 +723,15 @@ pub trait LintContext: Sized {
723723
BuiltinLintDiagnostics::OrPatternsBackCompat(span,suggestion) => {
724724
db.span_suggestion(span, "use pat_param to preserve semantics", suggestion, Applicability::MachineApplicable);
725725
}
726+
BuiltinLintDiagnostics::ReservedPrefix(span) => {
727+
db.span_label(span, "unknown prefix");
728+
db.span_suggestion_verbose(
729+
span.shrink_to_hi(),
730+
"insert whitespace here to avoid this being parsed as a prefix in Rust 2021",
731+
" ".into(),
732+
Applicability::MachineApplicable,
733+
);
734+
}
726735
}
727736
// Rewrap `db`, and pass control to the user.
728737
decorate(LintDiagnosticBuilder::new(db));

compiler/rustc_lint_defs/src/builtin.rs

+37
Original file line numberDiff line numberDiff line change
@@ -2973,6 +2973,7 @@ declare_lint_pass! {
29732973
OR_PATTERNS_BACK_COMPAT,
29742974
LARGE_ASSIGNMENTS,
29752975
FUTURE_PRELUDE_COLLISION,
2976+
RESERVED_PREFIX,
29762977
]
29772978
}
29782979

@@ -3263,3 +3264,39 @@ declare_lint! {
32633264
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2021),
32643265
};
32653266
}
3267+
3268+
declare_lint! {
3269+
/// The `reserved_prefix` lint detects identifiers that will be parsed as a
3270+
/// prefix instead in Rust 2021.
3271+
///
3272+
/// ### Example
3273+
///
3274+
/// ```rust,compile_fail
3275+
/// #![deny(reserved_prefix)]
3276+
///
3277+
/// macro_rules! m {
3278+
/// (z $x:expr) => ();
3279+
/// }
3280+
///
3281+
/// m!(z"hey");
3282+
/// ```
3283+
///
3284+
/// {{produces}}
3285+
///
3286+
/// ### Explanation
3287+
///
3288+
/// In Rust 2015 and 2018, `z"hey"` is two tokens: the identifier `z`
3289+
/// followed by the string literal `"hey"`. In Rust 2021, the `z` is
3290+
/// considered a prefix for `"hey"`.
3291+
///
3292+
/// This lint suggests adding whitespace between the `z` and `"hey"` tokens
3293+
/// to keep them separated in Rust 2021.
3294+
pub RESERVED_PREFIX,
3295+
Allow,
3296+
"identifiers that will be parsed as a prefix in Rust 2021",
3297+
@future_incompatible = FutureIncompatibleInfo {
3298+
reference: "issue #84978 <https://github.com/rust-lang/rust/issues/84978>",
3299+
reason: FutureIncompatibilityReason::EditionError(Edition::Edition2021),
3300+
};
3301+
crate_level_only
3302+
}

compiler/rustc_lint_defs/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ pub enum BuiltinLintDiagnostics {
300300
ExternDepSpec(String, ExternDepSpec),
301301
ProcMacroBackCompat(String),
302302
OrPatternsBackCompat(Span, String),
303+
ReservedPrefix(Span),
303304
}
304305

305306
/// Lints that are buffered up early on in the `Session` before the

compiler/rustc_parse/src/lexer/mod.rs

+47-3
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
use rustc_ast::ast::AttrStyle;
1+
use rustc_ast::ast::{self, AttrStyle};
22
use rustc_ast::token::{self, CommentKind, Token, TokenKind};
33
use rustc_ast::tokenstream::{Spacing, TokenStream};
44
use rustc_errors::{error_code, Applicability, DiagnosticBuilder, FatalError, PResult};
55
use rustc_lexer::unescape::{self, Mode};
66
use rustc_lexer::{Base, DocStyle, RawStrError};
7+
use rustc_session::lint::builtin::RESERVED_PREFIX;
8+
use rustc_session::lint::BuiltinLintDiagnostics;
79
use rustc_session::parse::ParseSess;
810
use rustc_span::symbol::{sym, Symbol};
9-
use rustc_span::{BytePos, Pos, Span};
11+
use rustc_span::{edition::Edition, BytePos, Pos, Span};
1012

1113
use tracing::debug;
1214

@@ -166,12 +168,18 @@ impl<'a> StringReader<'a> {
166168
self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
167169
}
168170
rustc_lexer::TokenKind::Whitespace => return None,
169-
rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {
171+
rustc_lexer::TokenKind::Ident
172+
| rustc_lexer::TokenKind::RawIdent
173+
| rustc_lexer::TokenKind::UnknownPrefix => {
170174
let is_raw_ident = token == rustc_lexer::TokenKind::RawIdent;
175+
let is_unknown_prefix = token == rustc_lexer::TokenKind::UnknownPrefix;
171176
let mut ident_start = start;
172177
if is_raw_ident {
173178
ident_start = ident_start + BytePos(2);
174179
}
180+
if is_unknown_prefix {
181+
self.report_unknown_prefix(start);
182+
}
175183
let sym = nfc_normalize(self.str_from(ident_start));
176184
let span = self.mk_sp(start, self.pos);
177185
self.sess.symbol_gallery.insert(sym, span);
@@ -491,6 +499,42 @@ impl<'a> StringReader<'a> {
491499
FatalError.raise()
492500
}
493501

502+
// RFC 3101 introduced the idea of (reserved) prefixes. As of Rust 2021,
503+
// using an (unknown) prefix is an error. In earlier editions, however, they
504+
// only result in an (allowed by default) lint, and are treated as regular
505+
// identifier tokens.
506+
fn report_unknown_prefix(&self, start: BytePos) {
507+
let prefix_span = self.mk_sp(start, self.pos);
508+
let msg = format!("prefix `{}` is unknown", self.str_from_to(start, self.pos));
509+
510+
let expn_data = prefix_span.ctxt().outer_expn_data();
511+
512+
if expn_data.edition >= Edition::Edition2021 {
513+
// In Rust 2021, this is a hard error.
514+
let mut err = self.sess.span_diagnostic.struct_span_err(prefix_span, &msg);
515+
err.span_label(prefix_span, "unknown prefix");
516+
if expn_data.is_root() {
517+
err.span_suggestion_verbose(
518+
prefix_span.shrink_to_hi(),
519+
"consider inserting whitespace here",
520+
" ".into(),
521+
Applicability::MachineApplicable,
522+
);
523+
}
524+
err.note("prefixed identifiers and literals are reserved since Rust 2021");
525+
err.emit();
526+
} else {
527+
// Before Rust 2021, only emit a lint for migration.
528+
self.sess.buffer_lint_with_diagnostic(
529+
&RESERVED_PREFIX,
530+
prefix_span,
531+
ast::CRATE_NODE_ID,
532+
&msg,
533+
BuiltinLintDiagnostics::ReservedPrefix(prefix_span),
534+
);
535+
}
536+
}
537+
494538
/// Note: It was decided to not add a test case, because it would be too big.
495539
/// <https://github.com/rust-lang/rust/pull/50296#issuecomment-392135180>
496540
fn report_too_many_hashes(&self, start: BytePos, found: usize) -> ! {

src/librustdoc/html/highlight.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ impl<'a> Classifier<'a> {
413413
},
414414
c => c,
415415
},
416-
TokenKind::RawIdent => Class::Ident,
416+
TokenKind::RawIdent | TokenKind::UnknownPrefix => Class::Ident,
417417
TokenKind::Lifetime { .. } => Class::Lifetime,
418418
};
419419
// Anything that didn't return above is the simple case where we the
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// force-host
2+
// edition:2018
3+
// no-prefer-dynamic
4+
5+
#![crate_type = "proc-macro"]
6+
7+
extern crate proc_macro;
8+
9+
use proc_macro::TokenStream;
10+
use std::str::FromStr;
11+
12+
#[proc_macro]
13+
pub fn number_of_tokens_in_a_prefixed_integer_literal(_: TokenStream) -> TokenStream {
14+
TokenStream::from_str("hey#123").unwrap().into_iter().count().to_string().parse().unwrap()
15+
}
16+
17+
#[proc_macro]
18+
pub fn number_of_tokens_in_a_prefixed_char_literal(_: TokenStream) -> TokenStream {
19+
TokenStream::from_str("hey#'a'").unwrap().into_iter().count().to_string().parse().unwrap()
20+
}
21+
22+
#[proc_macro]
23+
pub fn number_of_tokens_in_a_prefixed_string_literal(_: TokenStream) -> TokenStream {
24+
TokenStream::from_str("hey#\"abc\"").unwrap().into_iter().count().to_string().parse().unwrap()
25+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// force-host
2+
// edition:2021
3+
// no-prefer-dynamic
4+
5+
#![crate_type = "proc-macro"]
6+
7+
extern crate proc_macro;
8+
9+
use proc_macro::TokenStream;
10+
use std::str::FromStr;
11+
12+
#[proc_macro]
13+
pub fn number_of_tokens_in_a_prefixed_integer_literal(_: TokenStream) -> TokenStream {
14+
TokenStream::from_str("hey#123").unwrap().into_iter().count().to_string().parse().unwrap()
15+
}
16+
17+
#[proc_macro]
18+
pub fn number_of_tokens_in_a_prefixed_char_literal(_: TokenStream) -> TokenStream {
19+
TokenStream::from_str("hey#'a'").unwrap().into_iter().count().to_string().parse().unwrap()
20+
}
21+
22+
#[proc_macro]
23+
pub fn number_of_tokens_in_a_prefixed_string_literal(_: TokenStream) -> TokenStream {
24+
TokenStream::from_str("hey#\"abc\"").unwrap().into_iter().count().to_string().parse().unwrap()
25+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// check-pass
2+
// run-rustfix
3+
// compile-flags: -Z unstable-options --edition 2018
4+
5+
#![warn(reserved_prefix)]
6+
7+
macro_rules! m2 {
8+
($a:tt $b:tt) => {};
9+
}
10+
11+
macro_rules! m3 {
12+
($a:tt $b:tt $c:tt) => {};
13+
}
14+
15+
fn main() {
16+
m2!(z "hey");
17+
//~^ WARNING prefix `z` is unknown [reserved_prefix]
18+
//~| WARNING hard error in Rust 2021
19+
m2!(prefix "hey");
20+
//~^ WARNING prefix `prefix` is unknown [reserved_prefix]
21+
//~| WARNING hard error in Rust 2021
22+
m3!(hey #123);
23+
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
24+
//~| WARNING hard error in Rust 2021
25+
m3!(hey #hey);
26+
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
27+
//~| WARNING hard error in Rust 2021
28+
}
29+
30+
macro_rules! quote {
31+
(# name = # kind # value) => {};
32+
}
33+
34+
quote! {
35+
#name = #kind #value
36+
//~^ WARNING prefix `kind` is unknown [reserved_prefix]
37+
//~| WARNING hard error in Rust 2021
38+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
// check-pass
2+
// run-rustfix
3+
// compile-flags: -Z unstable-options --edition 2018
4+
5+
#![warn(reserved_prefix)]
6+
7+
macro_rules! m2 {
8+
($a:tt $b:tt) => {};
9+
}
10+
11+
macro_rules! m3 {
12+
($a:tt $b:tt $c:tt) => {};
13+
}
14+
15+
fn main() {
16+
m2!(z"hey");
17+
//~^ WARNING prefix `z` is unknown [reserved_prefix]
18+
//~| WARNING hard error in Rust 2021
19+
m2!(prefix"hey");
20+
//~^ WARNING prefix `prefix` is unknown [reserved_prefix]
21+
//~| WARNING hard error in Rust 2021
22+
m3!(hey#123);
23+
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
24+
//~| WARNING hard error in Rust 2021
25+
m3!(hey#hey);
26+
//~^ WARNING prefix `hey` is unknown [reserved_prefix]
27+
//~| WARNING hard error in Rust 2021
28+
}
29+
30+
macro_rules! quote {
31+
(# name = # kind # value) => {};
32+
}
33+
34+
quote! {
35+
#name = #kind#value
36+
//~^ WARNING prefix `kind` is unknown [reserved_prefix]
37+
//~| WARNING hard error in Rust 2021
38+
}

0 commit comments

Comments
 (0)