@@ -104,6 +104,12 @@ pub enum TokenKind {
104
104
/// for emoji identifier recovery, as those are not meant to be ever accepted.
105
105
InvalidPrefix ,
106
106
107
+ /// Guarded string literal prefix: `#"` or `##`.
108
+ ///
109
+ /// Used for reserving "guarded strings" (RFC 3598) in edition 2024.
110
+ /// Split into the component tokens on older editions.
111
+ GuardedStrPrefix ,
112
+
107
113
/// Examples: `12u8`, `1.0e-40`, `b"123"`. Note that `_` is an invalid
108
114
/// suffix, but may be present here on string and float literals. Users of
109
115
/// this type will need to check for and reject that case.
@@ -191,30 +197,41 @@ pub enum DocStyle {
191
197
/// `rustc_ast::ast::LitKind`).
192
198
#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
193
199
pub enum LiteralKind {
194
- /// " 12_u8", " 0o100", " 0b120i99", " 1f32" .
200
+ /// ` 12_u8`, ` 0o100`, ` 0b120i99`, ` 1f32` .
195
201
Int { base : Base , empty_int : bool } ,
196
- /// " 12.34f32", " 1e3" , but not " 1f32" .
202
+ /// ` 12.34f32`, ` 1e3` , but not ` 1f32` .
197
203
Float { base : Base , empty_exponent : bool } ,
198
- /// " 'a'", " '\\'", " '''", "';"
204
+ /// ` 'a'`, ` '\\'`, ` '''`, `';`
199
205
Char { terminated : bool } ,
200
- /// " b'a'", " b'\\'", " b'''", " b';"
206
+ /// ` b'a'`, ` b'\\'`, ` b'''`, ` b';`
201
207
Byte { terminated : bool } ,
202
- /// "" abc"", "" abc"
208
+ /// `" abc"`, `" abc`
203
209
Str { terminated : bool } ,
204
- /// " b"abc"", " b"abc"
210
+ /// ` b"abc"`, ` b"abc`
205
211
ByteStr { terminated : bool } ,
206
212
/// `c"abc"`, `c"abc`
207
213
CStr { terminated : bool } ,
208
- /// " r"abc"", " r#"abc"#", " r####"ab"###"c"####", " r#"a" . `None` indicates
214
+ /// ` r"abc"`, ` r#"abc"#`, ` r####"ab"###"c"####`, ` r#"a` . `None` indicates
209
215
/// an invalid literal.
210
216
RawStr { n_hashes : Option < u8 > } ,
211
- /// " br"abc"", " br#"abc"#", " br####"ab"###"c"####", " br#"a" . `None`
217
+ /// ` br"abc"`, ` br#"abc"#`, ` br####"ab"###"c"####`, ` br#"a` . `None`
212
218
/// indicates an invalid literal.
213
219
RawByteStr { n_hashes : Option < u8 > } ,
214
220
/// `cr"abc"`, "cr#"abc"#", `cr#"a`. `None` indicates an invalid literal.
215
221
RawCStr { n_hashes : Option < u8 > } ,
216
222
}
217
223
224
/// Result of lexing a guarded string: `#"abc"#`, `##"a"` (fewer closing
/// hashes), or even `#"a` (unterminated).
///
/// The closing run of `#` is allowed to be shorter than the opening run;
/// this keeps lexing efficient and gives better backwards diagnostics.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct GuardedStr {
    /// Number of `#` characters in the opening run, before the first `"`.
    pub n_hashes: u32,
    /// `false` when the string body never reached its closing `"`.
    pub terminated: bool,
    /// Length of the whole token, as reported by the cursor's
    /// position within the token once lexing of the guarded string stops.
    pub token_len: u32,
}
234
+
218
235
#[ derive( Clone , Copy , Debug , PartialEq , Eq , PartialOrd , Ord ) ]
219
236
pub enum RawStrError {
220
237
/// Non `#` characters exist between `r` and `"`, e.g. `r##~"abcde"##`
@@ -403,6 +420,12 @@ impl Cursor<'_> {
403
420
TokenKind :: Literal { kind : literal_kind, suffix_start }
404
421
}
405
422
423
+ // Guarded string literal prefix: `#"` or `##`
424
+ '#' if matches ! ( self . first( ) , '"' | '#' ) => {
425
+ self . bump ( ) ;
426
+ TokenKind :: GuardedStrPrefix
427
+ }
428
+
406
429
// One-symbol tokens.
407
430
';' => Semi ,
408
431
',' => Comma ,
@@ -780,6 +803,60 @@ impl Cursor<'_> {
780
803
false
781
804
}
782
805
806
+ /// Attempt to lex for a guarded string literal.
807
+ ///
808
+ /// Used by `rustc_parse::lexer` to lex for guarded strings
809
+ /// conditionally based on edition.
810
+ ///
811
+ /// Note: this will not reset the `Cursor` when a
812
+ /// guarded string is not found. It is the caller's
813
+ /// responsibility to do so.
814
+ pub fn guarded_double_quoted_string ( & mut self ) -> Option < GuardedStr > {
815
+ debug_assert ! ( self . prev( ) != '#' ) ;
816
+
817
+ let mut n_start_hashes: u32 = 0 ;
818
+ while self . first ( ) == '#' {
819
+ n_start_hashes += 1 ;
820
+ self . bump ( ) ;
821
+ }
822
+
823
+ if self . first ( ) != '"' {
824
+ return None ;
825
+ }
826
+ self . bump ( ) ;
827
+ debug_assert ! ( self . prev( ) == '"' ) ;
828
+
829
+ // Lex the string itself as a normal string literal
830
+ // so we can recover that for older editions later.
831
+ let terminated = self . double_quoted_string ( ) ;
832
+ if !terminated {
833
+ let token_len = self . pos_within_token ( ) ;
834
+ self . reset_pos_within_token ( ) ;
835
+
836
+ return Some ( GuardedStr { n_hashes : n_start_hashes, terminated : false , token_len } ) ;
837
+ }
838
+
839
+ // Consume closing '#' symbols.
840
+ // Note that this will not consume extra trailing `#` characters:
841
+ // `###"abcde"####` is lexed as a `GuardedStr { n_end_hashes: 3, .. }`
842
+ // followed by a `#` token.
843
+ let mut n_end_hashes = 0 ;
844
+ while self . first ( ) == '#' && n_end_hashes < n_start_hashes {
845
+ n_end_hashes += 1 ;
846
+ self . bump ( ) ;
847
+ }
848
+
849
+ // Reserved syntax, always an error, so it doesn't matter if
850
+ // `n_start_hashes != n_end_hashes`.
851
+
852
+ self . eat_literal_suffix ( ) ;
853
+
854
+ let token_len = self . pos_within_token ( ) ;
855
+ self . reset_pos_within_token ( ) ;
856
+
857
+ Some ( GuardedStr { n_hashes : n_start_hashes, terminated : true , token_len } )
858
+ }
859
+
783
860
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
784
861
fn raw_double_quoted_string ( & mut self , prefix_len : u32 ) -> Result < u8 , RawStrError > {
785
862
// Wrap the actual function to handle the error with too many hashes.
0 commit comments