3
3
use crate :: ast:: { self , LitKind , MetaItemLit , StrStyle } ;
4
4
use crate :: token:: { self , Token } ;
5
5
use rustc_lexer:: unescape:: {
6
- byte_from_char, unescape_byte, unescape_c_string, unescape_char, unescape_literal, CStrUnit ,
7
- Mode ,
6
+ byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode, MixedUnit , Mode ,
8
7
} ;
9
8
use rustc_span:: symbol:: { kw, sym, Symbol } ;
10
9
use rustc_span:: Span ;
@@ -48,6 +47,9 @@ impl LitKind {
48
47
return Err ( LitError :: InvalidSuffix ) ;
49
48
}
50
49
50
+ // For byte/char/string literals, chars and escapes have already been
51
+ // checked in the lexer (in `cook_lexer_literal`). So we can assume all
52
+ // chars and escapes are valid here.
51
53
Ok ( match kind {
52
54
token:: Bool => {
53
55
assert ! ( symbol. is_bool_lit( ) ) ;
@@ -56,12 +58,12 @@ impl LitKind {
56
58
token:: Byte => {
57
59
return unescape_byte ( symbol. as_str ( ) )
58
60
. map ( LitKind :: Byte )
59
- . map_err ( |_| LitError :: LexerError ) ;
61
+ . map_err ( |_| panic ! ( "failed to unescape byte literal" ) ) ;
60
62
}
61
63
token:: Char => {
62
64
return unescape_char ( symbol. as_str ( ) )
63
65
. map ( LitKind :: Char )
64
- . map_err ( |_| LitError :: LexerError ) ;
66
+ . map_err ( |_| panic ! ( "failed to unescape char literal" ) ) ;
65
67
}
66
68
67
69
// There are some valid suffixes for integer and float literals,
@@ -77,113 +79,69 @@ impl LitKind {
77
79
let s = symbol. as_str ( ) ;
78
80
// Vanilla strings are so common we optimize for the common case where no chars
79
81
// requiring special behaviour are present.
80
- let symbol = if s. contains ( [ '\\' , '\r' ] ) {
82
+ let symbol = if s. contains ( '\\' ) {
81
83
let mut buf = String :: with_capacity ( s. len ( ) ) ;
82
- let mut error = Ok ( ( ) ) ;
83
84
// Force-inlining here is aggressive but the closure is
84
- // called on every char in the string, so it can be
85
- // hot in programs with many long strings.
86
- unescape_literal (
85
+ // called on every char in the string, so it can be hot in
86
+ // programs with many long strings containing escapes .
87
+ unescape_unicode (
87
88
s,
88
89
Mode :: Str ,
89
90
& mut #[ inline ( always) ]
90
- |_, unescaped_char | match unescaped_char {
91
+ |_, c | match c {
91
92
Ok ( c) => buf. push ( c) ,
92
93
Err ( err) => {
93
- if err. is_fatal ( ) {
94
- error = Err ( LitError :: LexerError ) ;
95
- }
94
+ assert ! ( !err. is_fatal( ) , "failed to unescape string literal" )
96
95
}
97
96
} ,
98
97
) ;
99
- error?;
100
98
Symbol :: intern ( & buf)
101
99
} else {
102
100
symbol
103
101
} ;
104
102
LitKind :: Str ( symbol, ast:: StrStyle :: Cooked )
105
103
}
106
104
token:: StrRaw ( n) => {
107
- // Raw strings have no escapes, so we only need to check for invalid chars, and we
108
- // can reuse the symbol on success.
109
- let mut error = Ok ( ( ) ) ;
110
- unescape_literal ( symbol. as_str ( ) , Mode :: RawStr , & mut |_, unescaped_char| {
111
- match unescaped_char {
112
- Ok ( _) => { }
113
- Err ( err) => {
114
- if err. is_fatal ( ) {
115
- error = Err ( LitError :: LexerError ) ;
116
- }
117
- }
118
- }
119
- } ) ;
120
- error?;
105
+ // Raw strings have no escapes so no work is needed here.
121
106
LitKind :: Str ( symbol, ast:: StrStyle :: Raw ( n) )
122
107
}
123
108
token:: ByteStr => {
124
109
let s = symbol. as_str ( ) ;
125
110
let mut buf = Vec :: with_capacity ( s. len ( ) ) ;
126
- let mut error = Ok ( ( ) ) ;
127
- unescape_literal ( s, Mode :: ByteStr , & mut |_, c| match c {
111
+ unescape_unicode ( s, Mode :: ByteStr , & mut |_, c| match c {
128
112
Ok ( c) => buf. push ( byte_from_char ( c) ) ,
129
113
Err ( err) => {
130
- if err. is_fatal ( ) {
131
- error = Err ( LitError :: LexerError ) ;
132
- }
114
+ assert ! ( !err. is_fatal( ) , "failed to unescape string literal" )
133
115
}
134
116
} ) ;
135
- error?;
136
117
LitKind :: ByteStr ( buf. into ( ) , StrStyle :: Cooked )
137
118
}
138
119
token:: ByteStrRaw ( n) => {
139
- // Raw strings have no escapes, so we only need to check for invalid chars, and we
140
- // can convert the symbol directly to a `Lrc<u8>` on success.
141
- let s = symbol. as_str ( ) ;
142
- let mut error = Ok ( ( ) ) ;
143
- unescape_literal ( s, Mode :: RawByteStr , & mut |_, c| match c {
144
- Ok ( _) => { }
145
- Err ( err) => {
146
- if err. is_fatal ( ) {
147
- error = Err ( LitError :: LexerError ) ;
148
- }
149
- }
150
- } ) ;
151
- LitKind :: ByteStr ( s. to_owned ( ) . into_bytes ( ) . into ( ) , StrStyle :: Raw ( n) )
120
+ // Raw strings have no escapes so we can convert the symbol
121
+ // directly to a `Lrc<u8>`.
122
+ let buf = symbol. as_str ( ) . to_owned ( ) . into_bytes ( ) ;
123
+ LitKind :: ByteStr ( buf. into ( ) , StrStyle :: Raw ( n) )
152
124
}
153
125
token:: CStr => {
154
126
let s = symbol. as_str ( ) ;
155
127
let mut buf = Vec :: with_capacity ( s. len ( ) ) ;
156
- let mut error = Ok ( ( ) ) ;
157
- unescape_c_string ( s, Mode :: CStr , & mut |_span, c| match c {
158
- Ok ( CStrUnit :: Byte ( b) ) => buf. push ( b) ,
159
- Ok ( CStrUnit :: Char ( c) ) => {
128
+ unescape_mixed ( s, Mode :: CStr , & mut |_span, c| match c {
129
+ Ok ( MixedUnit :: Char ( c) ) => {
160
130
buf. extend_from_slice ( c. encode_utf8 ( & mut [ 0 ; 4 ] ) . as_bytes ( ) )
161
131
}
132
+ Ok ( MixedUnit :: HighByte ( b) ) => buf. push ( b) ,
162
133
Err ( err) => {
163
- if err. is_fatal ( ) {
164
- error = Err ( LitError :: LexerError ) ;
165
- }
134
+ assert ! ( !err. is_fatal( ) , "failed to unescape C string literal" )
166
135
}
167
136
} ) ;
168
- error?;
169
137
buf. push ( 0 ) ;
170
138
LitKind :: CStr ( buf. into ( ) , StrStyle :: Cooked )
171
139
}
172
140
token:: CStrRaw ( n) => {
173
- // Raw strings have no escapes, so we only need to check for invalid chars, and we
174
- // can convert the symbol directly to a `Lrc<u8>` on success.
175
- let s = symbol. as_str ( ) ;
176
- let mut error = Ok ( ( ) ) ;
177
- unescape_c_string ( s, Mode :: RawCStr , & mut |_, c| match c {
178
- Ok ( _) => { }
179
- Err ( err) => {
180
- if err. is_fatal ( ) {
181
- error = Err ( LitError :: LexerError ) ;
182
- }
183
- }
184
- } ) ;
185
- error?;
186
- let mut buf = s. to_owned ( ) . into_bytes ( ) ;
141
+ // Raw strings have no escapes so we can convert the symbol
142
+ // directly to a `Lrc<u8>` after appending the terminating NUL
143
+ // char.
144
+ let mut buf = symbol. as_str ( ) . to_owned ( ) . into_bytes ( ) ;
187
145
buf. push ( 0 ) ;
188
146
LitKind :: CStr ( buf. into ( ) , StrStyle :: Raw ( n) )
189
147
}
0 commit comments