4
4
use std:: ops:: Range ;
5
5
use std:: str:: Chars ;
6
6
7
+ use Mode :: * ;
8
+
7
9
#[ cfg( test) ]
8
10
mod tests;
9
11
10
- /// Errors and warnings that can occur during string unescaping.
12
+ /// Errors and warnings that can occur during string unescaping. They mostly
13
+ /// relate to malformed escape sequences, but there are a few that are about
14
+ /// other problems.
11
15
#[ derive( Debug , PartialEq , Eq ) ]
12
16
pub enum EscapeError {
13
17
/// Expected 1 char, but 0 were found.
@@ -73,25 +77,24 @@ impl EscapeError {
73
77
}
74
78
}
75
79
76
- /// Takes a contents of a literal (without quotes) and produces a
77
- /// sequence of escaped characters or errors.
78
- /// Values are returned through invoking of the provided callback.
80
+ /// Takes a contents of a literal (without quotes) and produces a sequence of
81
+ /// escaped characters or errors.
82
+ ///
83
+ /// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
84
+ /// the callback will be called exactly once.
79
85
pub fn unescape_literal < F > ( src : & str , mode : Mode , callback : & mut F )
80
86
where
81
87
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
82
88
{
83
89
match mode {
84
- Mode :: Char | Mode :: Byte => {
90
+ Char | Byte => {
85
91
let mut chars = src. chars ( ) ;
86
- let res = unescape_char_or_byte ( & mut chars, mode == Mode :: Byte ) ;
92
+ let res = unescape_char_or_byte ( & mut chars, mode) ;
87
93
callback ( 0 ..( src. len ( ) - chars. as_str ( ) . len ( ) ) , res) ;
88
94
}
89
- Mode :: Str | Mode :: ByteStr => unescape_str_common ( src, mode, callback) ,
90
-
91
- Mode :: RawStr | Mode :: RawByteStr => {
92
- unescape_raw_str_or_raw_byte_str ( src, mode == Mode :: RawByteStr , callback)
93
- }
94
- Mode :: CStr | Mode :: RawCStr => unreachable ! ( ) ,
95
+ Str | ByteStr => unescape_str_common ( src, mode, callback) ,
96
+ RawStr | RawByteStr => unescape_raw_str_or_raw_byte_str ( src, mode, callback) ,
97
+ CStr | RawCStr => unreachable ! ( ) ,
95
98
}
96
99
}
97
100
@@ -117,84 +120,87 @@ pub fn unescape_c_string<F>(src: &str, mode: Mode, callback: &mut F)
117
120
where
118
121
F : FnMut ( Range < usize > , Result < CStrUnit , EscapeError > ) ,
119
122
{
120
- if mode == Mode :: RawCStr {
121
- unescape_raw_str_or_raw_byte_str (
122
- src,
123
- mode. characters_should_be_ascii ( ) ,
124
- & mut |r, result| callback ( r, result. map ( CStrUnit :: Char ) ) ,
125
- ) ;
126
- } else {
127
- unescape_str_common ( src, mode, callback) ;
123
+ match mode {
124
+ CStr => {
125
+ unescape_str_common ( src, mode, callback) ;
126
+ }
127
+ RawCStr => {
128
+ unescape_raw_str_or_raw_byte_str ( src, mode, & mut |r, result| {
129
+ callback ( r, result. map ( CStrUnit :: Char ) )
130
+ } ) ;
131
+ }
132
+ Char | Byte | Str | RawStr | ByteStr | RawByteStr => unreachable ! ( ) ,
128
133
}
129
134
}
130
135
131
136
/// Takes a contents of a char literal (without quotes), and returns an
132
137
/// unescaped char or an error.
133
138
pub fn unescape_char ( src : & str ) -> Result < char , EscapeError > {
134
- unescape_char_or_byte ( & mut src. chars ( ) , false )
139
+ unescape_char_or_byte ( & mut src. chars ( ) , Char )
135
140
}
136
141
137
142
/// Takes a contents of a byte literal (without quotes), and returns an
138
143
/// unescaped byte or an error.
139
144
pub fn unescape_byte ( src : & str ) -> Result < u8 , EscapeError > {
140
- unescape_char_or_byte ( & mut src. chars ( ) , true ) . map ( byte_from_char)
145
+ unescape_char_or_byte ( & mut src. chars ( ) , Byte ) . map ( byte_from_char)
141
146
}
142
147
143
148
/// What kind of literal do we parse.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Mode {
    /// A char literal, e.g. `'a'`.
    Char,

    /// A byte literal, e.g. `b'a'`.
    Byte,

    /// A string literal, e.g. `"abc"`.
    Str,
    /// A raw string literal, e.g. `r"abc"`.
    RawStr,

    /// A byte string literal, e.g. `b"abc"`.
    ByteStr,
    /// A raw byte string literal, e.g. `br"abc"`.
    RawByteStr,

    /// A C string literal, e.g. `c"abc"`.
    CStr,
    /// A raw C string literal, e.g. `cr"abc"`.
    RawCStr,
}

impl Mode {
    /// Returns `true` for the string-like modes, whose literals are delimited
    /// by double quotes, and `false` for `Char` and `Byte`.
    pub fn in_double_quotes(self) -> bool {
        match self {
            Self::Str
            | Self::RawStr
            | Self::ByteStr
            | Self::RawByteStr
            | Self::CStr
            | Self::RawCStr => true,
            Self::Char | Self::Byte => false,
        }
    }

    /// Non-byte literals should have `\xXX` escapes that are within the ASCII range.
    ///
    /// # Panics
    ///
    /// Panics for the raw modes: raw literals do not process escape
    /// sequences, so asking this question about them is a caller bug.
    fn ascii_escapes_should_be_ascii(self) -> bool {
        match self {
            Self::Char | Self::Str => true,
            Self::Byte | Self::ByteStr | Self::CStr => false,
            Self::RawStr | Self::RawByteStr | Self::RawCStr => {
                unreachable!("raw literals do not process escape sequences")
            }
        }
    }

    /// Whether characters within the literal must be within the ASCII range.
    #[inline]
    fn chars_should_be_ascii(self) -> bool {
        match self {
            Self::Byte | Self::ByteStr | Self::RawByteStr => true,
            Self::Char | Self::Str | Self::RawStr | Self::CStr | Self::RawCStr => false,
        }
    }

    /// Byte literals do not allow unicode escape.
    fn is_unicode_escape_disallowed(self) -> bool {
        match self {
            Self::Byte | Self::ByteStr | Self::RawByteStr => true,
            Self::Char | Self::Str | Self::RawStr | Self::CStr | Self::RawCStr => false,
        }
    }

    /// Returns the literal's prefix with any raw marker left out: `"b"` for
    /// the byte modes, `"c"` for the C string modes, and `""` otherwise.
    pub fn prefix_noraw(self) -> &'static str {
        match self {
            Self::Char | Self::Str | Self::RawStr => "",
            Self::Byte | Self::ByteStr | Self::RawByteStr => "b",
            Self::CStr | Self::RawCStr => "c",
        }
    }
}
@@ -294,22 +300,21 @@ fn scan_unicode(
294
300
}
295
301
296
302
#[ inline]
297
- fn ascii_check ( c : char , characters_should_be_ascii : bool ) -> Result < char , EscapeError > {
298
- if characters_should_be_ascii && !c. is_ascii ( ) {
299
- // Byte literal can't be a non-ascii character.
303
+ fn ascii_check ( c : char , chars_should_be_ascii : bool ) -> Result < char , EscapeError > {
304
+ if chars_should_be_ascii && !c. is_ascii ( ) {
300
305
Err ( EscapeError :: NonAsciiCharInByte )
301
306
} else {
302
307
Ok ( c)
303
308
}
304
309
}
305
310
306
- fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , is_byte : bool ) -> Result < char , EscapeError > {
311
+ fn unescape_char_or_byte ( chars : & mut Chars < ' _ > , mode : Mode ) -> Result < char , EscapeError > {
307
312
let c = chars. next ( ) . ok_or ( EscapeError :: ZeroChars ) ?;
308
313
let res = match c {
309
- '\\' => scan_escape ( chars, if is_byte { Mode :: Byte } else { Mode :: Char } ) ,
314
+ '\\' => scan_escape ( chars, mode ) ,
310
315
'\n' | '\t' | '\'' => Err ( EscapeError :: EscapeOnlyChar ) ,
311
316
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
312
- _ => ascii_check ( c, is_byte ) ,
317
+ _ => ascii_check ( c, mode . chars_should_be_ascii ( ) ) ,
313
318
} ?;
314
319
if chars. next ( ) . is_some ( ) {
315
320
return Err ( EscapeError :: MoreThanOneChar ) ;
@@ -324,6 +329,7 @@ where
324
329
F : FnMut ( Range < usize > , Result < T , EscapeError > ) ,
325
330
{
326
331
let mut chars = src. chars ( ) ;
332
+ let chars_should_be_ascii = mode. chars_should_be_ascii ( ) ; // get this outside the loop
327
333
328
334
// The `start` and `end` computation here is complicated because
329
335
// `skip_ascii_whitespace` makes us to skip over chars without counting
@@ -346,14 +352,12 @@ where
346
352
_ => scan_escape :: < T > ( & mut chars, mode) ,
347
353
}
348
354
}
349
- '\n' => Ok ( b'\n' . into ( ) ) ,
350
- '\t' => Ok ( b'\t' . into ( ) ) ,
351
355
'"' => Err ( EscapeError :: EscapeOnlyChar ) ,
352
356
'\r' => Err ( EscapeError :: BareCarriageReturn ) ,
353
- _ => ascii_check ( c, mode . characters_should_be_ascii ( ) ) . map ( Into :: into) ,
357
+ _ => ascii_check ( c, chars_should_be_ascii ) . map ( Into :: into) ,
354
358
} ;
355
359
let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
356
- callback ( start..end, res. map ( Into :: into ) ) ;
360
+ callback ( start..end, res) ;
357
361
}
358
362
}
359
363
@@ -387,20 +391,21 @@ where
387
391
/// sequence of characters or errors.
388
392
/// NOTE: Raw strings do not perform any explicit character escaping, here we
389
393
/// only produce errors on bare CR.
390
- fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , is_byte : bool , callback : & mut F )
394
+ fn unescape_raw_str_or_raw_byte_str < F > ( src : & str , mode : Mode , callback : & mut F )
391
395
where
392
396
F : FnMut ( Range < usize > , Result < char , EscapeError > ) ,
393
397
{
394
398
let mut chars = src. chars ( ) ;
399
+ let chars_should_be_ascii = mode. chars_should_be_ascii ( ) ; // get this outside the loop
395
400
396
401
// The `start` and `end` computation here matches the one in
397
- // `unescape_str_or_byte_str ` for consistency, even though this function
402
+ // `unescape_str_common ` for consistency, even though this function
398
403
// doesn't have to worry about skipping any chars.
399
404
while let Some ( c) = chars. next ( ) {
400
405
let start = src. len ( ) - chars. as_str ( ) . len ( ) - c. len_utf8 ( ) ;
401
406
let res = match c {
402
407
'\r' => Err ( EscapeError :: BareCarriageReturnInRawString ) ,
403
- _ => ascii_check ( c, is_byte ) ,
408
+ _ => ascii_check ( c, chars_should_be_ascii ) ,
404
409
} ;
405
410
let end = src. len ( ) - chars. as_str ( ) . len ( ) ;
406
411
callback ( start..end, res) ;
@@ -410,7 +415,7 @@ where
410
415
/// Converts a `char` whose code point fits in a byte into that `u8`.
///
/// The caller must only pass chars with a code point of at most `u8::MAX`.
/// This is checked with a `debug_assert!`; when debug assertions are disabled
/// a larger code point is silently truncated to its low 8 bits.
#[inline]
pub fn byte_from_char(c: char) -> u8 {
    let res = u32::from(c);
    debug_assert!(res <= u32::from(u8::MAX), "guaranteed because of ByteStr");
    // Truncation is intentional and lossless under the invariant above.
    res as u8
}
416
421
0 commit comments