@@ -868,7 +868,7 @@ impl<'tcx> ExtraInfo<'tcx> {
868
868
869
869
#[ derive( Eq , PartialEq , Clone , Debug ) ]
870
870
pub ( crate ) struct LangString {
871
- original : String ,
871
+ pub ( crate ) original : String ,
872
872
pub ( crate ) should_panic : bool ,
873
873
pub ( crate ) no_run : bool ,
874
874
pub ( crate ) ignore : Ignore ,
@@ -893,11 +893,13 @@ pub(crate) enum Ignore {
893
893
/// ```eBNF
894
894
/// lang-string = *(token-list / delimited-attribute-list / comment)
895
895
///
896
- /// bareword = CHAR *(CHAR)
896
+ /// bareword = LEADINGCHAR *(CHAR)
897
+ /// bareword-without-leading-char = CHAR *(CHAR)
897
898
/// quoted-string = QUOTE *(NONQUOTE) QUOTE
898
899
/// token = bareword / quoted-string
900
+ /// token-without-leading-char = bareword-without-leading-char / quoted-string
899
901
/// sep = COMMA/WS *(COMMA/WS)
900
- /// attribute = (DOT token)/(token EQUAL token)
902
+ /// attribute = (DOT token)/(token EQUAL token-without-leading-char )
901
903
/// attribute-list = [sep] attribute *(sep attribute) [sep]
902
904
/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
903
905
/// token-list = [sep] token *(sep token) [sep]
@@ -907,8 +909,15 @@ pub(crate) enum Ignore {
907
909
/// CLOSE_PARENT = ")"
908
910
/// OPEN-CURLY-BRACKET = "{"
909
911
/// CLOSE-CURLY-BRACKET = "}"
910
- /// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
911
- /// QUOTE = %x22
912
+ /// LEADINGCHAR = ALPHA / DIGIT / "_" / "-" / ":"
913
+ /// ; Most ASCII punctuation. Excluded are comma, quote, equals, backslash, grave (backquote) and braces,
+ /// ; as well as apostrophe, parentheses and square brackets.
914
+ /// ; Comma is used to separate language tokens, so it can't be used in one.
915
+ /// ; Quote is used to allow otherwise-disallowed characters in language tokens.
916
+ /// ; Equals is used to make key=value pairs in attribute blocks.
917
+ /// ; Backslash and grave are special Markdown characters.
918
+ /// ; Braces are used to start an attribute block.
919
+ /// CHAR = ALPHA / DIGIT / "_" / "-" / ":" / "." / "!" / "#" / "$" / "%" / "&" / "*" / "+" / "/" /
920
+ /// ";" / "<" / ">" / "?" / "@" / "^" / "|" / "~"
912
921
/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
913
922
/// COMMA = ","
914
923
/// DOT = "."
@@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
932
941
KeyValueAttribute ( & ' a str , & ' a str ) ,
933
942
}
934
943
935
- fn is_bareword_char ( c : char ) -> bool {
944
/// Returns `true` if `c` may start a bareword (the grammar's `LEADINGCHAR`):
/// an ASCII letter, an ASCII digit, `_`, `-` or `:`.
fn is_leading_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | '-' | ':')
}
947
+ fn is_bareword_char ( c : char ) -> bool {
948
+ is_leading_char ( c) || ".!#$%&*+/;<>?@^|~" . contains ( c)
949
+ }
938
950
/// Returns `true` if `c` is a space, a comma or a tab.
fn is_separator(c: char) -> bool {
    matches!(c, ' ' | ',' | '\t')
}
@@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
1077
1089
return self . next ( ) ;
1078
1090
} else if c == '.' {
1079
1091
return self . parse_class ( pos) ;
1080
- } else if c == '"' || is_bareword_char ( c) {
1092
+ } else if c == '"' || is_leading_char ( c) {
1081
1093
return self . parse_key_value ( c, pos) ;
1082
1094
} else {
1083
1095
self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
@@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
1107
1119
return None ;
1108
1120
}
1109
1121
let indices = self . parse_string ( pos) ?;
1110
- if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) && c != '{' && !is_separator ( c) && c != '(' {
1122
+ if let Some ( ( _, c) ) = self . inner . peek ( ) . copied ( ) &&
1123
+ c != '{' &&
1124
+ !is_separator ( c) &&
1125
+ c != '('
1126
+ {
1111
1127
self . emit_error ( format ! ( "expected ` `, `{{` or `,` after `\" `, found `{c}`" ) ) ;
1112
1128
return None ;
1113
1129
}
1114
1130
return Some ( LangStringToken :: LangToken ( & self . data [ indices. start ..indices. end ] ) ) ;
1115
1131
} else if c == '{' {
1116
1132
self . is_in_attribute_block = true ;
1117
1133
return self . next ( ) ;
1118
- } else if is_bareword_char ( c) {
1119
- continue ;
1120
1134
} else if is_separator ( c) {
1121
1135
if pos != start {
1122
1136
return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
@@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
1130
1144
return Some ( LangStringToken :: LangToken ( & self . data [ start..pos] ) ) ;
1131
1145
}
1132
1146
return self . next ( ) ;
1147
+ } else if pos == start && is_leading_char ( c) {
1148
+ continue ;
1149
+ } else if pos != start && is_bareword_char ( c) {
1150
+ continue ;
1133
1151
} else {
1134
1152
self . emit_error ( format ! ( "unexpected character `{c}`" ) ) ;
1135
1153
return None ;
@@ -1158,6 +1176,29 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
1158
1176
}
1159
1177
}
1160
1178
1179
+ fn tokens ( string : & str ) -> impl Iterator < Item = LangStringToken < ' _ > > {
1180
+ // Pandoc, which Rust once used for generating documentation,
1181
+ // expects lang strings to be surrounded by `{}` and for each token
1182
+ // to be proceeded by a `.`. Since some of these lang strings are still
1183
+ // loose in the wild, we strip a pair of surrounding `{}` from the lang
1184
+ // string and a leading `.` from each token.
1185
+
1186
+ let string = string. trim ( ) ;
1187
+
1188
+ let first = string. chars ( ) . next ( ) ;
1189
+ let last = string. chars ( ) . last ( ) ;
1190
+
1191
+ let string =
1192
+ if first == Some ( '{' ) && last == Some ( '}' ) { & string[ 1 ..string. len ( ) - 1 ] } else { string } ;
1193
+
1194
+ string
1195
+ . split ( |c| c == ',' || c == ' ' || c == '\t' )
1196
+ . map ( str:: trim)
1197
+ . map ( |token| token. strip_prefix ( '.' ) . unwrap_or ( token) )
1198
+ . filter ( |token| !token. is_empty ( ) )
1199
+ . map ( |token| LangStringToken :: LangToken ( token) )
1200
+ }
1201
+
1161
1202
impl Default for LangString {
1162
1203
fn default ( ) -> Self {
1163
1204
Self {
@@ -1208,122 +1249,130 @@ impl LangString {
1208
1249
1209
1250
data. original = string. to_owned ( ) ;
1210
1251
1211
- for token in TagIterator :: new ( string, extra) {
1212
- match token {
1213
- LangStringToken :: LangToken ( "should_panic" ) => {
1214
- data. should_panic = true ;
1215
- seen_rust_tags = !seen_other_tags;
1216
- }
1217
- LangStringToken :: LangToken ( "no_run" ) => {
1218
- data. no_run = true ;
1219
- seen_rust_tags = !seen_other_tags;
1220
- }
1221
- LangStringToken :: LangToken ( "ignore" ) => {
1222
- data. ignore = Ignore :: All ;
1223
- seen_rust_tags = !seen_other_tags;
1224
- }
1225
- LangStringToken :: LangToken ( x) if x. starts_with ( "ignore-" ) => {
1226
- if enable_per_target_ignores {
1227
- ignores. push ( x. trim_start_matches ( "ignore-" ) . to_owned ( ) ) ;
1252
+ let mut call = |tokens : & mut dyn Iterator < Item = LangStringToken < ' _ > > | {
1253
+ for token in tokens {
1254
+ match token {
1255
+ LangStringToken :: LangToken ( "should_panic" ) => {
1256
+ data. should_panic = true ;
1228
1257
seen_rust_tags = !seen_other_tags;
1229
1258
}
1230
- }
1231
- LangStringToken :: LangToken ( "rust" ) => {
1232
- data. rust = true ;
1233
- seen_rust_tags = true ;
1234
- }
1235
- LangStringToken :: LangToken ( "custom" ) => {
1236
- if custom_code_classes_in_docs {
1237
- seen_custom_tag = true ;
1238
- } else {
1239
- seen_other_tags = true ;
1259
+ LangStringToken :: LangToken ( "no_run" ) => {
1260
+ data. no_run = true ;
1261
+ seen_rust_tags = !seen_other_tags;
1240
1262
}
1241
- }
1242
- LangStringToken :: LangToken ( "test_harness" ) => {
1243
- data. test_harness = true ;
1244
- seen_rust_tags = !seen_other_tags || seen_rust_tags;
1245
- }
1246
- LangStringToken :: LangToken ( "compile_fail" ) => {
1247
- data. compile_fail = true ;
1248
- seen_rust_tags = !seen_other_tags || seen_rust_tags;
1249
- data. no_run = true ;
1250
- }
1251
- LangStringToken :: LangToken ( x) if x. starts_with ( "edition" ) => {
1252
- data. edition = x[ 7 ..] . parse :: < Edition > ( ) . ok ( ) ;
1253
- }
1254
- LangStringToken :: LangToken ( x)
1255
- if allow_error_code_check && x. starts_with ( 'E' ) && x. len ( ) == 5 =>
1256
- {
1257
- if x[ 1 ..] . parse :: < u32 > ( ) . is_ok ( ) {
1258
- data. error_codes . push ( x. to_owned ( ) ) ;
1263
+ LangStringToken :: LangToken ( "ignore" ) => {
1264
+ data. ignore = Ignore :: All ;
1265
+ seen_rust_tags = !seen_other_tags;
1266
+ }
1267
+ LangStringToken :: LangToken ( x) if x. starts_with ( "ignore-" ) => {
1268
+ if enable_per_target_ignores {
1269
+ ignores. push ( x. trim_start_matches ( "ignore-" ) . to_owned ( ) ) ;
1270
+ seen_rust_tags = !seen_other_tags;
1271
+ }
1272
+ }
1273
+ LangStringToken :: LangToken ( "rust" ) => {
1274
+ data. rust = true ;
1275
+ seen_rust_tags = true ;
1276
+ }
1277
+ LangStringToken :: LangToken ( "custom" ) => {
1278
+ if custom_code_classes_in_docs {
1279
+ seen_custom_tag = true ;
1280
+ } else {
1281
+ seen_other_tags = true ;
1282
+ }
1283
+ }
1284
+ LangStringToken :: LangToken ( "test_harness" ) => {
1285
+ data. test_harness = true ;
1259
1286
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1260
- } else {
1261
- seen_other_tags = true ;
1262
1287
}
1263
- }
1264
- LangStringToken :: LangToken ( x) if extra. is_some ( ) => {
1265
- let s = x. to_lowercase ( ) ;
1266
- if let Some ( ( flag, help) ) = if s == "compile-fail"
1267
- || s == "compile_fail"
1268
- || s == "compilefail"
1288
+ LangStringToken :: LangToken ( "compile_fail" ) => {
1289
+ data. compile_fail = true ;
1290
+ seen_rust_tags = !seen_other_tags || seen_rust_tags;
1291
+ data. no_run = true ;
1292
+ }
1293
+ LangStringToken :: LangToken ( x) if x. starts_with ( "edition" ) => {
1294
+ data. edition = x[ 7 ..] . parse :: < Edition > ( ) . ok ( ) ;
1295
+ }
1296
+ LangStringToken :: LangToken ( x)
1297
+ if allow_error_code_check && x. starts_with ( 'E' ) && x. len ( ) == 5 =>
1269
1298
{
1270
- Some ( (
1271
- "compile_fail" ,
1272
- "the code block will either not be tested if not marked as a rust one \
1273
- or won't fail if it compiles successfully",
1274
- ) )
1275
- } else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
1276
- Some ( (
1277
- "should_panic" ,
1278
- "the code block will either not be tested if not marked as a rust one \
1279
- or won't fail if it doesn't panic when running",
1280
- ) )
1281
- } else if s == "no-run" || s == "no_run" || s == "norun" {
1282
- Some ( (
1283
- "no_run" ,
1284
- "the code block will either not be tested if not marked as a rust one \
1285
- or will be run (which you might not want)",
1286
- ) )
1287
- } else if s == "test-harness" || s == "test_harness" || s == "testharness" {
1288
- Some ( (
1289
- "test_harness" ,
1290
- "the code block will either not be tested if not marked as a rust one \
1291
- or the code will be wrapped inside a main function",
1292
- ) )
1293
- } else {
1294
- None
1295
- } {
1296
- if let Some ( extra) = extra {
1297
- extra. error_invalid_codeblock_attr_with_help (
1298
- format ! ( "unknown attribute `{x}`. Did you mean `{flag}`?" ) ,
1299
- help,
1300
- ) ;
1299
+ if x[ 1 ..] . parse :: < u32 > ( ) . is_ok ( ) {
1300
+ data. error_codes . push ( x. to_owned ( ) ) ;
1301
+ seen_rust_tags = !seen_other_tags || seen_rust_tags;
1302
+ } else {
1303
+ seen_other_tags = true ;
1301
1304
}
1302
1305
}
1303
- seen_other_tags = true ;
1304
- data. unknown . push ( x. to_owned ( ) ) ;
1305
- }
1306
- LangStringToken :: LangToken ( x) => {
1307
- seen_other_tags = true ;
1308
- data. unknown . push ( x. to_owned ( ) ) ;
1309
- }
1310
- LangStringToken :: KeyValueAttribute ( key, value) => {
1311
- if custom_code_classes_in_docs {
1312
- if key == "class" {
1313
- data. added_classes . push ( value. to_owned ( ) ) ;
1314
- } else if let Some ( extra) = extra {
1315
- extra. error_invalid_codeblock_attr ( format ! (
1316
- "unsupported attribute `{key}`"
1317
- ) ) ;
1306
+ LangStringToken :: LangToken ( x) if extra. is_some ( ) => {
1307
+ let s = x. to_lowercase ( ) ;
1308
+ if let Some ( ( flag, help) ) = if s == "compile-fail"
1309
+ || s == "compile_fail"
1310
+ || s == "compilefail"
1311
+ {
1312
+ Some ( (
1313
+ "compile_fail" ,
1314
+ "the code block will either not be tested if not marked as a rust one \
1315
+ or won't fail if it compiles successfully",
1316
+ ) )
1317
+ } else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
1318
+ Some ( (
1319
+ "should_panic" ,
1320
+ "the code block will either not be tested if not marked as a rust one \
1321
+ or won't fail if it doesn't panic when running",
1322
+ ) )
1323
+ } else if s == "no-run" || s == "no_run" || s == "norun" {
1324
+ Some ( (
1325
+ "no_run" ,
1326
+ "the code block will either not be tested if not marked as a rust one \
1327
+ or will be run (which you might not want)",
1328
+ ) )
1329
+ } else if s == "test-harness" || s == "test_harness" || s == "testharness" {
1330
+ Some ( (
1331
+ "test_harness" ,
1332
+ "the code block will either not be tested if not marked as a rust one \
1333
+ or the code will be wrapped inside a main function",
1334
+ ) )
1335
+ } else {
1336
+ None
1337
+ } {
1338
+ if let Some ( extra) = extra {
1339
+ extra. error_invalid_codeblock_attr_with_help (
1340
+ format ! ( "unknown attribute `{x}`. Did you mean `{flag}`?" ) ,
1341
+ help,
1342
+ ) ;
1343
+ }
1318
1344
}
1319
- } else {
1320
1345
seen_other_tags = true ;
1346
+ data. unknown . push ( x. to_owned ( ) ) ;
1347
+ }
1348
+ LangStringToken :: LangToken ( x) => {
1349
+ seen_other_tags = true ;
1350
+ data. unknown . push ( x. to_owned ( ) ) ;
1351
+ }
1352
+ LangStringToken :: KeyValueAttribute ( key, value) => {
1353
+ if custom_code_classes_in_docs {
1354
+ if key == "class" {
1355
+ data. added_classes . push ( value. to_owned ( ) ) ;
1356
+ } else if let Some ( extra) = extra {
1357
+ extra. error_invalid_codeblock_attr ( format ! (
1358
+ "unsupported attribute `{key}`"
1359
+ ) ) ;
1360
+ }
1361
+ } else {
1362
+ seen_other_tags = true ;
1363
+ }
1364
+ }
1365
+ LangStringToken :: ClassAttribute ( class) => {
1366
+ data. added_classes . push ( class. to_owned ( ) ) ;
1321
1367
}
1322
- }
1323
- LangStringToken :: ClassAttribute ( class) => {
1324
- data. added_classes . push ( class. to_owned ( ) ) ;
1325
1368
}
1326
1369
}
1370
+ } ;
1371
+
1372
+ if custom_code_classes_in_docs {
1373
+ call ( & mut TagIterator :: new ( string, extra) . into_iter ( ) )
1374
+ } else {
1375
+ call ( & mut tokens ( string) )
1327
1376
}
1328
1377
1329
1378
// ignore-foo overrides ignore
0 commit comments