Skip to content

Commit 52a0d13

Browse files
Rollup merge of rust-lang#115947 - GuillaumeGomez:custom_code_classes_in_docs-warning, r=notriddle
Custom code classes in docs warning Fixes rust-lang#115938. This PR does two things: 1. Unless the `custom_code_classes_in_docs` feature is enabled, it will use the old codeblock tag parser. 2. If there is a codeblock tag that starts with a `.`, it will emit a behaviour change warning. Hopefully this is the last missing part for this feature until stabilization. Follow-up of rust-lang#110800. r? `@notriddle`
2 parents 0060db7 + 295ec09 commit 52a0d13

6 files changed

+198
-143
lines changed

src/librustdoc/html/markdown.rs

+162-113
Original file line numberDiff line numberDiff line change
@@ -868,7 +868,7 @@ impl<'tcx> ExtraInfo<'tcx> {
868868

869869
#[derive(Eq, PartialEq, Clone, Debug)]
870870
pub(crate) struct LangString {
871-
original: String,
871+
pub(crate) original: String,
872872
pub(crate) should_panic: bool,
873873
pub(crate) no_run: bool,
874874
pub(crate) ignore: Ignore,
@@ -893,11 +893,13 @@ pub(crate) enum Ignore {
893893
/// ```eBNF
894894
/// lang-string = *(token-list / delimited-attribute-list / comment)
895895
///
896-
/// bareword = CHAR *(CHAR)
896+
/// bareword = LEADINGCHAR *(CHAR)
897+
/// bareword-without-leading-char = CHAR *(CHAR)
897898
/// quoted-string = QUOTE *(NONQUOTE) QUOTE
898899
/// token = bareword / quoted-string
900+
/// token-without-leading-char = bareword-without-leading-char / quoted-string
899901
/// sep = COMMA/WS *(COMMA/WS)
900-
/// attribute = (DOT token)/(token EQUAL token)
902+
/// attribute = (DOT token)/(token EQUAL token-without-leading-char)
901903
/// attribute-list = [sep] attribute *(sep attribute) [sep]
902904
/// delimited-attribute-list = OPEN-CURLY-BRACKET attribute-list CLOSE-CURLY-BRACKET
903905
/// token-list = [sep] token *(sep token) [sep]
@@ -907,8 +909,15 @@ pub(crate) enum Ignore {
907909
/// CLOSE_PARENT = ")"
908910
/// OPEN-CURLY-BRACKET = "{"
909911
/// CLOSE-CURLY-BRACKET = "}"
910-
/// CHAR = ALPHA / DIGIT / "_" / "-" / ":"
911-
/// QUOTE = %x22
912+
/// LEADINGCHAR = ALPHA / DIGIT / "_" / "-" / ":"
913+
/// ; CHAR is LEADINGCHAR plus all ASCII punctuation except comma, quote, apostrophe, equals, backslash, grave (backquote), braces, parentheses and square brackets.
914+
/// ; Comma is used to separate language tokens, so it can't be used in one.
915+
/// ; Quote is used to allow otherwise-disallowed characters in language tokens.
916+
/// ; Equals is used to make key=value pairs in attribute blocks.
917+
/// ; Backslash and grave are special Markdown characters.
918+
/// ; Braces are used to start an attribute block.
919+
/// CHAR = ALPHA / DIGIT / "_" / "-" / ":" / "." / "!" / "#" / "$" / "%" / "&" / "*" / "+" / "/" /
920+
/// ";" / "<" / ">" / "?" / "@" / "^" / "|" / "~"
912921
/// NONQUOTE = %x09 / %x20 / %x21 / %x23-7E ; TAB / SPACE / all printable characters except `"`
913922
/// COMMA = ","
914923
/// DOT = "."
@@ -932,9 +941,12 @@ pub(crate) enum LangStringToken<'a> {
932941
KeyValueAttribute(&'a str, &'a str),
933942
}
934943

935-
fn is_bareword_char(c: char) -> bool {
944+
fn is_leading_char(c: char) -> bool {
936945
c == '_' || c == '-' || c == ':' || c.is_ascii_alphabetic() || c.is_ascii_digit()
937946
}
947+
fn is_bareword_char(c: char) -> bool {
948+
is_leading_char(c) || ".!#$%&*+/;<>?@^|~".contains(c)
949+
}
938950
fn is_separator(c: char) -> bool {
939951
c == ' ' || c == ',' || c == '\t'
940952
}
@@ -1077,7 +1089,7 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
10771089
return self.next();
10781090
} else if c == '.' {
10791091
return self.parse_class(pos);
1080-
} else if c == '"' || is_bareword_char(c) {
1092+
} else if c == '"' || is_leading_char(c) {
10811093
return self.parse_key_value(c, pos);
10821094
} else {
10831095
self.emit_error(format!("unexpected character `{c}`"));
@@ -1107,16 +1119,18 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
11071119
return None;
11081120
}
11091121
let indices = self.parse_string(pos)?;
1110-
if let Some((_, c)) = self.inner.peek().copied() && c != '{' && !is_separator(c) && c != '(' {
1122+
if let Some((_, c)) = self.inner.peek().copied() &&
1123+
c != '{' &&
1124+
!is_separator(c) &&
1125+
c != '('
1126+
{
11111127
self.emit_error(format!("expected ` `, `{{` or `,` after `\"`, found `{c}`"));
11121128
return None;
11131129
}
11141130
return Some(LangStringToken::LangToken(&self.data[indices.start..indices.end]));
11151131
} else if c == '{' {
11161132
self.is_in_attribute_block = true;
11171133
return self.next();
1118-
} else if is_bareword_char(c) {
1119-
continue;
11201134
} else if is_separator(c) {
11211135
if pos != start {
11221136
return Some(LangStringToken::LangToken(&self.data[start..pos]));
@@ -1130,6 +1144,10 @@ impl<'a, 'tcx> TagIterator<'a, 'tcx> {
11301144
return Some(LangStringToken::LangToken(&self.data[start..pos]));
11311145
}
11321146
return self.next();
1147+
} else if pos == start && is_leading_char(c) {
1148+
continue;
1149+
} else if pos != start && is_bareword_char(c) {
1150+
continue;
11331151
} else {
11341152
self.emit_error(format!("unexpected character `{c}`"));
11351153
return None;
@@ -1158,6 +1176,29 @@ impl<'a, 'tcx> Iterator for TagIterator<'a, 'tcx> {
11581176
}
11591177
}
11601178

1179+
fn tokens(string: &str) -> impl Iterator<Item = LangStringToken<'_>> {
1180+
// Pandoc, which Rust once used for generating documentation,
1181+
// expects lang strings to be surrounded by `{}` and for each token
1182+
// to be preceded by a `.`. Since some of these lang strings are still
1183+
// loose in the wild, we strip a pair of surrounding `{}` from the lang
1184+
// string and a leading `.` from each token.
1185+
1186+
let string = string.trim();
1187+
1188+
let first = string.chars().next();
1189+
let last = string.chars().last();
1190+
1191+
let string =
1192+
if first == Some('{') && last == Some('}') { &string[1..string.len() - 1] } else { string };
1193+
1194+
string
1195+
.split(|c| c == ',' || c == ' ' || c == '\t')
1196+
.map(str::trim)
1197+
.map(|token| token.strip_prefix('.').unwrap_or(token))
1198+
.filter(|token| !token.is_empty())
1199+
.map(|token| LangStringToken::LangToken(token))
1200+
}
1201+
11611202
impl Default for LangString {
11621203
fn default() -> Self {
11631204
Self {
@@ -1208,122 +1249,130 @@ impl LangString {
12081249

12091250
data.original = string.to_owned();
12101251

1211-
for token in TagIterator::new(string, extra) {
1212-
match token {
1213-
LangStringToken::LangToken("should_panic") => {
1214-
data.should_panic = true;
1215-
seen_rust_tags = !seen_other_tags;
1216-
}
1217-
LangStringToken::LangToken("no_run") => {
1218-
data.no_run = true;
1219-
seen_rust_tags = !seen_other_tags;
1220-
}
1221-
LangStringToken::LangToken("ignore") => {
1222-
data.ignore = Ignore::All;
1223-
seen_rust_tags = !seen_other_tags;
1224-
}
1225-
LangStringToken::LangToken(x) if x.starts_with("ignore-") => {
1226-
if enable_per_target_ignores {
1227-
ignores.push(x.trim_start_matches("ignore-").to_owned());
1252+
let mut call = |tokens: &mut dyn Iterator<Item = LangStringToken<'_>>| {
1253+
for token in tokens {
1254+
match token {
1255+
LangStringToken::LangToken("should_panic") => {
1256+
data.should_panic = true;
12281257
seen_rust_tags = !seen_other_tags;
12291258
}
1230-
}
1231-
LangStringToken::LangToken("rust") => {
1232-
data.rust = true;
1233-
seen_rust_tags = true;
1234-
}
1235-
LangStringToken::LangToken("custom") => {
1236-
if custom_code_classes_in_docs {
1237-
seen_custom_tag = true;
1238-
} else {
1239-
seen_other_tags = true;
1259+
LangStringToken::LangToken("no_run") => {
1260+
data.no_run = true;
1261+
seen_rust_tags = !seen_other_tags;
12401262
}
1241-
}
1242-
LangStringToken::LangToken("test_harness") => {
1243-
data.test_harness = true;
1244-
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1245-
}
1246-
LangStringToken::LangToken("compile_fail") => {
1247-
data.compile_fail = true;
1248-
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1249-
data.no_run = true;
1250-
}
1251-
LangStringToken::LangToken(x) if x.starts_with("edition") => {
1252-
data.edition = x[7..].parse::<Edition>().ok();
1253-
}
1254-
LangStringToken::LangToken(x)
1255-
if allow_error_code_check && x.starts_with('E') && x.len() == 5 =>
1256-
{
1257-
if x[1..].parse::<u32>().is_ok() {
1258-
data.error_codes.push(x.to_owned());
1263+
LangStringToken::LangToken("ignore") => {
1264+
data.ignore = Ignore::All;
1265+
seen_rust_tags = !seen_other_tags;
1266+
}
1267+
LangStringToken::LangToken(x) if x.starts_with("ignore-") => {
1268+
if enable_per_target_ignores {
1269+
ignores.push(x.trim_start_matches("ignore-").to_owned());
1270+
seen_rust_tags = !seen_other_tags;
1271+
}
1272+
}
1273+
LangStringToken::LangToken("rust") => {
1274+
data.rust = true;
1275+
seen_rust_tags = true;
1276+
}
1277+
LangStringToken::LangToken("custom") => {
1278+
if custom_code_classes_in_docs {
1279+
seen_custom_tag = true;
1280+
} else {
1281+
seen_other_tags = true;
1282+
}
1283+
}
1284+
LangStringToken::LangToken("test_harness") => {
1285+
data.test_harness = true;
12591286
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1260-
} else {
1261-
seen_other_tags = true;
12621287
}
1263-
}
1264-
LangStringToken::LangToken(x) if extra.is_some() => {
1265-
let s = x.to_lowercase();
1266-
if let Some((flag, help)) = if s == "compile-fail"
1267-
|| s == "compile_fail"
1268-
|| s == "compilefail"
1288+
LangStringToken::LangToken("compile_fail") => {
1289+
data.compile_fail = true;
1290+
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1291+
data.no_run = true;
1292+
}
1293+
LangStringToken::LangToken(x) if x.starts_with("edition") => {
1294+
data.edition = x[7..].parse::<Edition>().ok();
1295+
}
1296+
LangStringToken::LangToken(x)
1297+
if allow_error_code_check && x.starts_with('E') && x.len() == 5 =>
12691298
{
1270-
Some((
1271-
"compile_fail",
1272-
"the code block will either not be tested if not marked as a rust one \
1273-
or won't fail if it compiles successfully",
1274-
))
1275-
} else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
1276-
Some((
1277-
"should_panic",
1278-
"the code block will either not be tested if not marked as a rust one \
1279-
or won't fail if it doesn't panic when running",
1280-
))
1281-
} else if s == "no-run" || s == "no_run" || s == "norun" {
1282-
Some((
1283-
"no_run",
1284-
"the code block will either not be tested if not marked as a rust one \
1285-
or will be run (which you might not want)",
1286-
))
1287-
} else if s == "test-harness" || s == "test_harness" || s == "testharness" {
1288-
Some((
1289-
"test_harness",
1290-
"the code block will either not be tested if not marked as a rust one \
1291-
or the code will be wrapped inside a main function",
1292-
))
1293-
} else {
1294-
None
1295-
} {
1296-
if let Some(extra) = extra {
1297-
extra.error_invalid_codeblock_attr_with_help(
1298-
format!("unknown attribute `{x}`. Did you mean `{flag}`?"),
1299-
help,
1300-
);
1299+
if x[1..].parse::<u32>().is_ok() {
1300+
data.error_codes.push(x.to_owned());
1301+
seen_rust_tags = !seen_other_tags || seen_rust_tags;
1302+
} else {
1303+
seen_other_tags = true;
13011304
}
13021305
}
1303-
seen_other_tags = true;
1304-
data.unknown.push(x.to_owned());
1305-
}
1306-
LangStringToken::LangToken(x) => {
1307-
seen_other_tags = true;
1308-
data.unknown.push(x.to_owned());
1309-
}
1310-
LangStringToken::KeyValueAttribute(key, value) => {
1311-
if custom_code_classes_in_docs {
1312-
if key == "class" {
1313-
data.added_classes.push(value.to_owned());
1314-
} else if let Some(extra) = extra {
1315-
extra.error_invalid_codeblock_attr(format!(
1316-
"unsupported attribute `{key}`"
1317-
));
1306+
LangStringToken::LangToken(x) if extra.is_some() => {
1307+
let s = x.to_lowercase();
1308+
if let Some((flag, help)) = if s == "compile-fail"
1309+
|| s == "compile_fail"
1310+
|| s == "compilefail"
1311+
{
1312+
Some((
1313+
"compile_fail",
1314+
"the code block will either not be tested if not marked as a rust one \
1315+
or won't fail if it compiles successfully",
1316+
))
1317+
} else if s == "should-panic" || s == "should_panic" || s == "shouldpanic" {
1318+
Some((
1319+
"should_panic",
1320+
"the code block will either not be tested if not marked as a rust one \
1321+
or won't fail if it doesn't panic when running",
1322+
))
1323+
} else if s == "no-run" || s == "no_run" || s == "norun" {
1324+
Some((
1325+
"no_run",
1326+
"the code block will either not be tested if not marked as a rust one \
1327+
or will be run (which you might not want)",
1328+
))
1329+
} else if s == "test-harness" || s == "test_harness" || s == "testharness" {
1330+
Some((
1331+
"test_harness",
1332+
"the code block will either not be tested if not marked as a rust one \
1333+
or the code will be wrapped inside a main function",
1334+
))
1335+
} else {
1336+
None
1337+
} {
1338+
if let Some(extra) = extra {
1339+
extra.error_invalid_codeblock_attr_with_help(
1340+
format!("unknown attribute `{x}`. Did you mean `{flag}`?"),
1341+
help,
1342+
);
1343+
}
13181344
}
1319-
} else {
13201345
seen_other_tags = true;
1346+
data.unknown.push(x.to_owned());
1347+
}
1348+
LangStringToken::LangToken(x) => {
1349+
seen_other_tags = true;
1350+
data.unknown.push(x.to_owned());
1351+
}
1352+
LangStringToken::KeyValueAttribute(key, value) => {
1353+
if custom_code_classes_in_docs {
1354+
if key == "class" {
1355+
data.added_classes.push(value.to_owned());
1356+
} else if let Some(extra) = extra {
1357+
extra.error_invalid_codeblock_attr(format!(
1358+
"unsupported attribute `{key}`"
1359+
));
1360+
}
1361+
} else {
1362+
seen_other_tags = true;
1363+
}
1364+
}
1365+
LangStringToken::ClassAttribute(class) => {
1366+
data.added_classes.push(class.to_owned());
13211367
}
1322-
}
1323-
LangStringToken::ClassAttribute(class) => {
1324-
data.added_classes.push(class.to_owned());
13251368
}
13261369
}
1370+
};
1371+
1372+
if custom_code_classes_in_docs {
1373+
call(&mut TagIterator::new(string, extra).into_iter())
1374+
} else {
1375+
call(&mut tokens(string))
13271376
}
13281377

13291378
// ignore-foo overrides ignore

src/librustdoc/html/markdown/tests.rs

+18-3
Original file line numberDiff line numberDiff line change
@@ -226,13 +226,28 @@ fn test_lang_string_parse() {
226226
..Default::default()
227227
});
228228
// error
229-
t(LangString { original: "{.first.second}".into(), rust: true, ..Default::default() });
229+
t(LangString {
230+
original: "{.first.second}".into(),
231+
rust: true,
232+
added_classes: vec!["first.second".into()],
233+
..Default::default()
234+
});
230235
// error
231236
t(LangString { original: "{class=first=second}".into(), rust: true, ..Default::default() });
232237
// error
233-
t(LangString { original: "{class=first.second}".into(), rust: true, ..Default::default() });
238+
t(LangString {
239+
original: "{class=first.second}".into(),
240+
rust: true,
241+
added_classes: vec!["first.second".into()],
242+
..Default::default()
243+
});
234244
// error
235-
t(LangString { original: "{class=.first}".into(), rust: true, ..Default::default() });
245+
t(LangString {
246+
original: "{class=.first}".into(),
247+
added_classes: vec![".first".into()],
248+
rust: true,
249+
..Default::default()
250+
});
236251
t(LangString {
237252
original: r#"{class="first"}"#.into(),
238253
added_classes: vec!["first".into()],

0 commit comments

Comments
 (0)