Skip to content

Commit 75b0568

Browse files
committed
Auto merge of #41992 - ollie27:linkchecker_base, r=alexcrichton
linkchecker: Add support for <base> tag

Add support for the HTML <base> tag as used by mdBook so The Unstable Book can be checked.

Also clean up a few things:
* Stop checking the name attribute. It should never have been used and mdBook has since been fixed not to use it.
* Make sure we only check HTML files.
* Remove a few unnecessary allocations.

Finally, dead links in The Unstable Book have been fixed.
2 parents ac254fb + d4f20eb commit 75b0568

File tree

11 files changed

+45
-67
lines changed

11 files changed

+45
-67
lines changed

src/doc/unstable-book/src/language-features/advanced-slice-patterns.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ The tracking issue for this feature is: [#23121]
44

55
[#23121]: https://github.com/rust-lang/rust/issues/23121
66

7-
See also [`slice_patterns`](slice-patterns.html).
7+
See also [`slice_patterns`](language-features/slice-patterns.html).
88

99
------------------------
1010

src/doc/unstable-book/src/language-features/asm.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,4 +190,4 @@ constraints, etc.
190190
[llvm-docs]: http://llvm.org/docs/LangRef.html#inline-assembler-expressions
191191

192192
If you need more power and don't mind losing some of the niceties of
193-
`asm!`, check out [global_asm](global_asm.html).
193+
`asm!`, check out [global_asm](language-features/global_asm.html).

src/doc/unstable-book/src/language-features/box-patterns.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ The tracking issue for this feature is: [#29641]
44

55
[#29641]: https://github.com/rust-lang/rust/issues/29641
66

7-
See also [`box_syntax`](box-syntax.html)
7+
See also [`box_syntax`](language-features/box-syntax.html)
88

99
------------------------
1010

src/doc/unstable-book/src/language-features/box-syntax.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ The tracking issue for this feature is: [#27779]
44

55
[#27779]: https://github.com/rust-lang/rust/issues/27779
66

7-
See also [`box_patterns`](box-patterns.html)
7+
See also [`box_patterns`](language-features/box-patterns.html)
88

99
------------------------
1010

src/doc/unstable-book/src/language-features/global_asm.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,5 +74,5 @@ usages and placed the larger, single usage in the crate root.
7474

7575
If you don't need quite as much power and flexibility as
7676
`global_asm!` provides, and you don't mind restricting your inline
77-
assembly to `fn` bodies only, you might try the [asm](asm.html)
78-
feature instead.
77+
assembly to `fn` bodies only, you might try the
78+
[asm](language-features/asm.html) feature instead.

src/doc/unstable-book/src/language-features/plugin-registrar.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,6 @@ This feature is part of "compiler plugins." It will often be used with the
88
[`plugin`] and `rustc_private` features as well. For more details, see
99
their docs.
1010

11-
[`plugin`]: plugin.html
11+
[`plugin`]: language-features/plugin.html
1212

1313
------------------------

src/doc/unstable-book/src/language-features/plugin.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ The tracking issue for this feature is: [#29597]
88
This feature is part of "compiler plugins." It will often be used with the
99
[`plugin_registrar`] and `rustc_private` features.
1010

11-
[`plugin_registrar`]: plugin-registrar.html
11+
[`plugin_registrar`]: language-features/plugin-registrar.html
1212

1313
------------------------
1414

src/doc/unstable-book/src/language-features/slice-patterns.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ The tracking issue for this feature is: [#23121]
44

55
[#23121]: https://github.com/rust-lang/rust/issues/23121
66

7-
See also [`advanced_slice_patterns`](advanced-slice-patterns.html).
7+
See also
8+
[`advanced_slice_patterns`](language-features/advanced-slice-patterns.html).
89

910
------------------------
1011

src/doc/unstable-book/src/library-features/alloc-jemalloc.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]
44

55
[#33082]: https://github.com/rust-lang/rust/issues/33082
66

7-
See also [`alloc_system`](alloc-system.html).
7+
See also [`alloc_system`](library-features/alloc-system.html).
88

99
------------------------
1010

src/doc/unstable-book/src/library-features/alloc-system.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ The tracking issue for this feature is: [#33082]
44

55
[#33082]: https://github.com/rust-lang/rust/issues/33082
66

7-
See also [`alloc_jemalloc`](alloc-jemalloc.html).
7+
See also [`alloc_jemalloc`](library-features/alloc-jemalloc.html).
88

99
------------------------
1010

src/tools/linkchecker/main.rs

+33-56
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ macro_rules! t {
4141
}
4242

4343
fn main() {
44-
let docs = env::args().nth(1).unwrap();
44+
let docs = env::args_os().nth(1).unwrap();
4545
let docs = env::current_dir().unwrap().join(docs);
4646
let mut errors = false;
4747
walk(&mut HashMap::new(), &docs, &docs, &mut errors);
@@ -65,15 +65,14 @@ enum Redirect {
6565
struct FileEntry {
6666
source: String,
6767
ids: HashSet<String>,
68-
names: HashSet<String>,
6968
}
7069

7170
type Cache = HashMap<PathBuf, FileEntry>;
7271

7372
impl FileEntry {
7473
fn parse_ids(&mut self, file: &Path, contents: &str, errors: &mut bool) {
7574
if self.ids.is_empty() {
76-
with_attrs_in_source(contents, " id", |fragment, i| {
75+
with_attrs_in_source(contents, " id", |fragment, i, _| {
7776
let frag = fragment.trim_left_matches("#").to_owned();
7877
if !self.ids.insert(frag) {
7978
*errors = true;
@@ -82,15 +81,6 @@ impl FileEntry {
8281
});
8382
}
8483
}
85-
86-
fn parse_names(&mut self, contents: &str) {
87-
if self.names.is_empty() {
88-
with_attrs_in_source(contents, " name", |fragment, _| {
89-
let frag = fragment.trim_left_matches("#").to_owned();
90-
self.names.insert(frag);
91-
});
92-
}
93-
}
9484
}
9585

9686
fn walk(cache: &mut Cache, root: &Path, dir: &Path, errors: &mut bool) {
@@ -116,15 +106,8 @@ fn check(cache: &mut Cache,
116106
file: &Path,
117107
errors: &mut bool)
118108
-> Option<PathBuf> {
119-
// ignore js files as they are not prone to errors as the rest of the
120-
// documentation is and they otherwise bring up false positives.
121-
if file.extension().and_then(|s| s.to_str()) == Some("js") {
122-
return None;
123-
}
124-
125-
// ignore handlebars files as they use {{}} to build links, we only
126-
// want to test the generated files
127-
if file.extension().and_then(|s| s.to_str()) == Some("hbs") {
109+
// Ignore non-HTML files.
110+
if file.extension().and_then(|s| s.to_str()) != Some("html") {
128111
return None;
129112
}
130113

@@ -147,13 +130,7 @@ fn check(cache: &mut Cache,
147130
return None;
148131
}
149132

150-
// mdbook uses the HTML <base> tag to handle links for subdirectories, which
151-
// linkchecker doesn't support
152-
if file.to_str().unwrap().contains("unstable-book") {
153-
return None;
154-
}
155-
156-
let res = load_file(cache, root, PathBuf::from(file), SkipRedirect);
133+
let res = load_file(cache, root, file, SkipRedirect);
157134
let (pretty_file, contents) = match res {
158135
Ok(res) => res,
159136
Err(_) => return None,
@@ -162,13 +139,10 @@ fn check(cache: &mut Cache,
162139
cache.get_mut(&pretty_file)
163140
.unwrap()
164141
.parse_ids(&pretty_file, &contents, errors);
165-
cache.get_mut(&pretty_file)
166-
.unwrap()
167-
.parse_names(&contents);
168142
}
169143

170144
// Search for anything that's the regex 'href[ ]*=[ ]*".*?"'
171-
with_attrs_in_source(&contents, " href", |url, i| {
145+
with_attrs_in_source(&contents, " href", |url, i, base| {
172146
// Ignore external URLs
173147
if url.starts_with("http:") || url.starts_with("https:") ||
174148
url.starts_with("javascript:") || url.starts_with("ftp:") ||
@@ -184,9 +158,9 @@ fn check(cache: &mut Cache,
184158
// Once we've plucked out the URL, parse it using our base url and
185159
// then try to extract a file path.
186160
let mut path = file.to_path_buf();
187-
if !url.is_empty() {
161+
if !base.is_empty() || !url.is_empty() {
188162
path.pop();
189-
for part in Path::new(url).components() {
163+
for part in Path::new(base).join(url).components() {
190164
match part {
191165
Component::Prefix(_) |
192166
Component::RootDir => panic!(),
@@ -197,13 +171,6 @@ fn check(cache: &mut Cache,
197171
}
198172
}
199173

200-
if let Some(extension) = path.extension() {
201-
// don't check these files
202-
if extension == "png" {
203-
return;
204-
}
205-
}
206-
207174
// Alright, if we've found a file name then this file had better
208175
// exist! If it doesn't then we register and print an error.
209176
if path.exists() {
@@ -218,11 +185,17 @@ fn check(cache: &mut Cache,
218185
pretty_path.display());
219186
return;
220187
}
221-
let res = load_file(cache, root, path.clone(), FromRedirect(false));
188+
if let Some(extension) = path.extension() {
189+
// Ignore non-HTML files.
190+
if extension != "html" {
191+
return;
192+
}
193+
}
194+
let res = load_file(cache, root, &path, FromRedirect(false));
222195
let (pretty_path, contents) = match res {
223196
Ok(res) => res,
224197
Err(LoadError::IOError(err)) => {
225-
panic!(format!("error loading {}: {}", path.display(), err));
198+
panic!("error loading {}: {}", path.display(), err);
226199
}
227200
Err(LoadError::BrokenRedirect(target, _)) => {
228201
*errors = true;
@@ -245,11 +218,10 @@ fn check(cache: &mut Cache,
245218

246219
let entry = &mut cache.get_mut(&pretty_path).unwrap();
247220
entry.parse_ids(&pretty_path, &contents, errors);
248-
entry.parse_names(&contents);
249221

250-
if !(entry.ids.contains(*fragment) || entry.names.contains(*fragment)) {
222+
if !entry.ids.contains(*fragment) {
251223
*errors = true;
252-
print!("{}:{}: broken link fragment ",
224+
print!("{}:{}: broken link fragment ",
253225
pretty_file.display(),
254226
i + 1);
255227
println!("`#{}` pointing to `{}`", fragment, pretty_path.display());
@@ -267,7 +239,7 @@ fn check(cache: &mut Cache,
267239

268240
fn load_file(cache: &mut Cache,
269241
root: &Path,
270-
mut file: PathBuf,
242+
file: &Path,
271243
redirect: Redirect)
272244
-> Result<(PathBuf, String), LoadError> {
273245
let mut contents = String::new();
@@ -279,9 +251,9 @@ fn load_file(cache: &mut Cache,
279251
None
280252
}
281253
Entry::Vacant(entry) => {
282-
let mut fp = File::open(file.clone()).map_err(|err| {
254+
let mut fp = File::open(file).map_err(|err| {
283255
if let FromRedirect(true) = redirect {
284-
LoadError::BrokenRedirect(file.clone(), err)
256+
LoadError::BrokenRedirect(file.to_path_buf(), err)
285257
} else {
286258
LoadError::IOError(err)
287259
}
@@ -297,17 +269,14 @@ fn load_file(cache: &mut Cache,
297269
entry.insert(FileEntry {
298270
source: contents.clone(),
299271
ids: HashSet::new(),
300-
names: HashSet::new(),
301272
});
302273
}
303274
maybe
304275
}
305276
};
306-
file.pop();
307-
match maybe_redirect.map(|url| file.join(url)) {
277+
match maybe_redirect.map(|url| file.parent().unwrap().join(url)) {
308278
Some(redirect_file) => {
309-
let path = PathBuf::from(redirect_file);
310-
load_file(cache, root, path, FromRedirect(true))
279+
load_file(cache, root, &redirect_file, FromRedirect(true))
311280
}
312281
None => Ok((pretty_file, contents)),
313282
}
@@ -329,10 +298,14 @@ fn maybe_redirect(source: &str) -> Option<String> {
329298
})
330299
}
331300

332-
fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f: F) {
301+
fn with_attrs_in_source<F: FnMut(&str, usize, &str)>(contents: &str, attr: &str, mut f: F) {
302+
let mut base = "";
333303
for (i, mut line) in contents.lines().enumerate() {
334304
while let Some(j) = line.find(attr) {
335305
let rest = &line[j + attr.len()..];
306+
// The base tag should always be the first link in the document so
307+
// we can get away with using one pass.
308+
let is_base = line[..j].ends_with("<base");
336309
line = rest;
337310
let pos_equals = match rest.find("=") {
338311
Some(i) => i,
@@ -358,7 +331,11 @@ fn with_attrs_in_source<F: FnMut(&str, usize)>(contents: &str, attr: &str, mut f
358331
Some(i) => &rest[..i],
359332
None => continue,
360333
};
361-
f(url, i)
334+
if is_base {
335+
base = url;
336+
continue;
337+
}
338+
f(url, i, base)
362339
}
363340
}
364341
}

0 commit comments

Comments
 (0)