Skip to content

Commit a4575eb

Browse files
committed
Remove EscapeAll mode
EscapeAll tries to escape all non-ASCII characters. Unfortunately, HTML5 numeric entities can't represent most codepoints between U+0080 and U+009F. The only way to handle those is to use XML entity rules, but this is an HTML5 entity library. Also change `&apos;` to `&#39;`. It turns out `&apos;` isn't part of HTML until HTML5, so using `&#39;` is more compatible with pre-HTML5 parsers.
1 parent 63d3b2f commit a4575eb

File tree

2 files changed

+22
-63
lines changed

2 files changed

+22
-63
lines changed

src/libhtml/escape.rs

+19-60
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
//! This module contains `Writer`s for escaping/unescaping HTML.
1414
1515
use std::io::{Writer, IoResult};
16-
use std::{char, str};
16+
use std::char;
1717
use entity::ENTITIES;
1818

1919
/// A `Writer` adaptor that escapes any HTML characters written to it.
@@ -32,9 +32,7 @@ pub enum EscapeMode {
3232
/// Escapes characters for double-quoted attribute values. Escapes `&"`.
3333
EscapeAttr,
3434
/// Escapes characters for single-quoted attribute values. Escapes `&'`.
35-
EscapeSingleQuoteAttr,
36-
/// Escapes all non-printable or non-ASCII characters, with the exception of U+0000.
37-
EscapeAll
35+
EscapeSingleQuoteAttr
3836
}
3937

4038
impl<W: Writer> EscapeWriter<W> {
@@ -64,63 +62,24 @@ impl<W: Writer> EscapeWriter<W> {
6462

6563
impl<W: Writer> Writer for EscapeWriter<W> {
6664
fn write(&mut self, bytes: &[u8]) -> IoResult<()> {
67-
if self.mode == EscapeAll {
68-
// This mode needs to operate on chars. Everything else is handled below.
69-
let s = str::from_utf8_lossy(bytes);
70-
let s = s.as_slice();
71-
let mut last = 0u;
72-
for (i, c) in s.char_indices() {
73-
match c {
74-
'&'|'<'|'>'|'"'|'\'' => (),
75-
'\0' | '\x20'..'\x7E' => continue,
76-
_ => ()
77-
}
78-
if last < i {
79-
try!(self.inner.write_str(s.slice(last, i)));
80-
}
81-
match c {
82-
'&'|'<'|'>'|'"'|'\'' => {
83-
let ent = match c {
84-
'&' => "&amp;",
85-
'<' => "&lt;",
86-
'>' => "&gt;",
87-
'"' => "&quot;",
88-
'\'' => "&apos;",
89-
_ => unreachable!()
90-
};
91-
try!(self.inner.write_str(ent));
92-
}
93-
_ => {
94-
let c = c as u32;
95-
try!(write!(&mut self.inner as &mut ::std::io::Writer, r"&\#x{:x};", c));
96-
}
97-
}
98-
last = i + char::len_utf8_bytes(c);
99-
}
100-
if last < s.as_slice().len() {
101-
try!(self.inner.write_str(s.slice_from(last)));
102-
}
103-
} else {
104-
// We only want to escape ASCII values, so we can safely operate on bytes
105-
let mut last = 0;
106-
for (i, b) in bytes.iter().enumerate() {
107-
let ent = match (self.mode, *b as char) {
108-
(_,'&') => "&amp;",
109-
(EscapeDefault,'<') |(EscapeText,'<') => "&lt;",
110-
(EscapeDefault,'>') |(EscapeText,'>') => "&gt;",
111-
(EscapeDefault,'\'')|(EscapeSingleQuoteAttr,'\'') => "&apos;",
112-
(EscapeDefault,'"') |(EscapeAttr,'"') => "&quot;",
113-
_ => continue
114-
};
115-
if last < i {
116-
try!(self.inner.write(bytes.slice(last, i)));
117-
}
118-
try!(self.inner.write_str(ent));
119-
last = i + 1;
120-
}
121-
if last < bytes.len() {
122-
try!(self.inner.write(bytes.slice_from(last)));
65+
let mut last = 0;
66+
for (i, b) in bytes.iter().enumerate() {
67+
let ent = match (self.mode, *b as char) {
68+
(_,'&') => "&amp;",
69+
(EscapeDefault,'<') |(EscapeText,'<') => "&lt;",
70+
(EscapeDefault,'>') |(EscapeText,'>') => "&gt;",
71+
(EscapeDefault,'\'')|(EscapeSingleQuoteAttr,'\'') => "&#39;",
72+
(EscapeDefault,'"') |(EscapeAttr,'"') => "&quot;",
73+
_ => continue
74+
};
75+
if last < i {
76+
try!(self.inner.write(bytes.slice(last, i)));
12377
}
78+
try!(self.inner.write_str(ent));
79+
last = i + 1;
80+
}
81+
if last < bytes.len() {
82+
try!(self.inner.write(bytes.slice_from(last)));
12483
}
12584
Ok(())
12685
}

src/libhtml/tests.rs

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ impl fmt::Show for UnTest {
3535
fn test_escape() {
3636
let s = r#"<script src="evil.domain?foo&" type='baz'>"#;
3737
assert_eq!(escape(s).as_slice(), "&lt;script src=&quot;evil.domain?foo&amp;&quot; \
38-
type=&apos;baz&apos;&gt;");
38+
type=&#39;baz&#39;&gt;");
3939

4040
let t = Test("foo".to_strbuf());
4141
assert_eq!(escape(t), "&lt;Test&gt;foo&lt;/Test&gt;".to_owned());
@@ -71,9 +71,9 @@ mod python {
7171

7272
#[test]
7373
fn test_escape() {
74-
// python converts ' to &#x27; but we go to &apos;
74+
// python converts ' to &#x27; but we go to &#39;
7575
assert_eq!(escape(r#"'<script>"&foo;"</script>'"#).as_slice(),
76-
"&apos;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&apos;");
76+
"&#39;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#39;");
7777
let mut w = EscapeWriter::new(MemWriter::new(), EscapeText);
7878
assert!(w.write_str(r#"'<script>"&foo;"</script>'"#).is_ok());
7979
assert_eq!(w.unwrap().unwrap().as_slice(),

0 commit comments

Comments
 (0)