Skip to content

Commit 86f7f63

Browse files
committed
fix: replace recursive approach to cdata with escaping solution
1 parent 415677f commit 86f7f63

File tree

4 files changed

+60
-9
lines changed

4 files changed

+60
-9
lines changed

lib/loofah/html5/scrub.rb

+40
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,46 @@ def force_correct_attribute_escaping!(node)
182182
end.force_encoding(encoding)
183183
end
184184
end
185+
186+
def cdata_needs_escaping?(node)
187+
# Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
188+
node.cdata? || (Nokogiri.jruby? && node.text? && (node.parent.name == "style" || node.parent.name == "script"))
189+
end
190+
191+
def cdata_escape(node)
192+
escaped_text = escape_tags(node.text)
193+
if Nokogiri.jruby?
194+
node.document.create_text_node(escaped_text)
195+
else
196+
node.document.create_cdata(escaped_text)
197+
end
198+
end
199+
200+
TABLE_FOR_ESCAPE_HTML__ = {
201+
'<' => '&lt;',
202+
'>' => '&gt;',
203+
'&' => '&amp;',
204+
}
205+
206+
def escape_tags(string)
207+
# modified version of CGI.escapeHTML from ruby 3.1
208+
enc = string.encoding
209+
unless enc.ascii_compatible?
210+
if enc.dummy?
211+
origenc = enc
212+
enc = Encoding::Converter.asciicompat_encoding(enc)
213+
string = enc ? string.encode(enc) : string.b
214+
end
215+
table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
216+
string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
217+
string.encode!(origenc) if origenc
218+
string
219+
else
220+
string = string.b
221+
string.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
222+
string.force_encoding(enc)
223+
end
224+
end
185225
end
186226
end
187227
end

lib/loofah/scrubber.rb

+4
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,10 @@ def html5lib_sanitize(node)
108108
return Scrubber::CONTINUE
109109
end
110110
when Nokogiri::XML::Node::TEXT_NODE, Nokogiri::XML::Node::CDATA_SECTION_NODE
111+
if HTML5::Scrub.cdata_needs_escaping?(node)
112+
node.before(HTML5::Scrub.cdata_escape(node))
113+
return Scrubber::STOP
114+
end
111115
return Scrubber::CONTINUE
112116
end
113117
Scrubber::STOP

lib/loofah/scrubbers.rb

+2-6
Original file line numberDiff line numberDiff line change
@@ -100,13 +100,9 @@ def initialize
100100

101101
def scrub(node)
102102
return CONTINUE if html5lib_sanitize(node) == CONTINUE
103-
if node.children.length == 1 && node.children.first.cdata?
104-
sanitized_text = Loofah.fragment(node.children.first.to_html).scrub!(:strip).to_html
105-
node.before Nokogiri::XML::Text.new(sanitized_text, node.document)
106-
else
107-
node.before node.children
108-
end
103+
node.before(node.children)
109104
node.remove
105+
return STOP
110106
end
111107
end
112108

test/integration/test_ad_hoc.rb

+14-3
Original file line numberDiff line numberDiff line change
@@ -100,17 +100,28 @@ def test_return_empty_string_when_nothing_left
100100
end
101101

102102
def test_nested_script_cdata_tags_should_be_scrubbed
103-
html = "<script><script src='malicious.js'></script>"
103+
html = "<script><script src=\"malicious.js\">this & that</script>"
104104
stripped = Loofah.fragment(html).scrub!(:strip)
105+
105106
assert_empty stripped.xpath("//script")
106-
refute_match("<script", stripped.to_html)
107+
assert_equal("&lt;script src=\"malicious.js\"&gt;this &amp; that", stripped.to_html)
107108
end
108109

109110
def test_nested_script_cdata_tags_should_be_scrubbed_2
110111
html = "<script><script>alert('a');</script></script>"
111112
stripped = Loofah.fragment(html).scrub!(:strip)
113+
112114
assert_empty stripped.xpath("//script")
113-
refute_match("<script", stripped.to_html)
115+
assert_equal("&lt;script&gt;alert('a');", stripped.to_html)
116+
end
117+
118+
def test_nested_script_cdata_tags_should_be_scrubbed_max_recursion
119+
n = 40
120+
html = "<div>" + ("<script>" * n) + "alert(1);" + ("</script>" * n) + "</div>"
121+
expected = "<div>" + ("&lt;script&gt;" * (n-1)) + "alert(1);</div>"
122+
actual = Loofah.fragment(html).scrub!(:strip).to_html
123+
124+
assert_equal(expected, actual)
114125
end
115126

116127
def test_removal_of_all_tags

0 commit comments

Comments
 (0)