File tree 4 files changed +60
-9
lines changed
4 files changed +60
-9
lines changed Original file line number Diff line number Diff line change @@ -182,6 +182,46 @@ def force_correct_attribute_escaping!(node)
182
182
end . force_encoding ( encoding )
183
183
end
184
184
end
185
+
186
# Report whether +node+ carries raw text whose markup characters must be
# escaped before the text is re-inserted into the tree.
#
# @param node [#cdata?, #text?, #parent] the node being scrubbed
# @return [Boolean] true for CDATA nodes, and on JRuby also for text nodes
#   whose parent is a `style` or `script` element
def cdata_needs_escaping?(node)
  return true if node.cdata?

  # Nokogiri's HTML4 parser on JRuby doesn't flag the child of a `style` or `script` tag as cdata, but it acts that way
  return false unless Nokogiri.jruby? && node.text?

  # `&.` keeps a detached text node (no parent) from raising NoMethodError.
  %w[style script].include?(node.parent&.name)
end
190
+
191
# Build a replacement node for +node+ whose text has been passed through
# escape_tags. The new node is created on +node+'s document: a plain text
# node when running on JRuby, a CDATA node otherwise.
#
# @param node [#text, #document] the node whose text should be escaped
# @return the freshly created node (not yet attached to the tree)
def cdata_escape(node)
  safe_text = escape_tags(node.text)
  factory = Nokogiri.jruby? ? :create_text_node : :create_cdata
  node.document.public_send(factory, safe_text)
end
199
+
200
# Replacement table used by escape_tags: each character that can open markup
# or a character reference, mapped to its HTML entity.
TABLE_FOR_ESCAPE_HTML__ = {
  "<" => "&lt;",
  ">" => "&gt;",
  "&" => "&amp;",
}.freeze

# Escape `<`, `>` and `&` in +string+ so it can no longer be parsed as markup.
# Quotes are deliberately left alone. The input is never mutated and the
# result keeps the input's encoding.
#
# @param string [String] text to escape
# @return [String] a new string with the three characters replaced by
#   `&lt;`, `&gt;` and `&amp;`
def escape_tags(string)
  # modified version of CGI.escapeHTML from ruby 3.1
  enc = string.encoding

  if enc.ascii_compatible?
    # Work on a binary copy so invalid byte sequences cannot break gsub!,
    # then restore the caller's encoding.
    escaped = string.b
    escaped.gsub!(/[<>&]/, TABLE_FOR_ESCAPE_HTML__)
    return escaped.force_encoding(enc)
  end

  origenc = nil
  if enc.dummy?
    # Dummy encodings cannot be matched directly: transcode to the
    # ASCII-compatible counterpart and convert back afterwards.
    # NOTE(review): when no counterpart exists `enc` becomes nil here and the
    # table encoding below is called with nil - confirm that is acceptable.
    origenc = enc
    enc = Encoding::Converter.asciicompat_encoding(enc)
    string = enc ? string.encode(enc) : string.b
  end

  # Pattern and table must be in the same encoding as the subject string.
  table = Hash[TABLE_FOR_ESCAPE_HTML__.map { |pair| pair.map { |s| s.encode(enc) } }]
  string = string.gsub(/#{"[<>&]".encode(enc)}/, table)
  string.encode!(origenc) if origenc
  string
end
185
225
end
186
226
end
187
227
end
Original file line number Diff line number Diff line change @@ -108,6 +108,10 @@ def html5lib_sanitize(node)
108
108
return Scrubber ::CONTINUE
109
109
end
110
110
when Nokogiri ::XML ::Node ::TEXT_NODE , Nokogiri ::XML ::Node ::CDATA_SECTION_NODE
111
+ if HTML5 ::Scrub . cdata_needs_escaping? ( node )
112
+ node . before ( HTML5 ::Scrub . cdata_escape ( node ) )
113
+ return Scrubber ::STOP
114
+ end
111
115
return Scrubber ::CONTINUE
112
116
end
113
117
Scrubber ::STOP
Original file line number Diff line number Diff line change @@ -100,13 +100,9 @@ def initialize
100
100
101
101
# Scrub one node. Shown as a diff hunk: lines prefixed `-` belong to the
# previous revision, lines prefixed `+` to the new one; bare numbers are
# gutter line numbers from the diff view.
# Returns CONTINUE when html5lib_sanitize(node) reports CONTINUE; otherwise the
# node's children are moved in front of it, the node is removed, and (in the
# new revision) STOP is returned.
def scrub ( node )
102
102
return CONTINUE if html5lib_sanitize ( node ) == CONTINUE
103
# Previous revision: a lone CDATA child was re-parsed, scrubbed again with
# :strip, and inserted as a plain text node.
- if node . children . length == 1 && node . children . first . cdata?
104
- sanitized_text = Loofah . fragment ( node . children . first . to_html ) . scrub! ( :strip ) . to_html
105
- node . before Nokogiri ::XML ::Text . new ( sanitized_text , node . document )
106
- else
107
- node . before node . children
108
- end
103
# New revision: children are always hoisted in front of the node as-is.
+ node . before ( node . children )
109
104
node . remove
105
+ return STOP
110
106
end
111
107
end
112
108
Original file line number Diff line number Diff line change @@ -100,17 +100,28 @@ def test_return_empty_string_when_nothing_left
100
100
end
101
101
102
102
# A `<script>` opening tag nested inside script content must come out of the
# :strip scrubber as escaped text, never as a live element.
def test_nested_script_cdata_tags_should_be_scrubbed
  html = "<script><script src=\"malicious.js\">this & that</script>"
  stripped = Loofah.fragment(html).scrub!(:strip)

  assert_empty stripped.xpath("//script")
  # The inner tag and the ampersand are expected as entities.
  assert_equal("&lt;script src=\"malicious.js\"&gt;this &amp; that", stripped.to_html)
end
108
109
109
110
# Doubly nested script tags: the outer element is stripped and the inner
# opening tag survives only as escaped text.
def test_nested_script_cdata_tags_should_be_scrubbed_2
  html = "<script><script>alert('a');</script></script>"
  stripped = Loofah.fragment(html).scrub!(:strip)

  assert_empty stripped.xpath("//script")
  assert_equal("&lt;script&gt;alert('a');", stripped.to_html)
end
117
+
118
# Deeply nested script tags: only the outermost tag is an element, so the
# remaining n - 1 opening tags are script text and must come back escaped.
def test_nested_script_cdata_tags_should_be_scrubbed_max_recursion
  n = 40
  html = "<div>" + ("<script>" * n) + "alert(1);" + ("</script>" * n) + "</div>"
  expected = "<div>" + ("&lt;script&gt;" * (n - 1)) + "alert(1);</div>"
  actual = Loofah.fragment(html).scrub!(:strip).to_html

  assert_equal(expected, actual)
end
115
126
116
127
def test_removal_of_all_tags
You can’t perform that action at this time.
0 commit comments