Skip to content

Commit 6109e01

Browse files
authored
Fix a bug where the Stream parser doesn't expand user-defined entity references in "text" events (#200)
## Why?

The Stream parser expands character references and predefined entity references, but doesn't expand user-defined entity references.

## Change

- text_stream_unnormalize.rb

```
$LOAD_PATH.unshift(File.expand_path("lib"))
require 'rexml/document'
require 'rexml/parsers/sax2parser'
require 'rexml/parsers/pullparser'
require 'rexml/parsers/streamparser'
require 'rexml/streamlistener'

xml = <<EOS
<!DOCTYPE foo [
<!ENTITY la "1234">
<!ENTITY lala "--&la;--">
<!ENTITY lalal "&la;&la;">
]><root><la>&la;</la><lala>&lala;</lala><a>&lt;P&gt; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt; &lt;/I&gt;</a><b>test&#8482;</b></root>
EOS

class StListener
  include REXML::StreamListener

  def text(text)
    puts text
  end
end

puts "REXML(DOM)"
REXML::Document.new(xml).elements.each("/root/*") {|element| puts element.text}

puts ""
puts "REXML(Pull)"
parser = REXML::Parsers::PullParser.new(xml)
while parser.has_next?
  event = parser.pull
  case event.event_type
  when :text
    puts event[1]
  end
end

puts ""
puts "REXML(Stream)"
parser = REXML::Parsers::StreamParser.new(xml, StListener.new).parse

puts ""
puts "REXML(SAX)"
sax = REXML::Parsers::SAX2Parser.new(xml)
sax.listen(:characters) {|x| puts x }
sax.parse
```

## Before (master)

```
$ ruby text_stream_unnormalize.rb
REXML(DOM)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Pull)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Stream)
&la; #<= This
&lala; #<= This
<P> <I> <B> Text </B> </I>
test™

REXML(SAX)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™
```

## After (This PR)

```
$ ruby text_stream_unnormalize.rb
REXML(DOM)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Pull)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(Stream)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™

REXML(SAX)
1234
--1234--
<P> <I> <B> Text </B> </I>
test™
```
1 parent cb15858 commit 6109e01

File tree

2 files changed

+147
-2
lines changed

2 files changed

+147
-2
lines changed

lib/rexml/parsers/streamparser.rb

+7-1
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,17 @@ class StreamParser
77
def initialize source, listener
88
@listener = listener
99
@parser = BaseParser.new( source )
10+
@entities = {}
1011
end
1112

1213
def add_listener( listener )
1314
@parser.add_listener( listener )
1415
end
1516

17+
def entity_expansion_count
18+
@parser.entity_expansion_count
19+
end
20+
1621
def parse
1722
# entity string
1823
while true
@@ -28,7 +33,7 @@ def parse
2833
when :end_element
2934
@listener.tag_end( event[1] )
3035
when :text
31-
unnormalized = @parser.unnormalize( event[1] )
36+
unnormalized = @parser.unnormalize( event[1], @entities )
3237
@listener.text( unnormalized )
3338
when :processing_instruction
3439
@listener.instruction( *event[1,2] )
@@ -40,6 +45,7 @@ def parse
4045
when :comment, :attlistdecl, :cdata, :xmldecl, :elementdecl
4146
@listener.send( event[0].to_s, *event[1..-1] )
4247
when :entitydecl, :notationdecl
48+
@entities[ event[1] ] = event[2] if event.size == 3
4349
@listener.send( event[0].to_s, event[1..-1] )
4450
when :externalentity
4551
entity_reference = event[1]

test/test_stream.rb

+140-1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,42 @@ def entity(content)
8787

8888
assert_equal(["ISOLat2"], listener.entities)
8989
end
90+
91+
def test_entity_replacement
92+
source = <<-XML
93+
<!DOCTYPE foo [
94+
<!ENTITY la "1234">
95+
<!ENTITY lala "--&la;--">
96+
<!ENTITY lalal "&la;&la;">
97+
]><a><la>&la;</la><lala>&lala;</lala></a>
98+
XML
99+
100+
listener = MyListener.new
101+
class << listener
102+
attr_accessor :text_values
103+
def text(text)
104+
@text_values << text
105+
end
106+
end
107+
listener.text_values = []
108+
REXML::Document.parse_stream(source, listener)
109+
assert_equal(["1234", "--1234--"], listener.text_values)
110+
end
111+
112+
def test_characters_predefined_entities
113+
source = '<root><a>&lt;P&gt; &lt;I&gt; &lt;B&gt; Text &lt;/B&gt; &lt;/I&gt;</a></root>'
114+
115+
listener = MyListener.new
116+
class << listener
117+
attr_accessor :text_value
118+
def text(text)
119+
@text_value << text
120+
end
121+
end
122+
listener.text_value = ""
123+
REXML::Document.parse_stream(source, listener)
124+
assert_equal("<P> <I> <B> Text </B> </I>", listener.text_value)
125+
end
90126
end
91127

92128
class EntityExpansionLimitTest < Test::Unit::TestCase
@@ -100,6 +136,81 @@ def teardown
100136
REXML::Security.entity_expansion_text_limit = @default_entity_expansion_text_limit
101137
end
102138

139+
def test_have_value
140+
source = <<-XML
141+
<?xml version="1.0" encoding="UTF-8"?>
142+
<!DOCTYPE member [
143+
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
144+
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
145+
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
146+
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
147+
<!ENTITY e "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx">
148+
]>
149+
<member>
150+
&a;
151+
</member>
152+
XML
153+
154+
assert_raise(RuntimeError.new("entity expansion has grown too large")) do
155+
REXML::Document.parse_stream(source, MyListener.new)
156+
end
157+
end
158+
159+
def test_empty_value
160+
source = <<-XML
161+
<?xml version="1.0" encoding="UTF-8"?>
162+
<!DOCTYPE member [
163+
<!ENTITY a "&b;&b;&b;&b;&b;&b;&b;&b;&b;&b;">
164+
<!ENTITY b "&c;&c;&c;&c;&c;&c;&c;&c;&c;&c;">
165+
<!ENTITY c "&d;&d;&d;&d;&d;&d;&d;&d;&d;&d;">
166+
<!ENTITY d "&e;&e;&e;&e;&e;&e;&e;&e;&e;&e;">
167+
<!ENTITY e "">
168+
]>
169+
<member>
170+
&a;
171+
</member>
172+
XML
173+
174+
listener = MyListener.new
175+
REXML::Security.entity_expansion_limit = 100000
176+
parser = REXML::Parsers::StreamParser.new( source, listener )
177+
parser.parse
178+
assert_equal(11111, parser.entity_expansion_count)
179+
180+
REXML::Security.entity_expansion_limit = @default_entity_expansion_limit
181+
parser = REXML::Parsers::StreamParser.new( source, listener )
182+
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
183+
parser.parse
184+
end
185+
assert do
186+
parser.entity_expansion_count > @default_entity_expansion_limit
187+
end
188+
end
189+
190+
def test_with_default_entity
191+
source = <<-XML
192+
<?xml version="1.0" encoding="UTF-8"?>
193+
<!DOCTYPE member [
194+
<!ENTITY a "a">
195+
<!ENTITY a2 "&a; &a;">
196+
]>
197+
<member>
198+
&a;
199+
&a2;
200+
&lt;
201+
</member>
202+
XML
203+
204+
listener = MyListener.new
205+
REXML::Security.entity_expansion_limit = 4
206+
REXML::Document.parse_stream(source, listener)
207+
208+
REXML::Security.entity_expansion_limit = 3
209+
assert_raise(RuntimeError.new("number of entity expansions exceeded, processing aborted.")) do
210+
REXML::Document.parse_stream(source, listener)
211+
end
212+
end
213+
103214
def test_with_only_default_entities
104215
member_value = "&lt;p&gt;#{'A' * @default_entity_expansion_text_limit}&lt;/p&gt;"
105216
source = <<-XML
@@ -117,14 +228,42 @@ def text(text)
117228
end
118229
end
119230
listener.text_value = ""
120-
REXML::Document.parse_stream(source, listener)
231+
parser = REXML::Parsers::StreamParser.new( source, listener )
232+
parser.parse
121233

122234
expected_value = "<p>#{'A' * @default_entity_expansion_text_limit}</p>"
123235
assert_equal(expected_value, listener.text_value.strip)
236+
assert_equal(0, parser.entity_expansion_count)
124237
assert do
125238
listener.text_value.bytesize > @default_entity_expansion_text_limit
126239
end
127240
end
241+
242+
def test_entity_expansion_text_limit
243+
source = <<-XML
244+
<!DOCTYPE member [
245+
<!ENTITY a "&b;&b;&b;">
246+
<!ENTITY b "&c;&d;&e;">
247+
<!ENTITY c "xxxxxxxxxx">
248+
<!ENTITY d "yyyyyyyyyy">
249+
<!ENTITY e "zzzzzzzzzz">
250+
]>
251+
<member>&a;</member>
252+
XML
253+
254+
listener = MyListener.new
255+
class << listener
256+
attr_accessor :text_value
257+
def text(text)
258+
@text_value << text
259+
end
260+
end
261+
listener.text_value = ""
262+
REXML::Security.entity_expansion_text_limit = 90
263+
REXML::Document.parse_stream(source, listener)
264+
265+
assert_equal(90, listener.text_value.size)
266+
end
128267
end
129268

130269
# For test_listener

0 commit comments

Comments
 (0)