Skip to content

Commit 3bba911

Browse files
committed
SOLR-11477: Disallow resolving of external entities in Lucene queryparser/xml/CoreParser and SolrCoreParser (defType=xmlparser or {!xmlparser ...}) by default.
(Michael Stepankin, Olga Barinova, Uwe Schindler, Christine Poerschke)
1 parent 1979b7e commit 3bba911

File tree

7 files changed

+130
-12
lines changed

7 files changed

+130
-12
lines changed

lucene/CHANGES.txt

+8
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ http://s.apache.org/luceneversions
55

66
======================= Lucene 7.1.0 =======================
77

8+
Changes in Runtime Behavior
9+
10+
* Resolving of external entities in queryparser/xml/CoreParser is disallowed
11+
by default. See SOLR-11477 for details.
12+
813
New Features
914

1015
* LUCENE-7970: Add a shape to Geo3D that consists of multiple planes that
@@ -111,6 +116,9 @@ Bug Fixes
111116
* LUCENE-7957: ConjunctionScorer.getChildren was failing to return all
112117
child scorers (Adrien Grand, Mike McCandless)
113118

119+
* SOLR-11477: Disallow resolving of external entities in queryparser/xml/CoreParser
120+
by default. (Michael Stepankin, Olga Barinova, Uwe Schindler, Christine Poerschke)
121+
114122
Build
115123

116124
* SOLR-11181: Switch order of maven artifact publishing procedure: deploy first

lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/CoreParser.java

+53-12
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,17 @@
2323
import org.apache.lucene.search.spans.SpanQuery;
2424
import org.w3c.dom.Document;
2525
import org.w3c.dom.Element;
26+
import org.xml.sax.EntityResolver;
27+
import org.xml.sax.ErrorHandler;
28+
import org.xml.sax.SAXException;
2629

30+
import javax.xml.XMLConstants;
2731
import javax.xml.parsers.DocumentBuilder;
2832
import javax.xml.parsers.DocumentBuilderFactory;
33+
import javax.xml.parsers.ParserConfigurationException;
2934

3035
import java.io.InputStream;
36+
import java.util.Locale;
3137

3238
/**
3339
* Assembles a QueryBuilder which uses only core Lucene Query objects
@@ -111,6 +117,10 @@ protected CoreParser(String defaultField, Analyzer analyzer, QueryParser parser)
111117
queryFactory.addBuilder("SpanNot", snot);
112118
}
113119

120+
/**
121+
* Parses the given stream as XML file and returns a {@link Query}.
122+
* By default this disallows external entities for security reasons.
123+
*/
114124
public Query parse(InputStream xmlStream) throws ParserException {
115125
return getQuery(parseXML(xmlStream).getDocumentElement());
116126
}
@@ -133,23 +143,47 @@ public void addSpanQueryBuilder(String nodeName, SpanQueryBuilder builder) {
133143
spanFactory.addBuilder(nodeName, builder);
134144
}
135145

136-
static Document parseXML(InputStream pXmlFile) throws ParserException {
137-
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
138-
DocumentBuilder db = null;
146+
/**
147+
* Returns a SAX {@link EntityResolver} to be used by {@link DocumentBuilder}.
148+
* By default this returns {@link #DISALLOW_EXTERNAL_ENTITY_RESOLVER}, which disallows the
149+
* expansion of external entities (for security reasons). To restore legacy behavior,
150+
* override this method to return {@code null}.
151+
*/
152+
protected EntityResolver getEntityResolver() {
153+
return DISALLOW_EXTERNAL_ENTITY_RESOLVER;
154+
}
155+
156+
/**
157+
* Subclass and override to return a SAX {@link ErrorHandler} to be used by {@link DocumentBuilder}.
158+
* By default this returns {@code null} so no error handler is used.
159+
* This method can be used to redirect XML parse errors/warnings to a custom logger.
160+
*/
161+
protected ErrorHandler getErrorHandler() {
162+
return null;
163+
}
164+
165+
private Document parseXML(InputStream pXmlFile) throws ParserException {
166+
final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
167+
dbf.setValidating(false);
139168
try {
140-
db = dbf.newDocumentBuilder();
141-
}
142-
catch (Exception se) {
143-
throw new ParserException("XML Parser configuration error", se);
169+
dbf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
170+
} catch (ParserConfigurationException e) {
171+
// ignore since all implementations are required to support the
172+
// {@link javax.xml.XMLConstants#FEATURE_SECURE_PROCESSING} feature
144173
}
145-
org.w3c.dom.Document doc = null;
174+
final DocumentBuilder db;
146175
try {
147-
doc = db.parse(pXmlFile);
176+
db = dbf.newDocumentBuilder();
177+
} catch (Exception se) {
178+
throw new ParserException("XML Parser configuration error.", se);
148179
}
149-
catch (Exception se) {
150-
throw new ParserException("Error parsing XML stream:" + se, se);
180+
try {
181+
db.setEntityResolver(getEntityResolver());
182+
db.setErrorHandler(getErrorHandler());
183+
return db.parse(pXmlFile);
184+
} catch (Exception se) {
185+
throw new ParserException("Error parsing XML stream: " + se, se);
151186
}
152-
return doc;
153187
}
154188

155189
public Query getQuery(Element e) throws ParserException {
@@ -160,4 +194,11 @@ public Query getQuery(Element e) throws ParserException {
160194
public SpanQuery getSpanQuery(Element e) throws ParserException {
161195
return spanFactory.getSpanQuery(e);
162196
}
197+
198+
public static final EntityResolver DISALLOW_EXTERNAL_ENTITY_RESOLVER = (String publicId, String systemId) -> {
199+
throw new SAXException(String.format(Locale.ENGLISH,
200+
"External Entity resolving unsupported: publicId=\"%s\" systemId=\"%s\"",
201+
publicId, systemId));
202+
};
203+
163204
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE TermQuery SYSTEM "foo://bar.xyz/mydtd">
3+
<!--
4+
Licensed to the Apache Software Foundation (ASF) under one or more
5+
contributor license agreements. See the NOTICE file distributed with
6+
this work for additional information regarding copyright ownership.
7+
The ASF licenses this file to You under the Apache License, Version 2.0
8+
(the "License"); you may not use this file except in compliance with
9+
the License. You may obtain a copy of the License at
10+
11+
http://www.apache.org/licenses/LICENSE-2.0
12+
13+
Unless required by applicable law or agreed to in writing, software
14+
distributed under the License is distributed on an "AS IS" BASIS,
15+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16+
See the License for the specific language governing permissions and
17+
limitations under the License.
18+
-->
19+
<TermQuery fieldName="contents">sumitomo</TermQuery>
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<!DOCTYPE TermQuery [
3+
<!ENTITY internalTerm "sumitomo">
4+
<!ENTITY externalTerm SYSTEM "foo://bar.xyz/external">
5+
<!ENTITY % myParameterEntity "foo://bar.xyz/param">
6+
]>
7+
<!--
8+
Licensed to the Apache Software Foundation (ASF) under one or more
9+
contributor license agreements. See the NOTICE file distributed with
10+
this work for additional information regarding copyright ownership.
11+
The ASF licenses this file to You under the Apache License, Version 2.0
12+
(the "License"); you may not use this file except in compliance with
13+
the License. You may obtain a copy of the License at
14+
15+
http://www.apache.org/licenses/LICENSE-2.0
16+
17+
Unless required by applicable law or agreed to in writing, software
18+
distributed under the License is distributed on an "AS IS" BASIS,
19+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20+
See the License for the specific language governing permissions and
21+
limitations under the License.
22+
-->
23+
<TermQuery fieldName="contents">&internalTerm;&externalTerm;</TermQuery>

lucene/queryparser/src/test/org/apache/lucene/queryparser/xml/TestCoreParser.java

+13
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import org.apache.lucene.search.spans.SpanQuery;
3131
import org.apache.lucene.util.LuceneTestCase;
3232
import org.junit.AfterClass;
33+
import org.xml.sax.SAXException;
3334

3435
import java.io.IOException;
3536
import java.io.InputStream;
@@ -67,6 +68,18 @@ public void testTermQueryXML() throws ParserException, IOException {
6768
dumpResults("TermQuery", q, 5);
6869
}
6970

71+
public void test_DOCTYPE_TermQueryXML() throws ParserException, IOException {
72+
SAXException saxe = LuceneTestCase.expectThrows(ParserException.class, SAXException.class,
73+
() -> parse("DOCTYPE_TermQuery.xml"));
74+
assertTrue(saxe.getMessage().startsWith("External Entity resolving unsupported:"));
75+
}
76+
77+
public void test_ENTITY_TermQueryXML() throws ParserException, IOException {
78+
SAXException saxe = LuceneTestCase.expectThrows(ParserException.class, SAXException.class,
79+
() -> parse("ENTITY_TermQuery.xml"));
80+
assertTrue(saxe.getMessage().startsWith("External Entity resolving unsupported:"));
81+
}
82+
7083
public void testTermQueryEmptyXML() throws ParserException, IOException {
7184
parseShouldFail("TermQueryEmpty.xml",
7285
"TermQuery has no text");

solr/CHANGES.txt

+6
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,9 @@ Upgrade Notes
6464
events caused by updates, commits, or optimize, write your own listener as native Java class
6565
as part of a Solr plugin.
6666

67+
* SOLR-11477: in the XML query parser (defType=xmlparser or {!xmlparser ... })
68+
the resolving of external entities is now disallowed by default.
69+
6770
New Features
6871
----------------------
6972
* SOLR-10339: New set-trigger and remove-trigger APIs for autoscaling. (shalin)
@@ -233,6 +236,9 @@ Bug Fixes
233236

234237
* SOLR-11449: MoveReplicaCmd mistakenly called registerCollectionStateWatcher on failure. (ab)
235238

239+
* SOLR-11477: Disallow resolving of external entities in the XML query parser (defType=xmlparser).
240+
(Michael Stepankin, Olga Barinova, Uwe Schindler, Christine Poerschke)
241+
236242
Optimizations
237243
----------------------
238244

solr/core/src/java/org/apache/solr/search/SolrCoreParser.java

+8
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,13 @@
2525
import org.apache.lucene.queryparser.xml.builders.SpanQueryBuilder;
2626
import org.apache.solr.common.SolrException;
2727
import org.apache.solr.common.util.NamedList;
28+
import org.apache.solr.common.util.XMLErrorLogger;
2829
import org.apache.solr.core.SolrResourceLoader;
2930
import org.apache.solr.request.SolrQueryRequest;
3031
import org.apache.solr.util.plugin.NamedListInitializedPlugin;
3132
import org.slf4j.Logger;
3233
import org.slf4j.LoggerFactory;
34+
import org.xml.sax.ErrorHandler;
3335

3436
/**
3537
* Assembles a QueryBuilder which uses Query objects from Solr's <code>search</code> module
@@ -38,6 +40,7 @@
3840
public class SolrCoreParser extends CoreParser implements NamedListInitializedPlugin {
3941

4042
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
43+
private static final XMLErrorLogger xmllog = new XMLErrorLogger(log);
4144

4245
protected final SolrQueryRequest req;
4346

@@ -97,4 +100,9 @@ public void init(NamedList initArgs) {
97100
}
98101
}
99102

103+
@Override
104+
protected ErrorHandler getErrorHandler() {
105+
return xmllog;
106+
}
107+
100108
}

0 commit comments

Comments
 (0)