23
23
import org .apache .lucene .search .spans .SpanQuery ;
24
24
import org .w3c .dom .Document ;
25
25
import org .w3c .dom .Element ;
26
+ import org .xml .sax .EntityResolver ;
27
+ import org .xml .sax .ErrorHandler ;
28
+ import org .xml .sax .SAXException ;
26
29
30
+ import javax .xml .XMLConstants ;
27
31
import javax .xml .parsers .DocumentBuilder ;
28
32
import javax .xml .parsers .DocumentBuilderFactory ;
33
+ import javax .xml .parsers .ParserConfigurationException ;
29
34
30
35
import java .io .InputStream ;
36
+ import java .util .Locale ;
31
37
32
38
/**
33
39
* Assembles a QueryBuilder which uses only core Lucene Query objects
@@ -111,6 +117,10 @@ protected CoreParser(String defaultField, Analyzer analyzer, QueryParser parser)
111
117
queryFactory .addBuilder ("SpanNot" , snot );
112
118
}
113
119
120
+ /**
121
+ * Parses the given stream as XML file and returns a {@link Query}.
122
+ * By default this disallows external entities for security reasons.
123
+ */
114
124
public Query parse (InputStream xmlStream ) throws ParserException {
115
125
return getQuery (parseXML (xmlStream ).getDocumentElement ());
116
126
}
@@ -133,23 +143,47 @@ public void addSpanQueryBuilder(String nodeName, SpanQueryBuilder builder) {
133
143
spanFactory .addBuilder (nodeName , builder );
134
144
}
135
145
136
- static Document parseXML (InputStream pXmlFile ) throws ParserException {
137
- DocumentBuilderFactory dbf = DocumentBuilderFactory .newInstance ();
138
- DocumentBuilder db = null ;
146
+ /**
147
+ * Returns a SAX {@link EntityResolver} to be used by {@link DocumentBuilder}.
148
+ * By default this returns {@link #DISALLOW_EXTERNAL_ENTITY_RESOLVER}, which disallows the
149
+ * expansion of external entities (for security reasons). To restore legacy behavior,
150
+ * override this method to return {@code null}.
151
+ */
152
+ protected EntityResolver getEntityResolver () {
153
+ return DISALLOW_EXTERNAL_ENTITY_RESOLVER ;
154
+ }
155
+
156
+ /**
157
+ * Subclass and override to return a SAX {@link ErrorHandler} to be used by {@link DocumentBuilder}.
158
+ * By default this returns {@code null} so no error handler is used.
159
+ * This method can be used to redirect XML parse errors/warnings to a custom logger.
160
+ */
161
+ protected ErrorHandler getErrorHandler () {
162
+ return null ;
163
+ }
164
+
165
+ private Document parseXML (InputStream pXmlFile ) throws ParserException {
166
+ final DocumentBuilderFactory dbf = DocumentBuilderFactory .newInstance ();
167
+ dbf .setValidating (false );
139
168
try {
140
- db = dbf .newDocumentBuilder ( );
141
- }
142
- catch ( Exception se ) {
143
- throw new ParserException ( "XML Parser configuration error" , se );
169
+ dbf .setFeature ( XMLConstants . FEATURE_SECURE_PROCESSING , true );
170
+ } catch ( ParserConfigurationException e ) {
171
+ // ignore since all implementations are required to support the
172
+ // {@link javax.xml.XMLConstants#FEATURE_SECURE_PROCESSING} feature
144
173
}
145
- org . w3c . dom . Document doc = null ;
174
+ final DocumentBuilder db ;
146
175
try {
147
- doc = db .parse (pXmlFile );
176
+ db = dbf .newDocumentBuilder ();
177
+ } catch (Exception se ) {
178
+ throw new ParserException ("XML Parser configuration error." , se );
148
179
}
149
- catch (Exception se ) {
150
- throw new ParserException ("Error parsing XML stream:" + se , se );
180
+ try {
181
+ db .setEntityResolver (getEntityResolver ());
182
+ db .setErrorHandler (getErrorHandler ());
183
+ return db .parse (pXmlFile );
184
+ } catch (Exception se ) {
185
+ throw new ParserException ("Error parsing XML stream: " + se , se );
151
186
}
152
- return doc ;
153
187
}
154
188
155
189
public Query getQuery (Element e ) throws ParserException {
@@ -160,4 +194,11 @@ public Query getQuery(Element e) throws ParserException {
160
194
public SpanQuery getSpanQuery (Element e ) throws ParserException {
161
195
return spanFactory .getSpanQuery (e );
162
196
}
197
+
198
+ public static final EntityResolver DISALLOW_EXTERNAL_ENTITY_RESOLVER = (String publicId , String systemId ) -> {
199
+ throw new SAXException (String .format (Locale .ENGLISH ,
200
+ "External Entity resolving unsupported: publicId=\" %s\" systemId=\" %s\" " ,
201
+ publicId , systemId ));
202
+ };
203
+
163
204
}
0 commit comments