2424import org .w3c .dom .Node ;
2525import org .w3c .dom .NodeList ;
2626import org .xml .sax .InputSource ;
27+ import org .xml .sax .SAXException ;
2728
29+ import java .io .IOException ;
2830import java .io .StringReader ;
2931import java .io .StringWriter ;
3032import java .util .ArrayList ;
3335import java .util .Map ;
3436import java .util .regex .Matcher ;
3537import java .util .regex .Pattern ;
38+ import javax .xml .XMLConstants ;
39+ import javax .xml .parsers .DocumentBuilder ;
40+ import javax .xml .parsers .DocumentBuilderFactory ;
41+ import javax .xml .parsers .ParserConfigurationException ;
3642import javax .xml .transform .ErrorListener ;
3743import javax .xml .transform .OutputKeys ;
3844import javax .xml .transform .Source ;
4854import javax .xml .xpath .XPathExpression ;
4955import javax .xml .xpath .XPathExpressionException ;
5056import javax .xml .xpath .XPathFactory ;
57+ import javax .xml .xpath .XPathFactoryConfigurationException ;
5158
5259import static org .apache .calcite .linq4j .Nullness .castNonNull ;
5360import static org .apache .calcite .util .Static .RESOURCE ;
6067public class XmlFunctions {
6168
6269 private static final ThreadLocal <@ Nullable XPathFactory > XPATH_FACTORY =
63- ThreadLocal .withInitial (XPathFactory ::newInstance );
70+ ThreadLocal .withInitial (() -> {
71+ final XPathFactory xPathFactory = XPathFactory .newInstance ();
72+ try {
73+ xPathFactory .setFeature (XMLConstants .FEATURE_SECURE_PROCESSING , true );
74+ } catch (XPathFactoryConfigurationException e ) {
75+ throw new IllegalStateException ("XPath Factory configuration failed" , e );
76+ }
77+ return xPathFactory ;
78+ });
6479 private static final ThreadLocal <@ Nullable TransformerFactory > TRANSFORMER_FACTORY =
6580 ThreadLocal .withInitial (() -> {
66- TransformerFactory transformerFactory = TransformerFactory .newInstance ();
81+ final TransformerFactory transformerFactory = TransformerFactory .newInstance ();
6782 transformerFactory .setErrorListener (new InternalErrorListener ());
83+ try {
84+ transformerFactory .setFeature (XMLConstants .FEATURE_SECURE_PROCESSING , true );
85+ } catch (TransformerConfigurationException e ) {
86+ throw new IllegalStateException ("Transformer Factory configuration failed" , e );
87+ }
6888 return transformerFactory ;
6989 });
90+ private static final ThreadLocal <@ Nullable DocumentBuilderFactory > DOCUMENT_BUILDER_FACTORY =
91+ ThreadLocal .withInitial (() -> {
92+ final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory .newInstance ();
93+ documentBuilderFactory .setXIncludeAware (false );
94+ documentBuilderFactory .setExpandEntityReferences (false );
95+ documentBuilderFactory .setNamespaceAware (true );
96+ try {
97+ documentBuilderFactory .setFeature (XMLConstants .FEATURE_SECURE_PROCESSING , true );
98+ documentBuilderFactory
99+ .setFeature ("http://apache.org/xml/features/disallow-doctype-decl" , true );
100+ } catch (final ParserConfigurationException e ) {
101+ throw new IllegalStateException ("Document Builder configuration failed" , e );
102+ }
103+ return documentBuilderFactory ;
104+ });
70105
/** Anchored pattern for a namespace declaration string: one or more
 * {@code name="value"} pairs, where each name is built from ASCII letters,
 * digits, {@code :}, {@code _} and {@code -}, and each value contains no
 * double quote.
 *
 * <p>NOTE(review): as rendered here the literal has a space after every
 * escaped quote; these look like extraction artifacts rather than part of the
 * real pattern - confirm against the checked-in file before relying on the
 * exact separator rules. */
71106 private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
72107 .compile ("^(([0-9a-zA-Z:_-]+=\" [^\" ]*\" )( [0-9a-zA-Z:_-]+=\" [^\" ]*\" )*)$" );
@@ -81,10 +116,11 @@ private XmlFunctions() {
81116 return null ;
82117 }
83118 try {
119+ final Node documentNode = getDocumentNode (input );
84120 XPathExpression xpathExpression = castNonNull (XPATH_FACTORY .get ()).newXPath ().compile (xpath );
85121 try {
86122 NodeList nodes = (NodeList ) xpathExpression
87- .evaluate (new InputSource ( new StringReader ( input )) , XPathConstants .NODESET );
123+ .evaluate (documentNode , XPathConstants .NODESET );
88124 List <@ Nullable String > result = new ArrayList <>();
89125 for (int i = 0 ; i < nodes .getLength (); i ++) {
90126 Node item = castNonNull (nodes .item (i ));
@@ -94,9 +130,9 @@ private XmlFunctions() {
94130 }
95131 return StringUtils .join (result , " " );
96132 } catch (XPathExpressionException e ) {
97- return xpathExpression .evaluate (new InputSource ( new StringReader ( input )) );
133+ return xpathExpression .evaluate (documentNode );
98134 }
99- } catch (XPathExpressionException ex ) {
135+ } catch (IllegalArgumentException | XPathExpressionException ex ) {
100136 throw RESOURCE .invalidInputForExtractValue (input , xpath ).ex ();
101137 }
102138 }
@@ -140,17 +176,18 @@ private XmlFunctions() {
140176
141177 XPathExpression xpathExpression = xPath .compile (xpath );
142178
179+ final Node documentNode = getDocumentNode (xml );
143180 try {
144181 List <String > result = new ArrayList <>();
145182 NodeList nodes = (NodeList ) xpathExpression
146- .evaluate (new InputSource ( new StringReader ( xml )) , XPathConstants .NODESET );
183+ .evaluate (documentNode , XPathConstants .NODESET );
147184 for (int i = 0 ; i < nodes .getLength (); i ++) {
148185 result .add (convertNodeToString (castNonNull (nodes .item (i ))));
149186 }
150187 return StringUtils .join (result , "" );
151188 } catch (XPathExpressionException e ) {
152189 Node node = (Node ) xpathExpression
153- .evaluate (new InputSource ( new StringReader ( xml )) , XPathConstants .NODE );
190+ .evaluate (documentNode , XPathConstants .NODE );
154191 return convertNodeToString (node );
155192 }
156193 } catch (IllegalArgumentException | XPathExpressionException | TransformerException ex ) {
@@ -174,16 +211,17 @@ private XmlFunctions() {
174211 }
175212
176213 XPathExpression xpathExpression = xPath .compile (xpath );
214+ final Node documentNode = getDocumentNode (xml );
177215 try {
178216 NodeList nodes = (NodeList ) xpathExpression
179- .evaluate (new InputSource ( new StringReader ( xml )) , XPathConstants .NODESET );
217+ .evaluate (documentNode , XPathConstants .NODESET );
180218 if (nodes != null && nodes .getLength () > 0 ) {
181219 return 1 ;
182220 }
183221 return 0 ;
184222 } catch (XPathExpressionException e ) {
185223 Node node = (Node ) xpathExpression
186- .evaluate (new InputSource ( new StringReader ( xml )) , XPathConstants .NODE );
224+ .evaluate (documentNode , XPathConstants .NODE );
187225 if (node != null ) {
188226 return 1 ;
189227 }
@@ -215,6 +253,17 @@ private static String convertNodeToString(Node node) throws TransformerException
215253 return writer .toString ();
216254 }
217255
256+ private static Node getDocumentNode (final String xml ) {
257+ try {
258+ final DocumentBuilder documentBuilder =
259+ castNonNull (DOCUMENT_BUILDER_FACTORY .get ()).newDocumentBuilder ();
260+ final InputSource inputSource = new InputSource (new StringReader (xml ));
261+ return documentBuilder .parse (inputSource );
262+ } catch (final ParserConfigurationException | SAXException | IOException e ) {
263+ throw new IllegalArgumentException ("XML parsing failed" , e );
264+ }
265+ }
266+
218267 /** The internal default ErrorListener for Transformer. Just rethrows errors to
219268 * discontinue the XML transformation. */
220269 private static class InternalErrorListener implements ErrorListener {
0 commit comments