Skip to content

Commit ba80b91

Browse files
[CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder
Co-authored-by: David Handermann <exceptionfactory@apache.org>
1 parent df8ee28 commit ba80b91

File tree

2 files changed

+106
-9
lines changed

2 files changed

+106
-9
lines changed

core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java

Lines changed: 58 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,9 @@
2424
import org.w3c.dom.Node;
2525
import org.w3c.dom.NodeList;
2626
import org.xml.sax.InputSource;
27+
import org.xml.sax.SAXException;
2728

29+
import java.io.IOException;
2830
import java.io.StringReader;
2931
import java.io.StringWriter;
3032
import java.util.ArrayList;
@@ -33,6 +35,10 @@
3335
import java.util.Map;
3436
import java.util.regex.Matcher;
3537
import java.util.regex.Pattern;
38+
import javax.xml.XMLConstants;
39+
import javax.xml.parsers.DocumentBuilder;
40+
import javax.xml.parsers.DocumentBuilderFactory;
41+
import javax.xml.parsers.ParserConfigurationException;
3642
import javax.xml.transform.ErrorListener;
3743
import javax.xml.transform.OutputKeys;
3844
import javax.xml.transform.Source;
@@ -48,6 +54,7 @@
4854
import javax.xml.xpath.XPathExpression;
4955
import javax.xml.xpath.XPathExpressionException;
5056
import javax.xml.xpath.XPathFactory;
57+
import javax.xml.xpath.XPathFactoryConfigurationException;
5158

5259
import static org.apache.calcite.linq4j.Nullness.castNonNull;
5360
import static org.apache.calcite.util.Static.RESOURCE;
@@ -60,13 +67,41 @@
6067
public class XmlFunctions {
6168

6269
private static final ThreadLocal<@Nullable XPathFactory> XPATH_FACTORY =
63-
ThreadLocal.withInitial(XPathFactory::newInstance);
70+
ThreadLocal.withInitial(() -> {
71+
final XPathFactory xPathFactory = XPathFactory.newInstance();
72+
try {
73+
xPathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
74+
} catch (XPathFactoryConfigurationException e) {
75+
throw new IllegalStateException("XPath Factory configuration failed", e);
76+
}
77+
return xPathFactory;
78+
});
6479
private static final ThreadLocal<@Nullable TransformerFactory> TRANSFORMER_FACTORY =
6580
ThreadLocal.withInitial(() -> {
66-
TransformerFactory transformerFactory = TransformerFactory.newInstance();
81+
final TransformerFactory transformerFactory = TransformerFactory.newInstance();
6782
transformerFactory.setErrorListener(new InternalErrorListener());
83+
try {
84+
transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
85+
} catch (TransformerConfigurationException e) {
86+
throw new IllegalStateException("Transformer Factory configuration failed", e);
87+
}
6888
return transformerFactory;
6989
});
90+
private static final ThreadLocal<@Nullable DocumentBuilderFactory> DOCUMENT_BUILDER_FACTORY =
91+
ThreadLocal.withInitial(() -> {
92+
final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
93+
documentBuilderFactory.setXIncludeAware(false);
94+
documentBuilderFactory.setExpandEntityReferences(false);
95+
documentBuilderFactory.setNamespaceAware(true);
96+
try {
97+
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
98+
documentBuilderFactory
99+
.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
100+
} catch (final ParserConfigurationException e) {
101+
throw new IllegalStateException("Document Builder configuration failed", e);
102+
}
103+
return documentBuilderFactory;
104+
});
70105

71106
private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
72107
.compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( [0-9a-zA-Z:_-]+=\"[^\"]*\")*)$");
@@ -81,10 +116,11 @@ private XmlFunctions() {
81116
return null;
82117
}
83118
try {
119+
final Node documentNode = getDocumentNode(input);
84120
XPathExpression xpathExpression = castNonNull(XPATH_FACTORY.get()).newXPath().compile(xpath);
85121
try {
86122
NodeList nodes = (NodeList) xpathExpression
87-
.evaluate(new InputSource(new StringReader(input)), XPathConstants.NODESET);
123+
.evaluate(documentNode, XPathConstants.NODESET);
88124
List<@Nullable String> result = new ArrayList<>();
89125
for (int i = 0; i < nodes.getLength(); i++) {
90126
Node item = castNonNull(nodes.item(i));
@@ -94,9 +130,9 @@ private XmlFunctions() {
94130
}
95131
return StringUtils.join(result, " ");
96132
} catch (XPathExpressionException e) {
97-
return xpathExpression.evaluate(new InputSource(new StringReader(input)));
133+
return xpathExpression.evaluate(documentNode);
98134
}
99-
} catch (XPathExpressionException ex) {
135+
} catch (IllegalArgumentException | XPathExpressionException ex) {
100136
throw RESOURCE.invalidInputForExtractValue(input, xpath).ex();
101137
}
102138
}
@@ -140,17 +176,18 @@ private XmlFunctions() {
140176

141177
XPathExpression xpathExpression = xPath.compile(xpath);
142178

179+
final Node documentNode = getDocumentNode(xml);
143180
try {
144181
List<String> result = new ArrayList<>();
145182
NodeList nodes = (NodeList) xpathExpression
146-
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET);
183+
.evaluate(documentNode, XPathConstants.NODESET);
147184
for (int i = 0; i < nodes.getLength(); i++) {
148185
result.add(convertNodeToString(castNonNull(nodes.item(i))));
149186
}
150187
return StringUtils.join(result, "");
151188
} catch (XPathExpressionException e) {
152189
Node node = (Node) xpathExpression
153-
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE);
190+
.evaluate(documentNode, XPathConstants.NODE);
154191
return convertNodeToString(node);
155192
}
156193
} catch (IllegalArgumentException | XPathExpressionException | TransformerException ex) {
@@ -174,16 +211,17 @@ private XmlFunctions() {
174211
}
175212

176213
XPathExpression xpathExpression = xPath.compile(xpath);
214+
final Node documentNode = getDocumentNode(xml);
177215
try {
178216
NodeList nodes = (NodeList) xpathExpression
179-
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET);
217+
.evaluate(documentNode, XPathConstants.NODESET);
180218
if (nodes != null && nodes.getLength() > 0) {
181219
return 1;
182220
}
183221
return 0;
184222
} catch (XPathExpressionException e) {
185223
Node node = (Node) xpathExpression
186-
.evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE);
224+
.evaluate(documentNode, XPathConstants.NODE);
187225
if (node != null) {
188226
return 1;
189227
}
@@ -215,6 +253,17 @@ private static String convertNodeToString(Node node) throws TransformerException
215253
return writer.toString();
216254
}
217255

256+
private static Node getDocumentNode(final String xml) {
257+
try {
258+
final DocumentBuilder documentBuilder =
259+
castNonNull(DOCUMENT_BUILDER_FACTORY.get()).newDocumentBuilder();
260+
final InputSource inputSource = new InputSource(new StringReader(xml));
261+
return documentBuilder.parse(inputSource);
262+
} catch (final ParserConfigurationException | SAXException | IOException e) {
263+
throw new IllegalArgumentException("XML parsing failed", e);
264+
}
265+
}
266+
218267
/** The internal default ErrorListener for Transformer. Just rethrows errors to
219268
* discontinue the XML transformation. */
220269
private static class InternalErrorListener implements ErrorListener {

core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,13 @@
2121
import org.apache.calcite.runtime.XmlFunctions;
2222
import org.apache.calcite.util.BuiltInMethod;
2323

24+
import org.checkerframework.checker.nullness.qual.Nullable;
2425
import org.hamcrest.Matcher;
26+
import org.junit.jupiter.api.BeforeAll;
2527
import org.junit.jupiter.api.Test;
2628

29+
import java.nio.file.Files;
30+
import java.nio.file.Path;
2731
import java.util.function.Supplier;
2832

2933
import static org.hamcrest.CoreMatchers.is;
@@ -36,6 +40,23 @@
3640
*/
3741
class SqlXmlFunctionsTest {
3842

43+
private static final String XML = "<document>string</document>";
44+
private static final String XSLT =
45+
"<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"></xsl:stylesheet>";
46+
private static final String DOCUMENT_PATH = "/document";
47+
private static @Nullable String xmlExternalEntity = null;
48+
private static @Nullable String xsltExternalEntity = null;
49+
50+
@BeforeAll public static void setup() throws Exception {
51+
final Path testFile = Files.createTempFile("foo", "temp");
52+
testFile.toFile().deleteOnExit();
53+
final String filePath = "file:///" + testFile.toAbsolutePath();
54+
xmlExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath
55+
+ "\"> ]><document>&entity;</document>";
56+
xsltExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath
57+
+ "\"> ]><xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">&entity;</xsl:stylesheet>";
58+
}
59+
3960
@Test void testExtractValue() {
4061
assertExtractValue("<a>ccc<b>ddd</b></a>", "/a", is("ccc"));
4162

@@ -45,6 +66,33 @@ class SqlXmlFunctionsTest {
4566
assertExtractValueFailed(input, "#", Matchers.expectThrowable(expected));
4667
}
4768

69+
@Test void testExtractValueExternalEntity() {
70+
String message = "Invalid input for EXTRACTVALUE: xml: '"
71+
+ xmlExternalEntity + "', xpath expression: '" + DOCUMENT_PATH + "'";
72+
CalciteException expected = new CalciteException(message, null);
73+
assertExtractValueFailed(xmlExternalEntity, DOCUMENT_PATH,
74+
Matchers.expectThrowable(expected));
75+
}
76+
77+
@Test void testExistsNodeExternalEntity() {
78+
String message = "Invalid input for EXISTSNODE xpath: '"
79+
+ DOCUMENT_PATH + "', namespace: '" + null + "'";
80+
CalciteException expected = new CalciteException(message, null);
81+
assertExistsNodeFailed(xmlExternalEntity, DOCUMENT_PATH, null,
82+
Matchers.expectThrowable(expected));
83+
}
84+
85+
@Test void testXmlTransformExternalEntity() {
86+
String message = "Invalid input for XMLTRANSFORM xml: '" + xmlExternalEntity + "'";
87+
CalciteException expected = new CalciteException(message, null);
88+
assertXmlTransformFailed(xmlExternalEntity, XSLT, Matchers.expectThrowable(expected));
89+
}
90+
91+
@Test void testXmlTransformExternalEntityXslt() {
92+
String message = "Illegal xslt specified : '" + xsltExternalEntity + "'";
93+
CalciteException expected = new CalciteException(message, null);
94+
assertXmlTransformFailed(XML, xsltExternalEntity, Matchers.expectThrowable(expected));
95+
}
4896

4997
@Test void testXmlTransform() {
5098
assertXmlTransform(null, "", nullValue());

0 commit comments

Comments
 (0)