You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by ru...@apache.org on 2022/09/07 20:50:48 UTC
[calcite] branch main updated: [CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder
This is an automated email from the ASF dual-hosted git repository.
rubenql pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new ba80b9156a [CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder
ba80b9156a is described below
commit ba80b9156afc0db26b194d97a031fcc0dc7f4c03
Author: rubenada <ru...@gmail.com>
AuthorDate: Sat Sep 3 19:25:25 2022 +0100
[CALCITE-5263] Improve XmlFunctions by using an XML DocumentBuilder
Co-authored-by: David Handermann <ex...@apache.org>
---
.../org/apache/calcite/runtime/XmlFunctions.java | 67 +++++++++++++++++++---
.../apache/calcite/test/SqlXmlFunctionsTest.java | 48 ++++++++++++++++
2 files changed, 106 insertions(+), 9 deletions(-)
diff --git a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
index 8c81647525..03134321a4 100644
--- a/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/XmlFunctions.java
@@ -24,7 +24,9 @@ import org.checkerframework.checker.nullness.qual.Nullable;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.ArrayList;
@@ -33,6 +35,10 @@ import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import javax.xml.XMLConstants;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.ErrorListener;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Source;
@@ -48,6 +54,7 @@ import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
+import javax.xml.xpath.XPathFactoryConfigurationException;
import static org.apache.calcite.linq4j.Nullness.castNonNull;
import static org.apache.calcite.util.Static.RESOURCE;
@@ -60,13 +67,41 @@ import static java.util.Objects.requireNonNull;
public class XmlFunctions {
private static final ThreadLocal<@Nullable XPathFactory> XPATH_FACTORY =
- ThreadLocal.withInitial(XPathFactory::newInstance);
+ ThreadLocal.withInitial(() -> {
+ final XPathFactory xPathFactory = XPathFactory.newInstance();
+ try {
+ xPathFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ } catch (XPathFactoryConfigurationException e) {
+ throw new IllegalStateException("XPath Factory configuration failed", e);
+ }
+ return xPathFactory;
+ });
private static final ThreadLocal<@Nullable TransformerFactory> TRANSFORMER_FACTORY =
ThreadLocal.withInitial(() -> {
- TransformerFactory transformerFactory = TransformerFactory.newInstance();
+ final TransformerFactory transformerFactory = TransformerFactory.newInstance();
transformerFactory.setErrorListener(new InternalErrorListener());
+ try {
+ transformerFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ } catch (TransformerConfigurationException e) {
+ throw new IllegalStateException("Transformer Factory configuration failed", e);
+ }
return transformerFactory;
});
+ private static final ThreadLocal<@Nullable DocumentBuilderFactory> DOCUMENT_BUILDER_FACTORY =
+ ThreadLocal.withInitial(() -> {
+ final DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
+ documentBuilderFactory.setXIncludeAware(false);
+ documentBuilderFactory.setExpandEntityReferences(false);
+ documentBuilderFactory.setNamespaceAware(true);
+ try {
+ documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+ documentBuilderFactory
+ .setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+ } catch (final ParserConfigurationException e) {
+ throw new IllegalStateException("Document Builder configuration failed", e);
+ }
+ return documentBuilderFactory;
+ });
private static final Pattern VALID_NAMESPACE_PATTERN = Pattern
.compile("^(([0-9a-zA-Z:_-]+=\"[^\"]*\")( [0-9a-zA-Z:_-]+=\"[^\"]*\")*)$");
@@ -81,10 +116,11 @@ public class XmlFunctions {
return null;
}
try {
+ final Node documentNode = getDocumentNode(input);
XPathExpression xpathExpression = castNonNull(XPATH_FACTORY.get()).newXPath().compile(xpath);
try {
NodeList nodes = (NodeList) xpathExpression
- .evaluate(new InputSource(new StringReader(input)), XPathConstants.NODESET);
+ .evaluate(documentNode, XPathConstants.NODESET);
List<@Nullable String> result = new ArrayList<>();
for (int i = 0; i < nodes.getLength(); i++) {
Node item = castNonNull(nodes.item(i));
@@ -94,9 +130,9 @@ public class XmlFunctions {
}
return StringUtils.join(result, " ");
} catch (XPathExpressionException e) {
- return xpathExpression.evaluate(new InputSource(new StringReader(input)));
+ return xpathExpression.evaluate(documentNode);
}
- } catch (XPathExpressionException ex) {
+ } catch (IllegalArgumentException | XPathExpressionException ex) {
throw RESOURCE.invalidInputForExtractValue(input, xpath).ex();
}
}
@@ -140,17 +176,18 @@ public class XmlFunctions {
XPathExpression xpathExpression = xPath.compile(xpath);
+ final Node documentNode = getDocumentNode(xml);
try {
List<String> result = new ArrayList<>();
NodeList nodes = (NodeList) xpathExpression
- .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET);
+ .evaluate(documentNode, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
result.add(convertNodeToString(castNonNull(nodes.item(i))));
}
return StringUtils.join(result, "");
} catch (XPathExpressionException e) {
Node node = (Node) xpathExpression
- .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE);
+ .evaluate(documentNode, XPathConstants.NODE);
return convertNodeToString(node);
}
} catch (IllegalArgumentException | XPathExpressionException | TransformerException ex) {
@@ -174,16 +211,17 @@ public class XmlFunctions {
}
XPathExpression xpathExpression = xPath.compile(xpath);
+ final Node documentNode = getDocumentNode(xml);
try {
NodeList nodes = (NodeList) xpathExpression
- .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODESET);
+ .evaluate(documentNode, XPathConstants.NODESET);
if (nodes != null && nodes.getLength() > 0) {
return 1;
}
return 0;
} catch (XPathExpressionException e) {
Node node = (Node) xpathExpression
- .evaluate(new InputSource(new StringReader(xml)), XPathConstants.NODE);
+ .evaluate(documentNode, XPathConstants.NODE);
if (node != null) {
return 1;
}
@@ -215,6 +253,17 @@ public class XmlFunctions {
return writer.toString();
}
+ private static Node getDocumentNode(final String xml) {
+ try {
+ final DocumentBuilder documentBuilder =
+ castNonNull(DOCUMENT_BUILDER_FACTORY.get()).newDocumentBuilder();
+ final InputSource inputSource = new InputSource(new StringReader(xml));
+ return documentBuilder.parse(inputSource);
+ } catch (final ParserConfigurationException | SAXException | IOException e) {
+ throw new IllegalArgumentException("XML parsing failed", e);
+ }
+ }
+
/** The internal default ErrorListener for Transformer. Just rethrows errors to
* discontinue the XML transformation. */
private static class InternalErrorListener implements ErrorListener {
diff --git a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
index 1457ef6908..046d935705 100644
--- a/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
+++ b/core/src/test/java/org/apache/calcite/test/SqlXmlFunctionsTest.java
@@ -21,9 +21,13 @@ import org.apache.calcite.runtime.SqlFunctions;
import org.apache.calcite.runtime.XmlFunctions;
import org.apache.calcite.util.BuiltInMethod;
+import org.checkerframework.checker.nullness.qual.Nullable;
import org.hamcrest.Matcher;
+import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.function.Supplier;
import static org.hamcrest.CoreMatchers.is;
@@ -36,6 +40,23 @@ import static org.junit.jupiter.api.Assertions.fail;
*/
class SqlXmlFunctionsTest {
+ private static final String XML = "<document>string</document>";
+ private static final String XSLT =
+ "<xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"></xsl:stylesheet>";
+ private static final String DOCUMENT_PATH = "/document";
+ private static @Nullable String xmlExternalEntity = null;
+ private static @Nullable String xsltExternalEntity = null;
+
+ @BeforeAll public static void setup() throws Exception {
+ final Path testFile = Files.createTempFile("foo", "temp");
+ testFile.toFile().deleteOnExit();
+ final String filePath = "file:///" + testFile.toAbsolutePath();
+ xmlExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath
+ + "\"> ]><document>&entity;</document>";
+ xsltExternalEntity = "<!DOCTYPE document [ <!ENTITY entity SYSTEM \"" + filePath
+ + "\"> ]><xsl:stylesheet xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\">&entity;</xsl:stylesheet>";
+ }
+
@Test void testExtractValue() {
assertExtractValue("<a>ccc<b>ddd</b></a>", "/a", is("ccc"));
@@ -45,6 +66,33 @@ class SqlXmlFunctionsTest {
assertExtractValueFailed(input, "#", Matchers.expectThrowable(expected));
}
+ @Test void testExtractValueExternalEntity() {
+ String message = "Invalid input for EXTRACTVALUE: xml: '"
+ + xmlExternalEntity + "', xpath expression: '" + DOCUMENT_PATH + "'";
+ CalciteException expected = new CalciteException(message, null);
+ assertExtractValueFailed(xmlExternalEntity, DOCUMENT_PATH,
+ Matchers.expectThrowable(expected));
+ }
+
+ @Test void testExistsNodeExternalEntity() {
+ String message = "Invalid input for EXISTSNODE xpath: '"
+ + DOCUMENT_PATH + "', namespace: '" + null + "'";
+ CalciteException expected = new CalciteException(message, null);
+ assertExistsNodeFailed(xmlExternalEntity, DOCUMENT_PATH, null,
+ Matchers.expectThrowable(expected));
+ }
+
+ @Test void testXmlTransformExternalEntity() {
+ String message = "Invalid input for XMLTRANSFORM xml: '" + xmlExternalEntity + "'";
+ CalciteException expected = new CalciteException(message, null);
+ assertXmlTransformFailed(xmlExternalEntity, XSLT, Matchers.expectThrowable(expected));
+ }
+
+ @Test void testXmlTransformExternalEntityXslt() {
+ String message = "Illegal xslt specified : '" + xsltExternalEntity + "'";
+ CalciteException expected = new CalciteException(message, null);
+ assertXmlTransformFailed(XML, xsltExternalEntity, Matchers.expectThrowable(expected));
+ }
@Test void testXmlTransform() {
assertXmlTransform(null, "", nullValue());