You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by da...@apache.org on 2018/03/07 21:37:51 UTC
hive git commit: HIVE-18879: Disallow embedded element in UDFXPathUtil needs to work if xercesImpl.jar in classpath (Daniel Dai, reviewed by Thejas Nair)
Repository: hive
Updated Branches:
refs/heads/master 0cfd4fead -> b0a58d245
HIVE-18879: Disallow embedded element in UDFXPathUtil needs to work if xercesImpl.jar in classpath (Daniel Dai, reviewed by Thejas Nair)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b0a58d24
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b0a58d24
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b0a58d24
Branch: refs/heads/master
Commit: b0a58d245875dc1b3ac58a7cf1a61d3b17805e96
Parents: 0cfd4fe
Author: Daniel Dai <da...@hortonworks.com>
Authored: Wed Mar 7 13:37:38 2018 -0800
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Wed Mar 7 13:37:38 2018 -0800
----------------------------------------------------------------------
.../hadoop/hive/ql/udf/xml/UDFXPathUtil.java | 27 +++++++++++++++++++-
.../hive/ql/udf/xml/TestUDFXPathUtil.java | 23 ++++++++++++++++-
2 files changed, 48 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/b0a58d24/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
index fbdd340..5bf3318 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
@@ -23,6 +23,9 @@ import java.io.Reader;
import java.io.StringReader;
import javax.xml.namespace.QName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
@@ -38,9 +41,15 @@ import org.xml.sax.InputSource;
* of this class.
*/
public class UDFXPathUtil {
+ public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
+ public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
+ public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";
+ private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+ private DocumentBuilder builder = null;
private XPath xpath = XPathFactory.newInstance().newXPath();
private ReusableStringReader reader = new ReusableStringReader();
private InputSource inputSource = new InputSource(reader);
+
private XPathExpression expression = null;
private String oldPath = null;
@@ -66,15 +75,31 @@ public class UDFXPathUtil {
return null;
}
+ if (builder == null){
+ try {
+ initializeDocumentBuilderFactory();
+ builder = dbf.newDocumentBuilder();
+ } catch (ParserConfigurationException e) {
+ throw new RuntimeException("Error instantiating DocumentBuilder, cannot build xml parser", e);
+ }
+ }
+
reader.set(xml);
try {
- return expression.evaluate(inputSource, qname);
+ return expression.evaluate(builder.parse(inputSource), qname);
} catch (XPathExpressionException e) {
throw new RuntimeException ("Invalid expression '" + oldPath + "'", e);
+ } catch (Exception e) {
+ throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
}
}
+ private void initializeDocumentBuilderFactory() throws ParserConfigurationException {
+ dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false);
+ dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false);
+ }
+
public Boolean evalBoolean(String xml, String path) {
return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/b0a58d24/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
index 2edcb7d..060ce2e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
@@ -20,12 +20,15 @@ package org.apache.hadoop.hive.ql.udf.xml;
import javax.xml.xpath.XPathConstants;
+import org.apache.commons.io.FileUtils;
import org.junit.Test;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import static org.junit.Assert.*;
+import java.io.File;
+
public class TestUDFXPathUtil {
@Test
@@ -78,5 +81,23 @@ public class TestUDFXPathUtil {
assertTrue(result instanceof NodeList);
assertEquals(5, ((NodeList)result).getLength());
}
-
+
+ @Test
+ public void testEmbedFailure() throws Exception {
+
+ String secretValue = String.valueOf(Math.random());
+ File tempFile = File.createTempFile("verifyembed", ".tmp");
+ tempFile.deleteOnExit();
+ String fname = tempFile.getAbsolutePath();
+
+ FileUtils.writeStringToFile(tempFile, secretValue);
+
+ String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
+ "<!DOCTYPE test [ \n" +
+ " <!ENTITY embed SYSTEM \"" + fname + "\"> \n" +
+ "]>\n" +
+ "<foo>&embed;</foo>";
+ String evaled = new UDFXPathUtil().evalString(xml, "/foo");
+ assertTrue(evaled.isEmpty());
+ }
}