You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by da...@apache.org on 2018/03/07 21:37:51 UTC

hive git commit: HIVE-18879: Disallow embedded element in UDFXPathUtil needs to work if xercesImpl.jar in classpath (Daniel Dai, reviewed by Thejas Nair)

Repository: hive
Updated Branches:
  refs/heads/master 0cfd4fead -> b0a58d245


HIVE-18879: Disallow embedded element in UDFXPathUtil needs to work if xercesImpl.jar in classpath (Daniel Dai, reviewed by Thejas Nair)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b0a58d24
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b0a58d24
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b0a58d24

Branch: refs/heads/master
Commit: b0a58d245875dc1b3ac58a7cf1a61d3b17805e96
Parents: 0cfd4fe
Author: Daniel Dai <da...@hortonworks.com>
Authored: Wed Mar 7 13:37:38 2018 -0800
Committer: Daniel Dai <da...@hortonworks.com>
Committed: Wed Mar 7 13:37:38 2018 -0800

----------------------------------------------------------------------
 .../hadoop/hive/ql/udf/xml/UDFXPathUtil.java    | 27 +++++++++++++++++++-
 .../hive/ql/udf/xml/TestUDFXPathUtil.java       | 23 ++++++++++++++++-
 2 files changed, 48 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b0a58d24/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
index fbdd340..5bf3318 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/xml/UDFXPathUtil.java
@@ -23,6 +23,9 @@ import java.io.Reader;
 import java.io.StringReader;
 
 import javax.xml.namespace.QName;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
 import javax.xml.xpath.XPath;
 import javax.xml.xpath.XPathConstants;
 import javax.xml.xpath.XPathExpression;
@@ -38,9 +41,15 @@ import org.xml.sax.InputSource;
  * of this class.
  */
 public class UDFXPathUtil {
+  public static final String SAX_FEATURE_PREFIX = "http://xml.org/sax/features/";
+  public static final String EXTERNAL_GENERAL_ENTITIES_FEATURE = "external-general-entities";
+  public static final String EXTERNAL_PARAMETER_ENTITIES_FEATURE = "external-parameter-entities";
+  private DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
+  private DocumentBuilder builder = null;
   private XPath xpath = XPathFactory.newInstance().newXPath();
   private ReusableStringReader reader = new ReusableStringReader();
   private InputSource inputSource = new InputSource(reader);
+
   private XPathExpression expression = null;
   private String oldPath = null;
 
@@ -66,15 +75,31 @@ public class UDFXPathUtil {
       return null;
     }
 
+    if (builder == null){
+      try {
+        initializeDocumentBuilderFactory();
+        builder = dbf.newDocumentBuilder();
+      } catch (ParserConfigurationException e) {
+        throw new RuntimeException("Error instantiating DocumentBuilder, cannot build xml parser", e);
+      }
+    }
+
     reader.set(xml);
 
     try {
-      return expression.evaluate(inputSource, qname);
+      return expression.evaluate(builder.parse(inputSource), qname);
     } catch (XPathExpressionException e) {
       throw new RuntimeException ("Invalid expression '" + oldPath + "'", e);
+    } catch (Exception e) {
+      throw new RuntimeException("Error loading expression '" + oldPath + "'", e);
     }
   }
 
+  private void initializeDocumentBuilderFactory() throws ParserConfigurationException { // Configures dbf before the DocumentBuilder is created.
+    dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_GENERAL_ENTITIES_FEATURE, false); // turn off the SAX external-general-entities feature
+    dbf.setFeature(SAX_FEATURE_PREFIX + EXTERNAL_PARAMETER_ENTITIES_FEATURE, false); // turn off the SAX external-parameter-entities feature
+  }
+
   public Boolean evalBoolean(String xml, String path) {
     return (Boolean) eval(xml, path, XPathConstants.BOOLEAN);
   }

http://git-wip-us.apache.org/repos/asf/hive/blob/b0a58d24/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
index 2edcb7d..060ce2e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/xml/TestUDFXPathUtil.java
@@ -20,12 +20,15 @@ package org.apache.hadoop.hive.ql.udf.xml;
 
 import javax.xml.xpath.XPathConstants;
 
+import org.apache.commons.io.FileUtils;
 import org.junit.Test;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
 
 import static org.junit.Assert.*;
 
+import java.io.File;
+
 public class TestUDFXPathUtil {
 
   @Test
@@ -78,5 +81,23 @@ public class TestUDFXPathUtil {
     assertTrue(result instanceof NodeList);
     assertEquals(5, ((NodeList)result).getLength());
   }
-  
+
+  @Test
+  public void testEmbedFailure() throws Exception {
+    // Create a temp file holding a random value; the XML below points an external entity at it.
+    String secretValue = String.valueOf(Math.random());
+    File tempFile = File.createTempFile("verifyembed", ".tmp");
+    tempFile.deleteOnExit();
+    String fname = tempFile.getAbsolutePath();
+    // Store the random value in the file that the entity will reference.
+    FileUtils.writeStringToFile(tempFile, secretValue);
+    // The DOCTYPE declares a SYSTEM entity "embed" at the temp file's path, and <foo> references it.
+    String xml = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n" +
+        "<!DOCTYPE test [ \n" +
+        "    <!ENTITY embed SYSTEM \"" + fname + "\"> \n" +
+        "]>\n" +
+        "<foo>&embed;</foo>";
+    String evaled = new UDFXPathUtil().evalString(xml, "/foo");
+    assertTrue(evaled.isEmpty()); // an empty result means the file's content was not pulled into <foo>
+  }
 }