You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by sa...@apache.org on 2016/08/23 18:28:12 UTC

[3/4] incubator-impala git commit: IMPALA-3820: Handle linkage errors while loading Java UDFs in Catalog

IMPALA-3820: Handle linkage errors while loading Java UDFs in Catalog

Currently Catalog aborts at startup if the class loader loading Java
UDFs throws a LinkageError (e.g., NoClassDefError). The code has been
designed to catch a generic "Exception" and ignore any incompatible
or erroneous UDFs but it can't handle any "Error"s. Java generally
doesn't encourage to handle all types of "Error"s, however given the
Catalog tries to load thirdparty udfs, it is possible that there might
be missing dependencies that the Catalog is expected to handle.

Testing: Added an e-e test that loads a corrupt Java UDF from Hive and
restarts Catalog to make sure its up and the function count tallies.

Change-Id: I1109614f1617caa9bb27e8c151e902aae71a6b41
Reviewed-on: http://gerrit.cloudera.org:8080/4092
Reviewed-by: Bharath Vissapragada <bh...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/deaccbb9
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/deaccbb9
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/deaccbb9

Branch: refs/heads/master
Commit: deaccbb967138afacbe261e6e4fb81312688b09e
Parents: 8fb1598
Author: Bharath Vissapragada <bh...@cloudera.com>
Authored: Tue Jul 5 02:46:26 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Tue Aug 23 12:20:44 2016 +0000

----------------------------------------------------------------------
 .../impala/catalog/CatalogServiceCatalog.java   |  8 +++-
 .../impala/hive/executor/UdfExecutor.java       |  2 +-
 tests/custom_cluster/test_permanent_udfs.py     | 20 ++++++++++
 tests/test-hive-udfs/pom.xml                    | 10 +++++
 .../java/com/cloudera/impala/UnresolvedUdf.java | 39 ++++++++++++++++++++
 5 files changed, 77 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java
index ae92996..f0993bb 100644
--- a/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/com/cloudera/impala/catalog/CatalogServiceCatalog.java
@@ -60,6 +60,7 @@ import com.cloudera.impala.common.ImpalaException;
 import com.cloudera.impala.common.ImpalaRuntimeException;
 import com.cloudera.impala.common.JniUtil;
 import com.cloudera.impala.common.Pair;
+import com.cloudera.impala.hive.executor.UdfExecutor;
 import com.cloudera.impala.thrift.TCatalog;
 import com.cloudera.impala.thrift.TCatalogObject;
 import com.cloudera.impala.thrift.TCatalogObjectType;
@@ -458,7 +459,7 @@ public class CatalogServiceCatalog extends Catalog {
       // Load each method in the UDF class and create the corresponding Impala Function
       // object.
       for (Method m: udfClass.getMethods()) {
-        if (!m.getName().equals("evaluate")) continue;
+        if (!m.getName().equals(UdfExecutor.UDF_FUNCTION_NAME)) continue;
         Function fn = ScalarFunction.fromHiveFunction(db,
             function.getFunctionName(), function.getClassName(),
             m.getParameterTypes(), m.getReturnType(), jarUri);
@@ -481,6 +482,11 @@ public class CatalogServiceCatalog extends Catalog {
     } catch (Exception e) {
       LOG.error("Skipping function load: " + function.getFunctionName(), e);
       throw new ImpalaRuntimeException("Error extracting functions", e);
+    } catch (LinkageError e) {
+      String errorMsg = "Error resolving dependencies for Java function: " + db + "." +
+          function.getFunctionName();
+      LOG.error(errorMsg);
+      throw new ImpalaRuntimeException(errorMsg, e);
     }
     return result;
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java b/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java
index bdfe4cd..242c704 100644
--- a/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java
+++ b/fe/src/main/java/com/cloudera/impala/hive/executor/UdfExecutor.java
@@ -61,7 +61,7 @@ import com.google.common.collect.Lists;
 public class UdfExecutor {
   private static final Logger LOG = Logger.getLogger(UdfExecutor.class);
   // By convention, the function in the class must be called evaluate()
-  private static final String UDF_FUNCTION_NAME = "evaluate";
+  public static final String UDF_FUNCTION_NAME = "evaluate";
 
   // Object to deserialize ctor params from BE.
   private final static TBinaryProtocol.Factory PROTOCOL_FACTORY =

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/tests/custom_cluster/test_permanent_udfs.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py
index 28b4251..2f2457a 100644
--- a/tests/custom_cluster/test_permanent_udfs.py
+++ b/tests/custom_cluster/test_permanent_udfs.py
@@ -156,6 +156,26 @@ class TestUdfPersistence(CustomClusterTestSuite):
   @SkipIfS3.hive
   @SkipIfLocal.hive
   @pytest.mark.execute_serially
+  def test_corrupt_java_udf(self):
+    """ IMPALA-3820: This tests if the Catalog server can gracefully handle
+    Java UDFs with unresolved dependencies."""
+    if self.exploration_strategy() != 'exhaustive': pytest.skip()
+    # Create a Java UDF with unresolved dependencies from Hive and
+    # restart the Catalog server. Catalog should ignore the
+    # function load.
+    self.run_stmt_in_hive("create function %s.corrupt_udf as \
+        'com.cloudera.impala.UnresolvedUdf' using jar '%s'"
+        % (self.JAVA_FN_TEST_DB, self.JAVA_UDF_JAR))
+    self.__restart_cluster()
+    # Make sure the function count is 0
+    self.verify_function_count(
+        "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
+
+
+  @SkipIfIsilon.hive
+  @SkipIfS3.hive
+  @SkipIfLocal.hive
+  @pytest.mark.execute_serially
   def test_java_udfs_hive_integration(self):
     ''' This test checks the integration between Hive and Impala on
     CREATE FUNCTION and DROP FUNCTION statements for persistent Java UDFs.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/tests/test-hive-udfs/pom.xml
----------------------------------------------------------------------
diff --git a/tests/test-hive-udfs/pom.xml b/tests/test-hive-udfs/pom.xml
index 11c3096..002c52a 100644
--- a/tests/test-hive-udfs/pom.xml
+++ b/tests/test-hive-udfs/pom.xml
@@ -26,6 +26,11 @@
       <artifactId>hive-exec</artifactId>
       <version>${hive.version}</version>
     </dependency>
+    <dependency>
+      <groupId>com.googlecode.libphonenumber</groupId>
+      <artifactId>libphonenumber</artifactId>
+      <version>4.0</version>
+    </dependency>
   </dependencies>
 
  <build>
@@ -75,5 +80,10 @@
         <enabled>false</enabled>
       </snapshots>
     </repository>
+    <repository>
+      <id>com.google.libphonenumber</id>
+      <url>http://repo1.maven.org/maven2/com/googlecode/libphonenumber</url>
+      <name>Google phone number library</name>
+    </repository>
   </repositories>
 </project>

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/deaccbb9/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java
----------------------------------------------------------------------
diff --git a/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java b/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java
new file mode 100644
index 0000000..6487bab
--- /dev/null
+++ b/tests/test-hive-udfs/src/main/java/com/cloudera/impala/UnresolvedUdf.java
@@ -0,0 +1,39 @@
+// Copyright 2016 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.cloudera.impala;
+
+// Import a class that is not shaded in the UDF jar.
+import com.google.i18n.phonenumbers.NumberParseException;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * This class intends to test a UDF that has some classes that the Catalog's
+ * class loader can't resolve. We import the class NumberParseException from
+ * Google's phone number library that won't be shaded with the UDF jar. The
+ * Catalog should gracefully handle this situation by ignoring the load of
+ * Java UDF that references this jar.
+ *
+ * The jar for this file can be built by running "mvn clean package" in
+ * tests/test-hive-udfs. This is run in testdata/bin/create-load-data.sh, and
+ * copied to HDFS in testdata/bin/copy-udfs-uda.sh.
+ */
+public class UnresolvedUdf extends UDF {
+
+  public IntWritable evaluate(IntWritable a) throws NumberParseException {
+    return a;
+  }
+}