You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by cs...@apache.org on 2022/09/13 14:49:11 UTC

[impala] branch master updated: IMPALA-11528: Catalogd should start up with a corrupt Hive function.

This is an automated email from the ASF dual-hosted git repository.

csringhofer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 4e813b708 IMPALA-11528: Catalogd should start up with a corrupt Hive function.
4e813b708 is described below

commit 4e813b7085c995a7244ef886b00c22e9d93cc80c
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Mon Aug 29 14:57:19 2022 -0700

    IMPALA-11528: Catalogd should start up with a corrupt Hive function.
    
    This commit handles the case for a specific kind of corrupt function
    within the Hive Metastore in the following situation:
    
    A valid Hive SQL function gets created in HMS. This UDF is written in
    Java and must derive from the "UDF" class. After creating this function
    in Impala, we then replace the underlying jar file with a class that
    does NOT derive from the "UDF" class.
    
    In this scenario, catalogd should reject the function and still start
    up gracefully. Before this commit, catalogd wasn't coming up. The
    reason for this was that the Hive function
    FunctionUtils.getUDFClassType() has a dependency on UDAF and was
    throwing a LinkageError exception, so we need to include the UDAF
    class in the shaded jar.
    
    Change-Id: I54e7a1df6d018ba6cf5ecf32dc9946edf86e2112
    Reviewed-on: http://gerrit.cloudera.org:8080/18927
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tamas Mate <tm...@apache.org>
---
 .../impala/catalog/CatalogServiceCatalog.java      |  2 +-
 java/pom.xml                                       |  1 +
 java/shaded-deps/hive-exec/pom.xml                 |  1 +
 java/test-corrupt-hive-udfs/pom.xml                | 53 ++++++++++++++++++++++
 .../main/java/org/apache/impala/CorruptUdf.java    | 28 ++++++++++++
 .../main/java/org/apache/impala/CorruptUdf.java    | 35 ++++++++++++++
 testdata/bin/copy-udfs-udas.sh                     |  6 +++
 tests/custom_cluster/test_permanent_udfs.py        | 28 ++++++++++++
 8 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index 4515784f8..0a8e70c4c 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -1764,7 +1764,7 @@ public class CatalogServiceCatalog extends Catalog {
           db.addFunction(fn);
           fn.setCatalogVersion(incrementAndGetCatalogVersion());
         }
-      } catch (Exception e) {
+      } catch (Exception | LinkageError e) {
         LOG.error("Skipping function load: " + function.getFunctionName(), e);
       }
     }
diff --git a/java/pom.xml b/java/pom.xml
index 6b972aea8..5e0e6356a 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -261,6 +261,7 @@ under the License.
     <module>shaded-deps/s3a-aws-sdk</module>
     <module>TableFlattener</module>
     <module>test-hive-udfs</module>
+    <module>test-corrupt-hive-udfs</module>
     <module>yarn-extras</module>
   </modules>
 
diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml
index 417a087ae..499852614 100644
--- a/java/shaded-deps/hive-exec/pom.xml
+++ b/java/shaded-deps/hive-exec/pom.xml
@@ -100,6 +100,7 @@ the same dependencies
                 <include>org/apache/hadoop/hive/ql/parse/SemanticException.class</include>
                 <!-- Needed to support Hive udfs -->
                 <include>org/apache/hadoop/hive/ql/exec/*UDF*</include>
+                <include>org/apache/hadoop/hive/ql/exec/UDAF.class</include>
                 <include>org/apache/hadoop/hive/ql/exec/MapredContext.class</include>
                 <include>org/apache/hadoop/hive/ql/exec/FunctionUtils*</include>
                 <include>org/apache/hadoop/hive/ql/parse/GenericHiveLexer*</include>
diff --git a/java/test-corrupt-hive-udfs/pom.xml b/java/test-corrupt-hive-udfs/pom.xml
new file mode 100644
index 000000000..07865d121
--- /dev/null
+++ b/java/test-corrupt-hive-udfs/pom.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.impala</groupId>
+    <artifactId>impala-parent</artifactId>
+    <version>4.2.0-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>test-corrupt-hive-udfs</artifactId>
+  <version>1.0</version>
+  <packaging>jar</packaging>
+
+  <name>test-corrupt-hive-udfs</name>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+ <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.3</version>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/java/test-corrupt-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java b/java/test-corrupt-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
new file mode 100644
index 000000000..59a312aeb
--- /dev/null
+++ b/java/test-corrupt-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala;
+
+/*
+ * This class is one of two classes used to test for a corrupt UDF loaded
+ * into the Hive MetaStore. This class contains an invalid UDF. The jar file with
+ * this class will replace the class in the jar created from the test-hive-udfs
+ * directory. The catalogd executable should still be able to start correctly
+ * even with this corrupt function.
+ */
+public class CorruptUdf {
+}
diff --git a/java/test-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java b/java/test-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
new file mode 100644
index 000000000..b6d7d7198
--- /dev/null
+++ b/java/test-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.IntWritable;
+
+/*
+ * This class is one of two classes used to test for a corrupt UDF loaded
+ * into the Hive MetaStore. This class contains a valid UDF. This file will
+ * be used to create the function through SQL within the Hive MetaStore.
+ * Later, this object will be replaced by a corrupt Java object (under
+ * the test-corrupt-hive-udfs directory). The test will ensure that catalogd
+ * is still able to start up fine (though the function will be disabled).
+ */
+public class CorruptUdf extends UDF {
+  public IntWritable evaluate(IntWritable a) {
+    return a;
+  }
+}
diff --git a/testdata/bin/copy-udfs-udas.sh b/testdata/bin/copy-udfs-udas.sh
index 438eff4a9..e73f38d1e 100755
--- a/testdata/bin/copy-udfs-udas.sh
+++ b/testdata/bin/copy-udfs-udas.sh
@@ -51,6 +51,10 @@ then
   pushd "${IMPALA_HOME}"
   "${MAKE_CMD:-make}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \
       TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir uda-sample-ir
+  cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs"
+  "${IMPALA_HOME}/bin/mvn-quiet.sh" package
+  cp target/test-corrupt-hive-udfs-1.0.jar \
+      "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar"
   cd "${IMPALA_HOME}/java/test-hive-udfs"
   "${IMPALA_HOME}/bin/mvn-quiet.sh" package
   cp target/test-hive-udfs-1.0.jar "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar"
@@ -97,6 +101,8 @@ ln -s "${IMPALA_HOME}/be/build/latest/testutil/libTestUdfs.so" "${UDF_TMP_DIR}/u
 ln -s "${HIVE_HOME}/lib/hive-exec-"*.jar "${UDF_TMP_DIR}/hive-exec.jar"
 ln -s "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar" \
   "${UDF_TMP_DIR}/impala-hive-udfs.jar"
+ln -s "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar" \
+  "${UDF_TMP_DIR}/impala-corrupt-hive-udfs.jar"
 ln -s "${IMPALA_HOME}/be/build/latest/testutil/test-udfs.ll" "${UDF_TMP_DIR}"
 ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/libudfsample.so" "${UDF_TMP_DIR}"
 ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/udf-sample.ll" "${UDF_TMP_DIR}"
diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py
index 94abed452..678641070 100644
--- a/tests/custom_cluster/test_permanent_udfs.py
+++ b/tests/custom_cluster/test_permanent_udfs.py
@@ -176,6 +176,34 @@ class TestUdfPersistence(CustomClusterTestSuite):
     self.verify_function_count(
         "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
 
+  @SkipIfFS.hive
+  @pytest.mark.execute_serially
+  def test_corrupt_java_bad_function(self):
+    if self.exploration_strategy() != 'exhaustive': pytest.skip()
+    """ IMPALA-11528: This tests if a corrupt function exists inside of Hive
+    which does not derive from UDF. The way we do this here is to create a valid
+    function in Hive which does derive from UDF, but switch the underlying jar to
+    one that does not derive from the UDF class. """
+
+    CORRUPT_JAR = "test-warehouse/test_corrupt.jar"
+    self.filesystem_client.delete_file_dir(CORRUPT_JAR)
+    # impala-hive-udfs.jar contains the class CorruptUdf, which derives from UDF
+    # and is therefore a valid function.
+    self.filesystem_client.copy("/test-warehouse/impala-hive-udfs.jar",
+        "/" + CORRUPT_JAR)
+    self.run_stmt_in_hive("create function %s.corrupt_bad_function_udf as \
+        'org.apache.impala.CorruptUdf' using jar '%s/%s'"
+        % (self.JAVA_FN_TEST_DB, os.getenv('DEFAULT_FS'), CORRUPT_JAR))
+    # Now replace it with impala-corrupt-hive-udfs.jar, whose CorruptUdf class does
+    # not derive from UDF, making it an invalid UDF.
+    self.filesystem_client.delete_file_dir(CORRUPT_JAR)
+    self.filesystem_client.copy("/test-warehouse/impala-corrupt-hive-udfs.jar",
+        "/" + CORRUPT_JAR)
+    self.__restart_cluster()
+    # Make sure the function count is 0
+    self.verify_function_count(
+        "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
+
   @SkipIfFS.hive
   @pytest.mark.execute_serially
   @CustomClusterTestSuite.with_args(