You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by st...@apache.org on 2022/09/27 02:58:49 UTC

[impala] branch branch-4.1.1 updated: IMPALA-11528: Catalogd should start up with a corrupt Hive function.

This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch branch-4.1.1
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/branch-4.1.1 by this push:
     new dddc17c1e IMPALA-11528: Catalogd should start up with a corrupt Hive function.
dddc17c1e is described below

commit dddc17c1ef6037f3881a92f9188ffa57acd1bece
Author: Steve Carlin <sc...@cloudera.com>
AuthorDate: Mon Aug 29 14:57:19 2022 -0700

    IMPALA-11528: Catalogd should start up with a corrupt Hive function.
    
    This commit handles the case for a specific kind of corrupt function
    within the Hive Metastore in the following situation:
    
    A valid Hive SQL function gets created in HMS. This UDF is written in
    Java and must derive from the "UDF" class. After creating this function
    in Impala, we then replace the underlying jar file with a class that
    does NOT derive from the "UDF" class.
    
    In this scenario, catalogd should reject the function and still start
    up gracefully. Before this commit, catalogd wasn't coming up because
    the Hive function FunctionUtils.getUDFClassType() has a dependency on
    UDAF and was throwing a LinkageError, so we need to include the UDAF
    class in the shaded jar.
    
    Merge conflicts in branch-4.1:
    - Ignore MapredContext.class in java/shaded-deps/hive-exec/pom.xml
    - Replace SkipIfFS.hive in test_permanent_udfs.py with individual skip
      annotations
    - Update version in java/test-corrupt-hive-udfs/pom.xml
    
    Change-Id: I54e7a1df6d018ba6cf5ecf32dc9946edf86e2112
    Reviewed-on: http://gerrit.cloudera.org:8080/18927
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
    Reviewed-by: Tamas Mate <tm...@apache.org>
    Reviewed-on: http://gerrit.cloudera.org:8080/19019
---
 .../impala/catalog/CatalogServiceCatalog.java      |  2 +-
 java/pom.xml                                       |  1 +
 java/shaded-deps/hive-exec/pom.xml                 |  1 +
 java/test-corrupt-hive-udfs/pom.xml                | 53 ++++++++++++++++++++++
 .../main/java/org/apache/impala/CorruptUdf.java    | 28 ++++++++++++
 .../main/java/org/apache/impala/CorruptUdf.java    | 35 ++++++++++++++
 testdata/bin/copy-udfs-udas.sh                     |  6 +++
 tests/custom_cluster/test_permanent_udfs.py        | 34 ++++++++++++++
 8 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
index c85af6070..40d9ad1b5 100644
--- a/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
+++ b/fe/src/main/java/org/apache/impala/catalog/CatalogServiceCatalog.java
@@ -1758,7 +1758,7 @@ public class CatalogServiceCatalog extends Catalog {
           db.addFunction(fn);
           fn.setCatalogVersion(incrementAndGetCatalogVersion());
         }
-      } catch (Exception e) {
+      } catch (Exception | LinkageError e) {
         LOG.error("Skipping function load: " + function.getFunctionName(), e);
       }
     }
diff --git a/java/pom.xml b/java/pom.xml
index 9ec75aa51..7736c8611 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -260,6 +260,7 @@ under the License.
     <module>shaded-deps/s3a-aws-sdk</module>
     <module>TableFlattener</module>
     <module>test-hive-udfs</module>
+    <module>test-corrupt-hive-udfs</module>
     <module>yarn-extras</module>
   </modules>
 
diff --git a/java/shaded-deps/hive-exec/pom.xml b/java/shaded-deps/hive-exec/pom.xml
index ef4e4a484..26cdec753 100644
--- a/java/shaded-deps/hive-exec/pom.xml
+++ b/java/shaded-deps/hive-exec/pom.xml
@@ -100,6 +100,7 @@ the same dependencies
                 <include>org/apache/hadoop/hive/ql/parse/SemanticException.class</include>
                 <!-- Needed to support Hive udfs -->
                 <include>org/apache/hadoop/hive/ql/exec/*UDF*</include>
+                <include>org/apache/hadoop/hive/ql/exec/UDAF.class</include>
                 <include>org/apache/hadoop/hive/ql/exec/FunctionUtils*</include>
                 <include>org/apache/hadoop/hive/ql/parse/GenericHiveLexer*</include>
                 <include>org/apache/hadoop/hive/ql/parse/HiveLexer*</include>
diff --git a/java/test-corrupt-hive-udfs/pom.xml b/java/test-corrupt-hive-udfs/pom.xml
new file mode 100644
index 000000000..6edb0fa05
--- /dev/null
+++ b/java/test-corrupt-hive-udfs/pom.xml
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <parent>
+    <groupId>org.apache.impala</groupId>
+    <artifactId>impala-parent</artifactId>
+    <version>4.1.1-SNAPSHOT</version>
+  </parent>
+  <modelVersion>4.0.0</modelVersion>
+
+  <artifactId>test-corrupt-hive-udfs</artifactId>
+  <version>1.0</version>
+  <packaging>jar</packaging>
+
+  <name>test-corrupt-hive-udfs</name>
+
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+
+ <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.3</version>
+        <configuration>
+          <source>1.8</source>
+          <target>1.8</target>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+</project>
diff --git a/java/test-corrupt-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java b/java/test-corrupt-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
new file mode 100644
index 000000000..59a312aeb
--- /dev/null
+++ b/java/test-corrupt-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
@@ -0,0 +1,28 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala;
+
+/*
+ * This class is one of two classes used to test for a corrupt UDF loaded
+ * into the Hive MetaStore. This class is an invalid UDF because it does not
+ * extend the Hive UDF class. The jar file with this class will replace the jar
+ * created from the test-hive-udfs directory. The catalogd executable should
+ * still be able to start correctly even with this corrupt function.
+ */
+public class CorruptUdf {
+}
diff --git a/java/test-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java b/java/test-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
new file mode 100644
index 000000000..b6d7d7198
--- /dev/null
+++ b/java/test-hive-udfs/src/main/java/org/apache/impala/CorruptUdf.java
@@ -0,0 +1,35 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.impala;
+
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.IntWritable;
+
+/*
+ * This class is one of two classes used to test for a corrupt UDF loaded
+ * into the Hive MetaStore. This class contains a valid UDF. This file will
+ * be used to create the function through SQL within the Hive MetaStore.
+ * Later, this object will be replaced by a corrupt Java object (under
+ * the test-corrupt-hive-udfs directory). The test will ensure that catalogd
+ * is still able to start up fine (though the function will be disabled).
+ */
+public class CorruptUdf extends UDF {
+  public IntWritable evaluate(IntWritable a) {
+    return a;
+  }
+}
diff --git a/testdata/bin/copy-udfs-udas.sh b/testdata/bin/copy-udfs-udas.sh
index 438eff4a9..e73f38d1e 100755
--- a/testdata/bin/copy-udfs-udas.sh
+++ b/testdata/bin/copy-udfs-udas.sh
@@ -51,6 +51,10 @@ then
   pushd "${IMPALA_HOME}"
   "${MAKE_CMD:-make}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \
       TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir uda-sample-ir
+  cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs"
+  "${IMPALA_HOME}/bin/mvn-quiet.sh" package
+  cp target/test-corrupt-hive-udfs-1.0.jar \
+      "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar"
   cd "${IMPALA_HOME}/java/test-hive-udfs"
   "${IMPALA_HOME}/bin/mvn-quiet.sh" package
   cp target/test-hive-udfs-1.0.jar "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar"
@@ -97,6 +101,8 @@ ln -s "${IMPALA_HOME}/be/build/latest/testutil/libTestUdfs.so" "${UDF_TMP_DIR}/u
 ln -s "${HIVE_HOME}/lib/hive-exec-"*.jar "${UDF_TMP_DIR}/hive-exec.jar"
 ln -s "${IMPALA_HOME}/testdata/udfs/impala-hive-udfs.jar" \
   "${UDF_TMP_DIR}/impala-hive-udfs.jar"
+ln -s "${IMPALA_HOME}/testdata/udfs/impala-corrupt-hive-udfs.jar" \
+  "${UDF_TMP_DIR}/impala-corrupt-hive-udfs.jar"
 ln -s "${IMPALA_HOME}/be/build/latest/testutil/test-udfs.ll" "${UDF_TMP_DIR}"
 ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/libudfsample.so" "${UDF_TMP_DIR}"
 ln -s "${IMPALA_HOME}/be/build/latest/udf_samples/udf-sample.ll" "${UDF_TMP_DIR}"
diff --git a/tests/custom_cluster/test_permanent_udfs.py b/tests/custom_cluster/test_permanent_udfs.py
index 203182310..b65c4b331 100644
--- a/tests/custom_cluster/test_permanent_udfs.py
+++ b/tests/custom_cluster/test_permanent_udfs.py
@@ -185,6 +185,40 @@ class TestUdfPersistence(CustomClusterTestSuite):
         "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
 
 
+  @SkipIfIsilon.hive
+  @SkipIfS3.hive
+  @SkipIfGCS.hive
+  @SkipIfCOS.hive
+  @SkipIfABFS.hive
+  @SkipIfADLS.hive
+  @SkipIfLocal.hive
+  @pytest.mark.execute_serially
+  def test_corrupt_java_bad_function(self):
+    if self.exploration_strategy() != 'exhaustive': pytest.skip()
+    """ IMPALA-11528: This tests if a corrupt function exists inside of Hive
+    which does not derive from UDF. The way we do this here is to create a valid
+    function in Hive which does derive from UDF, but switch the underlying jar to
+    one that does not derive from the UDF class. """
+
+    CORRUPT_JAR = "test-warehouse/test_corrupt.jar"
+    self.filesystem_client.delete_file_dir(CORRUPT_JAR)
+    # impala-hive-udfs.jar contains the class CorruptUdf which derives from UDF
+    # which is a valid function.
+    self.filesystem_client.copy("/test-warehouse/impala-hive-udfs.jar",
+        "/" + CORRUPT_JAR)
+    self.run_stmt_in_hive("create function %s.corrupt_bad_function_udf as \
+        'org.apache.impala.CorruptUdf' using jar '%s/%s'"
+        % (self.JAVA_FN_TEST_DB, os.getenv('DEFAULT_FS'), CORRUPT_JAR))
+    # Now copy the CorruptUdf class from the impala-corrupt-hive-udfs.jar file which
+    # does not derive from UDF, making it an invalid UDF.
+    self.filesystem_client.delete_file_dir(CORRUPT_JAR)
+    self.filesystem_client.copy("/test-warehouse/impala-corrupt-hive-udfs.jar",
+        "/" + CORRUPT_JAR)
+    self.__restart_cluster()
+    # Make sure the function count is 0
+    self.verify_function_count(
+        "SHOW FUNCTIONS in {0}".format(self.JAVA_FN_TEST_DB), 0)
+
   @SkipIfIsilon.hive
   @SkipIfS3.hive
   @SkipIfGCS.hive