You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/04/12 23:18:50 UTC
[16/50] incubator-impala git commit: IMPALA-2650: UDF EE tests: use unique databases in some tests

IMPALA-2650: UDF EE tests: use unique databases in some tests

Some of the end-to-end tests in query_test/test_udfs.py create UDFs in
the default database and leave them there. Other tests (e.g.,
test_functions_ddl) that poll the default database and expect to find
no UDFs will then fail. It turns out this wouldn't happen in our Jenkins
builds (see IMPALA-2650 for more details as to why), but it manifests
itself with repeated impala-py.test runs in a specific order.

The fix is to create the UDFs in databases unique to the test cases.
This leaves the default database pristine during these tests.

Testing:

Before this change, the following sequence of impala-py.test commands would
cause any subsequent runs of test_functions_ddl to fail:

$ # simulate a subset of serial tests that expect default DB not to have UDFs
$ impala-py.test -m "execute_serially" --workload_exploration_strategy \
    functional-query:exhaustive -k test_functions_ddl metadata/test_ddl.py
PASS
$ # simulate a subset of parallel tests that create UDFs in default DB
$ impala-py.test -n4 -m "not execute_serially" --workload_exploration_strategy \
    functional-query:exhaustive query_test/test_udfs.py
PASS
$ # rerun a subset of serial tests that passed before
$ impala-py.test -m "execute_serially" --workload_exploration_strategy \
    functional-query:exhaustive -k test_functions_ddl metadata/test_ddl.py
FAIL, because test_udfs left UDFs behind in the default database.

With this change, I can run the same sequence over and over, and all of the runs pass.

Change-Id: Id4a8b4764fa310efaa4f6c6f06f64a4e18e44173
Reviewed-on: http://gerrit.cloudera.org:8080/2610
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b74e57a3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b74e57a3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b74e57a3

Branch: refs/heads/master
Commit: b74e57a31201c6ba3f2b1f62a8c16cdb3c1c9279
Parents: 943b7cc
Author: Michael Brown <mi...@cloudera.com>
Authored: Wed Mar 23 11:28:00 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Mar 30 04:50:15 2016 +0000

----------------------------------------------------------------------
 tests/query_test/test_udfs.py | 69 +++++++++++++++++++++-----------------
 1 file changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b74e57a3/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 5573ffc..a979886 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -88,16 +88,19 @@ class TestUdfs(ImpalaTestSuite):
       self.client.execute("drop database if exists udf_test cascade")
 
   @SkipIfLocal.multiple_impalad
-  def test_hive_udfs_missing_jar(self, vector):
+  def test_hive_udfs_missing_jar(self, vector, unique_database):
     """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
     on HDFS"""
     # Copy hive-exec.jar to a temporary file
     jar_path = get_fs_path("/test-warehouse/" + get_random_id(5) + ".jar")
     hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
     check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
-    drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
-    create_fn_stmt = "create function default.pi_missing_jar() returns double \
-        location '%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path
+    drop_fn_stmt = (
+        "drop function if exists "
+        "`{0}`.`pi_missing_jar`()".format(unique_database))
+    create_fn_stmt = (
+        "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
+        "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path))
 
     cluster = ImpalaCluster()
     impalad = cluster.get_any_impalad()
@@ -118,8 +121,8 @@ class TestUdfs(ImpalaTestSuite):
     # we used to create the function. This is to bypass loading from
     # the cache
     try:
-      self.execute_query_using_client(client,
-          "select default.pi_missing_jar()", vector)
+      self.execute_query_using_client(
+          client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
       assert False, "Query expected to fail"
     except ImpalaBeeswaxException, e:
       assert "Failed to get file info" in str(e)
@@ -127,22 +130,24 @@ class TestUdfs(ImpalaTestSuite):
   def test_libs_with_same_filenames(self, vector):
     self.run_test_case('QueryTest/libs_with_same_filenames', vector)
 
-  def test_udf_update_via_drop(self, vector):
+  def test_udf_update_via_drop(self, vector, unique_database):
     """Test updating the UDF binary without restarting Impala. Dropping
     the function should remove the binary from the local cache."""
     # Run with sync_ddl to guarantee the drop is processed by all impalads.
     exec_options = vector.get_value('exec_option')
     exec_options['sync_ddl'] = 1
-    old_udf = os.path.join(os.environ['IMPALA_HOME'],
-        'testdata/udfs/impala-hive-udfs.jar')
-    new_udf = os.path.join(os.environ['IMPALA_HOME'],
-        'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
+    old_udf = os.path.join(
+        os.environ['IMPALA_HOME'], 'testdata/udfs/impala-hive-udfs.jar')
+    new_udf = os.path.join(
+        os.environ['IMPALA_HOME'], 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
     udf_dst = get_fs_path('/test-warehouse/impala-hive-udfs2.jar')
 
-    drop_fn_stmt = 'drop function if exists default.udf_update_test_drop()'
-    create_fn_stmt = "create function default.udf_update_test_drop() returns string "\
-        "LOCATION '" + udf_dst + "' SYMBOL='com.cloudera.impala.TestUpdateUdf'"
-    query_stmt = "select default.udf_update_test_drop()"
+    drop_fn_stmt = (
+        'drop function if exists `{0}`.`udf_update_test_drop`()'.format(unique_database))
+    create_fn_stmt = (
+        "create function `{0}`.`udf_update_test_drop`() returns string LOCATION '{1}' "
+        "SYMBOL='com.cloudera.impala.TestUpdateUdf'".format(unique_database, udf_dst))
+    query_stmt = "select `{0}`.`udf_update_test_drop`()".format(unique_database)
 
     # Put the old UDF binary on HDFS, make the UDF in Impala and run it.
     check_call(["hadoop", "fs", "-put", "-f", old_udf, udf_dst])
@@ -157,48 +162,50 @@ class TestUdfs(ImpalaTestSuite):
     self.execute_query_expect_success(self.client, create_fn_stmt, exec_options)
     self.__run_query_all_impalads(exec_options, query_stmt, ["New UDF"])
 
-  def test_udf_update_via_create(self, vector):
+  def test_udf_update_via_create(self, vector, unique_database):
     """Test updating the UDF binary without restarting Impala. Creating a new function
     from the library should refresh the cache."""
     # Run with sync_ddl to guarantee the create is processed by all impalads.
     exec_options = vector.get_value('exec_option')
     exec_options['sync_ddl'] = 1
-    old_udf = os.path.join(os.environ['IMPALA_HOME'],
-        'testdata/udfs/impala-hive-udfs.jar')
-    new_udf = os.path.join(os.environ['IMPALA_HOME'],
-        'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
+    old_udf = os.path.join(
+        os.environ['IMPALA_HOME'], 'testdata/udfs/impala-hive-udfs.jar')
+    new_udf = os.path.join(
+        os.environ['IMPALA_HOME'], 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
     udf_dst = get_fs_path('/test-warehouse/impala-hive-udfs3.jar')
     old_function_name = "udf_update_test_create1"
     new_function_name = "udf_update_test_create2"
 
-    drop_fn_template = 'drop function if exists default.%s()'
+    drop_fn_template = 'drop function if exists `{0}`.`{{0}}`()'.format(unique_database)
     self.execute_query_expect_success(
-      self.client, drop_fn_template % old_function_name, exec_options)
+        self.client, drop_fn_template.format(old_function_name), exec_options)
     self.execute_query_expect_success(
-      self.client, drop_fn_template % new_function_name, exec_options)
+        self.client, drop_fn_template.format(new_function_name), exec_options)
 
-    create_fn_template = "create function default.%s() returns string "\
-        "LOCATION '" + udf_dst + "' SYMBOL='com.cloudera.impala.TestUpdateUdf'"
-    query_template = "select default.%s()"
+    create_fn_template = (
+        "create function `{0}`.`{{0}}`() returns string LOCATION '{1}' "
+        "SYMBOL='com.cloudera.impala.TestUpdateUdf'".format(unique_database, udf_dst))
+
+    query_template = "select `{0}`.`{{0}}`()".format(unique_database)
 
     # Put the old UDF binary on HDFS, make the UDF in Impala and run it.
     check_call(["hadoop", "fs", "-put", "-f", old_udf, udf_dst])
     self.execute_query_expect_success(
-      self.client, create_fn_template % old_function_name, exec_options)
+        self.client, create_fn_template.format(old_function_name), exec_options)
     self.__run_query_all_impalads(
-      exec_options, query_template % old_function_name, ["Old UDF"])
+        exec_options, query_template.format(old_function_name), ["Old UDF"])
 
     # Update the binary, and create a new function using the binary. The new binary
     # should be running.
     check_call(["hadoop", "fs", "-put", "-f", new_udf, udf_dst])
     self.execute_query_expect_success(
-      self.client, create_fn_template % new_function_name, exec_options)
+        self.client, create_fn_template.format(new_function_name), exec_options)
     self.__run_query_all_impalads(
-      exec_options, query_template % new_function_name, ["New UDF"])
+        exec_options, query_template.format(new_function_name), ["New UDF"])
 
     # The old function should use the new library now
     self.__run_query_all_impalads(
-      exec_options, query_template % old_function_name, ["New UDF"])
+        exec_options, query_template.format(old_function_name), ["New UDF"])
 
   def test_drop_function_while_running(self, vector):
     self.client.execute("drop function if exists default.drop_while_running(BIGINT)")