You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/04/12 23:18:50 UTC
[16/50] incubator-impala git commit: IMPALA-2650: UDF EE tests: use
unique databases in some tests
IMPALA-2650: UDF EE tests: use unique databases in some tests
Some of the end-to-end tests in query_test/test_udfs.py create UDFs in
the default database and leave them there. Other tests (e.g.,
test_functions_ddl) that poll the default database and expect to find
no UDFs will then fail. It turns out this wouldn't happen in our Jenkins
builds (see IMPALA-2650 for more details as to why), but it manifests
itself with repeated impala-py.test runs in a specific order.
The fix is to create the UDFs in databases unique to the test cases.
This leaves the default database pristine during these tests.
Testing:
Before, the following sequence of impala-py.test commands would cause
any subsequent runs of test_functions_ddl to fail:
$ # simulate a subset of serial tests that expect default DB not to have UDFs
$ impala-py.test -m "execute_serially" --workload_exploration_strategy \
functional-query:exhaustive -k test_functions_ddl metadata/test_ddl.py
PASS
$ # simulate a subset of parallel tests that create UDFs in default DB
$ impala-py.test -n4 -m "not execute_serially" --workload_exploration_strategy \
functional-query:exhaustive query_test/test_udfs.py
PASS
$ # rerun a subset of serial tests that passed before
$ impala-py.test -m "execute_serially" --workload_exploration_strategy \
functional-query:exhaustive -k test_functions_ddl metadata/test_ddl.py
FAIL, because test_udfs left UDFs in the default database.
Now, I can run these over and over, and they pass.
Change-Id: Id4a8b4764fa310efaa4f6c6f06f64a4e18e44173
Reviewed-on: http://gerrit.cloudera.org:8080/2610
Reviewed-by: Michael Brown <mi...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b74e57a3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b74e57a3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b74e57a3
Branch: refs/heads/master
Commit: b74e57a31201c6ba3f2b1f62a8c16cdb3c1c9279
Parents: 943b7cc
Author: Michael Brown <mi...@cloudera.com>
Authored: Wed Mar 23 11:28:00 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Wed Mar 30 04:50:15 2016 +0000
----------------------------------------------------------------------
tests/query_test/test_udfs.py | 69 +++++++++++++++++++++-----------------
1 file changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b74e57a3/tests/query_test/test_udfs.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_udfs.py b/tests/query_test/test_udfs.py
index 5573ffc..a979886 100644
--- a/tests/query_test/test_udfs.py
+++ b/tests/query_test/test_udfs.py
@@ -88,16 +88,19 @@ class TestUdfs(ImpalaTestSuite):
self.client.execute("drop database if exists udf_test cascade")
@SkipIfLocal.multiple_impalad
- def test_hive_udfs_missing_jar(self, vector):
+ def test_hive_udfs_missing_jar(self, vector, unique_database):
""" IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
on HDFS"""
# Copy hive-exec.jar to a temporary file
jar_path = get_fs_path("/test-warehouse/" + get_random_id(5) + ".jar")
hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
- drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
- create_fn_stmt = "create function default.pi_missing_jar() returns double \
- location '%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path
+ drop_fn_stmt = (
+ "drop function if exists "
+ "`{0}`.`pi_missing_jar`()".format(unique_database))
+ create_fn_stmt = (
+ "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
+ "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path))
cluster = ImpalaCluster()
impalad = cluster.get_any_impalad()
@@ -118,8 +121,8 @@ class TestUdfs(ImpalaTestSuite):
# we used to create the function. This is to bypass loading from
# the cache
try:
- self.execute_query_using_client(client,
- "select default.pi_missing_jar()", vector)
+ self.execute_query_using_client(
+ client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
assert False, "Query expected to fail"
except ImpalaBeeswaxException, e:
assert "Failed to get file info" in str(e)
@@ -127,22 +130,24 @@ class TestUdfs(ImpalaTestSuite):
def test_libs_with_same_filenames(self, vector):
self.run_test_case('QueryTest/libs_with_same_filenames', vector)
- def test_udf_update_via_drop(self, vector):
+ def test_udf_update_via_drop(self, vector, unique_database):
"""Test updating the UDF binary without restarting Impala. Dropping
the function should remove the binary from the local cache."""
# Run with sync_ddl to guarantee the drop is processed by all impalads.
exec_options = vector.get_value('exec_option')
exec_options['sync_ddl'] = 1
- old_udf = os.path.join(os.environ['IMPALA_HOME'],
- 'testdata/udfs/impala-hive-udfs.jar')
- new_udf = os.path.join(os.environ['IMPALA_HOME'],
- 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
+ old_udf = os.path.join(
+ os.environ['IMPALA_HOME'], 'testdata/udfs/impala-hive-udfs.jar')
+ new_udf = os.path.join(
+ os.environ['IMPALA_HOME'], 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
udf_dst = get_fs_path('/test-warehouse/impala-hive-udfs2.jar')
- drop_fn_stmt = 'drop function if exists default.udf_update_test_drop()'
- create_fn_stmt = "create function default.udf_update_test_drop() returns string "\
- "LOCATION '" + udf_dst + "' SYMBOL='com.cloudera.impala.TestUpdateUdf'"
- query_stmt = "select default.udf_update_test_drop()"
+ drop_fn_stmt = (
+ 'drop function if exists `{0}`.`udf_update_test_drop`()'.format(unique_database))
+ create_fn_stmt = (
+ "create function `{0}`.`udf_update_test_drop`() returns string LOCATION '{1}' "
+ "SYMBOL='com.cloudera.impala.TestUpdateUdf'".format(unique_database, udf_dst))
+ query_stmt = "select `{0}`.`udf_update_test_drop`()".format(unique_database)
# Put the old UDF binary on HDFS, make the UDF in Impala and run it.
check_call(["hadoop", "fs", "-put", "-f", old_udf, udf_dst])
@@ -157,48 +162,50 @@ class TestUdfs(ImpalaTestSuite):
self.execute_query_expect_success(self.client, create_fn_stmt, exec_options)
self.__run_query_all_impalads(exec_options, query_stmt, ["New UDF"])
- def test_udf_update_via_create(self, vector):
+ def test_udf_update_via_create(self, vector, unique_database):
"""Test updating the UDF binary without restarting Impala. Creating a new function
from the library should refresh the cache."""
# Run with sync_ddl to guarantee the create is processed by all impalads.
exec_options = vector.get_value('exec_option')
exec_options['sync_ddl'] = 1
- old_udf = os.path.join(os.environ['IMPALA_HOME'],
- 'testdata/udfs/impala-hive-udfs.jar')
- new_udf = os.path.join(os.environ['IMPALA_HOME'],
- 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
+ old_udf = os.path.join(
+ os.environ['IMPALA_HOME'], 'testdata/udfs/impala-hive-udfs.jar')
+ new_udf = os.path.join(
+ os.environ['IMPALA_HOME'], 'tests/test-hive-udfs/target/test-hive-udfs-1.0.jar')
udf_dst = get_fs_path('/test-warehouse/impala-hive-udfs3.jar')
old_function_name = "udf_update_test_create1"
new_function_name = "udf_update_test_create2"
- drop_fn_template = 'drop function if exists default.%s()'
+ drop_fn_template = 'drop function if exists `{0}`.`{{0}}`()'.format(unique_database)
self.execute_query_expect_success(
- self.client, drop_fn_template % old_function_name, exec_options)
+ self.client, drop_fn_template.format(old_function_name), exec_options)
self.execute_query_expect_success(
- self.client, drop_fn_template % new_function_name, exec_options)
+ self.client, drop_fn_template.format(new_function_name), exec_options)
- create_fn_template = "create function default.%s() returns string "\
- "LOCATION '" + udf_dst + "' SYMBOL='com.cloudera.impala.TestUpdateUdf'"
- query_template = "select default.%s()"
+ create_fn_template = (
+ "create function `{0}`.`{{0}}`() returns string LOCATION '{1}' "
+ "SYMBOL='com.cloudera.impala.TestUpdateUdf'".format(unique_database, udf_dst))
+
+ query_template = "select `{0}`.`{{0}}`()".format(unique_database)
# Put the old UDF binary on HDFS, make the UDF in Impala and run it.
check_call(["hadoop", "fs", "-put", "-f", old_udf, udf_dst])
self.execute_query_expect_success(
- self.client, create_fn_template % old_function_name, exec_options)
+ self.client, create_fn_template.format(old_function_name), exec_options)
self.__run_query_all_impalads(
- exec_options, query_template % old_function_name, ["Old UDF"])
+ exec_options, query_template.format(old_function_name), ["Old UDF"])
# Update the binary, and create a new function using the binary. The new binary
# should be running.
check_call(["hadoop", "fs", "-put", "-f", new_udf, udf_dst])
self.execute_query_expect_success(
- self.client, create_fn_template % new_function_name, exec_options)
+ self.client, create_fn_template.format(new_function_name), exec_options)
self.__run_query_all_impalads(
- exec_options, query_template % new_function_name, ["New UDF"])
+ exec_options, query_template.format(new_function_name), ["New UDF"])
# The old function should use the new library now
self.__run_query_all_impalads(
- exec_options, query_template % old_function_name, ["New UDF"])
+ exec_options, query_template.format(old_function_name), ["New UDF"])
def test_drop_function_while_running(self, vector):
self.client.execute("drop function if exists default.drop_while_running(BIGINT)")