You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/10/25 20:16:06 UTC
[19/33] incubator-impala git commit: IMPALA-3718: Add test_cancellation tests for Kudu
IMPALA-3718: Add test_cancellation tests for Kudu
Additional functional tests for Kudu.
Change-Id: Icf3d3853e7075991f6d12f125407ebdbe6a287e2
Reviewed-on: http://gerrit.cloudera.org:8080/4700
Reviewed-by: Matthew Jacobs <mj...@cloudera.com>
Tested-by: Internal Jenkins
Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/8d7b01fa
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/8d7b01fa
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/8d7b01fa
Branch: refs/heads/hadoop-next
Commit: 8d7b01faea6362af675a2a335b462fad3e0caa03
Parents: 8a49cea
Author: Matthew Jacobs <mj...@cloudera.com>
Authored: Wed Sep 21 15:05:54 2016 -0700
Committer: Internal Jenkins <cl...@gerrit.cloudera.org>
Committed: Fri Oct 21 23:32:58 2016 +0000
----------------------------------------------------------------------
tests/query_test/test_cancellation.py | 49 +++++++++++++++++++++---------
1 file changed, 35 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8d7b01fa/tests/query_test/test_cancellation.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_cancellation.py b/tests/query_test/test_cancellation.py
index 265c781..91e81dc 100644
--- a/tests/query_test/test_cancellation.py
+++ b/tests/query_test/test_cancellation.py
@@ -27,13 +27,17 @@ from tests.common.test_vector import TestDimension
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.verifiers.metric_verifier import MetricVerifier
-# Queries to execute. Use the TPC-H dataset because tables are large so queries take some
-# time to execute.
-QUERIES = ['select l_returnflag from lineitem',
- 'select count(l_returnflag) from lineitem',
- 'select * from lineitem limit 50',
- 'compute stats lineitem',
- 'select * from lineitem order by l_orderkey']
+# PRIMARY KEY for lineitem
+LINEITEM_PK = 'l_orderkey, l_partkey, l_suppkey, l_linenumber'
+
+# Queries to execute, mapped to a unique PRIMARY KEY for use in CTAS with Kudu. If None
+# is specified for the PRIMARY KEY, it will not be used in a CTAS statement on Kudu.
+# Use the TPC-H dataset because tables are large so queries take some time to execute.
+QUERIES = {'select l_returnflag from lineitem' : None,
+ 'select count(l_returnflag) pk from lineitem' : 'pk',
+ 'select * from lineitem limit 50' : LINEITEM_PK,
+ 'compute stats lineitem' : None,
+ 'select * from lineitem order by l_orderkey' : LINEITEM_PK}
QUERY_TYPE = ["SELECT", "CTAS"]
@@ -59,19 +63,25 @@ class TestCancellation(ImpalaTestSuite):
@classmethod
def add_test_dimensions(cls):
super(TestCancellation, cls).add_test_dimensions()
- cls.TestMatrix.add_dimension(TestDimension('query', *QUERIES))
+ cls.TestMatrix.add_dimension(TestDimension('query', *QUERIES.keys()))
cls.TestMatrix.add_dimension(TestDimension('query_type', *QUERY_TYPE))
cls.TestMatrix.add_dimension(TestDimension('cancel_delay', *CANCEL_DELAY_IN_SECONDS))
cls.TestMatrix.add_dimension(TestDimension('action', *DEBUG_ACTIONS))
cls.TestMatrix.add_dimension(TestDimension('max_block_mgr_memory', 0))
cls.TestMatrix.add_constraint(lambda v: v.get_value('query_type') != 'CTAS' or (\
- v.get_value('table_format').file_format in ['text', 'parquet'] and\
+ v.get_value('table_format').file_format in ['text', 'parquet', 'kudu'] and\
v.get_value('table_format').compression_codec == 'none'))
cls.TestMatrix.add_constraint(lambda v: v.get_value('exec_option')['batch_size'] == 0)
# Ignore 'compute stats' queries for the CTAS query type.
cls.TestMatrix.add_constraint(lambda v: not (v.get_value('query_type') == 'CTAS' and
v.get_value('query').startswith('compute stats')))
+
+ # Ignore CTAS on Kudu if there is no PRIMARY KEY specified.
+ cls.TestMatrix.add_constraint(lambda v: not (v.get_value('query_type') == 'CTAS' and
+ v.get_value('table_format').file_format == 'kudu' and
+ QUERIES[v.get_value('query')] is None))
+
# tpch tables are not generated for hbase as the data loading takes a very long time.
# TODO: Add cancellation tests for hbase.
cls.TestMatrix.add_constraint(lambda v:\
@@ -87,15 +97,24 @@ class TestCancellation(ImpalaTestSuite):
query_type = vector.get_value('query_type')
if query_type == "CTAS":
self.cleanup_test_table(vector.get_value('table_format'))
- query = "create table ctas_cancel stored as %sfile as %s" %\
- (vector.get_value('table_format').file_format, query)
+ file_format = vector.get_value('table_format').file_format
+ if file_format == 'kudu':
+ assert QUERIES.has_key(query) and QUERIES[query] is not None,\
+ "PRIMARY KEY for query %s not specified" % query
+ query = "create table ctas_cancel primary key (%s) "\
+ "distribute by hash into 3 buckets stored as kudu as %s" %\
+ (QUERIES[query], query)
+ else:
+ query = "create table ctas_cancel stored as %sfile as %s" %\
+ (file_format, query)
action = vector.get_value('action')
# node ID 0 is the scan node
debug_action = '0:GETNEXT:' + action if action != None else ''
vector.get_value('exec_option')['debug_action'] = debug_action
- vector.get_value('exec_option')['max_block_mgr_memory'] = vector.get_value('max_block_mgr_memory')
+ vector.get_value('exec_option')['max_block_mgr_memory'] =\
+ vector.get_value('max_block_mgr_memory')
# Execute the query multiple times, cancelling it each time.
for i in xrange(NUM_CANCELATION_ITERATIONS):
@@ -167,7 +186,8 @@ class TestCancellationSerial(TestCancellation):
# Don't run across all cancel delay options unless running in exhaustive mode
if cls.exploration_strategy() != 'exhaustive':
cls.TestMatrix.add_constraint(lambda v: v.get_value('cancel_delay') in [3])
- cls.TestMatrix.add_constraint(lambda v: v.get_value('query') == choice(QUERIES))
+ cls.TestMatrix.add_constraint(lambda v: v.get_value('query') ==\
+ choice(QUERIES.keys()))
@pytest.mark.execute_serially
def test_cancel_insert(self, vector):
@@ -186,7 +206,8 @@ class TestCancellationFullSort(TestCancellation):
cls.TestMatrix.add_dimension(TestDimension('query', SORT_QUERY))
cls.TestMatrix.add_dimension(TestDimension('query_type', 'SELECT'))
cls.TestMatrix.add_dimension(TestDimension('cancel_delay', *SORT_CANCEL_DELAY))
- cls.TestMatrix.add_dimension(TestDimension('max_block_mgr_memory', *SORT_BLOCK_MGR_LIMIT))
+ cls.TestMatrix.add_dimension(TestDimension('max_block_mgr_memory',\
+ *SORT_BLOCK_MGR_LIMIT))
cls.TestMatrix.add_dimension(TestDimension('action', None))
cls.TestMatrix.add_constraint(lambda v:\
v.get_value('table_format').file_format =='parquet' and\