You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/10/22 05:33:36 UTC
[09/14] incubator-impala git commit: IMPALA-3739: Enable stress tests
on Kudu
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
new file mode 100644
index 0000000..be99b2f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
@@ -0,0 +1,71 @@
+====
+---- QUERY: TPCDS-Q8
+select
+ s_store_name,
+ round(sum(ss_net_profit), 2)
+from
+ store_sales
+ join store on (store_sales.ss_store_sk = store.s_store_sk)
+ join
+ (select
+ a.ca_zip
+ from
+ (select
+ substr(ca_zip, 1, 5) ca_zip,
+ count( *) cnt
+ from
+ customer_address
+ join customer on (customer_address.ca_address_sk = customer.c_current_addr_sk)
+ where
+ c_preferred_cust_flag = 'Y'
+ group by
+ ca_zip
+ having
+ count(*) > 10
+ ) a
+ left semi join
+ (select
+ substr(ca_zip, 1, 5) ca_zip
+ from
+ customer_address
+ where
+ substr(ca_zip, 1, 5) in ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779',
+ '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797',
+ '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068',
+ '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053',
+ '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425',
+ '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835',
+ '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360',
+ '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982',
+ '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447',
+ '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792',
+ '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561',
+ '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458',
+ '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648',
+ '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799',
+ '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036',
+ '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177',
+ '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121',
+ '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560',
+ '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619',
+ '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010',
+ '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705',
+ '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928',
+ '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047',
+ '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998',
+ '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744',
+ '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309',
+ '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622',
+ '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936',
+ '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492')
+ ) b
+ on (a.ca_zip = b.ca_zip)
+ ) v1 on (substr(store.s_zip, 1, 2) = substr(v1.ca_zip, 1, 2))
+where
+ ss_sold_date_sk between 2452276 and 2452366
+group by
+ s_store_name
+order by
+ s_store_name
+limit 100;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
new file mode 100644
index 0000000..bb2beb8
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
@@ -0,0 +1,93 @@
+====
+---- QUERY: TPCDS-Q88
+select *
+from
+ (select count(*) h8_30_to_9
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s1,
+ (select count(*) h9_to_9_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s2,
+ (select count(*) h9_30_to_10
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 9
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s3,
+ (select count(*) h10_to_10_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s4,
+ (select count(*) h10_30_to_11
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 10
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s5,
+ (select count(*) h11_to_11_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s6,
+ (select count(*) h11_30_to_12
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 11
+ and time_dim.t_minute >= 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s7,
+ (select count(*) h12_to_12_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 12
+ and time_dim.t_minute < 30
+ and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+ (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+ (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+ and store.s_store_name = 'ese') s8;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
new file mode 100644
index 0000000..6677b07
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
@@ -0,0 +1,33 @@
+====
+---- QUERY: TPCDS-Q89
+/* Modifications: Added Partition Key filter because Impala does not do dynamic partition
+ pruning.*/
+select * from (select i_category, i_class, i_brand, s_store_name, s_company_name
+ d_moy, round(sum_sales, 2) sum_sales,
+ round(avg_monthly_sales, 2) avg_monthly_sales
+from (
+select i_category, i_class, i_brand,
+ s_store_name, s_company_name,
+ d_moy,
+ sum(ss_sales_price) sum_sales,
+ avg(sum(ss_sales_price)) over
+ (partition by i_category, i_brand, s_store_name, s_company_name)
+ avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
+ d_year in (2000) and
+ ((i_category in ('Children','Music','Home') and
+ i_class in ('toddlers','pop','lighting')
+ )
+ or (i_category in ('Jewelry','Books','Sports') and
+ i_class in ('costume','travel','football')
+ ))
+ and ss_sold_date_sk between 2451545 and 2451910 -- partition key filter
+group by i_category, i_class, i_brand,
+ s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100) tmp2;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
new file mode 100644
index 0000000..1687bc4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
@@ -0,0 +1,17 @@
+====
+---- QUERY: TPCDS-Q96
+SELECT
+ COUNT(*) AS total
+FROM store_sales ss
+JOIN time_dim td
+ ON (ss.ss_sold_time_sk = td.t_time_sk)
+JOIN household_demographics hd
+ ON (ss.ss_hdemo_sk = hd.hd_demo_sk)
+JOIN store s
+ ON (ss.ss_store_sk = s.s_store_sk)
+WHERE
+ td.t_hour = 8
+ AND td.t_minute >= 30
+ AND hd.hd_dep_count = 5
+ AND s.s_store_name = 'ese';
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
new file mode 100644
index 0000000..fcd2081
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
@@ -0,0 +1,34 @@
+====
+---- QUERY: TPCDS-Q98
+select
+ i_item_desc,
+ i_category,
+ i_class,
+ i_current_price,
+ round(sum(ss_ext_sales_price), 2) as itemrevenue,
+ round(sum(ss_ext_sales_price)*100/ sum(sum(ss_ext_sales_price))
+ over (partition by i_class), 2) as revenueratio
+from
+ store_sales,
+ item,
+ date_dim
+where
+ ss_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ss_sold_date_sk = d_date_sk
+ and ss_sold_date_sk between 2451911 and 2451941
+ and d_date between '2001-01-01' and '2001-01-31' -- original uses interval and the
+group by
+ i_item_id,
+ i_item_desc,
+ i_category,
+ i_class,
+ i_current_price
+order by
+ i_category,
+ i_class,
+ i_item_id,
+ i_item_desc,
+ revenueratio
+limit 1000;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/tests/comparison/db_connection.py
----------------------------------------------------------------------
diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py
index 13125c2..9076da8 100644
--- a/tests/comparison/db_connection.py
+++ b/tests/comparison/db_connection.py
@@ -225,6 +225,11 @@ class DbCursor(object):
db_name = db_name.lower()
self.execute('CREATE DATABASE ' + db_name)
+ def create_db_if_not_exists(self, db_name):
+ LOG.info("Creating database %s", db_name)
+ db_name = db_name.lower()
+ self.execute('CREATE DATABASE IF NOT EXISTS ' + db_name)
+
def drop_db_if_exists(self, db_name):
'''This should not be called from a conn to the database being dropped.'''
db_name = db_name.lower()
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index b76d713..fb24f65 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -897,13 +897,16 @@ class QueryRunner(object):
return hash_thread.result
-def load_tpc_queries(workload):
- """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
+def load_tpc_queries(workload, load_in_kudu=False):
+ """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'.
+ If 'load_in_kudu' is True, it loads only queries specified for the Kudu storage
+ engine."""
LOG.info("Loading %s queries", workload)
queries = list()
query_dir = os.path.join(os.path.dirname(__file__), "..", "..",
"testdata", "workloads", workload, "queries")
- file_name_pattern = re.compile(r"-(q\d+).test$")
+ engine = 'kudu-' if load_in_kudu else ''
+ file_name_pattern = re.compile(r"%s-%s(q\d+).test$" % (workload, engine))
for query_file in os.listdir(query_dir):
match = file_name_pattern.search(query_file)
if not match:
@@ -1339,6 +1342,10 @@ def main():
parser.add_argument("--tpch-db", help="If provided, TPC-H queries will be used.")
parser.add_argument("--tpch-nested-db",
help="If provided, nested TPC-H queries will be used.")
+ parser.add_argument("--tpch-kudu-db",
+ help="If provided, TPC-H queries for Kudu will be used.")
+ parser.add_argument("--tpcds-kudu-db",
+ help="If provided, TPC-DS queries for Kudu will be used.")
parser.add_argument("--random-db",
help="If provided, random queries will be used.")
parser.add_argument("--random-query-count", type=int, default=50,
@@ -1375,9 +1382,10 @@ def main():
LOG.debug("CLI args: %s" % (args, ))
if not args.tpcds_db and not args.tpch_db and not args.random_db \
- and not args.tpch_nested_db and not args.query_file_path:
- raise Exception("At least one of --tpcds-db, --tpch-db,"
- "--tpch-nested-db, --random-db, --query-file-path is required")
+ and not args.tpch_nested_db and not args.tpch_kudu_db \
+ and not args.tpcds_kudu_db and not args.query_file_path:
+ raise Exception("At least one of --tpcds-db, --tpch-db, --tpch-kudu-db,"
+ "--tpcds-kudu-db, --tpch-nested-db, --random-db, --query-file-path is required")
# The stress test sets these, so callers cannot override them.
IGNORE_QUERY_OPTIONS = frozenset([
@@ -1451,6 +1459,16 @@ def main():
for query in tpch_nested_queries:
query.db_name = args.tpch_nested_db
queries.extend(tpch_nested_queries)
+ if args.tpch_kudu_db:
+ tpch_kudu_queries = load_tpc_queries("tpch", load_in_kudu=True)
+ for query in tpch_kudu_queries:
+ query.db_name = args.tpch_kudu_db
+ queries.extend(tpch_kudu_queries)
+ if args.tpcds_kudu_db:
+ tpcds_kudu_queries = load_tpc_queries("tpcds", load_in_kudu=True)
+ for query in tpcds_kudu_queries:
+ query.db_name = args.tpcds_kudu_db
+ queries.extend(tpcds_kudu_queries)
for idx in xrange(len(queries) - 1, -1, -1):
query = queries[idx]
if query.sql in queries_with_runtime_info_by_db_and_sql[query.db_name]: