You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/10/22 05:33:36 UTC

[09/14] incubator-impala git commit: IMPALA-3739: Enable stress tests on Kudu

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
new file mode 100644
index 0000000..be99b2f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
@@ -0,0 +1,71 @@
+====
+---- QUERY: TPCDS-Q8
+select
+  s_store_name,
+  round(sum(ss_net_profit), 2)
+from
+  store_sales
+  join store on (store_sales.ss_store_sk = store.s_store_sk)
+  join
+  (select
+    a.ca_zip
+  from
+    (select
+      substr(ca_zip, 1, 5) ca_zip,
+      count( *) cnt
+    from
+      customer_address
+      join  customer on (customer_address.ca_address_sk = customer.c_current_addr_sk)
+    where
+      c_preferred_cust_flag = 'Y'
+    group by
+      ca_zip
+    having
+      count(*) > 10
+    ) a
+    left semi join
+    (select
+      substr(ca_zip, 1, 5) ca_zip
+    from
+      customer_address
+    where
+      substr(ca_zip, 1, 5) in ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779',
+      '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797',
+      '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068',
+      '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053',
+      '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425',
+      '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835',
+      '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360',
+      '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982',
+      '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447',
+      '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792',
+      '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561',
+      '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458',
+      '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648',
+      '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799',
+      '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036',
+      '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177',
+      '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121',
+      '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560',
+      '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619',
+      '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010',
+      '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705',
+      '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928',
+      '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047',
+      '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998',
+      '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744',
+      '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309',
+      '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622',
+      '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936',
+      '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492')
+    ) b
+  on (a.ca_zip = b.ca_zip)
+  ) v1 on (substr(store.s_zip, 1, 2) = substr(v1.ca_zip, 1, 2))
+where
+  ss_sold_date_sk between 2452276 and 2452366
+group by
+  s_store_name
+order by
+  s_store_name
+limit 100;
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
new file mode 100644
index 0000000..bb2beb8
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
@@ -0,0 +1,93 @@
+====
+---- QUERY: TPCDS-Q88
+select  *
+from
+ (select count(*) h8_30_to_9
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 8
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s1,
+ (select count(*) h9_to_9_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 9
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s2,
+ (select count(*) h9_30_to_10
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 9
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s3,
+ (select count(*) h10_to_10_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 10
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s4,
+ (select count(*) h10_30_to_11
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 10
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s5,
+ (select count(*) h11_to_11_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 11
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s6,
+ (select count(*) h11_30_to_12
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 11
+     and time_dim.t_minute >= 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s7,
+ (select count(*) h12_to_12_30
+ from store_sales, household_demographics , time_dim, store
+ where ss_sold_time_sk = time_dim.t_time_sk
+     and ss_hdemo_sk = household_demographics.hd_demo_sk
+     and ss_store_sk = s_store_sk
+     and time_dim.t_hour = 12
+     and time_dim.t_minute < 30
+     and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+          (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+          (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+     and store.s_store_name = 'ese') s8;
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
new file mode 100644
index 0000000..6677b07
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
@@ -0,0 +1,33 @@
+====
+---- QUERY: TPCDS-Q89
+/* Modifications: Added Partition Key filter because Impala does not do dynamic partition
+   pruning.*/
+select * from (select i_category, i_class, i_brand, s_store_name, s_company_name
+	       d_moy, round(sum_sales, 2) sum_sales,
+	       round(avg_monthly_sales, 2) avg_monthly_sales
+from (
+select i_category, i_class, i_brand,
+       s_store_name, s_company_name,
+       d_moy,
+       sum(ss_sales_price) sum_sales,
+       avg(sum(ss_sales_price)) over
+         (partition by i_category, i_brand, s_store_name, s_company_name)
+         avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+      ss_sold_date_sk = d_date_sk and
+      ss_store_sk = s_store_sk and
+      d_year in (2000) and
+        ((i_category in ('Children','Music','Home') and
+          i_class in ('toddlers','pop','lighting')
+         )
+      or (i_category in ('Jewelry','Books','Sports') and
+          i_class in ('costume','travel','football')
+        ))
+      and ss_sold_date_sk between 2451545 and 2451910  -- partition key filter
+group by i_category, i_class, i_brand,
+         s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100) tmp2;
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
new file mode 100644
index 0000000..1687bc4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
@@ -0,0 +1,17 @@
+====
+---- QUERY: TPCDS-Q96
+SELECT
+  COUNT(*) AS total
+FROM store_sales ss
+JOIN time_dim td
+  ON (ss.ss_sold_time_sk = td.t_time_sk)
+JOIN household_demographics hd
+  ON (ss.ss_hdemo_sk = hd.hd_demo_sk)
+JOIN store s
+  ON (ss.ss_store_sk = s.s_store_sk)
+WHERE
+  td.t_hour = 8
+  AND td.t_minute >= 30
+  AND hd.hd_dep_count = 5
+  AND s.s_store_name = 'ese';
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
new file mode 100644
index 0000000..fcd2081
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
@@ -0,0 +1,34 @@
+====
+---- QUERY: TPCDS-Q98
+select
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price,
+  round(sum(ss_ext_sales_price), 2) as itemrevenue,
+  round(sum(ss_ext_sales_price)*100/ sum(sum(ss_ext_sales_price))
+  over (partition by i_class), 2) as revenueratio
+from
+  store_sales,
+  item,
+  date_dim
+where
+  ss_item_sk = i_item_sk
+  and i_category in ('Jewelry', 'Sports', 'Books')
+  and ss_sold_date_sk = d_date_sk
+  and ss_sold_date_sk between 2451911 and 2451941
+  and d_date between '2001-01-01' and '2001-01-31' -- original uses interval and the
+group by
+  i_item_id,
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price
+order by
+  i_category,
+  i_class,
+  i_item_id,
+  i_item_desc,
+  revenueratio
+limit 1000;
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/tests/comparison/db_connection.py
----------------------------------------------------------------------
diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py
index 13125c2..9076da8 100644
--- a/tests/comparison/db_connection.py
+++ b/tests/comparison/db_connection.py
@@ -225,6 +225,11 @@ class DbCursor(object):
     db_name = db_name.lower()
     self.execute('CREATE DATABASE ' + db_name)
 
+  def create_db_if_not_exists(self, db_name):
+    LOG.info("Creating database %s", db_name)
+    db_name = db_name.lower()
+    self.execute('CREATE DATABASE IF NOT EXISTS ' + db_name)
+
   def drop_db_if_exists(self, db_name):
     '''This should not be called from a conn to the database being dropped.'''
     db_name = db_name.lower()

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index b76d713..fb24f65 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -897,13 +897,16 @@ class QueryRunner(object):
     return hash_thread.result
 
 
-def load_tpc_queries(workload):
-  """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
+def load_tpc_queries(workload, load_in_kudu=False):
+  """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'.
+  If 'load_in_kudu' is True, it loads only queries specified for the Kudu storage
+  engine."""
   LOG.info("Loading %s queries", workload)
   queries = list()
   query_dir = os.path.join(os.path.dirname(__file__), "..", "..",
       "testdata", "workloads", workload, "queries")
-  file_name_pattern = re.compile(r"-(q\d+).test$")
+  engine = 'kudu-' if load_in_kudu else ''
+  file_name_pattern = re.compile(r"%s-%s(q\d+).test$" % (workload, engine))
   for query_file in os.listdir(query_dir):
     match = file_name_pattern.search(query_file)
     if not match:
@@ -1339,6 +1342,10 @@ def main():
   parser.add_argument("--tpch-db", help="If provided, TPC-H queries will be used.")
   parser.add_argument("--tpch-nested-db",
       help="If provided, nested TPC-H queries will be used.")
+  parser.add_argument("--tpch-kudu-db",
+      help="If provided, TPC-H queries for Kudu will be used.")
+  parser.add_argument("--tpcds-kudu-db",
+      help="If provided, TPC-DS queries for Kudu will be used.")
   parser.add_argument("--random-db",
       help="If provided, random queries will be used.")
   parser.add_argument("--random-query-count", type=int, default=50,
@@ -1375,9 +1382,10 @@ def main():
   LOG.debug("CLI args: %s" % (args, ))
 
   if not args.tpcds_db and not args.tpch_db and not args.random_db \
-      and not args.tpch_nested_db and not args.query_file_path:
-    raise Exception("At least one of --tpcds-db, --tpch-db,"
-        "--tpch-nested-db, --random-db, --query-file-path is required")
+      and not args.tpch_nested_db and not args.tpch_kudu_db \
+      and not args.tpcds_kudu_db and not args.query_file_path:
+    raise Exception("At least one of --tpcds-db, --tpch-db, --tpch-kudu-db,"
+        "--tpcds-kudu-db, --tpch-nested-db, --random-db, --query-file-path is required")
 
   # The stress test sets these, so callers cannot override them.
   IGNORE_QUERY_OPTIONS = frozenset([
@@ -1451,6 +1459,16 @@ def main():
     for query in tpch_nested_queries:
       query.db_name = args.tpch_nested_db
     queries.extend(tpch_nested_queries)
+  if args.tpch_kudu_db:
+    tpch_kudu_queries = load_tpc_queries("tpch", load_in_kudu=True)
+    for query in tpch_kudu_queries:
+      query.db_name = args.tpch_kudu_db
+    queries.extend(tpch_kudu_queries)
+  if args.tpcds_kudu_db:
+    tpcds_kudu_queries = load_tpc_queries("tpcds", load_in_kudu=True)
+    for query in tpcds_kudu_queries:
+      query.db_name = args.tpcds_kudu_db
+    queries.extend(tpcds_kudu_queries)
   for idx in xrange(len(queries) - 1, -1, -1):
     query = queries[idx]
     if query.sql in queries_with_runtime_info_by_db_and_sql[query.db_name]: