You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/11/01 16:04:12 UTC

[7/7] impala git commit: IMPALA-5950: fix TPC-DS Q35a and Q48 queries

IMPALA-5950: fix TPC-DS Q35a and Q48 queries

The query text (for Q48) and substitution parameters didn't match the
TPC-DS standard for qualification queries. After fixing that, the
queries return the results expected by the TPC-DS standards.

Note that this may affect the performance of perf workloads running
tpcds-unmodified.

Change-Id: Ic7c737f68adf616738d6eb6e5a02593af25bcbaf
Reviewed-on: http://gerrit.cloudera.org:8080/11833
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>


Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e3a70270
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e3a70270
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e3a70270

Branch: refs/heads/master
Commit: e3a702707cb60b4e127215b99ab4e6945cedcd1f
Parents: 0e1f304
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Oct 30 15:26:39 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Nov 1 08:05:48 2018 +0000

----------------------------------------------------------------------
 .../tpcds-unmodified/queries/tpcds-q35a.test    | 164 +++++++++++++++++++
 .../tpcds-unmodified/queries/tpcds-q48.test     |  85 +++++++---
 tests/query_test/test_tpcds_queries.py          |  51 ++++--
 3 files changed, 268 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/impala/blob/e3a70270/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test b/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test
new file mode 100644
index 0000000..5748684
--- /dev/null
+++ b/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test
@@ -0,0 +1,164 @@
+====
+---- QUERY: TPCDS-Q35A
+select
+  ca_state,
+  cd_gender,
+  cd_marital_status,
+  cd_dep_count,
+  count(*) cnt1,
+  min(cd_dep_count),
+  max(cd_dep_count),
+  avg(cd_dep_count),
+  cd_dep_employed_count,
+  count(*) cnt2,
+  min(cd_dep_employed_count),
+  max(cd_dep_employed_count),
+  avg(cd_dep_employed_count),
+  cd_dep_college_count,
+  count(*) cnt3,
+  min(cd_dep_college_count),
+  max(cd_dep_college_count),
+  avg(cd_dep_college_count)
+ from
+  customer c,customer_address ca,customer_demographics
+ where
+  c.c_current_addr_sk = ca.ca_address_sk and
+  cd_demo_sk = c.c_current_cdemo_sk and
+  exists (select *
+          from store_sales,date_dim
+          where c.c_customer_sk = ss_customer_sk and
+                ss_sold_date_sk = d_date_sk and
+                d_year = 2002 and
+                d_qoy < 4) and
+   exists (select * from
+           (select ws_bill_customer_sk customsk
+            from web_sales,date_dim
+            where
+                  ws_sold_date_sk = d_date_sk and
+                  d_year = 2002 and
+                  d_qoy < 4
+            union all
+            select cs_ship_customer_sk customsk
+            from catalog_sales,date_dim
+            where
+                  cs_sold_date_sk = d_date_sk and
+                  d_year = 2002 and
+                  d_qoy < 4)x
+           where x.customsk = c.c_customer_sk)
+ group by ca_state,
+          cd_gender,
+          cd_marital_status,
+          cd_dep_count,
+          cd_dep_employed_count,
+          cd_dep_college_count
+ order by ca_state,
+          cd_gender,
+          cd_marital_status,
+          cd_dep_count,
+          cd_dep_employed_count,
+          cd_dep_college_count
+limit 100
+---- RESULTS
+'AK','F','D',1,1,1,1,1,2,1,2,2,2,2,1,2,2,2
+'AK','F','D',5,1,5,5,5,3,1,3,3,3,3,1,3,3,3
+'AK','F','M',0,1,0,0,0,6,1,6,6,6,0,1,0,0,0
+'AK','F','M',4,1,4,4,4,2,1,2,2,2,0,1,0,0,0
+'AK','F','M',5,1,5,5,5,2,1,2,2,2,2,1,2,2,2
+'AK','F','S',2,1,2,2,2,3,1,3,3,3,0,1,0,0,0
+'AK','F','S',2,1,2,2,2,3,1,3,3,3,3,1,3,3,3
+'AK','F','S',2,1,2,2,2,5,1,5,5,5,2,1,2,2,2
+'AK','F','S',3,1,3,3,3,0,1,0,0,0,2,1,2,2,2
+'AK','F','S',4,1,4,4,4,1,1,1,1,1,6,1,6,6,6
+'AK','F','S',5,1,5,5,5,6,1,6,6,6,4,1,4,4,4
+'AK','F','S',6,1,6,6,6,2,1,2,2,2,4,1,4,4,4
+'AK','F','S',6,1,6,6,6,6,1,6,6,6,3,1,3,3,3
+'AK','F','U',1,1,1,1,1,5,1,5,5,5,1,1,1,1,1
+'AK','F','U',4,1,4,4,4,0,1,0,0,0,4,1,4,4,4
+'AK','F','U',5,1,5,5,5,2,1,2,2,2,5,1,5,5,5
+'AK','F','W',0,1,0,0,0,4,1,4,4,4,0,1,0,0,0
+'AK','F','W',1,1,1,1,1,1,1,1,1,1,4,1,4,4,4
+'AK','F','W',2,1,2,2,2,2,1,2,2,2,5,1,5,5,5
+'AK','F','W',4,1,4,4,4,2,1,2,2,2,2,1,2,2,2
+'AK','M','D',0,1,0,0,0,0,1,0,0,0,2,1,2,2,2
+'AK','M','D',0,1,0,0,0,5,1,5,5,5,4,1,4,4,4
+'AK','M','D',5,1,5,5,5,2,1,2,2,2,4,1,4,4,4
+'AK','M','D',6,1,6,6,6,3,1,3,3,3,6,1,6,6,6
+'AK','M','M',0,1,0,0,0,6,1,6,6,6,5,1,5,5,5
+'AK','M','M',3,1,3,3,3,3,1,3,3,3,1,1,1,1,1
+'AK','M','M',5,1,5,5,5,3,1,3,3,3,0,1,0,0,0
+'AK','M','S',0,1,0,0,0,2,1,2,2,2,4,1,4,4,4
+'AK','M','S',4,1,4,4,4,0,1,0,0,0,1,1,1,1,1
+'AK','M','S',4,1,4,4,4,0,1,0,0,0,4,1,4,4,4
+'AK','M','S',5,1,5,5,5,5,1,5,5,5,2,1,2,2,2
+'AK','M','U',0,1,0,0,0,3,1,3,3,3,5,1,5,5,5
+'AK','M','U',4,1,4,4,4,6,1,6,6,6,1,1,1,1,1
+'AK','M','W',0,1,0,0,0,6,1,6,6,6,2,1,2,2,2
+'AK','M','W',2,1,2,2,2,2,1,2,2,2,5,1,5,5,5
+'AK','M','W',2,1,2,2,2,4,1,4,4,4,6,1,6,6,6
+'AK','M','W',3,1,3,3,3,5,1,5,5,5,4,1,4,4,4
+'AK','M','W',4,1,4,4,4,6,1,6,6,6,3,1,3,3,3
+'AL','F','D',0,1,0,0,0,6,1,6,6,6,3,1,3,3,3
+'AL','F','D',2,1,2,2,2,0,1,0,0,0,6,1,6,6,6
+'AL','F','D',2,1,2,2,2,4,1,4,4,4,5,1,5,5,5
+'AL','F','D',2,1,2,2,2,6,1,6,6,6,3,1,3,3,3
+'AL','F','D',3,1,3,3,3,2,1,2,2,2,0,1,0,0,0
+'AL','F','D',4,1,4,4,4,6,1,6,6,6,6,1,6,6,6
+'AL','F','D',5,1,5,5,5,0,1,0,0,0,6,1,6,6,6
+'AL','F','D',5,1,5,5,5,2,1,2,2,2,5,1,5,5,5
+'AL','F','D',5,1,5,5,5,3,1,3,3,3,3,1,3,3,3
+'AL','F','D',5,1,5,5,5,4,1,4,4,4,1,1,1,1,1
+'AL','F','D',5,1,5,5,5,4,1,4,4,4,2,1,2,2,2
+'AL','F','M',0,1,0,0,0,3,1,3,3,3,3,1,3,3,3
+'AL','F','M',1,1,1,1,1,3,1,3,3,3,1,1,1,1,1
+'AL','F','M',1,1,1,1,1,3,1,3,3,3,3,1,3,3,3
+'AL','F','M',1,1,1,1,1,4,1,4,4,4,4,1,4,4,4
+'AL','F','M',1,1,1,1,1,6,1,6,6,6,1,1,1,1,1
+'AL','F','M',1,2,1,1,1,6,2,6,6,6,5,2,5,5,5
+'AL','F','M',2,1,2,2,2,0,1,0,0,0,1,1,1,1,1
+'AL','F','M',2,1,2,2,2,5,1,5,5,5,3,1,3,3,3
+'AL','F','M',3,1,3,3,3,3,1,3,3,3,0,1,0,0,0
+'AL','F','M',4,1,4,4,4,3,1,3,3,3,2,1,2,2,2
+'AL','F','M',4,1,4,4,4,6,1,6,6,6,3,1,3,3,3
+'AL','F','M',5,1,5,5,5,3,1,3,3,3,1,1,1,1,1
+'AL','F','M',5,1,5,5,5,5,1,5,5,5,6,1,6,6,6
+'AL','F','M',6,1,6,6,6,1,1,1,1,1,6,1,6,6,6
+'AL','F','M',6,1,6,6,6,3,1,3,3,3,4,1,4,4,4
+'AL','F','S',1,1,1,1,1,0,1,0,0,0,4,1,4,4,4
+'AL','F','S',2,1,2,2,2,1,1,1,1,1,3,1,3,3,3
+'AL','F','S',2,1,2,2,2,5,1,5,5,5,4,1,4,4,4
+'AL','F','S',2,1,2,2,2,5,1,5,5,5,5,1,5,5,5
+'AL','F','S',3,1,3,3,3,0,1,0,0,0,3,1,3,3,3
+'AL','F','S',3,1,3,3,3,2,1,2,2,2,3,1,3,3,3
+'AL','F','S',3,1,3,3,3,4,1,4,4,4,3,1,3,3,3
+'AL','F','S',3,1,3,3,3,4,1,4,4,4,4,1,4,4,4
+'AL','F','S',5,1,5,5,5,1,1,1,1,1,6,1,6,6,6
+'AL','F','U',0,2,0,0,0,1,2,1,1,1,1,2,1,1,1
+'AL','F','U',0,1,0,0,0,6,1,6,6,6,5,1,5,5,5
+'AL','F','U',1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+'AL','F','U',1,1,1,1,1,2,1,2,2,2,5,1,5,5,5
+'AL','F','U',1,1,1,1,1,3,1,3,3,3,2,1,2,2,2
+'AL','F','U',2,1,2,2,2,2,1,2,2,2,0,1,0,0,0
+'AL','F','U',5,1,5,5,5,6,1,6,6,6,1,1,1,1,1
+'AL','F','U',6,1,6,6,6,5,1,5,5,5,2,1,2,2,2
+'AL','F','W',1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+'AL','F','W',1,1,1,1,1,3,1,3,3,3,2,1,2,2,2
+'AL','F','W',1,1,1,1,1,4,1,4,4,4,3,1,3,3,3
+'AL','F','W',2,1,2,2,2,2,1,2,2,2,5,1,5,5,5
+'AL','F','W',3,1,3,3,3,1,1,1,1,1,5,1,5,5,5
+'AL','F','W',3,1,3,3,3,2,1,2,2,2,3,1,3,3,3
+'AL','F','W',4,1,4,4,4,3,1,3,3,3,4,1,4,4,4
+'AL','F','W',5,1,5,5,5,1,1,1,1,1,3,1,3,3,3
+'AL','F','W',5,1,5,5,5,1,1,1,1,1,6,1,6,6,6
+'AL','F','W',5,1,5,5,5,5,1,5,5,5,4,1,4,4,4
+'AL','F','W',5,1,5,5,5,6,1,6,6,6,0,1,0,0,0
+'AL','M','D',1,1,1,1,1,1,1,1,1,1,2,1,2,2,2
+'AL','M','D',3,1,3,3,3,3,1,3,3,3,3,1,3,3,3
+'AL','M','D',4,1,4,4,4,3,1,3,3,3,6,1,6,6,6
+'AL','M','D',5,1,5,5,5,0,1,0,0,0,2,1,2,2,2
+'AL','M','M',2,1,2,2,2,2,1,2,2,2,1,1,1,1,1
+'AL','M','M',2,1,2,2,2,2,1,2,2,2,2,1,2,2,2
+'AL','M','M',2,1,2,2,2,4,1,4,4,4,5,1,5,5,5
+'AL','M','M',2,1,2,2,2,6,1,6,6,6,0,1,0,0,0
+---- TYPES
+STRING, STRING, STRING, INT, BIGINT, INT, INT, DOUBLE, INT, BIGINT, INT, INT, DOUBLE, INT, BIGINT, INT, INT, DOUBLE
+====

http://git-wip-us.apache.org/repos/asf/impala/blob/e3a70270/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test b/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
index 2b8eaf3..d6d6e7d 100644
--- a/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
+++ b/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
@@ -1,27 +1,70 @@
 ====
 ---- QUERY: TPCDS-Q48
-
-select sum (ss_quantity)
-from store_sales, store, (select * from customer_demographics where cd_marital_status = 'S' and cd_education_status = '4 yr Degree') as v1, (select * from customer_address where ca_country = 'United States' and ca_state in ('AK', 'IA', 'NE', 'NY', 'VA', 'AR', 'AZ', 'MI', 'NC')) as v2, date_dim
+ select sum (ss_quantity)
+ from store_sales, store, customer_demographics, customer_address, date_dim
  where s_store_sk = ss_store_sk
- and  ss_sold_date_sk = d_date_sk and d_year = 1998
- and  ss_sold_date_sk between 2450815 and 2451179
- and  (v1.cd_demo_sk = ss_cdemo_sk and
-        (
-        (ss_sales_price between 100.00 and 150.00) or
-        (ss_sales_price between 50.00 and 100.00) or
-        (ss_sales_price between 150.00 and 200.00)
-        )
-      )
- and  (ss_addr_sk = v2.ca_address_sk and
-      (
-        (ca_state in ('AK', 'IA', 'NE') and ss_net_profit between 0 and 2000)
-        or (ca_state in ('NY', 'VA', 'AR') and ss_net_profit between 150 and 3000)
-        or (ca_state in ('AZ', 'MI', 'NC') and ss_net_profit between 50 and 25000)
-      )
-      )
-;
+ and  ss_sold_date_sk = d_date_sk and d_year = 2000
+ and
+ (
+  (
+   cd_demo_sk = ss_cdemo_sk
+   and
+   cd_marital_status = 'M'
+   and
+   cd_education_status = '4 yr Degree'
+   and
+   ss_sales_price between 100.00 and 150.00
+   )
+ or
+  (
+  cd_demo_sk = ss_cdemo_sk
+   and
+   cd_marital_status = 'D'
+   and
+   cd_education_status = '2 yr Degree'
+   and
+   ss_sales_price between 50.00 and 100.00
+  )
+ or
+ (
+  cd_demo_sk = ss_cdemo_sk
+  and
+   cd_marital_status = 'S'
+   and
+   cd_education_status = 'College'
+   and
+   ss_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+  (
+  ss_addr_sk = ca_address_sk
+  and
+  ca_country = 'United States'
+  and
+  ca_state in ('CO', 'OH', 'TX')
+  and ss_net_profit between 0 and 2000
+  )
+ or
+  (ss_addr_sk = ca_address_sk
+  and
+  ca_country = 'United States'
+  and
+  ca_state in ('OR', 'MN', 'KY')
+  and ss_net_profit between 150 and 3000
+  )
+ or
+  (ss_addr_sk = ca_address_sk
+  and
+  ca_country = 'United States'
+  and
+  ca_state in ('VA', 'CA', 'MS')
+  and ss_net_profit between 50 and 25000
+  )
+ )
 ---- RESULTS
+26257
 ---- TYPES
-INT, INT, STRING, DECIMAL
+BIGINT
 ====

http://git-wip-us.apache.org/repos/asf/impala/blob/e3a70270/tests/query_test/test_tpcds_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py
index 4b937b1..b371bf4 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -32,19 +32,19 @@ class TestTpcdsQuery(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestTpcdsQuery, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
-        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and\
-        v.get_value('table_format').compression_codec in ['none', 'snap'] and\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and
+        v.get_value('table_format').compression_codec in ['none', 'snap'] and
         v.get_value('table_format').compression_type != 'record')
     cls.ImpalaTestMatrix.add_mandatory_exec_option('decimal_v2', 0)
 
     if cls.exploration_strategy() != 'exhaustive':
       # Cut down on the execution time for these tests in core by running only
       # against parquet.
-      cls.ImpalaTestMatrix.add_constraint(lambda v:\
+      cls.ImpalaTestMatrix.add_constraint(lambda v:
           v.get_value('table_format').file_format in ['parquet'])
 
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('exec_option')['batch_size'] == 0)
 
   @pytest.mark.execute_serially
@@ -274,18 +274,18 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite):
   @classmethod
   def add_test_dimensions(cls):
     super(TestTpcdsDecimalV2Query, cls).add_test_dimensions()
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
-        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and\
-        v.get_value('table_format').compression_codec in ['none', 'snap'] and\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and
+        v.get_value('table_format').compression_codec in ['none', 'snap'] and
         v.get_value('table_format').compression_type != 'record')
 
     if cls.exploration_strategy() != 'exhaustive':
       # Cut down on the execution time for these tests in core by running only
       # against parquet.
-      cls.ImpalaTestMatrix.add_constraint(lambda v:\
+      cls.ImpalaTestMatrix.add_constraint(lambda v:
           v.get_value('table_format').file_format in ['parquet'])
 
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
         v.get_value('exec_option')['batch_size'] == 0)
 
   def test_tpcds_q1(self, vector):
@@ -514,7 +514,7 @@ class TestTpcdsInsert(ImpalaTestSuite):
   def add_test_dimensions(cls):
     super(TestTpcdsInsert, cls).add_test_dimensions()
     cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
-    cls.ImpalaTestMatrix.add_constraint(lambda v:\
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
         is_supported_insert_format(v.get_value('table_format')))
     if cls.exploration_strategy() == 'core' and not pytest.config.option.table_formats:
       # Don't run on core, unless the user explicitly wants to validate a specific table
@@ -524,3 +524,32 @@ class TestTpcdsInsert(ImpalaTestSuite):
 
   def test_tpcds_partitioned_insert(self, vector):
     self.run_test_case('partitioned-insert', vector)
+
+
+class TestTpcdsUnmodified(ImpalaTestSuite):
+  @classmethod
+  def get_workload(cls):
+    return 'tpcds-unmodified'
+
+  @classmethod
+  def add_test_dimensions(cls):
+    super(TestTpcdsUnmodified, cls).add_test_dimensions()
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and
+        v.get_value('table_format').compression_codec in ['none', 'snap'] and
+        v.get_value('table_format').compression_type != 'record')
+
+    if cls.exploration_strategy() != 'exhaustive':
+      # Cut down on the execution time for these tests in core by running only
+      # against parquet.
+      cls.ImpalaTestMatrix.add_constraint(lambda v:
+          v.get_value('table_format').file_format in ['parquet'])
+
+    cls.ImpalaTestMatrix.add_constraint(lambda v:
+        v.get_value('exec_option')['batch_size'] == 0)
+
+  def test_tpcds_q35a(self, vector):
+    self.run_test_case('tpcds-q35a', vector)
+
+  def test_tpcds_q48(self, vector):
+    self.run_test_case('tpcds-q48', vector)