You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/11/01 16:04:12 UTC
[7/7] impala git commit: IMPALA-5950: fix TPC-DS Q35a and Q48 queries
IMPALA-5950: fix TPC-DS Q35a and Q48 queries
The query text (for Q48) and substitution parameters didn't match the
TPC-DS standard for qualification queries. After fixing that, the
queries return the results expected by the TPC-DS standards.
Note that this may affect the performance of perf workloads running
tpcds-unmodified.
Change-Id: Ic7c737f68adf616738d6eb6e5a02593af25bcbaf
Reviewed-on: http://gerrit.cloudera.org:8080/11833
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/e3a70270
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/e3a70270
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/e3a70270
Branch: refs/heads/master
Commit: e3a702707cb60b4e127215b99ab4e6945cedcd1f
Parents: 0e1f304
Author: Tim Armstrong <ta...@cloudera.com>
Authored: Tue Oct 30 15:26:39 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Thu Nov 1 08:05:48 2018 +0000
----------------------------------------------------------------------
.../tpcds-unmodified/queries/tpcds-q35a.test | 164 +++++++++++++++++++
.../tpcds-unmodified/queries/tpcds-q48.test | 85 +++++++---
tests/query_test/test_tpcds_queries.py | 51 ++++--
3 files changed, 268 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/e3a70270/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test b/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test
new file mode 100644
index 0000000..5748684
--- /dev/null
+++ b/testdata/workloads/tpcds-unmodified/queries/tpcds-q35a.test
@@ -0,0 +1,164 @@
+====
+---- QUERY: TPCDS-Q35A
+select
+ ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ count(*) cnt1,
+ min(cd_dep_count),
+ max(cd_dep_count),
+ avg(cd_dep_count),
+ cd_dep_employed_count,
+ count(*) cnt2,
+ min(cd_dep_employed_count),
+ max(cd_dep_employed_count),
+ avg(cd_dep_employed_count),
+ cd_dep_college_count,
+ count(*) cnt3,
+ min(cd_dep_college_count),
+ max(cd_dep_college_count),
+ avg(cd_dep_college_count)
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_qoy < 4) and
+ exists (select * from
+ (select ws_bill_customer_sk customsk
+ from web_sales,date_dim
+ where
+ ws_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_qoy < 4
+ union all
+ select cs_ship_customer_sk customsk
+ from catalog_sales,date_dim
+ where
+ cs_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_qoy < 4)x
+ where x.customsk = c.c_customer_sk)
+ group by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by ca_state,
+ cd_gender,
+ cd_marital_status,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+limit 100
+---- RESULTS
+'AK','F','D',1,1,1,1,1,2,1,2,2,2,2,1,2,2,2
+'AK','F','D',5,1,5,5,5,3,1,3,3,3,3,1,3,3,3
+'AK','F','M',0,1,0,0,0,6,1,6,6,6,0,1,0,0,0
+'AK','F','M',4,1,4,4,4,2,1,2,2,2,0,1,0,0,0
+'AK','F','M',5,1,5,5,5,2,1,2,2,2,2,1,2,2,2
+'AK','F','S',2,1,2,2,2,3,1,3,3,3,0,1,0,0,0
+'AK','F','S',2,1,2,2,2,3,1,3,3,3,3,1,3,3,3
+'AK','F','S',2,1,2,2,2,5,1,5,5,5,2,1,2,2,2
+'AK','F','S',3,1,3,3,3,0,1,0,0,0,2,1,2,2,2
+'AK','F','S',4,1,4,4,4,1,1,1,1,1,6,1,6,6,6
+'AK','F','S',5,1,5,5,5,6,1,6,6,6,4,1,4,4,4
+'AK','F','S',6,1,6,6,6,2,1,2,2,2,4,1,4,4,4
+'AK','F','S',6,1,6,6,6,6,1,6,6,6,3,1,3,3,3
+'AK','F','U',1,1,1,1,1,5,1,5,5,5,1,1,1,1,1
+'AK','F','U',4,1,4,4,4,0,1,0,0,0,4,1,4,4,4
+'AK','F','U',5,1,5,5,5,2,1,2,2,2,5,1,5,5,5
+'AK','F','W',0,1,0,0,0,4,1,4,4,4,0,1,0,0,0
+'AK','F','W',1,1,1,1,1,1,1,1,1,1,4,1,4,4,4
+'AK','F','W',2,1,2,2,2,2,1,2,2,2,5,1,5,5,5
+'AK','F','W',4,1,4,4,4,2,1,2,2,2,2,1,2,2,2
+'AK','M','D',0,1,0,0,0,0,1,0,0,0,2,1,2,2,2
+'AK','M','D',0,1,0,0,0,5,1,5,5,5,4,1,4,4,4
+'AK','M','D',5,1,5,5,5,2,1,2,2,2,4,1,4,4,4
+'AK','M','D',6,1,6,6,6,3,1,3,3,3,6,1,6,6,6
+'AK','M','M',0,1,0,0,0,6,1,6,6,6,5,1,5,5,5
+'AK','M','M',3,1,3,3,3,3,1,3,3,3,1,1,1,1,1
+'AK','M','M',5,1,5,5,5,3,1,3,3,3,0,1,0,0,0
+'AK','M','S',0,1,0,0,0,2,1,2,2,2,4,1,4,4,4
+'AK','M','S',4,1,4,4,4,0,1,0,0,0,1,1,1,1,1
+'AK','M','S',4,1,4,4,4,0,1,0,0,0,4,1,4,4,4
+'AK','M','S',5,1,5,5,5,5,1,5,5,5,2,1,2,2,2
+'AK','M','U',0,1,0,0,0,3,1,3,3,3,5,1,5,5,5
+'AK','M','U',4,1,4,4,4,6,1,6,6,6,1,1,1,1,1
+'AK','M','W',0,1,0,0,0,6,1,6,6,6,2,1,2,2,2
+'AK','M','W',2,1,2,2,2,2,1,2,2,2,5,1,5,5,5
+'AK','M','W',2,1,2,2,2,4,1,4,4,4,6,1,6,6,6
+'AK','M','W',3,1,3,3,3,5,1,5,5,5,4,1,4,4,4
+'AK','M','W',4,1,4,4,4,6,1,6,6,6,3,1,3,3,3
+'AL','F','D',0,1,0,0,0,6,1,6,6,6,3,1,3,3,3
+'AL','F','D',2,1,2,2,2,0,1,0,0,0,6,1,6,6,6
+'AL','F','D',2,1,2,2,2,4,1,4,4,4,5,1,5,5,5
+'AL','F','D',2,1,2,2,2,6,1,6,6,6,3,1,3,3,3
+'AL','F','D',3,1,3,3,3,2,1,2,2,2,0,1,0,0,0
+'AL','F','D',4,1,4,4,4,6,1,6,6,6,6,1,6,6,6
+'AL','F','D',5,1,5,5,5,0,1,0,0,0,6,1,6,6,6
+'AL','F','D',5,1,5,5,5,2,1,2,2,2,5,1,5,5,5
+'AL','F','D',5,1,5,5,5,3,1,3,3,3,3,1,3,3,3
+'AL','F','D',5,1,5,5,5,4,1,4,4,4,1,1,1,1,1
+'AL','F','D',5,1,5,5,5,4,1,4,4,4,2,1,2,2,2
+'AL','F','M',0,1,0,0,0,3,1,3,3,3,3,1,3,3,3
+'AL','F','M',1,1,1,1,1,3,1,3,3,3,1,1,1,1,1
+'AL','F','M',1,1,1,1,1,3,1,3,3,3,3,1,3,3,3
+'AL','F','M',1,1,1,1,1,4,1,4,4,4,4,1,4,4,4
+'AL','F','M',1,1,1,1,1,6,1,6,6,6,1,1,1,1,1
+'AL','F','M',1,2,1,1,1,6,2,6,6,6,5,2,5,5,5
+'AL','F','M',2,1,2,2,2,0,1,0,0,0,1,1,1,1,1
+'AL','F','M',2,1,2,2,2,5,1,5,5,5,3,1,3,3,3
+'AL','F','M',3,1,3,3,3,3,1,3,3,3,0,1,0,0,0
+'AL','F','M',4,1,4,4,4,3,1,3,3,3,2,1,2,2,2
+'AL','F','M',4,1,4,4,4,6,1,6,6,6,3,1,3,3,3
+'AL','F','M',5,1,5,5,5,3,1,3,3,3,1,1,1,1,1
+'AL','F','M',5,1,5,5,5,5,1,5,5,5,6,1,6,6,6
+'AL','F','M',6,1,6,6,6,1,1,1,1,1,6,1,6,6,6
+'AL','F','M',6,1,6,6,6,3,1,3,3,3,4,1,4,4,4
+'AL','F','S',1,1,1,1,1,0,1,0,0,0,4,1,4,4,4
+'AL','F','S',2,1,2,2,2,1,1,1,1,1,3,1,3,3,3
+'AL','F','S',2,1,2,2,2,5,1,5,5,5,4,1,4,4,4
+'AL','F','S',2,1,2,2,2,5,1,5,5,5,5,1,5,5,5
+'AL','F','S',3,1,3,3,3,0,1,0,0,0,3,1,3,3,3
+'AL','F','S',3,1,3,3,3,2,1,2,2,2,3,1,3,3,3
+'AL','F','S',3,1,3,3,3,4,1,4,4,4,3,1,3,3,3
+'AL','F','S',3,1,3,3,3,4,1,4,4,4,4,1,4,4,4
+'AL','F','S',5,1,5,5,5,1,1,1,1,1,6,1,6,6,6
+'AL','F','U',0,2,0,0,0,1,2,1,1,1,1,2,1,1,1
+'AL','F','U',0,1,0,0,0,6,1,6,6,6,5,1,5,5,5
+'AL','F','U',1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+'AL','F','U',1,1,1,1,1,2,1,2,2,2,5,1,5,5,5
+'AL','F','U',1,1,1,1,1,3,1,3,3,3,2,1,2,2,2
+'AL','F','U',2,1,2,2,2,2,1,2,2,2,0,1,0,0,0
+'AL','F','U',5,1,5,5,5,6,1,6,6,6,1,1,1,1,1
+'AL','F','U',6,1,6,6,6,5,1,5,5,5,2,1,2,2,2
+'AL','F','W',1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
+'AL','F','W',1,1,1,1,1,3,1,3,3,3,2,1,2,2,2
+'AL','F','W',1,1,1,1,1,4,1,4,4,4,3,1,3,3,3
+'AL','F','W',2,1,2,2,2,2,1,2,2,2,5,1,5,5,5
+'AL','F','W',3,1,3,3,3,1,1,1,1,1,5,1,5,5,5
+'AL','F','W',3,1,3,3,3,2,1,2,2,2,3,1,3,3,3
+'AL','F','W',4,1,4,4,4,3,1,3,3,3,4,1,4,4,4
+'AL','F','W',5,1,5,5,5,1,1,1,1,1,3,1,3,3,3
+'AL','F','W',5,1,5,5,5,1,1,1,1,1,6,1,6,6,6
+'AL','F','W',5,1,5,5,5,5,1,5,5,5,4,1,4,4,4
+'AL','F','W',5,1,5,5,5,6,1,6,6,6,0,1,0,0,0
+'AL','M','D',1,1,1,1,1,1,1,1,1,1,2,1,2,2,2
+'AL','M','D',3,1,3,3,3,3,1,3,3,3,3,1,3,3,3
+'AL','M','D',4,1,4,4,4,3,1,3,3,3,6,1,6,6,6
+'AL','M','D',5,1,5,5,5,0,1,0,0,0,2,1,2,2,2
+'AL','M','M',2,1,2,2,2,2,1,2,2,2,1,1,1,1,1
+'AL','M','M',2,1,2,2,2,2,1,2,2,2,2,1,2,2,2
+'AL','M','M',2,1,2,2,2,4,1,4,4,4,5,1,5,5,5
+'AL','M','M',2,1,2,2,2,6,1,6,6,6,0,1,0,0,0
+---- TYPES
+STRING, STRING, STRING, INT, BIGINT, INT, INT, DOUBLE, INT, BIGINT, INT, INT, DOUBLE, INT, BIGINT, INT, INT, DOUBLE
+====
http://git-wip-us.apache.org/repos/asf/impala/blob/e3a70270/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test b/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
index 2b8eaf3..d6d6e7d 100644
--- a/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
+++ b/testdata/workloads/tpcds-unmodified/queries/tpcds-q48.test
@@ -1,27 +1,70 @@
====
---- QUERY: TPCDS-Q48
-
-select sum (ss_quantity)
-from store_sales, store, (select * from customer_demographics where cd_marital_status = 'S' and cd_education_status = '4 yr Degree') as v1, (select * from customer_address where ca_country = 'United States' and ca_state in ('AK', 'IA', 'NE', 'NY', 'VA', 'AR', 'AZ', 'MI', 'NC')) as v2, date_dim
+ select sum (ss_quantity)
+ from store_sales, store, customer_demographics, customer_address, date_dim
where s_store_sk = ss_store_sk
- and ss_sold_date_sk = d_date_sk and d_year = 1998
- and ss_sold_date_sk between 2450815 and 2451179
- and (v1.cd_demo_sk = ss_cdemo_sk and
- (
- (ss_sales_price between 100.00 and 150.00) or
- (ss_sales_price between 50.00 and 100.00) or
- (ss_sales_price between 150.00 and 200.00)
- )
- )
- and (ss_addr_sk = v2.ca_address_sk and
- (
- (ca_state in ('AK', 'IA', 'NE') and ss_net_profit between 0 and 2000)
- or (ca_state in ('NY', 'VA', 'AR') and ss_net_profit between 150 and 3000)
- or (ca_state in ('AZ', 'MI', 'NC') and ss_net_profit between 50 and 25000)
- )
- )
-;
+ and ss_sold_date_sk = d_date_sk and d_year = 2000
+ and
+ (
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'M'
+ and
+ cd_education_status = '4 yr Degree'
+ and
+ ss_sales_price between 100.00 and 150.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'D'
+ and
+ cd_education_status = '2 yr Degree'
+ and
+ ss_sales_price between 50.00 and 100.00
+ )
+ or
+ (
+ cd_demo_sk = ss_cdemo_sk
+ and
+ cd_marital_status = 'S'
+ and
+ cd_education_status = 'College'
+ and
+ ss_sales_price between 150.00 and 200.00
+ )
+ )
+ and
+ (
+ (
+ ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('CO', 'OH', 'TX')
+ and ss_net_profit between 0 and 2000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('OR', 'MN', 'KY')
+ and ss_net_profit between 150 and 3000
+ )
+ or
+ (ss_addr_sk = ca_address_sk
+ and
+ ca_country = 'United States'
+ and
+ ca_state in ('VA', 'CA', 'MS')
+ and ss_net_profit between 50 and 25000
+ )
+ )
---- RESULTS
+26257
---- TYPES
-INT, INT, STRING, DECIMAL
+BIGINT
====
http://git-wip-us.apache.org/repos/asf/impala/blob/e3a70270/tests/query_test/test_tpcds_queries.py
----------------------------------------------------------------------
diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py
index 4b937b1..b371bf4 100644
--- a/tests/query_test/test_tpcds_queries.py
+++ b/tests/query_test/test_tpcds_queries.py
@@ -32,19 +32,19 @@ class TestTpcdsQuery(ImpalaTestSuite):
@classmethod
def add_test_dimensions(cls):
super(TestTpcdsQuery, cls).add_test_dimensions()
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
- v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and\
- v.get_value('table_format').compression_codec in ['none', 'snap'] and\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and
+ v.get_value('table_format').compression_codec in ['none', 'snap'] and
v.get_value('table_format').compression_type != 'record')
cls.ImpalaTestMatrix.add_mandatory_exec_option('decimal_v2', 0)
if cls.exploration_strategy() != 'exhaustive':
# Cut down on the execution time for these tests in core by running only
# against parquet.
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('table_format').file_format in ['parquet'])
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('exec_option')['batch_size'] == 0)
@pytest.mark.execute_serially
@@ -274,18 +274,18 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite):
@classmethod
def add_test_dimensions(cls):
super(TestTpcdsDecimalV2Query, cls).add_test_dimensions()
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
- v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and\
- v.get_value('table_format').compression_codec in ['none', 'snap'] and\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and
+ v.get_value('table_format').compression_codec in ['none', 'snap'] and
v.get_value('table_format').compression_type != 'record')
if cls.exploration_strategy() != 'exhaustive':
# Cut down on the execution time for these tests in core by running only
# against parquet.
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('table_format').file_format in ['parquet'])
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
v.get_value('exec_option')['batch_size'] == 0)
def test_tpcds_q1(self, vector):
@@ -514,7 +514,7 @@ class TestTpcdsInsert(ImpalaTestSuite):
def add_test_dimensions(cls):
super(TestTpcdsInsert, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
- cls.ImpalaTestMatrix.add_constraint(lambda v:\
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
is_supported_insert_format(v.get_value('table_format')))
if cls.exploration_strategy() == 'core' and not pytest.config.option.table_formats:
# Don't run on core, unless the user explicitly wants to validate a specific table
@@ -524,3 +524,32 @@ class TestTpcdsInsert(ImpalaTestSuite):
def test_tpcds_partitioned_insert(self, vector):
self.run_test_case('partitioned-insert', vector)
+
+
+class TestTpcdsUnmodified(ImpalaTestSuite):
+ @classmethod
+ def get_workload(cls):
+ return 'tpcds-unmodified'
+
+ @classmethod
+ def add_test_dimensions(cls):
+ super(TestTpcdsUnmodified, cls).add_test_dimensions()
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('table_format').file_format not in ['rc', 'hbase', 'kudu'] and
+ v.get_value('table_format').compression_codec in ['none', 'snap'] and
+ v.get_value('table_format').compression_type != 'record')
+
+ if cls.exploration_strategy() != 'exhaustive':
+ # Cut down on the execution time for these tests in core by running only
+ # against parquet.
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('table_format').file_format in ['parquet'])
+
+ cls.ImpalaTestMatrix.add_constraint(lambda v:
+ v.get_value('exec_option')['batch_size'] == 0)
+
+ def test_tpcds_q35a(self, vector):
+ self.run_test_case('tpcds-q35a', vector)
+
+ def test_tpcds_q48(self, vector):
+ self.run_test_case('tpcds-q48', vector)