You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2018/10/22 02:10:32 UTC
[27/51] [partial] hive git commit: HIVE-20718: Add perf cli driver
with constraints (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out
new file mode 100644
index 0000000..ff28da1
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query90.q.out
@@ -0,0 +1,92 @@
+Warning: Shuffle Join MERGEJOIN[152][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
+PREHOOK: query: explain cbo
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at,
+ ( select count(*) pmc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@time_dim
+PREHOOK: Input: default@web_page
+PREHOOK: Input: default@web_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at,
+ ( select count(*) pmc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@time_dim
+POSTHOOK: Input: default@web_page
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$0], dir0=[ASC], fetch=[100])
+ HiveProject(am_pm_ratio=[/($0, $1)])
+ HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(CAST=[CAST($0):DECIMAL(15, 4)])
+ HiveAggregate(group=[{}], agg#0=[count()])
+ HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12])
+ HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(wp_web_page_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)])
+ HiveTableScan(table=[[default, web_page]], table:alias=[web_page])
+ HiveProject(t_time_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $3, 6, 7)])
+ HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim])
+ HiveProject(hd_demo_sk=[$0])
+ HiveFilter(condition=[=($3, 8)])
+ HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics])
+ HiveProject(CAST=[CAST($0):DECIMAL(15, 4)])
+ HiveAggregate(group=[{}], agg#0=[count()])
+ HiveJoin(condition=[=($1, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($2, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_sold_time_sk=[$1], ws_ship_hdemo_sk=[$10], ws_web_page_sk=[$12])
+ HiveFilter(condition=[AND(IS NOT NULL($10), IS NOT NULL($1), IS NOT NULL($12))])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(wp_web_page_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $10, 5000, 5200)])
+ HiveTableScan(table=[[default, web_page]], table:alias=[web_page])
+ HiveProject(t_time_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $3, 14, 15)])
+ HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim])
+ HiveProject(hd_demo_sk=[$0])
+ HiveFilter(condition=[=($3, 8)])
+ HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out
new file mode 100644
index 0000000..046a374
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query91.q.out
@@ -0,0 +1,109 @@
+PREHOOK: query: explain cbo
+select
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%'
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@call_center
+PREHOOK: Input: default@catalog_returns
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@household_demographics
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%'
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@call_center
+POSTHOOK: Input: default@catalog_returns
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(call_center=[$0], call_center_name=[$1], manager=[$2], returns_loss=[$3])
+ HiveSortLimit(sort0=[$4], dir0=[DESC-nulls-last])
+ HiveProject(call_center=[$2], call_center_name=[$3], manager=[$4], returns_loss=[$5], (tok_function sum (tok_table_or_col cr_net_loss))=[$5])
+ HiveAggregate(group=[{6, 7, 14, 15, 16}], agg#0=[sum($11)])
+ HiveJoin(condition=[=($17, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($9, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ca_address_sk=[$0])
+ HiveFilter(condition=[=($11, -7)])
+ HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+ HiveJoin(condition=[=($4, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(c_customer_sk=[$0], c_current_cdemo_sk=[$2], c_current_hdemo_sk=[$3], c_current_addr_sk=[$4])
+ HiveFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($2), IS NOT NULL($3))])
+ HiveTableScan(table=[[default, customer]], table:alias=[customer])
+ HiveProject(cd_demo_sk=[$0], cd_marital_status=[$2], cd_education_status=[$3])
+ HiveFilter(condition=[AND(IN($3, _UTF-16LE'Unknown', _UTF-16LE'Advanced Degree'), IN($2, _UTF-16LE'M', _UTF-16LE'W'), IN(ROW($2, $3), ROW(_UTF-16LE'M', _UTF-16LE'Unknown'), ROW(_UTF-16LE'W', _UTF-16LE'Advanced Degree')))])
+ HiveTableScan(table=[[default, customer_demographics]], table:alias=[customer_demographics])
+ HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$1], cr_call_center_sk=[$2], cr_net_loss=[$3], d_date_sk=[$4], cc_call_center_sk=[$5], cc_call_center_id=[$6], cc_name=[$7], cc_manager=[$8])
+ HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(cr_returned_date_sk=[$0], cr_returning_customer_sk=[$7], cr_call_center_sk=[$11], cr_net_loss=[$26])
+ HiveFilter(condition=[AND(IS NOT NULL($11), IS NOT NULL($0), IS NOT NULL($7))])
+ HiveTableScan(table=[[default, catalog_returns]], table:alias=[catalog_returns])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[AND(=($6, 1999), =($8, 11))])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(cc_call_center_sk=[$0], cc_call_center_id=[$1], cc_name=[$6], cc_manager=[$11])
+ HiveTableScan(table=[[default, call_center]], table:alias=[call_center])
+ HiveProject(hd_demo_sk=[$0])
+ HiveFilter(condition=[LIKE($2, _UTF-16LE'0-500%')])
+ HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out
new file mode 100644
index 0000000..16098d7
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query92.q.out
@@ -0,0 +1,94 @@
+PREHOOK: query: explain cbo
+select
+ sum(ws_ext_discount_amt) as `Excess Discount Amount`
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > (
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@web_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select
+ sum(ws_ext_discount_amt) as `Excess Discount Amount`
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > (
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(excess discount amount=[$0])
+ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
+ HiveProject(excess discount amount=[$0], (tok_function sum (tok_table_or_col ws_ext_discount_amt))=[$0])
+ HiveAggregate(group=[{}], agg#0=[sum($2)])
+ HiveJoin(condition=[AND(>($2, $5), =($6, $1))], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(ws_item_sk=[$0], CAST3=[$1], i_item_sk=[$2])
+ HiveJoin(condition=[=($0, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_item_sk=[$0], CAST3=[CAST(*(1.3, /($1, $2))):DECIMAL(14, 7)])
+ HiveAggregate(group=[{1}], agg#0=[sum($2)], agg#1=[count($2)])
+ HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_sold_date_sk=[$0], ws_item_sk=[$3], ws_ext_discount_amt=[$22])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[web_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1998-03-18 00:00:00, 1998-06-16 00:00:00)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(i_item_sk=[$0])
+ HiveFilter(condition=[=($13, 269)])
+ HiveTableScan(table=[[default, item]], table:alias=[item])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out
new file mode 100644
index 0000000..4902320
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query93.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: explain cbo
+select ss_customer_sk
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
+ else (ss_quantity*ss_sales_price) end act_sales
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@reason
+PREHOOK: Input: default@store_returns
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select ss_customer_sk
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
+ else (ss_quantity*ss_sales_price) end act_sales
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@reason
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(sort0=[$1], sort1=[$0], dir0=[ASC], dir1=[ASC], fetch=[100])
+ HiveProject(ss_customer_sk=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[sum($1)])
+ HiveProject(ss_customer_sk=[$1], act_sales=[CASE(IS NOT NULL($8), *(CAST(-($3, $8)):DECIMAL(10, 0), $4), *(CAST($3):DECIMAL(10, 0), $4))])
+ HiveJoin(condition=[AND(=($5, $0), =($7, $2))], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ss_item_sk=[$2], ss_customer_sk=[$3], ss_ticket_number=[$9], ss_quantity=[$10], ss_sales_price=[$13])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(sr_item_sk=[$2], sr_reason_sk=[$8], sr_ticket_number=[$9], sr_return_quantity=[$10])
+ HiveFilter(condition=[IS NOT NULL($8)])
+ HiveTableScan(table=[[default, store_returns]], table:alias=[store_returns])
+ HiveProject(r_reason_sk=[$0])
+ HiveFilter(condition=[=($2, _UTF-16LE'Did not like the warranty')])
+ HiveTableScan(table=[[default, reason]], table:alias=[reason])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
new file mode 100644
index 0000000..198778f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query94.q.out
@@ -0,0 +1,98 @@
+PREHOOK: query: explain cbo
+select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@web_returns
+PREHOOK: Input: default@web_sales
+PREHOOK: Input: default@web_site
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@web_returns
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Input: default@web_site
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2])
+ HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100])
+ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0])
+ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $4)], agg#1=[sum($5)], agg#2=[sum($6)])
+ HiveFilter(condition=[IS NULL($14)])
+ HiveJoin(condition=[=($4, $13)], joinType=[left], algorithm=[none], cost=[not available])
+ HiveSemiJoin(condition=[AND(<>($3, $13), =($4, $14))], joinType=[inner])
+ HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$3], ws_web_site_sk=[$4], ws_warehouse_sk=[$5], ws_order_number=[$6], ws_ext_ship_cost=[$7], ws_net_profit=[$8], d_date_sk=[$9], d_date=[$10], ca_address_sk=[$0], ca_state=[$1], web_site_sk=[$11], web_company_name=[$12])
+ HiveJoin(condition=[=($4, $11)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($3, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ca_address_sk=[$0], ca_state=[CAST(_UTF-16LE'TX'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"])
+ HiveFilter(condition=[=($8, _UTF-16LE'TX')])
+ HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+ HiveJoin(condition=[=($0, $7)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_warehouse_sk=[$15], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33])
+ HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws1])
+ HiveProject(d_date_sk=[$0], d_date=[$2])
+ HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(web_site_sk=[$0], web_company_name=[CAST(_UTF-16LE'pri'):VARCHAR(2147483647) CHARACTER SET "UTF-16LE" COLLATE "ISO-8859-1$en_US$primary"])
+ HiveFilter(condition=[=($14, _UTF-16LE'pri')])
+ HiveTableScan(table=[[default, web_site]], table:alias=[web_site])
+ HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17])
+ HiveFilter(condition=[IS NOT NULL($15)])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws2])
+ HiveProject(wr_order_number0=[$13], $f1=[true])
+ HiveTableScan(table=[[default, web_returns]], table:alias=[wr1])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out
new file mode 100644
index 0000000..0546f1c
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query95.q.out
@@ -0,0 +1,112 @@
+PREHOOK: query: explain cbo
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+ from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+ from web_returns,ws_wh
+ where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@web_returns
+PREHOOK: Input: default@web_sales
+PREHOOK: Input: default@web_site
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+with ws_wh as
+(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2
+ from web_sales ws1,web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+ select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and ws1.ws_order_number in (select ws_order_number
+ from ws_wh)
+and ws1.ws_order_number in (select wr_order_number
+ from web_returns,ws_wh
+ where wr_order_number = ws_wh.ws_order_number)
+order by count(distinct ws_order_number)
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@web_returns
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Input: default@web_site
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2])
+ HiveSortLimit(sort0=[$3], dir0=[ASC], fetch=[100])
+ HiveProject(order count=[$0], total shipping cost=[$1], total net profit=[$2], (tok_functiondi count (tok_table_or_col ws_order_number))=[$0])
+ HiveAggregate(group=[{}], agg#0=[count(DISTINCT $6)], agg#1=[sum($7)], agg#2=[sum($8)])
+ HiveJoin(condition=[=($6, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(wr_order_number=[$14])
+ HiveJoin(condition=[=($14, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_order_number=[$1])
+ HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws1])
+ HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws2])
+ HiveTableScan(table=[[default, web_returns]], table:alias=[web_returns])
+ HiveJoin(condition=[=($5, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_order_number=[$1])
+ HiveJoin(condition=[AND(=($1, $3), <>($0, $2))], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws1])
+ HiveProject(ws_warehouse_sk=[$15], ws_order_number=[$17])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws2])
+ HiveJoin(condition=[=($3, $9)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($2, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ca_address_sk=[$0])
+ HiveFilter(condition=[=($8, _UTF-16LE'TX')])
+ HiveTableScan(table=[[default, customer_address]], table:alias=[customer_address])
+ HiveJoin(condition=[=($0, $6)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ws_ship_date_sk=[$2], ws_ship_addr_sk=[$11], ws_web_site_sk=[$13], ws_order_number=[$17], ws_ext_ship_cost=[$28], ws_net_profit=[$33])
+ HiveFilter(condition=[AND(IS NOT NULL($2), IS NOT NULL($11), IS NOT NULL($13))])
+ HiveTableScan(table=[[default, web_sales]], table:alias=[ws1])
+ HiveProject(d_date_sk=[$0], d_date=[$2])
+ HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 1999-05-01 00:00:00, 1999-06-30 00:00:00)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(web_site_sk=[$0])
+ HiveFilter(condition=[=($14, _UTF-16LE'pri')])
+ HiveTableScan(table=[[default, web_site]], table:alias=[web_site])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out
new file mode 100644
index 0000000..e32705a
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query96.q.out
@@ -0,0 +1,61 @@
+PREHOOK: query: explain cbo
+select count(*)
+from store_sales
+ ,household_demographics
+ ,time_dim, store
+where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and household_demographics.hd_dep_count = 5
+ and store.s_store_name = 'ese'
+order by count(*)
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@household_demographics
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_sales
+PREHOOK: Input: default@time_dim
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select count(*)
+from store_sales
+ ,household_demographics
+ ,time_dim, store
+where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
+ and time_dim.t_hour = 8
+ and time_dim.t_minute >= 30
+ and household_demographics.hd_dep_count = 5
+ and store.s_store_name = 'ese'
+order by count(*)
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@household_demographics
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Input: default@time_dim
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(_o__c0=[$0])
+ HiveSortLimit(sort0=[$1], dir0=[ASC], fetch=[100])
+ HiveProject(_o__c0=[$0], (tok_functionstar count)=[$0])
+ HiveAggregate(group=[{}], agg#0=[count()])
+ HiveJoin(condition=[=($2, $5)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($1, $4)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ss_sold_time_sk=[$1], ss_hdemo_sk=[$5], ss_store_sk=[$7])
+ HiveFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($1), IS NOT NULL($7))])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(t_time_sk=[$0])
+ HiveFilter(condition=[AND(=($3, 8), >=($4, 30))])
+ HiveTableScan(table=[[default, time_dim]], table:alias=[time_dim])
+ HiveProject(hd_demo_sk=[$0])
+ HiveFilter(condition=[=($3, 5)])
+ HiveTableScan(table=[[default, household_demographics]], table:alias=[household_demographics])
+ HiveProject(s_store_sk=[$0])
+ HiveFilter(condition=[=($5, _UTF-16LE'ese')])
+ HiveTableScan(table=[[default, store]], table:alias=[store])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out
new file mode 100644
index 0000000..6151627
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query97.q.out
@@ -0,0 +1,81 @@
+PREHOOK: query: explain cbo
+with ssci as (
+select ss_customer_sk customer_sk
+ ,ss_item_sk item_sk
+from store_sales,date_dim
+where ss_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by ss_customer_sk
+ ,ss_item_sk),
+csci as(
+ select cs_bill_customer_sk customer_sk
+ ,cs_item_sk item_sk
+from catalog_sales,date_dim
+where cs_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by cs_bill_customer_sk
+ ,cs_item_sk)
+ select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
+ ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
+ ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
+from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
+ and ssci.item_sk = csci.item_sk)
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@catalog_sales
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+with ssci as (
+select ss_customer_sk customer_sk
+ ,ss_item_sk item_sk
+from store_sales,date_dim
+where ss_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by ss_customer_sk
+ ,ss_item_sk),
+csci as(
+ select cs_bill_customer_sk customer_sk
+ ,cs_item_sk item_sk
+from catalog_sales,date_dim
+where cs_sold_date_sk = d_date_sk
+ and d_month_seq between 1212 and 1212 + 11
+group by cs_bill_customer_sk
+ ,cs_item_sk)
+ select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only
+ ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only
+ ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog
+from ssci full outer join csci on (ssci.customer_sk=csci.customer_sk
+ and ssci.item_sk = csci.item_sk)
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@catalog_sales
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveSortLimit(fetch=[100])
+ HiveProject($f0=[$0], $f1=[$1], $f2=[$2])
+ HiveAggregate(group=[{}], agg#0=[sum($0)], agg#1=[sum($1)], agg#2=[sum($2)])
+ HiveProject($f0=[CASE(AND(IS NOT NULL($0), IS NULL($2)), 1, 0)], $f1=[CASE(AND(IS NULL($0), IS NOT NULL($2)), 1, 0)], $f2=[CASE(AND(IS NOT NULL($0), IS NOT NULL($2)), 1, 0)])
+ HiveJoin(condition=[AND(=($0, $2), =($1, $3))], joinType=[full], algorithm=[none], cost=[not available])
+ HiveProject(ss_customer_sk=[$1], ss_item_sk=[$0])
+ HiveAggregate(group=[{1, 2}])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_customer_sk=[$3])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(cs_bill_customer_sk=[$0], cs_item_sk=[$1])
+ HiveAggregate(group=[{1, 2}])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(cs_sold_date_sk=[$0], cs_bill_customer_sk=[$3], cs_item_sk=[$15])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out
new file mode 100644
index 0000000..2a6b66f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query98.q.out
@@ -0,0 +1,87 @@
+PREHOOK: query: explain cbo
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ss_ext_sales_price) as itemrevenue
+ ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ store_sales
+ ,item
+ ,date_dim
+where
+ ss_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ss_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+PREHOOK: type: QUERY
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@item
+PREHOOK: Input: default@store_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+ ,sum(ss_ext_sales_price) as itemrevenue
+ ,sum(ss_ext_sales_price)*100/sum(sum(ss_ext_sales_price)) over
+ (partition by i_class) as revenueratio
+from
+ store_sales
+ ,item
+ ,date_dim
+where
+ ss_item_sk = i_item_sk
+ and i_category in ('Jewelry', 'Sports', 'Books')
+ and ss_sold_date_sk = d_date_sk
+ and d_date between cast('2001-01-12' as date)
+ and (cast('2001-01-12' as date) + 30 days)
+group by
+ i_item_id
+ ,i_item_desc
+ ,i_category
+ ,i_class
+ ,i_current_price
+order by
+ i_category
+ ,i_class
+ ,i_item_id
+ ,i_item_desc
+ ,revenueratio
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@item
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(i_item_desc=[$0], i_category=[$1], i_class=[$2], i_current_price=[$3], itemrevenue=[$4], revenueratio=[$5])
+ HiveSortLimit(sort0=[$1], sort1=[$2], sort2=[$6], sort3=[$0], sort4=[$5], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC], dir4=[ASC])
+ HiveProject(i_item_desc=[$1], i_category=[$4], i_class=[$3], i_current_price=[$2], itemrevenue=[$5], revenueratio=[/(*($5, CAST(100):DECIMAL(10, 0)), sum($5) OVER (PARTITION BY $3 ORDER BY $3 NULLS FIRST ROWS BETWEEN 2147483647 FOLLOWING AND 2147483647 PRECEDING))], (tok_table_or_col i_item_id)=[$0])
+ HiveAggregate(group=[{1, 2, 3, 4, 5}], agg#0=[sum($8)])
+ HiveJoin(condition=[=($7, $0)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(i_item_sk=[$0], i_item_id=[$1], i_item_desc=[$4], i_current_price=[$5], i_class=[$10], i_category=[$12])
+ HiveFilter(condition=[IN($12, _UTF-16LE'Jewelry', _UTF-16LE'Sports', _UTF-16LE'Books')])
+ HiveTableScan(table=[[default, item]], table:alias=[item])
+ HiveJoin(condition=[=($0, $3)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(ss_sold_date_sk=[$0], ss_item_sk=[$2], ss_ext_sales_price=[$15])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, store_sales]], table:alias=[store_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, CAST($2):TIMESTAMP(9), 2001-01-12 00:00:00, 2001-02-11 00:00:00)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out
new file mode 100644
index 0000000..2dccdf2
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/cbo_query99.q.out
@@ -0,0 +1,102 @@
+PREHOOK: query: explain cbo
+select
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days`
+from
+ catalog_sales
+ ,warehouse
+ ,ship_mode
+ ,call_center
+ ,date_dim
+where
+ d_month_seq between 1212 and 1212 + 11
+and cs_ship_date_sk = d_date_sk
+and cs_warehouse_sk = w_warehouse_sk
+and cs_ship_mode_sk = sm_ship_mode_sk
+and cs_call_center_sk = cc_call_center_sk
+group by
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+order by substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@call_center
+PREHOOK: Input: default@catalog_sales
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@ship_mode
+PREHOOK: Input: default@warehouse
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain cbo
+select
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk <= 30 ) then 1 else 0 end) as `30 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 30) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 60) then 1 else 0 end ) as `31-60 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 60) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 90) then 1 else 0 end) as `61-90 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 90) and
+ (cs_ship_date_sk - cs_sold_date_sk <= 120) then 1 else 0 end) as `91-120 days`
+ ,sum(case when (cs_ship_date_sk - cs_sold_date_sk > 120) then 1 else 0 end) as `>120 days`
+from
+ catalog_sales
+ ,warehouse
+ ,ship_mode
+ ,call_center
+ ,date_dim
+where
+ d_month_seq between 1212 and 1212 + 11
+and cs_ship_date_sk = d_date_sk
+and cs_warehouse_sk = w_warehouse_sk
+and cs_ship_mode_sk = sm_ship_mode_sk
+and cs_call_center_sk = cc_call_center_sk
+group by
+ substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+order by substr(w_warehouse_name,1,20)
+ ,sm_type
+ ,cc_name
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@call_center
+POSTHOOK: Input: default@catalog_sales
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@ship_mode
+POSTHOOK: Input: default@warehouse
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+CBO PLAN:
+HiveProject(_o__c0=[$0], sm_type=[$1], cc_name=[$2], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7])
+ HiveSortLimit(sort0=[$8], sort1=[$1], sort2=[$2], dir0=[ASC], dir1=[ASC], dir2=[ASC], fetch=[100])
+ HiveProject(_o__c0=[$2], sm_type=[$0], cc_name=[$1], 30 days=[$3], 31-60 days=[$4], 61-90 days=[$5], 91-120 days=[$6], >120 days=[$7], (tok_function substr (tok_table_or_col w_warehouse_name) 1 20)=[$2])
+ HiveAggregate(group=[{11, 13, 15}], agg#0=[sum($4)], agg#1=[sum($5)], agg#2=[sum($6)], agg#3=[sum($7)], agg#4=[sum($8)])
+ HiveJoin(condition=[=($3, $14)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($1, $12)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($2, $10)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveJoin(condition=[=($0, $9)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(cs_ship_date_sk=[$2], cs_call_center_sk=[$11], cs_ship_mode_sk=[$13], cs_warehouse_sk=[$14], CASE=[CASE(<=(-($2, $0), 30), 1, 0)], CASE5=[CASE(AND(>(-($2, $0), 30), <=(-($2, $0), 60)), 1, 0)], CASE6=[CASE(AND(>(-($2, $0), 60), <=(-($2, $0), 90)), 1, 0)], CASE7=[CASE(AND(>(-($2, $0), 90), <=(-($2, $0), 120)), 1, 0)], CASE8=[CASE(>(-($2, $0), 120), 1, 0)])
+ HiveFilter(condition=[AND(IS NOT NULL($14), IS NOT NULL($13), IS NOT NULL($11), IS NOT NULL($2))])
+ HiveTableScan(table=[[default, catalog_sales]], table:alias=[catalog_sales])
+ HiveProject(d_date_sk=[$0])
+ HiveFilter(condition=[BETWEEN(false, $3, 1212, 1223)])
+ HiveTableScan(table=[[default, date_dim]], table:alias=[date_dim])
+ HiveProject(sm_ship_mode_sk=[$0], sm_type=[$2])
+ HiveTableScan(table=[[default, ship_mode]], table:alias=[ship_mode])
+ HiveProject(cc_call_center_sk=[$0], cc_name=[$6])
+ HiveTableScan(table=[[default, call_center]], table:alias=[call_center])
+ HiveProject(w_warehouse_sk=[$0], substr=[substr($2, 1, 20)])
+ HiveTableScan(table=[[default, warehouse]], table:alias=[warehouse])
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out
new file mode 100644
index 0000000..69f9329
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query1.q.out
@@ -0,0 +1,177 @@
+PREHOOK: query: explain
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store
+PREHOOK: Input: default@store_returns
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain
+with customer_total_return as
+(select sr_customer_sk as ctr_customer_sk
+,sr_store_sk as ctr_store_sk
+,sum(SR_FEE) as ctr_total_return
+from store_returns
+,date_dim
+where sr_returned_date_sk = d_date_sk
+and d_year =2000
+group by sr_customer_sk
+,sr_store_sk)
+ select c_customer_id
+from customer_total_return ctr1
+,store
+,customer
+where ctr1.ctr_total_return > (select avg(ctr_total_return)*1.2
+from customer_total_return ctr2
+where ctr1.ctr_store_sk = ctr2.ctr_store_sk)
+and s_store_sk = ctr1.ctr_store_sk
+and s_state = 'NM'
+and ctr1.ctr_customer_sk = c_customer_sk
+order by c_customer_id
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store
+POSTHOOK: Input: default@store_returns
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Map 12 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 5 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Map 1 (SIMPLE_EDGE), Map 10 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 8 (SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 7 vectorized
+ File Output Operator [FS_159]
+ Limit [LIM_158] (rows=100 width=100)
+ Number of rows:100
+ Select Operator [SEL_157] (rows=816091 width=100)
+ Output:["_col0"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_49]
+ Select Operator [SEL_48] (rows=816091 width=100)
+ Output:["_col0"]
+ Filter Operator [FIL_47] (rows=816091 width=324)
+ predicate:(_col2 > _col6)
+ Merge Join Operator [MERGEJOIN_133] (rows=2448274 width=324)
+ Conds:RS_44._col1=RS_156._col1(Inner),Output:["_col2","_col5","_col6"]
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_44]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_131] (rows=2369298 width=213)
+ Conds:RS_41._col0=RS_151._col0(Inner),Output:["_col1","_col2","_col5"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_151]
+ PartitionCols:_col0
+ Select Operator [SEL_150] (rows=80000000 width=104)
+ Output:["_col0","_col1"]
+ TableScan [TS_17] (rows=80000000 width=104)
+ default@customer,customer,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_customer_id"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_41]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_130] (rows=2369298 width=114)
+ Conds:RS_146._col1=RS_149._col0(Inner),Output:["_col0","_col1","_col2"]
+ <-Map 11 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_149]
+ PartitionCols:_col0
+ Select Operator [SEL_148] (rows=35 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_147] (rows=35 width=90)
+ predicate:(s_state = 'NM')
+ TableScan [TS_14] (rows=1704 width=90)
+ default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_state"]
+ <-Reducer 3 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_146]
+ PartitionCols:_col1
+ Select Operator [SEL_145] (rows=14291868 width=119)
+ Output:["_col0","_col1","_col2"]
+ Group By Operator [GBY_144] (rows=14291868 width=119)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_11]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_10] (rows=16855704 width=119)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1
+ Merge Join Operator [MERGEJOIN_129] (rows=16855704 width=107)
+ Conds:RS_138._col0=RS_142._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_138]
+ PartitionCols:_col0
+ Select Operator [SEL_136] (rows=51757026 width=119)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_134] (rows=51757026 width=119)
+ predicate:(sr_customer_sk is not null and sr_returned_date_sk is not null and sr_store_sk is not null)
+ TableScan [TS_0] (rows=57591150 width=119)
+ default@store_returns,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_returned_date_sk","sr_customer_sk","sr_store_sk","sr_fee"]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_142]
+ PartitionCols:_col0
+ Select Operator [SEL_141] (rows=652 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_140] (rows=652 width=8)
+ predicate:(d_year = 2000)
+ TableScan [TS_3] (rows=73049 width=8)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year"]
+ <-Reducer 9 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_156]
+ PartitionCols:_col1
+ Select Operator [SEL_155] (rows=31 width=115)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_154] (rows=31 width=123)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col2)","count(_col2)"],keys:_col1
+ Select Operator [SEL_153] (rows=14291868 width=119)
+ Output:["_col1","_col2"]
+ Group By Operator [GBY_152] (rows=14291868 width=119)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col0
+ Group By Operator [GBY_29] (rows=17467258 width=119)
+ Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)"],keys:_col2, _col1
+ Merge Join Operator [MERGEJOIN_132] (rows=17467258 width=107)
+ Conds:RS_139._col0=RS_143._col0(Inner),Output:["_col1","_col2","_col3"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_139]
+ PartitionCols:_col0
+ Select Operator [SEL_137] (rows=53634860 width=119)
+ Output:["_col0","_col1","_col2","_col3"]
+ Filter Operator [FIL_135] (rows=53634860 width=119)
+ predicate:(sr_returned_date_sk is not null and sr_store_sk is not null)
+ Please refer to the previous TableScan [TS_0]
+ <-Map 10 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_143]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_141]
+
http://git-wip-us.apache.org/repos/asf/hive/blob/b8299551/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out
new file mode 100644
index 0000000..3fbd928
--- /dev/null
+++ b/ql/src/test/results/clientpositive/perf/tez/constraints/query10.q.out
@@ -0,0 +1,379 @@
+PREHOOK: query: explain
+select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3,
+ cd_dep_count,
+ count(*) cnt4,
+ cd_dep_employed_count,
+ count(*) cnt5,
+ cd_dep_college_count,
+ count(*) cnt6
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3) and
+ (exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 ANd 4+3) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+limit 100
+PREHOOK: type: QUERY
+PREHOOK: Input: default@catalog_sales
+PREHOOK: Input: default@customer
+PREHOOK: Input: default@customer_address
+PREHOOK: Input: default@customer_demographics
+PREHOOK: Input: default@date_dim
+PREHOOK: Input: default@store_sales
+PREHOOK: Input: default@web_sales
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain
+select
+ cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ count(*) cnt1,
+ cd_purchase_estimate,
+ count(*) cnt2,
+ cd_credit_rating,
+ count(*) cnt3,
+ cd_dep_count,
+ count(*) cnt4,
+ cd_dep_employed_count,
+ count(*) cnt5,
+ cd_dep_college_count,
+ count(*) cnt6
+ from
+ customer c,customer_address ca,customer_demographics
+ where
+ c.c_current_addr_sk = ca.ca_address_sk and
+ ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') and
+ cd_demo_sk = c.c_current_cdemo_sk and
+ exists (select *
+ from store_sales,date_dim
+ where c.c_customer_sk = ss_customer_sk and
+ ss_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3) and
+ (exists (select *
+ from web_sales,date_dim
+ where c.c_customer_sk = ws_bill_customer_sk and
+ ws_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 ANd 4+3) or
+ exists (select *
+ from catalog_sales,date_dim
+ where c.c_customer_sk = cs_ship_customer_sk and
+ cs_sold_date_sk = d_date_sk and
+ d_year = 2002 and
+ d_moy between 4 and 4+3))
+ group by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+ order by cd_gender,
+ cd_marital_status,
+ cd_education_status,
+ cd_purchase_estimate,
+ cd_credit_rating,
+ cd_dep_count,
+ cd_dep_employed_count,
+ cd_dep_college_count
+limit 100
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@catalog_sales
+POSTHOOK: Input: default@customer
+POSTHOOK: Input: default@customer_address
+POSTHOOK: Input: default@customer_demographics
+POSTHOOK: Input: default@date_dim
+POSTHOOK: Input: default@store_sales
+POSTHOOK: Input: default@web_sales
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 14 <- Reducer 11 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE)
+Map 24 <- Reducer 10 (BROADCAST_EDGE), Reducer 20 (BROADCAST_EDGE)
+Map 25 <- Reducer 23 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
+Reducer 10 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+Reducer 11 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Map 16 (SIMPLE_EDGE)
+Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
+Reducer 18 <- Map 16 (SIMPLE_EDGE), Map 24 (SIMPLE_EDGE)
+Reducer 19 <- Reducer 18 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 12 (SIMPLE_EDGE)
+Reducer 20 <- Map 16 (CUSTOM_SIMPLE_EDGE)
+Reducer 21 <- Map 16 (SIMPLE_EDGE), Map 25 (SIMPLE_EDGE)
+Reducer 22 <- Reducer 21 (SIMPLE_EDGE)
+Reducer 23 <- Map 16 (CUSTOM_SIMPLE_EDGE)
+Reducer 3 <- Map 13 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 15 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 19 (ONE_TO_ONE_EDGE), Reducer 4 (ONE_TO_ONE_EDGE)
+Reducer 6 <- Reducer 22 (ONE_TO_ONE_EDGE), Reducer 5 (ONE_TO_ONE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 9 <- Reducer 5 (CUSTOM_SIMPLE_EDGE)
+
+Stage-0
+ Fetch Operator
+ limit:100
+ Stage-1
+ Reducer 8 vectorized
+ File Output Operator [FS_235]
+ Limit [LIM_234] (rows=1 width=419)
+ Number of rows:100
+ Select Operator [SEL_233] (rows=1 width=419)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+ <-Reducer 7 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_232]
+ Select Operator [SEL_231] (rows=1 width=419)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col6","_col8","_col10","_col12"]
+ Group By Operator [GBY_230] (rows=1 width=379)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_66]
+ PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
+ Group By Operator [GBY_65] (rows=1 width=379)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["count()"],keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
+ Top N Key Operator [TNK_102] (rows=58 width=379)
+ keys:_col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13,sort order:++++++++,top n:100
+ Select Operator [SEL_64] (rows=58 width=379)
+ Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+ Filter Operator [FIL_63] (rows=58 width=379)
+ predicate:(_col15 is not null or _col17 is not null)
+ Merge Join Operator [MERGEJOIN_180] (rows=58 width=379)
+ Conds:RS_60._col0=RS_229._col0(Left Outer),Output:["_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15","_col17"]
+ <-Reducer 5 [ONE_TO_ONE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_60]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_179] (rows=58 width=379)
+ Conds:RS_57._col0=RS_219._col0(Left Outer),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col15"]
+ <-Reducer 4 [ONE_TO_ONE_EDGE]
+ FORWARD [RS_57]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_178] (rows=7792 width=375)
+ Conds:RS_54._col0=RS_55._col0(Left Semi),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_54]
+ PartitionCols:_col0
+ Merge Join Operator [MERGEJOIN_174] (rows=3914656 width=375)
+ Conds:RS_49._col1=RS_188._col0(Inner),Output:["_col0","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13"]
+ <-Map 13 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_188]
+ PartitionCols:_col0
+ Select Operator [SEL_187] (rows=1861800 width=375)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+ TableScan [TS_6] (rows=1861800 width=375)
+ default@customer_demographics,customer_demographics,Tbl:COMPLETE,Col:COMPLETE,Output:["cd_demo_sk","cd_gender","cd_marital_status","cd_education_status","cd_purchase_estimate","cd_credit_rating","cd_dep_count","cd_dep_employed_count","cd_dep_college_count"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_49]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_173] (rows=3860070 width=5)
+ Conds:RS_183._col2=RS_186._col0(Inner),Output:["_col0","_col1"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_183]
+ PartitionCols:_col2
+ Select Operator [SEL_182] (rows=77201384 width=11)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_181] (rows=77201384 width=11)
+ predicate:(c_current_addr_sk is not null and c_current_cdemo_sk is not null)
+ TableScan [TS_0] (rows=80000000 width=11)
+ default@customer,c,Tbl:COMPLETE,Col:COMPLETE,Output:["c_customer_sk","c_current_cdemo_sk","c_current_addr_sk"]
+ <-Map 12 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_186]
+ PartitionCols:_col0
+ Select Operator [SEL_185] (rows=2000000 width=102)
+ Output:["_col0"]
+ Filter Operator [FIL_184] (rows=2000000 width=102)
+ predicate:(ca_county) IN ('Walker County', 'Richland County', 'Gaines County', 'Douglas County', 'Dona Ana County')
+ TableScan [TS_3] (rows=40000000 width=102)
+ default@customer_address,ca,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_county"]
+ <-Reducer 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_55]
+ PartitionCols:_col0
+ Group By Operator [GBY_53] (rows=155827 width=2)
+ Output:["_col0"],keys:_col0
+ Select Operator [SEL_17] (rows=57825495 width=2)
+ Output:["_col0"]
+ Merge Join Operator [MERGEJOIN_175] (rows=57825495 width=2)
+ Conds:RS_209._col0=RS_191._col0(Inner),Output:["_col1"]
+ <-Map 16 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_191]
+ PartitionCols:_col0
+ Select Operator [SEL_190] (rows=201 width=4)
+ Output:["_col0"]
+ Filter Operator [FIL_189] (rows=201 width=12)
+ predicate:((d_year = 2002) and d_moy BETWEEN 4 AND 7)
+ TableScan [TS_11] (rows=73049 width=12)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE,Output:["d_date_sk","d_year","d_moy"]
+ <-Map 14 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_209]
+ PartitionCols:_col0
+ Select Operator [SEL_208] (rows=525327388 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_207] (rows=525327388 width=7)
+ predicate:((ss_customer_sk BETWEEN DynamicValue(RS_54_c_c_customer_sk_min) AND DynamicValue(RS_54_c_c_customer_sk_max) and in_bloom_filter(ss_customer_sk, DynamicValue(RS_54_c_c_customer_sk_bloom_filter))) and (ss_sold_date_sk BETWEEN DynamicValue(RS_15_date_dim_d_date_sk_min) AND DynamicValue(RS_15_date_dim_d_date_sk_max) and in_bloom_filter(ss_sold_date_sk, DynamicValue(RS_15_date_dim_d_date_sk_bloom_filter))) and ss_customer_sk is not null and ss_sold_date_sk is not null)
+ TableScan [TS_8] (rows=575995635 width=7)
+ default@store_sales,store_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ss_sold_date_sk","ss_customer_sk"]
+ <-Reducer 11 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_206]
+ Group By Operator [GBY_205] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=3647763)"]
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+ SHUFFLE [RS_135]
+ Group By Operator [GBY_134] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=3647763)"]
+ Select Operator [SEL_133] (rows=3914656 width=4)
+ Output:["_col0"]
+ Please refer to the previous Merge Join Operator [MERGEJOIN_174]
+ <-Reducer 17 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_204]
+ Group By Operator [GBY_203] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_200]
+ Group By Operator [GBY_197] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_192] (rows=201 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_190]
+ <-Reducer 19 [ONE_TO_ONE_EDGE] vectorized
+ FORWARD [RS_219]
+ PartitionCols:_col0
+ Select Operator [SEL_218] (rows=155827 width=7)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_217] (rows=155827 width=3)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_29]
+ PartitionCols:_col0
+ Group By Operator [GBY_28] (rows=155827 width=3)
+ Output:["_col0"],keys:_col1
+ Merge Join Operator [MERGEJOIN_176] (rows=15843227 width=3)
+ Conds:RS_216._col0=RS_193._col0(Inner),Output:["_col1"]
+ <-Map 16 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_193]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_190]
+ <-Map 24 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_216]
+ PartitionCols:_col0
+ Select Operator [SEL_215] (rows=143930993 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_214] (rows=143930993 width=7)
+ predicate:((ws_bill_customer_sk BETWEEN DynamicValue(RS_57_c_c_customer_sk_min) AND DynamicValue(RS_57_c_c_customer_sk_max) and in_bloom_filter(ws_bill_customer_sk, DynamicValue(RS_57_c_c_customer_sk_bloom_filter))) and (ws_sold_date_sk BETWEEN DynamicValue(RS_25_date_dim_d_date_sk_min) AND DynamicValue(RS_25_date_dim_d_date_sk_max) and in_bloom_filter(ws_sold_date_sk, DynamicValue(RS_25_date_dim_d_date_sk_bloom_filter))) and ws_bill_customer_sk is not null and ws_sold_date_sk is not null)
+ TableScan [TS_18] (rows=144002668 width=7)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["ws_sold_date_sk","ws_bill_customer_sk"]
+ <-Reducer 10 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_213]
+ Group By Operator [GBY_212] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Reducer 4 [CUSTOM_SIMPLE_EDGE]
+ FORWARD [RS_150]
+ Group By Operator [GBY_149] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_148] (rows=7792 width=4)
+ Output:["_col0"]
+ Please refer to the previous Merge Join Operator [MERGEJOIN_178]
+ <-Reducer 20 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_211]
+ Group By Operator [GBY_210] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_201]
+ Group By Operator [GBY_198] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_194] (rows=201 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_190]
+ <-Reducer 22 [ONE_TO_ONE_EDGE] vectorized
+ FORWARD [RS_229]
+ PartitionCols:_col0
+ Select Operator [SEL_228] (rows=154725 width=7)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_227] (rows=154725 width=3)
+ Output:["_col0"],keys:KEY._col0
+ <-Reducer 21 [SIMPLE_EDGE]
+ SHUFFLE [RS_43]
+ PartitionCols:_col0
+ Group By Operator [GBY_42] (rows=154725 width=3)
+ Output:["_col0"],keys:_col1
+ Merge Join Operator [MERGEJOIN_177] (rows=31162251 width=3)
+ Conds:RS_226._col0=RS_195._col0(Inner),Output:["_col1"]
+ <-Map 16 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_195]
+ PartitionCols:_col0
+ Please refer to the previous Select Operator [SEL_190]
+ <-Map 25 [SIMPLE_EDGE] vectorized
+ SHUFFLE [RS_226]
+ PartitionCols:_col0
+ Select Operator [SEL_225] (rows=285115246 width=7)
+ Output:["_col0","_col1"]
+ Filter Operator [FIL_224] (rows=285115246 width=7)
+ predicate:((cs_ship_customer_sk BETWEEN DynamicValue(RS_60_c_c_customer_sk_min) AND DynamicValue(RS_60_c_c_customer_sk_max) and in_bloom_filter(cs_ship_customer_sk, DynamicValue(RS_60_c_c_customer_sk_bloom_filter))) and (cs_sold_date_sk BETWEEN DynamicValue(RS_39_date_dim_d_date_sk_min) AND DynamicValue(RS_39_date_dim_d_date_sk_max) and in_bloom_filter(cs_sold_date_sk, DynamicValue(RS_39_date_dim_d_date_sk_bloom_filter))) and cs_ship_customer_sk is not null and cs_sold_date_sk is not null)
+ TableScan [TS_32] (rows=287989836 width=7)
+ default@catalog_sales,catalog_sales,Tbl:COMPLETE,Col:COMPLETE,Output:["cs_sold_date_sk","cs_ship_customer_sk"]
+ <-Reducer 23 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_221]
+ Group By Operator [GBY_220] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Map 16 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_202]
+ Group By Operator [GBY_199] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_196] (rows=201 width=4)
+ Output:["_col0"]
+ Please refer to the previous Select Operator [SEL_190]
+ <-Reducer 9 [BROADCAST_EDGE] vectorized
+ BROADCAST [RS_223]
+ Group By Operator [GBY_222] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","bloom_filter(VALUE._col2, expectedEntries=1000000)"]
+ <-Reducer 5 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_165]
+ Group By Operator [GBY_164] (rows=1 width=12)
+ Output:["_col0","_col1","_col2"],aggregations:["min(_col0)","max(_col0)","bloom_filter(_col0, expectedEntries=1000000)"]
+ Select Operator [SEL_163] (rows=58 width=4)
+ Output:["_col0"]
+ Please refer to the previous Merge Join Operator [MERGEJOIN_179]
+