You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/05/31 00:01:09 UTC
[02/15] hive git commit: HIVE-16764: Support numeric as same as
decimal (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query88.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query88.q.out b/ql/src/test/results/clientpositive/perf/query88.q.out
index f7af4ef..18d0a77 100644
--- a/ql/src/test/results/clientpositive/perf/query88.q.out
+++ b/ql/src/test/results/clientpositive/perf/query88.q.out
@@ -4,9 +4,9 @@ select *
from
(select count(*) h8_30_to_9
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 8
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -15,9 +15,9 @@ from
and store.s_store_name = 'ese') s1,
(select count(*) h9_to_9_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 9
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -26,9 +26,9 @@ from
and store.s_store_name = 'ese') s2,
(select count(*) h9_30_to_10
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 9
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -37,9 +37,9 @@ from
and store.s_store_name = 'ese') s3,
(select count(*) h10_to_10_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 10
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -48,9 +48,9 @@ from
and store.s_store_name = 'ese') s4,
(select count(*) h10_30_to_11
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 10
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -59,9 +59,9 @@ from
and store.s_store_name = 'ese') s5,
(select count(*) h11_to_11_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 11
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -70,9 +70,9 @@ from
and store.s_store_name = 'ese') s6,
(select count(*) h11_30_to_12
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 11
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -81,9 +81,9 @@ from
and store.s_store_name = 'ese') s7,
(select count(*) h12_to_12_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 12
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -96,9 +96,9 @@ select *
from
(select count(*) h8_30_to_9
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 8
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -107,9 +107,9 @@ from
and store.s_store_name = 'ese') s1,
(select count(*) h9_to_9_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 9
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -118,9 +118,9 @@ from
and store.s_store_name = 'ese') s2,
(select count(*) h9_30_to_10
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 9
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -129,9 +129,9 @@ from
and store.s_store_name = 'ese') s3,
(select count(*) h10_to_10_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 10
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -140,9 +140,9 @@ from
and store.s_store_name = 'ese') s4,
(select count(*) h10_30_to_11
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 10
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -151,9 +151,9 @@ from
and store.s_store_name = 'ese') s5,
(select count(*) h11_to_11_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 11
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -162,9 +162,9 @@ from
and store.s_store_name = 'ese') s6,
(select count(*) h11_30_to_12
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 11
and time_dim.t_minute >= 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
@@ -173,9 +173,9 @@ from
and store.s_store_name = 'ese') s7,
(select count(*) h12_to_12_30
from store_sales, household_demographics , time_dim, store
- where store_sales.ss_sold_time_sk = time_dim.t_time_sk
- and store_sales.ss_hdemo_sk = household_demographics.hd_demo_sk
- and store_sales.ss_store_sk = store.s_store_sk
+ where ss_sold_time_sk = time_dim.t_time_sk
+ and ss_hdemo_sk = household_demographics.hd_demo_sk
+ and ss_store_sk = s_store_sk
and time_dim.t_hour = 12
and time_dim.t_minute < 30
and ((household_demographics.hd_dep_count = 3 and household_demographics.hd_vehicle_count<=3+2) or
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query89.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query89.q.out b/ql/src/test/results/clientpositive/perf/query89.q.out
index 27467bd..1f13f9e 100644
--- a/ql/src/test/results/clientpositive/perf/query89.q.out
+++ b/ql/src/test/results/clientpositive/perf/query89.q.out
@@ -9,9 +9,9 @@ select i_category, i_class, i_brand,
(partition by i_category, i_brand, s_store_name, s_company_name)
avg_monthly_sales
from item, store_sales, date_dim, store
-where store_sales.ss_item_sk = item.i_item_sk and
- store_sales.ss_sold_date_sk = date_dim.d_date_sk and
- store_sales.ss_store_sk = store.s_store_sk and
+where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
d_year in (2000) and
((i_category in ('Home','Books','Electronics') and
i_class in ('wallpaper','parenting','musical')
@@ -36,9 +36,9 @@ select i_category, i_class, i_brand,
(partition by i_category, i_brand, s_store_name, s_company_name)
avg_monthly_sales
from item, store_sales, date_dim, store
-where store_sales.ss_item_sk = item.i_item_sk and
- store_sales.ss_sold_date_sk = date_dim.d_date_sk and
- store_sales.ss_store_sk = store.s_store_sk and
+where ss_item_sk = i_item_sk and
+ ss_sold_date_sk = d_date_sk and
+ ss_store_sk = s_store_sk and
d_year in (2000) and
((i_category in ('Home','Books','Electronics') and
i_class in ('wallpaper','parenting','musical')
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query9.q.out b/ql/src/test/results/clientpositive/perf/query9.q.out
index 4dded91..425163e 100644
--- a/ql/src/test/results/clientpositive/perf/query9.q.out
+++ b/ql/src/test/results/clientpositive/perf/query9.q.out
@@ -13,12 +13,13 @@ Warning: Shuffle Join MERGEJOIN[182][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_
Warning: Shuffle Join MERGEJOIN[183][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13]] in Stage 'Reducer 14' is a cross product
Warning: Shuffle Join MERGEJOIN[184][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14]] in Stage 'Reducer 15' is a cross product
Warning: Shuffle Join MERGEJOIN[185][tables = [$hdt$_0, $hdt$_1, $hdt$_2, $hdt$_3, $hdt$_4, $hdt$_5, $hdt$_6, $hdt$_7, $hdt$_8, $hdt$_9, $hdt$_10, $hdt$_11, $hdt$_12, $hdt$_13, $hdt$_14, $hdt$_15]] in Stage 'Reducer 16' is a cross product
-PREHOOK: query: explain select case when (select count(*)
- from store_sales
+PREHOOK: query: explain
+select case when (select count(*)
+ from store_sales
where ss_quantity between 1 and 20) > 409437
- then (select avg(ss_ext_list_price)
- from store_sales
- where ss_quantity between 1 and 20)
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 1 and 20)
else (select avg(ss_net_paid_inc_tax)
from store_sales
where ss_quantity between 1 and 20) end bucket1 ,
@@ -27,7 +28,7 @@ PREHOOK: query: explain select case when (select count(*)
where ss_quantity between 21 and 40) > 4595804
then (select avg(ss_ext_list_price)
from store_sales
- where ss_quantity between 21 and 40)
+ where ss_quantity between 21 and 40)
else (select avg(ss_net_paid_inc_tax)
from store_sales
where ss_quantity between 21 and 40) end bucket2,
@@ -61,12 +62,13 @@ PREHOOK: query: explain select case when (select count(*)
from reason
where r_reason_sk = 1
PREHOOK: type: QUERY
-POSTHOOK: query: explain select case when (select count(*)
- from store_sales
+POSTHOOK: query: explain
+select case when (select count(*)
+ from store_sales
where ss_quantity between 1 and 20) > 409437
- then (select avg(ss_ext_list_price)
- from store_sales
- where ss_quantity between 1 and 20)
+ then (select avg(ss_ext_list_price)
+ from store_sales
+ where ss_quantity between 1 and 20)
else (select avg(ss_net_paid_inc_tax)
from store_sales
where ss_quantity between 1 and 20) end bucket1 ,
@@ -75,7 +77,7 @@ POSTHOOK: query: explain select case when (select count(*)
where ss_quantity between 21 and 40) > 4595804
then (select avg(ss_ext_list_price)
from store_sales
- where ss_quantity between 21 and 40)
+ where ss_quantity between 21 and 40)
else (select avg(ss_net_paid_inc_tax)
from store_sales
where ss_quantity between 21 and 40) end bucket2,
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query90.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query90.q.out b/ql/src/test/results/clientpositive/perf/query90.q.out
index aae0aec..b3468ec 100644
--- a/ql/src/test/results/clientpositive/perf/query90.q.out
+++ b/ql/src/test/results/clientpositive/perf/query90.q.out
@@ -1,7 +1,45 @@
Warning: Shuffle Join MERGEJOIN[92][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 6' is a cross product
-PREHOOK: query: explain select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio from ( select count(*) amc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 6 and 6+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) at, ( select count(*) pmc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 14 and 14+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) pt order by am_pm_ratio limit 100
+PREHOOK: query: explain
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at,
+ ( select count(*) pmc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio from ( select count(*) amc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 6 and 6+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) at, ( select count(*) pmc from web_sales, household_demographics , time_dim, web_page where ws_sold_time_sk = time_dim.t_time_sk and ws_ship_hdemo_sk = household_demographics.hd_demo_sk and ws_web_page_sk = web_page.wp_web_page_sk and time_dim.t_hour between 14 and 14+1 and household_demographics.hd_dep_count = 8 and web_page.wp_char_count between 5000 and 5200) pt order by am_pm_ratio limit 100
+POSTHOOK: query: explain
+select cast(amc as decimal(15,4))/cast(pmc as decimal(15,4)) am_pm_ratio
+ from ( select count(*) amc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 6 and 6+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) at,
+ ( select count(*) pmc
+ from web_sales, household_demographics , time_dim, web_page
+ where ws_sold_time_sk = time_dim.t_time_sk
+ and ws_ship_hdemo_sk = household_demographics.hd_demo_sk
+ and ws_web_page_sk = web_page.wp_web_page_sk
+ and time_dim.t_hour between 14 and 14+1
+ and household_demographics.hd_dep_count = 8
+ and web_page.wp_char_count between 5000 and 5200) pt
+ order by am_pm_ratio
+ limit 100
POSTHOOK: type: QUERY
Plan optimized by CBO.
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query91.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query91.q.out b/ql/src/test/results/clientpositive/perf/query91.q.out
index 0ee7e43..ef04f4a 100644
--- a/ql/src/test/results/clientpositive/perf/query91.q.out
+++ b/ql/src/test/results/clientpositive/perf/query91.q.out
@@ -1,6 +1,62 @@
-PREHOOK: query: explain select cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk and catalog_returns.cr_returned_date_sk = date_dim.d_date_sk and catalog_returns.cr_returning_customer_sk= customer.c_customer_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and customer_address.ca_address_sk = customer.c_current_addr_sk and d_year = 1999 and d_moy = 11 and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like '0-500%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by Returns_Loss d
esc
+PREHOOK: query: explain
+select
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%'
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc
PREHOOK: type: QUERY
-POSTHOOK: query: explain select cc_call_center_id Call_Center, cc_name Call_Center_Name, cc_manager Manager, sum(cr_net_loss) Returns_Loss from call_center, catalog_returns, date_dim, customer, customer_address, customer_demographics, household_demographics where catalog_returns.cr_call_center_sk = call_center.cc_call_center_sk and catalog_returns.cr_returned_date_sk = date_dim.d_date_sk and catalog_returns.cr_returning_customer_sk= customer.c_customer_sk and customer_demographics.cd_demo_sk = customer.c_current_cdemo_sk and household_demographics.hd_demo_sk = customer.c_current_hdemo_sk and customer_address.ca_address_sk = customer.c_current_addr_sk and d_year = 1999 and d_moy = 11 and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown') or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree')) and hd_buy_potential like '0-500%' and ca_gmt_offset = -7 group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status order by Returns_Loss
desc
+POSTHOOK: query: explain
+select
+ cc_call_center_id Call_Center,
+ cc_name Call_Center_Name,
+ cc_manager Manager,
+ sum(cr_net_loss) Returns_Loss
+from
+ call_center,
+ catalog_returns,
+ date_dim,
+ customer,
+ customer_address,
+ customer_demographics,
+ household_demographics
+where
+ cr_call_center_sk = cc_call_center_sk
+and cr_returned_date_sk = d_date_sk
+and cr_returning_customer_sk= c_customer_sk
+and cd_demo_sk = c_current_cdemo_sk
+and hd_demo_sk = c_current_hdemo_sk
+and ca_address_sk = c_current_addr_sk
+and d_year = 1999
+and d_moy = 11
+and ( (cd_marital_status = 'M' and cd_education_status = 'Unknown')
+ or(cd_marital_status = 'W' and cd_education_status = 'Advanced Degree'))
+and hd_buy_potential like '0-500%'
+and ca_gmt_offset = -7
+group by cc_call_center_id,cc_name,cc_manager,cd_marital_status,cd_education_status
+order by sum(cr_net_loss) desc
POSTHOOK: type: QUERY
Plan optimized by CBO.
@@ -25,7 +81,7 @@ Stage-0
<-Reducer 5 [SIMPLE_EDGE]
SHUFFLE [RS_45]
Select Operator [SEL_44] (rows=58564004 width=860)
- Output:["_col0","_col1","_col2","_col3"]
+ Output:["_col0","_col1","_col2","_col6"]
Group By Operator [GBY_43] (rows=58564004 width=860)
Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3, KEY._col4
<-Reducer 4 [SIMPLE_EDGE]
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query92.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query92.q.out b/ql/src/test/results/clientpositive/perf/query92.q.out
index d3abc23..ec4fbb9 100644
--- a/ql/src/test/results/clientpositive/perf/query92.q.out
+++ b/ql/src/test/results/clientpositive/perf/query92.q.out
@@ -1,90 +1,163 @@
-PREHOOK: query: explain SELECT sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) as store_only, sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) as catalog_only, sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) as store_and_catalog FROM (SELECT ss.ss_customer_sk as customer_sk, ss.ss_item_sk as item_sk FROM store_sales ss JOIN date_dim d1 ON (ss.ss_sold_date_sk = d1.d_date_sk) WHERE d1.d_month_seq >= 1206 and d1.d_month_seq <= 1217 GROUP BY ss.ss_customer_sk, ss.ss_item_sk) ssci FULL OUTER JOIN (SELECT cs.cs_bill_customer_sk as customer_sk, cs.cs_item_sk as item_sk FROM catalog_sales cs JOIN date_dim d2 ON (cs.cs_sold_date_sk = d2.d_date_sk) WHERE d2.d_month_seq >= 1206 and d2.d_month_seq <= 1217 GROUP BY cs.cs_bill_customer_sk, cs.cs_item_sk) csci ON (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk)
+PREHOOK: query: explain
+select
+ sum(ws_ext_discount_amt) as `Excess Discount Amount`
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > (
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) as store_only, sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) as catalog_only, sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) as store_and_catalog FROM (SELECT ss.ss_customer_sk as customer_sk, ss.ss_item_sk as item_sk FROM store_sales ss JOIN date_dim d1 ON (ss.ss_sold_date_sk = d1.d_date_sk) WHERE d1.d_month_seq >= 1206 and d1.d_month_seq <= 1217 GROUP BY ss.ss_customer_sk, ss.ss_item_sk) ssci FULL OUTER JOIN (SELECT cs.cs_bill_customer_sk as customer_sk, cs.cs_item_sk as item_sk FROM catalog_sales cs JOIN date_dim d2 ON (cs.cs_sold_date_sk = d2.d_date_sk) WHERE d2.d_month_seq >= 1206 and d2.d_month_seq <= 1217 GROUP BY cs.cs_bill_customer_sk, cs.cs_item_sk) csci ON (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk)
+POSTHOOK: query: explain
+select
+ sum(ws_ext_discount_amt) as `Excess Discount Amount`
+from
+ web_sales
+ ,item
+ ,date_dim
+where
+i_manufact_id = 269
+and i_item_sk = ws_item_sk
+and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+and d_date_sk = ws_sold_date_sk
+and ws_ext_discount_amt
+ > (
+ SELECT
+ 1.3 * avg(ws_ext_discount_amt)
+ FROM
+ web_sales
+ ,date_dim
+ WHERE
+ ws_item_sk = i_item_sk
+ and d_date between '1998-03-18' and
+ (cast('1998-03-18' as date) + 90 days)
+ and d_date_sk = ws_sold_date_sk
+ )
+order by sum(ws_ext_discount_amt)
+limit 100
POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Reducer 3 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
-Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
-Reducer 7 <- Map 6 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
-Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 3 <- Reducer 2 (SIMPLE_EDGE), Reducer 8 (SIMPLE_EDGE)
+Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
+Reducer 8 <- Map 10 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
limit:-1
Stage-1
Reducer 5
- File Output Operator [FS_37]
- Group By Operator [GBY_35] (rows=1 width=24)
- Output:["_col0","_col1","_col2"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
- <-Reducer 4 [CUSTOM_SIMPLE_EDGE]
- PARTITION_ONLY_SHUFFLE [RS_34]
- Group By Operator [GBY_33] (rows=1 width=24)
- Output:["_col0","_col1","_col2"],aggregations:["sum(_col0)","sum(_col1)","sum(_col2)"]
- Select Operator [SEL_31] (rows=348477374 width=88)
- Output:["_col0","_col1","_col2"]
- Merge Join Operator [MERGEJOIN_48] (rows=348477374 width=88)
- Conds:RS_28._col0, _col1=RS_29._col0, _col1(Outer),Output:["_col0","_col2"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_28]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_12] (rows=316797606 width=88)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_11]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_10] (rows=633595212 width=88)
- Output:["_col0","_col1"],keys:_col2, _col1
- Merge Join Operator [MERGEJOIN_46] (rows=633595212 width=88)
- Conds:RS_6._col0=RS_7._col0(Inner),Output:["_col1","_col2"]
- <-Map 6 [SIMPLE_EDGE]
- SHUFFLE [RS_7]
- PartitionCols:_col0
- Select Operator [SEL_5] (rows=8116 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_43] (rows=8116 width=1119)
- predicate:((d_month_seq >= 1206) and (d_month_seq <= 1217) and d_date_sk is not null)
- TableScan [TS_3] (rows=73049 width=1119)
- default@date_dim,d1,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_month_seq"]
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_6]
- PartitionCols:_col0
- Select Operator [SEL_2] (rows=575995635 width=88)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_42] (rows=575995635 width=88)
- predicate:ss_sold_date_sk is not null
- TableScan [TS_0] (rows=575995635 width=88)
- default@store_sales,ss,Tbl:COMPLETE,Col:NONE,Output:["ss_sold_date_sk","ss_item_sk","ss_customer_sk"]
- <-Reducer 8 [SIMPLE_EDGE]
- SHUFFLE [RS_29]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_26] (rows=158394413 width=135)
- Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
- <-Reducer 7 [SIMPLE_EDGE]
- SHUFFLE [RS_25]
- PartitionCols:_col0, _col1
- Group By Operator [GBY_24] (rows=316788826 width=135)
- Output:["_col0","_col1"],keys:_col1, _col2
- Merge Join Operator [MERGEJOIN_47] (rows=316788826 width=135)
- Conds:RS_20._col0=RS_21._col0(Inner),Output:["_col1","_col2"]
- <-Map 6 [SIMPLE_EDGE]
- SHUFFLE [RS_21]
- PartitionCols:_col0
- Select Operator [SEL_19] (rows=8116 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_45] (rows=8116 width=1119)
- predicate:((d_month_seq >= 1206) and (d_month_seq <= 1217) and d_date_sk is not null)
- Please refer to the previous TableScan [TS_3]
- <-Map 9 [SIMPLE_EDGE]
- SHUFFLE [RS_20]
- PartitionCols:_col0
- Select Operator [SEL_16] (rows=287989836 width=135)
- Output:["_col0","_col1","_col2"]
- Filter Operator [FIL_44] (rows=287989836 width=135)
- predicate:cs_sold_date_sk is not null
- TableScan [TS_14] (rows=287989836 width=135)
- default@catalog_sales,cs,Tbl:COMPLETE,Col:NONE,Output:["cs_sold_date_sk","cs_bill_customer_sk","cs_item_sk"]
+ File Output Operator [FS_43]
+ Limit [LIM_41] (rows=1 width=112)
+ Number of rows:100
+ Select Operator [SEL_40] (rows=1 width=112)
+ Output:["_col0"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_39]
+ Select Operator [SEL_38] (rows=1 width=112)
+ Output:["_col1"]
+ Group By Operator [GBY_37] (rows=1 width=112)
+ Output:["_col0"],aggregations:["sum(VALUE._col0)"]
+ <-Reducer 3 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_36]
+ Group By Operator [GBY_35] (rows=1 width=112)
+ Output:["_col0"],aggregations:["sum(_col2)"]
+ Select Operator [SEL_34] (rows=58081078 width=135)
+ Output:["_col2"]
+ Filter Operator [FIL_33] (rows=58081078 width=135)
+ predicate:(_col2 > _col5)
+ Merge Join Operator [MERGEJOIN_64] (rows=174243235 width=135)
+ Conds:RS_30._col1=RS_31._col2(Inner),Output:["_col2","_col5"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_30]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_61] (rows=158402938 width=135)
+ Conds:RS_27._col0=RS_28._col0(Inner),Output:["_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_27]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_56] (rows=144002668 width=135)
+ predicate:(ws_item_sk is not null and ws_sold_date_sk is not null)
+ TableScan [TS_0] (rows=144002668 width=135)
+ default@web_sales,web_sales,Tbl:COMPLETE,Col:NONE,Output:["ws_sold_date_sk","ws_item_sk","ws_ext_discount_amt"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_28]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_57] (rows=8116 width=1119)
+ predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-18 00:00:00.0 AND 1998-06-16 01:00:00.0 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
+ <-Reducer 8 [SIMPLE_EDGE]
+ SHUFFLE [RS_31]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_63] (rows=87121617 width=135)
+ Conds:RS_23._col1=RS_24._col0(Inner),Output:["_col0","_col2"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_24]
+ PartitionCols:_col0
+ Select Operator [SEL_22] (rows=231000 width=1436)
+ Output:["_col0"]
+ Filter Operator [FIL_60] (rows=231000 width=1436)
+ predicate:((i_manufact_id = 269) and i_item_sk is not null)
+ TableScan [TS_20] (rows=462000 width=1436)
+ default@item,item,Tbl:COMPLETE,Col:NONE,Output:["i_item_sk","i_manufact_id"]
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_23]
+ PartitionCols:_col1
+ Select Operator [SEL_19] (rows=79201469 width=135)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_18] (rows=79201469 width=135)
+ Output:["_col0","_col1"],aggregations:["avg(VALUE._col0)"],keys:KEY._col0
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_17]
+ PartitionCols:_col0
+ Group By Operator [GBY_16] (rows=158402938 width=135)
+ Output:["_col0","_col1"],aggregations:["avg(_col2)"],keys:_col1
+ Merge Join Operator [MERGEJOIN_62] (rows=158402938 width=135)
+ Conds:RS_12._col0=RS_13._col0(Inner),Output:["_col1","_col2"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_12]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2"]
+ Filter Operator [FIL_58] (rows=144002668 width=135)
+ predicate:(ws_item_sk is not null and ws_sold_date_sk is not null)
+ Please refer to the previous TableScan [TS_0]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_13]
+ PartitionCols:_col0
+ Select Operator [SEL_11] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_59] (rows=8116 width=1119)
+ predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1998-03-18 00:00:00.0 AND 1998-06-16 01:00:00.0 and d_date_sk is not null)
+ Please refer to the previous TableScan [TS_3]
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query93.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query93.q.out b/ql/src/test/results/clientpositive/perf/query93.q.out
index f28ba41..fc58396 100644
--- a/ql/src/test/results/clientpositive/perf/query93.q.out
+++ b/ql/src/test/results/clientpositive/perf/query93.q.out
@@ -1,6 +1,36 @@
-PREHOOK: query: explain select ss_customer_sk ,sum(act_sales) sumsales from (select ss_item_sk ,ss_ticket_number ,ss_customer_sk ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price else (ss_quantity*ss_sales_price) end act_sales from store_sales left outer join store_returns on (store_returns.sr_item_sk = store_sales.ss_item_sk and store_returns.sr_ticket_number = store_sales.ss_ticket_number) ,reason where store_returns.sr_reason_sk = reason.r_reason_sk and r_reason_desc = 'Did not like the warranty') t group by ss_customer_sk order by sumsales, ss_customer_sk limit 100
+PREHOOK: query: explain
+select ss_customer_sk
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
+ else (ss_quantity*ss_sales_price) end act_sales
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain select ss_customer_sk ,sum(act_sales) sumsales from (select ss_item_sk ,ss_ticket_number ,ss_customer_sk ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price else (ss_quantity*ss_sales_price) end act_sales from store_sales left outer join store_returns on (store_returns.sr_item_sk = store_sales.ss_item_sk and store_returns.sr_ticket_number = store_sales.ss_ticket_number) ,reason where store_returns.sr_reason_sk = reason.r_reason_sk and r_reason_desc = 'Did not like the warranty') t group by ss_customer_sk order by sumsales, ss_customer_sk limit 100
+POSTHOOK: query: explain
+select ss_customer_sk
+ ,sum(act_sales) sumsales
+ from (select ss_item_sk
+ ,ss_ticket_number
+ ,ss_customer_sk
+ ,case when sr_return_quantity is not null then (ss_quantity-sr_return_quantity)*ss_sales_price
+ else (ss_quantity*ss_sales_price) end act_sales
+ from store_sales left outer join store_returns on (sr_item_sk = ss_item_sk
+ and sr_ticket_number = ss_ticket_number)
+ ,reason
+ where sr_reason_sk = r_reason_sk
+ and r_reason_desc = 'Did not like the warranty') t
+ group by ss_customer_sk
+ order by sumsales, ss_customer_sk
+limit 100
POSTHOOK: type: QUERY
Plan optimized by CBO.
http://git-wip-us.apache.org/repos/asf/hive/blob/86b18772/ql/src/test/results/clientpositive/perf/query94.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query94.q.out b/ql/src/test/results/clientpositive/perf/query94.q.out
index 836b16b..6e24345 100644
--- a/ql/src/test/results/clientpositive/perf/query94.q.out
+++ b/ql/src/test/results/clientpositive/perf/query94.q.out
@@ -1,130 +1,231 @@
-PREHOOK: query: explain SELECT count(distinct ws_order_number) as order_count, sum(ws_ext_ship_cost) as total_shipping_cost, sum(ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT OUTER JOIN web_returns wr1 ON (ws1.ws_order_number = wr1.wr_order_number) WHERE d.d_date between '1999-05-01' and '1999-07-01' and ca.ca_state = 'TX' and s.web_company_name = 'pri' and wr1.wr_order_number is null limit 100
+Warning: Shuffle Join MERGEJOIN[107][tables = [$hdt$_2, $hdt$_3, $hdt$_1, $hdt$_4]] in Stage 'Reducer 17' is a cross product
+PREHOOK: query: explain
+select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
PREHOOK: type: QUERY
-POSTHOOK: query: explain SELECT count(distinct ws_order_number) as order_count, sum(ws_ext_ship_cost) as total_shipping_cost, sum(ws_net_profit) as total_net_profit FROM web_sales ws1 JOIN customer_address ca ON (ws1.ws_ship_addr_sk = ca.ca_address_sk) JOIN web_site s ON (ws1.ws_web_site_sk = s.web_site_sk) JOIN date_dim d ON (ws1.ws_ship_date_sk = d.d_date_sk) LEFT SEMI JOIN (SELECT ws2.ws_order_number as ws_order_number FROM web_sales ws2 JOIN web_sales ws3 ON (ws2.ws_order_number = ws3.ws_order_number) WHERE ws2.ws_warehouse_sk <> ws3.ws_warehouse_sk ) ws_wh1 ON (ws1.ws_order_number = ws_wh1.ws_order_number) LEFT OUTER JOIN web_returns wr1 ON (ws1.ws_order_number = wr1.wr_order_number) WHERE d.d_date between '1999-05-01' and '1999-07-01' and ca.ca_state = 'TX' and s.web_company_name = 'pri' and wr1.wr_order_number is null limit 100
+POSTHOOK: query: explain
+select
+ count(distinct ws_order_number) as `order count`
+ ,sum(ws_ext_ship_cost) as `total shipping cost`
+ ,sum(ws_net_profit) as `total net profit`
+from
+ web_sales ws1
+ ,date_dim
+ ,customer_address
+ ,web_site
+where
+ d_date between '1999-5-01' and
+ (cast('1999-5-01' as date) + 60 days)
+and ws1.ws_ship_date_sk = d_date_sk
+and ws1.ws_ship_addr_sk = ca_address_sk
+and ca_state = 'TX'
+and ws1.ws_web_site_sk = web_site_sk
+and web_company_name = 'pri'
+and exists (select *
+ from web_sales ws2
+ where ws1.ws_order_number = ws2.ws_order_number
+ and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk)
+and not exists(select *
+ from web_returns wr1
+ where ws1.ws_order_number = wr1.wr_order_number)
+order by count(distinct ws_order_number)
+limit 100
POSTHOOK: type: QUERY
Plan optimized by CBO.
Vertex dependency in root stage
-Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 9 (SIMPLE_EDGE)
-Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
-Reducer 4 <- Map 12 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
-Reducer 5 <- Map 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
-Reducer 6 <- Map 14 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
+Reducer 13 <- Map 12 (SIMPLE_EDGE)
+Reducer 15 <- Map 14 (SIMPLE_EDGE), Reducer 18 (SIMPLE_EDGE)
+Reducer 16 <- Reducer 15 (SIMPLE_EDGE)
+Reducer 17 <- Map 14 (CUSTOM_SIMPLE_EDGE), Map 19 (CUSTOM_SIMPLE_EDGE), Map 20 (CUSTOM_SIMPLE_EDGE), Map 21 (CUSTOM_SIMPLE_EDGE)
+Reducer 18 <- Reducer 17 (SIMPLE_EDGE)
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 9 (SIMPLE_EDGE)
+Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (SIMPLE_EDGE)
+Reducer 4 <- Map 11 (SIMPLE_EDGE), Reducer 3 (SIMPLE_EDGE)
+Reducer 5 <- Reducer 13 (SIMPLE_EDGE), Reducer 4 (SIMPLE_EDGE)
+Reducer 6 <- Reducer 16 (SIMPLE_EDGE), Reducer 5 (SIMPLE_EDGE)
Reducer 7 <- Reducer 6 (SIMPLE_EDGE)
-Reducer 9 <- Map 10 (SIMPLE_EDGE), Map 8 (SIMPLE_EDGE)
+Reducer 8 <- Reducer 7 (SIMPLE_EDGE)
Stage-0
Fetch Operator
- limit:100
+ limit:-1
Stage-1
- Reducer 7
- File Output Operator [FS_51]
- Limit [LIM_50] (rows=1 width=344)
+ Reducer 8
+ File Output Operator [FS_74]
+ Limit [LIM_72] (rows=1 width=344)
Number of rows:100
- Group By Operator [GBY_48] (rows=1 width=344)
- Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
- <-Reducer 6 [SIMPLE_EDGE]
- SHUFFLE [RS_47]
- Group By Operator [GBY_46] (rows=127554770 width=135)
- Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT _col3)","sum(_col4)","sum(_col5)"],keys:_col3
- Select Operator [SEL_45] (rows=127554770 width=135)
- Output:["_col3","_col4","_col5"]
- Filter Operator [FIL_44] (rows=127554770 width=135)
- predicate:_col12 is null
- Merge Join Operator [MERGEJOIN_85] (rows=255109540 width=135)
- Conds:RS_40._col3=RS_41._col0(Left Outer),Output:["_col3","_col4","_col5","_col12"]
- <-Map 14 [SIMPLE_EDGE]
- SHUFFLE [RS_41]
- PartitionCols:_col0
- Select Operator [SEL_25] (rows=14398467 width=92)
- Output:["_col0"]
- Filter Operator [FIL_79] (rows=14398467 width=92)
- predicate:wr_order_number is not null
- TableScan [TS_23] (rows=14398467 width=92)
- default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
- <-Reducer 5 [SIMPLE_EDGE]
- SHUFFLE [RS_40]
- PartitionCols:_col3
- Merge Join Operator [MERGEJOIN_84] (rows=231917759 width=135)
- Conds:RS_37._col2=RS_38._col0(Inner),Output:["_col3","_col4","_col5"]
- <-Map 13 [SIMPLE_EDGE]
- SHUFFLE [RS_38]
- PartitionCols:_col0
- Select Operator [SEL_22] (rows=42 width=1850)
- Output:["_col0"]
- Filter Operator [FIL_78] (rows=42 width=1850)
- predicate:((web_company_name = 'pri') and web_site_sk is not null)
- TableScan [TS_20] (rows=84 width=1850)
- default@web_site,s,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
- <-Reducer 4 [SIMPLE_EDGE]
- SHUFFLE [RS_37]
- PartitionCols:_col2
- Merge Join Operator [MERGEJOIN_83] (rows=210834322 width=135)
- Conds:RS_34._col1=RS_35._col0(Inner),Output:["_col2","_col3","_col4","_col5"]
- <-Map 12 [SIMPLE_EDGE]
- SHUFFLE [RS_35]
- PartitionCols:_col0
- Select Operator [SEL_19] (rows=20000000 width=1014)
- Output:["_col0"]
- Filter Operator [FIL_77] (rows=20000000 width=1014)
- predicate:((ca_state = 'TX') and ca_address_sk is not null)
- TableScan [TS_17] (rows=40000000 width=1014)
- default@customer_address,ca,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
- <-Reducer 3 [SIMPLE_EDGE]
- SHUFFLE [RS_34]
- PartitionCols:_col1
- Merge Join Operator [MERGEJOIN_82] (rows=191667562 width=135)
- Conds:RS_31._col0=RS_32._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5"]
- <-Map 11 [SIMPLE_EDGE]
- SHUFFLE [RS_32]
+ Select Operator [SEL_71] (rows=1 width=344)
+ Output:["_col0","_col1","_col2"]
+ <-Reducer 7 [SIMPLE_EDGE]
+ SHUFFLE [RS_70]
+ Select Operator [SEL_69] (rows=1 width=344)
+ Output:["_col1","_col2","_col3"]
+ Group By Operator [GBY_68] (rows=1 width=344)
+ Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT KEY._col0:0._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
+ <-Reducer 6 [SIMPLE_EDGE]
+ SHUFFLE [RS_67]
+ Group By Operator [GBY_66] (rows=1395035081047425024 width=1)
+ Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT _col4)","sum(_col5)","sum(_col6)"],keys:_col4
+ Select Operator [SEL_65] (rows=1395035081047425024 width=1)
+ Output:["_col4","_col5","_col6"]
+ Filter Operator [FIL_64] (rows=1395035081047425024 width=1)
+ predicate:_col16 is null
+ Select Operator [SEL_63] (rows=2790070162094850048 width=1)
+ Output:["_col4","_col5","_col6","_col16"]
+ Merge Join Operator [MERGEJOIN_113] (rows=2790070162094850048 width=1)
+ Conds:RS_60._col3, _col4=RS_61._col0, _col1(Inner),Output:["_col4","_col5","_col6","_col14"]
+ <-Reducer 16 [SIMPLE_EDGE]
+ SHUFFLE [RS_61]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_46] (rows=2536427365110644736 width=1)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 15 [SIMPLE_EDGE]
+ SHUFFLE [RS_45]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_44] (rows=5072854730221289472 width=1)
+ Output:["_col0","_col1"],keys:_col2, _col3
+ Select Operator [SEL_43] (rows=5072854730221289472 width=1)
+ Output:["_col2","_col3"]
+ Filter Operator [FIL_42] (rows=5072854730221289472 width=1)
+ predicate:(_col2 <> _col0)
+ Merge Join Operator [MERGEJOIN_111] (rows=5072854730221289472 width=1)
+ Conds:RS_39._col1=RS_40._col1(Inner),Output:["_col0","_col2","_col3"]
+ <-Map 14 [SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_39]
+ PartitionCols:_col1
+ Select Operator [SEL_20] (rows=144002668 width=135)
+ Output:["_col0","_col1"]
+ TableScan [TS_19] (rows=144002668 width=135)
+ default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
+ <-Reducer 18 [SIMPLE_EDGE]
+ SHUFFLE [RS_40]
+ PartitionCols:_col1
+ Select Operator [SEL_38] (rows=4611686018427387903 width=1)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_37] (rows=4611686018427387903 width=1)
+ Output:["_col0","_col1"],keys:KEY._col0, KEY._col1
+ <-Reducer 17 [SIMPLE_EDGE]
+ SHUFFLE [RS_36]
+ PartitionCols:_col0, _col1
+ Group By Operator [GBY_35] (rows=9223372036854775807 width=1)
+ Output:["_col0","_col1"],keys:_col4, _col3
+ Merge Join Operator [MERGEJOIN_107] (rows=9223372036854775807 width=1)
+ Conds:(Inner),(Inner),(Inner),Output:["_col3","_col4"]
+ <-Map 14 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_32]
+ Select Operator [SEL_28] (rows=144002668 width=135)
+ Output:["_col0","_col1"]
+ Please refer to the previous TableScan [TS_19]
+ <-Map 19 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_29]
+ Select Operator [SEL_22] (rows=73049 width=4)
+ TableScan [TS_21] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:COMPLETE
+ <-Map 20 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_30]
+ Select Operator [SEL_24] (rows=84 width=4)
+ TableScan [TS_23] (rows=84 width=1850)
+ default@web_site,web_site,Tbl:COMPLETE,Col:COMPLETE
+ <-Map 21 [CUSTOM_SIMPLE_EDGE]
+ PARTITION_ONLY_SHUFFLE [RS_31]
+ Select Operator [SEL_26] (rows=40000000 width=4)
+ TableScan [TS_25] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE
+ <-Reducer 5 [SIMPLE_EDGE]
+ SHUFFLE [RS_60]
+ PartitionCols:_col3, _col4
+ Merge Join Operator [MERGEJOIN_112] (rows=210834322 width=135)
+ Conds:RS_57._col4=RS_58._col0(Left Outer),Output:["_col3","_col4","_col5","_col6","_col14"]
+ <-Reducer 13 [SIMPLE_EDGE]
+ SHUFFLE [RS_58]
PartitionCols:_col0
- Select Operator [SEL_16] (rows=8116 width=1119)
- Output:["_col0"]
- Filter Operator [FIL_76] (rows=8116 width=1119)
- predicate:(d_date BETWEEN '1999-05-01' AND '1999-07-01' and d_date_sk is not null)
- TableScan [TS_14] (rows=73049 width=1119)
- default@date_dim,d,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]
- <-Reducer 2 [SIMPLE_EDGE]
- SHUFFLE [RS_31]
- PartitionCols:_col0
- Merge Join Operator [MERGEJOIN_81] (rows=174243235 width=135)
- Conds:RS_28._col3=RS_29._col0(Left Semi),Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- <-Map 1 [SIMPLE_EDGE]
- SHUFFLE [RS_28]
- PartitionCols:_col3
- Select Operator [SEL_2] (rows=144002668 width=135)
- Output:["_col0","_col1","_col2","_col3","_col4","_col5"]
- Filter Operator [FIL_73] (rows=144002668 width=135)
- predicate:(ws_ship_addr_sk is not null and ws_web_site_sk is not null and ws_ship_date_sk is not null and ws_order_number is not null)
- TableScan [TS_0] (rows=144002668 width=135)
- default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
- <-Reducer 9 [SIMPLE_EDGE]
- SHUFFLE [RS_29]
+ Select Operator [SEL_18] (rows=7199233 width=92)
+ Output:["_col0","_col1"]
+ Group By Operator [GBY_17] (rows=7199233 width=92)
+ Output:["_col0"],keys:KEY._col0
+ <-Map 12 [SIMPLE_EDGE]
+ SHUFFLE [RS_16]
+ PartitionCols:_col0
+ Group By Operator [GBY_15] (rows=14398467 width=92)
+ Output:["_col0"],keys:wr_order_number
+ Filter Operator [FIL_104] (rows=14398467 width=92)
+ predicate:wr_order_number is not null
+ TableScan [TS_12] (rows=14398467 width=92)
+ default@web_returns,wr1,Tbl:COMPLETE,Col:NONE,Output:["wr_order_number"]
+ <-Reducer 4 [SIMPLE_EDGE]
+ SHUFFLE [RS_57]
+ PartitionCols:_col4
+ Merge Join Operator [MERGEJOIN_110] (rows=191667562 width=135)
+ Conds:RS_54._col2=RS_55._col0(Inner),Output:["_col3","_col4","_col5","_col6"]
+ <-Map 11 [SIMPLE_EDGE]
+ SHUFFLE [RS_55]
PartitionCols:_col0
- Group By Operator [GBY_27] (rows=158402938 width=135)
- Output:["_col0"],keys:_col0
- Select Operator [SEL_13] (rows=158402938 width=135)
- Output:["_col0"]
- Filter Operator [FIL_12] (rows=158402938 width=135)
- predicate:(_col0 <> _col2)
- Merge Join Operator [MERGEJOIN_80] (rows=158402938 width=135)
- Conds:RS_9._col1=RS_10._col1(Inner),Output:["_col0","_col1","_col2"]
- <-Map 10 [SIMPLE_EDGE]
- SHUFFLE [RS_10]
- PartitionCols:_col1
- Select Operator [SEL_8] (rows=144002668 width=135)
- Output:["_col0","_col1"]
- Filter Operator [FIL_75] (rows=144002668 width=135)
- predicate:ws_order_number is not null
- TableScan [TS_6] (rows=144002668 width=135)
- default@web_sales,ws3,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
- <-Map 8 [SIMPLE_EDGE]
- SHUFFLE [RS_9]
- PartitionCols:_col1
- Select Operator [SEL_5] (rows=144002668 width=135)
- Output:["_col0","_col1"]
- Filter Operator [FIL_74] (rows=144002668 width=135)
- predicate:ws_order_number is not null
- TableScan [TS_3] (rows=144002668 width=135)
- default@web_sales,ws2,Tbl:COMPLETE,Col:NONE,Output:["ws_warehouse_sk","ws_order_number"]
+ Select Operator [SEL_11] (rows=42 width=1850)
+ Output:["_col0"]
+ Filter Operator [FIL_103] (rows=42 width=1850)
+ predicate:((web_company_name = 'pri') and web_site_sk is not null)
+ TableScan [TS_9] (rows=84 width=1850)
+ default@web_site,web_site,Tbl:COMPLETE,Col:NONE,Output:["web_site_sk","web_company_name"]
+ <-Reducer 3 [SIMPLE_EDGE]
+ SHUFFLE [RS_54]
+ PartitionCols:_col2
+ Merge Join Operator [MERGEJOIN_109] (rows=174243235 width=135)
+ Conds:RS_51._col1=RS_52._col0(Inner),Output:["_col2","_col3","_col4","_col5","_col6"]
+ <-Map 10 [SIMPLE_EDGE]
+ SHUFFLE [RS_52]
+ PartitionCols:_col0
+ Select Operator [SEL_8] (rows=20000000 width=1014)
+ Output:["_col0"]
+ Filter Operator [FIL_102] (rows=20000000 width=1014)
+ predicate:((ca_state = 'TX') and ca_address_sk is not null)
+ TableScan [TS_6] (rows=40000000 width=1014)
+ default@customer_address,customer_address,Tbl:COMPLETE,Col:NONE,Output:["ca_address_sk","ca_state"]
+ <-Reducer 2 [SIMPLE_EDGE]
+ SHUFFLE [RS_51]
+ PartitionCols:_col1
+ Merge Join Operator [MERGEJOIN_108] (rows=158402938 width=135)
+ Conds:RS_48._col0=RS_49._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col5","_col6"]
+ <-Map 1 [SIMPLE_EDGE]
+ SHUFFLE [RS_48]
+ PartitionCols:_col0
+ Select Operator [SEL_2] (rows=144002668 width=135)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"]
+ Filter Operator [FIL_100] (rows=144002668 width=135)
+ predicate:(ws_ship_date_sk is not null and ws_ship_addr_sk is not null and ws_web_site_sk is not null)
+ TableScan [TS_0] (rows=144002668 width=135)
+ default@web_sales,ws1,Tbl:COMPLETE,Col:NONE,Output:["ws_ship_date_sk","ws_ship_addr_sk","ws_web_site_sk","ws_warehouse_sk","ws_order_number","ws_ext_ship_cost","ws_net_profit"]
+ <-Map 9 [SIMPLE_EDGE]
+ SHUFFLE [RS_49]
+ PartitionCols:_col0
+ Select Operator [SEL_5] (rows=8116 width=1119)
+ Output:["_col0"]
+ Filter Operator [FIL_101] (rows=8116 width=1119)
+ predicate:(CAST( d_date AS TIMESTAMP) BETWEEN 1999-05-01 00:00:00.0 AND 1999-06-30 00:00:00.0 and d_date_sk is not null)
+ TableScan [TS_3] (rows=73049 width=1119)
+ default@date_dim,date_dim,Tbl:COMPLETE,Col:NONE,Output:["d_date_sk","d_date"]