You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/01/27 09:44:33 UTC

hive git commit: HIVE-12478: Improve Hive/Calcite Transitive Predicate inference (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran) (addendum)

Repository: hive
Updated Branches:
  refs/heads/master 48b201ee1 -> 7981904fa


HIVE-12478: Improve Hive/Calcite Transitive Predicate inference (Jesus Camacho Rodriguez, reviewed by Laljo John Pullokkaran) (addendum)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7981904f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7981904f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7981904f

Branch: refs/heads/master
Commit: 7981904fab059c96cf64757fa47853d90d76e8e2
Parents: 48b201e
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Wed Jan 27 09:44:14 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Wed Jan 27 09:44:14 2016 +0100

----------------------------------------------------------------------
 .../results/clientpositive/perf/query97.q.out   | 239 ++++++++++---------
 1 file changed, 127 insertions(+), 112 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7981904f/ql/src/test/results/clientpositive/perf/query97.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/perf/query97.q.out b/ql/src/test/results/clientpositive/perf/query97.q.out
index 4e254e3..ec5f125 100644
--- a/ql/src/test/results/clientpositive/perf/query97.q.out
+++ b/ql/src/test/results/clientpositive/perf/query97.q.out
@@ -2,7 +2,7 @@ PREHOOK: query: explain select sum(case when ssci.customer_sk is not null and cs
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select sum(case when ssci.customer_sk is not null and csci.customer_sk is null then 1 else 0 end) store_only ,sum(case when ssci.customer_sk is null and csci.customer_sk is not null then 1 else 0 end) catalog_only ,sum(case when ssci.customer_sk is not null and csci.customer_sk is not null then 1 else 0 end) store_and_catalog from ( select ss_customer_sk customer_sk ,ss_item_sk item_sk from store_sales JOIN date_dim ON store_sales.ss_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by ss_customer_sk ,ss_item_sk) ssci full outer join ( select cs_bill_customer_sk customer_sk ,cs_item_sk item_sk from catalog_sales JOIN date_dim ON catalog_sales.cs_sold_date_sk = date_dim.d_date_sk where d_month_seq between 1193 and 1193 + 11 group by cs_bill_customer_sk ,cs_item_sk) csci on (ssci.customer_sk=csci.customer_sk and ssci.item_sk = csci.item_sk) limit 100
 POSTHOOK: type: QUERY
-Plan not optimized by CBO.
+Plan optimized by CBO.
 
 Vertex dependency in root stage
 Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
@@ -17,132 +17,147 @@ Stage-0
       limit:100
       Stage-1
          Reducer 5
-         File Output Operator [FS_35]
+         File Output Operator [FS_38]
             compressed:false
             Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
             table:{"input format:":"org.apache.hadoop.mapred.TextInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"}
-            Limit [LIM_34]
+            Limit [LIM_37]
                Number of rows:100
                Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-               Group By Operator [GBY_32]
+               Group By Operator [GBY_35]
                |  aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)"]
                |  outputColumnNames:["_col0","_col1","_col2"]
                |  Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                |<-Reducer 4 [SIMPLE_EDGE]
-                  Reduce Output Operator [RS_31]
+                  Reduce Output Operator [RS_34]
                      sort order:
                      Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
                      value expressions:_col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
-                     Group By Operator [GBY_30]
-                        aggregations:["sum(CASE WHEN ((_col0 is not null and _col2 is null)) THEN (1) ELSE (0) END)","sum(CASE WHEN ((_col0 is null and _col2 is not null)) THEN (1) ELSE (0) END)","sum(CASE WHEN ((_col0 is not null and _col2 is not null)) THEN (1) ELSE (0) END)"]
+                     Group By Operator [GBY_33]
+                        aggregations:["sum(_col0)","sum(_col1)","sum(_col2)"]
                         outputColumnNames:["_col0","_col1","_col2"]
                         Statistics:Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE
-                        Merge Join Operator [MERGEJOIN_46]
-                        |  condition map:[{"":"Outer Join 0 to 1"}]
-                        |  keys:{"0":"_col0 (type: int), _col1 (type: int)","1":"_col0 (type: int), _col1 (type: int)"}
-                        |  outputColumnNames:["_col0","_col2"]
-                        |  Statistics:Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE
-                        |<-Reducer 3 [SIMPLE_EDGE]
-                        |  Reduce Output Operator [RS_26]
-                        |     key expressions:_col0 (type: int), _col1 (type: int)
-                        |     Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
-                        |     sort order:++
-                        |     Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
-                        |     Group By Operator [GBY_11]
-                        |     |  keys:KEY._col0 (type: int), KEY._col1 (type: int)
-                        |     |  outputColumnNames:["_col0","_col1"]
-                        |     |  Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
-                        |     |<-Reducer 2 [SIMPLE_EDGE]
-                        |        Reduce Output Operator [RS_10]
-                        |           key expressions:_col0 (type: int), _col1 (type: int)
-                        |           Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
-                        |           sort order:++
-                        |           Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                        |           Group By Operator [GBY_9]
-                        |              keys:_col3 (type: int), _col2 (type: int)
-                        |              outputColumnNames:["_col0","_col1"]
-                        |              Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                        |              Select Operator [SEL_8]
-                        |                 outputColumnNames:["_col3","_col2"]
-                        |                 Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                        |                 Merge Join Operator [MERGEJOIN_44]
-                        |                 |  condition map:[{"":"Inner Join 0 to 1"}]
-                        |                 |  keys:{"0":"ss_sold_date_sk (type: int)","1":"d_date_sk (type: int)"}
-                        |                 |  outputColumnNames:["_col2","_col3"]
-                        |                 |  Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                        |                 |<-Map 1 [SIMPLE_EDGE]
-                        |                 |  Reduce Output Operator [RS_3]
-                        |                 |     key expressions:ss_sold_date_sk (type: int)
-                        |                 |     Map-reduce partition columns:ss_sold_date_sk (type: int)
-                        |                 |     sort order:+
-                        |                 |     Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                        |                 |     value expressions:ss_item_sk (type: int), ss_customer_sk (type: int)
-                        |                 |     Filter Operator [FIL_40]
-                        |                 |        predicate:ss_sold_date_sk is not null (type: boolean)
-                        |                 |        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                        |                 |        TableScan [TS_0]
-                        |                 |           alias:store_sales
-                        |                 |           Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                        |                 |<-Map 6 [SIMPLE_EDGE]
-                        |                    Reduce Output Operator [RS_5]
-                        |                       key expressions:d_date_sk (type: int)
-                        |                       Map-reduce partition columns:d_date_sk (type: int)
-                        |                       sort order:+
-                        |                       Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
-                        |                       Filter Operator [FIL_41]
-                        |                          predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean)
-                        |                          Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
-                        |                          TableScan [TS_1]
-                        |                             alias:date_dim
-                        |                             Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                        |<-Reducer 9 [SIMPLE_EDGE]
-                           Reduce Output Operator [RS_27]
-                              key expressions:_col0 (type: int), _col1 (type: int)
-                              Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
-                              sort order:++
-                              Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
-                              Group By Operator [GBY_24]
-                              |  keys:KEY._col0 (type: int), KEY._col1 (type: int)
-                              |  outputColumnNames:["_col0","_col1"]
-                              |  Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
-                              |<-Reducer 8 [SIMPLE_EDGE]
-                                 Reduce Output Operator [RS_23]
-                                    key expressions:_col0 (type: int), _col1 (type: int)
-                                    Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
-                                    sort order:++
-                                    Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                                    Group By Operator [GBY_22]
-                                       keys:_col3 (type: int), _col15 (type: int)
-                                       outputColumnNames:["_col0","_col1"]
+                        Select Operator [SEL_31]
+                           outputColumnNames:["_col0","_col1","_col2"]
+                           Statistics:Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE
+                           Merge Join Operator [MERGEJOIN_49]
+                           |  condition map:[{"":"Outer Join 0 to 1"}]
+                           |  keys:{"0":"_col0 (type: int), _col1 (type: int)","1":"_col0 (type: int), _col1 (type: int)"}
+                           |  outputColumnNames:["_col0","_col2"]
+                           |  Statistics:Num rows: 22096 Data size: 24726566 Basic stats: COMPLETE Column stats: NONE
+                           |<-Reducer 3 [SIMPLE_EDGE]
+                           |  Reduce Output Operator [RS_28]
+                           |     key expressions:_col0 (type: int), _col1 (type: int)
+                           |     Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
+                           |     sort order:++
+                           |     Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
+                           |     Select Operator [SEL_13]
+                           |        outputColumnNames:["_col0","_col1"]
+                           |        Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
+                           |        Group By Operator [GBY_12]
+                           |        |  keys:KEY._col0 (type: int), KEY._col1 (type: int)
+                           |        |  outputColumnNames:["_col0","_col1"]
+                           |        |  Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
+                           |        |<-Reducer 2 [SIMPLE_EDGE]
+                           |           Reduce Output Operator [RS_11]
+                           |              key expressions:_col0 (type: int), _col1 (type: int)
+                           |              Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
+                           |              sort order:++
+                           |              Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
+                           |              Group By Operator [GBY_10]
+                           |                 keys:_col1 (type: int), _col2 (type: int)
+                           |                 outputColumnNames:["_col0","_col1"]
+                           |                 Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
+                           |                 Merge Join Operator [MERGEJOIN_47]
+                           |                 |  condition map:[{"":"Inner Join 0 to 1"}]
+                           |                 |  keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+                           |                 |  outputColumnNames:["_col1","_col2"]
+                           |                 |  Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
+                           |                 |<-Map 1 [SIMPLE_EDGE]
+                           |                 |  Reduce Output Operator [RS_6]
+                           |                 |     key expressions:_col0 (type: int)
+                           |                 |     Map-reduce partition columns:_col0 (type: int)
+                           |                 |     sort order:+
+                           |                 |     Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                           |                 |     value expressions:_col1 (type: int), _col2 (type: int)
+                           |                 |     Select Operator [SEL_2]
+                           |                 |        outputColumnNames:["_col0","_col1","_col2"]
+                           |                 |        Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                           |                 |        Filter Operator [FIL_43]
+                           |                 |           predicate:ss_sold_date_sk is not null (type: boolean)
+                           |                 |           Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                           |                 |           TableScan [TS_0]
+                           |                 |              alias:store_sales
+                           |                 |              Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                           |                 |<-Map 6 [SIMPLE_EDGE]
+                           |                    Reduce Output Operator [RS_7]
+                           |                       key expressions:_col0 (type: int)
+                           |                       Map-reduce partition columns:_col0 (type: int)
+                           |                       sort order:+
+                           |                       Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                           |                       Select Operator [SEL_5]
+                           |                          outputColumnNames:["_col0"]
+                           |                          Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                           |                          Filter Operator [FIL_44]
+                           |                             predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean)
+                           |                             Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                           |                             TableScan [TS_3]
+                           |                                alias:date_dim
+                           |                                Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                           |<-Reducer 9 [SIMPLE_EDGE]
+                              Reduce Output Operator [RS_29]
+                                 key expressions:_col0 (type: int), _col1 (type: int)
+                                 Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
+                                 sort order:++
+                                 Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
+                                 Group By Operator [GBY_26]
+                                 |  keys:KEY._col0 (type: int), KEY._col1 (type: int)
+                                 |  outputColumnNames:["_col0","_col1"]
+                                 |  Statistics:Num rows: 20088 Data size: 22478696 Basic stats: COMPLETE Column stats: NONE
+                                 |<-Reducer 8 [SIMPLE_EDGE]
+                                    Reduce Output Operator [RS_25]
+                                       key expressions:_col0 (type: int), _col1 (type: int)
+                                       Map-reduce partition columns:_col0 (type: int), _col1 (type: int)
+                                       sort order:++
                                        Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                                       Merge Join Operator [MERGEJOIN_45]
-                                       |  condition map:[{"":"Inner Join 0 to 1"}]
-                                       |  keys:{"0":"cs_sold_date_sk (type: int)","1":"d_date_sk (type: int)"}
-                                       |  outputColumnNames:["_col3","_col15"]
-                                       |  Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
-                                       |<-Map 10 [SIMPLE_EDGE]
-                                       |  Reduce Output Operator [RS_18]
-                                       |     key expressions:d_date_sk (type: int)
-                                       |     Map-reduce partition columns:d_date_sk (type: int)
-                                       |     sort order:+
-                                       |     Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
-                                       |     Filter Operator [FIL_43]
-                                       |        predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean)
-                                       |        Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
-                                       |        TableScan [TS_14]
-                                       |           alias:date_dim
-                                       |           Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
-                                       |<-Map 7 [SIMPLE_EDGE]
-                                          Reduce Output Operator [RS_16]
-                                             key expressions:cs_sold_date_sk (type: int)
-                                             Map-reduce partition columns:cs_sold_date_sk (type: int)
-                                             sort order:+
-                                             Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                                             value expressions:cs_bill_customer_sk (type: int), cs_item_sk (type: int)
-                                             Filter Operator [FIL_42]
-                                                predicate:cs_sold_date_sk is not null (type: boolean)
+                                       Group By Operator [GBY_24]
+                                          keys:_col1 (type: int), _col2 (type: int)
+                                          outputColumnNames:["_col0","_col1"]
+                                          Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
+                                          Merge Join Operator [MERGEJOIN_48]
+                                          |  condition map:[{"":"Inner Join 0 to 1"}]
+                                          |  keys:{"0":"_col0 (type: int)","1":"_col0 (type: int)"}
+                                          |  outputColumnNames:["_col1","_col2"]
+                                          |  Statistics:Num rows: 40176 Data size: 44957392 Basic stats: COMPLETE Column stats: NONE
+                                          |<-Map 10 [SIMPLE_EDGE]
+                                          |  Reduce Output Operator [RS_21]
+                                          |     key expressions:_col0 (type: int)
+                                          |     Map-reduce partition columns:_col0 (type: int)
+                                          |     sort order:+
+                                          |     Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                                          |     Select Operator [SEL_19]
+                                          |        outputColumnNames:["_col0"]
+                                          |        Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                                          |        Filter Operator [FIL_46]
+                                          |           predicate:(d_date_sk is not null and d_month_seq BETWEEN 1193 AND 1204) (type: boolean)
+                                          |           Statistics:Num rows: 36524 Data size: 40870356 Basic stats: COMPLETE Column stats: NONE
+                                          |           TableScan [TS_17]
+                                          |              alias:date_dim
+                                          |              Statistics:Num rows: 73049 Data size: 81741831 Basic stats: COMPLETE Column stats: NONE
+                                          |<-Map 7 [SIMPLE_EDGE]
+                                             Reduce Output Operator [RS_20]
+                                                key expressions:_col0 (type: int)
+                                                Map-reduce partition columns:_col0 (type: int)
+                                                sort order:+
                                                 Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
-                                                TableScan [TS_13]
-                                                   alias:catalog_sales
+                                                value expressions:_col1 (type: int), _col2 (type: int)
+                                                Select Operator [SEL_16]
+                                                   outputColumnNames:["_col0","_col1","_col2"]
                                                    Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                                                   Filter Operator [FIL_45]
+                                                      predicate:cs_sold_date_sk is not null (type: boolean)
+                                                      Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                                                      TableScan [TS_14]
+                                                         alias:catalog_sales
+                                                         Statistics:Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE