You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2019/03/14 19:21:02 UTC

[hive] branch master updated: HIVE-21398: Columns which has estimated statistics should not be considered as unique keys (addendum)

This is an automated email from the ASF dual-hosted git repository.

kgyrtkirk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new cfaeced  HIVE-21398: Columns which has estimated statistics should not be considered as unique keys (addendum)
cfaeced is described below

commit cfaeced79ee1e17f5c7afdc6e728419268dc8546
Author: Zoltan Haindrich <ki...@rxd.hu>
AuthorDate: Thu Mar 14 20:20:20 2019 +0100

    HIVE-21398: Columns which has estimated statistics should not be considered as unique keys (addendum)
---
 .../llap/dynamic_semijoin_reduction_sw.q.out       | 140 +++++++++------------
 1 file changed, 59 insertions(+), 81 deletions(-)

diff --git a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out
index 0a4f06f..8e76184 100644
--- a/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynamic_semijoin_reduction_sw.q.out
@@ -208,42 +208,21 @@ STAGE PLANS:
     Tez
 #### A masked pattern was here ####
       Edges:
-        Map 1 <- Reducer 7 (BROADCAST_EDGE)
-        Map 11 <- Reducer 7 (BROADCAST_EDGE)
-        Map 12 <- Reducer 10 (BROADCAST_EDGE)
-        Reducer 10 <- Map 6 (CUSTOM_SIMPLE_EDGE)
-        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 3 <- Map 11 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 9 (XPROD_EDGE)
+        Map 1 <- Reducer 8 (BROADCAST_EDGE)
+        Map 10 <- Reducer 8 (BROADCAST_EDGE)
+        Map 11 <- Reducer 9 (BROADCAST_EDGE)
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 7 (SIMPLE_EDGE)
+        Reducer 3 <- Map 10 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (XPROD_EDGE), Reducer 6 (XPROD_EDGE)
         Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
-        Reducer 8 <- Map 12 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE)
-        Reducer 9 <- Map 11 (SIMPLE_EDGE), Reducer 8 (ONE_TO_ONE_EDGE)
+        Reducer 6 <- Map 11 (SIMPLE_EDGE), Reducer 2 (ONE_TO_ONE_EDGE)
+        Reducer 8 <- Map 7 (CUSTOM_SIMPLE_EDGE)
+        Reducer 9 <- Map 7 (CUSTOM_SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
-                  alias: srcpart_date_n6
-                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter)))) (type: boolean)
-                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
-                  Filter Operator
-                    predicate: ((key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean)
-                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
-                    Select Operator
-                      expressions: key (type: string)
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
-            Execution mode: vectorized, llap
-            LLAP IO: all inputs
-        Map 11 
-            Map Operator Tree:
-                TableScan
                   alias: alltypesorc_int_n0
                   filterExpr: (cstring is not null and (cstring BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(cstring, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter)))) (type: boolean)
                   Statistics: Num rows: 12288 Data size: 862450 Basic stats: COMPLETE Column stats: COMPLETE
@@ -259,14 +238,29 @@ STAGE PLANS:
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
                         Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs
+        Map 10 
+            Map Operator Tree:
+                TableScan
+                  alias: srcpart_date_n6
+                  filterExpr: (key is not null and (key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter)))) (type: boolean)
+                  Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+                  Filter Operator
+                    predicate: ((key BETWEEN DynamicValue(RS_26_srcpart_small_n2_key1_min) AND DynamicValue(RS_26_srcpart_small_n2_key1_max) and in_bloom_filter(key, DynamicValue(RS_26_srcpart_small_n2_key1_bloom_filter))) and key is not null) (type: boolean)
+                    Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
+                    Select Operator
+                      expressions: key (type: string)
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 9174 Data size: 643900 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
-        Map 12 
+        Map 11 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_date_n6
@@ -286,7 +280,7 @@ STAGE PLANS:
                         Statistics: Num rows: 1000 Data size: 87000 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
-        Map 6 
+        Map 7 
             Map Operator Tree:
                 TableScan
                   alias: srcpart_small_n2
@@ -317,11 +311,6 @@ STAGE PLANS:
                             sort order: 
                             Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE
                             value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-                      Reduce Output Operator
-                        key expressions: _col0 (type: string)
-                        sort order: +
-                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 1 Data size: 184 Basic stats: PARTIAL Column stats: NONE
                       Select Operator
                         expressions: _col0 (type: string)
                         outputColumnNames: _col0
@@ -337,18 +326,6 @@ STAGE PLANS:
                             value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
-        Reducer 10 
-            Execution mode: vectorized, llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
-                mode: final
-                outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  sort order: 
-                  Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE
-                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
@@ -359,12 +336,17 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col1
-                Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col1 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col1 (type: string)
+                  Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   key expressions: _col1 (type: string)
                   sort order: +
                   Map-reduce partition columns: _col1 (type: string)
-                  Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
         Reducer 3 
             Execution mode: llap
             Reduce Operator Tree:
@@ -374,10 +356,10 @@ STAGE PLANS:
                 keys:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
-                Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE
         Reducer 4 
             Execution mode: llap
             Reduce Operator Tree:
@@ -387,7 +369,7 @@ STAGE PLANS:
                 keys:
                   0 
                   1 
-                Statistics: Num rows: 101828281 Data size: 14396537061 Basic stats: PARTIAL Column stats: NONE
+                Statistics: Num rows: 123210000 Data size: 17419651800 Basic stats: PARTIAL Column stats: NONE
                 Group By Operator
                   aggregations: count()
                   mode: hash
@@ -412,7 +394,20 @@ STAGE PLANS:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                       serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 7 
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                keys:
+                  0 _col1 (type: string)
+                  1 _col0 (type: string)
+                Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 11100 Data size: 779119 Basic stats: PARTIAL Column stats: NONE
+        Reducer 8 
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
@@ -428,35 +423,18 @@ STAGE PLANS:
                   sort order: 
                   Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE
                   value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
-        Reducer 8 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col0 (type: string)
-                  1 _col0 (type: string)
-                outputColumnNames: _col1
-                Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
-                Reduce Output Operator
-                  key expressions: _col1 (type: string)
-                  sort order: +
-                  Map-reduce partition columns: _col1 (type: string)
-                  Statistics: Num rows: 1100 Data size: 95700 Basic stats: PARTIAL Column stats: NONE
         Reducer 9 
-            Execution mode: llap
+            Execution mode: vectorized, llap
             Reduce Operator Tree:
-              Merge Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                keys:
-                  0 _col1 (type: string)
-                  1 _col0 (type: string)
-                Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=1)
+                mode: final
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 10091 Data size: 708290 Basic stats: PARTIAL Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 736 Basic stats: PARTIAL Column stats: NONE
+                  value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary)
 
   Stage: Stage-0
     Fetch Operator