You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by vg...@apache.org on 2018/10/21 19:47:11 UTC

[5/7] hive git commit: HIVE-20703: Put dynamic sort partition optimization under cost based decision (Vineet Garg, reviewed by Prasanth Jayachandran, Sergey Shelukhin)

http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
index 5fd1bf6..40dc5e9 100644
--- a/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
+++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_optimization_acid.q.out
@@ -515,20 +515,19 @@ STAGE PLANS:
                         key expressions: _col3 (type: string), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: +++
                         Map-reduce partition columns: _col3 (type: string)
-                        Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
+                        value expressions: 'foo' (type: string), 'bar' (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string)
+                expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number
-                Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 2220 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 2170 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1286,20 +1285,19 @@ STAGE PLANS:
                         key expressions: '2008-04-08' (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: ++++
                         Map-reduce partition columns: '2008-04-08' (type: string), _col4 (type: int)
-                        Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
+                        value expressions: 'foo' (type: string), 'bar' (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
             Execution mode: llap
             Reduce Operator Tree:
               Select Operator
-                expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), 'foo' (type: string), 'bar' (type: string), '2008-04-08' (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
+                expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 1790 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 1740 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1431,7 +1429,6 @@ STAGE PLANS:
                         key expressions: _col1 (type: string), _col2 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: ++++
                         Map-reduce partition columns: _col1 (type: string), _col2 (type: int)
-                        Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
             Execution mode: llap
             LLAP IO: may be used (ACID table)
         Reducer 2 
@@ -1440,11 +1437,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), KEY._col1 (type: string), KEY._col2 (type: int), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _bucket_number
-                Statistics: Num rows: 5 Data size: 2240 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 2240 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 1320 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1636,7 +1632,6 @@ STAGE PLANS:
                         key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: ++++
                         Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
-                        Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), 'bar' (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
@@ -1646,11 +1641,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -1745,7 +1739,6 @@ STAGE PLANS:
                         key expressions: _col3 (type: string), _col4 (type: int), _bucket_number (type: string), _col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>)
                         sort order: ++++
                         Map-reduce partition columns: _col3 (type: string), _col4 (type: int)
-                        Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                         value expressions: _col1 (type: string), 'bar' (type: string)
             Execution mode: llap
             LLAP IO: may be used (ACID table)
@@ -1755,11 +1748,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY._col0 (type: struct<writeid:bigint,bucketid:int,rowid:bigint>), VALUE._col1 (type: string), VALUE._col2 (type: string), KEY._col3 (type: string), KEY._col4 (type: int), KEY._bucket_number (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number
-                Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
                   Dp Sort State: PARTITION_BUCKET_SORTED
-                  Statistics: Num rows: 5 Data size: 3595 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 5 Data size: 2675 Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                       output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
index 9c7babb..916db93 100644
--- a/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
+++ b/ql/src/test/results/clientpositive/llap/extrapolate_part_stats_partial_ndv.q.out
@@ -299,7 +299,7 @@ STAGE PLANS:
               serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
-              totalSize 545
+              totalSize 539
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
           
@@ -391,7 +391,7 @@ STAGE PLANS:
               serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
-              totalSize 595
+              totalSize 594
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
           
@@ -690,7 +690,7 @@ STAGE PLANS:
               serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
-              totalSize 545
+              totalSize 539
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
           
@@ -782,7 +782,7 @@ STAGE PLANS:
               serialization.ddl struct loc_orc_1d_n0 { string state, double locid, decimal(10,0) cnt, i32 zip}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
-              totalSize 595
+              totalSize 594
 #### A masked pattern was here ####
             serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
           

http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/lineage3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/lineage3.q.out b/ql/src/test/results/clientpositive/llap/lineage3.q.out
index cf38816..9bec309 100644
--- a/ql/src/test/results/clientpositive/llap/lineage3.q.out
+++ b/ql/src/test/results/clientpositive/llap/lineage3.q.out
@@ -372,7 +372,7 @@ PREHOOK: query: create table src_dp1 (f string, w string, m int)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@src_dp1
-Warning: Shuffle Join MERGEJOIN[54][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
+Warning: Shuffle Join MERGEJOIN[62][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product
 PREHOOK: query: from src_dp, src_dp1
 insert into dest_dp1 partition (year) select first, word, year
 insert into dest_dp2 partition (y, m) select first, word, year, month

http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
index 7e6e88b..18dca49 100644
--- a/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_partitioned.q.out
@@ -1916,7 +1916,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: oft
-                  Statistics: Num rows: 12288 Data size: 1884148 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 12288 Data size: 13243096 Basic stats: COMPLETE Column stats: PARTIAL
                   TableScan Vectorization:
                       native: true
                       vectorizationSchemaColumns: [0:csmallint:smallint, 1:cint:int, 2:cbigint:bigint, 3:cfloat:float, 4:cdouble:double, 5:cstring1:string, 6:cchar1:char(255), 7:cvchar1:varchar(255), 8:cboolean1:boolean, 9:cboolean2:boolean, 10:ctinyint:tinyint, 11:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
@@ -1939,7 +1939,7 @@ STAGE PLANS:
                     outputColumnNames: _col1, _col6, _col7, _col10
                     input vertices:
                       1 Map 2
-                    Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL
                     Select Operator
                       expressions: _col10 (type: tinyint), _col1 (type: int), _col6 (type: char(255)), _col7 (type: varchar(255))
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -1947,13 +1947,13 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumnNums: [10, 1, 6, 7]
-                      Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 960 Data size: 4040 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 960 Data size: 240494 Basic stats: COMPLETE Column stats: PARTIAL
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/llap_smb.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_smb.q.out b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
index cdc10f8..5f5a747 100644
--- a/ql/src/test/results/clientpositive/llap/llap_smb.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_smb.q.out
@@ -269,10 +269,10 @@ STAGE PLANS:
                 TableScan
                   alias: a
                   filterExpr: id is not null (type: boolean)
-                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                   Filter Operator
                     predicate: id is not null (type: boolean)
-                    Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                     Merge Join Operator
                       condition map:
                            Inner Join 0 to 1
@@ -280,18 +280,18 @@ STAGE PLANS:
                         0 id (type: bigint)
                         1 id (type: bigint)
                       outputColumnNames: _col2, _col3
-                      Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                       Group By Operator
                         aggregations: count()
                         keys: _col2 (type: int), _col3 (type: smallint)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2
-                        Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                         Reduce Output Operator
                           key expressions: _col0 (type: int), _col1 (type: smallint)
                           sort order: ++
                           Map-reduce partition columns: _col0 (type: int), _col1 (type: smallint)
-                          Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                           value expressions: _col2 (type: bigint)
             Execution mode: llap
         Reducer 2 
@@ -302,10 +302,10 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: ###Masked### Data size: ###Masked### Basic stats: COMPLETE Column stats: PARTIAL
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/5eebbdf7/ql/src/test/results/clientpositive/llap/llap_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/llap_stats.q.out b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
index 5bba6b0..89c0c57 100644
--- a/ql/src/test/results/clientpositive/llap/llap_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/llap_stats.q.out
@@ -153,11 +153,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: llap_stats
-                  Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL
                   Select Operator
                     expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
                     outputColumnNames: ctinyint, csmallint, cint
-                    Statistics: Num rows: 10 Data size: 120 Basic stats: COMPLETE Column stats: PARTIAL
+                    Statistics: Num rows: 10 Data size: 196 Basic stats: COMPLETE Column stats: PARTIAL
                     Group By Operator
                       aggregations: compute_stats(ctinyint, 'hll'), compute_stats(csmallint, 'hll')
                       keys: cint (type: int)