You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/12/12 00:00:05 UTC

[07/51] [partial] hive git commit: HIVE-13567 : Enable auto-gather column stats by default (Zoltan Haindrich, Pengcheng Xiong via Ashutosh Chauhan)

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/mm_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/mm_all.q.out b/ql/src/test/results/clientpositive/llap/mm_all.q.out
index 03c1293..0374ea6 100644
--- a/ql/src/test/results/clientpositive/llap/mm_all.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_all.q.out
@@ -63,27 +63,66 @@ STAGE PLANS:
   Stage: Stage-1
     Tez
 #### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: intermediate
-                  Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: key (type: int)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                           output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
                           serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde
                           name: default.part_mm
                       Write Type: INSERT
+                    Select Operator
+                      expressions: _col0 (type: int), UDFToInteger('455') (type: int)
+                      outputColumnNames: key, key_mm
+                      Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE
+                      Group By Operator
+                        aggregations: compute_stats(key, 'hll')
+                        keys: key_mm (type: int)
+                        mode: hash
+                        outputColumnNames: _col0, _col1
+                        Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                        Reduce Output Operator
+                          key expressions: _col0 (type: int)
+                          sort order: +
+                          Map-reduce partition columns: _col0 (type: int)
+                          Statistics: Num rows: 3 Data size: 1284 Basic stats: COMPLETE Column stats: COMPLETE
+                          value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>)
             Execution mode: llap
             LLAP IO: all inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: compute_stats(VALUE._col0)
+                keys: KEY._col0 (type: int)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 3 Data size: 1332 Basic stats: COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
   Stage: Stage-2
     Dependency Collection
@@ -105,6 +144,10 @@ STAGE PLANS:
   Stage: Stage-3
     Stats Work
       Basic Stats Work:
+      Column Stats Desc:
+          Columns: key
+          Column Types: int
+          Table: default.part_mm
 
 PREHOOK: query: insert into table part_mm partition(key_mm=455) select key from intermediate
 PREHOOK: type: QUERY
@@ -1773,7 +1816,7 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
 	numFiles            	3                   
 	numRows             	6                   
 	rawDataSize         	13                  
@@ -1823,7 +1866,7 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
-	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\"}}
 	numFiles            	6                   
 	numRows             	12                  
 	rawDataSize         	26                  

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
index 88951c3..05ec048 100644
--- a/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
+++ b/ql/src/test/results/clientpositive/llap/multiMapJoin2.q.out
@@ -1683,14 +1683,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: x
-                  Statistics: Num rows: 125 Data size: 23341 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: key is not null (type: boolean)
-                    Statistics: Num rows: 119 Data size: 22220 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 119 Data size: 22220 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 125 Data size: 10875 Basic stats: COMPLETE Column stats: COMPLETE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -1699,15 +1699,15 @@ STAGE PLANS:
                           1 _col0 (type: string)
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 130 Data size: 24442 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
                           outputColumnNames: _col0
-                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                           Reduce Output Operator
                             sort order: 
-                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                             value expressions: _col0 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
@@ -1737,10 +1737,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/3bbc24d2/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
index 658c1bc..39feaec 100644
--- a/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
+++ b/ql/src/test/results/clientpositive/llap/multi_count_distinct_null.q.out
@@ -40,21 +40,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: employee
-                  Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: department_id (type: int), gender (type: varchar(10)), education_level (type: int)
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), 0 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 36 Data size: 3672 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int)
                         sort order: ++++
                         Map-reduce partition columns: _col0 (type: int), _col1 (type: varchar(10)), _col2 (type: int), _col3 (type: int)
-                        Statistics: Num rows: 36 Data size: 3672 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -64,19 +64,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int), KEY._col1 (type: varchar(10)), KEY._col2 (type: int), KEY._col3 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 18 Data size: 1836 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 18 Data size: 1836 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 18 Data size: 1556 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count(_col0), count(_col1), count(_col2)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint)
         Reducer 3 
             Execution mode: llap
@@ -85,10 +85,10 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 1 Data size: 126 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -181,21 +181,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: employee
-                  Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
                     expressions: gender (type: varchar(10)), department_id (type: int), education_level (type: int)
                     outputColumnNames: _col0, _col1, _col2
-                    Statistics: Num rows: 12 Data size: 1224 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12 Data size: 1023 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
                       keys: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), 0 (type: int)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3
-                      Statistics: Num rows: 60 Data size: 6120 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int)
                         sort order: ++++
                         Map-reduce partition columns: _col0 (type: varchar(10)), _col1 (type: int), _col2 (type: int), _col3 (type: int)
-                        Statistics: Num rows: 60 Data size: 6120 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
@@ -205,19 +205,19 @@ STAGE PLANS:
                 keys: KEY._col0 (type: varchar(10)), KEY._col1 (type: int), KEY._col2 (type: int), KEY._col3 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 30 Data size: 3060 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: CASE WHEN (((_col3 = 3) and _col0 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 5) and _col1 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN (((_col3 = 6) and _col2 is not null)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 4)) THEN (1) ELSE (null) END (type: int), CASE WHEN ((_col3 = 0)) THEN (1) ELSE (null) END (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                  Statistics: Num rows: 30 Data size: 3060 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 30 Data size: 2534 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count(_col0), count(_col1), count(_col2), count(_col3), count(_col4)
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                    Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       sort order: 
-                      Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: bigint)
         Reducer 3 
             Execution mode: llap
@@ -226,14 +226,14 @@ STAGE PLANS:
                 aggregations: count(VALUE._col0), count(VALUE._col1), count(VALUE._col2), count(VALUE._col3), count(VALUE._col4)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: bigint), _col1 (type: bigint), _col0 (type: bigint), _col2 (type: bigint), _col3 (type: bigint), _col3 (type: bigint), _col4 (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                  Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 1 Data size: 142 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat