You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/07/25 16:38:43 UTC

hive git commit: HIVE-14308: While using column stats estimated data size may become 0 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 85a8c32f5 -> cb398d15d


HIVE-14308: While using column stats estimated data size may become 0 (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/cb398d15
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/cb398d15
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/cb398d15

Branch: refs/heads/master
Commit: cb398d15df078217c5825a121ca3a28b39e6820b
Parents: 85a8c32
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Fri Jul 22 12:44:13 2016 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Mon Jul 25 17:38:57 2016 +0100

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  2 +-
 .../clientpositive/annotate_stats_part.q.out    | 22 ++++++++++----------
 .../clientpositive/stats_partial_size.q.out     |  2 +-
 .../results/clientpositive/stats_ppr_all.q.out  | 12 +++++------
 4 files changed, 19 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/cb398d15/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index d8acf94..7a15904 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -283,8 +283,8 @@ public class StatsUtils {
           // add partition column stats
           addParitionColumnStats(conf, neededColumns, referencedColumns, schema, table, partList,
               emptyStats);
-
           stats.addToColumnStats(emptyStats);
+          stats.addToDataSize(getDataSizeFromColumnStats(nr, emptyStats));
           stats.updateColumnStatsState(deriveStatType(emptyStats, referencedColumns));
         } else {
           List<ColumnStatisticsObj> colStats = aggrStats.getColStats();

http://git-wip-us.apache.org/repos/asf/hive/blob/cb398d15/ql/src/test/results/clientpositive/annotate_stats_part.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
index 131cf6a..df42f36 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_part.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_part.q.out
@@ -56,11 +56,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+            Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: insert overwrite table loc_orc partition(year) select * from loc_staging
@@ -98,7 +98,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 6 Data size: 780 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 6 Data size: 1884 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -156,11 +156,11 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL
+          Statistics: Num rows: 7 Data size: 1966 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 7 Data size: 678 Basic stats: PARTIAL Column stats: PARTIAL
+            Statistics: Num rows: 7 Data size: 1288 Basic stats: COMPLETE Column stats: PARTIAL
             ListSink
 
 PREHOOK: query: -- basicStatState: COMPLETE colStatState: NONE
@@ -239,7 +239,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -262,7 +262,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: PARTIAL
+          Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: PARTIAL
           Select Operator
             expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
@@ -287,14 +287,14 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+          Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL
           Filter Operator
             predicate: ((year = '2001') and (year = '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+            Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
               outputColumnNames: _col0, _col1, _col2, _col3
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+              Statistics: Num rows: 1 Data size: 110 Basic stats: COMPLETE Column stats: PARTIAL
               ListSink
 
 PREHOOK: query: -- partition level partial column statistics
@@ -371,7 +371,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: loc_orc
-          Statistics: Num rows: 8 Data size: 774 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 8 Data size: 2246 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
             expressions: year (type: string)
             outputColumnNames: _col0

http://git-wip-us.apache.org/repos/asf/hive/blob/cb398d15/ql/src/test/results/clientpositive/stats_partial_size.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_partial_size.q.out b/ql/src/test/results/clientpositive/stats_partial_size.q.out
index 31adec7..ee9040d 100644
--- a/ql/src/test/results/clientpositive/stats_partial_size.q.out
+++ b/ql/src/test/results/clientpositive/stats_partial_size.q.out
@@ -49,7 +49,7 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: sample_partitioned
-            Statistics: Num rows: 2 Data size: 2 Basic stats: COMPLETE Column stats: PARTIAL
+            Statistics: Num rows: 2 Data size: 10 Basic stats: COMPLETE Column stats: PARTIAL
             Select Operator
               expressions: x (type: int), y (type: int)
               outputColumnNames: _col0, _col1

http://git-wip-us.apache.org/repos/asf/hive/blob/cb398d15/ql/src/test/results/clientpositive/stats_ppr_all.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/stats_ppr_all.q.out b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
index d8da399..de6cb31 100644
--- a/ql/src/test/results/clientpositive/stats_ppr_all.q.out
+++ b/ql/src/test/results/clientpositive/stats_ppr_all.q.out
@@ -122,22 +122,22 @@ STAGE PLANS:
       Map Operator Tree:
           TableScan
             alias: ss
-            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+            Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: PARTIAL
             Filter Operator
               predicate: (UDFToDouble((((year * 10000) + (month * 100)) + day)) = 2015010.0) (type: boolean)
-              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+              Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: PARTIAL
               Select Operator
                 expressions: order_amount (type: float)
                 outputColumnNames: order_amount
-                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: PARTIAL
+                Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: PARTIAL
                 Group By Operator
                   aggregations: sum(order_amount)
                   mode: hash
                   outputColumnNames: _col0
-                  Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
                   Reduce Output Operator
                     sort order: 
-                    Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: PARTIAL
+                    Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: PARTIAL
                     value expressions: _col0 (type: double)
       Reduce Operator Tree:
         Group By Operator
@@ -272,7 +272,7 @@ STAGE PLANS:
       Processor Tree:
         TableScan
           alias: ss
-          Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE
           Filter Operator
             predicate: (UDFToDouble(((201500 + (month * 10)) + day)) > 201511.0) (type: boolean)
             Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE