You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2017/07/15 08:06:42 UTC

[03/24] hive git commit: HIVE-16996: Add HLL as an alternative to FM sketch to compute stats (Pengcheng Xiong, reviewed by Ashutosh Chauhan, Prasanth Jayachandran)

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
index 0057762..38598b4 100644
--- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out
@@ -102,7 +102,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) 
 -> boolean) -> boolean
                     predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean)
-                    Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble
@@ -110,7 +110,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 4, 5]
-                      Statistics: Num rows: 5466 Data size: 1157380 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
                         Group By Vectorization:
@@ -857,7 +857,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean
                     predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean)
-                    Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint), cfloat (type: float)
                       outputColumnNames: ctinyint, cint, cbigint, cfloat
@@ -865,7 +865,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 2, 3, 4]
-                      Statistics: Num rows: 2835 Data size: 493648 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE
                       Group By Operator
                         aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
                         Group By Vectorization:
@@ -1398,7 +1398,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean
                     predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean)
-                    Statistics: Num rows: 8195 Data size: 1735170 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 8194 Data size: 1734900 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -1407,7 +1407,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28]
                           selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 
 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long
-                      Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint)
                         sort order: +++++++++++++++++++++++++
@@ -1415,7 +1415,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -1444,7 +1444,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21]
-                Statistics: Num rows: 8195 Data size: 3349694 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 25
                   Limit Vectorization:
@@ -2220,7 +2220,7 @@ STAGE PLANS:
                         keys: csmallint (type: smallint)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1141 Data size: 204228 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: smallint)
                           sort order: +
@@ -2229,7 +2229,7 @@ STAGE PLANS:
                               className: VectorReduceSinkLongOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 1128 Data size: 201900 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1141 Data size: 204228 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: bigint), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -2265,7 +2265,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1128 Data size: 39468 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1141 Data size: 39924 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -2274,7 +2274,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13]
                       selectExpressions: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 5:long, DecimalScalarDivideDecimalColumn(val -1.389, col 6)(children: CastLongToDecimal(col 0) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9, col 10)(children: CastLongToDouble(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 9:double, CastLongToDouble(col 2) -> 10:double) -> 11:double, LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 12:long, LongColUnaryMinus(col 13)(children: LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 13:long) -> 8:long, LongColSubtractLongScalar(col 4, val -89010) -> 13:long
-                  Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
                     sort order: +++++++++++
@@ -2282,7 +2282,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
             Execution mode: vectorized, llap
@@ -2301,7 +2301,7 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-                Statistics: Num rows: 1128 Data size: 197388 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1141 Data size: 199664 Basic stats: COMPLETE Column stats: COMPLETE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
@@ -2500,7 +2500,7 @@ STAGE PLANS:
                         keys: cdouble (type: double)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                        Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1136 Data size: 306696 Basic stats: COMPLETE Column stats: COMPLETE
                         Reduce Output Operator
                           key expressions: _col0 (type: double)
                           sort order: +
@@ -2509,7 +2509,7 @@ STAGE PLANS:
                               className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 870 Data size: 234888 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 1136 Data size: 306696 Basic stats: COMPLETE Column stats: COMPLETE
                           value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: double)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -2545,7 +2545,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 870 Data size: 46968 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1136 Data size: 61320 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2554,7 +2554,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15]
                       selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 7:double, DoubleColUnaryMinus(col 1) -> 8:double, DoubleColAddDoubleScalar(col 9, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9, col 12)(children: DoubleColUnaryMinus(col 1) -> 9:double, DoubleColAddDoubleScalar(col 11, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 1) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0, col 1) -> 9:double, DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14)(children: DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 14:double) -> 15:double
-                  Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE
                   Reduce Output Operator
                     key expressions: _col0 (type: double)
                     sort order: +
@@ -2562,7 +2562,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE
                     value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
         Reducer 3 
             Execution mode: vectorized, llap
@@ -2581,13 +2581,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13]
-                Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 870 Data size: 109608 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1136 Data size: 143112 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
index c3e5f7c..fe9f0d2 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_distinct_gby.q.out
@@ -211,7 +211,7 @@ STAGE PLANS:
                       keys: cint (type: int)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
@@ -222,7 +222,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumns: []
-                        Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: all inputs
             Map Vectorization:
@@ -266,7 +266,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: int)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 5775 Data size: 17248 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 6030 Data size: 18008 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: sum(_col0), count(_col0), avg(_col0), std(_col0)
                   Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
index 9a1c44c..0af7204 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
@@ -1741,7 +1741,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 57 Data size: 228 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=94)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=114)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1789,7 +1789,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=94)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=114)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
index a03466f..ee65bed 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction2.q.out
@@ -152,7 +152,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=36)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
@@ -200,7 +200,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=36)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
@@ -288,7 +288,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 20 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=32)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
@@ -336,7 +336,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=32)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE
@@ -424,7 +424,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=60)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
@@ -472,7 +472,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=60)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
@@ -832,7 +832,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -851,7 +851,7 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count()
                   mode: hash
@@ -880,7 +880,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE
@@ -968,7 +968,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
@@ -987,7 +987,7 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: char(10))
                   1 _col0 (type: char(10))
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count()
                   mode: hash
@@ -1016,7 +1016,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1104,7 +1104,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         Statistics: Num rows: 20 Data size: 1880 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
-                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=30)
+                          aggregations: min(_col0), max(_col0), bloom_filter(_col0, expectedEntries=40)
                           mode: hash
                           outputColumnNames: _col0, _col1, _col2
                           Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE
@@ -1123,7 +1123,7 @@ STAGE PLANS:
                 keys:
                   0 _col0 (type: varchar(10))
                   1 _col0 (type: varchar(10))
-                Statistics: Num rows: 20 Data size: 160 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 21 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
                 Group By Operator
                   aggregations: count()
                   mode: hash
@@ -1152,7 +1152,7 @@ STAGE PLANS:
             Execution mode: vectorized, llap
             Reduce Operator Tree:
               Group By Operator
-                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=30)
+                aggregations: min(VALUE._col0), max(VALUE._col1), bloom_filter(VALUE._col2, expectedEntries=40)
                 mode: final
                 outputColumnNames: _col0, _col1, _col2
                 Statistics: Num rows: 1 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
index 8160bc7..9590c00 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out
@@ -59,7 +59,7 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int)
                           outputColumnNames: _col0, _col1, _col2
@@ -68,7 +68,7 @@ STAGE PLANS:
                               native: true
                               projectedOutputColumns: [2, 2, 12]
                               selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long
-                          Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE
                           Group By Operator
                             aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
                             Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
index f744eb6..2c66856 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_multi_output_select.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
                           1 _col0 (type: string)
                         input vertices:
                           1 Map 4
-                        Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -75,7 +75,7 @@ STAGE PLANS:
                           1 _col0 (type: string)
                         input vertices:
                           1 Map 5
-                        Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
                         Group By Operator
                           aggregations: count()
                           mode: hash
@@ -94,16 +94,16 @@ STAGE PLANS:
                   Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((value = 'val_278') and key is not null) (type: boolean)
-                    Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Map 5 
@@ -113,16 +113,16 @@ STAGE PLANS:
                   Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
                   Filter Operator
                     predicate: ((value = 'val_255') and key is not null) (type: boolean)
-                    Statistics: Num rows: 2 Data size: 350 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 175 Basic stats: COMPLETE Column stats: COMPLETE
                     Select Operator
                       expressions: key (type: string)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 2 Data size: 354 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 1 Data size: 177 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized, llap
             LLAP IO: no inputs
         Reducer 2 

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
index 28a8340..2a95065 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_nested_mapjoin.q.out
@@ -40,11 +40,11 @@ STAGE PLANS:
                         outputColumnNames: _col2, _col3
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 884742 Data size: 10596096 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE
                         Select Operator
                           expressions: _col2 (type: smallint), _col3 (type: double)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 884742 Data size: 10596096 Basic stats: COMPLETE Column stats: COMPLETE
+                          Statistics: Num rows: 661228 Data size: 7913928 Basic stats: COMPLETE Column stats: COMPLETE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -54,7 +54,7 @@ STAGE PLANS:
                             outputColumnNames: _col1
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 1966236 Data size: 15716016 Basic stats: COMPLETE Column stats: COMPLETE
+                            Statistics: Num rows: 1452263 Data size: 11604232 Basic stats: COMPLETE Column stats: COMPLETE
                             Group By Operator
                               aggregations: sum(_col1)
                               mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out
index 73ab9fc..5469018 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out
@@ -118,11 +118,11 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 19518 Data size: 137552 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 18694 Data size: 130960 Basic stats: COMPLETE Column stats: COMPLETE
                   Group By Operator
                     aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
                     Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/llap/windowing_gby.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/windowing_gby.q.out b/ql/src/test/results/clientpositive/llap/windowing_gby.q.out
index 2c47b8b..22d2d75 100644
--- a/ql/src/test/results/clientpositive/llap/windowing_gby.q.out
+++ b/ql/src/test/results/clientpositive/llap/windowing_gby.q.out
@@ -41,7 +41,7 @@ Stage-0
                       PartitionCols:_col0
                       Group By Operator [GBY_10] (rows=3 width=20)
                         Output:["_col0","_col1","_col2"],aggregations:["sum(_col3)","sum(_col1)"],keys:_col2
-                        Merge Join Operator [MERGEJOIN_24] (rows=29 width=7)
+                        Merge Join Operator [MERGEJOIN_24] (rows=36 width=7)
                           Conds:RS_6._col0=RS_7._col1(Inner),Output:["_col1","_col2","_col3"]
                         <-Map 1 [SIMPLE_EDGE] llap
                           SHUFFLE [RS_6]

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/parallel_colstats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/parallel_colstats.q.out b/ql/src/test/results/clientpositive/parallel_colstats.q.out
index c851131..d5bce1e 100644
--- a/ql/src/test/results/clientpositive/parallel_colstats.q.out
+++ b/ql/src/test/results/clientpositive/parallel_colstats.q.out
@@ -100,10 +100,10 @@ STAGE PLANS:
               outputColumnNames: key, value
               Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   table:
@@ -128,10 +128,10 @@ STAGE PLANS:
               outputColumnNames: key, value
               Statistics: Num rows: 125 Data size: 1328 Basic stats: COMPLETE Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   table:
@@ -172,17 +172,17 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+              Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -207,17 +207,17 @@ STAGE PLANS:
           TableScan
             Reduce Output Operator
               sort order: 
-              Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
-              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+              Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 984 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/partial_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partial_column_stats.q.out b/ql/src/test/results/clientpositive/partial_column_stats.q.out
index 5876efa..87d47da 100644
--- a/ql/src/test/results/clientpositive/partial_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/partial_column_stats.q.out
@@ -26,23 +26,23 @@ STAGE PLANS:
               outputColumnNames: key, value
               Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
               Group By Operator
-                aggregations: compute_stats(key, 16), compute_stats(value, 16)
+                aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                 mode: hash
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
                 Reduce Output Operator
                   sort order: 
-                  Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
-                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
+                  Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string>)
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
           mode: mergepartial
           outputColumnNames: _col0, _col1
-          Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+          Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
           File Output Operator
             compressed: false
-            Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
+            Statistics: Num rows: 1 Data size: 960 Basic stats: COMPLETE Column stats: NONE
             table:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
index 3505556..d459b36 100644
--- a/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
+++ b/ql/src/test/results/clientpositive/partition_coltype_literals.q.out
@@ -304,7 +304,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@partcoltypenum
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-key                 	int                 	27                  	484                 	0                   	18                  	                    	                    	                    	                    	from deserializer   
+key                 	int                 	27                  	484                 	0                   	20                  	                    	                    	                    	                    	from deserializer   
 PREHOOK: query: describe formatted partcoltypenum partition (tint=110Y, sint=22000S, bint=330000000000L) value
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@partcoltypenum
@@ -313,7 +313,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@partcoltypenum
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-value               	string              	                    	                    	0                   	18                  	6.766666666666667   	7                   	                    	                    	from deserializer   
+value               	string              	                    	                    	0                   	20                  	6.766666666666667   	7                   	                    	                    	from deserializer   
 PREHOOK: query: describe formatted partcoltypenum tint
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@partcoltypenum

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
index 3ad09e8..4bddd3b 100644
--- a/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
+++ b/ql/src/test/results/clientpositive/reduceSinkDeDuplication_pRS_key_empty.q.out
@@ -1,4 +1,4 @@
-PREHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+PREHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -10,7 +10,7 @@ select
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+POSTHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -23,7 +23,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 {"columntype":"Double","min":260.182,"max":260.182,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{0}{0}{0}{1}{1}{1}{0}{0}{0}{0}{0}{1}{2}{1}{0}"}	{"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"}	{"columntype":"Double","min":20428.07287599998,"max":20428.07287599998,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{0}{0}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}{4}{2}{0}"}	{"columntype":"Double","min":20469.01089779557,"max":20469.01089779557,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{0}{1}{3}{2}{3}{5}{2}{0}{1}{0}{1}{1}{1}{1}{0}{1}"}
-PREHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -33,7 +33,7 @@ select
   var_samp(substr(src.value,5)) as d
  from src)subq
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+POSTHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -69,7 +69,7 @@ STAGE PLANS:
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 1 Data size: 208 Basic stats: COMPLETE Column stats: NONE
           Group By Operator
-            aggregations: compute_stats(_col0, 16), compute_stats(_col1, 16), compute_stats(_col2, 16), compute_stats(_col3, 16)
+            aggregations: compute_stats(_col0, 'fm', 16), compute_stats(_col1, 'fm', 16), compute_stats(_col2, 'fm', 16), compute_stats(_col3, 'fm', 16)
             mode: complete
             outputColumnNames: _col0, _col1, _col2, _col3
             Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE
@@ -87,7 +87,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+PREHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -99,7 +99,7 @@ select
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+POSTHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -112,7 +112,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 {"columntype":"Double","min":256.10355987055016,"max":256.10355987055016,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{2}{1}{0}{2}{0}{1}{1}{1}{0}{0}{1}{1}{0}{2}{1}{0}"}	{"columntype":"String","maxlength":2,"avglength":2.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{2}{0}{3}{6}{3}{0}{1}{1}{0}{0}{0}{0}{0}{0}{0}"}	{"columntype":"Double","min":20428.07287599999,"max":20428.07287599999,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":"{1}{4}{0}{0}{4}{3}{0}{1}{0}{0}{0}{0}{0}{0}{1}{2}"}	{"columntype":"Double","min":20469.010897795582,"max":20469.010897795582,"countnulls":0,"numdistinctvalues":2,"ndvbitvector":"{2}{0}{2}{2}{0}{0}{2}{0}{0}{0}{0}{0}{1}{0}{0}{0}"}
-PREHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+PREHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -122,7 +122,7 @@ select
   var_samp(substr(src.value,5)) as d
  from src)subq
 PREHOOK: type: QUERY
-POSTHOOK: query: explain select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+POSTHOOK: query: explain select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -175,7 +175,7 @@ STAGE PLANS:
               value expressions: _col0 (type: double), _col1 (type: string), _col2 (type: double), _col3 (type: double)
       Reduce Operator Tree:
         Group By Operator
-          aggregations: compute_stats(VALUE._col0, 16), compute_stats(VALUE._col2, 16), compute_stats(VALUE._col3, 16), compute_stats(VALUE._col4, 16)
+          aggregations: compute_stats(VALUE._col0, 'fm', 16), compute_stats(VALUE._col3, 'fm', 16), compute_stats(VALUE._col4, 'fm', 16), compute_stats(VALUE._col5, 'fm', 16)
           mode: complete
           outputColumnNames: _col0, _col1, _col2, _col3
           Statistics: Num rows: 1 Data size: 2004 Basic stats: COMPLETE Column stats: NONE
@@ -193,7 +193,7 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+PREHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select
@@ -205,7 +205,7 @@ select
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 #### A masked pattern was here ####
-POSTHOOK: query: select compute_stats(a,16),compute_stats(b,16),compute_stats(c,16),compute_stats(d,16)
+POSTHOOK: query: select compute_stats(a,'fm',16),compute_stats(b,'fm',16),compute_stats(c,'fm',16),compute_stats(d,'fm',16)
 from
 (
 select

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
index 33cf90a..4600e71 100644
--- a/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
+++ b/ql/src/test/results/clientpositive/remove_exprs_stats.q.out
@@ -449,14 +449,14 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
               predicate: (locid) IN (5) (type: boolean)
-              Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 2 Data size: 204 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 102 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -485,14 +485,14 @@ STAGE PLANS:
             Statistics: Num rows: 8 Data size: 816 Basic stats: COMPLETE Column stats: COMPLETE
             Filter Operator
               predicate: (locid) IN (5, 2, 3) (type: boolean)
-              Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
               Select Operator
                 expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 4 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
index ec4076f..19546c3 100644
--- a/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
+++ b/ql/src/test/results/clientpositive/rename_external_partition_location.q.out
@@ -173,7 +173,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@ex_table
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-value               	string              	                    	                    	0                   	5                   	5.0                 	5                   	                    	                    	from deserializer   
+value               	string              	                    	                    	0                   	6                   	5.0                 	5                   	                    	                    	from deserializer   
 PREHOOK: query: ALTER TABLE ex_table PARTITION (part='part1') RENAME TO PARTITION (part='part2')
 PREHOOK: type: ALTERTABLE_RENAMEPART
 PREHOOK: Input: default@ex_table
@@ -321,4 +321,4 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@ex_table
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-value               	string              	                    	                    	0                   	5                   	5.0                 	5                   	                    	                    	from deserializer   
+value               	string              	                    	                    	0                   	6                   	5.0                 	5                   	                    	                    	from deserializer   

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
index b3d6f03..16b3a38 100644
--- a/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
+++ b/ql/src/test/results/clientpositive/rename_table_update_column_stats.q.out
@@ -59,7 +59,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable1
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col1                	int                 	27                  	484                 	0                   	8                   	                    	                    	                    	                    	from deserializer   
+col1                	int                 	27                  	484                 	0                   	10                  	                    	                    	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb1.testtable1 col2
 PREHOOK: type: DESCTABLE
@@ -69,7 +69,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable1
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col2                	string              	                    	                    	0                   	12                  	6.7                 	7                   	                    	                    	from deserializer   
+col2                	string              	                    	                    	0                   	10                  	6.7                 	7                   	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb1.testtable1 col3
 PREHOOK: type: DESCTABLE
@@ -98,7 +98,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb2@testtable2
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col1                	int                 	27                  	484                 	0                   	8                   	                    	                    	                    	                    	from deserializer   
+col1                	int                 	27                  	484                 	0                   	10                  	                    	                    	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb2.testtable2 col2
 PREHOOK: type: DESCTABLE
@@ -108,7 +108,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb2@testtable2
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col2                	string              	                    	                    	0                   	12                  	6.7                 	7                   	                    	                    	from deserializer   
+col2                	string              	                    	                    	0                   	10                  	6.7                 	7                   	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb2.testtable2 col3
 PREHOOK: type: DESCTABLE
@@ -205,7 +205,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable1
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col1                	int                 	27                  	484                 	0                   	8                   	                    	                    	                    	                    	from deserializer   
+col1                	int                 	27                  	484                 	0                   	10                  	                    	                    	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb1.testtable1 col2
 PREHOOK: type: DESCTABLE
@@ -215,7 +215,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb1@testtable1
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col2                	string              	                    	                    	0                   	12                  	6.7                 	7                   	                    	                    	from deserializer   
+col2                	string              	                    	                    	0                   	10                  	6.7                 	7                   	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb1.testtable1 col3
 PREHOOK: type: DESCTABLE
@@ -244,7 +244,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb2@testtable2
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col1                	int                 	27                  	484                 	0                   	8                   	                    	                    	                    	                    	from deserializer   
+col1                	int                 	27                  	484                 	0                   	10                  	                    	                    	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb2.testtable2 col2
 PREHOOK: type: DESCTABLE
@@ -254,7 +254,7 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: statsdb2@testtable2
 # col_name            	data_type           	min                 	max                 	num_nulls           	distinct_count      	avg_col_len         	max_col_len         	num_trues           	num_falses          	comment             
 	 	 	 	 	 	 	 	 	 	 
-col2                	string              	                    	                    	0                   	12                  	6.7                 	7                   	                    	                    	from deserializer   
+col2                	string              	                    	                    	0                   	10                  	6.7                 	7                   	                    	                    	from deserializer   
 COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"col1\":\"true\",\"col2\":\"true\",\"col3\":\"true\"}}	 	 	 	 	 	 	 	 	 
 PREHOOK: query: describe formatted statsdb2.testtable2 col3
 PREHOOK: type: DESCTABLE

http://git-wip-us.apache.org/repos/asf/hive/blob/b883d313/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
index 519e155..0667ea9 100644
--- a/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out
@@ -252,10 +252,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -326,10 +326,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -400,10 +400,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -590,10 +590,10 @@ STAGE PLANS:
                   1 _col0 (type: int)
                   2 _col1 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 47 Data size: 13912 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 64 Data size: 18944 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -752,10 +752,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 48 Data size: 5417 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 48 Data size: 5417 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 48 Data size: 5607 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -831,10 +831,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col0 (type: string), _col1 (type: int)
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 3 Data size: 297 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 4 Data size: 396 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -899,10 +899,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6 Data size: 1164 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 8 Data size: 1552 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -967,10 +967,10 @@ STAGE PLANS:
                   0 _col0 (type: string), _col1 (type: int)
                   1 _col1 (type: string), _col0 (type: int)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 54 Data size: 1358 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 54 Data size: 1358 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 54 Data size: 1746 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat