You are viewing a plain text version of this content. The canonical link for it is here.

Posted to commits@hive.apache.org by li...@apache.org on 2017/08/17 07:04:51 UTC

[1/5] hive git commit: HIVE-17321: HoS: analyze ORC table doesn't compute raw data size when noscan/partialscan is not specified (Rui reviewed by Liyun and Xuefu)

Repository: hive
Updated Branches:
  refs/heads/master c4e2e2a51 -> 002626d5e


http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
index 66dcdad..2c99ff3 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_case.q.out
@@ -51,7 +51,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -61,7 +61,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
                     predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE ('c') END (type: string)
                       outputColumnNames: _col0, _col1, _col2
@@ -70,13 +70,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [1, 15, 16]
                           selectExpressions: IfExprStringScalarStringGroupColumn(col 12, val a, col 14)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, IfExprStringScalarStringScalar(col 13, val b, val c)(children: LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String) -> 15:String, IfExprStringScalarStringGroupColumn(col 12, val a, col 14)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, IfExprStringScalarStringScalar(col 13, val b, val c)(children: LongColEqualLongScalar(col 1, val 12205) -> 13:long) -> 14:String) -> 16:String
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -192,7 +192,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -202,7 +202,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterLongColEqualLongScalar(col 1, val 418) -> boolean, FilterLongColEqualLongScalar(col 1, val 12205) -> boolean, FilterLongColEqualLongScalar(col 1, val 10583) -> boolean) -> boolean
                     predicate: ((csmallint = 418) or (csmallint = 12205) or (csmallint = 10583)) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: csmallint (type: smallint), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN ('b') ELSE (null) END (type: string), CASE WHEN ((csmallint = 418)) THEN ('a') WHEN ((csmallint = 12205)) THEN (null) ELSE ('c') END (type: string)
                       outputColumnNames: _col0, _col1, _col2
@@ -211,13 +211,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [1, 16, 19]
                           selectExpressions: IfExprStringScalarStringGroupColumn(col 12, val a, col 15)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, IfExprColumnNull(col 13, col 14, null)(children: LongColEqualLongScalar(col 1, val 12205) -> 13:long, ConstantVectorExpression(val b) -> 14:string) -> 15:string) -> 16:String, IfExprStringScalarStringGroupColumn(col 12, val a, col 18)(children: LongColEqualLongScalar(col 1, val 418) -> 12:long, IfExprNullColumn(col 17, null, col 15)(children: LongColEqualLongScalar(col 1, val 12205) -> 17:long, ConstantVectorExpression(val c) -> 15:string) -> 18:string) -> 19:String
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -269,7 +269,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -281,7 +281,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumns: [12, 13]
                         selectExpressions: IfExprLongScalarLongScalar(col 13, val 1, val 0)(children: LongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 2, val 2) -> 12:long) -> 13:long) -> 12:long, IfExprLongScalarLongScalar(col 14, val 1, val 0)(children: LongColEqualLongScalar(col 13, val 1)(children: LongColModuloLongScalar(col 2, val 2) -> 13:long) -> 14:long) -> 13:long
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(_col0), sum(_col1)
                       Group By Vectorization:
@@ -398,7 +398,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -410,7 +410,7 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumns: [12, 13]
                         selectExpressions: IfExprLongColumnLongScalar(col 13, col 2, val 0)(children: LongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 2, val 2) -> 12:long) -> 13:long) -> 12:long, IfExprLongColumnLongScalar(col 14, col 2, val 0)(children: LongColEqualLongScalar(col 13, val 1)(children: LongColModuloLongScalar(col 2, val 2) -> 13:long) -> 14:long) -> 13:long
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(_col0), sum(_col1)
                       Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
index 9fd16c9..2c881ba 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out
@@ -24,7 +24,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -34,7 +34,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
                     predicate: cint is not null (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
@@ -42,7 +42,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [2]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         Spark Hash Table Sink Vectorization:
                             className: VectorSparkHashTableSinkOperator
@@ -72,7 +72,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -82,7 +82,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
                     predicate: cint is not null (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
@@ -90,7 +90,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [2]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -104,7 +104,7 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int)
                           outputColumnNames: _col0, _col1, _col2
@@ -113,7 +113,7 @@ STAGE PLANS:
                               native: true
                               projectedOutputColumns: [2, 2, 12]
                               selectExpressions: LongColAddLongColumn(col 2, col 2) -> 12:long
-                          Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                           Group By Operator
                             aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
                             Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out
index 90ef576..3780a4a 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_math_funcs.q.out
@@ -119,7 +119,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -129,7 +129,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColEqualLongScalar(col 12, val 0)(children: LongColModuloLongScalar(col 3, val 500) -> 12:long) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 13, val -1.0)(children: FuncSinDoubleToDouble(col 4) -> 13:double) -> boolean) -> boolean
                     predicate: (((cbigint % 500) = 0) and (sin(cfloat) >= -1.0)) (type: boolean)
-                    Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), round(cdouble, 2) (type: double), floor(cdouble) (type: bigint), ceil(cdouble) (type: bigint), rand() (type: double), rand(98007) (type: double), exp(ln(cdouble)) (type: double), ln(cdouble) (type: double), ln(cfloat) (type: double), log10(cdouble) (type: double), log2(cdouble) (type: double), log2((cdouble - 15601.0)) (type: double), log2(cfloat) (type: double), log2(cbigint) (type: double), log2(cint) (type: double), log2(csmallint) (type: double), log2(ctinyint) (type: double), log(2, cdouble) (type: double), power(log2(cdouble), 2) (type: double), power(log2(cdouble), 2) (type: double), sqrt(cdouble) (type: double), sqrt(cbigint) (type: double), bin(cbigint) (type: string), hex(cdouble) (type: string), conv(cbigint, 10, 16) (type: string), abs(cdouble) (type: double), abs(ctinyint) (type: int), (cint pmod 3) (type: int), sin(cdouble) (type: double), asin(cdouble) (type: double), cos(cdouble) (type: double), acos(cdouble)
  (type: double), atan(cdouble) (type: double), degrees(cdouble) (type: double), radians(cdouble) (type: double), cdouble (type: double), cbigint (type: bigint), (- cdouble) (type: double), sign(cdouble) (type: double), sign(cbigint) (type: double), cos(((- sin(log(cdouble))) + 3.14159)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40
@@ -138,13 +138,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [5, 13, 12, 14, 15, 16, 18, 17, 19, 20, 21, 23, 22, 24, 25, 26, 27, 28, 30, 31, 29, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 5, 3, 46, 47, 48, 49]
                           selectExpressions: RoundWithNumDigitsDoubleToDouble(col 5, decimalPlaces 2) -> 13:double, FuncFloorDoubleToLong(col 5) -> 12:long, FuncCeilDoubleToLong(col 5) -> 14:long, FuncRandNoSeed -> 15:double, FuncRand -> 16:double, FuncExpDoubleToDouble(col 17)(children: FuncLnDoubleToDouble(col 5) -> 17:double) -> 18:double, FuncLnDoubleToDouble(col 5) -> 17:double, FuncLnDoubleToDouble(col 4) -> 19:double, FuncLog10DoubleToDouble(col 5) -> 20:double, FuncLog2DoubleToDouble(col 5) -> 21:double, FuncLog2DoubleToDouble(col 22)(children: DoubleColSubtractDoubleScalar(col 5, val 15601.0) -> 22:double) -> 23:double, FuncLog2DoubleToDouble(col 4) -> 22:double, FuncLog2LongToDouble(col 3) -> 24:double, FuncLog2LongToDouble(col 2) -> 25:double, FuncLog2LongToDouble(col 1) -> 26:double, FuncLog2LongToDouble(col 0) -> 27:double, VectorUDFAdaptor(log(2, cdouble)) -> 28:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 3
 0:double, VectorUDFAdaptor(power(log2(cdouble), 2))(children: FuncLog2DoubleToDouble(col 5) -> 29:double) -> 31:double, FuncSqrtDoubleToDouble(col 5) -> 29:double, FuncSqrtLongToDouble(col 3) -> 32:double, FuncBin(col 3) -> 33:String, VectorUDFAdaptor(hex(cdouble)) -> 34:string, VectorUDFAdaptor(conv(cbigint, 10, 16)) -> 35:string, FuncAbsDoubleToDouble(col 5) -> 36:double, FuncAbsLongToLong(col 0) -> 37:long, PosModLongToLong(col 2, divisor 3) -> 38:long, FuncSinDoubleToDouble(col 5) -> 39:double, FuncASinDoubleToDouble(col 5) -> 40:double, FuncCosDoubleToDouble(col 5) -> 41:double, FuncACosDoubleToDouble(col 5) -> 42:double, FuncATanDoubleToDouble(col 5) -> 43:double, FuncDegreesDoubleToDouble(col 5) -> 44:double, FuncRadiansDoubleToDouble(col 5) -> 45:double, DoubleColUnaryMinus(col 5) -> 46:double, FuncSignDoubleToDouble(col 5) -> 47:double, FuncSignLongToDouble(col 3) -> 48:double, FuncCosDoubleToDouble(col 50)(children: DoubleColAddDoubleScalar(col 49, val 3.14159)(children: D
 oubleColUnaryMinus(col 50)(children: FuncSinDoubleToDouble(col 49)(children: FuncLnDoubleToDouble(col 5) -> 49:double) -> 50:double) -> 49:double) -> 50:double) -> 49:double
-                      Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out
index 996021f..6760e51 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_nested_mapjoin.q.out
@@ -20,14 +20,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: v1
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (ctinyint is not null and csmallint is not null) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cdouble (type: double)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
                           0 _col0 (type: tinyint)
@@ -47,14 +47,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: v3
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: csmallint is not null (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: csmallint (type: smallint)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Spark HashTable Sink Operator
                         keys:
                           0 _col0 (type: smallint)
@@ -81,14 +81,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: v2
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: ctinyint is not null (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint)
                       outputColumnNames: _col0
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -98,11 +98,11 @@ STAGE PLANS:
                         outputColumnNames: _col2, _col3
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col2 (type: smallint), _col3 (type: double)
                           outputColumnNames: _col0, _col1
-                          Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -112,7 +112,7 @@ STAGE PLANS:
                             outputColumnNames: _col1
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE
                             Group By Operator
                               aggregations: sum(_col1)
                               mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
index 4d8620f..fc1f959 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_shufflejoin.q.out
@@ -27,7 +27,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t1
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -37,7 +37,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
                     predicate: cint is not null (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
@@ -45,7 +45,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [2]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
@@ -54,7 +54,7 @@ STAGE PLANS:
                             className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
@@ -68,7 +68,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: t2
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -78,7 +78,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: SelectColumnIsNotNull(col 2) -> boolean
                     predicate: cint is not null (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int)
                       outputColumnNames: _col0
@@ -86,7 +86,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [2]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int)
                         sort order: +
@@ -95,7 +95,7 @@ STAGE PLANS:
                             className: VectorReduceSinkLongOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
             Execution mode: vectorized
             Map Vectorization:
                 enabled: true
@@ -119,11 +119,11 @@ STAGE PLANS:
                   0 _col0 (type: int)
                   1 _col0 (type: int)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: int), _col1 (type: int), (_col0 + _col1) (type: int)
                   outputColumnNames: _col0, _col1, _col2
-                  Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: count(_col0), max(_col1), min(_col0), avg(_col2)
                     mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out b/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out
index 26aab1c..b88eda5 100644
--- a/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorized_string_funcs.q.out
@@ -59,17 +59,17 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (((cbigint % 237) = 0) and (length(substr(cstring1, 1, 2)) <= 2) and (cstring1 like '%')) (type: boolean)
-                    Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-                      Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 1024 Data size: 31436 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1024 Data size: 220163 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

[4/5] hive git commit: HIVE-17321: HoS: analyze ORC table doesn't compute raw data size when noscan/partialscan is not specified (Rui reviewed by Liyun and Xuefu)

Posted by li...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
index a707ac9..4e26314 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_12.q.out
@@ -82,7 +82,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -92,7 +92,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10, col 11) -> boolean, FilterLongColNotEqualLongColumn(col 0, col 1)(children: col 0) -> boolean) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11, val 1) -> boolean, FilterLongColGreaterEqualLongColumn(col 3, col 1)(children: col 1) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: (ctimestamp1 is null and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint))))) (type: boolean)
-                    Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint), cdouble (type: double), cstring1 (type: string), cboolean1 (type: boolean)
                       outputColumnNames: cbigint, cdouble, cstring1, cboolean1
@@ -100,7 +100,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [3, 5, 6, 10]
-                      Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count(cbigint), stddev_samp(cbigint), avg(cdouble), sum(cbigint), stddev_pop(cdouble)
                         Group By Vectorization:
@@ -115,7 +115,7 @@ STAGE PLANS:
                         keys: cdouble (type: double), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                        Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
                           sort order: ++++
@@ -126,7 +126,7 @@ STAGE PLANS:
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumns: [4, 5, 6, 7, 8]
-                          Statistics: Num rows: 5006 Data size: 153682 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 5006 Data size: 1076307 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col4 (type: bigint), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: struct<count:bigint,sum:double,input:double>), _col7 (type: bigint), _col8 (type: struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized
             Map Vectorization:
@@ -172,7 +172,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
-                Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0 * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), _col5 (type: double), ((-6432.0 * _col0) / -6432.0) (type: double), (- ((-6432.0 * _col0) / -6432.0)) (type: double), _col6 (type: double), (- (-6432.0 * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col7 (type: bigint), (_col6 / (-6432.0 * _col0)) (type: double), (- (- ((-6432.0 * _col0) / -6432.0))) (type: double), (((-6432.0 * _col0) / -6432.0) + (- (-6432.0 * _col0))) (type: double), _col8 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19
@@ -181,7 +181,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [1, 3, 2, 0, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
                       selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 9:double, LongColUnaryMinus(col 1) -> 10:long, LongColMultiplyLongColumn(col 1, col 4) -> 11:long, DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 13:double, DoubleColUnaryMinus(col 14)(children: DoubleColDivideDoubleScalar(col 12, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 12:double) -> 14:double) -> 12:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 15:double, DecimalScalarAddDecimalColumn(val -5638.15, col 16)(children: CastLongToDecimal(col 1) -> 16:decimal(19,0)) -> 17:decimal(22,2), DoubleColDivideDoubleColumn(col 6, col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 18:double, DoubleColUnaryMinus(col 14)(children: DoubleColUnaryMinus(col 19)(children: DoubleColDi
 videDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 19:double) -> 14:double) -> 19:double, DoubleColAddDoubleColumn(col 20, col 21)(children: DoubleColDivideDoubleScalar(col 14, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 20:double, DoubleColUnaryMinus(col 14)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0) -> 14:double) -> 21:double) -> 14:double
-                  Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string)
                     sort order: +++
@@ -191,7 +191,7 @@ STAGE PLANS:
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         valueColumns: [3, 9, 10, 4, 11, 5, 13, 12, 6, 15, 17, 7, 18, 19, 14, 8]
-                    Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double)
         Reducer 3 
             Execution mode: vectorized
@@ -218,13 +218,13 @@ STAGE PLANS:
                     native: true
                     projectedOutputColumns: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18]
                     selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp
-                Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
index 6ecd0b5..ce160a6 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_13.q.out
@@ -84,7 +84,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -94,7 +94,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val 11.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val 12.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
                     predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > 11.0) and (UDFToDouble(ctimestamp2) <> 12.0) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
-                    Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
                       outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1
@@ -102,7 +102,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 4, 6, 8, 10]
-                      Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
                         Group By Vectorization:
@@ -117,7 +117,7 @@ STAGE PLANS:
                         keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                        Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                           sort order: +++++
@@ -128,7 +128,7 @@ STAGE PLANS:
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumns: [5, 6, 7, 8, 9, 10]
-                          Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
             Execution mode: vectorized
             Map Vectorization:
@@ -175,7 +175,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
@@ -184,7 +184,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
                       selectExpressions: LongColUnaryMinus(col 1) -> 11:long, LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 13:long, DoubleColMultiplyDoubleColumn(col 6, col 15)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3) -> 17:double, DoubleColUnaryMinus(col 6) -> 18:double, DecimalColSubtractDecimalScalar(col 19, val 10.175)(children: CastLongToDecimal(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23)(children: DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -
 > 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24, col 23)(children: DoubleColMultiplyDoubleColumn(col 6, col 23)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 23:double) -> 24:double, CastLongToDouble(col 1) -> 23:double) -> 25:double
-                  Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
                     sort order: +++++++++++++++++++++
@@ -194,7 +194,7 @@ STAGE PLANS:
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         valueColumns: []
-                    Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
             Execution mode: vectorized
@@ -219,19 +219,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
-                Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 40
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -437,7 +437,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -447,7 +447,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4, val 3569.0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val 10.175, col 5) -> boolean, FilterLongColNotEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 12, val -1.388)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleScalar(col 12, val -1.3359999999999999)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDecimalColLessDecimalScalar(col 13, val 9763215.5639)(children: CastLongToDecimal(col 0) -> 13:decimal(11,4)) -> boolean) -> boolean) -> boolean
                     predicate: (((cfloat < 3569) and (10.175 >= cdouble) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -1.388) and (UDFToDouble(ctimestamp2) <> -1.3359999999999999) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean)
-                    Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cfloat (type: float), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
                       outputColumnNames: ctinyint, cfloat, cstring1, ctimestamp1, cboolean1
@@ -455,7 +455,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 4, 6, 8, 10]
-                      Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: max(ctinyint), sum(cfloat), stddev_pop(cfloat), stddev_pop(ctinyint), max(cfloat), min(ctinyint)
                         Group By Vectorization:
@@ -470,7 +470,7 @@ STAGE PLANS:
                         keys: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                        Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
                           sort order: +++++
@@ -479,7 +479,7 @@ STAGE PLANS:
                               className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 2730 Data size: 83809 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: float), _col10 (type: tinyint)
             Execution mode: vectorized
             Map Vectorization:
@@ -514,7 +514,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), _col7 (type: double), (- _col6) (type: double), _col8 (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28 / (- (- _col6))) (type: double), _col9 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col10 (type: tinyint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
@@ -523,7 +523,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 11, 5, 13, 6, 16, 15, 17, 7, 18, 8, 20, 22, 21, 9, 25, 10]
                       selectExpressions: LongColUnaryMinus(col 1) -> 11:long, LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 13:long, DoubleColMultiplyDoubleColumn(col 6, col 15)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 15:double) -> 16:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3) -> 17:double, DoubleColUnaryMinus(col 6) -> 18:double, DecimalColSubtractDecimalScalar(col 19, val 10.175)(children: CastLongToDecimal(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 19:decimal(3,0)) -> 20:decimal(7,3), DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -> 21:double) -> 22:double, DoubleScalarDivideDoubleColumn(val -26.28, col 23)(children: DoubleColUnaryMinus(col 21)(children: DoubleColUnaryMinus(col 6) -
 > 21:double) -> 23:double) -> 21:double, DoubleColDivideDoubleColumn(col 24, col 23)(children: DoubleColMultiplyDoubleColumn(col 6, col 23)(children: CastLongToDouble(col 14)(children: LongColAddLongColumn(col 12, col 5)(children: LongColUnaryMinus(col 1) -> 12:long) -> 14:long) -> 23:double) -> 24:double, CastLongToDouble(col 1) -> 23:double) -> 25:double
-                  Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint)
                     sort order: +++++++++++++++++++++
@@ -531,7 +531,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
             Execution mode: vectorized
@@ -550,19 +550,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20]
-                Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 40
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 40 Data size: 1200 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 40 Data size: 8600 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
index 15926be..0d44391 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_14.q.out
@@ -84,7 +84,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -94,7 +94,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterTimestampColLessTimestampColumn(col 9, col 8) -> boolean) -> boolean, FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3, val -257) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean
                     predicate: ((UDFToLong(ctinyint) <= cbigint) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1)) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257) or (cfloat < UDFToFloat(cint)))) (type: boolean)
-                    Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28 + cdouble)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
@@ -103,7 +103,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [8, 4, 6, 10, 5, 13]
                           selectExpressions: DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5) -> 12:double) -> 13:double
-                      Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: stddev_samp(_col5), max(_col1), stddev_pop(_col1), count(_col1), var_pop(_col1), var_samp(_col1)
                         Group By Vectorization:
@@ -118,7 +118,7 @@ STAGE PLANS:
                         keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                        Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
                           sort order: +++++
@@ -129,7 +129,7 @@ STAGE PLANS:
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumns: [5, 6, 7, 8, 9, 10]
-                          Statistics: Num rows: 606 Data size: 18603 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: float), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: bigint), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized
             Map Vectorization:
@@ -176,7 +176,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28 + _col2) (type: double), (- (-26.28 + _col2)) (type: double), _col5 (type: double), (_col1 * -26.28) (type: float), _col6 (type: float), (- _col1) (type: float), (- _col6) (type: float), ((- (-26.28 + _col2)) / 10.175) (type: double), _col7 (type: double), _col8 (type: bigint), (- ((- (-26.28 + _col2)) / 10.175)) (type: double), (-1.389 % _col5) (type: double), (UDFToDouble(_col1) - _col2) (type: double), _col9 (type: double), (_col9 % 10.175) (type: double), _col10 (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
@@ -185,7 +185,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [3, 1, 0, 4, 2, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
                       selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 11:double, DoubleColUnaryMinus(col 12)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 12:double) -> 13:double, DoubleColMultiplyDoubleScalar(col 1, val -26.280000686645508) -> 12:double, DoubleColUnaryMinus(col 1) -> 14:double, DoubleColUnaryMinus(col 6) -> 15:double, DoubleColDivideDoubleScalar(col 17, val 10.175)(children: DoubleColUnaryMinus(col 16)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 16:double) -> 17:double) -> 16:double, DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleScalar(col 18, val 10.175)(children: DoubleColUnaryMinus(col 17)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2) -> 17:double) -> 18:double) -> 17:double) -> 18:double, DoubleScalarModuloDoubleColumn(val -1.389, col 5) -> 17:double, DoubleColSubtractDoubleColumn(col 1, col 2)(children: col 1) -> 19:double, DoubleColModuloDoubleScalar(col 9, val 10.175) -> 20:double, Do
 ubleColUnaryMinus(col 21)(children: DoubleColSubtractDoubleColumn(col 1, col 2)(children: col 1) -> 21:double) -> 22:double
-                  Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp)
                     sort order: ++++
@@ -195,7 +195,7 @@ STAGE PLANS:
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                         valueColumns: [4, 11, 13, 5, 12, 6, 14, 15, 16, 7, 8, 18, 17, 19, 9, 20, 10, 22]
-                    Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double)
         Reducer 3 
             Execution mode: vectorized
@@ -220,13 +220,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]
-                Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 303 Data size: 9301 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 303 Data size: 65146 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
index 9a78ee5..6ef97c2 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_15.q.out
@@ -80,7 +80,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -90,7 +90,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean, FilterStringColLikeStringScalar(col 6, pattern 10%) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -75) -> boolean, FilterLongColEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 5, val -3728.0) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 like '%ss%') or (cstring1 like '10%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp), cboolean1 (type: boolean)
                       outputColumnNames: ctinyint, cint, cfloat, cdouble, cstring1, ctimestamp1, cboolean1
@@ -98,7 +98,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 2, 4, 5, 6, 8, 10]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: stddev_samp(cfloat), min(cdouble), stddev_samp(ctinyint), var_pop(ctinyint), var_samp(cint), stddev_pop(cint)
                         Group By Vectorization:
@@ -113,7 +113,7 @@ STAGE PLANS:
                         keys: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
                           sort order: +++++++
@@ -124,7 +124,7 @@ STAGE PLANS:
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumns: [7, 8, 9, 10, 11, 12]
-                          Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: double), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,variance:double>), _col11 (type: struct<count:bigint,sum:double,variance:double>), _col12 (type: struct<count:bigint,sum:double,variance:double>)
             Execution mode: vectorized
             Map Vectorization:
@@ -157,15 +157,15 @@ STAGE PLANS:
                 keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), _col7 (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col8 (type: double), (_col2 * 79.553) (type: double), (33.0 % _col0) (type: float), _col9 (type: double), _col10 (type: double), (-23.0 % _col2) (type: double), (- _col4) (type: tinyint), _col11 (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), _col12 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
-                  Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
                     sort order: +++++++
-                    Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double)
         Reducer 3 
             Reduce Vectorization:
@@ -176,10 +176,10 @@ STAGE PLANS:
               Select Operator
                 expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
-                  Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
index eb65dc3..84fc4b0 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_16.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -67,7 +67,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
-                    Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                       outputColumnNames: cdouble, cstring1, ctimestamp1
@@ -75,7 +75,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [5, 6, 8]
-                      Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
                         Group By Vectorization:
@@ -90,7 +90,7 @@ STAGE PLANS:
                         keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                        Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                           sort order: +++
@@ -101,7 +101,7 @@ STAGE PLANS:
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumns: [3, 4, 5]
-                          Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)
             Execution mode: vectorized
             Map Vectorization:
@@ -147,7 +147,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -156,13 +156,13 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
                       selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6)
-                  Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
index 9395a01..b43c506 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_17.q.out
@@ -65,7 +65,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -75,7 +75,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val -23) -> boolean, FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5, val 988888.0) -> boolean, FilterDecimalColGreaterDecimalScalar(col 12, val -863.257)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean) -> boolean, FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0, val 33) -> boolean, FilterLongColGreaterEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterDoubleColEqualDoubleColumn(col 4, col 5)(children: col 4) -> boolean) -> boolean) -> boolean
                     predicate: ((cbigint > -23) and ((cdouble <> 988888.0) or (CAST( cint AS decimal(13,3)) > -863.257)) and ((ctinyint >= 33) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble))) (type: boolean)
-                    Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58 + (- (- cdouble))) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13
@@ -84,7 +84,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [4, 6, 2, 8, 5, 3, 14, 15, 13, 16, 18, 19, 21, 17]
                           selectExpressions: DoubleColDivideDoubleColumn(col 4, col 13)(children: col 4, CastLongToDouble(col 0) -> 13:double) -> 14:double, LongColModuloLongColumn(col 2, col 3)(children: col 2) -> 15:long, DoubleColUnaryMinus(col 5) -> 13:double, DoubleColAddDoubleColumn(col 5, col 17)(children: DoubleColDivideDoubleColumn(col 4, col 16)(children: col 4, CastLongToDouble(col 0) -> 16:double) -> 17:double) -> 16:double, DoubleColDivideDoubleColumn(col 5, col 17)(children: CastLongToDouble(col 2) -> 17:double) -> 18:double, DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 19:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 20)(children: CastLongToDecimal(col 3) -> 20:decimal(19,0)) -> 21:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 22)(children: DoubleColUnaryMinus(col 17)(children: DoubleColUnaryMinus(col 5) -> 17:double) -> 22:double) -> 17:double
-                      Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col5 (type: bigint), _col0 (type: float)
                         sort order: ++
@@ -94,7 +94,7 @@ STAGE PLANS:
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                             valueColumns: [6, 2, 8, 5, 14, 15, 13, 16, 18, 19, 21, 17]
-                        Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
             Execution mode: vectorized
             Map Vectorization:
@@ -134,13 +134,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13]
-                Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
index a3c70bb..65ea41e 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_2.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -73,7 +73,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8, col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern b%) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterDoubleScalarNotEqualDoubleColumn(val -10669.0, col 12)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterLongScalarGreaterLongColumn(val 359, col 2) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: (((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15)) or ((cdouble < UDFToDouble(ctinyint)) and ((-10669.0 <> UDFToDouble(ctimestamp2)) or (359 > cint)))) (type: boolean)
-                    Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint), cfloat (type: float), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cbigint, cfloat, cdouble
@@ -81,7 +81,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 3, 4, 5]
-                      Statistics: Num rows: 4778 Data size: 146682 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4778 Data size: 1027287 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(csmallint), sum(cfloat), var_pop(cbigint), count(), min(ctinyint), avg(cdouble)
                         Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
index a335c7d..ccf302e 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_3.q.out
@@ -68,7 +68,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -78,7 +78,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -29071.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 1) -> 14:decimal(8,3)) -> boolean, FilterTimestampColGreaterTimestampColumn(col 8, col 9) -> boolean) -> boolean) -> boolean
                     predicate: (((UDFToFloat(cint) <= cfloat) and (79.553 <> CAST( cbigint AS decimal(22,3))) and (UDFToDouble(ctimestamp2) = -29071.0)) or ((UDFToDouble(cbigint) > cdouble) and (79.553 <= CAST( csmallint AS decimal(8,3))) and (ctimestamp1 > ctimestamp2))) (type: boolean)
-                    Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float)
                       outputColumnNames: ctinyint, csmallint, cint, cfloat
@@ -86,7 +86,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 4]
-                      Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: stddev_samp(csmallint), stddev_pop(ctinyint), stddev_samp(cfloat), sum(cfloat), avg(cint), stddev_pop(cint)
                         Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
index 3d0e700..8be788f 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_4.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -73,7 +73,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterLongScalarGreaterEqualLongColumn(val -89010, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterDoubleScalar(col 5, val 79.553) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -563, col 3) -> boolean, FilterExprOrExpr(children: FilterLongColNotEqualLongColumn(col 0, col 3)(children: col 0) -> boolean, FilterDoubleScalarGreaterEqualDoubleColumn(val -3728.0, col 5) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((UDFToInteger(csmallint) >= cint) or ((-89010 >= UDFToInteger(ctinyint)) and (cdouble > 79.553)) or ((-563 <> cbigint) and ((UDFToLong(ctinyint) <> cbigint) or (-3728.0 >= cdouble)))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cint (type: int), cdouble (type: double)
                       outputColumnNames: ctinyint, cint, cdouble
@@ -81,7 +81,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 2, 5]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: sum(cint), stddev_pop(cdouble), avg(cdouble), var_pop(cdouble), min(ctinyint)
                         Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_5.q.out b/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
index 2737d9b..fbe7715 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_5.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -67,7 +67,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNotNull(col 11) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %b%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringColLikeStringScalar(col 7, pattern a) -> boolean) -> boolean) -> boolean
                     predicate: ((cboolean2 is not null and (cstring1 like '%b%')) or ((UDFToDouble(ctinyint) = cdouble) and ctimestamp2 is not null and (cstring2 like 'a'))) (type: boolean)
-                    Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int)
                       outputColumnNames: ctinyint, csmallint, cint
@@ -75,7 +75,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2]
-                      Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint)
                         Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_6.q.out b/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
index 4906328..fa7f046 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_6.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -67,7 +67,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0, val 0) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 10, val 0) -> boolean, FilterLongColGreaterEqualLongColumn(col 11, col 10) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 3) -> boolean, FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %a) -> boolean, FilterDoubleColLessEqualDoubleScalar(col 4, val -257.0) -> boolean) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((ctinyint <> 0) and (((cboolean1 <= 0) and (cboolean2 >= cboolean1)) or (cbigint is not null and ((cstring2 like '%a') or (cfloat <= -257))))) (type: boolean)
-                    Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 11605 Data size: 2495116 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cboolean1 (type: boolean), cfloat (type: float), cstring1 (type: string), (988888 * UDFToInteger(csmallint)) (type: int), (- csmallint) (type: smallint), (- cfloat) (type: float), (-26.28 / UDFToDouble(cfloat)) (type: double), (cfloat * 359.0) (type: float), (cint % UDFToInteger(ctinyint)) (type: int), (- cdouble) (type: double), (UDFToInteger(ctinyint) - -75) (type: int), (762 * (cint % UDFToInteger(ctinyint))) (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
@@ -76,13 +76,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [10, 4, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21]
                           selectExpressions: LongScalarMultiplyLongColumn(val 988888, col 1)(children: col 1) -> 12:long, LongColUnaryMinus(col 1) -> 13:long, DoubleColUnaryMinus(col 4) -> 14:double, DoubleScalarDivideDoubleColumn(val -26.28, col 4)(children: col 4) -> 15:double, DoubleColMultiplyDoubleScalar(col 4, val 359.0) -> 16:double, LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 17:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColSubtractLongScalar(col 0, val -75)(children: col 0) -> 19:long, LongScalarMultiplyLongColumn(val 762, col 20)(children: LongColModuloLongColumn(col 2, col 0)(children: col 0) -> 20:long) -> 21:long
-                      Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 11605 Data size: 2495116 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 11605 Data size: 356269 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 11605 Data size: 2495116 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
index eb65dc3..84fc4b0 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_9.q.out
@@ -57,7 +57,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -67,7 +67,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -1.389) -> boolean, FilterStringGroupColLessStringScalar(col 6, val a) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389) or (cstring1 < 'a'))) (type: boolean)
-                    Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                       outputColumnNames: cdouble, cstring1, ctimestamp1
@@ -75,7 +75,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [5, 6, 8]
-                      Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count(cdouble), stddev_samp(cdouble), min(cdouble)
                         Group By Vectorization:
@@ -90,7 +90,7 @@ STAGE PLANS:
                         keys: cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                        Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
                           sort order: +++
@@ -101,7 +101,7 @@ STAGE PLANS:
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                               valueColumns: [3, 4, 5]
-                          Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: double)
             Execution mode: vectorized
             Map Vectorization:
@@ -147,7 +147,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double), KEY._col1 (type: string), KEY._col2 (type: timestamp)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: string), _col0 (type: double), _col2 (type: timestamp), (_col0 - 9763215.5639) (type: double), (- (_col0 - 9763215.5639)) (type: double), _col3 (type: bigint), _col4 (type: double), (- _col4) (type: double), (_col4 * UDFToDouble(_col3)) (type: double), _col5 (type: double), (9763215.5639 / _col0) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), _col4 (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12
@@ -156,13 +156,13 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [1, 0, 2, 6, 8, 3, 4, 7, 10, 5, 9, 12, 4]
                       selectExpressions: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 6:double, DoubleColUnaryMinus(col 7)(children: DoubleColSubtractDoubleScalar(col 0, val 9763215.5639) -> 7:double) -> 8:double, DoubleColUnaryMinus(col 4) -> 7:double, DoubleColMultiplyDoubleColumn(col 4, col 9)(children: CastLongToDouble(col 3) -> 9:double) -> 10:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 0) -> 9:double, DecimalColDivideDecimalScalar(col 11, val -1.389)(children: CastLongToDecimal(col 3) -> 11:decimal(19,0)) -> 12:decimal(28,6)
-                  Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 2048 Data size: 62872 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2048 Data size: 440327 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

[3/5] hive git commit: HIVE-17321: HoS: analyze ORC table doesn't compute raw data size when noscan/partialscan is not specified (Rui reviewed by Liyun and Xuefu)

Posted by li...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
index 788c2ee..5645e17 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_div0.q.out
@@ -21,7 +21,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -33,19 +33,19 @@ STAGE PLANS:
                         native: true
                         projectedOutputColumns: [12]
                         selectExpressions: DoubleColDivideDoubleScalar(col 5, val 0.0) -> 12:double
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Limit
                       Number of rows: 100
                       Limit Vectorization:
                           className: VectorLimitOperator
                           native: true
-                      Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -201,7 +201,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -211,7 +211,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3, val 0) -> boolean, FilterLongColLessLongScalar(col 3, val 100000000) -> boolean) -> boolean
                     predicate: ((cbigint > 0) and (cbigint < 100000000)) (type: boolean)
-                    Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: (cbigint - 988888) (type: bigint), (cdouble / UDFToDouble((cbigint - 988888))) (type: double), (1.2 / CAST( (cbigint - 988888) AS decimal(19,0))) (type: decimal(22,21))
                       outputColumnNames: _col0, _col1, _col2
@@ -220,7 +220,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [12, 15, 17]
                           selectExpressions: LongColSubtractLongScalar(col 3, val 988888) -> 12:long, DoubleColDivideDoubleColumn(col 5, col 14)(children: CastLongToDouble(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 14:double) -> 15:double, DecimalScalarDivideDecimalColumn(val 1.2, col 16)(children: CastLongToDecimal(col 13)(children: LongColSubtractLongScalar(col 3, val 988888) -> 13:long) -> 16:decimal(19,0)) -> 17:decimal(22,21)
-                      Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: bigint), _col1 (type: double)
                         sort order: ++
@@ -228,7 +228,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col2 (type: decimal(22,21))
             Execution mode: vectorized
@@ -257,19 +257,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2]
-                Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 100
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -418,7 +418,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -428,7 +428,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -500.0) -> boolean, FilterDoubleColLessDoubleScalar(col 5, val -199.0) -> boolean) -> boolean
                     predicate: ((cdouble >= -500.0) and (cdouble < -199.0)) (type: boolean)
-                    Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: (cdouble + 200.0) (type: double), (UDFToDouble(cbigint) / (cdouble + 200.0)) (type: double), ((cdouble + 200.0) / (cdouble + 200.0)) (type: double), (3.0 / (cdouble + 200.0)) (type: double), (1.2 / (cdouble + 200.0)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col4, _col5
@@ -437,7 +437,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [12, 15, 16, 14, 17]
                           selectExpressions: DoubleColAddDoubleScalar(col 5, val 200.0) -> 12:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: CastLongToDouble(col 3) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 15:double, DoubleColDivideDoubleColumn(col 13, col 14)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double, DoubleColAddDoubleScalar(col 5, val 200.0) -> 14:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 3.0, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 14:double, DoubleScalarDivideDoubleColumn(val 1.2, col 13)(children: DoubleColAddDoubleScalar(col 5, val 200.0) -> 13:double) -> 17:double
-                      Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: double), _col1 (type: double)
                         sort order: ++
@@ -445,7 +445,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col2 (type: double), _col4 (type: double), _col5 (type: double)
             Execution mode: vectorized
@@ -474,19 +474,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 1, 3, 4]
-                Statistics: Num rows: 1365 Data size: 41904 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1365 Data size: 293479 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 100
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 100 Data size: 3000 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 100 Data size: 21500 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
index 0b901be..6131f0f 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_pushdown.q.out
@@ -23,14 +23,14 @@ STAGE PLANS:
                 TableScan
                   alias: alltypesorc
                   filterExpr: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (UDFToDouble(cbigint) < cdouble) (type: boolean)
-                    Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cbigint (type: bigint)
                       outputColumnNames: cbigint
-                      Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(cbigint)
                         mode: hash

[2/5] hive git commit: HIVE-17321: HoS: analyze ORC table doesn't compute raw data size when noscan/partialscan is not specified (Rui reviewed by Liyun and Xuefu)

Posted by li...@apache.org.

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
index bfb2e4c..18a54ff 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_short_regress.q.out
@@ -91,7 +91,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -101,7 +101,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val 762, col 3) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 1) -> 12:double) -> boolean, FilterDoubleColGreaterDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -> boolean) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val a) -> boolean, FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13, val -1.389)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean, FilterStringGroupColNotEqualStringScalar(col 7, val a) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 14)(children: CastLongToDecimal(col 2) -> 14:decimal(13,3)) -> boolean, FilterLongColNotEqualLongColumn(col 11, col 10) -> boolean) 
 -> boolean) -> boolean
                     predicate: ((762 = cbigint) or ((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0) and (cdouble <> UDFToDouble(cint))) or (cstring1 = 'a') or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (79.553 <> CAST( cint AS decimal(13,3))) and (cboolean2 <> cboolean1))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble
@@ -109,7 +109,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 4, 5]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(cint), sum(cdouble), stddev_pop(cint), stddev_samp(csmallint), var_samp(cint), avg(cfloat), stddev_samp(cint), min(ctinyint), count(csmallint)
                         Group By Vectorization:
@@ -353,7 +353,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -363,7 +363,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 3, val 197) -> boolean, FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5, val -26.28) -> boolean, FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 1) -> 12:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 4)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss.*) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 4, val 79.5530014038086) -> boolean, FilterStringColLikeStringScalar(col 7, pattern 10%) -> boolean) -> boolean) -> boolean
                     predicate: (((cbigint <= 197) and (UDFToLong(cint) < cbigint)) or ((cdouble >= -26.28) and (UDFToDouble(csmallint) > cdouble)) or ((UDFToFloat(ctinyint) > cfloat) and cstring1 regexp '.*ss.*') or ((cfloat > 79.553) and (cstring2 like '10%'))) (type: boolean)
-                    Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble
@@ -371,7 +371,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 3, 5]
-                      Statistics: Num rows: 6826 Data size: 209555 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6826 Data size: 1467614 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: max(cint), var_pop(cbigint), stddev_pop(csmallint), max(cdouble), avg(ctinyint), min(cint), min(cdouble), stddev_samp(csmallint), var_samp(cint)
                         Group By Vectorization:
@@ -607,7 +607,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -617,7 +617,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8, col 9) -> boolean, FilterDoubleScalarEqualDoubleColumn(val 762.0, col 4) -> boolean, FilterStringGroupColEqualStringScalar(col 6, val ss) -> boolean, FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1, col 3)(children: col 1) -> boolean, FilterLongScalarEqualLongColumn(val 1, col 11) -> boolean) -> boolean, FilterExprAndExpr(children: SelectColumnIsNotNull(col 10) -> boolean, SelectColumnIsNotNull(col 9) -> boolean, FilterStringGroupColGreaterStringScalar(col 7, val a) -> boolean) -> boolean) -> boolean
                     predicate: ((ctimestamp1 = ctimestamp2) or (762 = cfloat) or (cstring1 = 'ss') or ((UDFToLong(csmallint) <= cbigint) and (1 = cboolean2)) or (cboolean1 is not null and ctimestamp2 is not null and (cstring2 > 'a'))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cdouble (type: double)
                       outputColumnNames: ctinyint, csmallint, cint, cbigint, cdouble
@@ -625,7 +625,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 3, 5]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: var_pop(cbigint), count(), max(ctinyint), stddev_pop(csmallint), max(cint), stddev_samp(cdouble), count(ctinyint), avg(ctinyint)
                         Group By Vectorization:
@@ -840,7 +840,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -850,7 +850,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9, col 8) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 3) -> 12:double) -> boolean, FilterStringScalarLessEqualStringGroupColumn(val ss, col 6) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterDoubleColGreaterEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 4, val 17.0) -> boolean) -> boolean
                     predicate: (((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and ('ss' <= cstring1)) or ((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0)) or (cfloat = 17)) (type: boolean)
-                    Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cint (type: int), cbigint (type: bigint), cfloat (type: float)
                       outputColumnNames: ctinyint, cint, cbigint, cfloat
@@ -858,7 +858,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 2, 3, 4]
-                      Statistics: Num rows: 8874 Data size: 272428 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 8874 Data size: 1907941 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(ctinyint), max(cbigint), stddev_samp(cint), var_pop(cint), var_pop(cbigint), max(cfloat)
                         Group By Vectorization:
@@ -1081,7 +1081,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1091,7 +1091,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6, pattern a.*) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %ss%) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val 1, col 11) -> boolean, FilterDecimalColLessDecimalScalar(col 12, val 79.553)(children: CastLongToDecimal(col 1) -> 12:decimal(8,3)) -> boolean, FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 0) -> 13:double) -> boolean, FilterDoubleColGreaterEqualDoubleColumn(col 4, col 13)(children: CastLongToFloatViaLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColGreaterLongColumn(col 0, col 3)(children: col 0) -> boolean
 ) -> boolean) -> boolean
                     predicate: ((cstring1 regexp 'a.*' and (cstring2 like '%ss%')) or ((1 <> cboolean2) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (-257 <> UDFToInteger(ctinyint))) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint))) (type: boolean)
-                    Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728 * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0) (type: double), (UDFToDouble(ctinyint) / 988888.0) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7))
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22
@@ -1100,7 +1100,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 14, 15, 17, 18, 20, 22, 24, 26, 13, 23, 28, 19, 30]
                           selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3) -> 14:long, LongColUnaryMinus(col 2) -> 15:long, DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 17:decimal(14,3), LongColUnaryMinus(col 1) -> 18:long, LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 20:long, LongColAddLongColumn(col 21, col 19)(children: LongColSubtractLongColumn(col 1, col 19)(children: LongColUnaryMinus(col 1) -> 19:long) -> 21:long, LongColUnaryMinus(col 1) -> 19:long) -> 22:long, DoubleColDivideDoubleColumn(col 13, col 23)(children: CastLongToDouble(col 2) -> 13:double, CastLongToDouble(col 2) -> 23:double) -> 24:double, DecimalColSubtractDecimalScalar(col 25, val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 16)(children: CastLongToDecimal(col 2) -> 16:decimal(10,0)) -> 25:decimal(14,3)) -> 26:decimal(15,3), DoubleColUnaryMinus(col 4) -
 > 13:double, DoubleColMultiplyDoubleScalar(col 5, val -89010.0) -> 23:double, DoubleColDivideDoubleScalar(col 27, val 988888.0)(children: CastLongToDouble(col 0) -> 27:double) -> 28:double, LongColUnaryMinus(col 0) -> 19:long, DecimalScalarDivideDecimalColumn(val 79.553, col 29)(children: CastLongToDecimal(col 0) -> 29:decimal(3,0)) -> 30:decimal(9,7)
-                      Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), _col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7))
                         sort order: +++++++++++++++++++++++
@@ -1108,7 +1108,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
             Map Vectorization:
@@ -1136,19 +1136,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-                Statistics: Num rows: 9898 Data size: 303864 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 9898 Data size: 2128105 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 50
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1377,7 +1377,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1387,7 +1387,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarGreaterLongColumn(val 197, col 0)(children: col 0) -> boolean, FilterLongColEqualLongColumn(col 2, col 3)(children: col 2) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 3, val 359) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern %ss) -> boolean, FilterDoubleColLessEqualDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 0) -> 12:double) -> boolean) -> boolean) -> boolean
                     predicate: (((197 > UDFToInteger(ctinyint)) and (UDFToLong(cint) = cbigint)) or (cbigint = 359) or (cboolean1 < 0) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) (type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569 % cbigint) (type: bigint), (359.0 - cdouble) (type: double), (- csmallint) (type: smallint)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24
@@ -1396,7 +1396,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 14, 16, 12, 13, 17, 19, 18, 21, 20, 22, 23, 26, 27, 24, 28]
                           selectExpressions: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 14:double, DecimalColModuloDecimalScalar(col 15, val 79.553)(children: CastLongToDecimal(col 3) -> 15:decimal(19,0)) -> 16:decimal(5,3), DoubleColUnaryMinus(col 17)(children: DoubleColDivideDoubleColumn(col 12, col 13)(children: CastLongToDouble(col 2) -> 12:double, CastLongToDouble(col 3) -> 13:double) -> 17:double) -> 12:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4) -> 13:double, DoubleColUnaryMinus(col 4) -> 17:double, DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 19:double, DoubleColModuloDoubleScalar(col 20, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4, col 18)(children: DoubleColUnaryMinus(col 4) -> 18:double) -> 20:double) -> 18:double, DoubleColMultiplyDoubleColumn(col 5, col 20)(children: CastLongToDouble(col 
 1) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColUnaryMinus(col 3) -> 22:long, DoubleColSubtractDoubleColumn(col 4, col 25)(children: col 4, DoubleColDivideDoubleColumn(col 23, col 24)(children: CastLongToDouble(col 2) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double) -> 23:double, LongColUnaryMinus(col 1) -> 26:long, LongScalarModuloLongColumn(val 3569, col 3) -> 27:long, DoubleScalarSubtractDoubleColumn(val 359.0, col 5) -> 24:double, LongColUnaryMinus(col 1) -> 28:long
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint)
                         sort order: +++++++++++++++++++++++++
@@ -1404,7 +1404,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
             Execution mode: vectorized
             Map Vectorization:
@@ -1432,19 +1432,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21]
-                Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 25
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 25 Data size: 750 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 25 Data size: 5375 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1622,7 +1622,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1632,7 +1632,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 12, val -26.28)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean, FilterStringColLikeStringScalar(col 7, pattern ss) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterStringGroupColGreaterEqualStringScalar(col 6, val ss) -> boolean, FilterDoubleColNotEqualDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 2) -> 13:double) -> boolean) -> boolean, FilterLongColEqualLongScalar(col 0, val -89010)(children: col 0) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13, col 4)(children: CastLongToFloatViaLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarLessEqualDecimalColumn(val -26.28, col 12)(children: CastLongToDecimal(col 1) -> 12:decimal(7,2)) -> boolean) -> 
 boolean) -> boolean
                     predicate: (((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss')) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((UDFToFloat(cbigint) <= cfloat) and (-26.28 <= CAST( csmallint AS decimal(7,2))))) (type: boolean)
-                    Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21
@@ -1641,7 +1641,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [2, 6, 11, 9, 5, 4, 3, 1, 10, 14, 15, 16, 13, 18, 19, 20, 22, 25, 27, 24, 17, 28]
                           selectExpressions: LongColAddLongColumn(col 2, col 1)(children: col 1) -> 14:long, LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 15:long, LongColUnaryMinus(col 3) -> 16:long, DoubleColUnaryMinus(col 4) -> 13:double, LongColAddLongColumn(col 17, col 3)(children: LongColSubtractLongColumn(col 3, col 0)(children: col 0) -> 17:long) -> 18:long, DoubleColDivideDoubleColumn(col 5, col 5) -> 19:double, DoubleColUnaryMinus(col 5) -> 20:double, LongColMultiplyLongColumn(col 17, col 21)(children: col 17, LongColUnaryMinus(col 3) -> 21:long) -> 22:long, DoubleColAddDoubleColumn(col 23, col 24)(children: DoubleColUnaryMinus(col 5) -> 23:double, CastLongToDouble(col 3) -> 24:double) -> 25:double, DecimalScalarDivideDecimalColumn(val -1.389, col 26)(children: CastLongToDecimal(col 0) -> 26:decimal(3,0)) -> 27:decimal(8,7), DoubleColModuloDoubleColumn(col 23, col 5)(children: CastLongToDouble(col 3) -> 23:double) -> 24:double, LongColUnaryMinus(col 1) -> 17:
 long, LongColAddLongColumn(col 1, col 21)(children: col 1, LongColAddLongColumn(col 2, col 1)(children: col 1) -> 21:long) -> 28:long
-                      Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col1 (type: string), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col4 (type: double), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int)
                         sort order: +++++++++++++++++++++++
@@ -1649,7 +1649,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col2 (type: boolean)
             Execution mode: vectorized
@@ -1678,19 +1678,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [7, 1, 23, 2, 6, 3, 4, 8, 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]
-                Statistics: Num rows: 10922 Data size: 335301 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 10922 Data size: 2348269 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 75
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 75 Data size: 2250 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 75 Data size: 16125 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -1925,7 +1925,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1935,7 +1935,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDecimalScalarGreaterEqualDecimalColumn(val -1.389, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterLongColLessLongColumn(col 1, col 0)(children: col 0) -> boolean, FilterLongScalarGreaterLongColumn(val -6432, col 1)(children: col 1) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5, col 4)(children: col 4) -> boolean, FilterStringGroupColLessEqualStringScalar(col 7, val a) -> boolean) -> boolean, FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6, pattern ss%) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val 10.175, col 13)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean) -> boolean
                     predicate: (((-1.389 >= CAST( cint AS decimal(13,3))) and (csmallint < UDFToShort(ctinyint)) and (-6432 > UDFToInteger(csmallint))) or ((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((cstring1 like 'ss%') and (10.175 > CAST( cbigint AS decimal(22,3))))) (type: boolean)
-                    Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15
@@ -1944,7 +1944,7 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [8, 7, 5, 4, 3, 1, 15, 16, 14, 17, 18, 20, 19, 21, 22, 24]
                           selectExpressions: DoubleColDivideDoubleScalar(col 14, val 3569.0)(children: CastLongToDouble(col 3) -> 14:double) -> 15:double, LongScalarSubtractLongColumn(val -257, col 1)(children: col 1) -> 16:long, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4) -> 14:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColMultiplyDoubleScalar(col 5, val 10.175) -> 18:double, DoubleColDivideDoubleColumn(col 19, col 4)(children: col 19, col 4) -> 20:double, DoubleColUnaryMinus(col 4) -> 19:double, LongColModuloLongColumn(col 2, col 1)(children: col 1) -> 21:long, DoubleColUnaryMinus(col 5) -> 22:double, DoubleColMultiplyDoubleColumn(col 5, col 23)(children: DoubleColUnaryMinus(col 5) -> 23:double) -> 24:double
-                      Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double)
                         sort order: +++++++++++++++
@@ -1952,7 +1952,7 @@ STAGE PLANS:
                             className: VectorReduceSinkObjectHashOperator
                             native: true
                             nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                        Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.1
                         value expressions: _col0 (type: timestamp)
             Execution mode: vectorized
@@ -1981,19 +1981,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14]
-                Statistics: Num rows: 3868 Data size: 118746 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3868 Data size: 831633 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 45
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 45 Data size: 1350 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 45 Data size: 9675 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2170,7 +2170,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -2180,7 +2180,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 1, val -257)(children: col 1) -> boolean, FilterExprOrExpr(children: FilterLongScalarEqualLongColumn(val -6432, col 1)(children: col 1) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 2) -> 12:double) -> boolean, FilterLongColLessEqualLongColumn(col 0, col 2)(children: col 0) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((UDFToInteger(csmallint) >= -257) and ((-6432 = UDFToInteger(csmallint)) or ((UDFToDouble(cint) >= cdouble) and (UDFToInteger(ctinyint) <= cint)))) (type: boolean)
-                    Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cbigint (type: bigint)
                       outputColumnNames: ctinyint, csmallint, cbigint
@@ -2188,7 +2188,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 3]
-                      Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: stddev_samp(csmallint), sum(cbigint), var_pop(ctinyint), count()
                         Group By Vectorization:
@@ -2203,7 +2203,7 @@ STAGE PLANS:
                         keys: csmallint (type: smallint)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                        Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: smallint)
                           sort order: +
@@ -2212,7 +2212,7 @@ STAGE PLANS:
                               className: VectorReduceSinkLongOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 2503 Data size: 76841 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2503 Data size: 538153 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: bigint), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
@@ -2247,7 +2247,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: smallint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: smallint), (UDFToInteger(_col0) % -75) (type: int), _col1 (type: double), (-1.389 / CAST( _col0 AS decimal(5,0))) (type: decimal(10,9)), _col2 (type: bigint), (UDFToDouble((UDFToInteger(_col0) % -75)) / UDFToDouble(_col2)) (type: double), (- (UDFToInteger(_col0) % -75)) (type: int), _col3 (type: double), (- (- (UDFToInteger(_col0) % -75))) (type: int), _col4 (type: bigint), (_col4 - -89010) (type: bigint)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
@@ -2256,7 +2256,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 5, 1, 7, 2, 11, 12, 3, 8, 4, 13]
                       selectExpressions: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 5:long, DecimalScalarDivideDecimalColumn(val -1.389, col 6)(children: CastLongToDecimal(col 0) -> 6:decimal(5,0)) -> 7:decimal(10,9), DoubleColDivideDoubleColumn(col 9, col 10)(children: CastLongToDouble(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 9:double, CastLongToDouble(col 2) -> 10:double) -> 11:double, LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 12:long, LongColUnaryMinus(col 13)(children: LongColUnaryMinus(col 8)(children: LongColModuloLongScalar(col 0, val -75)(children: col 0) -> 8:long) -> 13:long) -> 8:long, LongColSubtractLongScalar(col 4, val -89010) -> 13:long
-                  Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: smallint), _col1 (type: int), _col2 (type: double), _col3 (type: decimal(10,9)), _col4 (type: bigint), _col5 (type: double), _col6 (type: int), _col7 (type: double), _col8 (type: int), _col9 (type: bigint), _col10 (type: bigint)
                     sort order: +++++++++++
@@ -2264,7 +2264,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
             Execution mode: vectorized
@@ -2283,19 +2283,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
-                Statistics: Num rows: 1251 Data size: 38405 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1251 Data size: 268968 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2448,7 +2448,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -2458,7 +2458,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 2563.58) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 3, col 2)(children: col 2) -> boolean, FilterLongColLessLongColumn(col 1, col 2)(children: col 1) -> boolean, FilterDoubleColLessDoubleScalar(col 4, val -5638.14990234375) -> boolean) -> boolean, FilterDecimalScalarEqualDecimalColumn(val 2563.58, col 12)(children: CastLongToDecimal(col 0) -> 12:decimal(6,2)) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5, col 13)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterDecimalScalarGreaterDecimalColumn(val -5638.15, col 14)(children: CastLongToDecimal(col 3) -> 14:decimal(21,2)) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((cdouble > 2563.58) and (((cbigint >= UDFToLong(cint)) and (UDFToInteger(csmallint) < cint) and (cfloat < -5638.15)) or (2563.58 = CAST( ctinyint AS decimal(6,2))) or ((cdouble <= UDFToDouble(cbigint)) and (-5638.15 > CAST( cbigint AS decimal(21,2)))))) (type: boolean)
-                    Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cfloat (type: float), cdouble (type: double)
                       outputColumnNames: cfloat, cdouble
@@ -2466,7 +2466,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [4, 5]
-                      Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: var_samp(cdouble), count(cfloat), sum(cfloat), var_pop(cdouble), stddev_pop(cdouble), sum(cdouble)
                         Group By Vectorization:
@@ -2481,7 +2481,7 @@ STAGE PLANS:
                         keys: cdouble (type: double)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                        Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: double)
                           sort order: +
@@ -2490,7 +2490,7 @@ STAGE PLANS:
                               className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 2654 Data size: 81476 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 2654 Data size: 570619 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: struct<count:bigint,sum:double,variance:double>), _col2 (type: bigint), _col3 (type: double), _col4 (type: struct<count:bigint,sum:double,variance:double>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: double)
             Execution mode: vectorized
             Map Vectorization:
@@ -2525,7 +2525,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: double), _col1 (type: double), (2563.58 * _col1) (type: double), (- _col1) (type: double), _col2 (type: bigint), ((2563.58 * _col1) + -5638.15) (type: double), ((- _col1) * ((2563.58 * _col1) + -5638.15)) (type: double), _col3 (type: double), _col4 (type: double), (_col0 - (- _col1)) (type: double), _col5 (type: double), (_col0 + _col1) (type: double), (_col0 * 762.0) (type: double), _col6 (type: double), (-863.257 % (_col0 * 762.0)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -2534,7 +2534,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 7, 8, 2, 10, 11, 3, 4, 12, 5, 9, 13, 6, 15]
                       selectExpressions: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 7:double, DoubleColUnaryMinus(col 1) -> 8:double, DoubleColAddDoubleScalar(col 9, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 9:double) -> 10:double, DoubleColMultiplyDoubleColumn(col 9, col 12)(children: DoubleColUnaryMinus(col 1) -> 9:double, DoubleColAddDoubleScalar(col 11, val -5638.15)(children: DoubleScalarMultiplyDoubleColumn(val 2563.58, col 1) -> 11:double) -> 12:double) -> 11:double, DoubleColSubtractDoubleColumn(col 0, col 9)(children: DoubleColUnaryMinus(col 1) -> 9:double) -> 12:double, DoubleColAddDoubleColumn(col 0, col 1) -> 9:double, DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 13:double, DoubleScalarModuloDoubleColumn(val -863.257, col 14)(children: DoubleColMultiplyDoubleScalar(col 0, val 762.0) -> 14:double) -> 15:double
-                  Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: double)
                     sort order: +
@@ -2542,7 +2542,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: double), _col14 (type: double)
         Reducer 3 
             Execution mode: vectorized
@@ -2561,13 +2561,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13]
-                Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 1327 Data size: 40738 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 1327 Data size: 285309 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -2770,7 +2770,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -2780,7 +2780,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterDoubleColNotEqualDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongScalarNotEqualLongColumn(val -257, col 0)(children: col 0) -> boolean, SelectColumnIsNotNull(col 11) -> boolean, FilterStringColRegExpStringScalar(col 6, pattern .*ss) -> boolean, FilterDoubleScalarLessDoubleColumn(val -3.0, col 12)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean) -> boolean, FilterDoubleColEqualDoubleScalar(col 12, val -5.0)(children: CastTimestampToDouble(col 9) -> 12:double) -> boolean, FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 12, val 0.0)(children: CastTimestampToDouble(col 8) -> 12:double) -> boolean, FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean) -> boolean, FilterDoubleColEqualDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 2) -> 12:double) -
 > boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 10) -> boolean, FilterDoubleColLessDoubleColumn(col 4, col 12)(children: CastLongToFloatViaLongToDouble(col 2) -> 12:double) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((UDFToDouble(ctimestamp1) <> 0.0) and (((-257 <> UDFToInteger(ctinyint)) and cboolean2 is not null and cstring1 regexp '.*ss' and (-3.0 < UDFToDouble(ctimestamp1))) or (UDFToDouble(ctimestamp2) = -5.0) or ((UDFToDouble(ctimestamp1) < 0.0) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint))))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cfloat (type: float), cdouble (type: double), cstring1 (type: string), ctimestamp1 (type: timestamp)
                       outputColumnNames: ctinyint, csmallint, cint, cfloat, cdouble, cstring1, ctimestamp1
@@ -2788,7 +2788,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 4, 5, 6, 8]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: stddev_pop(cint), avg(csmallint), count(), min(ctinyint), var_samp(csmallint), var_pop(cfloat), avg(cint), var_samp(cfloat), avg(cfloat), min(cdouble), var_pop(csmallint), stddev_pop(ctinyint), sum(cint)
                         Group By Vectorization:
@@ -2803,7 +2803,7 @@ STAGE PLANS:
                         keys: ctimestamp1 (type: timestamp), cstring1 (type: string)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: timestamp), _col1 (type: string)
                           sort order: ++
@@ -2812,7 +2812,7 @@ STAGE PLANS:
                               className: VectorReduceSinkMultiKeyOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col2 (type: struct<count:bigint,sum:double,variance:double>), _col3 (type: struct<count:bigint,sum:double,input:smallint>), _col4 (type: bigint), _col5 (type: tinyint), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: struct<count:bigint,sum:double,variance:double>), _col8 (type: struct<count:bigint,sum:double,input:int>), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,input:float>), _col11 (type: double), _col12 (type: struct<count:bigint,sum:double,variance:double>), _col13 (type: struct<count:bigint,sum:double,variance:double>), _col14 (type: bigint)
             Execution mode: vectorized
             Map Vectorization:
@@ -2847,7 +2847,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: timestamp), KEY._col1 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), (_col2 * 10.175) (type: double), (- _col2) (type: double), _col3 (type: double), (- _col2) (type: double), (-26.28 - _col2) (type: double), _col4 (type: bigint), (- _col4) (type: bigint), ((-26.28 - _col2) * (- _col2)) (type: double), _col5 (type: tinyint), (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4))) (type: double), (- (_col2 * 10.175)) (type: double), _col6 (type: double), (_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- _col2)) (type: double), (UDFToDouble((- _col4)) / _col2) (type: double), _col7 (type: double), (10.175 / _col3) (type: double), _col8 (type: double), _col9 (type: double), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) (type: double), (- (- (_col2 * 10.175))) (type: double), _col10 (type: double), (((_col6 + (((-26.28 - _col2) * (-
  _col2)) * UDFToDouble((- _col4)))) - (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) * 10.175) (type: double), (10.175 % (10.175 / _col3)) (type: double), (- _col5) (type: tinyint), _col11 (type: double), _col12 (type: double), (- ((-26.28 - _col2) * (- _col2))) (type: double), ((- _col2) % _col10) (type: double), (-26.28 / CAST( (- _col5) AS decimal(3,0))) (type: decimal(8,6)), _col13 (type: double), _col14 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) / _col7) (type: double), (- (- _col4)) (type: bigint), _col4 (type: bigint), ((_col6 + (((-26.28 - _col2) * (- _col2)) * UDFToDouble((- _col4)))) % -26.28) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38
@@ -2856,7 +2856,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 15, 16, 3, 17, 18, 4, 19, 22, 5, 21, 23, 6, 20, 26, 27, 7, 25, 8, 9, 29, 28, 10, 30, 32, 24, 11, 12, 31, 34, 37, 13, 14, 38, 40, 4, 39]
                       selectExpressions: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 15:double, DoubleColUnaryMinus(col 2) -> 16:double, DoubleColUnaryMinus(col 2) -> 17:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 18:double, LongColUnaryMinus(col 4) -> 19:long, DoubleColMultiplyDoubleColumn(col 20, col 21)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 23, col 20)(children: DoubleColMultiplyDoubleColumn(col 20, col 21)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 21:double) -> 23:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 20)(children: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 20:double) -> 23:double, DoubleColAddDoubleColumn(col 6, col 25)(children: DoubleColMultiplyDoubleColumn(col
  26, col 20)(children: DoubleColMultiplyDoubleColumn(col 20, col 25)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 20:double, DoubleColUnaryMinus(col 2) -> 25:double) -> 26:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 20:double) -> 25:double) -> 20:double, DoubleColUnaryMinus(col 25)(children: DoubleColUnaryMinus(col 2) -> 25:double) -> 26:double, DoubleColDivideDoubleColumn(col 25, col 2)(children: CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 25:double) -> 27:double, DoubleScalarDivideDoubleColumn(val 10.175, col 3) -> 25:double, DoubleColSubtractDoubleColumn(col 28, col 30)(children: DoubleColAddDoubleColumn(col 6, col 29)(children: DoubleColMultiplyDoubleColumn(col 30, col 28)(children: DoubleColMultiplyDoubleColumn(col 28, col 29)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 28:double, DoubleColUnaryMinus(col 2) -> 29:double) -> 30:double, CastLongToDouble(col 24)(children:
  LongColUnaryMinus(col 4) -> 24:long) -> 28:double) -> 29:double) -> 28:double, DoubleColMultiplyDoubleColumn(col 31, col 29)(children: DoubleColMultiplyDoubleColumn(col 29, col 30)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 29:double, DoubleColUnaryMinus(col 2) -> 30:double) -> 31:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 29:double) -> 30:double) -> 29:double, DoubleColUnaryMinus(col 30)(children: DoubleColUnaryMinus(col 28)(children: DoubleColMultiplyDoubleScalar(col 2, val 10.175) -> 28:double) -> 30:double) -> 28:double, DoubleColMultiplyDoubleScalar(col 31, val 10.175)(children: DoubleColSubtractDoubleColumn(col 30, col 32)(children: DoubleColAddDoubleColumn(col 6, col 31)(children: DoubleColMultiplyDoubleColumn(col 32, col 30)(children: DoubleColMultiplyDoubleColumn(col 30, col 31)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 30:double, DoubleColUnaryMinus(col 2) -> 31:double) -> 32:double, Cast
 LongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 30:double) -> 31:double) -> 30:double, DoubleColMultiplyDoubleColumn(col 33, col 31)(children: DoubleColMultiplyDoubleColumn(col 31, col 32)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 31:double, DoubleColUnaryMinus(col 2) -> 32:double) -> 33:double, CastLongToDouble(col 24)(children: LongColUnaryMinus(col 4) -> 24:long) -> 31:double) -> 32:double) -> 31:double) -> 30:double, DoubleScalarModuloDoubleColumn(val 10.175, col 31)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 3) -> 31:double) -> 32:double, LongColUnaryMinus(col 5) -> 24:long, DoubleColUnaryMinus(col 34)(children: DoubleColMultiplyDoubleColumn(col 31, col 33)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 31:double, DoubleColUnaryMinus(col 2) -> 33:double) -> 34:double) -> 31:double, DoubleColModuloDoubleColumn(col 33, col 10)(children: DoubleColUnaryMinus(col 2) -> 33:double) -> 34:double, DecimalScala
 rDivideDecimalColumn(val -26.28, col 36)(children: CastLongToDecimal(col 35)(children: LongColUnaryMinus(col 5) -> 35:long) -> 36:decimal(3,0)) -> 37:decimal(8,6), DoubleColDivideDoubleColumn(col 33, col 7)(children: DoubleColAddDoubleColumn(col 6, col 38)(children: DoubleColMultiplyDoubleColumn(col 39, col 33)(children: DoubleColMultiplyDoubleColumn(col 33, col 38)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 33:double, DoubleColUnaryMinus(col 2) -> 38:double) -> 39:double, CastLongToDouble(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 33:double) -> 38:double) -> 33:double) -> 38:double, LongColUnaryMinus(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 40:long, DoubleColModuloDoubleScalar(col 33, val -26.28)(children: DoubleColAddDoubleColumn(col 6, col 39)(children: DoubleColMultiplyDoubleColumn(col 41, col 33)(children: DoubleColMultiplyDoubleColumn(col 33, col 39)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 2) -> 33:double
 , DoubleColUnaryMinus(col 2) -> 39:double) -> 41:double, CastLongToDouble(col 35)(children: LongColUnaryMinus(col 4) -> 35:long) -> 33:double) -> 39:double) -> 33:double) -> 39:double
-                  Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double)
                     sort order: +++++++++++++++++++++++++++++++++++++++
@@ -2864,7 +2864,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                     TopN Hash Memory Usage: 0.1
         Reducer 3 
             Execution mode: vectorized
@@ -2883,19 +2883,19 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38]
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 50
                   Limit Vectorization:
                       className: VectorLimitOperator
                       native: true
-                  Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
                     File Sink Vectorization:
                         className: VectorFileSinkOperator
                         native: false
-                    Statistics: Num rows: 50 Data size: 1500 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 50 Data size: 10750 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -3173,7 +3173,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -3183,7 +3183,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 1) -> 12:double) -> boolean, FilterLongColEqualLongColumn(col 11, col 10) -> boolean, FilterDecimalColLessEqualDecimalScalar(col 13, val -863.257)(children: CastLongToDecimal(col 3) -> 13:decimal(22,3)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2, val -257) -> boolean, SelectColumnIsNotNull(col 6) -> boolean, FilterLongColGreaterEqualLongScalar(col 10, val 1) -> boolean) -> boolean, FilterStringColRegExpStringScalar(col 7, pattern b) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1, col 0)(children: col 0) -> boolean, SelectColumnIsNull(col 9) -> boolean) -> boolean) -> boolean, SelectColumnIsNotNull(col 10) -> boolean) -> boolean
                     predicate: ((((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and cstring1 is not null and (cboolean1 >= 1)) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null)) and cboolean1 is not null) (type: boolean)
-                    Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cboolean1 (type: boolean)
                       outputColumnNames: ctinyint, csmallint, cint, cbigint, cfloat, cdouble, cboolean1
@@ -3191,7 +3191,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 1, 2, 3, 4, 5, 10]
-                      Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: max(cfloat), sum(cbigint), var_samp(cint), avg(cdouble), min(cbigint), var_pop(cbigint), sum(cint), stddev_samp(ctinyint), stddev_pop(csmallint), avg(cint)
                         Group By Vectorization:
@@ -3206,7 +3206,7 @@ STAGE PLANS:
                         keys: cboolean1 (type: boolean)
                         mode: hash
                         outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                        Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE
                         Reduce Output Operator
                           key expressions: _col0 (type: boolean)
                           sort order: +
@@ -3215,7 +3215,7 @@ STAGE PLANS:
                               className: VectorReduceSinkLongOperator
                               native: true
                               nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                          Statistics: Num rows: 10239 Data size: 314333 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10239 Data size: 2201421 Basic stats: COMPLETE Column stats: NONE
                           value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: struct<count:bigint,sum:double,variance:double>), _col4 (type: struct<count:bigint,sum:double,input:double>), _col5 (type: bigint), _col6 (type: struct<count:bigint,sum:double,variance:double>), _col7 (type: bigint), _col8 (type: struct<count:bigint,sum:double,variance:double>), _col9 (type: struct<count:bigint,sum:double,variance:double>), _col10 (type: struct<count:bigint,sum:double,input:int>)
             Execution mode: vectorized
             Map Vectorization:
@@ -3250,7 +3250,7 @@ STAGE PLANS:
                 keys: KEY._col0 (type: boolean)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10
-                Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: boolean), _col1 (type: float), (- _col1) (type: float), (-26.28 / UDFToDouble(_col1)) (type: double), _col2 (type: bigint), (CAST( _col2 AS decimal(19,0)) - 10.175) (type: decimal(23,3)), _col3 (type: double), (_col3 % UDFToDouble(_col1)) (type: double), (10.175 + (- _col1)) (type: float), _col4 (type: double), (UDFToDouble((CAST( _col2 AS decimal(19,0)) - 10.175)) + _col3) (type: double), _col5 (type: bigint), _col6 (type: double), (- (10.175 + (- _col1))) (type: float), (79.553 / _col6) (type: double), (_col3 % (79.553 / _col6)) (type: double), _col7 (type: bigint), _col8 (type: double), (-1.389 * CAST( _col5 AS decimal(19,0))) (type: decimal(24,3)), (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0)))) (type: decimal(25,3)), _col9 (type: double), (- (CAST( _col7 AS decimal(19,0)) - (-1.389 * CAST( _col5 AS decimal(19,0))))) (type: decimal(25,3)), _col10 (type: double), (- _col10) (type: double), (_col10 * UDFToDoubl
 e(_col7)) (type: double)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25
@@ -3259,7 +3259,7 @@ STAGE PLANS:
                       native: true
                       projectedOutputColumns: [0, 1, 11, 12, 2, 14, 3, 15, 17, 4, 19, 5, 6, 16, 20, 22, 7, 8, 23, 26, 9, 28, 10, 21, 30]
                       selectExpressions: DoubleColUnaryMinus(col 1) -> 11:double, DoubleScalarDivideDoubleColumn(val -26.28, col 1)(children: col 1) -> 12:double, DecimalColSubtractDecimalScalar(col 13, val 10.175)(children: CastLongToDecimal(col 2) -> 13:decimal(19,0)) -> 14:decimal(23,3), DoubleColModuloDoubleColumn(col 3, col 1)(children: col 1) -> 15:double, DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16)(children: DoubleColUnaryMinus(col 1) -> 16:double) -> 17:double, DoubleColAddDoubleColumn(col 16, col 3)(children: CastDecimalToDouble(col 18)(children: DecimalColSubtractDecimalScalar(col 13, val 10.175)(children: CastLongToDecimal(col 2) -> 13:decimal(19,0)) -> 18:decimal(23,3)) -> 16:double) -> 19:double, DoubleColUnaryMinus(col 20)(children: DoubleScalarAddDoubleColumn(val 10.175000190734863, col 16)(children: DoubleColUnaryMinus(col 1) -> 16:double) -> 20:double) -> 16:double, DoubleScalarDivideDoubleColumn(val 79.553, col 6) -> 20:double, DoubleColModuloDouble
 Column(col 3, col 21)(children: DoubleScalarDivideDoubleColumn(val 79.553, col 6) -> 21:double) -> 22:double, DecimalScalarMultiplyDecimalColumn(val -1.389, col 13)(children: CastLongToDecimal(col 5) -> 13:decimal(19,0)) -> 23:decimal(24,3), DecimalColSubtractDecimalColumn(col 13, col 25)(children: CastLongToDecimal(col 7) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24)(children: CastLongToDecimal(col 5) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 26:decimal(25,3), FuncNegateDecimalToDecimal(col 27)(children: DecimalColSubtractDecimalColumn(col 13, col 25)(children: CastLongToDecimal(col 7) -> 13:decimal(19,0), DecimalScalarMultiplyDecimalColumn(val -1.389, col 24)(children: CastLongToDecimal(col 5) -> 24:decimal(19,0)) -> 25:decimal(24,3)) -> 27:decimal(25,3)) -> 28:decimal(25,3), DoubleColUnaryMinus(col 10) -> 21:double, DoubleColMultiplyDoubleColumn(col 10, col 29)(children: CastLongToDouble(col 7) -> 29:double) -> 30:double
-                  Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean)
                     sort order: +
@@ -3267,7 +3267,7 @@ STAGE PLANS:
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                    Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col1 (type: float), _col2 (type: float), _col3 (type: double), _col4 (type: bigint), _col5 (type: decimal(23,3)), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint), _col12 (type: double), _col13 (type: float), _col14 (type: double), _col15 (type: double), _col17 (type: bigint), _col18 (type: double), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,3)), _col21 (type: double), _col22 (type: decimal(25,3)), _col23 (type: double), _col24 (type: double), _col25 (type: double)
         Reducer 3 
             Execution mode: vectorized
@@ -3286,13 +3286,13 @@ STAGE PLANS:
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24]
-                Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 5119 Data size: 157151 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5119 Data size: 1100602 Basic stats: COMPLETE Column stats: NONE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

[5/5] hive git commit: HIVE-17321: HoS: analyze ORC table doesn't compute raw data size when noscan/partialscan is not specified (Rui reviewed by Liyun and Xuefu)

Posted by li...@apache.org.

HIVE-17321: HoS: analyze ORC table doesn't compute raw data size when noscan/partialscan is not specified (Rui reviewed by Liyun and Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/002626d5
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/002626d5
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/002626d5

Branch: refs/heads/master
Commit: 002626d5e6225a467a92f669c512938a3a079042
Parents: c4e2e2a
Author: Rui Li <li...@apache.org>
Authored: Thu Aug 17 15:04:46 2017 +0800
Committer: Rui Li <li...@apache.org>
Committed: Thu Aug 17 15:04:46 2017 +0800

----------------------------------------------------------------------
 .../parse/spark/SparkProcessAnalyzeTable.java   |  17 +-
 .../clientpositive/spark/limit_pushdown.q.out   |  60 +++----
 .../clientpositive/spark/vector_elt.q.out       |  12 +-
 .../spark/vector_left_outer_join.q.out          |  16 +-
 .../spark/vector_outer_join1.q.out              |  40 ++---
 .../spark/vector_outer_join2.q.out              |  16 +-
 .../clientpositive/spark/vectorization_0.q.out  | 110 ++++++-------
 .../clientpositive/spark/vectorization_1.q.out  |   6 +-
 .../clientpositive/spark/vectorization_10.q.out |   8 +-
 .../clientpositive/spark/vectorization_11.q.out |   8 +-
 .../clientpositive/spark/vectorization_12.q.out |  20 +--
 .../clientpositive/spark/vectorization_13.q.out |  44 ++---
 .../clientpositive/spark/vectorization_14.q.out |  20 +--
 .../clientpositive/spark/vectorization_15.q.out |  20 +--
 .../clientpositive/spark/vectorization_16.q.out |  16 +-
 .../clientpositive/spark/vectorization_17.q.out |  12 +-
 .../clientpositive/spark/vectorization_2.q.out  |   6 +-
 .../clientpositive/spark/vectorization_3.q.out  |   6 +-
 .../clientpositive/spark/vectorization_4.q.out  |   6 +-
 .../clientpositive/spark/vectorization_5.q.out  |   6 +-
 .../clientpositive/spark/vectorization_6.q.out  |   8 +-
 .../clientpositive/spark/vectorization_9.q.out  |  16 +-
 .../spark/vectorization_div0.q.out              |  36 ++--
 .../spark/vectorization_pushdown.q.out          |   6 +-
 .../spark/vectorization_short_regress.q.out     | 164 +++++++++----------
 .../clientpositive/spark/vectorized_case.q.out  |  24 +--
 .../spark/vectorized_mapjoin.q.out              |  16 +-
 .../spark/vectorized_math_funcs.q.out           |   8 +-
 .../spark/vectorized_nested_mapjoin.q.out       |  24 +--
 .../spark/vectorized_shufflejoin.q.out          |  20 +--
 .../spark/vectorized_string_funcs.q.out         |   8 +-
 31 files changed, 394 insertions(+), 385 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
index 52af3af..a2876e1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkProcessAnalyzeTable.java
@@ -99,16 +99,25 @@ public class SparkProcessAnalyzeTable implements NodeProcessor {
       Preconditions.checkArgument(alias != null, "AssertionError: expected alias to be not null");
 
       SparkWork sparkWork = context.currentTask.getWork();
-      boolean partialScan = parseContext.getQueryProperties().isPartialScanAnalyzeCommand();
-      boolean noScan = parseContext.getQueryProperties().isNoScanAnalyzeCommand();
-      if ((OrcInputFormat.class.isAssignableFrom(inputFormat) ||
-          MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) && (noScan || partialScan)) {
+      if (OrcInputFormat.class.isAssignableFrom(inputFormat) ||
+          MapredParquetInputFormat.class.isAssignableFrom(inputFormat)) {
+        // For ORC & Parquet, all the following statements are the same
+        // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS partialscan;
         // ANALYZE TABLE T [PARTITION (...)] COMPUTE STATISTICS noscan;
         // There will not be any Spark job above this task
         StatsNoJobWork snjWork = new StatsNoJobWork(tableScan.getConf().getTableMetadata().getTableSpec());
         snjWork.setStatsReliable(parseContext.getConf().getBoolVar(
             HiveConf.ConfVars.HIVE_STATS_RELIABLE));
+        // If partition is specified, get pruned partition list
+        Set<Partition> confirmedParts = GenMapRedUtils.getConfirmedPartitionsForScan(tableScan);
+        if (confirmedParts.size() > 0) {
+          Table source = tableScan.getConf().getTableMetadata();
+          List<String> partCols = GenMapRedUtils.getPartitionColumns(tableScan);
+          PrunedPartitionList partList = new PrunedPartitionList(source, confirmedParts, partCols,
+              false);
+          snjWork.setPrunedPartitionList(partList);
+        }
         Task<StatsNoJobWork> snjTask = TaskFactory.get(snjWork, parseContext.getConf());
         snjTask.setParentTasks(null);
         context.rootTasks.remove(context.currentTask);

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
index ac74eaf..b441f27 100644
--- a/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
+++ b/ql/src/test/results/clientpositive/spark/limit_pushdown.q.out
@@ -369,21 +369,21 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cdouble (type: double)
                     outputColumnNames: cdouble
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       keys: cdouble (type: double)
                       mode: hash
                       outputColumnNames: _col0
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: double)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: double)
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.3
         Reducer 2 
             Reduce Operator Tree:
@@ -391,13 +391,13 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -458,40 +458,40 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cdouble (type: double)
                     outputColumnNames: ctinyint, cdouble
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       keys: ctinyint (type: tinyint), cdouble (type: double)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: double)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(_col1)
                   keys: _col0 (type: tinyint)
                   mode: complete
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                   Limit
                     Number of rows: 20
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -552,40 +552,40 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cdouble (type: double)
                     outputColumnNames: ctinyint, cdouble
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       keys: ctinyint (type: tinyint), cdouble (type: double)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: double)
                         sort order: ++
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
         Reducer 2 
             Reduce Operator Tree:
               Group By Operator
                 keys: KEY._col0 (type: tinyint), KEY._col1 (type: double)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Group By Operator
                   aggregations: count(_col1)
                   keys: _col0 (type: tinyint)
                   mode: complete
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                   Limit
                     Number of rows: 20
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                       table:
                           input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                           output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -646,22 +646,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
                     outputColumnNames: ctinyint, cstring1, cstring2
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count(DISTINCT cstring1), count(DISTINCT cstring2)
                       keys: ctinyint (type: tinyint), cstring1 (type: string), cstring2 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: tinyint), _col1 (type: string), _col2 (type: string)
                         sort order: +++
                         Map-reduce partition columns: _col0 (type: tinyint)
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                         TopN Hash Memory Usage: 0.3
         Reducer 2 
             Reduce Operator Tree:
@@ -670,13 +670,13 @@ STAGE PLANS:
                 keys: KEY._col0 (type: tinyint)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2
-                Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                 Limit
                   Number of rows: 20
-                  Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 20 Data size: 600 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4300 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vector_elt.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_elt.q.out b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
index b49462a..00f5292 100644
--- a/ql/src/test/results/clientpositive/spark/vector_elt.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_elt.q.out
@@ -23,7 +23,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -33,7 +33,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterLongColGreaterLongScalar(col 0, val 0) -> boolean
                     predicate: (ctinyint > 0) (type: boolean)
-                    Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ((UDFToInteger(ctinyint) % 2) + 1) (type: int), cstring1 (type: string), cint (type: int), elt(((UDFToInteger(ctinyint) % 2) + 1), cstring1, cint) (type: string)
                       outputColumnNames: _col0, _col1, _col2, _col3
@@ -42,19 +42,19 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [13, 6, 2, 16]
                           selectExpressions: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 13:long, VectorElt(columns [14, 6, 15])(children: LongColAddLongScalar(col 12, val 1)(children: LongColModuloLongScalar(col 0, val 2)(children: col 0) -> 12:long) -> 14:long, col 6, CastLongToString(col 2) -> 15:String) -> 16:string
-                      Statistics: Num rows: 4096 Data size: 125745 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE
                       Limit
                         Number of rows: 10
                         Limit Vectorization:
                             className: VectorLimitOperator
                             native: true
-                        Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                         File Output Operator
                           compressed: false
                           File Sink Vectorization:
                               className: VectorFileSinkOperator
                               native: false
-                          Statistics: Num rows: 10 Data size: 300 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 10 Data size: 2150 Basic stats: COMPLETE Column stats: NONE
                           table:
                               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -140,7 +140,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: COMPLETE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
index 1e93de9..e46fd64 100644
--- a/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_left_outer_join.q.out
@@ -34,11 +34,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: cd
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cint (type: int)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       keys:
                         0 _col1 (type: int)
@@ -58,11 +58,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: hd
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: _col0
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       keys:
                         0 _col0 (type: tinyint)
@@ -89,11 +89,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: c
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint), cint (type: int)
                     outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -103,7 +103,7 @@ STAGE PLANS:
                       outputColumnNames: _col0
                       input vertices:
                         1 Map 3
-                      Statistics: Num rows: 13516 Data size: 414960 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 13516 Data size: 2906160 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Left Outer Join 0 to 1
@@ -112,7 +112,7 @@ STAGE PLANS:
                           1 _col0 (type: tinyint)
                         input vertices:
                           1 Map 4
-                        Statistics: Num rows: 14867 Data size: 456456 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 14867 Data size: 3196776 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count()
                           mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
index 5554788..a93c833 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out
@@ -244,7 +244,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: cd
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -255,7 +255,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       Spark Hash Table Sink Vectorization:
                           className: VectorSparkHashTableSinkOperator
@@ -288,7 +288,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: c
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -299,7 +299,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -319,13 +319,13 @@ STAGE PLANS:
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
                       input vertices:
                         1 Map 2
-                      Statistics: Num rows: 16 Data size: 4436 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 16 Data size: 4436 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -417,7 +417,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: hd
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -428,7 +428,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       Spark Hash Table Sink Vectorization:
                           className: VectorSparkHashTableSinkOperator
@@ -461,7 +461,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: c
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -472,7 +472,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -490,13 +490,13 @@ STAGE PLANS:
                       outputColumnNames: _col0
                       input vertices:
                         1 Map 2
-                      Statistics: Num rows: 16 Data size: 4436 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 16 Data size: 4436 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -679,7 +679,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: cd
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -690,7 +690,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [2]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       Spark Hash Table Sink Vectorization:
                           className: VectorSparkHashTableSinkOperator
@@ -718,7 +718,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: hd
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -729,7 +729,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       Spark Hash Table Sink Vectorization:
                           className: VectorSparkHashTableSinkOperator
@@ -764,7 +764,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: c
-                  Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -775,7 +775,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0, 2]
-                    Statistics: Num rows: 15 Data size: 4033 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 15 Data size: 3483 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -793,7 +793,7 @@ STAGE PLANS:
                       outputColumnNames: _col0
                       input vertices:
                         1 Map 3
-                      Statistics: Num rows: 16 Data size: 4436 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 16 Data size: 3831 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Left Outer Join 0 to 1
@@ -811,7 +811,7 @@ STAGE PLANS:
                         outputColumnNames: _col0
                         input vertices:
                           1 Map 4
-                        Statistics: Num rows: 17 Data size: 4879 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 17 Data size: 4214 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count(), sum(_col0)
                           Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
index 8ca54f9..bb922e7 100644
--- a/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
+++ b/ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out
@@ -260,7 +260,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: cd
-                  Statistics: Num rows: 20 Data size: 5277 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -271,7 +271,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [2]
-                    Statistics: Num rows: 20 Data size: 5277 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       Spark Hash Table Sink Vectorization:
                           className: VectorSparkHashTableSinkOperator
@@ -299,7 +299,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: hd
-                  Statistics: Num rows: 20 Data size: 5277 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -310,7 +310,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [3]
-                    Statistics: Num rows: 20 Data size: 5277 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE
                     Spark HashTable Sink Operator
                       Spark Hash Table Sink Vectorization:
                           className: VectorSparkHashTableSinkOperator
@@ -345,7 +345,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: c
-                  Statistics: Num rows: 20 Data size: 5277 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -356,7 +356,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [2, 3]
-                    Statistics: Num rows: 20 Data size: 5277 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 20 Data size: 4431 Basic stats: COMPLETE Column stats: NONE
                     Map Join Operator
                       condition map:
                            Left Outer Join 0 to 1
@@ -374,7 +374,7 @@ STAGE PLANS:
                       outputColumnNames: _col1
                       input vertices:
                         1 Map 3
-                      Statistics: Num rows: 22 Data size: 5804 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 22 Data size: 4874 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Left Outer Join 0 to 1
@@ -392,7 +392,7 @@ STAGE PLANS:
                         outputColumnNames: _col1
                         input vertices:
                           1 Map 4
-                        Statistics: Num rows: 24 Data size: 6384 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 24 Data size: 5361 Basic stats: COMPLETE Column stats: NONE
                         Group By Operator
                           aggregations: count(), sum(_col1)
                           Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
index 9c39b33..503efa2 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_0.q.out
@@ -34,7 +34,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -45,7 +45,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0]
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count()
                       Group By Vectorization:
@@ -213,7 +213,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -224,7 +224,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [0]
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(ctinyint)
                       Group By Vectorization:
@@ -401,11 +401,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: ctinyint (type: tinyint)
                     outputColumnNames: ctinyint
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint)
                       mode: hash
@@ -537,7 +537,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -548,7 +548,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [3]
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(cbigint), max(cbigint), count(cbigint), count()
                       Group By Vectorization:
@@ -716,7 +716,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -727,7 +727,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [3]
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(cbigint)
                       Group By Vectorization:
@@ -904,11 +904,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cbigint (type: bigint)
                     outputColumnNames: cbigint
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint)
                       mode: hash
@@ -1040,7 +1040,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1051,7 +1051,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [4]
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: min(cfloat), max(cfloat), count(cfloat), count()
                       Group By Vectorization:
@@ -1219,7 +1219,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1230,7 +1230,7 @@ STAGE PLANS:
                         className: VectorSelectOperator
                         native: true
                         projectedOutputColumns: [4]
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: sum(cfloat)
                       Group By Vectorization:
@@ -1407,11 +1407,11 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   Select Operator
                     expressions: cfloat (type: float)
                     outputColumnNames: cfloat
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat)
                       mode: hash
@@ -1581,7 +1581,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -1591,7 +1591,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 7, pattern %b%) -> boolean, FilterDecimalScalarNotEqualDecimalColumn(val 79.553, col 12)(children: CastLongToDecimal(col 2) -> 12:decimal(13,3)) -> boolean, FilterDoubleColLessDoubleColumn(col 13, col 5)(children: CastLongToDouble(col 3) -> 13:double) -> boolean, FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 0, col 1)(children: col 0) -> boolean, FilterLongColEqualLongScalar(col 11, val 1) -> boolean, FilterLongScalarEqualLongColumn(val 3569, col 0)(children: col 0) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 like '%b%') or (79.553 <> CAST( cint AS decimal(13,3))) or (UDFToDouble(cbigint) < cdouble) or ((UDFToShort(ctinyint) >= csmallint) and (cboolean2 = 1) and (3569 = UDFToInteger(ctinyint)))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cbigint (type: bigint), cfloat (type: float)
                       outputColumnNames: ctinyint, cbigint, cfloat
@@ -1599,7 +1599,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 3, 4]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint)
                         Group By Vectorization:
@@ -1776,14 +1776,14 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: ((cstring1 like 'a%') or (cstring1 like 'b%') or (cstring1 like 'c%') or ((length(cstring1) < 50) and (cstring1 like '%n') and (length(cstring1) > 0))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: count()
                         mode: hash
@@ -1816,7 +1816,7 @@ STAGE PLANS:
                     name default.alltypesorc
                     numFiles 1
                     numRows 12288
-                    rawDataSize 0
+                    rawDataSize 2641964
                     serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -1837,7 +1837,7 @@ STAGE PLANS:
                       name default.alltypesorc
                       numFiles 1
                       numRows 12288
-                      rawDataSize 0
+                      rawDataSize 2641964
                       serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30584,22 +30584,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean)
-                    Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                        Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30636,7 +30636,7 @@ STAGE PLANS:
                     name default.alltypesorc
                     numFiles 1
                     numRows 12288
-                    rawDataSize 0
+                    rawDataSize 2641964
                     serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30657,7 +30657,7 @@ STAGE PLANS:
                       name default.alltypesorc
                       numFiles 1
                       numRows 12288
-                      rawDataSize 0
+                      rawDataSize 2641964
                       serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30698,22 +30698,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean)
-                    Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                        Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30749,7 +30749,7 @@ STAGE PLANS:
                     name default.alltypesorc
                     numFiles 1
                     numRows 12288
-                    rawDataSize 0
+                    rawDataSize 2641964
                     serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30770,7 +30770,7 @@ STAGE PLANS:
                       name default.alltypesorc
                       numFiles 1
                       numRows 12288
-                      rawDataSize 0
+                      rawDataSize 2641964
                       serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30811,22 +30811,22 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         GlobalTableId: 0
 #### A masked pattern was here ####
                         NumFilesPerFileSink: 1
-                        Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -30863,7 +30863,7 @@ STAGE PLANS:
                     name default.alltypesorc
                     numFiles 1
                     numRows 12288
-                    rawDataSize 0
+                    rawDataSize 2641964
                     serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30884,7 +30884,7 @@ STAGE PLANS:
                       name default.alltypesorc
                       numFiles 1
                       numRows 12288
-                      rawDataSize 0
+                      rawDataSize 2641964
                       serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30926,24 +30926,24 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   GatherStats: false
                   Filter Operator
                     isSamplingPred: false
                     predicate: (cstring1) IN ('biology', 'history', 'topology') (type: boolean)
-                    Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                     Group By Operator
                       aggregations: count()
                       keys: cstring1 (type: string)
                       mode: hash
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         null sort order: a
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 6144 Data size: 188618 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 6144 Data size: 1320982 Basic stats: COMPLETE Column stats: NONE
                         tag: -1
                         value expressions: _col1 (type: bigint)
                         auto parallelism: false
@@ -30967,7 +30967,7 @@ STAGE PLANS:
                     name default.alltypesorc
                     numFiles 1
                     numRows 12288
-                    rawDataSize 0
+                    rawDataSize 2641964
                     serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -30988,7 +30988,7 @@ STAGE PLANS:
                       name default.alltypesorc
                       numFiles 1
                       numRows 12288
-                      rawDataSize 0
+                      rawDataSize 2641964
                       serialization.ddl struct alltypesorc { byte ctinyint, i16 csmallint, i32 cint, i64 cbigint, float cfloat, double cdouble, string cstring1, string cstring2, timestamp ctimestamp1, timestamp ctimestamp2, bool cboolean1, bool cboolean2}
                       serialization.format 1
                       serialization.lib org.apache.hadoop.hive.ql.io.orc.OrcSerde
@@ -31008,16 +31008,16 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: bigint), _col0 (type: string)
                   outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col1 (type: string)
                     null sort order: a
                     sort order: +
-                    Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                     tag: -1
                     value expressions: _col0 (type: bigint)
                     auto parallelism: false
@@ -31028,13 +31028,13 @@ STAGE PLANS:
               Select Operator
                 expressions: VALUE._col0 (type: bigint), KEY.reducesinkkey0 (type: string)
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
                 File Output Operator
                   compressed: false
                   GlobalTableId: 0
 #### A masked pattern was here ####
                   NumFilesPerFileSink: 1
-                  Statistics: Num rows: 3072 Data size: 94309 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 3072 Data size: 660491 Basic stats: COMPLETE Column stats: NONE
 #### A masked pattern was here ####
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_1.q.out b/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
index 78f1517..ec8a433 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_1.q.out
@@ -59,7 +59,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -69,7 +69,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5, col 12)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterLongColGreaterLongScalar(col 11, val 0) -> boolean) -> boolean, FilterLongColLessLongColumn(col 3, col 0)(children: col 0) -> boolean, FilterLongColGreaterLongColumn(col 2, col 3)(children: col 2) -> boolean, FilterLongColLessLongScalar(col 10, val 0) -> boolean) -> boolean
                     predicate: (((cdouble > UDFToDouble(ctinyint)) and (cboolean2 > 0)) or (cbigint < UDFToLong(ctinyint)) or (UDFToLong(cint) > cbigint) or (cboolean1 < 0)) (type: boolean)
-                    Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: ctinyint (type: tinyint), cint (type: int), cfloat (type: float), cdouble (type: double)
                       outputColumnNames: ctinyint, cint, cfloat, cdouble
@@ -77,7 +77,7 @@ STAGE PLANS:
                           className: VectorSelectOperator
                           native: true
                           projectedOutputColumns: [0, 2, 4, 5]
-                      Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                       Group By Operator
                         aggregations: var_pop(ctinyint), sum(cfloat), max(ctinyint), max(cint), var_samp(cdouble), count(cint)
                         Group By Vectorization:

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
index 4e9cce3..1f95357 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_10.q.out
@@ -63,7 +63,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -73,7 +73,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7, val 10) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 12, col 5)(children: CastLongToDouble(col 0) -> 12:double) -> boolean, FilterDecimalScalarGreaterEqualDecimalColumn(val -5638.15, col 13)(children: CastLongToDecimal(col 0) -> 13:decimal(6,2)) -> boolean) -> boolean, FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5, val 6981.0) -> boolean, FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 14, val 9763215.5639)(children: CastLongToDecimal(col 1) -> 14:decimal(11,4)) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (-5638.15 >= CAST( ctinyint AS decimal(6,2)))) or ((cdouble > 6981.0) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean)
-                    Statistics: Num rows: 5461 Data size: 167650 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639 - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14
@@ -82,13 +82,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [5, 8, 0, 10, 6, 12, 16, 15, 17, 19, 20, 18, 22, 23, 25]
                           selectExpressions: DoubleColUnaryMinus(col 5) -> 12:double, DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 16:double, DoubleColModuloDoubleScalar(col 17, val 33.0)(children: DoubleColAddDoubleColumn(col 5, col 15)(children: CastLongToDouble(col 1) -> 15:double) -> 17:double) -> 15:double, DoubleColUnaryMinus(col 5) -> 17:double, DoubleColModuloDoubleColumn(col 18, col 5)(children: CastLongToDouble(col 0) -> 18:double) -> 19:double, LongColModuloLongColumn(col 0, col 1)(children: col 0) -> 20:long, DoubleColUnaryMinus(col 5) -> 18:double, LongColMultiplyLongColumn(col 3, col 21)(children: col 21) -> 22:long, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 24)(children: DoubleColAddDoubleColumn(col 5, col 23)(children: CastLongToDouble(col 1) -> 23:double) -> 24:double) -> 23:double, DoubleColUnaryMinus(col 24)(children: DoubleColUnaryMinus(col 5) -> 24:double) -> 25:double
-                      Statistics: Num rows: 5461 Data size: 167650 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 5461 Data size: 167650 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 5461 Data size: 1174134 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/002626d5/ql/src/test/results/clientpositive/spark/vectorization_11.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/spark/vectorization_11.q.out b/ql/src/test/results/clientpositive/spark/vectorization_11.q.out
index f79c3a0..50307e9 100644
--- a/ql/src/test/results/clientpositive/spark/vectorization_11.q.out
+++ b/ql/src/test/results/clientpositive/spark/vectorization_11.q.out
@@ -45,7 +45,7 @@ STAGE PLANS:
             Map Operator Tree:
                 TableScan
                   alias: alltypesorc
-                  Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
                       projectedOutputColumns: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
@@ -55,7 +55,7 @@ STAGE PLANS:
                         native: true
                         predicateExpression: FilterExprOrExpr(children: FilterStringGroupColEqualStringGroupColumn(col 7, col 6) -> boolean, FilterExprAndExpr(children: SelectColumnIsNull(col 8) -> boolean, FilterStringColLikeStringScalar(col 6, pattern %a) -> boolean) -> boolean) -> boolean
                     predicate: ((cstring2 = cstring1) or (ctimestamp1 is null and (cstring1 like '%a'))) (type: boolean)
-                    Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0) (type: double), (cdouble * -5638.15) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
@@ -64,13 +64,13 @@ STAGE PLANS:
                           native: true
                           projectedOutputColumns: [6, 10, 5, 8, 12, 13, 14, 16, 15]
                           selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1)(children: col 1) -> 12:long, DoubleColSubtractDoubleScalar(col 5, val 9763215.5639) -> 13:double, DoubleColUnaryMinus(col 5) -> 14:double, DoubleColAddDoubleScalar(col 15, val 6981.0)(children: DoubleColUnaryMinus(col 5) -> 15:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5, val -5638.15) -> 15:double
-                      Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                       File Output Operator
                         compressed: false
                         File Sink Vectorization:
                             className: VectorFileSinkOperator
                             native: false
-                        Statistics: Num rows: 9216 Data size: 282927 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 9216 Data size: 1981473 Basic stats: COMPLETE Column stats: NONE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat