You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2016/12/16 17:06:49 UTC

hive git commit: HIVE-15122: Hive: Upcasting types should not obscure stats (min/max/ndv) (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 3da29fe7e -> 2ae78f01b


HIVE-15122: Hive: Upcasting types should not obscure stats (min/max/ndv) (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ae78f01
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ae78f01
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ae78f01

Branch: refs/heads/master
Commit: 2ae78f01be77e636c44e87965f6c34b942967ea0
Parents: 3da29fe
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Dec 15 18:27:25 2016 +0000
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Fri Dec 16 17:06:27 2016 +0000

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/stats/StatsUtils.java |  39 ++++-
 .../clientpositive/annotate_stats_join_pkfk.q   |  41 +++++
 .../annotate_stats_join_pkfk.q.out              | 171 +++++++++++++++++++
 .../llap/vector_char_simple.q.out               |   4 +-
 .../llap/vector_varchar_simple.q.out            |   4 +-
 .../clientpositive/llap/vectorized_casts.q.out  |   4 +-
 6 files changed, 252 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2ae78f01/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index a718264..d1f717b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -30,12 +30,10 @@ import java.util.Map.Entry;
 import java.util.Set;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
-import java.util.concurrent.Executors;
 import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 import java.util.concurrent.Future;
 
-
-import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -100,6 +98,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObje
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableTimestampObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hive.common.util.AnnotationUtils;
@@ -110,6 +109,7 @@ import org.slf4j.LoggerFactory;
 import com.google.common.base.Joiner;
 import com.google.common.collect.Lists;
 import com.google.common.math.LongMath;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 public class StatsUtils {
 
@@ -1307,11 +1307,31 @@ public class StatsUtils {
         countDistincts = 1;
       }
     } else if (end instanceof ExprNodeGenericFuncDesc) {
-
-      // udf projection
       ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end;
       colName = engfd.getName();
       colType = engfd.getTypeString();
+
+      // If it is a widening cast, we do not change NDV, min, max
+      if (isWideningCast(engfd) && engfd.getChildren().get(0) instanceof ExprNodeColumnDesc) {
+        // cast on single column
+        ColStatistics stats = parentStats.getColumnStatisticsFromColName(engfd.getCols().get(0));
+        if (stats != null) {
+          ColStatistics newStats;
+          try {
+            newStats = stats.clone();
+          } catch (CloneNotSupportedException e) {
+            LOG.warn("error cloning stats, this should not happen");
+            return null;
+          }
+          newStats.setColumnName(colName);
+          colType = colType.toLowerCase();
+          newStats.setColumnType(colType);
+          newStats.setAvgColLen(getAvgColLenOf(conf, oi, colType));
+          return newStats;
+        }
+      }
+
+      // fallback to default
       countDistincts = getNDVFor(engfd, numRows, parentStats);
     } else if (end instanceof ExprNodeColumnListDesc) {
 
@@ -1341,6 +1361,15 @@ public class StatsUtils {
     return colStats;
   }
 
+  private static boolean isWideningCast(ExprNodeGenericFuncDesc engfd) {
+    GenericUDF udf = engfd.getGenericUDF();
+    if (!FunctionRegistry.isOpCast(udf)) {
+      // It is not a cast
+      return false;
+    }
+    return TypeInfoUtils.implicitConvertible(engfd.getChildren().get(0).getTypeInfo(),
+            engfd.getTypeInfo());
+  }
 
   public static Long addWithExpDecay (List<Long> distinctVals) {
     // Exponential back-off for NDVs.

http://git-wip-us.apache.org/repos/asf/hive/blob/2ae78f01/ql/src/test/queries/clientpositive/annotate_stats_join_pkfk.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/annotate_stats_join_pkfk.q b/ql/src/test/queries/clientpositive/annotate_stats_join_pkfk.q
index aa62c60..f94994a 100644
--- a/ql/src/test/queries/clientpositive/annotate_stats_join_pkfk.q
+++ b/ql/src/test/queries/clientpositive/annotate_stats_join_pkfk.q
@@ -69,6 +69,40 @@ create table store
 )
 row format delimited fields terminated by '|';
 
+create table store_bigint
+(
+    s_store_sk                bigint,
+    s_store_id                string,
+    s_rec_start_date          string,
+    s_rec_end_date            string,
+    s_closed_date_sk          int,
+    s_store_name              string,
+    s_number_employees        int,
+    s_floor_space             int,
+    s_hours                   string,
+    s_manager                 string,
+    s_market_id               int,
+    s_geography_class         string,
+    s_market_desc             string,
+    s_market_manager          string,
+    s_division_id             int,
+    s_division_name           string,
+    s_company_id              int,
+    s_company_name            string,
+    s_street_number           string,
+    s_street_name             string,
+    s_street_type             string,
+    s_suite_number            string,
+    s_city                    string,
+    s_county                  string,
+    s_state                   string,
+    s_zip                     string,
+    s_country                 string,
+    s_gmt_offset              float,
+    s_tax_precentage          float
+)
+row format delimited fields terminated by '|';
+
 create table customer_address
 (
     ca_address_sk             int,
@@ -88,11 +122,14 @@ create table customer_address
 row format delimited fields terminated by '|';
 
 load data local inpath '../../data/files/store.txt' overwrite into table store;
+load data local inpath '../../data/files/store.txt' overwrite into table store_bigint;
 load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales;
 load data local inpath '../../data/files/customer_address.txt' overwrite into table customer_address;
 
 analyze table store compute statistics;
 analyze table store compute statistics for columns s_store_sk, s_floor_space;
+analyze table store_bigint compute statistics;
+analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space;
 analyze table store_sales compute statistics;
 analyze table store_sales compute statistics for columns ss_store_sk, ss_addr_sk, ss_quantity;
 analyze table customer_address compute statistics;
@@ -100,6 +137,9 @@ analyze table customer_address compute statistics for columns ca_address_sk;
 
 explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk);
 
+-- widening cast: inferred PK-FK, thus same row count as previous query
+explain select s.s_store_sk from store_bigint s join store_sales ss on (s.s_store_sk = ss.ss_store_sk);
+
 explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0;
 
 explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_company_id > 0 and ss.ss_quantity > 10;
@@ -120,4 +160,5 @@ explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk =
 
 drop table store_sales;
 drop table store;
+drop table store_bigint;
 drop table customer_address;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ae78f01/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
index 6588db2..c581aff 100644
--- a/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
+++ b/ql/src/test/results/clientpositive/annotate_stats_join_pkfk.q.out
@@ -148,6 +148,78 @@ row format delimited fields terminated by '|'
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@store
+PREHOOK: query: create table store_bigint
+(
+    s_store_sk                bigint,
+    s_store_id                string,
+    s_rec_start_date          string,
+    s_rec_end_date            string,
+    s_closed_date_sk          int,
+    s_store_name              string,
+    s_number_employees        int,
+    s_floor_space             int,
+    s_hours                   string,
+    s_manager                 string,
+    s_market_id               int,
+    s_geography_class         string,
+    s_market_desc             string,
+    s_market_manager          string,
+    s_division_id             int,
+    s_division_name           string,
+    s_company_id              int,
+    s_company_name            string,
+    s_street_number           string,
+    s_street_name             string,
+    s_street_type             string,
+    s_suite_number            string,
+    s_city                    string,
+    s_county                  string,
+    s_state                   string,
+    s_zip                     string,
+    s_country                 string,
+    s_gmt_offset              float,
+    s_tax_precentage          float
+)
+row format delimited fields terminated by '|'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@store_bigint
+POSTHOOK: query: create table store_bigint
+(
+    s_store_sk                bigint,
+    s_store_id                string,
+    s_rec_start_date          string,
+    s_rec_end_date            string,
+    s_closed_date_sk          int,
+    s_store_name              string,
+    s_number_employees        int,
+    s_floor_space             int,
+    s_hours                   string,
+    s_manager                 string,
+    s_market_id               int,
+    s_geography_class         string,
+    s_market_desc             string,
+    s_market_manager          string,
+    s_division_id             int,
+    s_division_name           string,
+    s_company_id              int,
+    s_company_name            string,
+    s_street_number           string,
+    s_street_name             string,
+    s_street_type             string,
+    s_suite_number            string,
+    s_city                    string,
+    s_county                  string,
+    s_state                   string,
+    s_zip                     string,
+    s_country                 string,
+    s_gmt_offset              float,
+    s_tax_precentage          float
+)
+row format delimited fields terminated by '|'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@store_bigint
 PREHOOK: query: create table customer_address
 (
     ca_address_sk             int,
@@ -196,6 +268,14 @@ POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite i
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@store
+PREHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_bigint
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@store_bigint
+POSTHOOK: query: load data local inpath '../../data/files/store.txt' overwrite into table store_bigint
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@store_bigint
 PREHOOK: query: load data local inpath '../../data/files/store_sales.txt' overwrite into table store_sales
 PREHOOK: type: LOAD
 #### A masked pattern was here ####
@@ -228,6 +308,22 @@ POSTHOOK: query: analyze table store compute statistics for columns s_store_sk,
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@store
 #### A masked pattern was here ####
+PREHOOK: query: analyze table store_bigint compute statistics
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_bigint
+PREHOOK: Output: default@store_bigint
+POSTHOOK: query: analyze table store_bigint compute statistics
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_bigint
+POSTHOOK: Output: default@store_bigint
+PREHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space
+PREHOOK: type: QUERY
+PREHOOK: Input: default@store_bigint
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table store_bigint compute statistics for columns s_store_sk, s_floor_space
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@store_bigint
+#### A masked pattern was here ####
 PREHOOK: query: analyze table store_sales compute statistics
 PREHOOK: type: QUERY
 PREHOOK: Input: default@store_sales
@@ -325,6 +421,73 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
+PREHOOK: query: -- widening cast: inferred PK-FK, thus same row count as previous query
+explain select s.s_store_sk from store_bigint s join store_sales ss on (s.s_store_sk = ss.ss_store_sk)
+PREHOOK: type: QUERY
+POSTHOOK: query: -- widening cast: inferred PK-FK, thus same row count as previous query
+explain select s.s_store_sk from store_bigint s join store_sales ss on (s.s_store_sk = ss.ss_store_sk)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: s
+            Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: s_store_sk is not null (type: boolean)
+              Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: s_store_sk (type: bigint)
+                outputColumnNames: _col0
+                Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: bigint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: bigint)
+                  Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: COMPLETE
+          TableScan
+            alias: ss
+            Statistics: Num rows: 1000 Data size: 3860 Basic stats: COMPLETE Column stats: COMPLETE
+            Filter Operator
+              predicate: ss_store_sk is not null (type: boolean)
+              Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE
+              Select Operator
+                expressions: ss_store_sk (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: UDFToLong(_col0) (type: bigint)
+                  sort order: +
+                  Map-reduce partition columns: UDFToLong(_col0) (type: bigint)
+                  Statistics: Num rows: 964 Data size: 3720 Basic stats: COMPLETE Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          keys:
+            0 _col0 (type: bigint)
+            1 UDFToLong(_col0) (type: bigint)
+          outputColumnNames: _col0
+          Statistics: Num rows: 964 Data size: 7712 Basic stats: COMPLETE Column stats: COMPLETE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 964 Data size: 7712 Basic stats: COMPLETE Column stats: COMPLETE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
 PREHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select s.s_store_sk from store s join store_sales ss on (s.s_store_sk = ss.ss_store_sk) where s.s_store_sk > 0
@@ -1057,6 +1220,14 @@ POSTHOOK: query: drop table store
 POSTHOOK: type: DROPTABLE
 POSTHOOK: Input: default@store
 POSTHOOK: Output: default@store
+PREHOOK: query: drop table store_bigint
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@store_bigint
+PREHOOK: Output: default@store_bigint
+POSTHOOK: query: drop table store_bigint
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@store_bigint
+POSTHOOK: Output: default@store_bigint
 PREHOOK: query: drop table customer_address
 PREHOOK: type: DROPTABLE
 PREHOOK: Input: default@customer_address

http://git-wip-us.apache.org/repos/asf/hive/blob/2ae78f01/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
index 3dea73d..063170d 100644
--- a/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_char_simple.q.out
@@ -306,10 +306,10 @@ STAGE PLANS:
                   Select Operator
                     expressions: CAST( _col0 AS CHAR(12) (type: char(12))
                     outputColumnNames: _col0
-                    Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 10 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 10 Data size: 960 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 10 Data size: 768 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                           output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/2ae78f01/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
index edb67f1..118130e 100644
--- a/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
+++ b/ql/src/test/results/clientpositive/llap/vector_varchar_simple.q.out
@@ -306,10 +306,10 @@ STAGE PLANS:
                   Select Operator
                     expressions: CAST( _col0 AS varchar(25)) (type: varchar(25))
                     outputColumnNames: _col0
-                    Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
                     File Output Operator
                       compressed: false
-                      Statistics: Num rows: 10 Data size: 1090 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 10 Data size: 872 Basic stats: COMPLETE Column stats: COMPLETE
                       table:
                           input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                           output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat

http://git-wip-us.apache.org/repos/asf/hive/blob/2ae78f01/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
index bcddce5..61267dd 100644
--- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
+++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out
@@ -176,10 +176,10 @@ STAGE PLANS:
                     Select Operator
                       expressions: UDFToBoolean(ctinyint) (type: boolean), UDFToBoolean(csmallint) (type: boolean), UDFToBoolean(cint) (type: boolean), UDFToBoolean(cbigint) (type: boolean), UDFToBoolean(cfloat) (type: boolean), UDFToBoolean(cdouble) (type: boolean), cboolean1 (type: boolean), UDFToBoolean((cbigint * 0)) (type: boolean), UDFToBoolean(ctimestamp1) (type: boolean), UDFToBoolean(cstring1) (type: boolean), UDFToInteger(ctinyint) (type: int), UDFToInteger(csmallint) (type: int), cint (type: int), UDFToInteger(cbigint) (type: int), UDFToInteger(cfloat) (type: int), UDFToInteger(cdouble) (type: int), UDFToInteger(cboolean1) (type: int), UDFToInteger(ctimestamp1) (type: int), UDFToInteger(cstring1) (type: int), UDFToInteger(substr(cstring1, 1, 1)) (type: int), UDFToByte(cfloat) (type: tinyint), UDFToShort(cfloat) (type: smallint), UDFToLong(cfloat) (type: bigint), UDFToDouble(ctinyint) (type: double), UDFToDouble(csmallint) (type: double), UDFToDouble(cint) (type: double), 
 UDFToDouble(cbigint) (type: double), UDFToDouble(cfloat) (type: double), cdouble (type: double), UDFToDouble(cboolean1) (type: double), UDFToDouble(ctimestamp1) (type: double), UDFToDouble(cstring1) (type: double), UDFToDouble(substr(cstring1, 1, 1)) (type: double), UDFToFloat(cint) (type: float), UDFToFloat(cdouble) (type: float), CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp), UDFToString(ctinyint) (type: string), UDFToString(csmallint) (type: string), UDFToString(cint) (type: string), UDFToString(cbigint) (type: 
 string), UDFToString(cfloat) (type: string), UDFToString(cdouble) (type: string), UDFToString(cboolean1) (type: string), UDFToString((cbigint * 0)) (type: string), UDFToString(ctimestamp1) (type: string), cstring1 (type: string), UDFToString(CAST( cstring1 AS CHAR(10)) (type: string), UDFToString(CAST( cstring1 AS varchar(10))) (type: string), UDFToFloat(UDFToInteger(cfloat)) (type: float), UDFToDouble((cint * 2)) (type: double), UDFToString(sin(cfloat)) (type: string), (UDFToDouble(UDFToFloat(cint)) + UDFToDouble(cboolean1)) (type: double)
                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61
-                      Statistics: Num rows: 6144 Data size: 17929060 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 6144 Data size: 16117100 Basic stats: COMPLETE Column stats: COMPLETE
                       File Output Operator
                         compressed: false
-                        Statistics: Num rows: 6144 Data size: 17929060 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 6144 Data size: 16117100 Basic stats: COMPLETE Column stats: COMPLETE
                         table:
                             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat