You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/07/03 17:26:50 UTC

[1/2] hive git commit: HIVE-19326 : stats auto gather: incorrect aggregation during UNION queries (may lead to incorrect results) (Zoltan Haindrich via Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 4dbf991b4 -> 2ca70b919


http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/union_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/union_stats.q.out b/ql/src/test/results/clientpositive/union_stats.q.out
index 73e880b..1acf5c8 100644
--- a/ql/src/test/results/clientpositive/union_stats.q.out
+++ b/ql/src/test/results/clientpositive/union_stats.q.out
@@ -454,6 +454,249 @@ POSTHOOK: query: create table t2 like src
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t2
+PREHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-2 is a root stage
+  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
+  Stage-5
+  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
+  Stage-3 depends on stages: Stage-0, Stage-10
+  Stage-4
+  Stage-6
+  Stage-7 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-2
+  Stage-9 depends on stages: Stage-1, Stage-10
+  Stage-10 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Union
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.t1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.t2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+          TableScan
+            alias: src
+            Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: key (type: string), value (type: string)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+              Union
+                Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.t1
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                      value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: default.t2
+                Select Operator
+                  expressions: _col0 (type: string), _col1 (type: string)
+                  outputColumnNames: key, value
+                  Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
+                  Group By Operator
+                    aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-8
+    Conditional Operator
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t1
+
+  Stage: Stage-3
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.t1
+
+  Stage: Stage-4
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.t1
+
+  Stage: Stage-6
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: default.t1
+
+  Stage: Stage-7
+    Move Operator
+      files:
+          hdfs directory: true
+#### A masked pattern was here ####
+
+  Stage: Stage-1
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: default.t2
+
+  Stage: Stage-9
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.t2
+
+  Stage: Stage-10
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              sort order: 
+              Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col0 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: from (select * from src union all select * from src)s
 insert overwrite table t1 select *
 insert overwrite table t2 select *

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java b/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
index 3b10987..09dddac 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/Deserializer.java
@@ -68,6 +68,8 @@ public interface Deserializer {
 
   /**
    * Returns statistics collected when serializing
+   *
+   * @return {@link SerDeStats} object; or in case not supported: null
    */
   SerDeStats getSerDeStats();
 }


[2/2] hive git commit: HIVE-19326 : stats auto gather: incorrect aggregation during UNION queries (may lead to incorrect results) (Zoltan Haindrich via Ashutosh Chauhan)

Posted by ha...@apache.org.
HIVE-19326 : stats auto gather: incorrect aggregation during UNION queries (may lead to incorrect results) (Zoltan Haindrich via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ca70b91
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ca70b91
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ca70b91

Branch: refs/heads/master
Commit: 2ca70b91917b8ce668a6fd277a32ab4f2c9a68c0
Parents: 4dbf991
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Fri May 11 11:22:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jul 3 10:17:06 2018 -0700

----------------------------------------------------------------------
 .../hive/accumulo/serde/AccumuloSerDe.java      |   6 +-
 .../test/resources/testconfiguration.properties |   1 +
 .../apache/hadoop/hive/ql/DriverContext.java    |  29 ++-
 .../hadoop/hive/ql/exec/FileSinkOperator.java   |  15 +-
 .../apache/hadoop/hive/ql/exec/Operator.java    |  11 +-
 .../hive/ql/exec/SerializationUtilities.java    |  22 --
 .../hadoop/hive/ql/exec/TableScanOperator.java  |   1 +
 .../hive/ql/optimizer/GenMapRedUtils.java       |   4 +-
 .../hadoop/hive/ql/parse/GenTezUtils.java       |   2 +-
 .../hive/ql/stats/StatsCollectionContext.java   |  12 +-
 .../hive/ql/stats/fs/FSStatsPublisher.java      |  15 +-
 .../queries/clientpositive/autoColumnStats_9.q  |   3 +
 .../queries/clientpositive/union_fast_stats.q   |   6 +
 .../queries/clientpositive/union_rowcounts.q    |  51 ++++
 .../test/queries/clientpositive/union_stats.q   |  44 ++--
 .../clientpositive/autoColumnStats_9.q.out      |  13 +-
 .../clientpositive/llap/multiMapJoin1.q.out     | 150 ++++++------
 .../clientpositive/llap/union_fast_stats.q.out  |  58 ++---
 .../clientpositive/llap/union_rowcounts.q.out   | 180 ++++++++++++++
 .../clientpositive/llap/union_stats.q.out       | 145 ++++++++++-
 .../results/clientpositive/union_stats.q.out    | 243 +++++++++++++++++++
 .../apache/hadoop/hive/serde2/Deserializer.java |   2 +
 22 files changed, 816 insertions(+), 197 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
----------------------------------------------------------------------
diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
index fcd819b..6fa48dd 100644
--- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
+++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
@@ -54,6 +54,7 @@ public class AccumuloSerDe extends AbstractSerDe {
 
   private static final Logger log = LoggerFactory.getLogger(AccumuloSerDe.class);
 
+  @Override
   public void initialize(Configuration conf, Properties properties) throws SerDeException {
     accumuloSerDeParameters = new AccumuloSerDeParameters(conf, properties, getClass().getName());
 
@@ -109,6 +110,7 @@ public class AccumuloSerDe extends AbstractSerDe {
     return cachedRow;
   }
 
+  @Override
   public Class<? extends Writable> getSerializedClass() {
     return Mutation.class;
   }
@@ -135,12 +137,14 @@ public class AccumuloSerDe extends AbstractSerDe {
     return cachedRow;
   }
 
+  @Override
   public ObjectInspector getObjectInspector() throws SerDeException {
     return cachedObjectInspector;
   }
 
+  @Override
   public SerDeStats getSerDeStats() {
-    throw new UnsupportedOperationException("SerdeStats not supported.");
+    return null;
   }
 
   public AccumuloSerDeParameters getParams() {

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 385b71e..d02c0fe 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -728,6 +728,7 @@ minillaplocal.query.files=\
   udaf_collect_set_2.q,\
   udaf_all_keyword.q,\
   union_fast_stats.q,\
+  union_rowcounts.q,\
   union_remove_26.q,\
   union_top_level.q,\
   update_access_time_non_current_db.q, \

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
index bda1079..2dd83fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
@@ -18,28 +18,27 @@
 
 package org.apache.hadoop.hive.ql;
 
-import org.apache.hadoop.hive.ql.exec.StatsTask;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
 import org.apache.hadoop.hive.ql.exec.NodeUtils;
 import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
 import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.hive.ql.exec.TaskRunner;
 import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Iterator;
-import java.util.Queue;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.LinkedBlockingQueue;
-
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -223,7 +222,11 @@ public class DriverContext {
       }
     });
     for (String statKey : statKeys) {
-      statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
+      if (statsTasks.containsKey(statKey)) {
+        statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
+      } else {
+        LOG.debug("There is no correspoing statTask for: " + statKey);
+      }
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 949a9e8..267d602 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -90,11 +90,9 @@ import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.util.ReflectionUtils;
 
 import org.apache.hive.common.util.HiveStringUtils;
-
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-
 /**
  * File Sink operator implementation.
  **/
@@ -321,7 +319,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
           // affects some less obscure scenario.
           try {
             FileSystem fpfs = finalPath.getFileSystem(hconf);
-            if (fpfs.exists(finalPath)) throw new RuntimeException(finalPath + " already exists");
+            if (fpfs.exists(finalPath)) {
+              throw new RuntimeException(finalPath + " already exists");
+            }
           } catch (IOException e) {
             throw new RuntimeException(e);
           }
@@ -354,7 +354,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
     }
 
     public Path buildTaskOutputTempPath() {
-      if (taskOutputTempPathRoot == null) return null;
+      if (taskOutputTempPathRoot == null) {
+        return null;
+      }
       assert subdirForTxn == null;
       String pathStr = taskOutputTempPathRoot.toString();
       if (subdirBeforeTxn != null) {
@@ -457,7 +459,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
     // 'Parent'
     boolean isLinked = conf.isLinkedFileSink();
     if (!isLinked) {
-      // Simple case - no union. 
+      // Simple case - no union.
       specPath = conf.getDirName();
       unionPath = null;
     } else {
@@ -1521,7 +1523,8 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
         }
       }
     }
-    sContext.setIndexForTezUnion(this.getIndexForTezUnion());
+    sContext.setContextSuffix(getOperatorId());
+
     if (!statsPublisher.closeConnection(sContext)) {
       LOG.error("Failed to close stats");
       // The original exception is lost.

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index acadb43..38316bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -86,7 +86,6 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
   private transient boolean rootInitializeCalled = false;
   protected transient long numRows = 0;
   protected transient long runTimeNumRows = 0;
-  protected int indexForTezUnion = -1;
   private transient Configuration hconf;
   protected final transient Collection<Future<?>> asyncInitOperations = new HashSet<>();
   private String marker;
@@ -1557,8 +1556,8 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
   private void publishRunTimeStats() throws HiveException {
     StatsPublisher statsPublisher = new FSStatsPublisher();
     StatsCollectionContext sContext = new StatsCollectionContext(hconf);
-    sContext.setIndexForTezUnion(indexForTezUnion);
     sContext.setStatsTmpDir(conf.getRuntimeStatsTmpDir());
+    sContext.setContextSuffix(getOperatorId());
 
     if (!statsPublisher.connect(sContext)) {
       LOG.error("StatsPublishing error: cannot connect to database");
@@ -1580,14 +1579,6 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
     }
   }
 
-  public int getIndexForTezUnion() {
-    return indexForTezUnion;
-  }
-
-  public void setIndexForTezUnion(int indexForTezUnion) {
-    this.indexForTezUnion = indexForTezUnion;
-  }
-
   /**
    * Decides whether two operators are logically the same.
    * This can be used to merge same operators and avoid repeated computation.

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
index ed1566f..e03429b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
@@ -668,28 +668,6 @@ public class SerializationUtilities {
     return result;
   }
 
-  public static List<Operator<?>> cloneOperatorTree(List<Operator<?>> roots, int indexForTezUnion) {
-    ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
-    CompilationOpContext ctx = roots.isEmpty() ? null : roots.get(0).getCompilationOpContext();
-    serializePlan(roots, baos, true);
-    @SuppressWarnings("unchecked")
-    List<Operator<?>> result =
-        deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
-            roots.getClass(), true);
-    // Restore the context.
-    LinkedList<Operator<?>> newOps = new LinkedList<>(result);
-    while (!newOps.isEmpty()) {
-      Operator<?> newOp = newOps.poll();
-      newOp.setIndexForTezUnion(indexForTezUnion);
-      newOp.setCompilationOpContext(ctx);
-      List<Operator<?>> children = newOp.getChildOperators();
-      if (children != null) {
-        newOps.addAll(children);
-      }
-    }
-    return result;
-  }
-
   /**
    * Clones using the powers of XML. Do not use unless necessary.
    * @param plan The plan.

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 0799181..5780bd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -349,6 +349,7 @@ public class TableScanOperator extends Operator<TableScanDesc> implements
     StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
     StatsCollectionContext sc = new StatsCollectionContext(jc);
     sc.setStatsTmpDir(conf.getTmpStatsDir());
+    sc.setContextSuffix(getOperatorId());
     if (!statsPublisher.connect(sc)) {
       // just return, stats gathering should not block the main query.
       if (LOG.isInfoEnabled()) {

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 605bb09..fa92385 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1907,12 +1907,12 @@ public final class GenMapRedUtils {
         mvTasks, fsOp.getConf().getFinalDirName(), fsOp.getConf().isMmTable());
 
     // TODO: wtf?!! why is this in this method? This has nothing to do with anything.
-    if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
+    if (isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
         && !fsOp.getConf().isMaterialization()) {
       // mark the MapredWork and FileSinkOperator for gathering stats
       fsOp.getConf().setGatherStats(true);
       fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
-      if (!mvTask.hasFollowingStatsTask()) {
+      if (mvTask != null && !mvTask.hasFollowingStatsTask()) {
         GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf);
       }
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 741833b..bb0de94 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -219,7 +219,7 @@ public class GenTezUtils {
     roots.addAll(context.eventOperatorSet);
 
     // need to clone the plan.
-    List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots, indexForTezUnion);
+    List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
 
     // we're cloning the operator plan but we're retaining the original work. That means
     // that root operators have to be replaced with the cloned ops. The replacement map

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
index 5c3328c..e5ed621 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
@@ -29,13 +29,13 @@ import org.apache.hadoop.hive.ql.exec.Task;
 
 
 @InterfaceAudience.Public
-@InterfaceStability.Stable
+@InterfaceStability.Unstable
 public class StatsCollectionContext {
 
   private final Configuration hiveConf;
   private Task task;
   private List<String> statsTmpDirs;
-  private int indexForTezUnion;
+  private String contextSuffix;
 
   public List<String> getStatsTmpDirs() {
     return statsTmpDirs;
@@ -67,11 +67,11 @@ public class StatsCollectionContext {
     this.task = task;
   }
 
-  public int getIndexForTezUnion() {
-    return indexForTezUnion;
+  public void setContextSuffix(String suffix) {
+    this.contextSuffix = suffix;
   }
 
-  public void setIndexForTezUnion(int indexForTezUnion) {
-    this.indexForTezUnion = indexForTezUnion;
+  public String getContextSuffix() {
+    return contextSuffix;
   }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
index 902b37f..67ab51d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
@@ -95,16 +95,17 @@ public class FSStatsPublisher implements StatsPublisher {
   public boolean closeConnection(StatsCollectionContext context) {
     List<String> statsDirs = context.getStatsTmpDirs();
     assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
+    if (context.getContextSuffix() == null) {
+      throw new RuntimeException("ContextSuffix must be set before publishing!");
+    }
+
     Path statsDir = new Path(statsDirs.get(0));
     try {
-      Path statsFile = null;
-      if (context.getIndexForTezUnion() != -1) {
-        statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX
-            + conf.getInt("mapred.task.partition", 0) + "_" + context.getIndexForTezUnion());
-      } else {
-        statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX
-            + conf.getInt("mapred.task.partition", 0));
+      String suffix = Integer.toString(conf.getInt("mapred.task.partition", 0));
+      if (context.getContextSuffix() != null) {
+        suffix += "_" + context.getContextSuffix();
       }
+      Path statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX + suffix);
       Utilities.FILE_OP_LOGGER.trace("About to create stats file for this task : {}", statsFile);
       Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true));
       LOG.debug("Created file : " + statsFile);

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/autoColumnStats_9.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_9.q b/ql/src/test/queries/clientpositive/autoColumnStats_9.q
index 2b9eb82..63c94e6 100644
--- a/ql/src/test/queries/clientpositive/autoColumnStats_9.q
+++ b/ql/src/test/queries/clientpositive/autoColumnStats_9.q
@@ -17,6 +17,9 @@ INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value;
 FROM src src1 JOIN src src2 ON (src1.key = src2.key)
 INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value;
 
+
+select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0;
+
 desc formatted dest_j1_n23;
 
 desc formatted dest_j1_n23 key;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/union_fast_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_fast_stats.q b/ql/src/test/queries/clientpositive/union_fast_stats.q
index 221fbc1..6359482 100644
--- a/ql/src/test/queries/clientpositive/union_fast_stats.q
+++ b/ql/src/test/queries/clientpositive/union_fast_stats.q
@@ -60,8 +60,14 @@ create table small_alltypesorc_a_n2 stored as orc as select * from
 
 desc formatted small_alltypesorc_a_n2;
 
+set hive.optimize.metadataonly=true;
+select 15,count(*) from small_alltypesorc_a_n2;
+
 ANALYZE TABLE small_alltypesorc_a_n2 COMPUTE STATISTICS;
 
+-- select assert_true(15=count(*)) from small_alltypesorc_a_n2;
+select 15,count(*) from small_alltypesorc_a_n2;
+
 desc formatted small_alltypesorc_a_n2;
 
 insert into table small_alltypesorc_a_n2 select * from small_alltypesorc1a_n2;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/union_rowcounts.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_rowcounts.q b/ql/src/test/queries/clientpositive/union_rowcounts.q
new file mode 100644
index 0000000..2dc5c06
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_rowcounts.q
@@ -0,0 +1,51 @@
+create table t1 (a int) ;
+insert into t1 values (1);
+
+create table t2a  as 
+        select * from t1
+        union all
+        select * from t1
+;
+
+select 2,count(*) from t2a;
+
+create table t2b  as select * from
+(
+        select * from (select * from t1) sq1
+        union all
+        select * from (select * from t1) sq2
+) tt
+;
+
+
+select 2,count(*) from t2b;
+
+drop table if exists t1;
+drop table if exists t2a;
+drop table if exists t2b;
+
+set hive.merge.tezfiles=true;
+
+create table t1 (a int) stored as orc;
+insert into t1 values (1);
+
+analyze table t1 compute statistics for columns;
+
+create table t2a stored as orc as
+	select * from t1
+	union all
+	select * from t1
+;
+
+select 2,count(*) from t2a;
+
+create table t2b stored as orc as select * from
+(
+	select * from (select * from t1) sq1
+	union all
+	select * from (select * from t1) sq2
+) tt
+;
+
+
+select 2,count(*) from t2b;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/union_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_stats.q b/ql/src/test/queries/clientpositive/union_stats.q
index 80856ed..f36e15c 100644
--- a/ql/src/test/queries/clientpositive/union_stats.q
+++ b/ql/src/test/queries/clientpositive/union_stats.q
@@ -1,30 +1,38 @@
---! qt_n4:dataset_n4:src
-explain extended create table t_n4 as select_n4 * from src union all select_n4 * from src;
+--! qt:dataset:src
+explain extended create table t as select * from src union all select * from src;
 
-create table t_n4 as select_n4 * from src union all select_n4 * from src;
+create table t as select * from src union all select * from src;
 
-select_n4 count_n4(1) from t_n4;
+select count(1) from t;
 
-desc formatted t_n4;
+desc formatted t;
 
-create table tt_n4 as select_n4 * from t_n4 union all select_n4 * from src;
+create table tt as select * from t union all select * from src;
 
-desc formatted tt_n4;
+desc formatted tt;
 
-drop table tt_n4;
+drop table tt;
 
-create table tt_n4 as select_n4 * from src union all select_n4 * from t_n4;
+create table tt as select * from src union all select * from t;
 
-desc formatted tt_n4;
+desc formatted tt;
 
-create table t1_n26 like src;
-create table t2_n17 like src;
+create table t1 like src;
+create table t2 like src;
+create table t3 like src;
 
-from (select_n4 * from src union all select_n4 * from src)s
-insert_n4 overwrite table t1_n26 select_n4 *
-insert_n4 overwrite table t2_n17 select_n4 *;
+set hive.explain.user=true;
+explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *;
 
-desc formatted t1_n26;
-desc formatted t2_n17;
+from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *;
 
-select_n4 count_n4(1) from t1_n26;
+desc formatted t1;
+desc formatted t2;
+
+select count(1) from t1;

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
index 8a94108..1c3db7c 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
@@ -209,6 +209,15 @@ POSTHOOK: Input: default@src
 POSTHOOK: Output: default@dest_j1_n23
 POSTHOOK: Lineage: dest_j1_n23.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1_n23.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_j1_n23
+#### A masked pattern was here ####
+POSTHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_j1_n23
+#### A masked pattern was here ####
+cnt, check desc	1028
 PREHOOK: query: desc formatted dest_j1_n23
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@dest_j1_n23
@@ -229,8 +238,8 @@ Table Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
 	bucketing_version   	2                   
 	numFiles            	137                 
-	numRows             	855                 
-	rawDataSize         	9143                
+	numRows             	1028                
+	rawDataSize         	10968               
 	totalSize           	11996               
 #### A masked pattern was here ####
 	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
index ed1745f..261e153 100644
--- a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
@@ -193,14 +193,14 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key is not null and value is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 1748368 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -210,11 +210,11 @@ STAGE PLANS:
                         outputColumnNames: _col1
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col1 (type: string)
                           outputColumnNames: _col0
-                          Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -223,7 +223,7 @@ STAGE PLANS:
                               1 _col0 (type: string)
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
                             Group By Operator
                               aggregations: count()
                               mode: hash
@@ -359,14 +359,14 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key is not null and value is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 1748368 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -376,11 +376,11 @@ STAGE PLANS:
                         outputColumnNames: _col1
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col1 (type: string)
                           outputColumnNames: _col0
-                          Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -389,7 +389,7 @@ STAGE PLANS:
                               1 _col0 (type: string)
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
                             Group By Operator
                               aggregations: count()
                               mode: hash
@@ -527,14 +527,14 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key is not null and value is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 1748368 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key (type: string), value (type: string)
                       outputColumnNames: _col0, _col1
-                      Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -544,11 +544,11 @@ STAGE PLANS:
                         outputColumnNames: _col1
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col1 (type: string)
                           outputColumnNames: _col0
-                          Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -558,18 +558,18 @@ STAGE PLANS:
                             outputColumnNames: _col1
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
                             Group By Operator
                               aggregations: count()
                               keys: _col1 (type: string)
                               mode: hash
                               outputColumnNames: _col0, _col1
-                              Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
                               Reduce Output Operator
                                 key expressions: _col0 (type: string)
                                 sort order: +
                                 Map-reduce partition columns: _col0 (type: string)
-                                Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+                                Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
                                 value expressions: _col1 (type: bigint)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -622,14 +622,14 @@ STAGE PLANS:
                 keys: KEY._col0 (type: string)
                 mode: mergepartial
                 outputColumnNames: _col0, _col1
-                Statistics: Num rows: 272 Data size: 95378 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 2722 Data size: 951811 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col1 (type: bigint)
                   outputColumnNames: _col0
-                  Statistics: Num rows: 272 Data size: 95378 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 2722 Data size: 951811 Basic stats: COMPLETE Column stats: NONE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 272 Data size: 95378 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 2722 Data size: 951811 Basic stats: COMPLETE Column stats: NONE
                     table:
                         input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -835,14 +835,14 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key1 (type: string), key2 (type: string), value (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -852,11 +852,11 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
                           outputColumnNames: _col0, _col1, _col2, _col3
-                          Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -866,11 +866,11 @@ STAGE PLANS:
                             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                             Select Operator
                               expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
                               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                              Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                               Map Join Operator
                                 condition map:
                                      Inner Join 0 to 1
@@ -880,11 +880,11 @@ STAGE PLANS:
                                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                                 input vertices:
                                   1 Map 5
-                                Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                                Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                                 Select Operator
                                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
                                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                                  Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                                  Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                                   Map Join Operator
                                     condition map:
                                          Inner Join 0 to 1
@@ -894,11 +894,11 @@ STAGE PLANS:
                                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                                     input vertices:
                                       1 Map 6
-                                    Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                                    Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                                     Select Operator
                                       expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
                                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                                      Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                                      Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                                       Group By Operator
                                         aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
                                         mode: hash
@@ -1175,14 +1175,14 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key1 (type: string), key2 (type: string), value (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                       Map Join Operator
                         condition map:
                              Inner Join 0 to 1
@@ -1192,11 +1192,11 @@ STAGE PLANS:
                         outputColumnNames: _col0, _col1, _col2, _col3
                         input vertices:
                           1 Map 3
-                        Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                         Select Operator
                           expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
                           outputColumnNames: _col0, _col1, _col2, _col3
-                          Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                          Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                           Map Join Operator
                             condition map:
                                  Inner Join 0 to 1
@@ -1206,11 +1206,11 @@ STAGE PLANS:
                             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
                             input vertices:
                               1 Map 4
-                            Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                            Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                             Select Operator
                               expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
                               outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                              Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                              Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                               Map Join Operator
                                 condition map:
                                      Inner Join 0 to 1
@@ -1220,11 +1220,11 @@ STAGE PLANS:
                                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
                                 input vertices:
                                   1 Map 5
-                                Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                                Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                                 Select Operator
                                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
                                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                                  Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                                  Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                                   Map Join Operator
                                     condition map:
                                          Inner Join 0 to 1
@@ -1234,11 +1234,11 @@ STAGE PLANS:
                                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
                                     input vertices:
                                       1 Map 6
-                                    Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                                    Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                                     Select Operator
                                       expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
                                       outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                                      Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                                      Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                                       Group By Operator
                                         aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
                                         mode: hash
@@ -1518,19 +1518,19 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key1 (type: string), key2 (type: string), value (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1625,16 +1625,16 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col3 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col3 (type: string)
-                    Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -1646,16 +1646,16 @@ STAGE PLANS:
                   0 _col3 (type: string)
                   1 _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col1 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col1 (type: string)
-                    Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
         Reducer 4 
             Execution mode: llap
@@ -1667,16 +1667,16 @@ STAGE PLANS:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                  Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col2 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
         Reducer 5 
             Execution mode: llap
@@ -1688,11 +1688,11 @@ STAGE PLANS:
                   0 _col2 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                  Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
                     mode: hash
@@ -1889,19 +1889,19 @@ STAGE PLANS:
                 TableScan
                   alias: bigtbl
                   filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
-                  Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
                   Filter Operator
                     predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
-                    Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                     Select Operator
                       expressions: key1 (type: string), key2 (type: string), value (type: string)
                       outputColumnNames: _col0, _col1, _col2
-                      Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                      Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+                        Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
                         value expressions: _col1 (type: string), _col2 (type: string)
             Execution mode: vectorized, llap
             LLAP IO: no inputs
@@ -1996,16 +1996,16 @@ STAGE PLANS:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3
-                  Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col3 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col3 (type: string)
-                    Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
         Reducer 3 
             Execution mode: llap
@@ -2017,16 +2017,16 @@ STAGE PLANS:
                   0 _col3 (type: string)
                   1 _col1 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
-                  Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col1 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col1 (type: string)
-                    Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
         Reducer 4 
             Execution mode: llap
@@ -2038,16 +2038,16 @@ STAGE PLANS:
                   0 _col1 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
-                  Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col2 (type: string)
                     sort order: +
                     Map-reduce partition columns: _col2 (type: string)
-                    Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+                    Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
                     value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
         Reducer 5 
             Execution mode: llap
@@ -2059,11 +2059,11 @@ STAGE PLANS:
                   0 _col2 (type: string)
                   1 _col0 (type: string)
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                 Select Operator
                   expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-                  Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+                  Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
                   Group By Operator
                     aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
                     mode: hash

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
index cdb9193..40f469b 100644
--- a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
@@ -175,14 +175,12 @@ Retention:          	0
 #### A masked pattern was here ####
 Table Type:         	MANAGED_TABLE       	 
 Table Parameters:	 	 
+	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	bucketing_version   	2                   
 	numFiles            	3                   
-<<<<<<< HEAD
-=======
-	numRows             	5                   
-	rawDataSize         	1300                
->>>>>>> asf/master
-	totalSize           	4033                
+	numRows             	15                  
+	rawDataSize         	3315                
+	totalSize           	4152                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -235,7 +233,7 @@ Table Parameters:
 	numFiles            	3                   
 	numRows             	15                  
 	rawDataSize         	3483                
-	totalSize           	4033                
+	totalSize           	4152                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -299,8 +297,8 @@ Table Parameters:
 	bucketing_version   	2                   
 	numFiles            	4                   
 	numRows             	20                  
-	rawDataSize         	4552                
-	totalSize           	5406                
+	rawDataSize         	4468                
+	totalSize           	5569                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -513,13 +511,9 @@ Table Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	bucketing_version   	2                   
 	numFiles            	1                   
-	numRows             	5                   
-	rawDataSize         	1069                
-<<<<<<< HEAD
-	totalSize           	3243                
-=======
-	totalSize           	3247                
->>>>>>> asf/master
+	numRows             	15                  
+	rawDataSize         	3315                
+	totalSize           	3318                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -532,6 +526,15 @@ Bucket Columns:     	[]
 Sort Columns:       	[]                  	 
 Storage Desc Params:	 	 
 	serialization.format	1                   
+PREHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+POSTHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+15	15
 PREHOOK: query: ANALYZE TABLE small_alltypesorc_a_n2 COMPUTE STATISTICS
 PREHOOK: type: QUERY
 PREHOOK: Input: default@small_alltypesorc_a_n2
@@ -540,6 +543,15 @@ POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a_n2 COMPUTE STATISTICS
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@small_alltypesorc_a_n2
 POSTHOOK: Output: default@small_alltypesorc_a_n2
+PREHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+POSTHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+15	15
 PREHOOK: query: desc formatted small_alltypesorc_a_n2
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@small_alltypesorc_a_n2
@@ -572,11 +584,7 @@ Table Parameters:
 	numFiles            	1                   
 	numRows             	15                  
 	rawDataSize         	3320                
-<<<<<<< HEAD
-	totalSize           	3243                
-=======
-	totalSize           	3247                
->>>>>>> asf/master
+	totalSize           	3318                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 
@@ -640,12 +648,8 @@ Table Parameters:
 	bucketing_version   	2                   
 	numFiles            	2                   
 	numRows             	20                  
-	rawDataSize         	4389                
-<<<<<<< HEAD
-	totalSize           	4616                
-=======
-	totalSize           	4620                
->>>>>>> asf/master
+	rawDataSize         	4305                
+	totalSize           	4735                
 #### A masked pattern was here ####
 	 	 
 # Storage Information	 	 

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out b/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out
new file mode 100644
index 0000000..8b06d6e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: create table t1 (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+PREHOOK: query: create table t2a  as 
+        select * from t1
+        union all
+        select * from t1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2a
+POSTHOOK: query: create table t2a  as 
+        select * from t1
+        union all
+        select * from t1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2a
+POSTHOOK: Lineage: t2a.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2a
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2a
+#### A masked pattern was here ####
+2	2
+PREHOOK: query: create table t2b  as select * from
+(
+        select * from (select * from t1) sq1
+        union all
+        select * from (select * from t1) sq2
+) tt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2b
+POSTHOOK: query: create table t2b  as select * from
+(
+        select * from (select * from t1) sq1
+        union all
+        select * from (select * from t1) sq2
+) tt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2b
+POSTHOOK: Lineage: t2b.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2b
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2b
+#### A masked pattern was here ####
+2	2
+PREHOOK: query: drop table if exists t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table if exists t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: drop table if exists t2a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2a
+PREHOOK: Output: default@t2a
+POSTHOOK: query: drop table if exists t2a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2a
+POSTHOOK: Output: default@t2a
+PREHOOK: query: drop table if exists t2b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2b
+PREHOOK: Output: default@t2b
+POSTHOOK: query: drop table if exists t2b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2b
+POSTHOOK: Output: default@t2b
+PREHOOK: query: create table t1 (a int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+PREHOOK: query: analyze table t1 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: create table t2a stored as orc as
+	select * from t1
+	union all
+	select * from t1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2a
+POSTHOOK: query: create table t2a stored as orc as
+	select * from t1
+	union all
+	select * from t1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2a
+POSTHOOK: Lineage: t2a.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2a
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2a
+#### A masked pattern was here ####
+2	2
+PREHOOK: query: create table t2b stored as orc as select * from
+(
+	select * from (select * from t1) sq1
+	union all
+	select * from (select * from t1) sq2
+) tt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2b
+POSTHOOK: query: create table t2b stored as orc as select * from
+(
+	select * from (select * from t1) sq1
+	union all
+	select * from (select * from t1) sq2
+) tt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2b
+POSTHOOK: Lineage: t2b.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2b
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2b
+#### A masked pattern was here ####
+2	2

http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/union_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_stats.q.out b/ql/src/test/results/clientpositive/llap/union_stats.q.out
index 5a088f4..cea4847 100644
--- a/ql/src/test/results/clientpositive/llap/union_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_stats.q.out
@@ -236,7 +236,7 @@ POSTHOOK: query: select count(1) from t
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t
 #### A masked pattern was here ####
-500
+1000
 PREHOOK: query: desc formatted t
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@t
@@ -257,8 +257,8 @@ Table Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	bucketing_version   	2                   
 	numFiles            	2                   
-	numRows             	500                 
-	rawDataSize         	5312                
+	numRows             	1000                
+	rawDataSize         	10624               
 	totalSize           	11624               
 #### A masked pattern was here ####
 	 	 
@@ -306,8 +306,8 @@ Table Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	bucketing_version   	2                   
 	numFiles            	2                   
-	numRows             	1000                
-	rawDataSize         	10624               
+	numRows             	1500                
+	rawDataSize         	15936               
 	totalSize           	17436               
 #### A masked pattern was here ####
 	 	 
@@ -363,8 +363,8 @@ Table Parameters:
 	COLUMN_STATS_ACCURATE	{\"BASIC_STATS\":\"true\"}
 	bucketing_version   	2                   
 	numFiles            	2                   
-	numRows             	1000                
-	rawDataSize         	10624               
+	numRows             	1500                
+	rawDataSize         	15936               
 	totalSize           	17436               
 #### A masked pattern was here ####
 	 	 
@@ -394,24 +394,155 @@ POSTHOOK: query: create table t2 like src
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@t2
+PREHOOK: query: create table t3 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Union 2 (CONTAINS)
+Map 6 <- Union 2 (CONTAINS)
+Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+
+Stage-5
+  Stats Work{}
+    Stage-0
+      Move Operator
+        table:{"name:":"default.t1"}
+        Stage-4
+          Dependency Collection{}
+            Stage-3
+              Reducer 3 llap
+              File Output Operator [FS_6]
+                Group By Operator [GBY_4] (rows=1 width=880)
+                  Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+                <-Union 2 [CUSTOM_SIMPLE_EDGE]
+                  <-Map 1 [CONTAINS] llap
+                    File Output Operator [FS_7]
+                      table:{"name:":"default.t1"}
+                      Select Operator [SEL_1] (rows=500 width=178)
+                        Output:["_col0","_col1"]
+                        TableScan [TS_0] (rows=500 width=178)
+                          Output:["key","value"]
+                    Reduce Output Operator [RS_3]
+                      Group By Operator [GBY_2] (rows=1 width=880)
+                        Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+                        Select Operator [SEL_1] (rows=1000 width=178)
+                          Output:["key","value"]
+                           Please refer to the previous Select Operator [SEL_1]
+                    File Output Operator [FS_9]
+                      table:{"name:":"default.t2"}
+                       Please refer to the previous Select Operator [SEL_1]
+                    Reduce Output Operator [RS_3]
+                      Group By Operator [GBY_2] (rows=1 width=880)
+                        Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+                        Select Operator [SEL_1] (rows=1000 width=178)
+                          Output:["key","value"]
+                           Please refer to the previous Select Operator [SEL_1]
+                    File Output Operator [FS_11]
+                      table:{"name:":"default.t3"}
+                       Please refer to the previous Select Operator [SEL_1]
+                    Reduce Output Operator [RS_3]
+                      Group By Operator [GBY_2] (rows=1 width=880)
+                        Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+                        Select Operator [SEL_1] (rows=1000 width=178)
+                          Output:["key","value"]
+                           Please refer to the previous Select Operator [SEL_1]
+                  <-Map 6 [CONTAINS] llap
+                    File Output Operator [FS_7]
+                      table:{"name:":"default.t1"}
+                      Select Operator [SEL_3] (rows=500 width=178)
+                        Output:["_col0","_col1"]
+                        TableScan [TS_2] (rows=500 width=178)
+                          Output:["key","value"]
+                    Reduce Output Operator [RS_3]
+                      Group By Operator [GBY_2] (rows=1 width=880)
+                        Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+                        Select Operator [SEL_1] (rows=1000 width=178)
+                          Output:["key","value"]
+                           Please refer to the previous Select Operator [SEL_3]
+                    File Output Operator [FS_9]
+                      table:{"name:":"default.t2"}
+                       Please refer to the previous Select Operator [SEL_3]
+                    Reduce Output Operator [RS_3]
+                      Group By Operator [GBY_2] (rows=1 width=880)
+                        Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+                        Select Operator [SEL_1] (rows=1000 width=178)
+                          Output:["key","value"]
+                           Please refer to the previous Select Operator [SEL_3]
+                    File Output Operator [FS_11]
+                      table:{"name:":"default.t3"}
+                       Please refer to the previous Select Operator [SEL_3]
+                    Reduce Output Operator [RS_3]
+                      Group By Operator [GBY_2] (rows=1 width=880)
+                        Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+                        Select Operator [SEL_1] (rows=1000 width=178)
+                          Output:["key","value"]
+                           Please refer to the previous Select Operator [SEL_3]
+              Reducer 4 llap
+              File Output Operator [FS_6]
+                Group By Operator [GBY_4] (rows=1 width=880)
+                  Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+                <- Please refer to the previous Union 2 [CUSTOM_SIMPLE_EDGE]
+              Reducer 5 llap
+              File Output Operator [FS_6]
+                Group By Operator [GBY_4] (rows=1 width=880)
+                  Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+                <- Please refer to the previous Union 2 [CUSTOM_SIMPLE_EDGE]
+Stage-6
+  Stats Work{}
+    Stage-1
+      Move Operator
+        table:{"name:":"default.t2"}
+         Please refer to the previous Stage-4
+Stage-7
+  Stats Work{}
+    Stage-2
+      Move Operator
+        table:{"name:":"default.t3"}
+         Please refer to the previous Stage-4
+
 PREHOOK: query: from (select * from src union all select * from src)s
 insert overwrite table t1 select *
 insert overwrite table t2 select *
+insert overwrite table t3 select *
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Output: default@t1
 PREHOOK: Output: default@t2
+PREHOOK: Output: default@t3
 POSTHOOK: query: from (select * from src union all select * from src)s
 insert overwrite table t1 select *
 insert overwrite table t2 select *
+insert overwrite table t3 select *
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Output: default@t1
 POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t3
 POSTHOOK: Lineage: t1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: t1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: t2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: t2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: t3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t3.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
 PREHOOK: query: desc formatted t1
 PREHOOK: type: DESCTABLE
 PREHOOK: Input: default@t1