You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2018/07/03 17:26:51 UTC
[2/2] hive git commit: HIVE-19326 : stats auto gather: incorrect aggregation during UNION queries (may lead to incorrect results) (Zoltan Haindrich via Ashutosh Chauhan)
HIVE-19326 : stats auto gather: incorrect aggregation during UNION queries (may lead to incorrect results) (Zoltan Haindrich via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/2ca70b91
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/2ca70b91
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/2ca70b91
Branch: refs/heads/master
Commit: 2ca70b91917b8ce668a6fd277a32ab4f2c9a68c0
Parents: 4dbf991
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Fri May 11 11:22:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Jul 3 10:17:06 2018 -0700
----------------------------------------------------------------------
.../hive/accumulo/serde/AccumuloSerDe.java | 6 +-
.../test/resources/testconfiguration.properties | 1 +
.../apache/hadoop/hive/ql/DriverContext.java | 29 ++-
.../hadoop/hive/ql/exec/FileSinkOperator.java | 15 +-
.../apache/hadoop/hive/ql/exec/Operator.java | 11 +-
.../hive/ql/exec/SerializationUtilities.java | 22 --
.../hadoop/hive/ql/exec/TableScanOperator.java | 1 +
.../hive/ql/optimizer/GenMapRedUtils.java | 4 +-
.../hadoop/hive/ql/parse/GenTezUtils.java | 2 +-
.../hive/ql/stats/StatsCollectionContext.java | 12 +-
.../hive/ql/stats/fs/FSStatsPublisher.java | 15 +-
.../queries/clientpositive/autoColumnStats_9.q | 3 +
.../queries/clientpositive/union_fast_stats.q | 6 +
.../queries/clientpositive/union_rowcounts.q | 51 ++++
.../test/queries/clientpositive/union_stats.q | 44 ++--
.../clientpositive/autoColumnStats_9.q.out | 13 +-
.../clientpositive/llap/multiMapJoin1.q.out | 150 ++++++------
.../clientpositive/llap/union_fast_stats.q.out | 58 ++---
.../clientpositive/llap/union_rowcounts.q.out | 180 ++++++++++++++
.../clientpositive/llap/union_stats.q.out | 145 ++++++++++-
.../results/clientpositive/union_stats.q.out | 243 +++++++++++++++++++
.../apache/hadoop/hive/serde2/Deserializer.java | 2 +
22 files changed, 816 insertions(+), 197 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
----------------------------------------------------------------------
diff --git a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
index fcd819b..6fa48dd 100644
--- a/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
+++ b/accumulo-handler/src/java/org/apache/hadoop/hive/accumulo/serde/AccumuloSerDe.java
@@ -54,6 +54,7 @@ public class AccumuloSerDe extends AbstractSerDe {
private static final Logger log = LoggerFactory.getLogger(AccumuloSerDe.class);
+ @Override
public void initialize(Configuration conf, Properties properties) throws SerDeException {
accumuloSerDeParameters = new AccumuloSerDeParameters(conf, properties, getClass().getName());
@@ -109,6 +110,7 @@ public class AccumuloSerDe extends AbstractSerDe {
return cachedRow;
}
+ @Override
public Class<? extends Writable> getSerializedClass() {
return Mutation.class;
}
@@ -135,12 +137,14 @@ public class AccumuloSerDe extends AbstractSerDe {
return cachedRow;
}
+ @Override
public ObjectInspector getObjectInspector() throws SerDeException {
return cachedObjectInspector;
}
+ @Override
public SerDeStats getSerDeStats() {
- throw new UnsupportedOperationException("SerdeStats not supported.");
+ return null;
}
public AccumuloSerDeParameters getParams() {
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 385b71e..d02c0fe 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -728,6 +728,7 @@ minillaplocal.query.files=\
udaf_collect_set_2.q,\
udaf_all_keyword.q,\
union_fast_stats.q,\
+ union_rowcounts.q,\
union_remove_26.q,\
union_top_level.q,\
update_access_time_non_current_db.q, \
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java b/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
index bda1079..2dd83fb 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
@@ -18,28 +18,27 @@
package org.apache.hadoop.hive.ql;
-import org.apache.hadoop.hive.ql.exec.StatsTask;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.LinkedBlockingQueue;
+
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.NodeUtils;
import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskRunner;
import org.apache.hadoop.hive.ql.exec.mr.MapRedTask;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Iterator;
-import java.util.Queue;
-import java.util.concurrent.ConcurrentLinkedQueue;
-import java.util.concurrent.LinkedBlockingQueue;
-
import org.apache.hadoop.hive.ql.session.SessionState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -223,7 +222,11 @@ public class DriverContext {
}
});
for (String statKey : statKeys) {
- statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
+ if (statsTasks.containsKey(statKey)) {
+ statsTasks.get(statKey).getWork().setSourceTask(mapredTask);
+ } else {
+ LOG.debug("There is no correspoing statTask for: " + statKey);
+ }
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index 949a9e8..267d602 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -90,11 +90,9 @@ import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hive.common.util.HiveStringUtils;
-
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
/**
* File Sink operator implementation.
**/
@@ -321,7 +319,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
// affects some less obscure scenario.
try {
FileSystem fpfs = finalPath.getFileSystem(hconf);
- if (fpfs.exists(finalPath)) throw new RuntimeException(finalPath + " already exists");
+ if (fpfs.exists(finalPath)) {
+ throw new RuntimeException(finalPath + " already exists");
+ }
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -354,7 +354,9 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
}
public Path buildTaskOutputTempPath() {
- if (taskOutputTempPathRoot == null) return null;
+ if (taskOutputTempPathRoot == null) {
+ return null;
+ }
assert subdirForTxn == null;
String pathStr = taskOutputTempPathRoot.toString();
if (subdirBeforeTxn != null) {
@@ -457,7 +459,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
// 'Parent'
boolean isLinked = conf.isLinkedFileSink();
if (!isLinked) {
- // Simple case - no union.
+ // Simple case - no union.
specPath = conf.getDirName();
unionPath = null;
} else {
@@ -1521,7 +1523,8 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
}
}
}
- sContext.setIndexForTezUnion(this.getIndexForTezUnion());
+ sContext.setContextSuffix(getOperatorId());
+
if (!statsPublisher.closeConnection(sContext)) {
LOG.error("Failed to close stats");
// The original exception is lost.
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
index acadb43..38316bf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java
@@ -86,7 +86,6 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
private transient boolean rootInitializeCalled = false;
protected transient long numRows = 0;
protected transient long runTimeNumRows = 0;
- protected int indexForTezUnion = -1;
private transient Configuration hconf;
protected final transient Collection<Future<?>> asyncInitOperations = new HashSet<>();
private String marker;
@@ -1557,8 +1556,8 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
private void publishRunTimeStats() throws HiveException {
StatsPublisher statsPublisher = new FSStatsPublisher();
StatsCollectionContext sContext = new StatsCollectionContext(hconf);
- sContext.setIndexForTezUnion(indexForTezUnion);
sContext.setStatsTmpDir(conf.getRuntimeStatsTmpDir());
+ sContext.setContextSuffix(getOperatorId());
if (!statsPublisher.connect(sContext)) {
LOG.error("StatsPublishing error: cannot connect to database");
@@ -1580,14 +1579,6 @@ public abstract class Operator<T extends OperatorDesc> implements Serializable,C
}
}
- public int getIndexForTezUnion() {
- return indexForTezUnion;
- }
-
- public void setIndexForTezUnion(int indexForTezUnion) {
- this.indexForTezUnion = indexForTezUnion;
- }
-
/**
* Decides whether two operators are logically the same.
* This can be used to merge same operators and avoid repeated computation.
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
index ed1566f..e03429b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/SerializationUtilities.java
@@ -668,28 +668,6 @@ public class SerializationUtilities {
return result;
}
- public static List<Operator<?>> cloneOperatorTree(List<Operator<?>> roots, int indexForTezUnion) {
- ByteArrayOutputStream baos = new ByteArrayOutputStream(4096);
- CompilationOpContext ctx = roots.isEmpty() ? null : roots.get(0).getCompilationOpContext();
- serializePlan(roots, baos, true);
- @SuppressWarnings("unchecked")
- List<Operator<?>> result =
- deserializePlan(new ByteArrayInputStream(baos.toByteArray()),
- roots.getClass(), true);
- // Restore the context.
- LinkedList<Operator<?>> newOps = new LinkedList<>(result);
- while (!newOps.isEmpty()) {
- Operator<?> newOp = newOps.poll();
- newOp.setIndexForTezUnion(indexForTezUnion);
- newOp.setCompilationOpContext(ctx);
- List<Operator<?>> children = newOp.getChildOperators();
- if (children != null) {
- newOps.addAll(children);
- }
- }
- return result;
- }
-
/**
* Clones using the powers of XML. Do not use unless necessary.
* @param plan The plan.
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
index 0799181..5780bd4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java
@@ -349,6 +349,7 @@ public class TableScanOperator extends Operator<TableScanDesc> implements
StatsPublisher statsPublisher = Utilities.getStatsPublisher(jc);
StatsCollectionContext sc = new StatsCollectionContext(jc);
sc.setStatsTmpDir(conf.getTmpStatsDir());
+ sc.setContextSuffix(getOperatorId());
if (!statsPublisher.connect(sc)) {
// just return, stats gathering should not block the main query.
if (LOG.isInfoEnabled()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
index 605bb09..fa92385 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java
@@ -1907,12 +1907,12 @@ public final class GenMapRedUtils {
mvTasks, fsOp.getConf().getFinalDirName(), fsOp.getConf().isMmTable());
// TODO: wtf?!! why is this in this method? This has nothing to do with anything.
- if (mvTask != null && isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
+ if (isInsertTable && hconf.getBoolVar(ConfVars.HIVESTATSAUTOGATHER)
&& !fsOp.getConf().isMaterialization()) {
// mark the MapredWork and FileSinkOperator for gathering stats
fsOp.getConf().setGatherStats(true);
fsOp.getConf().setStatsReliable(hconf.getBoolVar(ConfVars.HIVE_STATS_RELIABLE));
- if (!mvTask.hasFollowingStatsTask()) {
+ if (mvTask != null && !mvTask.hasFollowingStatsTask()) {
GenMapRedUtils.addStatsTask(fsOp, mvTask, currTask, hconf);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
index 741833b..bb0de94 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/GenTezUtils.java
@@ -219,7 +219,7 @@ public class GenTezUtils {
roots.addAll(context.eventOperatorSet);
// need to clone the plan.
- List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots, indexForTezUnion);
+ List<Operator<?>> newRoots = SerializationUtilities.cloneOperatorTree(roots);
// we're cloning the operator plan but we're retaining the original work. That means
// that root operators have to be replaced with the cloned ops. The replacement map
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
index 5c3328c..e5ed621 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsCollectionContext.java
@@ -29,13 +29,13 @@ import org.apache.hadoop.hive.ql.exec.Task;
@InterfaceAudience.Public
-@InterfaceStability.Stable
+@InterfaceStability.Unstable
public class StatsCollectionContext {
private final Configuration hiveConf;
private Task task;
private List<String> statsTmpDirs;
- private int indexForTezUnion;
+ private String contextSuffix;
public List<String> getStatsTmpDirs() {
return statsTmpDirs;
@@ -67,11 +67,11 @@ public class StatsCollectionContext {
this.task = task;
}
- public int getIndexForTezUnion() {
- return indexForTezUnion;
+ public void setContextSuffix(String suffix) {
+ this.contextSuffix = suffix;
}
- public void setIndexForTezUnion(int indexForTezUnion) {
- this.indexForTezUnion = indexForTezUnion;
+ public String getContextSuffix() {
+ return contextSuffix;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
index 902b37f..67ab51d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/fs/FSStatsPublisher.java
@@ -95,16 +95,17 @@ public class FSStatsPublisher implements StatsPublisher {
public boolean closeConnection(StatsCollectionContext context) {
List<String> statsDirs = context.getStatsTmpDirs();
assert statsDirs.size() == 1 : "Found multiple stats dirs: " + statsDirs;
+ if (context.getContextSuffix() == null) {
+ throw new RuntimeException("ContextSuffix must be set before publishing!");
+ }
+
Path statsDir = new Path(statsDirs.get(0));
try {
- Path statsFile = null;
- if (context.getIndexForTezUnion() != -1) {
- statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX
- + conf.getInt("mapred.task.partition", 0) + "_" + context.getIndexForTezUnion());
- } else {
- statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX
- + conf.getInt("mapred.task.partition", 0));
+ String suffix = Integer.toString(conf.getInt("mapred.task.partition", 0));
+ if (context.getContextSuffix() != null) {
+ suffix += "_" + context.getContextSuffix();
}
+ Path statsFile = new Path(statsDir, StatsSetupConst.STATS_FILE_PREFIX + suffix);
Utilities.FILE_OP_LOGGER.trace("About to create stats file for this task : {}", statsFile);
Output output = new Output(statsFile.getFileSystem(conf).create(statsFile,true));
LOG.debug("Created file : " + statsFile);
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/autoColumnStats_9.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/autoColumnStats_9.q b/ql/src/test/queries/clientpositive/autoColumnStats_9.q
index 2b9eb82..63c94e6 100644
--- a/ql/src/test/queries/clientpositive/autoColumnStats_9.q
+++ b/ql/src/test/queries/clientpositive/autoColumnStats_9.q
@@ -17,6 +17,9 @@ INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value;
FROM src src1 JOIN src src2 ON (src1.key = src2.key)
INSERT OVERWRITE TABLE dest_j1_n23 SELECT src1.key, src2.value;
+
+select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0;
+
desc formatted dest_j1_n23;
desc formatted dest_j1_n23 key;
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/union_fast_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_fast_stats.q b/ql/src/test/queries/clientpositive/union_fast_stats.q
index 221fbc1..6359482 100644
--- a/ql/src/test/queries/clientpositive/union_fast_stats.q
+++ b/ql/src/test/queries/clientpositive/union_fast_stats.q
@@ -60,8 +60,14 @@ create table small_alltypesorc_a_n2 stored as orc as select * from
desc formatted small_alltypesorc_a_n2;
+set hive.optimize.metadataonly=true;
+select 15,count(*) from small_alltypesorc_a_n2;
+
ANALYZE TABLE small_alltypesorc_a_n2 COMPUTE STATISTICS;
+-- select assert_true(15=count(*)) from small_alltypesorc_a_n2;
+select 15,count(*) from small_alltypesorc_a_n2;
+
desc formatted small_alltypesorc_a_n2;
insert into table small_alltypesorc_a_n2 select * from small_alltypesorc1a_n2;
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/union_rowcounts.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_rowcounts.q b/ql/src/test/queries/clientpositive/union_rowcounts.q
new file mode 100644
index 0000000..2dc5c06
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/union_rowcounts.q
@@ -0,0 +1,51 @@
+create table t1 (a int) ;
+insert into t1 values (1);
+
+create table t2a as
+ select * from t1
+ union all
+ select * from t1
+;
+
+select 2,count(*) from t2a;
+
+create table t2b as select * from
+(
+ select * from (select * from t1) sq1
+ union all
+ select * from (select * from t1) sq2
+) tt
+;
+
+
+select 2,count(*) from t2b;
+
+drop table if exists t1;
+drop table if exists t2a;
+drop table if exists t2b;
+
+set hive.merge.tezfiles=true;
+
+create table t1 (a int) stored as orc;
+insert into t1 values (1);
+
+analyze table t1 compute statistics for columns;
+
+create table t2a stored as orc as
+ select * from t1
+ union all
+ select * from t1
+;
+
+select 2,count(*) from t2a;
+
+create table t2b stored as orc as select * from
+(
+ select * from (select * from t1) sq1
+ union all
+ select * from (select * from t1) sq2
+) tt
+;
+
+
+select 2,count(*) from t2b;
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/queries/clientpositive/union_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/union_stats.q b/ql/src/test/queries/clientpositive/union_stats.q
index 80856ed..f36e15c 100644
--- a/ql/src/test/queries/clientpositive/union_stats.q
+++ b/ql/src/test/queries/clientpositive/union_stats.q
@@ -1,30 +1,38 @@
---! qt_n4:dataset_n4:src
-explain extended create table t_n4 as select_n4 * from src union all select_n4 * from src;
+--! qt:dataset:src
+explain extended create table t as select * from src union all select * from src;
-create table t_n4 as select_n4 * from src union all select_n4 * from src;
+create table t as select * from src union all select * from src;
-select_n4 count_n4(1) from t_n4;
+select count(1) from t;
-desc formatted t_n4;
+desc formatted t;
-create table tt_n4 as select_n4 * from t_n4 union all select_n4 * from src;
+create table tt as select * from t union all select * from src;
-desc formatted tt_n4;
+desc formatted tt;
-drop table tt_n4;
+drop table tt;
-create table tt_n4 as select_n4 * from src union all select_n4 * from t_n4;
+create table tt as select * from src union all select * from t;
-desc formatted tt_n4;
+desc formatted tt;
-create table t1_n26 like src;
-create table t2_n17 like src;
+create table t1 like src;
+create table t2 like src;
+create table t3 like src;
-from (select_n4 * from src union all select_n4 * from src)s
-insert_n4 overwrite table t1_n26 select_n4 *
-insert_n4 overwrite table t2_n17 select_n4 *;
+set hive.explain.user=true;
+explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *;
-desc formatted t1_n26;
-desc formatted t2_n17;
+from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *;
-select_n4 count_n4(1) from t1_n26;
+desc formatted t1;
+desc formatted t2;
+
+select count(1) from t1;
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
index 8a94108..1c3db7c 100644
--- a/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
+++ b/ql/src/test/results/clientpositive/autoColumnStats_9.q.out
@@ -209,6 +209,15 @@ POSTHOOK: Input: default@src
POSTHOOK: Output: default@dest_j1_n23
POSTHOOK: Lineage: dest_j1_n23.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: dest_j1_n23.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@dest_j1_n23
+#### A masked pattern was here ####
+POSTHOOK: query: select 'cnt, check desc',count(*) from dest_j1_n23 group by key*key >= 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@dest_j1_n23
+#### A masked pattern was here ####
+cnt, check desc 1028
PREHOOK: query: desc formatted dest_j1_n23
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@dest_j1_n23
@@ -229,8 +238,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"key\":\"true\",\"value\":\"true\"}}
bucketing_version 2
numFiles 137
- numRows 855
- rawDataSize 9143
+ numRows 1028
+ rawDataSize 10968
totalSize 11996
#### A masked pattern was here ####
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
index ed1745f..261e153 100644
--- a/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
+++ b/ql/src/test/results/clientpositive/llap/multiMapJoin1.q.out
@@ -193,14 +193,14 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 1748368 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -210,11 +210,11 @@ STAGE PLANS:
outputColumnNames: _col1
input vertices:
1 Map 3
- Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -223,7 +223,7 @@ STAGE PLANS:
1 _col0 (type: string)
input vertices:
1 Map 4
- Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -359,14 +359,14 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 1748368 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -376,11 +376,11 @@ STAGE PLANS:
outputColumnNames: _col1
input vertices:
1 Map 3
- Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -389,7 +389,7 @@ STAGE PLANS:
1 _col0 (type: string)
input vertices:
1 Map 4
- Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
mode: hash
@@ -527,14 +527,14 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 1748368 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key is not null and value is not null) (type: boolean)
- Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key (type: string), value (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 450 Data size: 157651 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4500 Data size: 1573531 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -544,11 +544,11 @@ STAGE PLANS:
outputColumnNames: _col1
input vertices:
1 Map 3
- Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: string)
outputColumnNames: _col0
- Statistics: Num rows: 495 Data size: 173416 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4950 Data size: 1730884 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -558,18 +558,18 @@ STAGE PLANS:
outputColumnNames: _col1
input vertices:
1 Map 4
- Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: count()
keys: _col1 (type: string)
mode: hash
outputColumnNames: _col0, _col1
- Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 544 Data size: 190757 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5445 Data size: 1903972 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: bigint)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -622,14 +622,14 @@ STAGE PLANS:
keys: KEY._col0 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1
- Statistics: Num rows: 272 Data size: 95378 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2722 Data size: 951811 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: bigint)
outputColumnNames: _col0
- Statistics: Num rows: 272 Data size: 95378 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2722 Data size: 951811 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 272 Data size: 95378 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 2722 Data size: 951811 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -835,14 +835,14 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key1 (type: string), key2 (type: string), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -852,11 +852,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -866,11 +866,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 4
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -880,11 +880,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
input vertices:
1 Map 5
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -894,11 +894,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
input vertices:
1 Map 6
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
mode: hash
@@ -1175,14 +1175,14 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key1 (type: string), key2 (type: string), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -1192,11 +1192,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3
input vertices:
1 Map 3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -1206,11 +1206,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
input vertices:
1 Map 4
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -1220,11 +1220,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
input vertices:
1 Map 5
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Map Join Operator
condition map:
Inner Join 0 to 1
@@ -1234,11 +1234,11 @@ STAGE PLANS:
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
input vertices:
1 Map 6
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
mode: hash
@@ -1518,19 +1518,19 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key1 (type: string), key2 (type: string), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1625,16 +1625,16 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Execution mode: llap
@@ -1646,16 +1646,16 @@ STAGE PLANS:
0 _col3 (type: string)
1 _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string)
sort order: +
Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
Reducer 4
Execution mode: llap
@@ -1667,16 +1667,16 @@ STAGE PLANS:
0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
Reducer 5
Execution mode: llap
@@ -1688,11 +1688,11 @@ STAGE PLANS:
0 _col2 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
mode: hash
@@ -1889,19 +1889,19 @@ STAGE PLANS:
TableScan
alias: bigtbl
filterExpr: (key1 is not null and value is not null and key2 is not null) (type: boolean)
- Statistics: Num rows: 500 Data size: 262752 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5000 Data size: 2622552 Basic stats: COMPLETE Column stats: NONE
Filter Operator
predicate: (key1 is not null and key2 is not null and value is not null) (type: boolean)
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: key1 (type: string), key2 (type: string), value (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: string)
sort order: +
Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 425 Data size: 223339 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4250 Data size: 2229169 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: string), _col2 (type: string)
Execution mode: vectorized, llap
LLAP IO: no inputs
@@ -1996,16 +1996,16 @@ STAGE PLANS:
0 _col0 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col2 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col3 (type: string)
sort order: +
Map-reduce partition columns: _col3 (type: string)
- Statistics: Num rows: 467 Data size: 245672 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 4675 Data size: 2452085 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
Reducer 3
Execution mode: llap
@@ -2017,16 +2017,16 @@ STAGE PLANS:
0 _col3 (type: string)
1 _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col5 (type: string), _col3 (type: string), _col4 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col1 (type: string)
sort order: +
Map-reduce partition columns: _col1 (type: string)
- Statistics: Num rows: 513 Data size: 270239 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5142 Data size: 2697293 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string)
Reducer 4
Execution mode: llap
@@ -2038,16 +2038,16 @@ STAGE PLANS:
0 _col1 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col6 (type: string), _col4 (type: string), _col5 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col2 (type: string)
sort order: +
Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 564 Data size: 297262 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 5656 Data size: 2967022 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: string), _col6 (type: string)
Reducer 5
Execution mode: llap
@@ -2059,11 +2059,11 @@ STAGE PLANS:
0 _col2 (type: string)
1 _col0 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int), hash(_col4) (type: int), hash(_col7) (type: int), hash(_col5) (type: int), hash(_col6) (type: int)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 620 Data size: 326988 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 6221 Data size: 3263724 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3), sum(_col4), sum(_col5), sum(_col6), sum(_col7)
mode: hash
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
index cdb9193..40f469b 100644
--- a/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_fast_stats.q.out
@@ -175,14 +175,12 @@ Retention: 0
#### A masked pattern was here ####
Table Type: MANAGED_TABLE
Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
numFiles 3
-<<<<<<< HEAD
-=======
- numRows 5
- rawDataSize 1300
->>>>>>> asf/master
- totalSize 4033
+ numRows 15
+ rawDataSize 3315
+ totalSize 4152
#### A masked pattern was here ####
# Storage Information
@@ -235,7 +233,7 @@ Table Parameters:
numFiles 3
numRows 15
rawDataSize 3483
- totalSize 4033
+ totalSize 4152
#### A masked pattern was here ####
# Storage Information
@@ -299,8 +297,8 @@ Table Parameters:
bucketing_version 2
numFiles 4
numRows 20
- rawDataSize 4552
- totalSize 5406
+ rawDataSize 4468
+ totalSize 5569
#### A masked pattern was here ####
# Storage Information
@@ -513,13 +511,9 @@ Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
numFiles 1
- numRows 5
- rawDataSize 1069
-<<<<<<< HEAD
- totalSize 3243
-=======
- totalSize 3247
->>>>>>> asf/master
+ numRows 15
+ rawDataSize 3315
+ totalSize 3318
#### A masked pattern was here ####
# Storage Information
@@ -532,6 +526,15 @@ Bucket Columns: []
Sort Columns: []
Storage Desc Params:
serialization.format 1
+PREHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+POSTHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+15 15
PREHOOK: query: ANALYZE TABLE small_alltypesorc_a_n2 COMPUTE STATISTICS
PREHOOK: type: QUERY
PREHOOK: Input: default@small_alltypesorc_a_n2
@@ -540,6 +543,15 @@ POSTHOOK: query: ANALYZE TABLE small_alltypesorc_a_n2 COMPUTE STATISTICS
POSTHOOK: type: QUERY
POSTHOOK: Input: default@small_alltypesorc_a_n2
POSTHOOK: Output: default@small_alltypesorc_a_n2
+PREHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+POSTHOOK: query: select 15,count(*) from small_alltypesorc_a_n2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@small_alltypesorc_a_n2
+#### A masked pattern was here ####
+15 15
PREHOOK: query: desc formatted small_alltypesorc_a_n2
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@small_alltypesorc_a_n2
@@ -572,11 +584,7 @@ Table Parameters:
numFiles 1
numRows 15
rawDataSize 3320
-<<<<<<< HEAD
- totalSize 3243
-=======
- totalSize 3247
->>>>>>> asf/master
+ totalSize 3318
#### A masked pattern was here ####
# Storage Information
@@ -640,12 +648,8 @@ Table Parameters:
bucketing_version 2
numFiles 2
numRows 20
- rawDataSize 4389
-<<<<<<< HEAD
- totalSize 4616
-=======
- totalSize 4620
->>>>>>> asf/master
+ rawDataSize 4305
+ totalSize 4735
#### A masked pattern was here ####
# Storage Information
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out b/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out
new file mode 100644
index 0000000..8b06d6e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/union_rowcounts.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: create table t1 (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+PREHOOK: query: create table t2a as
+ select * from t1
+ union all
+ select * from t1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2a
+POSTHOOK: query: create table t2a as
+ select * from t1
+ union all
+ select * from t1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2a
+POSTHOOK: Lineage: t2a.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2a
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2a
+#### A masked pattern was here ####
+2 2
+PREHOOK: query: create table t2b as select * from
+(
+ select * from (select * from t1) sq1
+ union all
+ select * from (select * from t1) sq2
+) tt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2b
+POSTHOOK: query: create table t2b as select * from
+(
+ select * from (select * from t1) sq1
+ union all
+ select * from (select * from t1) sq2
+) tt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2b
+POSTHOOK: Lineage: t2b.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2b
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2b
+#### A masked pattern was here ####
+2 2
+PREHOOK: query: drop table if exists t1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+POSTHOOK: query: drop table if exists t1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+PREHOOK: query: drop table if exists t2a
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2a
+PREHOOK: Output: default@t2a
+POSTHOOK: query: drop table if exists t2a
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2a
+POSTHOOK: Output: default@t2a
+PREHOOK: query: drop table if exists t2b
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t2b
+PREHOOK: Output: default@t2b
+POSTHOOK: query: drop table if exists t2b
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t2b
+POSTHOOK: Output: default@t2b
+PREHOOK: query: create table t1 (a int) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: insert into t1 values (1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t1
+POSTHOOK: query: insert into t1 values (1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.a SCRIPT []
+PREHOOK: query: analyze table t1 compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@t1
+PREHOOK: Output: default@t1
+#### A masked pattern was here ####
+POSTHOOK: query: analyze table t1 compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: default@t1
+#### A masked pattern was here ####
+PREHOOK: query: create table t2a stored as orc as
+ select * from t1
+ union all
+ select * from t1
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2a
+POSTHOOK: query: create table t2a stored as orc as
+ select * from t1
+ union all
+ select * from t1
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2a
+POSTHOOK: Lineage: t2a.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2a
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2a
+#### A masked pattern was here ####
+2 2
+PREHOOK: query: create table t2b stored as orc as select * from
+(
+ select * from (select * from t1) sq1
+ union all
+ select * from (select * from t1) sq2
+) tt
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@t1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2b
+POSTHOOK: query: create table t2b stored as orc as select * from
+(
+ select * from (select * from t1) sq1
+ union all
+ select * from (select * from t1) sq2
+) tt
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@t1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2b
+POSTHOOK: Lineage: t2b.a EXPRESSION [(t1)t1.FieldSchema(name:a, type:int, comment:null), ]
+PREHOOK: query: select 2,count(*) from t2b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t2b
+#### A masked pattern was here ####
+POSTHOOK: query: select 2,count(*) from t2b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t2b
+#### A masked pattern was here ####
+2 2
http://git-wip-us.apache.org/repos/asf/hive/blob/2ca70b91/ql/src/test/results/clientpositive/llap/union_stats.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/llap/union_stats.q.out b/ql/src/test/results/clientpositive/llap/union_stats.q.out
index 5a088f4..cea4847 100644
--- a/ql/src/test/results/clientpositive/llap/union_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/union_stats.q.out
@@ -236,7 +236,7 @@ POSTHOOK: query: select count(1) from t
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t
#### A masked pattern was here ####
-500
+1000
PREHOOK: query: desc formatted t
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@t
@@ -257,8 +257,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
numFiles 2
- numRows 500
- rawDataSize 5312
+ numRows 1000
+ rawDataSize 10624
totalSize 11624
#### A masked pattern was here ####
@@ -306,8 +306,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
numFiles 2
- numRows 1000
- rawDataSize 10624
+ numRows 1500
+ rawDataSize 15936
totalSize 17436
#### A masked pattern was here ####
@@ -363,8 +363,8 @@ Table Parameters:
COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\"}
bucketing_version 2
numFiles 2
- numRows 1000
- rawDataSize 10624
+ numRows 1500
+ rawDataSize 15936
totalSize 17436
#### A masked pattern was here ####
@@ -394,24 +394,155 @@ POSTHOOK: query: create table t2 like src
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t2
+PREHOOK: query: create table t3 like src
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3 like src
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *
+PREHOOK: type: QUERY
+POSTHOOK: query: explain from (select * from src union all select * from src)s
+insert overwrite table t1 select *
+insert overwrite table t2 select *
+insert overwrite table t3 select *
+POSTHOOK: type: QUERY
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Map 1 <- Union 2 (CONTAINS)
+Map 6 <- Union 2 (CONTAINS)
+Reducer 3 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 4 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+Reducer 5 <- Union 2 (CUSTOM_SIMPLE_EDGE)
+
+Stage-5
+ Stats Work{}
+ Stage-0
+ Move Operator
+ table:{"name:":"default.t1"}
+ Stage-4
+ Dependency Collection{}
+ Stage-3
+ Reducer 3 llap
+ File Output Operator [FS_6]
+ Group By Operator [GBY_4] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+ <-Union 2 [CUSTOM_SIMPLE_EDGE]
+ <-Map 1 [CONTAINS] llap
+ File Output Operator [FS_7]
+ table:{"name:":"default.t1"}
+ Select Operator [SEL_1] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ TableScan [TS_0] (rows=500 width=178)
+ Output:["key","value"]
+ Reduce Output Operator [RS_3]
+ Group By Operator [GBY_2] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+ Select Operator [SEL_1] (rows=1000 width=178)
+ Output:["key","value"]
+ Please refer to the previous Select Operator [SEL_1]
+ File Output Operator [FS_9]
+ table:{"name:":"default.t2"}
+ Please refer to the previous Select Operator [SEL_1]
+ Reduce Output Operator [RS_3]
+ Group By Operator [GBY_2] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+ Select Operator [SEL_1] (rows=1000 width=178)
+ Output:["key","value"]
+ Please refer to the previous Select Operator [SEL_1]
+ File Output Operator [FS_11]
+ table:{"name:":"default.t3"}
+ Please refer to the previous Select Operator [SEL_1]
+ Reduce Output Operator [RS_3]
+ Group By Operator [GBY_2] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+ Select Operator [SEL_1] (rows=1000 width=178)
+ Output:["key","value"]
+ Please refer to the previous Select Operator [SEL_1]
+ <-Map 6 [CONTAINS] llap
+ File Output Operator [FS_7]
+ table:{"name:":"default.t1"}
+ Select Operator [SEL_3] (rows=500 width=178)
+ Output:["_col0","_col1"]
+ TableScan [TS_2] (rows=500 width=178)
+ Output:["key","value"]
+ Reduce Output Operator [RS_3]
+ Group By Operator [GBY_2] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+ Select Operator [SEL_1] (rows=1000 width=178)
+ Output:["key","value"]
+ Please refer to the previous Select Operator [SEL_3]
+ File Output Operator [FS_9]
+ table:{"name:":"default.t2"}
+ Please refer to the previous Select Operator [SEL_3]
+ Reduce Output Operator [RS_3]
+ Group By Operator [GBY_2] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+ Select Operator [SEL_1] (rows=1000 width=178)
+ Output:["key","value"]
+ Please refer to the previous Select Operator [SEL_3]
+ File Output Operator [FS_11]
+ table:{"name:":"default.t3"}
+ Please refer to the previous Select Operator [SEL_3]
+ Reduce Output Operator [RS_3]
+ Group By Operator [GBY_2] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(key, 'hll')","compute_stats(value, 'hll')"]
+ Select Operator [SEL_1] (rows=1000 width=178)
+ Output:["key","value"]
+ Please refer to the previous Select Operator [SEL_3]
+ Reducer 4 llap
+ File Output Operator [FS_6]
+ Group By Operator [GBY_4] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+ <- Please refer to the previous Union 2 [CUSTOM_SIMPLE_EDGE]
+ Reducer 5 llap
+ File Output Operator [FS_6]
+ Group By Operator [GBY_4] (rows=1 width=880)
+ Output:["_col0","_col1"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)"]
+ <- Please refer to the previous Union 2 [CUSTOM_SIMPLE_EDGE]
+Stage-6
+ Stats Work{}
+ Stage-1
+ Move Operator
+ table:{"name:":"default.t2"}
+ Please refer to the previous Stage-4
+Stage-7
+ Stats Work{}
+ Stage-2
+ Move Operator
+ table:{"name:":"default.t3"}
+ Please refer to the previous Stage-4
+
PREHOOK: query: from (select * from src union all select * from src)s
insert overwrite table t1 select *
insert overwrite table t2 select *
+insert overwrite table t3 select *
PREHOOK: type: QUERY
PREHOOK: Input: default@src
PREHOOK: Output: default@t1
PREHOOK: Output: default@t2
+PREHOOK: Output: default@t3
POSTHOOK: query: from (select * from src union all select * from src)s
insert overwrite table t1 select *
insert overwrite table t2 select *
+insert overwrite table t3 select *
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
POSTHOOK: Output: default@t1
POSTHOOK: Output: default@t2
+POSTHOOK: Output: default@t3
POSTHOOK: Lineage: t1.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: t1.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
POSTHOOK: Lineage: t2.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
POSTHOOK: Lineage: t2.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: t3.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: t3.value EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ]
PREHOOK: query: desc formatted t1
PREHOOK: type: DESCTABLE
PREHOOK: Input: default@t1