You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/08/30 22:21:20 UTC
[13/17] hive git commit: HIVE-14362: Support explain analyze in Hive
(Pengcheng Xiong, reviewed by Ashutosh Chauhan, Gopal V and Gabor Szadovszky)
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java
index 57f9432..f3c6820 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SubQueryDiagnostic.java
@@ -29,7 +29,7 @@ public class SubQueryDiagnostic {
static QBSubQueryRewrite getRewrite(QBSubQuery subQuery,
TokenRewriteStream stream,
Context ctx) {
- if (ctx.getExplain()) {
+ if (ctx.isExplainSkipExecution()) {
return new QBSubQueryRewrite(subQuery, stream);
} else {
return new QBSubQueryRewriteNoop(subQuery, stream);
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
index 114fa2f..fb5ca57 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TaskCompiler.java
@@ -28,6 +28,7 @@ import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;
+import java.util.Stack;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -41,6 +42,7 @@ import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryState;
import org.apache.hadoop.hive.ql.exec.ColumnStatsTask;
import org.apache.hadoop.hive.ql.exec.FetchTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.StatsTask;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
@@ -52,6 +54,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
+import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer.AnalyzeRewriteContext;
import org.apache.hadoop.hive.ql.plan.ColumnStatsDesc;
import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
@@ -131,6 +134,7 @@ public abstract class TaskCompiler {
}
}
}
+
return;
}
@@ -497,4 +501,5 @@ public abstract class TaskCompiler {
clone.setMapJoinOps(pCtx.getMapJoinOps());
return clone;
}
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
index 66a8322..cd0b588 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TezCompiler.java
@@ -72,6 +72,7 @@ import org.apache.hadoop.hive.ql.optimizer.ReduceSinkMapJoinProc;
import org.apache.hadoop.hive.ql.optimizer.RemoveDynamicPruningBySize;
import org.apache.hadoop.hive.ql.optimizer.SetReducerParallelism;
import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
+import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.CrossProductCheck;
import org.apache.hadoop.hive.ql.optimizer.physical.LlapDecider;
import org.apache.hadoop.hive.ql.optimizer.physical.MemoryDecider;
@@ -390,8 +391,9 @@ public class TezCompiler extends TaskCompiler {
}
// we need to clone some operator plans and remove union operators still
+ int indexForTezUnion = 0;
for (BaseWork w: procCtx.workWithUnionOperators) {
- GenTezUtils.removeUnionOperators(procCtx, w);
+ GenTezUtils.removeUnionOperators(procCtx, w, indexForTezUnion++);
}
// then we make sure the file sink operators are set up right
@@ -489,7 +491,8 @@ public class TezCompiler extends TaskCompiler {
LOG.debug("Skipping cross product analysis");
}
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)
+ && ctx.getExplainAnalyze() == null) {
physicalCtx = new Vectorizer().resolve(physicalCtx);
} else {
LOG.debug("Skipping vectorization");
@@ -517,6 +520,11 @@ public class TezCompiler extends TaskCompiler {
// the backend. If you have a physical optimization that changes
// table scans or filters, you have to invoke it before this one.
physicalCtx = new SerializeFilter().resolve(physicalCtx);
+
+ if (physicalCtx.getContext().getExplainAnalyze() != null) {
+ new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
+ }
+
perfLogger.PerfLogEnd(this.getClass().getName(), PerfLogger.TEZ_COMPILER, "optimizeTaskPlan");
return;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 33fbffe..34d83ef 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -260,6 +260,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
// references.
HiveConf.setVar(conf, HiveConf.ConfVars.DYNAMICPARTITIONINGMODE, "nonstrict");
rewrittenCtx = new Context(conf);
+ rewrittenCtx.setExplainConfig(ctx.getExplainConfig());
} catch (IOException e) {
throw new SemanticException(ErrorMsg.UPDATEDELETE_IO_ERROR.getMsg());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
index 08278de..baf77c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/spark/SparkCompiler.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hive.ql.optimizer.ConstantPropagate;
import org.apache.hadoop.hive.ql.optimizer.DynamicPartitionPruningOptimization;
import org.apache.hadoop.hive.ql.optimizer.SparkRemoveDynamicPruningBySize;
import org.apache.hadoop.hive.ql.optimizer.metainfo.annotation.AnnotateWithOpTraits;
+import org.apache.hadoop.hive.ql.optimizer.physical.AnnotateRunTimeStatsOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.NullScanOptimizer;
import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext;
@@ -429,7 +430,8 @@ public class SparkCompiler extends TaskCompiler {
LOG.debug("Skipping cross product analysis");
}
- if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)) {
+ if (conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED)
+ && ctx.getExplainAnalyze() == null) {
(new Vectorizer()).resolve(physicalCtx);
} else {
LOG.debug("Skipping vectorization");
@@ -443,6 +445,10 @@ public class SparkCompiler extends TaskCompiler {
new CombineEquivalentWorkResolver().resolve(physicalCtx);
+ if (physicalCtx.getContext().getExplainAnalyze() != null) {
+ new AnnotateRunTimeStatsOptimizer().resolve(physicalCtx);
+ }
+
PERF_LOGGER.PerfLogEnd(CLASS_NAME, PerfLogger.SPARK_OPTIMIZE_TASK_TREE);
return;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
index adec5c7..e217bdf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractOperatorDesc.java
@@ -21,8 +21,10 @@ package org.apache.hadoop.hive.ql.plan;
import java.util.Map;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.PTFUtils;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
+import org.apache.hadoop.hive.ql.stats.StatsCollectionContext;
public class AbstractOperatorDesc implements OperatorDesc {
@@ -31,6 +33,7 @@ public class AbstractOperatorDesc implements OperatorDesc {
protected transient OpTraits opTraits;
protected transient Map<String, String> opProps;
protected long memNeeded = 0;
+ protected String runtimeStatsTmpDir;
@Override
@Explain(skipHeader = true, displayName = "Statistics")
@@ -89,4 +92,13 @@ public class AbstractOperatorDesc implements OperatorDesc {
public void setMemoryNeeded(long memNeeded) {
this.memNeeded = memNeeded;
}
+
+ public String getRuntimeStatsTmpDir() {
+ return runtimeStatsTmpDir;
+ }
+
+ public void setRuntimeStatsTmpDir(String runtimeStatsTmpDir) {
+ this.runtimeStatsTmpDir = runtimeStatsTmpDir;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
index a213c83..9f4767c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
import org.apache.hadoop.hive.ql.parse.ParseContext;
/**
@@ -42,15 +43,10 @@ public class ExplainWork implements Serializable {
private HashSet<ReadEntity> inputs;
private ParseContext pCtx;
- boolean extended;
- boolean formatted;
- boolean dependency;
- boolean logical;
+ private ExplainConfiguration config;
boolean appendTaskType;
- boolean authorize;
- boolean userLevelExplain;
String cboInfo;
private transient BaseSemanticAnalyzer analyzer;
@@ -63,12 +59,7 @@ public class ExplainWork implements Serializable {
List<Task<? extends Serializable>> rootTasks,
Task<? extends Serializable> fetchTask,
BaseSemanticAnalyzer analyzer,
- boolean extended,
- boolean formatted,
- boolean dependency,
- boolean logical,
- boolean authorize,
- boolean userLevelExplain,
+ ExplainConfiguration config,
String cboInfo) {
this.resFile = resFile;
this.rootTasks = new ArrayList<Task<? extends Serializable>>(rootTasks);
@@ -77,14 +68,9 @@ public class ExplainWork implements Serializable {
if (analyzer != null) {
this.inputs = analyzer.getInputs();
}
- this.extended = extended;
- this.formatted = formatted;
- this.dependency = dependency;
- this.logical = logical;
this.pCtx = pCtx;
- this.authorize = authorize;
- this.userLevelExplain = userLevelExplain;
this.cboInfo = cboInfo;
+ this.config = config;
}
public Path getResFile() {
@@ -120,27 +106,15 @@ public class ExplainWork implements Serializable {
}
public boolean getExtended() {
- return extended;
- }
-
- public void setExtended(boolean extended) {
- this.extended = extended;
+ return config.isExtended();
}
public boolean getDependency() {
- return dependency;
- }
-
- public void setDependency(boolean dependency) {
- this.dependency = dependency;
+ return config.isDependency();
}
public boolean isFormatted() {
- return formatted;
- }
-
- public void setFormatted(boolean formatted) {
- this.formatted = formatted;
+ return config.isFormatted();
}
public ParseContext getParseContext() {
@@ -152,11 +126,7 @@ public class ExplainWork implements Serializable {
}
public boolean isLogical() {
- return logical;
- }
-
- public void setLogical(boolean logical) {
- this.logical = logical;
+ return config.isLogical();
}
public boolean isAppendTaskType() {
@@ -168,11 +138,7 @@ public class ExplainWork implements Serializable {
}
public boolean isAuthorize() {
- return authorize;
- }
-
- public void setAuthorize(boolean authorize) {
- this.authorize = authorize;
+ return config.isAuthorize();
}
public BaseSemanticAnalyzer getAnalyzer() {
@@ -180,11 +146,7 @@ public class ExplainWork implements Serializable {
}
public boolean isUserLevelExplain() {
- return userLevelExplain;
- }
-
- public void setUserLevelExplain(boolean userLevelExplain) {
- this.userLevelExplain = userLevelExplain;
+ return config.isUserLevelExplain();
}
public String getCboInfo() {
@@ -195,4 +157,12 @@ public class ExplainWork implements Serializable {
this.cboInfo = cboInfo;
}
+ public ExplainConfiguration getConfig() {
+ return config;
+ }
+
+ public void setConfig(ExplainConfiguration config) {
+ this.config = config;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
index ce0e0a8..07ed4fd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/FileSinkDesc.java
@@ -61,7 +61,6 @@ public class FileSinkDesc extends AbstractOperatorDesc {
private DynamicPartitionCtx dpCtx;
private String staticSpec; // static partition spec ends with a '/'
private boolean gatherStats;
- private int indexInTezUnion = -1;
// Consider a query like:
// insert overwrite table T3 select ... from T1 join T2 on T1.key = T2.key;
@@ -475,12 +474,4 @@ public class FileSinkDesc extends AbstractOperatorDesc {
this.statsTmpDir = statsCollectionTempDir;
}
- public int getIndexInTezUnion() {
- return indexInTezUnion;
- }
-
- public void setIndexInTezUnion(int indexInTezUnion) {
- this.indexInTezUnion = indexInTezUnion;
- }
-
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
index a5527dc..20cd56f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
@@ -48,7 +49,12 @@ public class MergeJoinWork extends BaseWork {
@Override
public Set<Operator<?>> getAllRootOperators() {
- return getMainWork().getAllRootOperators();
+ Set<Operator<?>> set = new HashSet<>();
+ set.addAll(getMainWork().getAllRootOperators());
+ for (BaseWork w : mergeWorkList) {
+ set.addAll(w.getAllRootOperators());
+ }
+ return set;
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
index 16be499..ad620c2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorDesc.java
@@ -30,4 +30,6 @@ public interface OperatorDesc extends Serializable, Cloneable {
public Map<String, String> getOpProps();
public long getMemoryNeeded();
public void setMemoryNeeded(long memoryNeeded);
+ public String getRuntimeStatsTmpDir();
+ public void setRuntimeStatsTmpDir(String runtimeStatsTmpDir);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
index 029043f..c46ea70 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/Statistics.java
@@ -40,18 +40,20 @@ public class Statistics implements Serializable {
}
private long numRows;
+ private long runTimeNumRows;
private long dataSize;
private State basicStatsState;
private Map<String, ColStatistics> columnStats;
private State columnStatsState;
public Statistics() {
- this(0, 0);
+ this(0, 0, -1);
}
- public Statistics(long nr, long ds) {
+ public Statistics(long nr, long ds, long rnr) {
this.setNumRows(nr);
this.setDataSize(ds);
+ this.setRunTimeNumRows(rnr);
this.basicStatsState = State.NONE;
this.columnStats = null;
this.columnStatsState = State.NONE;
@@ -107,6 +109,9 @@ public class Statistics implements Serializable {
StringBuilder sb = new StringBuilder();
sb.append("Num rows: ");
sb.append(numRows);
+ if (runTimeNumRows >= 0) {
+ sb.append("/" + runTimeNumRows);
+ }
sb.append(" Data size: ");
sb.append(dataSize);
sb.append(" Basic stats: ");
@@ -121,6 +126,9 @@ public class Statistics implements Serializable {
StringBuilder sb = new StringBuilder();
sb.append("rows=");
sb.append(numRows);
+ if (runTimeNumRows >= 0) {
+ sb.append("/" + runTimeNumRows);
+ }
sb.append(" width=");
// just to be safe about numRows
if (numRows != 0) {
@@ -148,7 +156,7 @@ public class Statistics implements Serializable {
@Override
public Statistics clone() throws CloneNotSupportedException {
- Statistics clone = new Statistics(numRows, dataSize);
+ Statistics clone = new Statistics(numRows, dataSize, runTimeNumRows);
clone.setBasicStatsState(basicStatsState);
clone.setColumnStatsState(columnStatsState);
if (columnStats != null) {
@@ -263,4 +271,12 @@ public class Statistics implements Serializable {
}
return null;
}
+
+ public long getRunTimeNumRows() {
+ return runTimeNumRows;
+ }
+
+ public void setRunTimeNumRows(long runTimeNumRows) {
+ this.runTimeNumRows = runTimeNumRows;
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
index 990d80c..805bc5b 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/TestExplainTask.java
@@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
@@ -132,6 +133,7 @@ public class TestExplainTask {
pCtx.setTopOps(topOps);
work.setParseContext(pCtx);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ work.setConfig(new ExplainConfiguration());
new ExplainTask().getJSONLogicalPlan(new PrintStream(baos), work);
baos.close();
return baos.toString();
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
index ae1747d..d6fe540 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/parse/TestUpdateDeleteSemanticAnalyzer.java
@@ -297,8 +297,10 @@ public class TestUpdateDeleteSemanticAnalyzer {
Path tmp = new Path(f.getPath());
fs.create(tmp);
fs.deleteOnExit(tmp);
+ ExplainConfiguration config = new ExplainConfiguration();
+ config.setExtended(true);
ExplainWork work = new ExplainWork(tmp, sem.getParseContext(), sem.getRootTasks(),
- sem.getFetchTask(), sem, true, false, false, false, false, false, null);
+ sem.getFetchTask(), sem, config, null);
ExplainTask task = new ExplainTask();
task.setWork(work);
task.initialize(queryState, plan, null, null);
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_1.q b/ql/src/test/queries/clientpositive/explainanalyze_1.q
new file mode 100644
index 0000000..a4b3dc5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explainanalyze_1.q
@@ -0,0 +1,38 @@
+set hive.mapred.mode=nonstrict;
+
+explain analyze select * from src a union all select * from src b limit 10;
+
+explain analyze select key from src;
+
+explain analyze create table t as select key from src;
+
+create table t as select key from src;
+
+explain analyze insert overwrite table t select key from src;
+
+explain analyze select key from src limit 10;
+
+explain analyze select key from src where value < 10;
+
+explain analyze select key from src where key < 10;
+select count(*) from (select key from src where key < 10)subq;
+
+explain analyze select key, count(key) from src group by key;
+select count(*) from (select key, count(key) from src group by key)subq;
+
+explain analyze select count(*) from src a join src b on a.key = b.value where a.key > 0;
+
+explain analyze select count(*) from src a join src b on a.key = b.key where a.key > 0;
+select count(*) from src a join src b on a.key = b.key where a.key > 0;
+
+
+explain analyze select * from src a union all select * from src b;
+select count(*) from (select * from src a union all select * from src b)subq;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+
+EXPLAIN analyze
+SELECT x.key, y.value
+FROM src x JOIN src y ON (x.key = y.key);
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_2.q b/ql/src/test/queries/clientpositive/explainanalyze_2.q
new file mode 100644
index 0000000..dfee826
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explainanalyze_2.q
@@ -0,0 +1,329 @@
+set hive.explain.user=true;
+set hive.metastore.aggregate.stats.cache.enabled=false;
+
+-- SORT_QUERY_RESULTS
+
+CREATE TABLE dest_j1(key STRING, value STRING, val2 STRING) STORED AS TEXTFILE;
+
+CREATE TABLE ss(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE;
+
+CREATE TABLE sr(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE;
+
+CREATE TABLE cs(k1 STRING,v1 STRING,k2 STRING,v2 STRING,k3 STRING,v3 STRING) STORED AS TEXTFILE;
+
+INSERT OVERWRITE TABLE ss
+SELECT x.key,x.value,y.key,y.value,z.key,z.value
+FROM src1 x
+JOIN src y ON (x.key = y.key)
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
+
+INSERT OVERWRITE TABLE sr
+SELECT x.key,x.value,y.key,y.value,z.key,z.value
+FROM src1 x
+JOIN src y ON (x.key = y.key)
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=12);
+
+INSERT OVERWRITE TABLE cs
+SELECT x.key,x.value,y.key,y.value,z.key,z.value
+FROM src1 x
+JOIN src y ON (x.key = y.key)
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08');
+
+
+ANALYZE TABLE ss COMPUTE STATISTICS;
+ANALYZE TABLE ss COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3;
+
+ANALYZE TABLE sr COMPUTE STATISTICS;
+ANALYZE TABLE sr COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3;
+
+ANALYZE TABLE cs COMPUTE STATISTICS;
+ANALYZE TABLE cs COMPUTE STATISTICS FOR COLUMNS k1,v1,k2,v2,k3,v3;
+
+set hive.auto.convert.join=false;
+
+explain analyze
+SELECT x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
+
+explain analyze
+select
+ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3)
+FROM
+ss,sr,cs,src d1,src d2,src d3,src1,srcpart
+where
+ ss.k1 = d1.key
+and sr.k1 = d2.key
+and cs.k1 = d3.key
+and ss.k2 = sr.k2
+and ss.k3 = sr.k3
+and ss.v1 = src1.value
+and ss.v2 = srcpart.value
+and sr.v2 = cs.v2
+and sr.v3 = cs.v3
+and ss.v3='ssv3'
+and sr.v1='srv1'
+and src1.key = 'src1key'
+and srcpart.key = 'srcpartkey'
+and d1.value = 'd1value'
+and d2.value in ('2000Q1','2000Q2','2000Q3')
+and d3.value in ('2000Q1','2000Q2','2000Q3')
+group by
+ss.k1,sr.k2,cs.k3
+order by
+ss.k1,sr.k2,cs.k3
+limit 100;
+
+explain analyze
+SELECT x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value)
+union
+SELECT x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value);
+
+explain analyze
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value)
+union
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value)
+union
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value);
+
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.stats.fetch.column.stats=false;
+
+
+explain analyze
+SELECT x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN srcpart z ON (x.value = z.value and z.ds='2008-04-08' and z.hr=11);
+
+explain analyze
+select
+ss.k1,sr.k2,cs.k3,count(ss.v1),count(sr.v2),count(cs.v3)
+FROM
+ss,sr,cs,src d1,src d2,src d3,src1,srcpart
+where
+ ss.k1 = d1.key
+and sr.k1 = d2.key
+and cs.k1 = d3.key
+and ss.k2 = sr.k2
+and ss.k3 = sr.k3
+and ss.v1 = src1.value
+and ss.v2 = srcpart.value
+and sr.v2 = cs.v2
+and sr.v3 = cs.v3
+and ss.v3='ssv3'
+and sr.v1='srv1'
+and src1.key = 'src1key'
+and srcpart.key = 'srcpartkey'
+and d1.value = 'd1value'
+and d2.value in ('2000Q1','2000Q2','2000Q3')
+and d3.value in ('2000Q1','2000Q2','2000Q3')
+group by
+ss.k1,sr.k2,cs.k3
+order by
+ss.k1,sr.k2,cs.k3
+limit 100;
+
+explain analyze
+SELECT x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value)
+union
+SELECT x.key, z.value, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value);
+
+explain analyze
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value)
+union
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value)
+union
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value);
+
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+set hive.auto.convert.sortmerge.join.bigtable.selection.policy = org.apache.hadoop.hive.ql.optimizer.TableSizeBasedBigTableSelectorForAutoSMJ;
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+
+
+
+set hive.optimize.bucketingsorting=false;
+insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part;
+
+CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+CREATE TABLE tab2(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) SORTED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab2 partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+set hive.convert.join.bucket.mapjoin.tez = false;
+set hive.auto.convert.sortmerge.join = true;
+
+set hive.auto.convert.join.noconditionaltask.size=500;
+
+explain analyze
+select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key;
+
+explain analyze
+select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value;
+
+explain analyze
+select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key;
+
+explain analyze
+select s1.key as key, s1.value as value from tab s1 join tab2 s3 on s1.key=s3.key join tab2 s2 on s1.value=s2.value;
+
+explain analyze
+select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key
+UNION ALL
+select s2.key as key, s2.value as value from tab s2
+) a join tab_part b on (a.key = b.key);
+
+explain analyze
+select count(*) from (select s1.key as key, s1.value as value from tab s1 join tab s3 on s1.key=s3.key join tab s2 on s1.value=s2.value
+UNION ALL
+select s2.key as key, s2.value as value from tab s2
+) a join tab_part b on (a.key = b.key);
+
+explain analyze
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union all select * from src)z ON (x.value = z.value)
+union all
+SELECT x.key, y.value
+FROM src x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value)
+union all
+SELECT x.key, y.value
+FROM src1 x JOIN src1 y ON (x.key = y.key)
+JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value);
+
+explain analyze
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value)
+union
+SELECT x.key, y.value
+FROM src x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value)
+union
+SELECT x.key, y.value
+FROM src1 x JOIN src1 y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value);
+
+CREATE TABLE a(key STRING, value STRING) STORED AS TEXTFILE;
+CREATE TABLE b(key STRING, value STRING) STORED AS TEXTFILE;
+CREATE TABLE c(key STRING, value STRING) STORED AS TEXTFILE;
+
+explain analyze
+from
+(
+SELECT x.key, y.value
+FROM src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union all select * from src)z ON (x.value = z.value)
+union all
+SELECT x.key, y.value
+FROM src x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union all select key, value from src union all select key, value from src)z ON (x.value = z.value)
+union all
+SELECT x.key, y.value
+FROM src1 x JOIN src1 y ON (x.key = y.key)
+JOIN (select key, value from src1 union all select key, value from src union all select key, value from src union all select key, value from src)z ON (x.value = z.value)
+) tmp
+INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value
+INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value
+INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value;
+
+explain analyze
+FROM
+(
+SELECT x.key as key, y.value as value from src1 x JOIN src y ON (x.key = y.key)
+JOIN (select * from src1 union select * from src)z ON (x.value = z.value)
+union
+SELECT x.key as key, y.value as value from src x JOIN src y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src)z ON (x.value = z.value)
+union
+SELECT x.key as key, y.value as value from src1 x JOIN src1 y ON (x.key = y.key)
+JOIN (select key, value from src1 union select key, value from src union select key, value from src union select key, value from src)z ON (x.value = z.value)
+) tmp
+INSERT OVERWRITE TABLE a SELECT tmp.key, tmp.value
+INSERT OVERWRITE TABLE b SELECT tmp.key, tmp.value
+INSERT OVERWRITE TABLE c SELECT tmp.key, tmp.value;
+
+
+CREATE TABLE DEST1(key STRING, value STRING) STORED AS TEXTFILE;
+CREATE TABLE DEST2(key STRING, val1 STRING, val2 STRING) STORED AS TEXTFILE;
+
+explain analyze
+FROM (select 'tst1' as key, cast(count(1) as string) as value from src s1
+ UNION DISTINCT
+ select s2.key as key, s2.value as value from src s2) unionsrc
+INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key
+INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key, unionsrc.value;
+
+explain analyze FROM UNIQUEJOIN PRESERVE src a (a.key), PRESERVE src1 b (b.key), PRESERVE srcpart c (c.key) SELECT a.key, b.key, c.key;
+
+set hive.entity.capture.transform=true;
+
+explain analyze
+SELECT
+TRANSFORM(a.key, a.value) USING 'cat' AS (tkey, tvalue)
+FROM src a join src b
+on a.key = b.key;
+
+explain analyze
+FROM (
+ select key, value from (
+ select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1
+ UNION all
+ select s2.key as key, s2.value as value, 'tst1' as value2 from src s2) unionsub
+ UNION all
+ select key, value from src s0
+ ) unionsrc
+INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key
+INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5))
+GROUP BY unionsrc.key, unionsrc.value;
+
+explain analyze
+FROM (
+ select 'tst1' as key, cast(count(1) as string) as value, 'tst1' as value2 from src s1
+ UNION all
+ select s2.key as key, s2.value as value, 'tst1' as value2 from src s2
+ ) unionsrc
+INSERT OVERWRITE TABLE DEST1 SELECT unionsrc.key, COUNT(DISTINCT SUBSTR(unionsrc.value,5)) GROUP BY unionsrc.key
+INSERT OVERWRITE TABLE DEST2 SELECT unionsrc.key, unionsrc.value, COUNT(DISTINCT SUBSTR(unionsrc.value,5))
+GROUP BY unionsrc.key, unionsrc.value;
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_3.q b/ql/src/test/queries/clientpositive/explainanalyze_3.q
new file mode 100644
index 0000000..69f82e5
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explainanalyze_3.q
@@ -0,0 +1,158 @@
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.DefaultHiveAuthorizationProvider;
+set hive.metastore.filter.hook=org.apache.hadoop.hive.metastore.DefaultMetaStoreFilterHookImpl;
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=true;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.vectorized.execution.enabled=true;
+
+explain analyze select key, value
+FROM srcpart LATERAL VIEW explode(array(1,2,3)) myTable AS myCol;
+
+explain analyze show tables;
+
+explain analyze create database newDB location "/tmp/";
+
+create database newDB location "/tmp/";
+
+explain analyze describe database extended newDB;
+
+describe database extended newDB;
+
+explain analyze use newDB;
+
+use newDB;
+
+create table tab (name string);
+
+explain analyze alter table tab rename to newName;
+
+explain analyze drop table tab;
+
+drop table tab;
+
+explain analyze use default;
+
+use default;
+
+drop database newDB;
+
+explain analyze analyze table src compute statistics;
+
+explain analyze analyze table src compute statistics for columns;
+
+explain analyze
+CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x));
+
+CREATE TEMPORARY MACRO SIGMOID (x DOUBLE) 1.0 / (1.0 + EXP(-x));
+
+explain analyze SELECT SIGMOID(2) FROM src LIMIT 1;
+explain analyze DROP TEMPORARY MACRO SIGMOID;
+DROP TEMPORARY MACRO SIGMOID;
+
+explain analyze create table src_autho_test as select * from src;
+create table src_autho_test as select * from src;
+
+set hive.security.authorization.enabled=true;
+
+explain analyze grant select on table src_autho_test to user hive_test_user;
+grant select on table src_autho_test to user hive_test_user;
+
+explain analyze show grant user hive_test_user on table src_autho_test;
+explain analyze show grant user hive_test_user on table src_autho_test(key);
+
+select key from src_autho_test order by key limit 20;
+
+explain analyze revoke select on table src_autho_test from user hive_test_user;
+
+explain analyze grant select(key) on table src_autho_test to user hive_test_user;
+
+explain analyze revoke select(key) on table src_autho_test from user hive_test_user;
+
+explain analyze
+create role sRc_roLE;
+
+create role sRc_roLE;
+
+explain analyze
+grant role sRc_roLE to user hive_test_user;
+
+grant role sRc_roLE to user hive_test_user;
+
+explain analyze show role grant user hive_test_user;
+
+explain analyze drop role sRc_roLE;
+drop role sRc_roLE;
+
+set hive.security.authorization.enabled=false;
+drop table src_autho_test;
+
+explain analyze drop view v;
+
+explain analyze create view v as with cte as (select * from src order by key limit 5)
+select * from cte;
+
+explain analyze with cte as (select * from src order by key limit 5)
+select * from cte;
+
+create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc;
+
+load data local inpath '../../data/files/orc_split_elim.orc' into table orc_merge5;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET mapred.min.split.size=1000;
+SET mapred.max.split.size=50000;
+SET hive.optimize.index.filter=true;
+set hive.merge.orcfile.stripe.level=false;
+set hive.merge.tezfiles=false;
+set hive.merge.mapfiles=false;
+set hive.merge.mapredfiles=false;
+set hive.compute.splits.in.am=true;
+set tez.grouping.min-size=1000;
+set tez.grouping.max-size=50000;
+
+set hive.merge.orcfile.stripe.level=true;
+set hive.merge.tezfiles=true;
+set hive.merge.mapfiles=true;
+set hive.merge.mapredfiles=true;
+
+explain analyze insert overwrite table orc_merge5 select userid,string1,subtype,decimal1,ts from orc_merge5 where userid<=13;
+
+drop table orc_merge5;
+
+set hive.auto.convert.join=true;
+set hive.auto.convert.join.noconditionaltask=true;
+set hive.auto.convert.join.noconditionaltask.size=10000;
+
+CREATE TABLE srcbucket_mapjoin(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE tab_part (key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE;
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin partition(ds='2008-04-08');
+
+load data local inpath '../../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+load data local inpath '../../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08');
+
+
+
+set hive.optimize.bucketingsorting=false;
+insert overwrite table tab_part partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin_part;
+
+CREATE TABLE tab(key int, value string) PARTITIONED BY(ds STRING) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE;
+insert overwrite table tab partition (ds='2008-04-08')
+select key,value from srcbucket_mapjoin;
+
+set hive.convert.join.bucket.mapjoin.tez = true;
+explain analyze
+select a.key, a.value, b.value
+from tab a join tab_part b on a.key = b.key;
+
+
+
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_4.q b/ql/src/test/queries/clientpositive/explainanalyze_4.q
new file mode 100644
index 0000000..dad397b
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explainanalyze_4.q
@@ -0,0 +1,103 @@
+set hive.mapred.mode=nonstrict;
+
+set hive.explain.user=true;
+set hive.auto.convert.join=false;
+set hive.optimize.dynamic.partition.hashjoin=false;
+
+-- First try with regular mergejoin
+explain analyze
+select
+ *
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+order by a.cint;
+
+select
+ *
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+order by a.cint;
+
+explain analyze
+select
+ count(*)
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null;
+
+select
+ count(*)
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null;
+
+explain analyze
+select
+ a.csmallint, count(*) c1
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+group by a.csmallint
+order by c1;
+
+select
+ a.csmallint, count(*) c1
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+group by a.csmallint
+order by c1;
+
+set hive.auto.convert.join=true;
+set hive.optimize.dynamic.partition.hashjoin=true;
+set hive.auto.convert.join.noconditionaltask.size=200000;
+set hive.stats.fetch.column.stats=false;
+set hive.exec.reducers.bytes.per.reducer=200000;
+
+-- Try with dynamically partitioned hashjoin
+explain analyze
+select
+ *
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+order by a.cint;
+
+select
+ *
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+order by a.cint;
+
+explain analyze
+select
+ count(*)
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null;
+
+select
+ count(*)
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null;
+
+explain analyze
+select
+ a.csmallint, count(*) c1
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+group by a.csmallint
+order by c1;
+
+select
+ a.csmallint, count(*) c1
+from alltypesorc a join alltypesorc b on a.cint = b.cint
+where
+ a.cint between 1000000 and 3000000 and b.cbigint is not null
+group by a.csmallint
+order by c1;
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/queries/clientpositive/explainanalyze_5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/explainanalyze_5.q b/ql/src/test/queries/clientpositive/explainanalyze_5.q
new file mode 100644
index 0000000..bb23e45
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/explainanalyze_5.q
@@ -0,0 +1,81 @@
+set hive.stats.column.autogather=true;
+
+explain analyze analyze table src compute statistics;
+
+explain analyze analyze table src compute statistics for columns;
+
+drop table src_multi2;
+
+create table src_multi2 like src;
+
+explain analyze insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key;
+
+select count(*) from (select * from src union select * from src1)subq;
+
+insert overwrite table src_multi2 select subq.key, src.value from (select * from src union select * from src1)subq join src on subq.key=src.key;
+
+describe formatted src_multi2;
+
+
+set hive.mapred.mode=nonstrict;
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+
+-- SORT_QUERY_RESULTS
+
+create table acid_uami(i int,
+ de decimal(5,2),
+ vc varchar(128)) clustered by (i) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into table acid_uami values
+ (1, 109.23, 'mary had a little lamb'),
+ (6553, 923.19, 'its fleece was white as snow');
+
+insert into table acid_uami values
+ (10, 119.23, 'and everywhere that mary went'),
+ (65530, 823.19, 'the lamb was sure to go');
+
+select * from acid_uami order by de;
+
+explain analyze update acid_uami set de = 3.14 where de = 109.23 or de = 119.23;
+
+select * from acid_uami order by de;
+
+update acid_uami set de = 3.14 where de = 109.23 or de = 119.23;
+
+select * from acid_uami order by de;
+
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/delete_orig_table;
+dfs -copyFromLocal ../../data/files/alltypesorc ${system:test.tmp.dir}/delete_orig_table/00000_0;
+
+create table acid_dot(
+ ctinyint TINYINT,
+ csmallint SMALLINT,
+ cint INT,
+ cbigint BIGINT,
+ cfloat FLOAT,
+ cdouble DOUBLE,
+ cstring1 STRING,
+ cstring2 STRING,
+ ctimestamp1 TIMESTAMP,
+ ctimestamp2 TIMESTAMP,
+ cboolean1 BOOLEAN,
+ cboolean2 BOOLEAN) clustered by (cint) into 1 buckets stored as orc location '${system:test.tmp.dir}/delete_orig_table' TBLPROPERTIES ('transactional'='true');
+
+select count(*) from acid_dot;
+
+explain analyze delete from acid_dot where cint < -1070551679;
+
+select count(*) from acid_dot;
+
+delete from acid_dot where cint < -1070551679;
+
+select count(*) from acid_dot;
+
+dfs -rmr ${system:test.tmp.dir}/delete_orig_table;
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
index f6f2bfa..7e2edd9 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl.q.out
@@ -46,18 +46,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16)
keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0 (type: double)
sort order: +
Map-reduce partition columns: 2000.0 (type: double)
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -65,11 +69,14 @@ STAGE PLANS:
keys: 2000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -98,20 +105,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16)
keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0 (type: double)
null sort order: a
sort order: +
Map-reduce partition columns: 2000.0 (type: double)
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
auto parallelism: false
@@ -173,14 +184,17 @@ STAGE PLANS:
keys: 2000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -232,18 +246,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16)
keys: 4000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 4000.0 (type: double)
sort order: +
Map-reduce partition columns: 4000.0 (type: double)
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -251,11 +269,14 @@ STAGE PLANS:
keys: 4000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -284,20 +305,24 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
GatherStats: false
Select Operator
expressions: employeeid (type: int)
outputColumnNames: employeeid
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16)
keys: 4000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 4000.0 (type: double)
null sort order: a
sort order: +
Map-reduce partition columns: 4000.0 (type: double)
+ Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE
tag: -1
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
auto parallelism: false
@@ -359,14 +384,17 @@ STAGE PLANS:
keys: 4000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double)
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
GlobalTableId: 0
#### A masked pattern was here ####
NumFilesPerFileSink: 1
+ Statistics: Num rows: 13 Data size: 52 Basic stats: COMPLETE Column stats: NONE
#### A masked pattern was here ####
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -418,18 +446,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int), employeename (type: string)
outputColumnNames: employeeid, employeename
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
keys: 2000.0 (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0 (type: double)
sort order: +
Map-reduce partition columns: 2000.0 (type: double)
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -437,11 +469,14 @@ STAGE PLANS:
keys: 2000.0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double)
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -498,18 +533,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeesalary (type: double), employeeid (type: int), employeename (type: string)
outputColumnNames: employeesalary, employeeid, employeename
+ Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
keys: employeesalary (type: double)
mode: hash
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double)
sort order: +
Map-reduce partition columns: _col0 (type: double)
+ Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
value expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -517,11 +556,14 @@ STAGE PLANS:
keys: KEY._col0 (type: double)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col1 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double)
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 105 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -580,23 +622,29 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeeid (type: int), employeename (type: string)
outputColumnNames: employeeid, employeename
+ Statistics: Num rows: 2 Data size: 210 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
mode: hash
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
index 21089e1..47fffab 100644
--- a/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_partlvl_dp.q.out
@@ -84,18 +84,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: country (type: string), employeename (type: string), employeeid (type: int)
outputColumnNames: country, employeename, employeeid
+ Statistics: Num rows: 1 Data size: 64 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: compute_stats(employeename, 16), compute_stats(employeeid, 16)
keys: 4000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 4000.0 (type: double), _col1 (type: string)
sort order: ++
Map-reduce partition columns: 4000.0 (type: double), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -103,11 +107,14 @@ STAGE PLANS:
keys: 4000.0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 4000.0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -157,18 +164,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: country (type: string), employeeid (type: int)
outputColumnNames: country, employeeid
+ Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16)
keys: 2000.0 (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: 2000.0 (type: double), _col1 (type: string)
sort order: ++
Map-reduce partition columns: 2000.0 (type: double), _col1 (type: string)
+ Statistics: Num rows: 42 Data size: 169 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -176,11 +187,14 @@ STAGE PLANS:
keys: 2000.0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), 2000.0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -241,18 +255,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: employeesalary (type: double), country (type: string), employeeid (type: int)
outputColumnNames: employeesalary, country, employeeid
+ Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16)
keys: employeesalary (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+ Statistics: Num rows: 116 Data size: 466 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -260,11 +278,14 @@ STAGE PLANS:
keys: KEY._col0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 58 Data size: 233 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -322,18 +343,22 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: employee_part
+ Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: employeesalary (type: double), country (type: string), employeeid (type: int), employeename (type: string)
outputColumnNames: employeesalary, country, employeeid, employeename
+ Statistics: Num rows: 2 Data size: 466 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: compute_stats(employeeid, 16), compute_stats(employeename, 16)
keys: employeesalary (type: double), country (type: string)
mode: hash
outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
key expressions: _col0 (type: double), _col1 (type: string)
sort order: ++
Map-reduce partition columns: _col0 (type: double), _col1 (type: string)
+ Statistics: Num rows: 2 Data size: 466 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
@@ -341,11 +366,14 @@ STAGE PLANS:
keys: KEY._col0 (type: double), KEY._col1 (type: string)
mode: mergepartial
outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col3 (type: struct<columntype:string,maxlength:bigint,avglength:double,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:string>), _col0 (type: double), _col1 (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 233 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
http://git-wip-us.apache.org/repos/asf/hive/blob/20824f27/ql/src/test/results/clientpositive/columnstats_quoting.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/columnstats_quoting.q.out b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
index 288e61b..52e3538 100644
--- a/ql/src/test/results/clientpositive/columnstats_quoting.q.out
+++ b/ql/src/test/results/clientpositive/columnstats_quoting.q.out
@@ -24,23 +24,29 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: user_web_events
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: user id (type: bigint), user name (type: string)
outputColumnNames: user id, user name
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: compute_stats(user id, 16), compute_stats(user name, 16)
mode: hash
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Statistics: Num rows: 1 Data size: 968 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>), _col1 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
mode: mergepartial
outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 972 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -75,23 +81,29 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: user_web_events
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Select Operator
expressions: user id (type: bigint)
outputColumnNames: user id
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
Group By Operator
aggregations: compute_stats(user id, 16)
mode: hash
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
Reduce Output Operator
sort order:
+ Statistics: Num rows: 1 Data size: 476 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:string,numbitvectors:int>)
Reduce Operator Tree:
Group By Operator
aggregations: compute_stats(VALUE._col0)
mode: mergepartial
outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
+ Statistics: Num rows: 1 Data size: 480 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat