You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/07 02:21:48 UTC
svn commit: r1616379 [2/4] - in /hive/branches/cbo: ./
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ conf/ data/files/
hcatalog/webhcat/java-client/src/main/java/org/apache/hive/hcatalog/api/
hcatalog/webhc...
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/AnnotateWithStatistics.java Thu Aug 7 00:21:45 2014
@@ -27,6 +27,7 @@ import org.apache.hadoop.hive.ql.exec.Fi
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.LimitOperator;
import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
import org.apache.hadoop.hive.ql.exec.SelectOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
@@ -62,6 +63,8 @@ public class AnnotateWithStatistics impl
+ MapJoinOperator.getOperatorName() + "%"), StatsRulesProcFactory.getJoinRule());
opRules.put(new RuleRegExp("LIM", LimitOperator.getOperatorName() + "%"),
StatsRulesProcFactory.getLimitRule());
+ opRules.put(new RuleRegExp("RS", ReduceSinkOperator.getOperatorName() + "%"),
+ StatsRulesProcFactory.getReduceSinkRule());
// The dispatcher fires the processor corresponding to the closest matching
// rule and passes the context along
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/stats/annotation/StatsRulesProcFactory.java Thu Aug 7 00:21:45 2014
@@ -601,12 +601,18 @@ public class StatsRulesProcFactory {
}
dvProd *= dv;
} else {
-
- // partial column statistics on grouping attributes case.
- // if column statistics on grouping attribute is missing, then
- // assume worst case.
- // GBY rule will emit half the number of rows if dvProd is 0
- dvProd = 0;
+ if (parentStats.getColumnStatsState().equals(Statistics.State.COMPLETE)) {
+ // the column must be an aggregate column inserted by GBY. We
+ // don't have to account for this column when computing product
+ // of NDVs
+ continue;
+ } else {
+ // partial column statistics on grouping attributes case.
+ // if column statistics on grouping attribute is missing, then
+ // assume worst case.
+ // GBY rule will emit half the number of rows if dvProd is 0
+ dvProd = 0;
+ }
break;
}
}
@@ -687,7 +693,17 @@ public class StatsRulesProcFactory {
aggColStats.add(cs);
}
}
- stats.addToColumnStats(aggColStats);
+
+ // add the new aggregate column and recompute data size
+ if (aggColStats.size() > 0) {
+ stats.addToColumnStats(aggColStats);
+
+ // only if the column stats is available, update the data size from
+ // the column stats
+ if (!stats.getColumnStatsState().equals(Statistics.State.NONE)) {
+ updateStats(stats, stats.getNumRows(), true);
+ }
+ }
// if UDAF present and if column expression map is empty then it must
// be full aggregation query like count(*) in which case number of
@@ -734,15 +750,24 @@ public class StatsRulesProcFactory {
* <p>
* In the absence of histograms, we can use the following general case
* <p>
- * <b>Single attribute</b>
+ * <b>2 Relations, 1 attribute</b>
* <p>
* T(RXS) = (T(R)*T(S))/max(V(R,Y), V(S,Y)) where Y is the join attribute
* <p>
- * <b>Multiple attributes</b>
+ * <b>2 Relations, 2 attributes</b>
* <p>
* T(RXS) = T(R)*T(S)/max(V(R,y1), V(S,y1)) * max(V(R,y2), V(S,y2)), where y1 and y2 are the join
* attributes
* <p>
+ * <b>3 Relations, 1 attribute</b>
+ * <p>
+ * T(RXSXQ) = T(R)*T(S)*T(Q)/top2largest(V(R,y), V(S,y), V(Q,y)), where y is the join attribute
+ * <p>
+ * <b>3 Relations, 2 attributes</b>
+ * <p>
+ * T(RXSXQ) = T(R)*T(S)*T(Q)/(top2largest(V(R,y1), V(S,y1), V(Q,y1)) * top2largest(V(R,y2), V(S,y2), V(Q,y2))),
+ * where y1 and y2 are the join attributes
+ * <p>
* <i>Worst case:</i> If no column statistics are available, then T(RXS) = joinFactor * max(T(R),
* T(S)) * (numParents - 1) will be used as heuristics. joinFactor is from hive.stats.join.factor
* hive config. In the worst case, since we do not know any information about join keys (and hence
@@ -783,9 +808,12 @@ public class StatsRulesProcFactory {
// statistics object that is combination of statistics from all
// relations involved in JOIN
Statistics stats = new Statistics();
- long prodRows = 1;
+ List<Long> rowCountParents = Lists.newArrayList();
List<Long> distinctVals = Lists.newArrayList();
+
+ // 2 relations, multiple attributes
boolean multiAttr = false;
+ int numAttr = 1;
Map<String, ColStatistics> joinedColStats = Maps.newHashMap();
Map<Integer, List<String>> joinKeys = Maps.newHashMap();
@@ -795,12 +823,13 @@ public class StatsRulesProcFactory {
ReduceSinkOperator parent = (ReduceSinkOperator) jop.getParentOperators().get(pos);
Statistics parentStats = parent.getStatistics();
- prodRows *= parentStats.getNumRows();
+ rowCountParents.add(parentStats.getNumRows());
List<ExprNodeDesc> keyExprs = parent.getConf().getKeyCols();
// multi-attribute join key
if (keyExprs.size() > 1) {
multiAttr = true;
+ numAttr = keyExprs.size();
}
// compute fully qualified join key column names. this name will be
@@ -811,16 +840,9 @@ public class StatsRulesProcFactory {
StatsUtils.getFullQualifedColNameFromExprs(keyExprs, parent.getColumnExprMap());
joinKeys.put(pos, fqCols);
- Map<String, ExprNodeDesc> colExprMap = parent.getColumnExprMap();
- RowSchema rs = parent.getSchema();
-
// get column statistics for all output columns
- List<ColStatistics> cs =
- StatsUtils.getColStatisticsFromExprMap(conf, parentStats, colExprMap, rs);
- for (ColStatistics c : cs) {
- if (c != null) {
- joinedColStats.put(c.getFullyQualifiedColName(), c);
- }
+ for (ColStatistics cs : parentStats.getColumnStats()) {
+ joinedColStats.put(cs.getFullyQualifiedColName(), cs);
}
// since new statistics is derived from all relations involved in
@@ -834,10 +856,10 @@ public class StatsRulesProcFactory {
long denom = 1;
if (multiAttr) {
List<Long> perAttrDVs = Lists.newArrayList();
- int numAttr = joinKeys.get(0).size();
for (int idx = 0; idx < numAttr; idx++) {
for (Integer i : joinKeys.keySet()) {
String col = joinKeys.get(i).get(idx);
+ col = StatsUtils.stripPrefixFromColumnName(col);
ColStatistics cs = joinedColStats.get(col);
if (cs != null) {
perAttrDVs.add(cs.getCountDistint());
@@ -853,6 +875,7 @@ public class StatsRulesProcFactory {
} else {
for (List<String> jkeys : joinKeys.values()) {
for (String jk : jkeys) {
+ jk = StatsUtils.stripPrefixFromColumnName(jk);
ColStatistics cs = joinedColStats.get(jk);
if (cs != null) {
distinctVals.add(cs.getCountDistint());
@@ -862,6 +885,11 @@ public class StatsRulesProcFactory {
denom = getDenominator(distinctVals);
}
+ // Update NDV of joined columns to be min(V(R,y), V(S,y))
+ if (multiAttr) {
+ updateJoinColumnsNDV(joinKeys, joinedColStats, numAttr);
+ }
+
// column statistics from different sources are put together and rename
// fully qualified column names based on output schema of join operator
Map<String, ExprNodeDesc> colExprMap = jop.getColumnExprMap();
@@ -878,7 +906,6 @@ public class StatsRulesProcFactory {
ColStatistics cs = joinedColStats.get(fqColName);
String outColName = key;
String outTabAlias = ci.getTabAlias();
- outColName = StatsUtils.stripPrefixFromColumnName(outColName);
if (cs != null) {
cs.setColumnName(outColName);
cs.setTableAlias(outTabAlias);
@@ -889,13 +916,21 @@ public class StatsRulesProcFactory {
// update join statistics
stats.setColumnStats(outColStats);
- long newRowCount = prodRows / denom;
+ long newRowCount = computeNewRowCount(rowCountParents, denom);
+
+ if (newRowCount <= 0 && LOG.isDebugEnabled()) {
+ LOG.debug("[0] STATS-" + jop.toString() + ": Product of #rows might be greater than"
+ + " denominator or overflow might have occurred. Resetting row count to 0."
+ + " #Rows of parents: " + rowCountParents.toString() + ". Denominator: " + denom);
+ }
+ // clamp unconditionally so that the estimate does not depend on the log level
+ newRowCount = Math.max(0L, newRowCount);
stats.setNumRows(newRowCount);
stats.setDataSize(StatsUtils.getDataSizeFromColumnStats(newRowCount, outColStats));
jop.setStatistics(stats);
if (LOG.isDebugEnabled()) {
- LOG.debug("[0] STATS-" + jop.toString() + ": " + stats.extendedToString());
+ LOG.debug("[1] STATS-" + jop.toString() + ": " + stats.extendedToString());
}
} else {
@@ -930,13 +965,72 @@ public class StatsRulesProcFactory {
jop.setStatistics(wcStats);
if (LOG.isDebugEnabled()) {
- LOG.debug("[1] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
+ LOG.debug("[2] STATS-" + jop.toString() + ": " + wcStats.extendedToString());
}
}
}
return null;
}
+  /**
+   * Estimates the join output row count as the product of the parents' row
+   * counts divided by {@code denom}.
+   * <p>
+   * The whole computation is carried out in floating point so that the
+   * intermediate product cannot wrap around the way a {@code long} product
+   * would. The final narrowing conversion saturates at
+   * {@code Long.MAX_VALUE} / {@code Long.MIN_VALUE} and maps NaN to 0.
+   *
+   * @param rowCountParents row counts of the relations being joined
+   * @param denom denominator derived from the NDVs of the join keys
+   * @return estimated row count; 0 when there are no parents. The value may be
+   *         non-positive (e.g. for a negative denominator), so callers are
+   *         expected to clamp it.
+   */
+  private long computeNewRowCount(List<Long> rowCountParents, long denom) {
+    if (rowCountParents.isEmpty()) {
+      return 0;
+    }
+
+    // locate the largest row count (first occurrence wins on ties)
+    long max = rowCountParents.get(0);
+    int maxIdx = 0;
+    for (int i = 1; i < rowCountParents.size(); i++) {
+      if (rowCountParents.get(i) > max) {
+        max = rowCountParents.get(i);
+        maxIdx = i;
+      }
+    }
+
+    // divide the largest row count by the denominator first and use that
+    // factor to scale the product of the remaining row counts
+    double factor = (double) max / (double) denom;
+
+    double others = 1.0d;
+    for (int i = 0; i < rowCountParents.size(); i++) {
+      if (i != maxIdx) {
+        others *= rowCountParents.get(i);
+      }
+    }
+
+    return (long) (others * factor);
+  }
+
+  /**
+   * For every join attribute position, lowers the NDV of all join columns at
+   * that position to the smallest NDV found among them, i.e.
+   * min(V(R,y), V(S,y), ...).
+   * <p>
+   * Join columns without column statistics are ignored: they neither take part
+   * in the minimum nor get updated.
+   *
+   * @param joinKeys join key column names per parent position
+   * @param joinedColStats column statistics keyed by column name with the
+   *          KEY./VALUE. prefix stripped; the entries are updated in place
+   * @param numAttr number of join attributes to process
+   */
+  private void updateJoinColumnsNDV(Map<Integer, List<String>> joinKeys,
+      Map<String, ColStatistics> joinedColStats, int numAttr) {
+    for (int joinColIdx = 0; joinColIdx < numAttr; joinColIdx++) {
+      List<ColStatistics> joinedCols = Lists.newArrayList();
+      long minNDV = Long.MAX_VALUE;
+
+      // collect the statistics of the columns joined at this position and
+      // track the smallest NDV among them
+      for (List<String> keys : joinKeys.values()) {
+        if (joinColIdx >= keys.size()) {
+          // this parent has fewer join keys than numAttr; nothing to look up
+          continue;
+        }
+        String key = StatsUtils.stripPrefixFromColumnName(keys.get(joinColIdx));
+        ColStatistics cs = joinedColStats.get(key);
+        if (cs != null) {
+          joinedCols.add(cs);
+          minNDV = Math.min(minNDV, cs.getCountDistint());
+        }
+      }
+
+      // set min NDV value to all columns involved in the join at this position
+      for (ColStatistics cs : joinedCols) {
+        cs.setCountDistint(minNDV);
+      }
+    }
+  }
+
private long getDenominator(List<Long> distinctVals) {
if (distinctVals.isEmpty()) {
@@ -954,16 +1048,23 @@ public class StatsRulesProcFactory {
return Collections.max(distinctVals);
} else {
+ // remember min value and ignore it from the denominator
+ long minNDV = distinctVals.get(0);
+ int minIdx = 0;
+
+ for (int i = 1; i < distinctVals.size(); i++) {
+ if (distinctVals.get(i) < minNDV) {
+ minNDV = distinctVals.get(i);
+ minIdx = i;
+ }
+ }
+
// join from multiple relations:
- // denom = max(v1, v2) * max(v2, v3) * max(v3, v4)
+ // denom = Product of all NDVs except the least of all
long denom = 1;
- for (int i = 0; i < distinctVals.size() - 1; i++) {
- long v1 = distinctVals.get(i);
- long v2 = distinctVals.get(i + 1);
- if (v1 >= v2) {
- denom *= v1;
- } else {
- denom *= v2;
+ for (int i = 0; i < distinctVals.size(); i++) {
+ if (i != minIdx) {
+ denom *= distinctVals.get(i);
}
}
return denom;
@@ -983,8 +1084,6 @@ public class StatsRulesProcFactory {
LimitOperator lop = (LimitOperator) nd;
Operator<? extends OperatorDesc> parent = lop.getParentOperators().get(0);
Statistics parentStats = parent.getStatistics();
- AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
- HiveConf conf = aspCtx.getConf();
try {
long limit = -1;
@@ -1032,6 +1131,73 @@ public class StatsRulesProcFactory {
}
/**
+ * ReduceSink operator does not change any of the statistics. But it renames
+ * the column statistics from its parent based on the output key and value
+ * column names to make it easy for the downstream operators. This is different
+ * from the default stats which just aggregates and passes along the statistics
+ * without actually renaming based on output schema of the operator.
+ */
+  public static class ReduceSinkStatsRule extends DefaultStatsRule implements NodeProcessor {
+
+    /**
+     * Copies the statistics of the first parent onto the ReduceSink operator.
+     * When the preconditions on the parent statistics hold and a column
+     * expression map is available, the column statistics are replaced by ones
+     * renamed to the operator's output key and value column names.
+     *
+     * @return always null
+     * @throws SemanticException if the parent statistics cannot be cloned
+     */
+    @Override
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
+        Object... nodeOutputs) throws SemanticException {
+      ReduceSinkOperator rop = (ReduceSinkOperator) nd;
+      Operator<? extends OperatorDesc> parent = rop.getParentOperators().get(0);
+      Statistics parentStats = parent.getStatistics();
+      if (parentStats == null) {
+        // nothing to propagate
+        return null;
+      }
+
+      AnnotateStatsProcCtx aspCtx = (AnnotateStatsProcCtx) procCtx;
+      HiveConf conf = aspCtx.getConf();
+
+      List<String> outKeyColNames = rop.getConf().getOutputKeyColumnNames();
+      List<String> outValueColNames = rop.getConf().getOutputValueColumnNames();
+      Map<String, ExprNodeDesc> colExprMap = rop.getColumnExprMap();
+      try {
+        Statistics outStats = parentStats.clone();
+        // without a column expression map there is nothing to rename against,
+        // so the cloned parent statistics are passed along unchanged
+        if (satisfyPrecondition(parentStats) && colExprMap != null) {
+          List<ColStatistics> colStats = Lists.newArrayList();
+          addRenamedColStats(conf, parentStats, colExprMap, "KEY.", outKeyColNames, colStats);
+          addRenamedColStats(conf, parentStats, colExprMap, "VALUE.", outValueColNames,
+              colStats);
+          outStats.setColumnStats(colStats);
+        }
+        rop.setStatistics(outStats);
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("[0] STATS-" + rop.toString() + ": " + outStats.extendedToString());
+        }
+      } catch (CloneNotSupportedException e) {
+        throw new SemanticException(ErrorMsg.STATISTICS_CLONING_FAILED.getMsg());
+      }
+      return null;
+    }
+
+    /**
+     * Looks up each output column under {@code prefix + name} in the column
+     * expression map, derives its statistics from the parent statistics and
+     * appends them to {@code out} under the un-prefixed output name. Columns
+     * with no mapped expression or no derivable statistics are skipped.
+     */
+    private static void addRenamedColStats(HiveConf conf, Statistics parentStats,
+        Map<String, ExprNodeDesc> colExprMap, String prefix, List<String> outColNames,
+        List<ColStatistics> out) {
+      for (String name : outColNames) {
+        ExprNodeDesc end = colExprMap.get(prefix + name);
+        if (end == null) {
+          continue;
+        }
+        ColStatistics cs = StatsUtils.getColStatisticsFromExpression(conf, parentStats, end);
+        if (cs != null) {
+          cs.setColumnName(name);
+          out.add(cs);
+        }
+      }
+    }
+
+  }
+
+ /**
* Default rule is to aggregate the statistics from all its parent operators.
*/
public static class DefaultStatsRule implements NodeProcessor {
@@ -1108,6 +1274,10 @@ public class StatsRulesProcFactory {
return new LimitStatsRule();
}
+ public static NodeProcessor getReduceSinkRule() {
+ return new ReduceSinkStatsRule();
+ }
+
public static NodeProcessor getDefaultRule() {
return new DefaultStatsRule();
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java Thu Aug 7 00:21:45 2014
@@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
@@ -102,6 +103,10 @@ public class ColumnStatsSemanticAnalyzer
private Map<String,String> getPartKeyValuePairsFromAST(ASTNode tree) {
ASTNode child = ((ASTNode) tree.getChild(0).getChild(1));
Map<String,String> partSpec = new HashMap<String, String>();
+ if (null == child) {
+ // case of analyze table T compute statistics for columns;
+ return partSpec;
+ }
String partKey;
String partValue;
for (int i = 0; i < child.getChildCount(); i++) {
@@ -361,6 +366,9 @@ public class ColumnStatsSemanticAnalyzer
checkIfTemporaryTable();
checkForPartitionColumns(colNames, Utilities.getColumnNamesFromFieldSchema(tbl.getPartitionKeys()));
validateSpecifiedColumnNames(colNames);
+ if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_PART_LEVEL_STATS) && tbl.isPartitioned()) {
+ isPartitionStats = true;
+ }
if (isPartitionStats) {
isTableLevel = false;
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/BaseWork.java Thu Aug 7 00:21:45 2014
@@ -52,6 +52,11 @@ public abstract class BaseWork extends A
private String name;
+ // Vectorization.
+ protected Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
+ protected Map<String, Map<String, Integer>> scratchColumnMap = null;
+ protected boolean vectorMode = false;
+
public void setGatheringStats(boolean gatherStats) {
this.gatheringStats = gatherStats;
}
@@ -107,5 +112,31 @@ public abstract class BaseWork extends A
return returnSet;
}
+ public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
+ return scratchColumnVectorTypes;
+ }
+
+ public void setScratchColumnVectorTypes(
+ Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
+ this.scratchColumnVectorTypes = scratchColumnVectorTypes;
+ }
+
+ public Map<String, Map<String, Integer>> getScratchColumnMap() {
+ return scratchColumnMap;
+ }
+
+ public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
+ this.scratchColumnMap = scratchColumnMap;
+ }
+
+ @Override
+ public void setVectorMode(boolean vectorMode) {
+ this.vectorMode = vectorMode;
+ }
+
+ public boolean getVectorMode() {
+ return vectorMode;
+ }
+
public abstract void configureJobConf(JobConf job);
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/MapWork.java Thu Aug 7 00:21:45 2014
@@ -116,10 +116,6 @@ public class MapWork extends BaseWork {
private boolean useOneNullRowInputFormat;
- private Map<String, Map<Integer, String>> scratchColumnVectorTypes = null;
- private Map<String, Map<String, Integer>> scratchColumnMap = null;
- private boolean vectorMode = false;
-
public MapWork() {}
public MapWork(String name) {
@@ -519,32 +515,6 @@ public class MapWork extends BaseWork {
}
}
- public Map<String, Map<Integer, String>> getScratchColumnVectorTypes() {
- return scratchColumnVectorTypes;
- }
-
- public void setScratchColumnVectorTypes(
- Map<String, Map<Integer, String>> scratchColumnVectorTypes) {
- this.scratchColumnVectorTypes = scratchColumnVectorTypes;
- }
-
- public Map<String, Map<String, Integer>> getScratchColumnMap() {
- return scratchColumnMap;
- }
-
- public void setScratchColumnMap(Map<String, Map<String, Integer>> scratchColumnMap) {
- this.scratchColumnMap = scratchColumnMap;
- }
-
- public boolean getVectorMode() {
- return vectorMode;
- }
-
- @Override
- public void setVectorMode(boolean vectorMode) {
- this.vectorMode = vectorMode;
- }
-
public void logPathToAliases() {
if (LOG.isDebugEnabled()) {
LOG.debug("LOGGING PATH TO ALIASES");
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceWork.java Thu Aug 7 00:21:45 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
+import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@@ -30,7 +31,18 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
/**
* ReduceWork represents all the information used to run a reduce task on the cluster.
@@ -84,6 +96,11 @@ public class ReduceWork extends BaseWork
// for auto reduce parallelism - max reducers requested
private int maxReduceTasks;
+ private ObjectInspector keyObjectInspector = null;
+ private ObjectInspector valueObjectInspector = null;
+
+ private Map<String, Integer> reduceColumnNameMap = new LinkedHashMap<String, Integer>();
+
/**
* If the plan has a reducer and correspondingly a reduce-sink, then store the TableDesc pointing
* to keySerializeInfo of the ReduceSink
@@ -95,7 +112,90 @@ public class ReduceWork extends BaseWork
}
public TableDesc getKeyDesc() {
- return keyDesc;
+ return keyDesc;
+ }
+
+  /**
+   * Instantiates and initializes the deserializer described by {@code desc}
+   * and returns its object inspector.
+   *
+   * @return the object inspector, or null if it could not be obtained for any
+   *         reason
+   */
+  private ObjectInspector getObjectInspector(TableDesc desc) {
+    try {
+      Deserializer serde =
+          (SerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), null);
+      SerDeUtils.initializeSerDe(serde, null, desc.getProperties(), null);
+      return serde.getObjectInspector();
+    } catch (Exception ignored) {
+      // best effort: a null inspector makes fillInReduceColumnNameMap() report
+      // failure instead of propagating the error
+      return null;
+    }
+  }
+
+ public ObjectInspector getKeyObjectInspector() {
+ if (keyObjectInspector == null) {
+ keyObjectInspector = getObjectInspector(keyDesc);
+ }
+ return keyObjectInspector;
+ }
+
+ // Only works when not tagging.
+ public ObjectInspector getValueObjectInspector() {
+ if (needsTagging) {
+ return null;
+ }
+ if (valueObjectInspector == null) {
+ valueObjectInspector = getObjectInspector(tagToValueDesc.get(0));
+ }
+ return valueObjectInspector;
+ }
+
+ private int addToReduceColumnNameMap(StructObjectInspector structObjectInspector, int startIndex, String prefix) {
+ List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+ int index = startIndex;
+ for (StructField field: fields) {
+ reduceColumnNameMap.put(prefix + "." + field.getFieldName(), index);
+ index++;
+ }
+ return index;
+ }
+
+  /**
+   * Fills the reduce column name map with the key struct fields followed by
+   * the value struct fields, each name prefixed with its reduce field name.
+   *
+   * @return false if the key or the value object inspector is unavailable or
+   *         is not a struct inspector (nothing is added in that case), true
+   *         otherwise
+   */
+  public Boolean fillInReduceColumnNameMap() {
+    // instanceof is false for null, so a missing inspector is rejected as well
+    ObjectInspector keyOI = getKeyObjectInspector();
+    if (!(keyOI instanceof StructObjectInspector)) {
+      return false;
+    }
+
+    ObjectInspector valueOI = getValueObjectInspector();
+    if (!(valueOI instanceof StructObjectInspector)) {
+      return false;
+    }
+
+    // value columns are numbered right after the last key column
+    int firstValueIndex = addToReduceColumnNameMap(
+        (StructObjectInspector) keyOI, 0, Utilities.ReduceField.KEY.toString());
+    addToReduceColumnNameMap(
+        (StructObjectInspector) valueOI, firstValueIndex, Utilities.ReduceField.VALUE.toString());
+    return true;
+  }
+
+ public Map<String, Integer> getReduceColumnNameMap() {
+ if (needsTagging) {
+ return null;
+ }
+ if (reduceColumnNameMap.size() == 0) {
+ if (!fillInReduceColumnNameMap()) {
+ return null;
+ }
+ }
+ return reduceColumnNameMap;
+ }
+
+ public List<String> getReduceColumnNames() {
+ if (needsTagging) {
+ return null;
+ }
+ if (reduceColumnNameMap.size() == 0) {
+ if (!fillInReduceColumnNameMap()) {
+ return null;
+ }
+ }
+ return new ArrayList<String>(reduceColumnNameMap.keySet());
}
public List<TableDesc> getTagToValueDesc() {
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/StorageBasedAuthorizationProvider.java Thu Aug 7 00:21:45 2014
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.ql.securi
import java.io.FileNotFoundException;
import java.io.IOException;
import java.security.AccessControlException;
+import java.security.PrivilegedExceptionAction;
import java.util.EnumSet;
import java.util.List;
@@ -35,6 +36,9 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DFSConfigKeys;
+import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.hive.common.FileUtils;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStore.HMSHandler;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.Database;
@@ -44,6 +48,7 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.shims.ShimLoader;
/**
* StorageBasedAuthorizationProvider is an implementation of
@@ -288,7 +293,7 @@ public class StorageBasedAuthorizationPr
* If the given path does not exists, it checks for its parent folder.
*/
protected void checkPermissions(final Configuration conf, final Path path,
- final EnumSet<FsAction> actions) throws IOException, LoginException {
+ final EnumSet<FsAction> actions) throws IOException, LoginException, HiveException {
if (path == null) {
throw new IllegalArgumentException("path is null");
@@ -297,8 +302,7 @@ public class StorageBasedAuthorizationPr
final FileSystem fs = path.getFileSystem(conf);
if (fs.exists(path)) {
- checkPermissions(fs, path, actions,
- authenticator.getUserName(), authenticator.getGroupNames());
+ checkPermissions(fs, path, actions, authenticator.getUserName());
} else if (path.getParent() != null) {
// find the ancestor which exists to check its permissions
Path par = path.getParent();
@@ -309,8 +313,7 @@ public class StorageBasedAuthorizationPr
par = par.getParent();
}
- checkPermissions(fs, par, actions,
- authenticator.getUserName(), authenticator.getGroupNames());
+ checkPermissions(fs, par, actions, authenticator.getUserName());
}
}
@@ -320,56 +323,23 @@ public class StorageBasedAuthorizationPr
*/
@SuppressWarnings("deprecation")
protected static void checkPermissions(final FileSystem fs, final Path path,
- final EnumSet<FsAction> actions, String user, List<String> groups) throws IOException,
- AccessControlException {
-
- String superGroupName = getSuperGroupName(fs.getConf());
- if (userBelongsToSuperGroup(superGroupName, groups)) {
- LOG.info("User \"" + user + "\" belongs to super-group \"" + superGroupName + "\". " +
- "Permission granted for actions: (" + actions + ").");
- return;
- }
-
- final FileStatus stat;
+ final EnumSet<FsAction> actions, String user) throws IOException,
+ AccessControlException, HiveException {
try {
- stat = fs.getFileStatus(path);
+ FileStatus stat = fs.getFileStatus(path);
+ for (FsAction action : actions) {
+ FileUtils.checkFileAccessWithImpersonation(fs, stat, action, user);
+ }
} catch (FileNotFoundException fnfe) {
// File named by path doesn't exist; nothing to validate.
return;
} catch (org.apache.hadoop.fs.permission.AccessControlException ace) {
// Older hadoop version will throw this @deprecated Exception.
throw accessControlException(ace);
+ } catch (Exception err) {
+ throw new HiveException(err);
}
-
- final FsPermission dirPerms = stat.getPermission();
- final String grp = stat.getGroup();
-
- for (FsAction action : actions) {
- if (user.equals(stat.getOwner())) {
- if (dirPerms.getUserAction().implies(action)) {
- continue;
- }
- }
- if (groups.contains(grp)) {
- if (dirPerms.getGroupAction().implies(action)) {
- continue;
- }
- }
- if (dirPerms.getOtherAction().implies(action)) {
- continue;
- }
- throw new AccessControlException("action " + action + " not permitted on path "
- + path + " for user " + user);
- }
- }
-
- private static String getSuperGroupName(Configuration configuration) {
- return configuration.get(DFSConfigKeys.DFS_PERMISSIONS_SUPERUSERGROUP_KEY, "");
- }
-
- private static boolean userBelongsToSuperGroup(String superGroupName, List<String> groups) {
- return groups.contains(superGroupName);
}
protected Path getDbLocation(Database db) throws HiveException {
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLAuthorizationUtils.java Thu Aug 7 00:21:45 2014
@@ -394,7 +394,7 @@ public class SQLAuthorizationUtils {
if (FileUtils.isActionPermittedForFileHierarchy(fs, fileStatus, userName, FsAction.READ)) {
availPrivs.addPrivilege(SQLPrivTypeGrant.SELECT_NOGRANT);
}
- } catch (IOException e) {
+ } catch (Exception e) {
String msg = "Error getting permissions for " + filePath + ": " + e.getMessage();
throw new HiveAuthzPluginException(msg, e);
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java Thu Aug 7 00:21:45 2014
@@ -212,7 +212,8 @@ public class StatsUtils {
stats.addToDataSize(ds);
// if at least one partition does not contain a row count then mark basic stats state as PARTIAL
- if (containsNonPositives(rowCounts)) {
+ if (containsNonPositives(rowCounts) &&
+ stats.getBasicStatsState().equals(State.COMPLETE)) {
stats.setBasicStatsState(State.PARTIAL);
}
boolean haveFullStats = fetchColStats;
@@ -876,12 +877,9 @@ public class StatsUtils {
if (colExprMap != null) {
for (ColumnInfo ci : rowSchema.getSignature()) {
String outColName = ci.getInternalName();
+ outColName = StatsUtils.stripPrefixFromColumnName(outColName);
String outTabAlias = ci.getTabAlias();
ExprNodeDesc end = colExprMap.get(outColName);
- if (end == null) {
- outColName = StatsUtils.stripPrefixFromColumnName(outColName);
- end = colExprMap.get(outColName);
- }
ColStatistics colStat = getColStatisticsFromExpression(conf, parentStats, end);
if (colStat != null) {
outColName = StatsUtils.stripPrefixFromColumnName(outColName);
@@ -1142,7 +1140,7 @@ public class StatsUtils {
*/
public static String stripPrefixFromColumnName(String colName) {
String stripedName = colName;
- if (colName.startsWith("KEY._") || colName.startsWith("VALUE._")) {
+ if (colName.startsWith("KEY") || colName.startsWith("VALUE")) {
// strip off KEY./VALUE. from column name
stripedName = colName.split("\\.")[1];
}
@@ -1210,15 +1208,16 @@ public class StatsUtils {
for (Map.Entry<String, ExprNodeDesc> entry : map.entrySet()) {
if (entry.getValue().isSame(end)) {
outColName = entry.getKey();
+ outColName = stripPrefixFromColumnName(outColName);
}
}
if (end instanceof ExprNodeColumnDesc) {
ExprNodeColumnDesc encd = (ExprNodeColumnDesc) end;
if (outColName == null) {
outColName = encd.getColumn();
+ outColName = stripPrefixFromColumnName(outColName);
}
String tabAlias = encd.getTabAlias();
- outColName = stripPrefixFromColumnName(outColName);
result.add(getFullyQualifiedColumnName(tabAlias, outColName));
} else if (end instanceof ExprNodeGenericFuncDesc) {
ExprNodeGenericFuncDesc enf = (ExprNodeGenericFuncDesc) end;
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBasePad.java Thu Aug 7 00:21:45 2014
@@ -48,9 +48,9 @@ public abstract class GenericUDFBasePad
throw new UDFArgumentException(udfName + " requires three arguments. Found :"
+ arguments.length);
}
- converter1 = checkArguments(arguments, 0);
- converter2 = checkArguments(arguments, 1);
- converter3 = checkArguments(arguments, 2);
+ converter1 = checkTextArguments(arguments, 0);
+ converter2 = checkIntArguments(arguments, 1);
+ converter3 = checkTextArguments(arguments, 2);
return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
}
@@ -91,31 +91,39 @@ public abstract class GenericUDFBasePad
protected abstract void performOp(byte[] data, byte[] txt, byte[] padTxt, int len, Text str,
Text pad);
- private Converter checkArguments(ObjectInspector[] arguments, int i)
+ // Convert input arguments to Text, if necessary.
+ private Converter checkTextArguments(ObjectInspector[] arguments, int i)
throws UDFArgumentException {
if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
throw new UDFArgumentTypeException(i + 1, "Only primitive type arguments are accepted but "
- + arguments[i].getTypeName() + " is passed. as arguments");
+ + arguments[i].getTypeName() + " is passed. as arguments");
+ }
+
+ Converter converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i],
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector);
+
+ return converter;
+ }
+
+ private Converter checkIntArguments(ObjectInspector[] arguments, int i)
+ throws UDFArgumentException {
+ if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
+ throw new UDFArgumentTypeException(i + 1, "Only primitive type arguments are accepted but "
+ + arguments[i].getTypeName() + " is passed. as arguments");
}
PrimitiveCategory inputType = ((PrimitiveObjectInspector) arguments[i]).getPrimitiveCategory();
Converter converter;
switch (inputType) {
- case STRING:
- case CHAR:
- case VARCHAR:
- converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i],
- PrimitiveObjectInspectorFactory.writableStringObjectInspector);
- break;
case INT:
case SHORT:
case BYTE:
converter = ObjectInspectorConverters.getConverter((PrimitiveObjectInspector) arguments[i],
- PrimitiveObjectInspectorFactory.writableIntObjectInspector);
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector);
break;
default:
throw new UDFArgumentTypeException(i + 1, udfName
- + " only takes STRING/CHAR/INT/SHORT/BYTE/VARCHAR types as " + (i + 1) + "-ths argument, got "
- + inputType);
+ + " only takes INT/SHORT/BYTE types as " + (i + 1) + "-ths argument, got "
+ + inputType);
}
return converter;
}
Modified: hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java (original)
+++ hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java Thu Aug 7 00:21:45 2014
@@ -107,7 +107,7 @@ public class TestVectorizer {
gbyOp.setConf(desc);
Vectorizer v = new Vectorizer();
- Assert.assertTrue(v.validateOperator(gbyOp));
+ Assert.assertTrue(v.validateMapWorkOperator(gbyOp));
VectorGroupByOperator vectorOp = (VectorGroupByOperator) v.vectorizeOperator(gbyOp, vContext);
Assert.assertEquals(VectorUDAFSumLong.class, vectorOp.getAggregators()[0].getClass());
VectorUDAFSumLong udaf = (VectorUDAFSumLong) vectorOp.getAggregators()[0];
@@ -187,7 +187,7 @@ public class TestVectorizer {
mop.setConf(mjdesc);
Vectorizer vectorizer = new Vectorizer();
- Assert.assertTrue(vectorizer.validateOperator(mop));
+ Assert.assertTrue(vectorizer.validateMapWorkOperator(mop));
}
@@ -203,6 +203,6 @@ public class TestVectorizer {
mop.setConf(mjdesc);
Vectorizer vectorizer = new Vectorizer();
- Assert.assertTrue(vectorizer.validateOperator(mop));
+ Assert.assertTrue(vectorizer.validateMapWorkOperator(mop));
}
}
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_filter.q Thu Aug 7 00:21:45 2014
@@ -15,76 +15,76 @@ load data local inpath '../../data/files
insert overwrite table loc_orc select * from loc_staging;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- column stats are not COMPLETE, so stats are not updated
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- state column has 5 distincts. numRows/countDistincts
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where state='OH';
+explain select * from loc_orc where state='OH';
-- not equals comparison shouldn't affect number of rows
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where state!='OH';
-explain extended select * from loc_orc where state<>'OH';
+explain select * from loc_orc where state!='OH';
+explain select * from loc_orc where state<>'OH';
-- nulls are treated as constant equality comparison
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where zip is null;
+explain select * from loc_orc where zip is null;
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where !(zip is not null);
+explain select * from loc_orc where !(zip is not null);
-- not nulls are treated as inverse of nulls
-- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where zip is not null;
+explain select * from loc_orc where zip is not null;
-- numRows: 7 rawDataSize: 702
-explain extended select * from loc_orc where !(zip is null);
+explain select * from loc_orc where !(zip is null);
-- NOT evaluation. true will pass all rows, false will not pass any rows
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where !false;
+explain select * from loc_orc where !false;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where !true;
+explain select * from loc_orc where !true;
-- Constant evaluation. true will pass all rows, false will not pass any rows
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true;
+explain select * from loc_orc where true;
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where 'foo';
+explain select * from loc_orc where 'foo';
-- numRows: 8 rawDataSize: 804
-explain extended select * from loc_orc where true = true;
+explain select * from loc_orc where true = true;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false = true;
+explain select * from loc_orc where false = true;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where 'foo' = 'bar';
+explain select * from loc_orc where 'foo' = 'bar';
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc where false;
+explain select * from loc_orc where false;
-- OR evaluation. 1 row for OH and 1 row for CA
-- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where state='OH' or state='CA';
+explain select * from loc_orc where state='OH' or state='CA';
-- AND evaluation. cascadingly apply rules. 8/2 = 4/2 = 2
-- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where year=2001 and year is null;
+explain select * from loc_orc where year=2001 and year is null;
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where year=2001 and state='OH' and state='FL';
+explain select * from loc_orc where year=2001 and state='OH' and state='FL';
-- AND and OR together. left expr will yield 1 row and right will yield 1 row
-- numRows: 3 rawDataSize: 306
-explain extended select * from loc_orc where (year=2001 and year is null) or (state='CA');
+explain select * from loc_orc where (year=2001 and year is null) or (state='CA');
-- AND and OR together. left expr will yield 8 rows and right will yield 1 row
-- numRows: 1 rawDataSize: 102
-explain extended select * from loc_orc where (year=2001 or year is null) and (state='CA');
+explain select * from loc_orc where (year=2001 or year is null) and (state='CA');
-- all inequality conditions rows/3 is the rules
-- numRows: 2 rawDataSize: 204
-explain extended select * from loc_orc where locid < 30;
-explain extended select * from loc_orc where locid > 30;
-explain extended select * from loc_orc where locid <= 30;
-explain extended select * from loc_orc where locid >= 30;
+explain select * from loc_orc where locid < 30;
+explain select * from loc_orc where locid > 30;
+explain select * from loc_orc where locid <= 30;
+explain select * from loc_orc where locid >= 30;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_groupby.q Thu Aug 7 00:21:45 2014
@@ -15,14 +15,14 @@ load data local inpath '../../data/files
insert overwrite table loc_orc select * from loc_staging;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- partial column stats
analyze table loc_orc compute statistics for columns state;
-- inner group by: map - numRows: 8 reduce - numRows: 4
-- outer group by: map - numRows: 4 reduce numRows: 2
-explain extended select a, c, min(b)
+explain select a, c, min(b)
from ( select state as a, locid as b, count(*) as c
from loc_orc
group by state,locid
@@ -34,36 +34,36 @@ analyze table loc_orc compute statistics
-- only one distinct value in year column + 1 NULL value
-- map-side GBY: numRows: 8 (map-side will not do any reduction)
-- reduce-side GBY: numRows: 2
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
-- map-side GBY: numRows: 8
-- reduce-side GBY: numRows: 4
-explain extended select state,locid from loc_orc group by state,locid;
+explain select state,locid from loc_orc group by state,locid;
-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid with rollup;
+explain select state,locid from loc_orc group by state,locid with rollup;
-- map-side GBY numRows: 8 reduce-side GBY numRows: 4
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state));
+explain select state,locid from loc_orc group by state,locid grouping sets((state));
-- map-side GBY numRows: 16 reduce-side GBY numRows: 8
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid));
-- map-side GBY numRows: 24 reduce-side GBY numRows: 12
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state),(locid),());
-- map-side GBY numRows: 32 reduce-side GBY numRows: 16
-explain extended select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
+explain select state,locid from loc_orc group by state,locid grouping sets((state,locid),(state),(locid),());
set hive.stats.map.parallelism=10;
-- map-side GBY: numRows: 80 (map-side will not do any reduction)
-- reduce-side GBY: numRows: 2 Reason: numDistinct of year is 2. numRows = min(80/2, 2)
-explain extended select year from loc_orc group by year;
+explain select year from loc_orc group by year;
-- map-side GBY numRows: 320 reduce-side GBY numRows: 42 Reason: numDistinct of state and locid are 6,7 resp. numRows = min(320/2, 6*7)
-explain extended select state,locid from loc_orc group by state,locid with cube;
+explain select state,locid from loc_orc group by state,locid with cube;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_join.q Thu Aug 7 00:21:45 2014
@@ -1,81 +1,70 @@
set hive.stats.fetch.column.stats=true;
+set hive.stats.ndv.error=0.0;
-create table if not exists emp_staging (
+create table if not exists emp (
lastname string,
- deptid int
+ deptid int,
+ locid int
) row format delimited fields terminated by '|' stored as textfile;
-create table if not exists dept_staging (
+create table if not exists dept (
deptid int,
deptname string
) row format delimited fields terminated by '|' stored as textfile;
-create table if not exists loc_staging (
+create table if not exists loc (
state string,
locid int,
zip bigint,
year int
) row format delimited fields terminated by '|' stored as textfile;
-create table if not exists emp_orc like emp_staging;
-alter table emp_orc set fileformat orc;
-
-create table if not exists dept_orc like dept_staging;
-alter table dept_orc set fileformat orc;
-
-create table loc_orc like loc_staging;
-alter table loc_orc set fileformat orc;
-
-LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
-LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept_staging;
-LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc_staging;
-
-insert overwrite table emp_orc select * from emp_staging;
-insert overwrite table dept_orc select * from dept_staging;
-insert overwrite table loc_orc select * from loc_staging;
-
-analyze table emp_orc compute statistics for columns lastname,deptid;
-analyze table dept_orc compute statistics for columns deptname,deptid;
-analyze table loc_orc compute statistics for columns state,locid,zip,year;
+LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp;
+LOAD DATA LOCAL INPATH '../../data/files/dept.txt' OVERWRITE INTO TABLE dept;
+LOAD DATA LOCAL INPATH '../../data/files/loc.txt' OVERWRITE INTO TABLE loc;
+
+analyze table emp compute statistics;
+analyze table dept compute statistics;
+analyze table loc compute statistics;
+analyze table emp compute statistics for columns lastname,deptid,locid;
+analyze table dept compute statistics for columns deptname,deptid;
+analyze table loc compute statistics for columns state,locid,zip,year;
-- number of rows
--- emp_orc - 6
--- dept_orc - 4
--- loc_orc - 8
+-- emp - 48
+-- dept - 6
+-- loc - 8
-- count distincts for relevant columns (since count distinct values are approximate, in some cases count distinct values will be greater than number of rows)
--- emp_orc.deptid - 3
--- emp_orc.lastname - 7
--- dept_orc.deptid - 6
--- dept_orc.deptname - 5
--- loc_orc.locid - 6
--- loc_orc.state - 7
-
--- Expected output rows: 4
--- Reason: #rows = (6*4)/max(3,6)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid);
-
--- 3 way join
--- Expected output rows: 4
--- Reason: #rows = (6*4*6)/max(3,6)*max(6,3)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join emp_orc e1 on (e.deptid = e1.deptid);
-
--- Expected output rows: 5
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,6)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.locid);
-
--- join keys of different types
--- Expected output rows: 4
--- Reason: #rows = (6*4*8)/max(3,6)*max(6,7)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid) join loc_orc l on (e.deptid = l.state);
-
--- multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4)/max(3,6)*max(7,5)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname);
-
--- 3 way and multi-attribute join
--- Expected output rows: 0
--- Reason: #rows = (6*4*8)/max(3,6)*max(7,5)*max(3,6)*max(7,7)
-explain extended select * from emp_orc e join dept_orc d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc_orc l on (e.deptid = l.locid and e.lastname = l.state);
+-- emp.deptid - 3
+-- emp.lastname - 6
+-- emp.locid - 7
+-- dept.deptid - 7
+-- dept.deptname - 6
+-- loc.locid - 7
+-- loc.state - 6
+
+-- 2 relations, 1 attribute
+-- Expected output rows: (48*6)/max(3,7) = 41
+explain select * from emp e join dept d on (e.deptid = d.deptid);
+
+-- 2 relations, 2 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6)) = 6
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname;
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname);
+
+-- 2 relations, 3 attributes
+-- Expected output rows: (48*6)/(max(3,7) * max(6,6) * max(6,6)) = 1
+explain select * from emp,dept where emp.deptid = dept.deptid and emp.lastname = dept.deptname and dept.deptname = emp.lastname;
+
+-- 3 relations, 1 attribute
+-- Expected output rows: (48*6*48)/top2largest(3,7,3) = 658
+explain select * from emp e join dept d on (e.deptid = d.deptid) join emp e1 on (e.deptid = e1.deptid);
+
+-- Expected output rows: (48*6*8)/top2largest(3,7,7) = 47
+explain select * from emp e join dept d on (e.deptid = d.deptid) join loc l on (e.deptid = l.locid);
+
+-- 3 relations and 2 attribute
+-- Expected output rows: (48*6*8)/top2largest(3,7,7)*top2largest(6,6,6) = 1
+explain select * from emp e join dept d on (e.deptid = d.deptid and e.lastname = d.deptname) join loc l on (e.deptid = l.locid and e.lastname = l.state);
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_limit.q Thu Aug 7 00:21:45 2014
@@ -17,14 +17,14 @@ insert overwrite table loc_orc select *
analyze table loc_orc compute statistics for columns state, locid, zip, year;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- numRows: 4 rawDataSize: 396
-explain extended select * from loc_orc limit 4;
+explain select * from loc_orc limit 4;
-- greater than the available number of rows
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc limit 16;
+explain select * from loc_orc limit 16;
-- numRows: 0 rawDataSize: 0
-explain extended select * from loc_orc limit 0;
+explain select * from loc_orc limit 0;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_part.q Thu Aug 7 00:21:45 2014
@@ -19,67 +19,67 @@ create table if not exists loc_orc (
) partitioned by(year string) stored as orc;
-- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
insert overwrite table loc_orc partition(year) select * from loc_staging;
-- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- partition level analyze statistics for specific partition
analyze table loc_orc partition(year='2001') compute statistics;
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001';
+explain select * from loc_orc where year='2001';
-- partition level analyze statistics for all partitions
analyze table loc_orc partition(year) compute statistics;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='__HIVE_DEFAULT_PARTITION__';
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' or year='__HIVE_DEFAULT_PARTITION__';
-- both partitions will be pruned
-- basicStatState: NONE colStatState: NONE
-explain extended select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
+explain select * from loc_orc where year='2001' and year='__HIVE_DEFAULT_PARTITION__';
-- partition level partial column statistics
analyze table loc_orc partition(year='2001') compute statistics for columns state,locid;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select zip from loc_orc;
+explain select zip from loc_orc;
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state from loc_orc;
+explain select state from loc_orc;
-- column statistics for __HIVE_DEFAULT_PARTITION__ is not supported yet. Hence colStatState reports PARTIAL
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select state,locid from loc_orc;
+explain select state,locid from loc_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select state,locid from loc_orc where year='2001';
+explain select state,locid from loc_orc where year='2001';
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select state,locid from loc_orc where year!='2001';
+explain select state,locid from loc_orc where year!='2001';
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- This is to test filter expression evaluation on partition column
-- numRows: 2 dataSize: 8 basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select locid from loc_orc where locid>0 and year='2001';
-explain extended select locid,year from loc_orc where locid>0 and year='2001';
-explain extended select * from (select locid,year from loc_orc) test where locid>0 and year='2001';
+explain select locid from loc_orc where locid>0 and year='2001';
+explain select locid,year from loc_orc where locid>0 and year='2001';
+explain select * from (select locid,year from loc_orc) test where locid>0 and year='2001';
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_select.q Thu Aug 7 00:21:45 2014
@@ -28,116 +28,116 @@ load data local inpath '../../data/files
insert overwrite table alltypes_orc select * from alltypes;
-- basicStatState: COMPLETE colStatState: NONE numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
-- statistics for complex types are not supported yet
analyze table alltypes_orc compute statistics for columns bo1, ti1, si1, i1, bi1, f1, d1, s1, vc1;
-- numRows: 2 rawDataSize: 1514
-explain extended select * from alltypes_orc;
+explain select * from alltypes_orc;
-- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc;
+explain select bo1 from alltypes_orc;
-- col alias renaming
-- numRows: 2 rawDataSize: 8
-explain extended select i1 as int1 from alltypes_orc;
+explain select i1 as int1 from alltypes_orc;
-- numRows: 2 rawDataSize: 174
-explain extended select s1 from alltypes_orc;
+explain select s1 from alltypes_orc;
-- column statistics for complex types unsupported and so statistics will not be updated
-- numRows: 2 rawDataSize: 1514
-explain extended select m1 from alltypes_orc;
+explain select m1 from alltypes_orc;
-- numRows: 2 rawDataSize: 246
-explain extended select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
+explain select bo1, ti1, si1, i1, bi1, f1, d1,s1 from alltypes_orc;
-- numRows: 2 rawDataSize: 0
-explain extended select null from alltypes_orc;
+explain select null from alltypes_orc;
-- numRows: 2 rawDataSize: 8
-explain extended select 11 from alltypes_orc;
+explain select 11 from alltypes_orc;
-- numRows: 2 rawDataSize: 16
-explain extended select 11L from alltypes_orc;
+explain select 11L from alltypes_orc;
-- numRows: 2 rawDataSize: 16
-explain extended select 11.0 from alltypes_orc;
+explain select 11.0 from alltypes_orc;
-- numRows: 2 rawDataSize: 178
-explain extended select "hello" from alltypes_orc;
-explain extended select cast("hello" as char(5)) from alltypes_orc;
-explain extended select cast("hello" as varchar(5)) from alltypes_orc;
+explain select "hello" from alltypes_orc;
+explain select cast("hello" as char(5)) from alltypes_orc;
+explain select cast("hello" as varchar(5)) from alltypes_orc;
-- numRows: 2 rawDataSize: 96
-explain extended select unbase64("0xe23") from alltypes_orc;
+explain select unbase64("0xe23") from alltypes_orc;
-- numRows: 2 rawDataSize: 16
-explain extended select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
+explain select cast("1" as TINYINT), cast("20" as SMALLINT) from alltypes_orc;
-- numRows: 2 rawDataSize: 80
-explain extended select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as TIMESTAMP) from alltypes_orc;
-- numRows: 2 rawDataSize: 112
-explain extended select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
+explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc;
-- numRows: 2 rawDataSize: 224
-explain extended select cast("58.174" as DECIMAL) from alltypes_orc;
+explain select cast("58.174" as DECIMAL) from alltypes_orc;
-- numRows: 2 rawDataSize: 112
-explain extended select array(1,2,3) from alltypes_orc;
+explain select array(1,2,3) from alltypes_orc;
-- numRows: 2 rawDataSize: 1508
-explain extended select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
+explain select str_to_map("a=1 b=2 c=3", " ", "=") from alltypes_orc;
-- numRows: 2 rawDataSize: 112
-explain extended select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
+explain select NAMED_STRUCT("a", 11, "b", 11) from alltypes_orc;
-- numRows: 2 rawDataSize: 250
-explain extended select CREATE_UNION(0, "hello") from alltypes_orc;
+explain select CREATE_UNION(0, "hello") from alltypes_orc;
-- COUNT(*) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
-- numRows: 1 rawDataSize: 8
-explain extended select count(*) from alltypes_orc;
+explain select count(*) from alltypes_orc;
-- COUNT(1) is projected as new column. It is not projected as GenericUDF and so datasize estimate will be based on number of rows
-- numRows: 1 rawDataSize: 8
-explain extended select count(1) from alltypes_orc;
+explain select count(1) from alltypes_orc;
-- column statistics for complex column types will be missing. data size will be calculated from available column statistics
-- numRows: 2 rawDataSize: 254
-explain extended select *,11 from alltypes_orc;
+explain select *,11 from alltypes_orc;
-- subquery selects
-- inner select - numRows: 2 rawDataSize: 8
-- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1 from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 16
-- outer select - numRows: 2 rawDataSize: 8
-explain extended select i1 from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1 from (select i1,11 from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 16
-- outer select - numRows: 2 rawDataSize: 186
-explain extended select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
+explain select i1,"hello" from (select i1,11 from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 24
-- outer select - numRows: 2 rawDataSize: 16
-explain extended select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
+explain select x from (select i1,11.0 as x from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 104
-- outer select - numRows: 2 rawDataSize: 186
-explain extended select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
+explain select x,"hello" from (select i1 as x, unbase64("0xe23") as ub from alltypes_orc limit 10) temp;
-- inner select - numRows: 2 rawDataSize: 186
-- middle select - numRows: 2 rawDataSize: 178
-- outer select - numRows: 2 rawDataSize: 194
-explain extended select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
+explain select h, 11.0 from (select hell as h from (select i1, "hello" as hell from alltypes_orc limit 10) in1 limit 10) in2;
-- This test is for FILTER operator where filter expression is a boolean column
-- numRows: 2 rawDataSize: 8
-explain extended select bo1 from alltypes_orc where bo1;
+explain select bo1 from alltypes_orc where bo1;
-- numRows: 0 rawDataSize: 0
-explain extended select bo1 from alltypes_orc where !bo1;
+explain select bo1 from alltypes_orc where !bo1;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_table.q Thu Aug 7 00:21:45 2014
@@ -10,7 +10,7 @@ create table if not exists emp_orc like
alter table emp_orc set fileformat orc;
-- basicStatState: NONE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
LOAD DATA LOCAL INPATH '../../data/files/emp.txt' OVERWRITE INTO TABLE emp_staging;
@@ -19,35 +19,35 @@ insert overwrite table emp_orc select *
-- stats are disabled. basic stats will report the file size but not raw data size. so initial statistics will be PARTIAL
-- basicStatState: PARTIAL colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- table level analyze statistics
analyze table emp_orc compute statistics;
-- basicStatState: COMPLETE colStatState: NONE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- column level partial statistics
analyze table emp_orc compute statistics for columns deptid;
-- basicStatState: COMPLETE colStatState: PARTIAL
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- all selected columns have statistics
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
-- column level complete statistics
analyze table emp_orc compute statistics for columns lastname,deptid;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select * from emp_orc;
+explain select * from emp_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname from emp_orc;
+explain select lastname from emp_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select deptid from emp_orc;
+explain select deptid from emp_orc;
-- basicStatState: COMPLETE colStatState: COMPLETE
-explain extended select lastname,deptid from emp_orc;
+explain select lastname,deptid from emp_orc;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/annotate_stats_union.q Thu Aug 7 00:21:45 2014
@@ -17,16 +17,16 @@ insert overwrite table loc_orc select *
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- numRows: 8 rawDataSize: 688
-explain extended select state from loc_orc;
+explain select state from loc_orc;
-- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from loc_orc union all select state from loc_orc) tmp;
+explain select * from (select state from loc_orc union all select state from loc_orc) tmp;
-- numRows: 8 rawDataSize: 796
-explain extended select * from loc_orc;
+explain select * from loc_orc;
-- numRows: 16 rawDataSize: 1592
-explain extended select * from (select * from loc_orc union all select * from loc_orc) tmp;
+explain select * from (select * from loc_orc union all select * from loc_orc) tmp;
create database test;
use test;
@@ -49,7 +49,7 @@ analyze table loc_staging compute statis
analyze table loc_orc compute statistics for columns state,locid,zip,year;
-- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
+explain select * from (select state from default.loc_orc union all select state from test.loc_orc) temp;
-- numRows: 16 rawDataSize: 1376
-explain extended select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
+explain select * from (select state from test.loc_staging union all select state from test.loc_orc) temp;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/columnstats_partlvl.q Thu Aug 7 00:21:45 2014
@@ -30,4 +30,12 @@ explain
analyze table Employee_Part compute statistics for columns;
analyze table Employee_Part compute statistics for columns;
+describe formatted Employee_Part.employeeID partition(employeeSalary=2000.0);
+describe formatted Employee_Part.employeeID partition(employeeSalary=4000.0);
+
+set hive.analyze.stmt.collect.partlevel.stats=false;
+explain
+analyze table Employee_Part compute statistics for columns;
+analyze table Employee_Part compute statistics for columns;
+
describe formatted Employee_Part.employeeID;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/parquet_columnar.q Thu Aug 7 00:21:45 2014
@@ -13,15 +13,16 @@ CREATE TABLE parquet_columnar_access_sta
CREATE TABLE parquet_columnar_access (
s string,
- i int,
+ x int,
+ y int,
f float
) STORED AS PARQUET;
LOAD DATA LOCAL INPATH '../../data/files/parquet_columnar.txt' OVERWRITE INTO TABLE parquet_columnar_access_stage;
-INSERT OVERWRITE TABLE parquet_columnar_access SELECT * FROM parquet_columnar_access_stage;
+INSERT OVERWRITE TABLE parquet_columnar_access SELECT s, i, (i + 1), f FROM parquet_columnar_access_stage;
SELECT * FROM parquet_columnar_access;
-ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, i1 int, f1 float);
+ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 int, y1 int, f1 float);
SELECT * FROM parquet_columnar_access;
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_14.q Thu Aug 7 00:21:45 2014
@@ -1,4 +1,38 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT ctimestamp1,
+ cfloat,
+ cstring1,
+ cboolean1,
+ cdouble,
+ (-26.28 + cdouble),
+ (-((-26.28 + cdouble))),
+ STDDEV_SAMP((-((-26.28 + cdouble)))),
+ (cfloat * -26.28),
+ MAX(cfloat),
+ (-(cfloat)),
+ (-(MAX(cfloat))),
+ ((-((-26.28 + cdouble))) / 10.175),
+ STDDEV_POP(cfloat),
+ COUNT(cfloat),
+ (-(((-((-26.28 + cdouble))) / 10.175))),
+ (-1.389 % STDDEV_SAMP((-((-26.28 + cdouble))))),
+ (cfloat - cdouble),
+ VAR_POP(cfloat),
+ (VAR_POP(cfloat) % 10.175),
+ VAR_SAMP(cfloat),
+ (-((cfloat - cdouble)))
+FROM alltypesorc
+WHERE (((ctinyint <= cbigint)
+ AND ((cint <= cdouble)
+ OR (ctimestamp2 < ctimestamp1)))
+ AND ((cdouble < ctinyint)
+ AND ((cbigint > -257)
+ OR (cfloat < cint))))
+GROUP BY ctimestamp1, cfloat, cstring1, cboolean1, cdouble
+ORDER BY cstring1, cfloat, cdouble, ctimestamp1;
+
SELECT ctimestamp1,
cfloat,
cstring1,
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_15.q Thu Aug 7 00:21:45 2014
@@ -1,4 +1,36 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT cfloat,
+ cboolean1,
+ cdouble,
+ cstring1,
+ ctinyint,
+ cint,
+ ctimestamp1,
+ STDDEV_SAMP(cfloat),
+ (-26.28 - cint),
+ MIN(cdouble),
+ (cdouble * 79.553),
+ (33 % cfloat),
+ STDDEV_SAMP(ctinyint),
+ VAR_POP(ctinyint),
+ (-23 % cdouble),
+ (-(ctinyint)),
+ VAR_SAMP(cint),
+ (cint - cfloat),
+ (-23 % ctinyint),
+ (-((-26.28 - cint))),
+ STDDEV_POP(cint)
+FROM alltypesorc
+WHERE (((cstring2 LIKE '%ss%')
+ OR (cstring1 LIKE '10%'))
+ OR ((cint >= -75)
+ AND ((ctinyint = csmallint)
+ AND (cdouble >= -3728))))
+GROUP BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1
+ORDER BY cfloat, cboolean1, cdouble, cstring1, ctinyint, cint, ctimestamp1;
+
SELECT cfloat,
cboolean1,
cdouble,
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_16.q Thu Aug 7 00:21:45 2014
@@ -1,4 +1,25 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT cstring1,
+ cdouble,
+ ctimestamp1,
+ (cdouble - 9763215.5639),
+ (-((cdouble - 9763215.5639))),
+ COUNT(cdouble),
+ STDDEV_SAMP(cdouble),
+ (-(STDDEV_SAMP(cdouble))),
+ (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+ MIN(cdouble),
+ (9763215.5639 / cdouble),
+ (COUNT(cdouble) / -1.389),
+ STDDEV_SAMP(cdouble)
+FROM alltypesorc
+WHERE ((cstring2 LIKE '%b%')
+ AND ((cdouble >= -1.389)
+ OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
SELECT cstring1,
cdouble,
ctimestamp1,
Modified: hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q (original)
+++ hive/branches/cbo/ql/src/test/queries/clientpositive/vectorization_9.q Thu Aug 7 00:21:45 2014
@@ -1,4 +1,25 @@
SET hive.vectorized.execution.enabled=true;
+
+EXPLAIN
+SELECT cstring1,
+ cdouble,
+ ctimestamp1,
+ (cdouble - 9763215.5639),
+ (-((cdouble - 9763215.5639))),
+ COUNT(cdouble),
+ STDDEV_SAMP(cdouble),
+ (-(STDDEV_SAMP(cdouble))),
+ (STDDEV_SAMP(cdouble) * COUNT(cdouble)),
+ MIN(cdouble),
+ (9763215.5639 / cdouble),
+ (COUNT(cdouble) / -1.389),
+ STDDEV_SAMP(cdouble)
+FROM alltypesorc
+WHERE ((cstring2 LIKE '%b%')
+ AND ((cdouble >= -1.389)
+ OR (cstring1 < 'a')))
+GROUP BY cstring1, cdouble, ctimestamp1;
+
SELECT cfloat,
cstring1,
cint,
Modified: hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
Files hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_filter.q.out Thu Aug 7 00:21:45 2014 differ
Modified: hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
Files hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_groupby.q.out Thu Aug 7 00:21:45 2014 differ
Modified: hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out?rev=1616379&r1=1616378&r2=1616379&view=diff
==============================================================================
Files hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out (original) and hive/branches/cbo/ql/src/test/results/clientpositive/annotate_stats_join.q.out Thu Aug 7 00:21:45 2014 differ