You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/11/08 21:22:58 UTC
svn commit: r1637592 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql:
exec/tez/ optimizer/optiq/rules/ optimizer/optiq/stats/
optimizer/optiq/translator/
Author: gunther
Date: Sat Nov 8 20:22:58 2014
New Revision: 1637592
URL: http://svn.apache.org/r1637592
Log:
HIVE-8768: CBO: Fix filter selectivity for 'in clause' & '<>' (Laljo John Pullokkaran via Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java?rev=1637592&r1=1637591&r2=1637592&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezJobMonitor.java Sat Nov 8 20:22:58 2014
@@ -110,9 +110,13 @@ public class TezJobMonitor {
/* Pretty print the values */
private final NumberFormat secondsFormat;
private final NumberFormat commaFormat;
- private static final List<DAGClient> shutdownList;
+ private static List<DAGClient> shutdownList;
+
+ public static void initShutdownHook() {
+ if (shutdownList != null) {
+ return;
+ }
- static {
shutdownList = Collections.synchronizedList(new LinkedList<DAGClient>());
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
@@ -130,9 +134,10 @@ public class TezJobMonitor {
}
}
});
- }
+ }
public TezJobMonitor() {
+ initShutdownHook();
console = SessionState.getConsole();
secondsFormat = new DecimalFormat("#0.00");
commaFormat = NumberFormat.getNumberInstance(Locale.US);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java?rev=1637592&r1=1637591&r2=1637592&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/TezSessionState.java Sat Nov 8 20:22:58 2014
@@ -187,6 +187,7 @@ public class TezSessionState {
LOG.info("Opening new Tez Session (id: " + sessionId
+ ", scratch dir: " + tezScratchDir + ")");
+ TezJobMonitor.initShutdownHook();
session.start();
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_PREWARM_ENABLED)) {
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java?rev=1637592&r1=1637591&r2=1637592&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java Sat Nov 8 20:22:58 2014
@@ -95,36 +95,40 @@ public abstract class HivePushFilterPast
* not equi join conditions.
*/
@Override
- protected void validateJoinFilters(List<RexNode> aboveFilters,
- List<RexNode> joinFilters, JoinRelBase join, JoinRelType joinType) {
- if (joinType.equals(JoinRelType.INNER)) {
- ListIterator<RexNode> filterIter = joinFilters.listIterator();
- while (filterIter.hasNext()) {
- RexNode exp = filterIter.next();
- if (exp instanceof RexCall) {
- RexCall c = (RexCall) exp;
- if (c.getOperator().getKind() == SqlKind.EQUALS) {
- boolean validHiveJoinFilter = true;
- for (RexNode rn : c.getOperands()) {
- // NOTE: Hive dis-allows projections from both left
- // &
- // right side
- // of join condition. Example: Hive disallows
- // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
- if (filterRefersToBothSidesOfJoin(rn, join)) {
- validHiveJoinFilter = false;
- break;
- }
- }
- if (validHiveJoinFilter)
- continue;
- }
- }
- aboveFilters.add(exp);
- filterIter.remove();
- }
- }
- }
+ protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+ JoinRelBase join, JoinRelType joinType) {
+ if (joinType.equals(JoinRelType.INNER)) {
+ ListIterator<RexNode> filterIter = joinFilters.listIterator();
+ while (filterIter.hasNext()) {
+ RexNode exp = filterIter.next();
+ if (exp instanceof RexCall) {
+ RexCall c = (RexCall) exp;
+ if ((c.getOperator().getKind() == SqlKind.EQUALS)
+ || (c.getOperator().getKind() == SqlKind.LESS_THAN)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+ || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+ boolean validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive dis-allows projections from both left
+ // &
+ // right side
+ // of join condition. Example: Hive disallows
+ // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ if (validHiveJoinFilter)
+ continue;
+ }
+ }
+ aboveFilters.add(exp);
+ filterIter.remove();
+ }
+ }
+ }
private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
boolean refersToBothSides = false;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java?rev=1637592&r1=1637591&r2=1637592&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java Sat Nov 8 20:22:58 2014
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
@@ -32,6 +35,10 @@ import org.eigenbase.rex.RexInputRef;
import org.eigenbase.rex.RexNode;
import org.eigenbase.rex.RexVisitorImpl;
import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlOperator;
+import org.eigenbase.sql.type.SqlTypeUtil;
+
+import com.google.common.collect.Sets;
public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
private final RelNode childRel;
@@ -61,7 +68,7 @@ public class FilterSelectivityEstimator
}
Double selectivity = null;
- SqlKind op = call.getKind();
+ SqlKind op = getOp(call);
switch (op) {
case AND: {
@@ -74,6 +81,7 @@ public class FilterSelectivityEstimator
break;
}
+ case NOT:
case NOT_EQUALS: {
selectivity = computeNotEqualitySelectivity(call);
break;
@@ -88,7 +96,16 @@ public class FilterSelectivityEstimator
}
case IN: {
- selectivity = ((double) 1 / ((double) call.operands.size()));
+ // TODO: 1) check for duplicates 2) We assume in clause values to be
+ // present in NDV which may not be correct (Range check can find it) 3) We
+ // assume values in NDV set is uniformly distributed over col values
+ // (account for skewness - histogram).
+ selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
+ if (selectivity <= 0.0) {
+ selectivity = 0.10;
+ } else if (selectivity >= 1.0) {
+ selectivity = 1.0;
+ }
break;
}
@@ -152,18 +169,19 @@ public class FilterSelectivityEstimator
}
tmpCardinality = childCardinality * tmpSelectivity;
- if (tmpCardinality > 1)
+ if (tmpCardinality > 1 && tmpCardinality < childCardinality) {
tmpSelectivity = (1 - tmpCardinality / childCardinality);
- else
+ } else {
tmpSelectivity = 1.0;
+ }
selectivity *= tmpSelectivity;
}
- if (selectivity > 1)
- return (1 - selectivity);
- else
- return 1.0;
+ if (selectivity < 0.0)
+ selectivity = 0.0;
+
+ return (1 - selectivity);
}
/**
@@ -225,4 +243,19 @@ public class FilterSelectivityEstimator
}
return false;
}
+
+ private SqlKind getOp(RexCall call) {
+ SqlKind op = call.getKind();
+
+ if (call.getKind().equals(SqlKind.OTHER_FUNCTION)
+ && SqlTypeUtil.inBooleanFamily(call.getType())) {
+ SqlOperator sqlOp = call.getOperator();
+ String opName = (sqlOp != null) ? sqlOp.getName() : "";
+ if (opName.equalsIgnoreCase("in")) {
+ op = SqlKind.IN;
+ }
+ }
+
+ return op;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java?rev=1637592&r1=1637591&r2=1637592&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java Sat Nov 8 20:22:58 2014
@@ -278,6 +278,7 @@ public class SqlFunctionConverter {
registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
+ registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
}
private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {