You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/11/08 21:26:24 UTC
svn commit: r1637594 - in
/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq:
rules/HivePushFilterPastJoinRule.java stats/FilterSelectivityEstimator.java
translator/SqlFunctionConverter.java
Author: gunther
Date: Sat Nov 8 20:26:24 2014
New Revision: 1637594
URL: http://svn.apache.org/r1637594
Log:
HIVE-8768: CBO: Fix filter selectivity for 'in clause' & '<>' (Laljo John Pullokkaran via Gunther Hagleitner)
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java?rev=1637594&r1=1637593&r2=1637594&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java Sat Nov 8 20:26:24 2014
@@ -95,36 +95,40 @@ public abstract class HivePushFilterPast
* not equi join conditions.
*/
@Override
- protected void validateJoinFilters(List<RexNode> aboveFilters,
- List<RexNode> joinFilters, JoinRelBase join, JoinRelType joinType) {
- if (joinType.equals(JoinRelType.INNER)) {
- ListIterator<RexNode> filterIter = joinFilters.listIterator();
- while (filterIter.hasNext()) {
- RexNode exp = filterIter.next();
- if (exp instanceof RexCall) {
- RexCall c = (RexCall) exp;
- if (c.getOperator().getKind() == SqlKind.EQUALS) {
- boolean validHiveJoinFilter = true;
- for (RexNode rn : c.getOperands()) {
- // NOTE: Hive dis-allows projections from both left
- // &
- // right side
- // of join condition. Example: Hive disallows
- // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
- if (filterRefersToBothSidesOfJoin(rn, join)) {
- validHiveJoinFilter = false;
- break;
- }
- }
- if (validHiveJoinFilter)
- continue;
- }
- }
- aboveFilters.add(exp);
- filterIter.remove();
- }
- }
- }
+ protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+ JoinRelBase join, JoinRelType joinType) {
+ if (joinType.equals(JoinRelType.INNER)) {
+ ListIterator<RexNode> filterIter = joinFilters.listIterator();
+ while (filterIter.hasNext()) {
+ RexNode exp = filterIter.next();
+ if (exp instanceof RexCall) {
+ RexCall c = (RexCall) exp;
+ if ((c.getOperator().getKind() == SqlKind.EQUALS)
+ || (c.getOperator().getKind() == SqlKind.LESS_THAN)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+ || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+ || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+ boolean validHiveJoinFilter = true;
+ for (RexNode rn : c.getOperands()) {
+ // NOTE: Hive disallows a join condition operand that
+ // refers to columns from both the left and the
+ // right side of the join.
+ // Example: Hive disallows
+ // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
+ if (filterRefersToBothSidesOfJoin(rn, join)) {
+ validHiveJoinFilter = false;
+ break;
+ }
+ }
+ if (validHiveJoinFilter)
+ continue;
+ }
+ }
+ aboveFilters.add(exp);
+ filterIter.remove();
+ }
+ }
+ }
private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
boolean refersToBothSides = false;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java?rev=1637594&r1=1637593&r2=1637594&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java Sat Nov 8 20:26:24 2014
@@ -18,6 +18,9 @@
package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
@@ -32,6 +35,10 @@ import org.eigenbase.rex.RexInputRef;
import org.eigenbase.rex.RexNode;
import org.eigenbase.rex.RexVisitorImpl;
import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlOperator;
+import org.eigenbase.sql.type.SqlTypeUtil;
+
+import com.google.common.collect.Sets;
public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
private final RelNode childRel;
@@ -61,7 +68,7 @@ public class FilterSelectivityEstimator
}
Double selectivity = null;
- SqlKind op = call.getKind();
+ SqlKind op = getOp(call);
switch (op) {
case AND: {
@@ -74,6 +81,7 @@ public class FilterSelectivityEstimator
break;
}
+ case NOT:
case NOT_EQUALS: {
selectivity = computeNotEqualitySelectivity(call);
break;
@@ -88,7 +96,16 @@ public class FilterSelectivityEstimator
}
case IN: {
- selectivity = ((double) 1 / ((double) call.operands.size()));
+ // TODO: 1) Check for duplicates. 2) We assume the IN clause values are
+ // present in the NDV set, which may not be correct (a range check could
+ // detect this). 3) We assume values in the NDV set are uniformly distributed
+ // over the column values (account for skew, e.g. with a histogram).
+ selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
+ if (selectivity <= 0.0) {
+ selectivity = 0.10;
+ } else if (selectivity >= 1.0) {
+ selectivity = 1.0;
+ }
break;
}
@@ -152,18 +169,19 @@ public class FilterSelectivityEstimator
}
tmpCardinality = childCardinality * tmpSelectivity;
- if (tmpCardinality > 1)
+ if (tmpCardinality > 1 && tmpCardinality < childCardinality) {
tmpSelectivity = (1 - tmpCardinality / childCardinality);
- else
+ } else {
tmpSelectivity = 1.0;
+ }
selectivity *= tmpSelectivity;
}
- if (selectivity > 1)
- return (1 - selectivity);
- else
- return 1.0;
+ if (selectivity < 0.0)
+ selectivity = 0.0;
+
+ return (1 - selectivity);
}
/**
@@ -225,4 +243,19 @@ public class FilterSelectivityEstimator
}
return false;
}
+
+ private SqlKind getOp(RexCall call) {
+ SqlKind op = call.getKind();
+
+ if (call.getKind().equals(SqlKind.OTHER_FUNCTION)
+ && SqlTypeUtil.inBooleanFamily(call.getType())) {
+ SqlOperator sqlOp = call.getOperator();
+ String opName = (sqlOp != null) ? sqlOp.getName() : "";
+ if (opName.equalsIgnoreCase("in")) {
+ op = SqlKind.IN;
+ }
+ }
+
+ return op;
+ }
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java?rev=1637594&r1=1637593&r2=1637594&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java Sat Nov 8 20:26:24 2014
@@ -278,6 +278,7 @@ public class SqlFunctionConverter {
registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
+ registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
}
private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {