You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by gu...@apache.org on 2014/11/08 21:27:02 UTC

svn commit: r1637595 - in /hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq: rules/HivePushFilterPastJoinRule.java stats/FilterSelectivityEstimator.java translator/SqlFunctionConverter.java

Author: gunther
Date: Sat Nov  8 20:27:02 2014
New Revision: 1637595

URL: http://svn.apache.org/r1637595
Log:
HIVE-8768: CBO: Fix filter selectivity for 'in clause' & '<>' (Laljo John Pullokkaran via Gunther Hagleitner)

Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java?rev=1637595&r1=1637594&r2=1637595&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/rules/HivePushFilterPastJoinRule.java Sat Nov  8 20:27:02 2014
@@ -95,36 +95,40 @@ public abstract class HivePushFilterPast
 	 * not equi join conditions.
 	 */
 	@Override
-	protected void validateJoinFilters(List<RexNode> aboveFilters,
-			List<RexNode> joinFilters, JoinRelBase join, JoinRelType joinType) {
-		if (joinType.equals(JoinRelType.INNER)) {
-			ListIterator<RexNode> filterIter = joinFilters.listIterator();
-			while (filterIter.hasNext()) {
-				RexNode exp = filterIter.next();
-				if (exp instanceof RexCall) {
-					RexCall c = (RexCall) exp;
-					if (c.getOperator().getKind() == SqlKind.EQUALS) {
-						boolean validHiveJoinFilter = true;
-						for (RexNode rn : c.getOperands()) {
-							// NOTE: Hive dis-allows projections from both left
-							// &
-							// right side
-							// of join condition. Example: Hive disallows
-							// (r1.x=r2.x)=(r1.y=r2.y) on join condition.
-							if (filterRefersToBothSidesOfJoin(rn, join)) {
-								validHiveJoinFilter = false;
-								break;
-							}
-						}
-						if (validHiveJoinFilter)
-							continue;
-					}
-				}
-				aboveFilters.add(exp);
-				filterIter.remove();
-			}
-		}
-	}
+  protected void validateJoinFilters(List<RexNode> aboveFilters, List<RexNode> joinFilters,
+      JoinRelBase join, JoinRelType joinType) {
+    if (joinType.equals(JoinRelType.INNER)) { // other join types: both lists are left untouched
+      ListIterator<RexNode> filterIter = joinFilters.listIterator();
+      while (filterIter.hasNext()) {
+        RexNode exp = filterIter.next();
+        if (exp instanceof RexCall) {
+          RexCall c = (RexCall) exp;
+          if ((c.getOperator().getKind() == SqlKind.EQUALS)
+              || (c.getOperator().getKind() == SqlKind.LESS_THAN)
+              || (c.getOperator().getKind() == SqlKind.GREATER_THAN)
+              || (c.getOperator().getKind() == SqlKind.LESS_THAN_OR_EQUAL)
+              || (c.getOperator().getKind() == SqlKind.GREATER_THAN_OR_EQUAL)) {
+            boolean validHiveJoinFilter = true;
+            for (RexNode rn : c.getOperands()) {
+              // NOTE: Hive disallows a join-condition operand that projects
+              // from both the left and the right side of the join; a filter
+              // with such an operand is moved out of joinFilters below.
+              // Example: Hive disallows
+              // (r1.x=r2.x)=(r1.y=r2.y) on join condition.
+              if (filterRefersToBothSidesOfJoin(rn, join)) {
+                validHiveJoinFilter = false;
+                break;
+              }
+            }
+            if (validHiveJoinFilter)
+              continue; // accepted: exp stays in joinFilters
+          }
+        }
+        aboveFilters.add(exp); // rejected: move exp from joinFilters to aboveFilters
+        filterIter.remove();
+      }
+    }
+  }
 
 	private boolean filterRefersToBothSidesOfJoin(RexNode filter, JoinRelBase j) {
 		boolean refersToBothSides = false;

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java?rev=1637595&r1=1637594&r2=1637595&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/stats/FilterSelectivityEstimator.java Sat Nov  8 20:27:02 2014
@@ -18,6 +18,9 @@
 package org.apache.hadoop.hive.ql.optimizer.optiq.stats;
 
 import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.hadoop.hive.ql.optimizer.optiq.RelOptHiveTable;
 import org.apache.hadoop.hive.ql.optimizer.optiq.reloperators.HiveTableScanRel;
@@ -32,6 +35,10 @@ import org.eigenbase.rex.RexInputRef;
 import org.eigenbase.rex.RexNode;
 import org.eigenbase.rex.RexVisitorImpl;
 import org.eigenbase.sql.SqlKind;
+import org.eigenbase.sql.SqlOperator;
+import org.eigenbase.sql.type.SqlTypeUtil;
+
+import com.google.common.collect.Sets;
 
 public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
   private final RelNode childRel;
@@ -61,7 +68,7 @@ public class FilterSelectivityEstimator 
     }
 
     Double selectivity = null;
-    SqlKind op = call.getKind();
+    SqlKind op = getOp(call);
 
     switch (op) {
     case AND: {
@@ -74,6 +81,7 @@ public class FilterSelectivityEstimator 
       break;
     }
 
+    case NOT:
     case NOT_EQUALS: {
       selectivity = computeNotEqualitySelectivity(call);
       break;
@@ -88,7 +96,16 @@ public class FilterSelectivityEstimator 
     }
 
     case IN: {
-      selectivity = ((double) 1 / ((double) call.operands.size()));
+      // TODO: 1) Check for duplicates. 2) We assume the IN-clause values are
+      // present in the NDV set, which may not be correct (a range check can
+      // detect this). 3) We assume values in the NDV set are uniformly
+      // distributed over the column values (account for skewness - histogram).
+      selectivity = computeFunctionSelectivity(call) * (call.operands.size() - 1);
+      if (selectivity <= 0.0) {
+        selectivity = 0.10;
+      } else if (selectivity >= 1.0) {
+        selectivity = 1.0;
+      }
       break;
     }
 
@@ -152,18 +169,19 @@ public class FilterSelectivityEstimator 
       }
       tmpCardinality = childCardinality * tmpSelectivity;
 
-      if (tmpCardinality > 1)
+      if (tmpCardinality > 1 && tmpCardinality < childCardinality) {
         tmpSelectivity = (1 - tmpCardinality / childCardinality);
-      else
+      } else {
         tmpSelectivity = 1.0;
+      }
 
       selectivity *= tmpSelectivity;
     }
 
-    if (selectivity > 1)
-      return (1 - selectivity);
-    else
-      return 1.0;
+    if (selectivity < 0.0)
+      selectivity = 0.0;
+
+    return (1 - selectivity);
   }
 
   /**
@@ -225,4 +243,19 @@ public class FilterSelectivityEstimator 
     }
     return false;
   }
+
+  private SqlKind getOp(RexCall call) {
+    SqlKind op = call.getKind();
+    // Default is the call's own kind; only a boolean-typed OTHER_FUNCTION named "in" is remapped.
+    if (call.getKind().equals(SqlKind.OTHER_FUNCTION)
+        && SqlTypeUtil.inBooleanFamily(call.getType())) {
+      SqlOperator sqlOp = call.getOperator();
+      String opName = (sqlOp != null) ? sqlOp.getName() : ""; // null operator: no name to match
+      if (opName.equalsIgnoreCase("in")) {
+        op = SqlKind.IN;
+      }
+    }
+    // Every other call reaches here with op unchanged.
+    return op;
+  }
 }

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java?rev=1637595&r1=1637594&r2=1637595&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/optimizer/optiq/translator/SqlFunctionConverter.java Sat Nov  8 20:27:02 2014
@@ -278,6 +278,7 @@ public class SqlFunctionConverter {
       registerFunction(">=", SqlStdOperatorTable.GREATER_THAN_OR_EQUAL,
           hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
       registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
+      registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
     }
 
     private void registerFunction(String name, SqlOperator optiqFn, HiveToken hiveToken) {