You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2015/09/17 18:48:52 UTC

[1/2] hive git commit: HIVE-11842: Improve RuleRegExp by caching some internal data structures (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)

Repository: hive
Updated Branches:
  refs/heads/master 8d524e062 -> 7201c264a


HIVE-11842: Improve RuleRegExp by caching some internal data structures (Jesus Camacho Rodriguez, reviewed by Sergey Shelukhin)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/79244ab4
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/79244ab4
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/79244ab4

Branch: refs/heads/master
Commit: 79244ab453823b8787b70a08f923e25c2abbd0bf
Parents: 8d524e0
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Sep 17 17:46:55 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Sep 17 17:46:55 2015 +0100

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/lib/RuleRegExp.java   | 61 ++++++++++++++++----
 1 file changed, 51 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/79244ab4/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
index fd5f133..1e850d6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/lib/RuleRegExp.java
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.ql.lib;
 
 import java.util.Arrays;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Set;
 import java.util.Stack;
 import java.util.regex.Matcher;
@@ -125,6 +127,12 @@ public class RuleRegExp implements Rule {
    */
   private int costPatternWithoutWildCardChar(Stack<Node> stack) throws SemanticException {
     int numElems = (stack != null ? stack.size() : 0);
+
+    // No elements
+    if (numElems == 0) {
+      return -1;
+    }
+
     int patLen = patternWithoutWildCardChar.length();
     StringBuilder name = new StringBuilder(patLen + numElems);
     for (int pos = numElems - 1; pos >= 0; pos--) {
@@ -133,9 +141,8 @@ public class RuleRegExp implements Rule {
       if (name.length() >= patLen) {
         if (patternWithoutWildCardChar.contentEquals(name)) {
           return patLen;
-        } else {
-          return -1;
         }
+        break;
       }
     }
     return -1;
@@ -152,20 +159,54 @@ public class RuleRegExp implements Rule {
    */
   private int costPatternWithORWildCardChar(Stack<Node> stack) throws SemanticException {
     int numElems = (stack != null ? stack.size() : 0);
+
+    // No elements
+    if (numElems == 0) {
+      return -1;
+    }
+
+    // These data structures cache the name Strings already built from the stack
+    Map<Integer,String> cachedNames = new HashMap<Integer,String>();
+    int maxDepth = numElems;
+    int maxLength = 0;
+
+    // For every pattern
     for (String pattern : patternORWildChar) {
       int patLen = pattern.length();
 
-      StringBuilder name = new StringBuilder(patLen + numElems);
-      for (int pos = numElems - 1; pos >= 0; pos--) {
-        String nodeName = stack.get(pos).getName() + "%";
-        name.insert(0, nodeName);
-        if (name.length() >= patLen) {
-          if (pattern.contentEquals(name)) {
-            return patLen;
-          } else {
+      // If the stack has already been explored to that level,
+      // use the cached String
+      if (cachedNames.containsKey(patLen)) {
+        if (pattern.contentEquals(cachedNames.get(patLen))) {
+          return patLen;
+        }
+      } else if (maxLength >= patLen) {
+        // We have already explored the stack deep enough, but
+        // we do not have a match
+        continue;
+      } else {
+        // We are going to build the name
+        StringBuilder name = new StringBuilder(patLen + numElems);
+        if (maxLength != 0) {
+          name.append(cachedNames.get(maxLength));
+        }
+        for (int pos = maxDepth - 1; pos >= 0; pos--) {
+          String nodeName = stack.get(pos).getName() + "%";
+          name.insert(0, nodeName);
+
+          // We cache the values
+          cachedNames.put(name.length(), name.toString());
+          maxLength = name.length();
+          maxDepth--;
+
+          if (name.length() >= patLen) {
+            if (pattern.contentEquals(name)) {
+              return patLen;
+            }
             break;
           }
         }
+        
       }
     }
     return -1;


[2/2] hive git commit: HIVE-11789: Better support for functions recognition in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Posted by jc...@apache.org.
HIVE-11789: Better support for functions recognition in CBO (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/7201c264
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/7201c264
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/7201c264

Branch: refs/heads/master
Commit: 7201c264a1fe8347fd87fc8c1bb835083e9aac75
Parents: 79244ab
Author: Jesus Camacho Rodriguez <jc...@apache.org>
Authored: Thu Sep 17 17:48:01 2015 +0100
Committer: Jesus Camacho Rodriguez <jc...@apache.org>
Committed: Thu Sep 17 17:48:01 2015 +0100

----------------------------------------------------------------------
 .../calcite/reloperators/HiveBetween.java       | 75 ++++++++++++++++++++
 .../optimizer/calcite/reloperators/HiveIn.java  | 41 +++++++++++
 .../calcite/rules/HivePreFilteringRule.java     | 37 +++-------
 .../translator/SqlFunctionConverter.java        | 16 ++++-
 4 files changed, 142 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
new file mode 100644
index 0000000..2388939
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveBetween.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.sql.SqlCallBinding;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.type.ReturnTypes;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlTypeName;
+
+/** Calcite special operator that represents Hive's BETWEEN function. */
+public class HiveBetween extends SqlSpecialOperator {
+
+  /**
+   * Operand type inference: operand 0 is BOOLEAN; the others take the first known
+   * type derived among operands 1..n. Declared before INSTANCE because static
+   * initializers run in textual order; declared after it, the constructor gets null.
+   */
+  public static final SqlOperandTypeInference FIRST_BOOLEAN_THEN_FIRST_KNOWN =
+      new SqlOperandTypeInference() {
+        @Override
+        public void inferOperandTypes(SqlCallBinding callBinding,
+            RelDataType returnType, RelDataType[] operandTypes) {
+          final RelDataType unknownType =
+              callBinding.getValidator().getUnknownType();
+          RelDataType knownType = unknownType;
+          for (int i = 1; i < callBinding.getCall().getOperandList().size(); i++) {
+            SqlNode operand = callBinding.getCall().getOperandList().get(i);
+            knownType = callBinding.getValidator().deriveType(
+                callBinding.getScope(), operand);
+            if (!knownType.equals(unknownType)) {
+              break;
+            }
+          }
+
+          RelDataTypeFactory typeFactory = callBinding.getTypeFactory();
+          operandTypes[0] = typeFactory.createSqlType(SqlTypeName.BOOLEAN);
+          for (int i = 1; i < operandTypes.length; ++i) {
+            operandTypes[i] = knownType;
+          }
+        }
+      };
+
+  public static final SqlSpecialOperator INSTANCE = new HiveBetween();
+
+  private HiveBetween() {
+    super(
+        "BETWEEN",
+        SqlKind.BETWEEN,
+        30,
+        true,
+        ReturnTypes.BOOLEAN_NULLABLE,
+        FIRST_BOOLEAN_THEN_FIRST_KNOWN,
+        null);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
new file mode 100644
index 0000000..6d87003
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveIn.java
@@ -0,0 +1,41 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlSpecialOperator;
+import org.apache.calcite.sql.type.InferTypes;
+import org.apache.calcite.sql.type.ReturnTypes;
+
+/**
+ * Calcite special operator that represents Hive's IN function.
+ */
+public class HiveIn extends SqlSpecialOperator {
+
+  /** Shared instance; the private constructor prevents creating others. */
+  public static final SqlSpecialOperator INSTANCE = new HiveIn();
+
+  private HiveIn() {
+    // Operand types are inferred from the first operand whose type is known.
+    super(
+        "IN", SqlKind.IN,
+        30, true,
+        ReturnTypes.BOOLEAN_NULLABLE, InferTypes.FIRST_KNOWN,
+        null);
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
index dde6288..3e2311c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HivePreFilteringRule.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.EnumSet;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Set;
@@ -41,22 +42,11 @@ import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBetween;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFIn;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqual;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualNS;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPEqualOrLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPGreaterThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPLessThan;
-import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotEqual;
 
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.LinkedHashMultimap;
 import com.google.common.collect.Multimap;
-import com.google.common.collect.Sets;
 
 
 public class HivePreFilteringRule extends RelOptRule {
@@ -71,18 +61,13 @@ public class HivePreFilteringRule extends RelOptRule {
   private final FilterFactory filterFactory;
 
 
-  private static final Set<String> COMPARISON_UDFS = Sets.newHashSet(
-          GenericUDFOPEqual.class.getAnnotation(Description.class).name(),
-          GenericUDFOPEqualNS.class.getAnnotation(Description.class).name(),
-          GenericUDFOPEqualOrGreaterThan.class.getAnnotation(Description.class).name(),
-          GenericUDFOPEqualOrLessThan.class.getAnnotation(Description.class).name(),
-          GenericUDFOPGreaterThan.class.getAnnotation(Description.class).name(),
-          GenericUDFOPLessThan.class.getAnnotation(Description.class).name(),
-          GenericUDFOPNotEqual.class.getAnnotation(Description.class).name());
-  private static final String IN_UDF =
-          GenericUDFIn.class.getAnnotation(Description.class).name();
-  private static final String BETWEEN_UDF =
-          GenericUDFBetween.class.getAnnotation(Description.class).name();
+  private static final Set<SqlKind> COMPARISON = EnumSet.of(
+          SqlKind.EQUALS,
+          SqlKind.GREATER_THAN_OR_EQUAL,
+          SqlKind.LESS_THAN_OR_EQUAL,
+          SqlKind.GREATER_THAN,
+          SqlKind.LESS_THAN,
+          SqlKind.NOT_EQUALS);
 
 
   private HivePreFilteringRule() {
@@ -176,7 +161,7 @@ public class HivePreFilteringRule extends RelOptRule {
           continue;
         }
         RexCall conjCall = (RexCall) conjunction;
-        if(COMPARISON_UDFS.contains(conjCall.getOperator().getName())) {
+        if(COMPARISON.contains(conjCall.getOperator().getKind())) {
           if (conjCall.operands.get(0) instanceof RexInputRef &&
                   conjCall.operands.get(1) instanceof RexLiteral) {
             reductionCondition.put(conjCall.operands.get(0).toString(),
@@ -188,11 +173,11 @@ public class HivePreFilteringRule extends RelOptRule {
                     conjCall);
             addedToReductionCondition = true;
           }
-        } else if(conjCall.getOperator().getName().equals(IN_UDF)) {
+        } else if(conjCall.getOperator().getKind().equals(SqlKind.IN)) {
           reductionCondition.put(conjCall.operands.get(0).toString(),
                   conjCall);
           addedToReductionCondition = true;
-        } else if(conjCall.getOperator().getName().equals(BETWEEN_UDF)) {
+        } else if(conjCall.getOperator().getKind().equals(SqlKind.BETWEEN)) {
           reductionCondition.put(conjCall.operands.get(1).toString(),
                   conjCall);
           addedToReductionCondition = true;

http://git-wip-us.apache.org/repos/asf/hive/blob/7201c264/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 219289c..fd78824 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -45,6 +45,8 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException;
 import org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSemanticException.UnsupportedFeature;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveBetween;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.HiveParser;
 import org.apache.hadoop.hive.ql.parse.ParseDriver;
@@ -193,7 +195,16 @@ public class SqlFunctionConverter {
     HiveToken hToken = calciteToHiveToken.get(op);
     ASTNode node;
     if (hToken != null) {
-      node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
+      switch (op.kind) {
+        case IN:
+        case BETWEEN:
+        case ROW:
+          node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
+          node.addChild((ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text));
+          break;
+        default:
+          node = (ASTNode) ParseDriver.adaptor.create(hToken.type, hToken.text);
+      }
     } else {
       node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTION, "TOK_FUNCTION");
       if (op.kind != SqlKind.CAST) {
@@ -296,6 +307,9 @@ public class SqlFunctionConverter {
           hToken(HiveParser.GREATERTHANOREQUALTO, ">="));
       registerFunction("!", SqlStdOperatorTable.NOT, hToken(HiveParser.KW_NOT, "not"));
       registerFunction("<>", SqlStdOperatorTable.NOT_EQUALS, hToken(HiveParser.NOTEQUAL, "<>"));
+      registerFunction("in", HiveIn.INSTANCE, hToken(HiveParser.Identifier, "in"));
+      registerFunction("between", HiveBetween.INSTANCE, hToken(HiveParser.Identifier, "between"));
+      registerFunction("struct", SqlStdOperatorTable.ROW, hToken(HiveParser.Identifier, "struct"));
     }
 
     private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {