You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/03/11 17:59:18 UTC

[hive] branch branch-3 updated: HIVE-21388: Constant UDF is not pushed to JDBCStorage Handler (Jesus Camacho Rodriguez, reviewed by Jason Dere)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/branch-3 by this push:
     new 008797c  HIVE-21388: Constant UDF is not pushed to JDBCStorage Handler (Jesus Camacho Rodriguez, reviewed by Jason Dere)
008797c is described below

commit 008797cb2fab6291197459fddce35d61e638c004
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Mon Mar 4 18:50:21 2019 -0800

    HIVE-21388: Constant UDF is not pushed to JDBCStorage Handler (Jesus Camacho Rodriguez, reviewed by Jason Dere)
---
 .../hive/ql/optimizer/calcite/HiveCalciteUtil.java |  6 ++-
 .../calcite/reloperators/HiveSqlFunction.java      | 58 ++++++++++++++++++++++
 .../calcite/translator/SqlFunctionConverter.java   | 37 +++-----------
 .../clientpositive/current_date_timestamp.q        |  2 +
 .../llap/current_date_timestamp.q.out              | 14 +++++-
 5 files changed, 84 insertions(+), 33 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 1f8a48c..89ee6c5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -69,6 +69,7 @@ import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSqlFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableFunctionScan;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.SqlFunctionConverter;
@@ -978,7 +979,6 @@ public class HiveCalciteUtil {
    * Check if the expression is usable for query materialization, returning the first failing expression.
    */
   public static RexCall checkMaterializable(RexNode expr) {
-    boolean deterministic = true;
     RexCall failingCall = null;
 
     if (expr == null) {
@@ -989,7 +989,9 @@ public class HiveCalciteUtil {
       @Override
       public Void visitCall(org.apache.calcite.rex.RexCall call) {
         // non-deterministic functions as well as runtime constants are not materializable.
-        if (!call.getOperator().isDeterministic() || call.getOperator().isDynamicFunction()) {
+        SqlOperator op = call.getOperator();
+        if (!op.isDeterministic() || op.isDynamicFunction() ||
+            (op instanceof HiveSqlFunction && ((HiveSqlFunction) op).isRuntimeConstant())) {
           throw new Util.FoundOne(call);
         }
         return super.visitCall(call);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSqlFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSqlFunction.java
new file mode 100644
index 0000000..a20520b
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/reloperators/HiveSqlFunction.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.optimizer.calcite.reloperators;
+
+import org.apache.calcite.sql.SqlFunction;
+import org.apache.calcite.sql.SqlFunctionCategory;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.type.SqlOperandTypeChecker;
+import org.apache.calcite.sql.type.SqlOperandTypeInference;
+import org.apache.calcite.sql.type.SqlReturnTypeInference;
+
+public class HiveSqlFunction extends SqlFunction {
+  private final boolean deterministic;
+  private final boolean runtimeConstant;
+
+  public HiveSqlFunction(String name, SqlKind kind, SqlReturnTypeInference returnTypeInference,
+      SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
+      SqlFunctionCategory category, boolean deterministic, boolean runtimeConstant) {
+    super(name, kind, returnTypeInference, operandTypeInference, operandTypeChecker, category);
+    this.deterministic = deterministic;
+    this.runtimeConstant = runtimeConstant;
+  }
+
+  @Override
+  public boolean isDeterministic() {
+    return deterministic;
+  }
+
+  /**
+   * Whether it is unsafe to cache or materialize plans containing this operator.
+   * We do not rely on {@link SqlFunction#isDynamicFunction()} because it has
+   * different implications, e.g., a dynamic function will not be reduced in
+   * Calcite since plans may be cached in the context of prepared statements.
+   * In our case, we check whether a plan contains runtime constants before
+   * constant folding happens, hence we can let Calcite reduce these functions.
+   *
+   * @return true iff it is unsafe to cache or materialize query plans
+   * referencing this operator
+   */
+  public boolean isRuntimeConstant() {
+    return runtimeConstant;
+  }
+}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
index 06c9617..7f1ada8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java
@@ -22,7 +22,6 @@ import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.sql.SqlAggFunction;
-import org.apache.calcite.sql.SqlFunction;
 import org.apache.calcite.sql.SqlFunctionCategory;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
@@ -57,6 +56,7 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveExtractDate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFloorDate;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFromUnixTimeSqlOperator;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveIn;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSqlFunction;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveToDateSqlOperator;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTruncSqlOperator;
 import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveUnixTimestampSqlOperator;
@@ -120,10 +120,10 @@ public class SqlFunctionConverter {
     }
 
     // For calcite, isDeterministic just matters for within the query.
-    // isDynamicFunction used to indicate the function is not deterministic between queries.
+    // runtimeConstant is used to indicate that the function is not deterministic between queries.
     boolean isDeterministic = FunctionRegistry.isConsistentWithinQuery(hiveUDF);
-    boolean isDynamicFunction = FunctionRegistry.isRuntimeConstant(hiveUDF);
-    return getCalciteFn(name, calciteArgTypes, retType, isDeterministic, isDynamicFunction);
+    boolean runtimeConstant = FunctionRegistry.isRuntimeConstant(hiveUDF);
+    return getCalciteFn(name, calciteArgTypes, retType, isDeterministic, runtimeConstant);
   }
 
   public static SqlOperator getCalciteOperator(String funcTextName, GenericUDTF hiveUDTF,
@@ -504,29 +504,6 @@ public class SqlFunctionConverter {
     }
   }
 
-  private static class CalciteSqlFn extends SqlFunction {
-    private final boolean deterministic;
-    private final boolean dynamicFunction;
-
-    public CalciteSqlFn(String name, SqlKind kind, SqlReturnTypeInference returnTypeInference,
-        SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker,
-        SqlFunctionCategory category, boolean deterministic, boolean dynamicFunction) {
-      super(name, kind, returnTypeInference, operandTypeInference, operandTypeChecker, category);
-      this.deterministic = deterministic;
-      this.dynamicFunction = dynamicFunction;
-    }
-
-    @Override
-    public boolean isDeterministic() {
-      return deterministic;
-    }
-
-    @Override
-    public boolean isDynamicFunction() {
-      return dynamicFunction;
-    }
-  }
-
   private static class CalciteUDFInfo {
     private String                     udfName;
     private SqlReturnTypeInference     returnTypeInference;
@@ -550,7 +527,7 @@ public class SqlFunctionConverter {
 
   public static SqlOperator getCalciteFn(String hiveUdfName,
       ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType,
-      boolean deterministic, boolean dynamicFunction)
+      boolean deterministic, boolean runtimeConstant)
       throws CalciteSemanticException {
 
     if (hiveUdfName != null && hiveUdfName.trim().equals("<=>")) {
@@ -576,9 +553,9 @@ public class SqlFunctionConverter {
       default:
         calciteOp = hiveToCalcite.get(hiveUdfName);
         if (null == calciteOp) {
-          calciteOp = new CalciteSqlFn(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference,
+          calciteOp = new HiveSqlFunction(uInf.udfName, SqlKind.OTHER_FUNCTION, uInf.returnTypeInference,
               uInf.operandTypeInference, uInf.operandTypeChecker,
-              SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic, dynamicFunction);
+              SqlFunctionCategory.USER_DEFINED_FUNCTION, deterministic, runtimeConstant);
         }
         break;
     }
diff --git a/ql/src/test/queries/clientpositive/current_date_timestamp.q b/ql/src/test/queries/clientpositive/current_date_timestamp.q
index a1157ce..91f2da3 100644
--- a/ql/src/test/queries/clientpositive/current_date_timestamp.q
+++ b/ql/src/test/queries/clientpositive/current_date_timestamp.q
@@ -2,6 +2,8 @@ select current_timestamp = current_timestamp(), current_date = current_date() fr
 
 set hive.test.currenttimestamp =2012-01-01 01:02:03;
 
+explain cbo select current_timestamp() from alltypesorc;
+
 --ensure that timestamp is same for all the rows while using current_timestamp() query should return single row
 select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq;
 
diff --git a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
index 6dbca90..5424467 100644
--- a/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
+++ b/ql/src/test/results/clientpositive/llap/current_date_timestamp.q.out
@@ -11,6 +11,18 @@ true	true
 true	true
 true	true
 true	true
+PREHOOK: query: explain cbo select current_timestamp() from alltypesorc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+POSTHOOK: query: explain cbo select current_timestamp() from alltypesorc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject($f0=[CAST(2012-01-01 01:02:03):TIMESTAMP(9)])
+  HiveTableScan(table=[[default, alltypesorc]], table:alias=[alltypesorc])
+
 PREHOOK: query: select count(*) from (select current_timestamp() from alltypesorc union select current_timestamp() from src limit 5 ) subq
 PREHOOK: type: QUERY
 PREHOOK: Input: default@alltypesorc
@@ -47,7 +59,7 @@ POSTHOOK: query: explain extended select current_timestamp() from alltypesorc
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc
 #### A masked pattern was here ####
-OPTIMIZED SQL: SELECT CURRENT_TIMESTAMP() AS `_o__c0`
+OPTIMIZED SQL: SELECT CAST(TIMESTAMP '2012-01-01 01:02:03.000000000' AS TIMESTAMP(9)) AS `$f0`
 FROM `default`.`alltypesorc`
 STAGE DEPENDENCIES:
   Stage-0 is a root stage