You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2018/10/10 08:39:22 UTC

hive git commit: HIVE-20710: Constant folding may not create null constants without types (Zoltan Haindrich reviewed by Ashutosh Chauhan)

Repository: hive
Updated Branches:
  refs/heads/master 2ff9c5229 -> 90e12280c


HIVE-20710: Constant folding may not create null constants without types (Zoltan Haindrich reviewed by Ashutosh Chauhan)

Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/90e12280
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/90e12280
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/90e12280

Branch: refs/heads/master
Commit: 90e12280ca06b3df0099a858f58a542f190cf9b3
Parents: 2ff9c52
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Wed Oct 10 10:38:00 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Wed Oct 10 10:38:00 2018 +0200

----------------------------------------------------------------------
 .../optimizer/calcite/HiveRexExecutorImpl.java  |   3 +-
 .../stats/FilterSelectivityEstimator.java       |  33 +--
 .../calcite/translator/RexNodeConverter.java    |  13 +-
 .../test/queries/clientpositive/fold_to_null.q  |  14 ++
 .../results/clientpositive/fold_to_null.q.out   | 209 +++++++++++++++++++
 .../clientpositive/literal_decimal.q.out        |   6 +-
 6 files changed, 253 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
index b4bd142..1dede0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
@@ -28,7 +28,6 @@ import org.apache.calcite.rex.RexNode;
 import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
 import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
 import org.slf4j.Logger;
@@ -63,7 +62,7 @@ public class HiveRexExecutorImpl extends RexExecutorImpl {
         if (constant != null) {
           try {
             // convert constant back to RexNode
-            reducedValues.add(rexNodeConverter.convert((ExprNodeConstantDesc) constant));
+            reducedValues.add(rexNodeConverter.convert(constant));
           } catch (Exception e) {
             LOG.warn(e.getMessage());
             reducedValues.add(rexNode);

http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index 43f8508..d362e9b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -31,6 +31,7 @@ import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexVisitorImpl;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.SqlOperator;
@@ -57,6 +58,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
     return predicate.accept(this);
   }
 
+  @Override
   public Double visitCall(RexCall call) {
     if (!deep) {
       return 1.0;
@@ -138,17 +140,18 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
    * NDV of "f1(x, y, z) != f2(p, q, r)" ->
    * "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
    * <p>
-   * 
+   *
    * @param call
    * @return
    */
   private Double computeNotEqualitySelectivity(RexCall call) {
     double tmpNDV = getMaxNDV(call);
 
-    if (tmpNDV > 1)
-      return (tmpNDV - (double) 1) / tmpNDV;
-    else
+    if (tmpNDV > 1) {
+      return (tmpNDV - 1) / tmpNDV;
+    } else {
       return 1.0;
+    }
   }
 
   /**
@@ -156,7 +159,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
    * <p>
    * Note that >, >=, <, <=, = ... are considered generic functions and uses
    * this method to find their selectivity.
-   * 
+   *
    * @param call
    * @return
    */
@@ -171,7 +174,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
    * <p>
    * Note we compute m1. m2.. by applying selectivity of the disjunctive element
    * on the cardinality from child.
-   * 
+   *
    * @param call
    * @return
    */
@@ -196,8 +199,9 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
       selectivity *= tmpSelectivity;
     }
 
-    if (selectivity < 0.0)
+    if (selectivity < 0.0) {
       selectivity = 0.0;
+    }
 
     return (1 - selectivity);
   }
@@ -205,7 +209,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
   /**
    * Selectivity of conjunctive predicate -> (selectivity of conjunctive
    * element1) * (selectivity of conjunctive element2)...
-   * 
+   *
    * @param call
    * @return
    */
@@ -226,9 +230,9 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
   /**
    * Given a RexCall & TableScan find max no of nulls. Currently it picks the
    * col with max no of nulls.
-   * 
+   *
    * TODO: improve this
-   * 
+   *
    * @param call
    * @param t
    * @return
@@ -258,16 +262,18 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
       if (op instanceof RexInputRef) {
         tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, mq,
             ((RexInputRef) op).getIndex());
-        if (tmpNDV > maxNDV)
+        if (tmpNDV > maxNDV) {
           maxNDV = tmpNDV;
+        }
       } else {
         irv = new InputReferencedVisitor();
         irv.apply(op);
         for (Integer childProjIndx : irv.inputPosReferenced) {
           tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
               mq, childProjIndx);
-          if (tmpNDV > maxNDV)
+          if (tmpNDV > maxNDV) {
             maxNDV = tmpNDV;
+          }
         }
       }
     }
@@ -304,8 +310,9 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
     return op;
   }
 
+  @Override
   public Double visitLiteral(RexLiteral literal) {
-    if (literal.isAlwaysFalse()) {
+    if (literal.isAlwaysFalse() || RexUtil.isNull(literal)) {
       return 0.0;
     } else if (literal.isAlwaysTrue()) {
       return 1.0;

http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 35aae6a..78b1281 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableList.Builder;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
+
 import org.apache.calcite.avatica.util.TimeUnit;
 import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.plan.RelOptCluster;
@@ -111,7 +112,6 @@ import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 
-
 public class RexNodeConverter {
 
   private static class InputCtx {
@@ -664,10 +664,10 @@ public class RexNodeConverter {
   }
 
   protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
-    RexBuilder rexBuilder = cluster.getRexBuilder();
-    RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
-    PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
-    RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
+    final RexBuilder rexBuilder = cluster.getRexBuilder();
+    final RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+    final PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
+    final RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
 
     PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
 
@@ -821,8 +821,7 @@ public class RexNodeConverter {
        SqlParserPos(1, 1)));
        break;
     case VOID:
-      calciteLiteral = cluster.getRexBuilder().makeLiteral(null,
-          cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true);
+      calciteLiteral = rexBuilder.makeLiteral(null, calciteDataType, true);
       break;
     case BINARY:
     case UNKNOWN:

http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/test/queries/clientpositive/fold_to_null.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/fold_to_null.q b/ql/src/test/queries/clientpositive/fold_to_null.q
new file mode 100644
index 0000000..002d57f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/fold_to_null.q
@@ -0,0 +1,14 @@
+create table t (a int);
+create table t2 (b int);
+create table t3 (c int);
+
+insert into t values(3),(10);
+
+explain select a from t,t2,t3 where
+ (a>3 and null between 0 and 10) is null
+ ;
+
+explain select a from t,t2,t3 where
+ (a>5 or null between 0 and 10) and (a*a < 101)
+ and t.a=t2.b and t.a=t3.c
+ ;

http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/test/results/clientpositive/fold_to_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/fold_to_null.q.out b/ql/src/test/results/clientpositive/fold_to_null.q.out
new file mode 100644
index 0000000..896856d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/fold_to_null.q.out
@@ -0,0 +1,209 @@
+PREHOOK: query: create table t (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: create table t2 (b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: create table t3 (c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3 (c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: insert into t values(3),(10)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values(3),(10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a SCRIPT []
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select a from t,t2,t3 where
+ (a>3 and null between 0 and 10) is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t,t2,t3 where
+ (a>3 and null between 0 and 10) is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t
+            filterExpr: ((a > 3) and null) is null (type: boolean)
+            Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: ((a > 3) and null) is null (type: boolean)
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: a (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int)
+          TableScan
+            alias: t3
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+          TableScan
+            alias: t2
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            Select Operator
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+              Reduce Output Operator
+                sort order: 
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 0 to 2
+          keys:
+            0 
+            1 
+            2 
+          outputColumnNames: _col0
+          Statistics: Num rows: 1 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: explain select a from t,t2,t3 where
+ (a>5 or null between 0 and 10) and (a*a < 101)
+ and t.a=t2.b and t.a=t3.c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t,t2,t3 where
+ (a>5 or null between 0 and 10) and (a*a < 101)
+ and t.a=t2.b and t.a=t3.c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: t
+            filterExpr: (((a > 5) or null) and ((a * a) < 101) and a is not null) (type: boolean)
+            Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (((a * a) < 101) and ((a > 5) or null) and a is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: a (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+          TableScan
+            alias: t2
+            filterExpr: (((b > 5) or null) and ((b * b) < 101) and b is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (((b * b) < 101) and ((b > 5) or null) and b is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: b (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+          TableScan
+            alias: t3
+            filterExpr: (((c > 5) or null) and ((c * c) < 101) and c is not null) (type: boolean)
+            Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+            Filter Operator
+              predicate: (((c * c) < 101) and ((c > 5) or null) and c is not null) (type: boolean)
+              Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+              Select Operator
+                expressions: c (type: int)
+                outputColumnNames: _col0
+                Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: int)
+                  Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+               Inner Join 0 to 2
+          keys:
+            0 _col0 (type: int)
+            1 _col0 (type: int)
+            2 _col0 (type: int)
+          outputColumnNames: _col0
+          Statistics: Num rows: 2 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 2 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+

http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/test/results/clientpositive/literal_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/literal_decimal.q.out b/ql/src/test/results/clientpositive/literal_decimal.q.out
index 61f9f7f..64112ec 100644
--- a/ql/src/test/results/clientpositive/literal_decimal.q.out
+++ b/ql/src/test/results/clientpositive/literal_decimal.q.out
@@ -18,12 +18,12 @@ STAGE PLANS:
           alias: src
           Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
-            expressions: -1 (type: decimal(1,0)), 0 (type: decimal(1,0)), 1 (type: decimal(1,0)), 3.14 (type: decimal(3,2)), -3.14 (type: decimal(3,2)), 99999999999999999 (type: decimal(17,0)), 99999999999999999.9999999999999 (type: decimal(30,13)), null (type: void)
+            expressions: -1 (type: decimal(1,0)), 0 (type: decimal(1,0)), 1 (type: decimal(1,0)), 3.14 (type: decimal(3,2)), -3.14 (type: decimal(3,2)), 99999999999999999 (type: decimal(17,0)), 99999999999999999.9999999999999 (type: decimal(30,13)), null (type: decimal(1,0))
             outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
-            Statistics: Num rows: 500 Data size: 392004 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 500 Data size: 392112 Basic stats: COMPLETE Column stats: COMPLETE
             Limit
               Number of rows: 1
-              Statistics: Num rows: 1 Data size: 788 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE
               ListSink
 
 PREHOOK: query: SELECT -1BD, 0BD, 1BD, 3.14BD, -3.14BD, 99999999999999999BD, 99999999999999999.9999999999999BD, 1E99BD FROM src LIMIT 1