You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kg...@apache.org on 2018/10/10 08:39:22 UTC
hive git commit: HIVE-20710: Constant folding may not create null constants without types (Zoltan Haindrich reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/master 2ff9c5229 -> 90e12280c
HIVE-20710: Constant folding may not create null constants without types (Zoltan Haindrich reviewed by Ashutosh Chauhan)
Signed-off-by: Zoltan Haindrich <ki...@rxd.hu>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/90e12280
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/90e12280
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/90e12280
Branch: refs/heads/master
Commit: 90e12280ca06b3df0099a858f58a542f190cf9b3
Parents: 2ff9c52
Author: Zoltan Haindrich <ki...@rxd.hu>
Authored: Wed Oct 10 10:38:00 2018 +0200
Committer: Zoltan Haindrich <ki...@rxd.hu>
Committed: Wed Oct 10 10:38:00 2018 +0200
----------------------------------------------------------------------
.../optimizer/calcite/HiveRexExecutorImpl.java | 3 +-
.../stats/FilterSelectivityEstimator.java | 33 +--
.../calcite/translator/RexNodeConverter.java | 13 +-
.../test/queries/clientpositive/fold_to_null.q | 14 ++
.../results/clientpositive/fold_to_null.q.out | 209 +++++++++++++++++++
.../clientpositive/literal_decimal.q.out | 6 +-
6 files changed, 253 insertions(+), 25 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
index b4bd142..1dede0f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRexExecutorImpl.java
@@ -28,7 +28,6 @@ import org.apache.calcite.rex.RexNode;
import org.apache.hadoop.hive.ql.optimizer.ConstantPropagateProcFactory;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.ExprNodeConverter;
import org.apache.hadoop.hive.ql.optimizer.calcite.translator.RexNodeConverter;
-import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.slf4j.Logger;
@@ -63,7 +62,7 @@ public class HiveRexExecutorImpl extends RexExecutorImpl {
if (constant != null) {
try {
// convert constant back to RexNode
- reducedValues.add(rexNodeConverter.convert((ExprNodeConstantDesc) constant));
+ reducedValues.add(rexNodeConverter.convert(constant));
} catch (Exception e) {
LOG.warn(e.getMessage());
reducedValues.add(rexNode);
http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
index 43f8508..d362e9b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/stats/FilterSelectivityEstimator.java
@@ -31,6 +31,7 @@ import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexInputRef;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.rex.RexVisitorImpl;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlOperator;
@@ -57,6 +58,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
return predicate.accept(this);
}
+ @Override
public Double visitCall(RexCall call) {
if (!deep) {
return 1.0;
@@ -138,17 +140,18 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
* NDV of "f1(x, y, z) != f2(p, q, r)" ->
* "(maxNDV(x,y,z,p,q,r) - 1)/maxNDV(x,y,z,p,q,r)".
* <p>
- *
+ *
* @param call
* @return
*/
private Double computeNotEqualitySelectivity(RexCall call) {
double tmpNDV = getMaxNDV(call);
- if (tmpNDV > 1)
- return (tmpNDV - (double) 1) / tmpNDV;
- else
+ if (tmpNDV > 1) {
+ return (tmpNDV - 1) / tmpNDV;
+ } else {
return 1.0;
+ }
}
/**
@@ -156,7 +159,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
* <p>
* Note that >, >=, <, <=, = ... are considered generic functions and uses
* this method to find their selectivity.
- *
+ *
* @param call
* @return
*/
@@ -171,7 +174,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
* <p>
* Note we compute m1. m2.. by applying selectivity of the disjunctive element
* on the cardinality from child.
- *
+ *
* @param call
* @return
*/
@@ -196,8 +199,9 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
selectivity *= tmpSelectivity;
}
- if (selectivity < 0.0)
+ if (selectivity < 0.0) {
selectivity = 0.0;
+ }
return (1 - selectivity);
}
@@ -205,7 +209,7 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
/**
* Selectivity of conjunctive predicate -> (selectivity of conjunctive
* element1) * (selectivity of conjunctive element2)...
- *
+ *
* @param call
* @return
*/
@@ -226,9 +230,9 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
/**
* Given a RexCall & TableScan find max no of nulls. Currently it picks the
* col with max no of nulls.
- *
+ *
* TODO: improve this
- *
+ *
* @param call
* @param t
* @return
@@ -258,16 +262,18 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
if (op instanceof RexInputRef) {
tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel, mq,
((RexInputRef) op).getIndex());
- if (tmpNDV > maxNDV)
+ if (tmpNDV > maxNDV) {
maxNDV = tmpNDV;
+ }
} else {
irv = new InputReferencedVisitor();
irv.apply(op);
for (Integer childProjIndx : irv.inputPosReferenced) {
tmpNDV = HiveRelMdDistinctRowCount.getDistinctRowCount(this.childRel,
mq, childProjIndx);
- if (tmpNDV > maxNDV)
+ if (tmpNDV > maxNDV) {
maxNDV = tmpNDV;
+ }
}
}
}
@@ -304,8 +310,9 @@ public class FilterSelectivityEstimator extends RexVisitorImpl<Double> {
return op;
}
+ @Override
public Double visitLiteral(RexLiteral literal) {
- if (literal.isAlwaysFalse()) {
+ if (literal.isAlwaysFalse() || RexUtil.isNull(literal)) {
return 0.0;
} else if (literal.isAlwaysTrue()) {
return 1.0;
http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 35aae6a..78b1281 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableList.Builder;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
+
import org.apache.calcite.avatica.util.TimeUnit;
import org.apache.calcite.avatica.util.TimeUnitRange;
import org.apache.calcite.plan.RelOptCluster;
@@ -111,7 +112,6 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
-
public class RexNodeConverter {
private static class InputCtx {
@@ -664,10 +664,10 @@ public class RexNodeConverter {
}
protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException {
- RexBuilder rexBuilder = cluster.getRexBuilder();
- RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
- PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
- RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
+ final RexBuilder rexBuilder = cluster.getRexBuilder();
+ final RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
+ final PrimitiveTypeInfo hiveType = (PrimitiveTypeInfo) literal.getTypeInfo();
+ final RelDataType calciteDataType = TypeConverter.convert(hiveType, dtFactory);
PrimitiveCategory hiveTypeCategory = hiveType.getPrimitiveCategory();
@@ -821,8 +821,7 @@ public class RexNodeConverter {
SqlParserPos(1, 1)));
break;
case VOID:
- calciteLiteral = cluster.getRexBuilder().makeLiteral(null,
- cluster.getTypeFactory().createSqlType(SqlTypeName.NULL), true);
+ calciteLiteral = rexBuilder.makeLiteral(null, calciteDataType, true);
break;
case BINARY:
case UNKNOWN:
http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/test/queries/clientpositive/fold_to_null.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/fold_to_null.q b/ql/src/test/queries/clientpositive/fold_to_null.q
new file mode 100644
index 0000000..002d57f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/fold_to_null.q
@@ -0,0 +1,14 @@
+create table t (a int);
+create table t2 (b int);
+create table t3 (c int);
+
+insert into t values(3),(10);
+
+explain select a from t,t2,t3 where
+ (a>3 and null between 0 and 10) is null
+ ;
+
+explain select a from t,t2,t3 where
+ (a>5 or null between 0 and 10) and (a*a < 101)
+ and t.a=t2.b and t.a=t3.c
+ ;
http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/test/results/clientpositive/fold_to_null.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/fold_to_null.q.out b/ql/src/test/results/clientpositive/fold_to_null.q.out
new file mode 100644
index 0000000..896856d
--- /dev/null
+++ b/ql/src/test/results/clientpositive/fold_to_null.q.out
@@ -0,0 +1,209 @@
+PREHOOK: query: create table t (a int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: create table t2 (b int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: create table t3 (c int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t3
+POSTHOOK: query: create table t3 (c int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t3
+PREHOOK: query: insert into t values(3),(10)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values(3),(10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a SCRIPT []
+Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-1:MAPRED' is a cross product
+PREHOOK: query: explain select a from t,t2,t3 where
+ (a>3 and null between 0 and 10) is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t,t2,t3 where
+ (a>3 and null between 0 and 10) is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ filterExpr: ((a > 3) and null) is null (type: boolean)
+ Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((a > 3) and null) is null (type: boolean)
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ TableScan
+ alias: t3
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ TableScan
+ alias: t2
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ sort order:
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0
+ 1
+ 2
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a from t,t2,t3 where
+ (a>5 or null between 0 and 10) and (a*a < 101)
+ and t.a=t2.b and t.a=t3.c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+PREHOOK: Input: default@t2
+PREHOOK: Input: default@t3
+#### A masked pattern was here ####
+POSTHOOK: query: explain select a from t,t2,t3 where
+ (a>5 or null between 0 and 10) and (a*a < 101)
+ and t.a=t2.b and t.a=t3.c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+POSTHOOK: Input: default@t2
+POSTHOOK: Input: default@t3
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: t
+ filterExpr: (((a > 5) or null) and ((a * a) < 101) and a is not null) (type: boolean)
+ Statistics: Num rows: 2 Data size: 3 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((a * a) < 101) and ((a > 5) or null) and a is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: a (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 1 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: t2
+ filterExpr: (((b > 5) or null) and ((b * b) < 101) and b is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (((b * b) < 101) and ((b > 5) or null) and b is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: b (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ TableScan
+ alias: t3
+ filterExpr: (((c > 5) or null) and ((c * c) < 101) and c is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Filter Operator
+ predicate: (((c * c) < 101) and ((c > 5) or null) and c is not null) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Select Operator
+ expressions: c (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: int)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ keys:
+ 0 _col0 (type: int)
+ 1 _col0 (type: int)
+ 2 _col0 (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 2 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 2 Basic stats: PARTIAL Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/90e12280/ql/src/test/results/clientpositive/literal_decimal.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/literal_decimal.q.out b/ql/src/test/results/clientpositive/literal_decimal.q.out
index 61f9f7f..64112ec 100644
--- a/ql/src/test/results/clientpositive/literal_decimal.q.out
+++ b/ql/src/test/results/clientpositive/literal_decimal.q.out
@@ -18,12 +18,12 @@ STAGE PLANS:
alias: src
Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
- expressions: -1 (type: decimal(1,0)), 0 (type: decimal(1,0)), 1 (type: decimal(1,0)), 3.14 (type: decimal(3,2)), -3.14 (type: decimal(3,2)), 99999999999999999 (type: decimal(17,0)), 99999999999999999.9999999999999 (type: decimal(30,13)), null (type: void)
+ expressions: -1 (type: decimal(1,0)), 0 (type: decimal(1,0)), 1 (type: decimal(1,0)), 3.14 (type: decimal(3,2)), -3.14 (type: decimal(3,2)), 99999999999999999 (type: decimal(17,0)), 99999999999999999.9999999999999 (type: decimal(30,13)), null (type: decimal(1,0))
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7
- Statistics: Num rows: 500 Data size: 392004 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 392112 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 1
- Statistics: Num rows: 1 Data size: 788 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE
ListSink
PREHOOK: query: SELECT -1BD, 0BD, 1BD, 3.14BD, -3.14BD, 99999999999999999BD, 99999999999999999.9999999999999BD, 1E99BD FROM src LIMIT 1