You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ga...@apache.org on 2017/12/18 23:25:49 UTC
[06/50] [abbrv] hive git commit: HIVE-18272: Fix check-style
violations in subquery code (Vineet Garg, reviewed by Ashutosh Chauhan)
http://git-wip-us.apache.org/repos/asf/hive/blob/ca96613d/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
index 90aab6e..4758a37 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveSubQueryRemoveRule.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
-import org.apache.calcite.plan.RelOptRuleOperand;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Aggregate;
@@ -42,7 +41,6 @@ import org.apache.calcite.sql.type.InferTypes;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.sql.type.SqlTypeName;
-import org.apache.calcite.tools.RelBuilderFactory;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;
@@ -67,7 +65,6 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
* TODO:
* Reason this is replicated instead of using Calcite's is
* Calcite creates null literal with null type but hive needs it to be properly typed
- * Need fix for Calcite-1493
*
* <p>Sub-queries are represented by {@link RexSubQuery} expressions.
*
@@ -76,493 +73,491 @@ import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter;
* the rewrite, and the product of the rewrite will be a {@link Correlate}.
* The Correlate can be removed using {@link RelDecorrelator}.
*/
-public class HiveSubQueryRemoveRule extends RelOptRule{
-
- private HiveConf conf;
-
- public HiveSubQueryRemoveRule(HiveConf conf) {
- super(operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE,
- any()),
- HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter") ;
- this.conf = conf;
-
+public class HiveSubQueryRemoveRule extends RelOptRule {
+
+ private HiveConf conf;
+
+ public HiveSubQueryRemoveRule(HiveConf conf) {
+ super(operand(RelNode.class, null, HiveSubQueryFinder.RELNODE_PREDICATE,
+ any()),
+ HiveRelFactories.HIVE_BUILDER, "SubQueryRemoveRule:Filter");
+ this.conf = conf;
+ }
+ public void onMatch(RelOptRuleCall call) {
+ final RelNode relNode = call.rel(0);
+ final HiveSubQRemoveRelBuilder builder =
+ new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
+
+ // if subquery is in FILTER
+ if(relNode instanceof Filter) {
+ final Filter filter = call.rel(0);
+ final RexSubQuery e =
+ RexUtil.SubQueryFinder.find(filter.getCondition());
+ assert e != null;
+
+ final RelOptUtil.Logic logic =
+ LogicVisitor.find(RelOptUtil.Logic.TRUE,
+ ImmutableList.of(filter.getCondition()), e);
+ builder.push(filter.getInput());
+ final int fieldCount = builder.peek().getRowType().getFieldCount();
+
+ assert(filter instanceof HiveFilter);
+ SubqueryConf subqueryConfig = filter.getCluster().getPlanner().
+ getContext().unwrap(SubqueryConf.class);
+ boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
+ boolean hasNoWindowingAndNoGby =
+ subqueryConfig.getScalarAggWithoutGbyWindowing().contains(e.rel);
+
+ final RexNode target = apply(e, HiveFilter.getVariablesSet(e), logic,
+ builder, 1, fieldCount, isCorrScalarQuery, hasNoWindowingAndNoGby);
+ final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+ builder.filter(shuttle.apply(filter.getCondition()));
+ builder.project(fields(builder, filter.getRowType().getFieldCount()));
+ call.transformTo(builder.build());
+ } else if(relNode instanceof Project) {
+ // if subquery is in PROJECT
+ final Project project = call.rel(0);
+ final RexSubQuery e =
+ RexUtil.SubQueryFinder.find(project.getProjects());
+ assert e != null;
+
+ final RelOptUtil.Logic logic =
+ LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN,
+ project.getProjects(), e);
+ builder.push(project.getInput());
+ final int fieldCount = builder.peek().getRowType().getFieldCount();
+
+ SubqueryConf subqueryConfig =
+ project.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class);
+ boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
+ boolean hasNoWindowingAndNoGby =
+ subqueryConfig.getScalarAggWithoutGbyWindowing().contains(e.rel);
+
+ final RexNode target = apply(e, HiveFilter.getVariablesSet(e),
+ logic, builder, 1, fieldCount, isCorrScalarQuery, hasNoWindowingAndNoGby);
+ final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
+ builder.project(shuttle.apply(project.getProjects()),
+ project.getRowType().getFieldNames());
+ call.transformTo(builder.build());
}
- public void onMatch(RelOptRuleCall call) {
- final RelNode relNode = call.rel(0);
- //TODO: replace HiveSubQRemoveRelBuilder with calcite's once calcite 1.11.0 is released
- final HiveSubQRemoveRelBuilder builder = new HiveSubQRemoveRelBuilder(null, call.rel(0).getCluster(), null);
-
- // if subquery is in FILTER
- if(relNode instanceof Filter) {
- final Filter filter = call.rel(0);
- final RexSubQuery e =
- RexUtil.SubQueryFinder.find(filter.getCondition());
- assert e != null;
-
- final RelOptUtil.Logic logic =
- LogicVisitor.find(RelOptUtil.Logic.TRUE,
- ImmutableList.of(filter.getCondition()), e);
- builder.push(filter.getInput());
- final int fieldCount = builder.peek().getRowType().getFieldCount();
-
- assert(filter instanceof HiveFilter);
- SubqueryConf subqueryConfig = filter.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class);
- boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
- boolean hasNoWindowingAndNoGby = subqueryConfig.getScalarAggWithoutGbyWindowing().contains(e.rel);
-
- final RexNode target = apply(e, HiveFilter.getVariablesSet(e), logic,
- builder, 1, fieldCount, isCorrScalarQuery, hasNoWindowingAndNoGby);
- final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
- builder.filter(shuttle.apply(filter.getCondition()));
- builder.project(fields(builder, filter.getRowType().getFieldCount()));
- call.transformTo(builder.build());
- }
- // if subquery is in PROJECT
- else if(relNode instanceof Project) {
- final Project project = call.rel(0);
- final RexSubQuery e =
- RexUtil.SubQueryFinder.find(project.getProjects());
- assert e != null;
-
- final RelOptUtil.Logic logic =
- LogicVisitor.find(RelOptUtil.Logic.TRUE_FALSE_UNKNOWN,
- project.getProjects(), e);
- builder.push(project.getInput());
- final int fieldCount = builder.peek().getRowType().getFieldCount();
-
- SubqueryConf subqueryConfig = project.getCluster().getPlanner().getContext().unwrap(SubqueryConf.class);
- boolean isCorrScalarQuery = subqueryConfig.getCorrScalarRexSQWithAgg().contains(e.rel);
- boolean hasNoWindowingAndNoGby = subqueryConfig.getScalarAggWithoutGbyWindowing().contains(e.rel);
-
- final RexNode target = apply(e, HiveFilter.getVariablesSet(e),
- logic, builder, 1, fieldCount, isCorrScalarQuery, hasNoWindowingAndNoGby);
- final RexShuttle shuttle = new ReplaceSubQueryShuttle(e, target);
- builder.project(shuttle.apply(project.getProjects()),
- project.getRowType().getFieldNames());
- call.transformTo(builder.build());
- }
+ }
+
+ // given a subquery it checks to see what is the aggegate function
+ /// if COUNT returns true since COUNT produces 0 on empty result set
+ private boolean isAggZeroOnEmpty(RexSubQuery e) {
+ //as this is corr scalar subquery with agg we expect one aggregate
+ assert(e.getKind() == SqlKind.SCALAR_QUERY);
+ assert(e.rel.getInputs().size() == 1);
+ Aggregate relAgg = (Aggregate)e.rel.getInput(0);
+ assert(relAgg.getAggCallList().size() == 1); //should only have one aggregate
+ if(relAgg.getAggCallList().get(0).getAggregation().getKind() == SqlKind.COUNT) {
+ return true;
}
-
- /*private HiveSubQueryRemoveRule(RelOptRuleOperand operand,
- RelBuilderFactory relBuilderFactory,
- String description) {
- super(operand, relBuilderFactory, description);
- } */
-
- // given a subquery it checks to see what is the aggegate function
- /// if COUNT returns true since COUNT produces 0 on empty result set
- private boolean isAggZeroOnEmpty(RexSubQuery e) {
- //as this is corr scalar subquery with agg we expect one aggregate
- assert(e.getKind() == SqlKind.SCALAR_QUERY);
- assert(e.rel.getInputs().size() == 1);
- Aggregate relAgg = (Aggregate)e.rel.getInput(0);
- assert( relAgg.getAggCallList().size() == 1); //should only have one aggregate
- if( relAgg.getAggCallList().get(0).getAggregation().getKind() == SqlKind.COUNT ) {
- return true;
+ return false;
+ }
+
+ private SqlTypeName getAggTypeForScalarSub(RexSubQuery e) {
+ assert(e.getKind() == SqlKind.SCALAR_QUERY);
+ assert(e.rel.getInputs().size() == 1);
+ Aggregate relAgg = (Aggregate)e.rel.getInput(0);
+ assert(relAgg.getAggCallList().size() == 1); //should only have one aggregate
+ return relAgg.getAggCallList().get(0).getType().getSqlTypeName();
+ }
+
+ protected RexNode apply(RexSubQuery e, Set<CorrelationId> variablesSet,
+ RelOptUtil.Logic logic,
+ HiveSubQRemoveRelBuilder builder, int inputCount, int offset,
+ boolean isCorrScalarAgg,
+ boolean hasNoWindowingAndNoGby) {
+ switch (e.getKind()) {
+ case SCALAR_QUERY:
+ // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check
+ // since it is guaranteed to produce at most one row
+ if(!hasNoWindowingAndNoGby) {
+ final List<RexNode> parentQueryFields = new ArrayList<>();
+ if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
+ // we want to have project after join since sq_count_check's count() expression wouldn't
+ // be needed further up
+ parentQueryFields.addAll(builder.fields());
}
- return false;
- }
- private SqlTypeName getAggTypeForScalarSub(RexSubQuery e) {
- assert(e.getKind() == SqlKind.SCALAR_QUERY);
- assert(e.rel.getInputs().size() == 1);
- Aggregate relAgg = (Aggregate)e.rel.getInput(0);
- assert( relAgg.getAggCallList().size() == 1); //should only have one aggregate
- return relAgg.getAggCallList().get(0).getType().getSqlTypeName();
- }
- protected RexNode apply(RexSubQuery e, Set<CorrelationId> variablesSet,
- RelOptUtil.Logic logic,
- HiveSubQRemoveRelBuilder builder, int inputCount, int offset,
- boolean isCorrScalarAgg,
- boolean hasNoWindowingAndNoGby ) {
- switch (e.getKind()) {
- case SCALAR_QUERY:
- // if scalar query has aggregate and no windowing and no gby avoid adding sq_count_check
- // since it is guaranteed to produce at most one row
- if(!hasNoWindowingAndNoGby) {
- final List<RexNode> parentQueryFields = new ArrayList<>();
- if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
- // we want to have project after join since sq_count_check's count() expression wouldn't
- // be needed further up
- parentQueryFields.addAll(builder.fields());
- }
-
- builder.push(e.rel);
- // returns single row/column
- builder.aggregate(builder.groupKey(), builder.count(false, "cnt"));
-
- SqlFunction countCheck =
- new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT,
- InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION);
-
- // we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer
- // ends up getting rid of Project since it is not used further up the tree
- builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
- builder.call(countCheck, builder.field("cnt")), builder.literal(1)));
- if (!variablesSet.isEmpty()) {
- builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
- } else
- builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
-
- if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
- builder.project(parentQueryFields);
- }
- else {
- offset++;
- }
-
- }
- if(isCorrScalarAgg) {
- // Transformation :
- // Outer Query Left Join (inner query) on correlated predicate and preserve rows only from left side.
- builder.push(e.rel);
- final List<RexNode> parentQueryFields = new ArrayList<>();
- parentQueryFields.addAll(builder.fields());
-
- // id is appended since there could be multiple scalar subqueries and FILTER
- // is created using field name
- String indicator = "alwaysTrue" + e.rel.getId();
- parentQueryFields.add(builder.alias(builder.literal(true), indicator));
- builder.project(parentQueryFields);
- builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
-
- final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
- RexNode literal;
- if(isAggZeroOnEmpty(e)) {
- // since count has a return type of BIG INT we need to make a literal of type big int
- // relbuilder's literal doesn't allow this
- literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0));
- }
- else {
- literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e));
- }
- operands.add((builder.isNull(builder.field(indicator))), literal);
- operands.add(field(builder, 1, builder.fields().size()-2));
- return builder.call(SqlStdOperatorTable.CASE, operands.build());
- }
-
- //Transformation is to left join for correlated predicates and inner join otherwise,
- // but do a count on inner side before that to make sure it generates atmost 1 row.
- builder.push(e.rel);
- builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
- return field(builder, inputCount, offset);
-
- case IN:
- case EXISTS:
- // Most general case, where the left and right keys might have nulls, and
- // caller requires 3-valued logic return.
- //
- // select e.deptno, e.deptno in (select deptno from emp)
- //
- // becomes
- //
- // select e.deptno,
- // case
- // when ct.c = 0 then false
- // when dt.i is not null then true
- // when e.deptno is null then null
- // when ct.ck < ct.c then null
- // else false
- // end
- // from e
- // left join (
- // (select count(*) as c, count(deptno) as ck from emp) as ct
- // cross join (select distinct deptno, true as i from emp)) as dt
- // on e.deptno = dt.deptno
- //
- // If keys are not null we can remove "ct" and simplify to
- //
- // select e.deptno,
- // case
- // when dt.i is not null then true
- // else false
- // end
- // from e
- // left join (select distinct deptno, true as i from emp) as dt
- // on e.deptno = dt.deptno
- //
- // We could further simplify to
- //
- // select e.deptno,
- // dt.i is not null
- // from e
- // left join (select distinct deptno, true as i from emp) as dt
- // on e.deptno = dt.deptno
- //
- // but have not yet.
- //
- // If the logic is TRUE we can just kill the record if the condition
- // evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner
- // join:
- //
- // select e.deptno,
- // true
- // from e
- // inner join (select distinct deptno from emp) as dt
- // on e.deptno = dt.deptno
- //
-
- builder.push(e.rel);
- final List<RexNode> fields = new ArrayList<>();
- switch (e.getKind()) {
- case IN:
- fields.addAll(builder.fields());
- // Transformation: sq_count_check(count(*), true) FILTER is generated on top
- // of subquery which is then joined (LEFT or INNER) with outer query
- // This transformation is done to add run time check using sq_count_check to
- // throw an error if subquery is producing zero row, since with aggregate this
- // will produce wrong results (because we further rewrite such queries into JOIN)
- if(isCorrScalarAgg) {
- // returns single row/column
- builder.aggregate(builder.groupKey(),
- builder.count(false, "cnt_in"));
-
- if (!variablesSet.isEmpty()) {
- builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
- } else {
- builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
- }
-
- SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT,
- InferTypes.RETURN_TYPE, OperandTypes.NUMERIC, SqlFunctionCategory.USER_DEFINED_FUNCTION);
-
- // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT because RelFieldTrimmer
- // ends up getting rid of Project since it is not used further up the tree
- builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN,
- //true here indicates that sq_count_check is for IN/NOT IN subqueries
- builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)),
- builder.literal(0)));
- offset = offset + 1;
- builder.push(e.rel);
- }
- }
-
- // First, the cross join
- switch (logic) {
- case TRUE_FALSE_UNKNOWN:
- case UNKNOWN_AS_TRUE:
- // Since EXISTS/NOT EXISTS are not affected by presence of
- // null keys we do not need to generate count(*), count(c)
- if (e.getKind() == SqlKind.EXISTS) {
- logic = RelOptUtil.Logic.TRUE_FALSE;
- break;
- }
- builder.aggregate(builder.groupKey(),
- builder.count(false, "c"),
- builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck",
- builder.fields()));
- builder.as("ct");
- if( !variablesSet.isEmpty())
- {
- //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
- builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
- }
- else
- builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
-
- offset += 2;
- builder.push(e.rel);
- break;
- }
-
- // Now the left join
- switch (logic) {
- case TRUE:
- if (fields.isEmpty()) {
- builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId()));
- if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
- // avoid adding group by for correlated IN/EXISTS queries
- // since this is rewritting into semijoin
- break;
- }
- else {
- builder.aggregate(builder.groupKey(0));
- }
- } else {
- if(!variablesSet.isEmpty() && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
- // avoid adding group by for correlated IN/EXISTS queries
- // since this is rewritting into semijoin
- break;
- }
- else {
- builder.aggregate(builder.groupKey(fields));
- }
- }
- break;
- default:
- fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId()));
- builder.project(fields);
- builder.distinct();
- }
- builder.as("dt");
- final List<RexNode> conditions = new ArrayList<>();
- for (Pair<RexNode, RexNode> pair
- : Pair.zip(e.getOperands(), builder.fields())) {
- conditions.add(
- builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
- }
- switch (logic) {
- case TRUE:
- builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true);
- return builder.literal(true);
- }
- builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
-
- final List<RexNode> keyIsNulls = new ArrayList<>();
- for (RexNode operand : e.getOperands()) {
- if (operand.getType().isNullable()) {
- keyIsNulls.add(builder.isNull(operand));
- }
- }
- final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
- switch (logic) {
- case TRUE_FALSE_UNKNOWN:
- case UNKNOWN_AS_TRUE:
- operands.add(
- builder.equals(builder.field("ct", "c"), builder.literal(0)),
- builder.literal(false));
- //now that we are using LEFT OUTER JOIN to join inner count, count(*)
- // with outer table, we wouldn't be able to tell if count is zero
- // for inner table since inner join with correlated values will get rid
- // of all values where join cond is not true (i.e where actual inner table
- // will produce zero result). To handle this case we need to check both
- // count is zero or count is null
- operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false));
- break;
- }
- operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())),
- builder.literal(true));
- if (!keyIsNulls.isEmpty()) {
- //Calcite creates null literal with Null type here but because HIVE doesn't support null type
- // it is appropriately typed boolean
- operands.add(builder.or(keyIsNulls), e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN));
- // we are creating filter here so should not be returning NULL. Not sure why Calcite return NULL
- //operands.add(builder.or(keyIsNulls), builder.literal(false));
- }
- RexNode b = builder.literal(true);
- switch (logic) {
- case TRUE_FALSE_UNKNOWN:
- b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN);
- // fall through
- case UNKNOWN_AS_TRUE:
- operands.add(
- builder.call(SqlStdOperatorTable.LESS_THAN,
- builder.field("ct", "ck"), builder.field("ct", "c")),
- b);
- break;
- }
- operands.add(builder.literal(false));
- return builder.call(SqlStdOperatorTable.CASE, operands.build());
-
- default:
- throw new AssertionError(e.getKind());
+ builder.push(e.rel);
+ // returns single row/column
+ builder.aggregate(builder.groupKey(), builder.count(false, "cnt"));
+
+ SqlFunction countCheck =
+ new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION, ReturnTypes.BIGINT,
+ InferTypes.RETURN_TYPE, OperandTypes.NUMERIC,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION);
+
+ //we create FILTER (sq_count_check(count()) <= 1) instead of PROJECT because RelFieldTrimmer
+ // ends up getting rid of Project since it is not used further up the tree
+ builder.filter(builder.call(SqlStdOperatorTable.LESS_THAN_OR_EQUAL,
+ builder.call(countCheck, builder.field("cnt")), builder.literal(1)));
+ if (!variablesSet.isEmpty()) {
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ } else {
+ builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
}
- }
- /** Returns a reference to a particular field, by offset, across several
- * inputs on a {@link RelBuilder}'s stack. */
- private RexInputRef field(HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
- for (int inputOrdinal = 0;;) {
- final RelNode r = builder.peek(inputCount, inputOrdinal);
- if (offset < r.getRowType().getFieldCount()) {
- return builder.field(inputCount, inputOrdinal, offset);
- }
- ++inputOrdinal;
- offset -= r.getRowType().getFieldCount();
+ if (conf.getBoolVar(ConfVars.HIVE_REMOVE_SQ_COUNT_CHECK)) {
+ builder.project(parentQueryFields);
+ } else {
+ offset++;
}
- }
-
- /** Returns a list of expressions that project the first {@code fieldCount}
- * fields of the top input on a {@link RelBuilder}'s stack. */
- private static List<RexNode> fields(HiveSubQRemoveRelBuilder builder, int fieldCount) {
- final List<RexNode> projects = new ArrayList<>();
- for (int i = 0; i < fieldCount; i++) {
- projects.add(builder.field(i));
+ }
+ if(isCorrScalarAgg) {
+ // Transformation :
+ // Outer Query Left Join (inner query) on correlated predicate
+ // and preserve rows only from left side.
+ builder.push(e.rel);
+ final List<RexNode> parentQueryFields = new ArrayList<>();
+ parentQueryFields.addAll(builder.fields());
+
+ // id is appended since there could be multiple scalar subqueries and FILTER
+ // is created using field name
+ String indicator = "alwaysTrue" + e.rel.getId();
+ parentQueryFields.add(builder.alias(builder.literal(true), indicator));
+ builder.project(parentQueryFields);
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+
+ final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
+ RexNode literal;
+ if(isAggZeroOnEmpty(e)) {
+ // since count has a return type of BIG INT we need to make a literal of type big int
+ // relbuilder's literal doesn't allow this
+ literal = e.rel.getCluster().getRexBuilder().makeBigintLiteral(new BigDecimal(0));
+ } else {
+ literal = e.rel.getCluster().getRexBuilder().makeNullLiteral(getAggTypeForScalarSub(e));
}
- return projects;
- }
-
- /** Shuttle that replaces occurrences of a given
- * {@link org.apache.calcite.rex.RexSubQuery} with a replacement
- * expression. */
- private static class ReplaceSubQueryShuttle extends RexShuttle {
- private final RexSubQuery subQuery;
- private final RexNode replacement;
-
- public ReplaceSubQueryShuttle(RexSubQuery subQuery, RexNode replacement) {
- this.subQuery = subQuery;
- this.replacement = replacement;
+ operands.add((builder.isNull(builder.field(indicator))), literal);
+ operands.add(field(builder, 1, builder.fields().size()-2));
+ return builder.call(SqlStdOperatorTable.CASE, operands.build());
+ }
+
+ //Transformation is to left join for correlated predicates and inner join otherwise,
+ // but do a count on inner side before that to make sure it generates atmost 1 row.
+ builder.push(e.rel);
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ return field(builder, inputCount, offset);
+
+ case IN:
+ case EXISTS:
+ // Most general case, where the left and right keys might have nulls, and
+ // caller requires 3-valued logic return.
+ //
+ // select e.deptno, e.deptno in (select deptno from emp)
+ //
+ // becomes
+ //
+ // select e.deptno,
+ // case
+ // when ct.c = 0 then false
+ // when dt.i is not null then true
+ // when e.deptno is null then null
+ // when ct.ck < ct.c then null
+ // else false
+ // end
+ // from e
+ // left join (
+ // (select count(*) as c, count(deptno) as ck from emp) as ct
+ // cross join (select distinct deptno, true as i from emp)) as dt
+ // on e.deptno = dt.deptno
+ //
+ // If keys are not null we can remove "ct" and simplify to
+ //
+ // select e.deptno,
+ // case
+ // when dt.i is not null then true
+ // else false
+ // end
+ // from e
+ // left join (select distinct deptno, true as i from emp) as dt
+ // on e.deptno = dt.deptno
+ //
+ // We could further simplify to
+ //
+ // select e.deptno,
+ // dt.i is not null
+ // from e
+ // left join (select distinct deptno, true as i from emp) as dt
+ // on e.deptno = dt.deptno
+ //
+ // but have not yet.
+ //
+ // If the logic is TRUE we can just kill the record if the condition
+ // evaluates to FALSE or UNKNOWN. Thus the query simplifies to an inner
+ // join:
+ //
+ // select e.deptno,
+ // true
+ // from e
+ // inner join (select distinct deptno from emp) as dt
+ // on e.deptno = dt.deptno
+ //
+
+ builder.push(e.rel);
+ final List<RexNode> fields = new ArrayList<>();
+ switch (e.getKind()) {
+ case IN:
+ fields.addAll(builder.fields());
+ // Transformation: sq_count_check(count(*), true) FILTER is generated on top
+ // of subquery which is then joined (LEFT or INNER) with outer query
+ // This transformation is done to add run time check using sq_count_check to
+ // throw an error if subquery is producing zero row, since with aggregate this
+ // will produce wrong results (because we further rewrite such queries into JOIN)
+ if(isCorrScalarAgg) {
+ // returns single row/column
+ builder.aggregate(builder.groupKey(),
+ builder.count(false, "cnt_in"));
+
+ if (!variablesSet.isEmpty()) {
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ } else {
+ builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
+ }
+
+ SqlFunction inCountCheck = new SqlFunction("sq_count_check", SqlKind.OTHER_FUNCTION,
+ ReturnTypes.BIGINT, InferTypes.RETURN_TYPE, OperandTypes.NUMERIC,
+ SqlFunctionCategory.USER_DEFINED_FUNCTION);
+
+ // we create FILTER (sq_count_check(count()) > 0) instead of PROJECT
+ // because RelFieldTrimmer ends up getting rid of Project
+ // since it is not used further up the tree
+ builder.filter(builder.call(SqlStdOperatorTable.GREATER_THAN,
+ //true here indicates that sq_count_check is for IN/NOT IN subqueries
+ builder.call(inCountCheck, builder.field("cnt_in"), builder.literal(true)),
+ builder.literal(0)));
+ offset = offset + 1;
+ builder.push(e.rel);
}
-
- @Override public RexNode visitSubQuery(RexSubQuery subQuery) {
- return RexUtil.eq(subQuery, this.subQuery) ? replacement : subQuery;
+ }
+
+ // First, the cross join
+ switch (logic) {
+ case TRUE_FALSE_UNKNOWN:
+ case UNKNOWN_AS_TRUE:
+ // Since EXISTS/NOT EXISTS are not affected by presence of
+ // null keys we do not need to generate count(*), count(c)
+ if (e.getKind() == SqlKind.EXISTS) {
+ logic = RelOptUtil.Logic.TRUE_FALSE;
+ break;
}
- }
-
- // TODO:
- // Following HiveSubQueryFinder has been copied from RexUtil::SubQueryFinder
- // since there is BUG in there (CALCITE-1726).
- // Once CALCITE-1726 is fixed we should get rid of the following code
- /** Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if
- * applied to an expression that contains a {@link RexSubQuery}. */
- public static class HiveSubQueryFinder extends RexVisitorImpl<Void> {
- public static final HiveSubQueryFinder INSTANCE = new HiveSubQueryFinder();
-
- /** Returns whether a {@link Project} contains a sub-query. */
- public static final Predicate<RelNode> RELNODE_PREDICATE=
- new Predicate<RelNode>() {
- public boolean apply(RelNode relNode) {
- if (relNode instanceof Project) {
- Project project = (Project)relNode;
- for (RexNode node : project.getProjects()) {
- try {
- node.accept(INSTANCE);
- } catch (Util.FoundOne e) {
- return true;
- }
- }
- return false;
- }
- else if (relNode instanceof Filter) {
- try {
- ((Filter)relNode).getCondition().accept(INSTANCE);
- return false;
- } catch (Util.FoundOne e) {
- return true;
- }
- }
- return false;
- }
- };
-
- private HiveSubQueryFinder() {
- super(true);
+ builder.aggregate(builder.groupKey(),
+ builder.count(false, "c"),
+ builder.aggregateCall(SqlStdOperatorTable.COUNT, false, null, "ck",
+ builder.fields()));
+ builder.as("ct");
+ if(!variablesSet.isEmpty()) {
+ //builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
+ builder.join(JoinRelType.LEFT, builder.literal(true), variablesSet);
+ } else {
+ builder.join(JoinRelType.INNER, builder.literal(true), variablesSet);
}
- @Override public Void visitSubQuery(RexSubQuery subQuery) {
- throw new Util.FoundOne(subQuery);
+ offset += 2;
+ builder.push(e.rel);
+ break;
+ }
+
+ // Now the left join
+ switch (logic) {
+ case TRUE:
+ if (fields.isEmpty()) {
+ builder.project(builder.alias(builder.literal(true), "i" + e.rel.getId()));
+ if(!variablesSet.isEmpty()
+ && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+ // avoid adding group by for correlated IN/EXISTS queries
+ // since this is rewritting into semijoin
+ break;
+ } else {
+ builder.aggregate(builder.groupKey(0));
+ }
+ } else {
+ if(!variablesSet.isEmpty()
+ && (e.getKind() == SqlKind.EXISTS || e.getKind() == SqlKind.IN)) {
+ // avoid adding group by for correlated IN/EXISTS queries
+ // since this is rewritting into semijoin
+ break;
+ } else {
+ builder.aggregate(builder.groupKey(fields));
+ }
}
+ break;
+ default:
+ fields.add(builder.alias(builder.literal(true), "i" + e.rel.getId()));
+ builder.project(fields);
+ builder.distinct();
+ }
+ builder.as("dt");
+ final List<RexNode> conditions = new ArrayList<>();
+ for (Pair<RexNode, RexNode> pair
+ : Pair.zip(e.getOperands(), builder.fields())) {
+ conditions.add(
+ builder.equals(pair.left, RexUtil.shift(pair.right, offset)));
+ }
+ switch (logic) {
+ case TRUE:
+ builder.join(JoinRelType.INNER, builder.and(conditions), variablesSet, true);
+ return builder.literal(true);
+ }
+ builder.join(JoinRelType.LEFT, builder.and(conditions), variablesSet);
+
+ final List<RexNode> keyIsNulls = new ArrayList<>();
+ for (RexNode operand : e.getOperands()) {
+ if (operand.getType().isNullable()) {
+ keyIsNulls.add(builder.isNull(operand));
+ }
+ }
+ final ImmutableList.Builder<RexNode> operands = ImmutableList.builder();
+ switch (logic) {
+ case TRUE_FALSE_UNKNOWN:
+ case UNKNOWN_AS_TRUE:
+ operands.add(
+ builder.equals(builder.field("ct", "c"), builder.literal(0)),
+ builder.literal(false));
+ //now that we are using LEFT OUTER JOIN to join inner count, count(*)
+ // with outer table, we wouldn't be able to tell if count is zero
+ // for inner table since inner join with correlated values will get rid
+ // of all values where join cond is not true (i.e where actual inner table
+ // will produce zero result). To handle this case we need to check both
+ // count is zero or count is null
+ operands.add((builder.isNull(builder.field("ct", "c"))), builder.literal(false));
+ break;
+ }
+ operands.add(builder.isNotNull(builder.field("dt", "i" + e.rel.getId())),
+ builder.literal(true));
+ if (!keyIsNulls.isEmpty()) {
+ //Calcite creates null literal with Null type here but
+ // because HIVE doesn't support null type it is appropriately typed boolean
+ operands.add(builder.or(keyIsNulls),
+ e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN));
+ // we are creating filter here so should not be returning NULL.
+ // Not sure why Calcite return NULL
+ }
+ RexNode b = builder.literal(true);
+ switch (logic) {
+ case TRUE_FALSE_UNKNOWN:
+ b = e.rel.getCluster().getRexBuilder().makeNullLiteral(SqlTypeName.BOOLEAN);
+ // fall through
+ case UNKNOWN_AS_TRUE:
+ operands.add(
+ builder.call(SqlStdOperatorTable.LESS_THAN,
+ builder.field("ct", "ck"), builder.field("ct", "c")),
+ b);
+ break;
+ }
+ operands.add(builder.literal(false));
+ return builder.call(SqlStdOperatorTable.CASE, operands.build());
+
+ default:
+ throw new AssertionError(e.getKind());
+ }
+ }
+
+ /** Returns a reference to a particular field, by offset, across several
+ * inputs on a {@link RelBuilder}'s stack. */
+ private RexInputRef field(HiveSubQRemoveRelBuilder builder, int inputCount, int offset) {
+ for (int inputOrdinal = 0;;) {
+ final RelNode r = builder.peek(inputCount, inputOrdinal);
+ if (offset < r.getRowType().getFieldCount()) {
+ return builder.field(inputCount, inputOrdinal, offset);
+ }
+ ++inputOrdinal;
+ offset -= r.getRowType().getFieldCount();
+ }
+ }
+
+ /** Returns a list of expressions that project the first {@code fieldCount}
+ * fields of the top input on a {@link RelBuilder}'s stack. */
+ private static List<RexNode> fields(HiveSubQRemoveRelBuilder builder, int fieldCount) {
+ final List<RexNode> projects = new ArrayList<>();
+ for (int i = 0; i < fieldCount; i++) {
+ projects.add(builder.field(i));
+ }
+ return projects;
+ }
+
+ /** Shuttle that replaces occurrences of a given
+ * {@link org.apache.calcite.rex.RexSubQuery} with a replacement
+ * expression. */
+ private static class ReplaceSubQueryShuttle extends RexShuttle {
+ private final RexSubQuery subQuery;
+ private final RexNode replacement;
+
+ ReplaceSubQueryShuttle(RexSubQuery subQuery, RexNode replacement) {
+ this.subQuery = subQuery;
+ this.replacement = replacement;
+ }
- public static RexSubQuery find(Iterable<RexNode> nodes) {
- for (RexNode node : nodes) {
+ @Override public RexNode visitSubQuery(RexSubQuery subQuery) {
+ return RexUtil.eq(subQuery, this.subQuery) ? replacement : subQuery;
+ }
+ }
+
+ // TODO:
+ // Following HiveSubQueryFinder has been copied from RexUtil::SubQueryFinder
+ // since there is BUG in there (CALCITE-1726).
+ // Once CALCITE-1726 is fixed we should get rid of the following code
+ /** Visitor that throws {@link org.apache.calcite.util.Util.FoundOne} if
+ * applied to an expression that contains a {@link RexSubQuery}. */
+ public static final class HiveSubQueryFinder extends RexVisitorImpl<Void> {
+ public static final HiveSubQueryFinder INSTANCE = new HiveSubQueryFinder();
+
+ /** Returns whether a {@link Project} contains a sub-query. */
+ public static final Predicate<RelNode> RELNODE_PREDICATE=
+ new Predicate<RelNode>() {
+ public boolean apply(RelNode relNode) {
+ if (relNode instanceof Project) {
+ Project project = (Project)relNode;
+ for (RexNode node : project.getProjects()) {
try {
- node.accept(INSTANCE);
+ node.accept(INSTANCE);
} catch (Util.FoundOne e) {
- return (RexSubQuery) e.getNode();
+ return true;
}
+ }
+ return false;
+ } else if (relNode instanceof Filter) {
+ try {
+ ((Filter)relNode).getCondition().accept(INSTANCE);
+ return false;
+ } catch (Util.FoundOne e) {
+ return true;
+ }
}
- return null;
- }
+ return false;
+ }
+ };
- public static RexSubQuery find(RexNode node) {
- try {
- node.accept(INSTANCE);
- return null;
- } catch (Util.FoundOne e) {
- return (RexSubQuery) e.getNode();
- }
+ private HiveSubQueryFinder() {
+ super(true);
+ }
+
+ @Override public Void visitSubQuery(RexSubQuery subQuery) {
+ throw new Util.FoundOne(subQuery);
+ }
+
+ public static RexSubQuery find(Iterable<RexNode> nodes) {
+ for (RexNode node : nodes) {
+ try {
+ node.accept(INSTANCE);
+ } catch (Util.FoundOne e) {
+ return (RexSubQuery) e.getNode();
}
+ }
+ return null;
+ }
+
+ public static RexSubQuery find(RexNode node) {
+ try {
+ node.accept(INSTANCE);
+ return null;
+ } catch (Util.FoundOne e) {
+ return (RexSubQuery) e.getNode();
+ }
}
+ }
}