You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/04/27 08:45:24 UTC
[hive] branch master updated: HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis)
This is an automated email from the ASF dual-hosted git repository.
zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7583142cbff HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis)
7583142cbff is described below
commit 7583142cbffcb3958a546a9aaa15700bbc243df9
Author: Alessandro Solimando <al...@gmail.com>
AuthorDate: Mon Jan 24 13:08:56 2022 +0100
HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis)
Closes #2966
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 ++
.../hive/ql/optimizer/calcite/HiveCalciteUtil.java | 52 ++++++++++++++
.../HiveJoinPushTransitivePredicatesRule.java | 82 +++++++++-------------
.../hadoop/hive/ql/parse/CalcitePlanner.java | 7 +-
.../cbo_join_transitive_pred_loop_1.q | 17 +++++
.../cbo_join_transitive_pred_loop_2.q | 24 +++++++
.../cbo_join_transitive_pred_loop_3.q | 23 ++++++
.../cbo_join_transitive_pred_loop_4.q | 23 ++++++
.../llap/cbo_join_transitive_pred_loop_1.q.out | 75 ++++++++++++++++++++
.../llap/cbo_join_transitive_pred_loop_2.q.out | 74 +++++++++++++++++++
.../llap/cbo_join_transitive_pred_loop_3.q.out | 67 ++++++++++++++++++
.../llap/cbo_join_transitive_pred_loop_4.q.out | 73 +++++++++++++++++++
12 files changed, 470 insertions(+), 51 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 99964fc7732..caf223dd91b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2530,6 +2530,10 @@ public class HiveConf extends Configuration {
"If this config is true only pushed down filters remain in the operator tree, \n" +
"and the original filter is removed. If this config is false, the original filter \n" +
"is also left in the operator tree at the original place."),
+ HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN("hive.optimize.join.disjunctive.transitive.predicates.pushdown",
+ true, "Whether to transitively infer disjunctive predicates across joins. \n"
+ + "Disjunctive predicates are hard to simplify and pushing them down might lead to infinite rule matching "
+ + "causing stackoverflow and OOM errors"),
HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
"Whether to transform OR clauses in Filter operators into IN clauses"),
HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 2,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 160bfb86f6c..264756f0413 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -1214,6 +1214,58 @@ public class HiveCalciteUtil {
}
}
+ private static class DisjunctivePredicatesFinder extends RexVisitorImpl<Void> {
+ // accounting for DeMorgan's law
+ boolean inNegation = false;
+ boolean hasDisjunction = false;
+
+ public DisjunctivePredicatesFinder() {
+ super(true);
+ }
+
+ @Override
+ public Void visitCall(RexCall call) {
+ switch (call.getKind()) {
+ case OR:
+ if (inNegation) {
+ return super.visitCall(call);
+ } else {
+ this.hasDisjunction = true;
+ return null;
+ }
+ case AND:
+ if (inNegation) {
+ this.hasDisjunction = true;
+ return null;
+ } else {
+ return super.visitCall(call);
+ }
+ case NOT:
+ inNegation = !inNegation;
+ return super.visitCall(call);
+ default:
+ return super.visitCall(call);
+ }
+ }
+ }
+
+ /**
+ * Returns whether the expression has disjunctions (OR) at any level of nesting.
+ * <ul>
+ * <li> Example 1: OR(=($0, $1), IS NOT NULL($2))):INTEGER (OR in the top-level expression) </li>
+ * <li> Example 2: NOT(AND(=($0, $1), IS NOT NULL($2)) </li>
+ * this is equivalent to OR((<>($0, $1), IS NULL($2))
+ * <li> Example 3: AND(OR(=($0, $1), IS NOT NULL($2)))) (OR in inner expression) </li>
+ * </ul>
+ * @param node the expression where to look for disjunctions.
+ * @return true if the given expressions contains a disjunction, false otherwise.
+ */
+ public static boolean hasDisjuction(RexNode node) {
+ DisjunctivePredicatesFinder finder = new DisjunctivePredicatesFinder();
+ node.accept(finder);
+ return finder.hasDisjunction;
+ }
+
/**
* Checks if any of the expression given as list expressions are from right side of the join.
* This is used during anti join conversion.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
index 3a2bf82f2ff..da68a7474cf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
@@ -20,14 +20,13 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
+import java.util.stream.Collectors;
-import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptPredicateList;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.core.Join;
-import org.apache.calcite.rel.core.RelFactories.FilterFactory;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.rex.RexBuilder;
@@ -42,9 +41,6 @@ import org.apache.calcite.util.Util;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
import org.apache.hive.common.util.AnnotationUtils;
@@ -65,21 +61,11 @@ import com.google.common.collect.Sets;
*/
public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
- public static final HiveJoinPushTransitivePredicatesRule INSTANCE_JOIN =
- new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY);
+ private final boolean allowDisjunctivePredicates;
- public static final HiveJoinPushTransitivePredicatesRule INSTANCE_SEMIJOIN =
- new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY);
-
- public static final HiveJoinPushTransitivePredicatesRule INSTANCE_ANTIJOIN =
- new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY);
-
- private final FilterFactory filterFactory;
-
- public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz,
- FilterFactory filterFactory) {
+ public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz, boolean allowDisjunctivePredicates) {
super(operand(clazz, any()));
- this.filterFactory = filterFactory;
+ this.allowDisjunctivePredicates = allowDisjunctivePredicates;
}
@Override
@@ -95,11 +81,11 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
RelNode rChild = join.getRight();
Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
- List<RexNode> leftPreds = getValidPreds(join.getCluster(), lChild,
- leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType());
+ List<RexNode> leftPreds =
+ getValidPreds(lChild, leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType());
Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
- List<RexNode> rightPreds = getValidPreds(join.getCluster(), rChild,
- rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType());
+ List<RexNode> rightPreds =
+ getValidPreds(rChild, rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType());
RexNode newLeftPredicate = RexUtil.composeConjunction(rB, leftPreds, false);
RexNode newRightPredicate = RexUtil.composeConjunction(rB, rightPreds, false);
@@ -109,13 +95,15 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
if (!newLeftPredicate.isAlwaysTrue()) {
RelNode curr = lChild;
- lChild = filterFactory.createFilter(lChild, newLeftPredicate.accept(new RexReplacer(lChild)), ImmutableSet.of());
+ lChild = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(
+ lChild, newLeftPredicate.accept(new RexReplacer(lChild)), ImmutableSet.of());
call.getPlanner().onCopy(curr, lChild);
}
if (!newRightPredicate.isAlwaysTrue()) {
RelNode curr = rChild;
- rChild = filterFactory.createFilter(rChild, newRightPredicate.accept(new RexReplacer(rChild)), ImmutableSet.of());
+ rChild = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(
+ rChild, newRightPredicate.accept(new RexReplacer(rChild)), ImmutableSet.of());
call.getPlanner().onCopy(curr, rChild);
}
@@ -130,10 +118,10 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
call.transformTo(newRel);
}
- private ImmutableList<RexNode> getValidPreds(RelOptCluster cluster, RelNode child,
- Set<String> predicatesToExclude, List<RexNode> rexs, RelDataType rType) {
+ private ImmutableList<RexNode> getValidPreds(RelNode child, Set<String> predicatesToExclude,
+ List<RexNode> rexs, RelDataType rType) {
InputRefValidator validator = new InputRefValidator(rType.getFieldList());
- List<RexNode> valids = new ArrayList<RexNode>(rexs.size());
+ List<RexNode> valids = new ArrayList<>(rexs.size());
for (RexNode rex : rexs) {
try {
rex.accept(validator);
@@ -143,30 +131,27 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
}
}
- // We need to filter i) those that have been pushed already as stored in the join,
- // and ii) those that were already in the subtree rooted at child
- ImmutableList<RexNode> toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude,
- child, valids);
- return toPush;
- }
-
- private RexNode getTypeSafePred(RelOptCluster cluster, RexNode rex, RelDataType rType) {
- RexNode typeSafeRex = rex;
- if ((typeSafeRex instanceof RexCall) && HiveCalciteUtil.isComparisonOp((RexCall) typeSafeRex)) {
- RexBuilder rb = cluster.getRexBuilder();
- List<RexNode> fixedPredElems = new ArrayList<RexNode>();
- RelDataType commonType = cluster.getTypeFactory().leastRestrictive(
- RexUtil.types(((RexCall) rex).getOperands()));
- for (RexNode rn : ((RexCall) rex).getOperands()) {
- fixedPredElems.add(rb.ensureType(commonType, rn, true));
- }
-
- typeSafeRex = rb.makeCall(((RexCall) typeSafeRex).getOperator(), fixedPredElems);
+ // We need to filter:
+ // i) those that have been pushed already as stored in the join,
+ // ii) those that were already in the subtree rooted at child.
+ List<RexNode> toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude, child, valids);
+
+ // Disjunctive predicates, when merged with other existing predicates, might become redundant but RexSimplify still
+ // cannot simplify them. This situation generally leads to OOM, since these new predicates keep getting inferred
+ // between the LHS and the RHS recursively, they grow by getting merged with existing predicates, but they can
+ // never be simplified by RexSimplify, in this way the fix-point is never reached.
+ // This restriction can be lifted if RexSimplify gets more powerful, and it can handle such cases.
+ if (!allowDisjunctivePredicates) {
+ toPush = toPush.stream()
+ .filter(e -> !HiveCalciteUtil.hasDisjuction(e))
+ .collect(Collectors.toList());
}
- return typeSafeRex;
+ return ImmutableList.copyOf(toPush);
}
+ //~ Inner Classes ----------------------------------------------------------
+
private static class InputRefValidator extends RexVisitorImpl<Void> {
private final List<RelDataTypeField> types;
@@ -178,7 +163,8 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
@Override
public Void visitCall(RexCall call) {
- if(AnnotationUtils.getAnnotation(GenericUDFOPNotNull.class, Description.class).name().equals(call.getOperator().getName())) {
+ if(AnnotationUtils.getAnnotation(
+ GenericUDFOPNotNull.class, Description.class).name().equals(call.getOperator().getName())) {
if(call.getOperands().get(0) instanceof RexInputRef &&
!types.get(((RexInputRef)call.getOperands().get(0)).getIndex()).getType().isNullable()) {
// No need to add not null filter for a constant.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 6ba9a9075f4..cb3fddb60b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1782,6 +1782,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT);
final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+ final boolean allowDisjunctivePredicates = conf.getBoolVar(ConfVars.HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN);
final HepProgramBuilder program = new HepProgramBuilder();
@@ -1879,9 +1880,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
rules.add(HiveJoinAddNotNullRule.INSTANCE_ANTIJOIN);
- rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
- rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
- rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_ANTIJOIN);
+ rules.add(new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, allowDisjunctivePredicates));
+ rules.add(new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, allowDisjunctivePredicates));
+ rules.add(new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, allowDisjunctivePredicates));
rules.add(HiveSortMergeRule.INSTANCE);
rules.add(HiveSortPullUpConstantsRule.SORT_LIMIT_INSTANCE);
rules.add(HiveSortPullUpConstantsRule.SORT_EXCHANGE_INSTANCE);
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
new file mode 100644
index 00000000000..d9986318f11
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
@@ -0,0 +1,17 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE TABLE test1 (act_nbr string);
+CREATE TABLE test2 (month int);
+CREATE TABLE test3 (mth int, con_usd double);
+
+EXPLAIN CBO
+SELECT c.month,
+ d.con_usd
+FROM
+ (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month
+ FROM test1
+ UNION ALL
+ SELECT month
+ FROM test2
+ WHERE month = 202110) c
+JOIN test3 d ON c.month = d.mth;
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q
new file mode 100644
index 00000000000..44abbbe69c8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q
@@ -0,0 +1,24 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE EXTERNAL TABLE table2 (
+ tenant_id int
+) PARTITIONED BY (date_key int)
+STORED AS PARQUET;
+
+CREATE EXTERNAL TABLE tenant_1 (
+ tenant_id int,
+ tenant_key bigint
+) STORED AS PARQUET;
+
+EXPLAIN CBO
+SELECT * FROM (
+ SELECT date_key, tenant_id
+ FROM table2
+ WHERE tenant_id = 0
+ UNION ALL
+ SELECT date_key, tenant_id
+ FROM table2
+ WHERE tenant_id <> 0
+) a
+JOIN tenant_1 dt
+ ON a.tenant_id = dt.tenant_id;
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q
new file mode 100644
index 00000000000..96fd7fa1795
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q
@@ -0,0 +1,23 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE TABLE tableA (
+ bd_id bigint,
+ quota_type string
+);
+
+EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+ UNION ALL
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8;
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q
new file mode 100644
index 00000000000..e7d31f46ad3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q
@@ -0,0 +1,23 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE TABLE tableA (
+ bd_id bigint,
+ quota_type string
+);
+
+EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+ INTERSECT
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
new file mode 100644
index 00000000000..17c1bcc9d41
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
@@ -0,0 +1,75 @@
+PREHOOK: query: CREATE TABLE test1 (act_nbr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (act_nbr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: CREATE TABLE test2 (month int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (month int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: CREATE TABLE test3 (mth int, con_usd double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test3
+POSTHOOK: query: CREATE TABLE test3 (mth int, con_usd double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test3
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+PREHOOK: query: EXPLAIN CBO
+SELECT c.month,
+ d.con_usd
+FROM
+ (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month
+ FROM test1
+ UNION ALL
+ SELECT month
+ FROM test2
+ WHERE month = 202110) c
+JOIN test3 d ON c.month = d.mth
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+PREHOOK: Input: default@test3
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT c.month,
+ d.con_usd
+FROM
+ (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month
+ FROM test1
+ UNION ALL
+ SELECT month
+ FROM test2
+ WHERE month = 202110) c
+JOIN test3 d ON c.month = d.mth
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+POSTHOOK: Input: default@test3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(month=[$0], con_usd=[$2])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(month=[$0])
+ HiveUnion(all=[true])
+ HiveProject(month=[CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER])
+ HiveFilter(condition=[IS NOT NULL(CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER)])
+ HiveProject(DUMMY=[0])
+ HiveTableScan(table=[[default, test1]], table:alias=[test1])
+ HiveProject($f0=[CAST(202110):INTEGER])
+ HiveFilter(condition=[=($0, 202110)])
+ HiveTableScan(table=[[default, test2]], table:alias=[test2])
+ HiveProject(mth=[$0], con_usd=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, test3]], table:alias=[d])
+
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out
new file mode 100644
index 00000000000..270de1776ed
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: CREATE EXTERNAL TABLE table2 (
+ tenant_id int
+) PARTITIONED BY (date_key int)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table2
+POSTHOOK: query: CREATE EXTERNAL TABLE table2 (
+ tenant_id int
+) PARTITIONED BY (date_key int)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table2
+PREHOOK: query: CREATE EXTERNAL TABLE tenant_1 (
+ tenant_id int,
+ tenant_key bigint
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tenant_1
+POSTHOOK: query: CREATE EXTERNAL TABLE tenant_1 (
+ tenant_id int,
+ tenant_key bigint
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tenant_1
+PREHOOK: query: EXPLAIN CBO
+SELECT * FROM (
+ SELECT date_key, tenant_id
+ FROM table2
+ WHERE tenant_id = 0
+ UNION ALL
+ SELECT date_key, tenant_id
+ FROM table2
+ WHERE tenant_id <> 0
+) a
+JOIN tenant_1 dt
+ ON a.tenant_id = dt.tenant_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@tenant_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT * FROM (
+ SELECT date_key, tenant_id
+ FROM table2
+ WHERE tenant_id = 0
+ UNION ALL
+ SELECT date_key, tenant_id
+ FROM table2
+ WHERE tenant_id <> 0
+) a
+JOIN tenant_1 dt
+ ON a.tenant_id = dt.tenant_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@tenant_1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(date_key=[$0], tenant_id=[$1])
+ HiveUnion(all=[true])
+ HiveProject(date_key=[$1], tenant_id=[CAST(0):INTEGER])
+ HiveFilter(condition=[=($0, 0)])
+ HiveTableScan(table=[[default, table2]], table:alias=[table2])
+ HiveProject(date_key=[$1], tenant_id=[$0])
+ HiveFilter(condition=[<>($0, 0)])
+ HiveTableScan(table=[[default, table2]], table:alias=[table2])
+ HiveProject(tenant_id=[$0], tenant_key=[$1])
+ HiveFilter(condition=[IS NOT NULL($0)])
+ HiveTableScan(table=[[default, tenant_1]], table:alias=[dt])
+
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out
new file mode 100644
index 00000000000..365cb86b2d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out
@@ -0,0 +1,67 @@
+PREHOOK: query: CREATE TABLE tableA (
+ bd_id bigint,
+ quota_type string
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tableA
+POSTHOOK: query: CREATE TABLE tableA (
+ bd_id bigint,
+ quota_type string
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tableA
+PREHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+ UNION ALL
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tablea
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+ UNION ALL
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tablea
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(bd_id=[$0])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(bd_id=[$0])
+ HiveFilter(condition=[AND(OR(=($0, 8), <>($0, 8)), OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, tablea]], table:alias=[t])
+ HiveProject($f0=[$0])
+ HiveUnion(all=[true])
+ HiveProject($f0=[CAST(9:BIGINT):BIGINT])
+ HiveFilter(condition=[AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))])
+ HiveTableScan(table=[[default, tablea]], table:alias=[t])
+ HiveProject(bd_id=[$0])
+ HiveFilter(condition=[AND(OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, tablea]], table:alias=[t])
+
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out
new file mode 100644
index 00000000000..da16c285203
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out
@@ -0,0 +1,73 @@
+PREHOOK: query: CREATE TABLE tableA (
+ bd_id bigint,
+ quota_type string
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tableA
+POSTHOOK: query: CREATE TABLE tableA (
+ bd_id bigint,
+ quota_type string
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tableA
+PREHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+ INTERSECT
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tablea
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+ INTERSECT
+ SELECT t.bd_id
+ FROM tableA t
+ WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tablea
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(bd_id=[$0])
+ HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+ HiveProject(bd_id=[$0])
+ HiveFilter(condition=[AND(OR(=($0, 8), <>($0, 8)), OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, tablea]], table:alias=[t])
+ HiveProject($f0=[$0])
+ HiveFilter(condition=[=($1, 2)])
+ HiveAggregate(group=[{0}], agg#0=[count($1)])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveUnion(all=[true])
+ HiveProject($f0=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveProject($f0=[CAST(9:BIGINT):BIGINT])
+ HiveFilter(condition=[AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))])
+ HiveTableScan(table=[[default, tablea]], table:alias=[t])
+ HiveProject(bd_id=[$0], $f1=[$1])
+ HiveAggregate(group=[{0}], agg#0=[count()])
+ HiveFilter(condition=[AND(OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+ HiveTableScan(table=[[default, tablea]], table:alias=[t])
+