You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by za...@apache.org on 2022/04/27 08:45:24 UTC

[hive] branch master updated: HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis)

This is an automated email from the ASF dual-hosted git repository.

zabetak pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7583142cbff HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis)
7583142cbff is described below

commit 7583142cbffcb3958a546a9aaa15700bbc243df9
Author: Alessandro Solimando <al...@gmail.com>
AuthorDate: Mon Jan 24 13:08:56 2022 +0100

    HIVE-25758: OOM due to recursive application of CBO rules (Alessandro Solimando, reviewed by Stamatis Zampetakis)
    
    Closes #2966
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  4 ++
 .../hive/ql/optimizer/calcite/HiveCalciteUtil.java | 52 ++++++++++++++
 .../HiveJoinPushTransitivePredicatesRule.java      | 82 +++++++++-------------
 .../hadoop/hive/ql/parse/CalcitePlanner.java       |  7 +-
 .../cbo_join_transitive_pred_loop_1.q              | 17 +++++
 .../cbo_join_transitive_pred_loop_2.q              | 24 +++++++
 .../cbo_join_transitive_pred_loop_3.q              | 23 ++++++
 .../cbo_join_transitive_pred_loop_4.q              | 23 ++++++
 .../llap/cbo_join_transitive_pred_loop_1.q.out     | 75 ++++++++++++++++++++
 .../llap/cbo_join_transitive_pred_loop_2.q.out     | 74 +++++++++++++++++++
 .../llap/cbo_join_transitive_pred_loop_3.q.out     | 67 ++++++++++++++++++
 .../llap/cbo_join_transitive_pred_loop_4.q.out     | 73 +++++++++++++++++++
 12 files changed, 470 insertions(+), 51 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 99964fc7732..caf223dd91b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2530,6 +2530,10 @@ public class HiveConf extends Configuration {
         "If this config is true only pushed down filters remain in the operator tree, \n" +
         "and the original filter is removed. If this config is false, the original filter \n" +
         "is also left in the operator tree at the original place."),
+    HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN("hive.optimize.join.disjunctive.transitive.predicates.pushdown",
+        true, "Whether to transitively infer disjunctive predicates across joins. \n"
+        + "Disjunctive predicates are hard to simplify and pushing them down might lead to infinite rule matching "
+        + "causing stackoverflow and OOM errors"),
     HIVEPOINTLOOKUPOPTIMIZER("hive.optimize.point.lookup", true,
          "Whether to transform OR clauses in Filter operators into IN clauses"),
     HIVEPOINTLOOKUPOPTIMIZERMIN("hive.optimize.point.lookup.min", 2,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
index 160bfb86f6c..264756f0413 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveCalciteUtil.java
@@ -1214,6 +1214,58 @@ public class HiveCalciteUtil {
     }
   }
 
+  private static class DisjunctivePredicatesFinder extends RexVisitorImpl<Void> {
+    // accounting for DeMorgan's law
+    boolean inNegation = false;
+    boolean hasDisjunction = false;
+
+    public DisjunctivePredicatesFinder() {
+      super(true);
+    }
+
+    @Override
+    public Void visitCall(RexCall call) {
+      switch (call.getKind()) {
+      case OR:
+        if (inNegation) {
+          return super.visitCall(call);
+        } else {
+          this.hasDisjunction = true;
+          return null;
+        }
+      case AND:
+        if (inNegation) {
+          this.hasDisjunction = true;
+          return null;
+        } else {
+          return super.visitCall(call);
+        }
+      case NOT:
+        inNegation = !inNegation;
+        return super.visitCall(call);
+      default:
+        return super.visitCall(call);
+      }
+    }
+  }
+
+  /**
+   * Returns whether the expression has disjunctions (OR) at any level of nesting.
+   * <ul>
+   * <li> Example 1: OR(=($0, $1), IS NOT NULL($2))):INTEGER (OR in the top-level expression) </li>
+   * <li> Example 2: NOT(AND(=($0, $1), IS NOT NULL($2)) </li>
+   *   this is equivalent to OR((&lt&gt($0, $1), IS NULL($2))
+   * <li> Example 3: AND(OR(=($0, $1), IS NOT NULL($2)))) (OR in inner expression) </li>
+   * </ul>
+   * @param node the expression where to look for disjunctions.
+   * @return true if the given expressions contains a disjunction, false otherwise.
+   */
+  public static boolean hasDisjuction(RexNode node) {
+    DisjunctivePredicatesFinder finder = new DisjunctivePredicatesFinder();
+    node.accept(finder);
+    return finder.hasDisjunction;
+  }
+
   /**
    * Checks if any of the expression given as list expressions are from right side of the join.
    *  This is used during anti join conversion.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
index 3a2bf82f2ff..da68a7474cf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveJoinPushTransitivePredicatesRule.java
@@ -20,14 +20,13 @@ package org.apache.hadoop.hive.ql.optimizer.calcite.rules;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
+import java.util.stream.Collectors;
 
-import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptPredicateList;
 import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptRuleCall;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Join;
-import org.apache.calcite.rel.core.RelFactories.FilterFactory;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
 import org.apache.calcite.rex.RexBuilder;
@@ -42,9 +41,6 @@ import org.apache.calcite.util.Util;
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
 import org.apache.hadoop.hive.ql.optimizer.calcite.HiveRelFactories;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAntiJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin;
-import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveSemiJoin;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFOPNotNull;
 import org.apache.hive.common.util.AnnotationUtils;
 
@@ -65,21 +61,11 @@ import com.google.common.collect.Sets;
  */
 public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
 
-  public static final HiveJoinPushTransitivePredicatesRule INSTANCE_JOIN =
-          new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY);
+  private final boolean allowDisjunctivePredicates;
 
-  public static final HiveJoinPushTransitivePredicatesRule INSTANCE_SEMIJOIN =
-          new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY);
-
-  public static final HiveJoinPushTransitivePredicatesRule INSTANCE_ANTIJOIN =
-          new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, HiveRelFactories.HIVE_FILTER_FACTORY);
-
-  private final FilterFactory filterFactory;
-
-  public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz,
-      FilterFactory filterFactory) {
+  public HiveJoinPushTransitivePredicatesRule(Class<? extends Join> clazz, boolean allowDisjunctivePredicates) {
     super(operand(clazz, any()));
-    this.filterFactory = filterFactory;
+    this.allowDisjunctivePredicates = allowDisjunctivePredicates;
   }
 
   @Override
@@ -95,11 +81,11 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
     RelNode rChild = join.getRight();
 
     Set<String> leftPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 0));
-    List<RexNode> leftPreds = getValidPreds(join.getCluster(), lChild,
-            leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType());
+    List<RexNode> leftPreds =
+        getValidPreds(lChild, leftPushedPredicates, preds.leftInferredPredicates, lChild.getRowType());
     Set<String> rightPushedPredicates = Sets.newHashSet(registry.getPushedPredicates(join, 1));
-    List<RexNode> rightPreds = getValidPreds(join.getCluster(), rChild,
-            rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType());
+    List<RexNode> rightPreds =
+        getValidPreds(rChild, rightPushedPredicates, preds.rightInferredPredicates, rChild.getRowType());
 
     RexNode newLeftPredicate = RexUtil.composeConjunction(rB, leftPreds, false);
     RexNode newRightPredicate = RexUtil.composeConjunction(rB, rightPreds, false);
@@ -109,13 +95,15 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
 
     if (!newLeftPredicate.isAlwaysTrue()) {
       RelNode curr = lChild;
-      lChild = filterFactory.createFilter(lChild, newLeftPredicate.accept(new RexReplacer(lChild)), ImmutableSet.of());
+      lChild = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(
+          lChild, newLeftPredicate.accept(new RexReplacer(lChild)), ImmutableSet.of());
       call.getPlanner().onCopy(curr, lChild);
     }
 
     if (!newRightPredicate.isAlwaysTrue()) {
       RelNode curr = rChild;
-      rChild = filterFactory.createFilter(rChild, newRightPredicate.accept(new RexReplacer(rChild)), ImmutableSet.of());
+      rChild = HiveRelFactories.HIVE_FILTER_FACTORY.createFilter(
+          rChild, newRightPredicate.accept(new RexReplacer(rChild)), ImmutableSet.of());
       call.getPlanner().onCopy(curr, rChild);
     }
 
@@ -130,10 +118,10 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
     call.transformTo(newRel);
   }
 
-  private ImmutableList<RexNode> getValidPreds(RelOptCluster cluster, RelNode child,
-      Set<String> predicatesToExclude, List<RexNode> rexs, RelDataType rType) {
+  private ImmutableList<RexNode> getValidPreds(RelNode child, Set<String> predicatesToExclude,
+      List<RexNode> rexs, RelDataType rType) {
     InputRefValidator validator = new InputRefValidator(rType.getFieldList());
-    List<RexNode> valids = new ArrayList<RexNode>(rexs.size());
+    List<RexNode> valids = new ArrayList<>(rexs.size());
     for (RexNode rex : rexs) {
       try {
         rex.accept(validator);
@@ -143,30 +131,27 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
       }
     }
 
-    // We need to filter i) those that have been pushed already as stored in the join,
-    // and ii) those that were already in the subtree rooted at child
-    ImmutableList<RexNode> toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude,
-            child, valids);
-    return toPush;
-  }
-
-  private RexNode getTypeSafePred(RelOptCluster cluster, RexNode rex, RelDataType rType) {
-    RexNode typeSafeRex = rex;
-    if ((typeSafeRex instanceof RexCall) && HiveCalciteUtil.isComparisonOp((RexCall) typeSafeRex)) {
-      RexBuilder rb = cluster.getRexBuilder();
-      List<RexNode> fixedPredElems = new ArrayList<RexNode>();
-      RelDataType commonType = cluster.getTypeFactory().leastRestrictive(
-          RexUtil.types(((RexCall) rex).getOperands()));
-      for (RexNode rn : ((RexCall) rex).getOperands()) {
-        fixedPredElems.add(rb.ensureType(commonType, rn, true));
-      }
-
-      typeSafeRex = rb.makeCall(((RexCall) typeSafeRex).getOperator(), fixedPredElems);
+    // We need to filter:
+    //  i) those that have been pushed already as stored in the join,
+    //  ii) those that were already in the subtree rooted at child.
+    List<RexNode> toPush = HiveCalciteUtil.getPredsNotPushedAlready(predicatesToExclude, child, valids);
+
+    // Disjunctive predicates, when merged with other existing predicates, might become redundant but RexSimplify still
+    // cannot simplify them. This situation generally leads to OOM, since these new predicates keep getting inferred
+    // between the LHS and the RHS recursively, they grow by getting merged with existing predicates, but they can
+    // never be simplified by RexSimplify, in this way the fix-point is never reached.
+    // This restriction can be lifted if RexSimplify gets more powerful, and it can handle such cases.
+    if (!allowDisjunctivePredicates) {
+      toPush = toPush.stream()
+          .filter(e -> !HiveCalciteUtil.hasDisjuction(e))
+          .collect(Collectors.toList());
     }
 
-    return typeSafeRex;
+    return ImmutableList.copyOf(toPush);
   }
 
+  //~ Inner Classes ----------------------------------------------------------
+
   private static class InputRefValidator extends RexVisitorImpl<Void> {
 
     private final List<RelDataTypeField> types;
@@ -178,7 +163,8 @@ public class HiveJoinPushTransitivePredicatesRule extends RelOptRule {
     @Override
     public Void visitCall(RexCall call) {
 
-      if(AnnotationUtils.getAnnotation(GenericUDFOPNotNull.class, Description.class).name().equals(call.getOperator().getName())) {
+      if(AnnotationUtils.getAnnotation(
+          GenericUDFOPNotNull.class, Description.class).name().equals(call.getOperator().getName())) {
         if(call.getOperands().get(0) instanceof RexInputRef &&
             !types.get(((RexInputRef)call.getOperands().get(0)).getIndex()).getType().isNullable()) {
           // No need to add not null filter for a constant.
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 6ba9a9075f4..cb3fddb60b7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -1782,6 +1782,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
 
       final int maxCNFNodeCount = conf.getIntVar(HiveConf.ConfVars.HIVE_CBO_CNF_NODES_LIMIT);
       final int minNumORClauses = conf.getIntVar(HiveConf.ConfVars.HIVEPOINTLOOKUPOPTIMIZERMIN);
+      final boolean allowDisjunctivePredicates = conf.getBoolVar(ConfVars.HIVE_JOIN_DISJ_TRANSITIVE_PREDICATES_PUSHDOWN);
 
       final HepProgramBuilder program = new HepProgramBuilder();
 
@@ -1879,9 +1880,9 @@ public class CalcitePlanner extends SemanticAnalyzer {
       rules.add(HiveJoinAddNotNullRule.INSTANCE_JOIN);
       rules.add(HiveJoinAddNotNullRule.INSTANCE_SEMIJOIN);
       rules.add(HiveJoinAddNotNullRule.INSTANCE_ANTIJOIN);
-      rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_JOIN);
-      rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_SEMIJOIN);
-      rules.add(HiveJoinPushTransitivePredicatesRule.INSTANCE_ANTIJOIN);
+      rules.add(new HiveJoinPushTransitivePredicatesRule(HiveJoin.class, allowDisjunctivePredicates));
+      rules.add(new HiveJoinPushTransitivePredicatesRule(HiveSemiJoin.class, allowDisjunctivePredicates));
+      rules.add(new HiveJoinPushTransitivePredicatesRule(HiveAntiJoin.class, allowDisjunctivePredicates));
       rules.add(HiveSortMergeRule.INSTANCE);
       rules.add(HiveSortPullUpConstantsRule.SORT_LIMIT_INSTANCE);
       rules.add(HiveSortPullUpConstantsRule.SORT_EXCHANGE_INSTANCE);
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
new file mode 100644
index 00000000000..d9986318f11
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_1.q
@@ -0,0 +1,17 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE TABLE test1 (act_nbr string);
+CREATE TABLE test2 (month int);
+CREATE TABLE test3 (mth int, con_usd double);
+
+EXPLAIN CBO
+SELECT c.month,
+      d.con_usd
+FROM
+ (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month
+  FROM test1
+  UNION ALL
+  SELECT month
+  FROM test2
+  WHERE month = 202110) c
+JOIN test3 d ON c.month = d.mth;
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q
new file mode 100644
index 00000000000..44abbbe69c8
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_2.q
@@ -0,0 +1,24 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE EXTERNAL TABLE table2 (
+  tenant_id int
+) PARTITIONED BY (date_key int)
+STORED AS PARQUET;
+
+CREATE EXTERNAL TABLE tenant_1 (
+  tenant_id int,
+  tenant_key bigint
+) STORED AS PARQUET;
+
+EXPLAIN CBO
+SELECT * FROM (
+   SELECT date_key, tenant_id
+   FROM  table2
+   WHERE tenant_id = 0
+   UNION ALL
+   SELECT date_key, tenant_id
+   FROM  table2
+   WHERE tenant_id <> 0
+) a
+JOIN tenant_1 dt
+  ON a.tenant_id = dt.tenant_id;
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q
new file mode 100644
index 00000000000..96fd7fa1795
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_3.q
@@ -0,0 +1,23 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE TABLE tableA (
+  bd_id      bigint,
+  quota_type string
+);
+
+EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+    SELECT t.bd_id
+    FROM tableA t
+    WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN  ('A','B'))
+ ) a JOIN (
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+     UNION ALL
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8;
diff --git a/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q
new file mode 100644
index 00000000000..e7d31f46ad3
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/cbo_join_transitive_pred_loop_4.q
@@ -0,0 +1,23 @@
+set hive.optimize.join.disjunctive.transitive.predicates.pushdown=false;
+
+CREATE TABLE tableA (
+  bd_id      bigint,
+  quota_type string
+);
+
+EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+    SELECT t.bd_id
+    FROM tableA t
+    WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+     INTERSECT
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8;
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
new file mode 100644
index 00000000000..17c1bcc9d41
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_1.q.out
@@ -0,0 +1,75 @@
+PREHOOK: query: CREATE TABLE test1 (act_nbr string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test1
+POSTHOOK: query: CREATE TABLE test1 (act_nbr string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test1
+PREHOOK: query: CREATE TABLE test2 (month int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test2
+POSTHOOK: query: CREATE TABLE test2 (month int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test2
+PREHOOK: query: CREATE TABLE test3 (mth int, con_usd double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test3
+POSTHOOK: query: CREATE TABLE test3 (mth int, con_usd double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test3
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+unix_timestamp(void) is deprecated. Use current_timestamp instead.
+PREHOOK: query: EXPLAIN CBO
+SELECT c.month,
+      d.con_usd
+FROM
+ (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month
+  FROM test1
+  UNION ALL
+  SELECT month
+  FROM test2
+  WHERE month = 202110) c
+JOIN test3 d ON c.month = d.mth
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test1
+PREHOOK: Input: default@test2
+PREHOOK: Input: default@test3
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT c.month,
+      d.con_usd
+FROM
+ (SELECT cast(regexp_replace(substr(add_months(from_unixtime(unix_timestamp(), 'yyyy-MM-dd'), -1), 1, 7), '-', '') AS int) AS month
+  FROM test1
+  UNION ALL
+  SELECT month
+  FROM test2
+  WHERE month = 202110) c
+JOIN test3 d ON c.month = d.mth
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test1
+POSTHOOK: Input: default@test2
+POSTHOOK: Input: default@test3
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(month=[$0], con_usd=[$2])
+  HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(month=[$0])
+      HiveUnion(all=[true])
+        HiveProject(month=[CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER])
+          HiveFilter(condition=[IS NOT NULL(CAST(regexp_replace(substr(add_months(FROM_UNIXTIME(UNIX_TIMESTAMP, _UTF-16LE'yyyy-MM-dd':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"), -1), 1, 7), _UTF-16LE'-':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")):INTEGER)])
+            HiveProject(DUMMY=[0])
+              HiveTableScan(table=[[default, test1]], table:alias=[test1])
+        HiveProject($f0=[CAST(202110):INTEGER])
+          HiveFilter(condition=[=($0, 202110)])
+            HiveTableScan(table=[[default, test2]], table:alias=[test2])
+    HiveProject(mth=[$0], con_usd=[$1])
+      HiveFilter(condition=[IS NOT NULL($0)])
+        HiveTableScan(table=[[default, test3]], table:alias=[d])
+
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out
new file mode 100644
index 00000000000..270de1776ed
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_2.q.out
@@ -0,0 +1,74 @@
+PREHOOK: query: CREATE EXTERNAL TABLE table2 (
+  tenant_id int
+) PARTITIONED BY (date_key int)
+STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table2
+POSTHOOK: query: CREATE EXTERNAL TABLE table2 (
+  tenant_id int
+) PARTITIONED BY (date_key int)
+STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table2
+PREHOOK: query: CREATE EXTERNAL TABLE tenant_1 (
+  tenant_id int,
+  tenant_key bigint
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tenant_1
+POSTHOOK: query: CREATE EXTERNAL TABLE tenant_1 (
+  tenant_id int,
+  tenant_key bigint
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tenant_1
+PREHOOK: query: EXPLAIN CBO
+SELECT * FROM (
+   SELECT date_key, tenant_id
+   FROM  table2
+   WHERE tenant_id = 0
+   UNION ALL
+   SELECT date_key, tenant_id
+   FROM  table2
+   WHERE tenant_id <> 0
+) a
+JOIN tenant_1 dt
+  ON a.tenant_id = dt.tenant_id
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table2
+PREHOOK: Input: default@tenant_1
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT * FROM (
+   SELECT date_key, tenant_id
+   FROM  table2
+   WHERE tenant_id = 0
+   UNION ALL
+   SELECT date_key, tenant_id
+   FROM  table2
+   WHERE tenant_id <> 0
+) a
+JOIN tenant_1 dt
+  ON a.tenant_id = dt.tenant_id
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table2
+POSTHOOK: Input: default@tenant_1
+#### A masked pattern was here ####
+CBO PLAN:
+HiveJoin(condition=[=($1, $2)], joinType=[inner], algorithm=[none], cost=[not available])
+  HiveProject(date_key=[$0], tenant_id=[$1])
+    HiveUnion(all=[true])
+      HiveProject(date_key=[$1], tenant_id=[CAST(0):INTEGER])
+        HiveFilter(condition=[=($0, 0)])
+          HiveTableScan(table=[[default, table2]], table:alias=[table2])
+      HiveProject(date_key=[$1], tenant_id=[$0])
+        HiveFilter(condition=[<>($0, 0)])
+          HiveTableScan(table=[[default, table2]], table:alias=[table2])
+  HiveProject(tenant_id=[$0], tenant_key=[$1])
+    HiveFilter(condition=[IS NOT NULL($0)])
+      HiveTableScan(table=[[default, tenant_1]], table:alias=[dt])
+
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out
new file mode 100644
index 00000000000..365cb86b2d9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_3.q.out
@@ -0,0 +1,67 @@
+PREHOOK: query: CREATE TABLE tableA (
+  bd_id      bigint,
+  quota_type string
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tableA
+POSTHOOK: query: CREATE TABLE tableA (
+  bd_id      bigint,
+  quota_type string
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tableA
+PREHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+    SELECT t.bd_id
+    FROM tableA t
+    WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN  ('A','B'))
+ ) a JOIN (
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+     UNION ALL
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tablea
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+    SELECT t.bd_id
+    FROM tableA t
+    WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN  ('A','B'))
+ ) a JOIN (
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+     UNION ALL
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tablea
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(bd_id=[$0])
+  HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(bd_id=[$0])
+      HiveFilter(condition=[AND(OR(=($0, 8), <>($0, 8)), OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+        HiveTableScan(table=[[default, tablea]], table:alias=[t])
+    HiveProject($f0=[$0])
+      HiveUnion(all=[true])
+        HiveProject($f0=[CAST(9:BIGINT):BIGINT])
+          HiveFilter(condition=[AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))])
+            HiveTableScan(table=[[default, tablea]], table:alias=[t])
+        HiveProject(bd_id=[$0])
+          HiveFilter(condition=[AND(OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+            HiveTableScan(table=[[default, tablea]], table:alias=[t])
+
diff --git a/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out
new file mode 100644
index 00000000000..da16c285203
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/cbo_join_transitive_pred_loop_4.q.out
@@ -0,0 +1,73 @@
+PREHOOK: query: CREATE TABLE tableA (
+  bd_id      bigint,
+  quota_type string
+)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tableA
+POSTHOOK: query: CREATE TABLE tableA (
+  bd_id      bigint,
+  quota_type string
+)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tableA
+PREHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+    SELECT t.bd_id
+    FROM tableA t
+    WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+     INTERSECT
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tablea
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN CBO
+SELECT a.bd_id
+FROM (
+    SELECT t.bd_id
+    FROM tableA t
+    WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+ ) a JOIN (
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE t.bd_id = 9 AND t.quota_type IN ('A','B')
+     INTERSECT
+     SELECT t.bd_id
+     FROM tableA t
+     WHERE (t.bd_id = 8 AND t.quota_type IN ('A','C')) OR (t.bd_id = 9 AND t.quota_type IN ('A','B'))
+) b ON a.bd_id = b.bd_id
+WHERE a.bd_id = 8 OR a.bd_id <> 8
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tablea
+#### A masked pattern was here ####
+CBO PLAN:
+HiveProject(bd_id=[$0])
+  HiveJoin(condition=[=($0, $1)], joinType=[inner], algorithm=[none], cost=[not available])
+    HiveProject(bd_id=[$0])
+      HiveFilter(condition=[AND(OR(=($0, 8), <>($0, 8)), OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+        HiveTableScan(table=[[default, tablea]], table:alias=[t])
+    HiveProject($f0=[$0])
+      HiveFilter(condition=[=($1, 2)])
+        HiveAggregate(group=[{0}], agg#0=[count($1)])
+          HiveProject($f0=[$0], $f1=[$1])
+            HiveUnion(all=[true])
+              HiveProject($f0=[$0], $f1=[$1])
+                HiveAggregate(group=[{0}], agg#0=[count()])
+                  HiveProject($f0=[CAST(9:BIGINT):BIGINT])
+                    HiveFilter(condition=[AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))])
+                      HiveTableScan(table=[[default, tablea]], table:alias=[t])
+              HiveProject(bd_id=[$0], $f1=[$1])
+                HiveAggregate(group=[{0}], agg#0=[count()])
+                  HiveFilter(condition=[AND(OR(AND(=($0, 8), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'C':VARCHAR(2147483647) CHARACTER SET "UTF-16LE")), AND(=($0, 9), IN($1, _UTF-16LE'A':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", _UTF-16LE'B':VARCHAR(2147483647) CHARACTER SET "UTF-16LE"))), IS NOT NULL($0))])
+                    HiveTableScan(table=[[default, tablea]], table:alias=[t])
+