Posted to commits@impala.apache.org by ta...@apache.org on 2019/06/05 16:06:57 UTC

[impala] 06/06: IMPALA-7957: Fix slot equivalences may be enforced multiple times

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit ae8295118191486f31da4d8d3c9d0f7e7e5d4b3a
Author: stiga-huang <hu...@gmail.com>
AuthorDate: Sun Apr 14 02:28:54 2019 -0700

    IMPALA-7957: Fix slot equivalences may be enforced multiple times
    
    Predicates can be divided into three types according to how they are
    generated:
      1) original predicates that come directly from the query
      2) auxiliary equality predicates generated to express the equivalence
         between a label (alias) and its source expression
      3) inferred predicates derived from the slot equivalence graph
    The slot equivalence graph (valueTransferGraph in Analyzer) is built from
    the first two kinds of predicates. The Analyzer creates equivalence
    predicates for a PlanNode based on the unassigned predicates and the
    valueTransferGraph. However, the current implementation cannot avoid
    creating inferred predicates that duplicate previously inferred
    predicates once those have already been assigned.
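
    As an illustration only (not part of the patch, and not Impala's actual
    Analyzer code; the class name and slot numbers are made up): equality
    predicates from the first two categories connect slots into equivalence
    classes, and a missing equality between two slots of the same class is a
    candidate inferred predicate.

      // Illustration only: slots are plain ints, predicates are int pairs.
      import java.util.*;

      public class EquivClassSketch {
        // Find the representative of x's equivalence class (no path
        // compression needed for this tiny example).
        static int find(int[] rep, int x) {
          while (rep[x] != x) x = rep[x];
          return x;
        }

        public static void main(String[] args) {
          // Hypothetical slots 0..3; an original predicate s0 = s1 and an
          // auxiliary (alias) predicate s1 = s2.
          int numSlots = 4;
          List<int[]> eqPredicates =
              Arrays.asList(new int[] {0, 1}, new int[] {1, 2});

          int[] rep = new int[numSlots];
          for (int i = 0; i < numSlots; i++) rep[i] = i;
          for (int[] e : eqPredicates) {
            int a = find(rep, e[0]), b = find(rep, e[1]);
            if (a != b) rep[a] = b;  // merge the two classes
          }
          // s0, s1, s2 end up in one class, so s0 = s2 can be inferred; s3 cannot.
          System.out.println("s0 = s2 inferable: " + (find(rep, 0) == find(rep, 2))); // true
          System.out.println("s0 = s3 inferable: " + (find(rep, 0) == find(rep, 3))); // false
        }
      }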
    
    Duplicated inferred predicates are either redundant or wrong. Say we
    create predicate p1: s1 = s2 for the current PlanNode, and p1 duplicates
    a previously inferred predicate p0: s1 = s2 (equivalent to s2 = s1). We
    can show that p1 is redundant or wrong:
      1) p0 must have been assigned. Otherwise, p0 would still be in the
         unassigned conjuncts list and p1 would not be created.
      2) p0 must have been assigned to a descendant of the current PlanNode,
         since PlanNodes are created in a depth-first manner.
      3) The original predicates from which p0 was inferred have been
         assigned to a descendant node as well.
    So the rows that should be rejected have already been filtered out,
    either by p0 or by the original predicates from which p0 was inferred,
    which makes p1 redundant. What's worse, assigning p1 on top of those
    original predicates may wrongly reject rows, e.g. NULL-extended rows
    produced by an outer join, as in the regression tests added below. Hence,
    p1 is either redundant or wrong.
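
    A toy model of the failure mode, for illustration only (plain Java with
    made-up rows and a made-up class name; not Impala code). It mimics the
    first regression test added below: the right side of a LEFT OUTER JOIN
    has already enforced int_col = id, and re-enforcing that equivalence
    above the join rejects the NULL-extended rows the outer join must keep:

      import java.util.*;

      public class DuplicatedPredicateDemo {
        public static void main(String[] args) {
          int[] leftIds = {0, 1, 2};                                // t.id
          Set<Integer> rightIds = new HashSet<>(Arrays.asList(0));  // t2 rows surviving int_col = id

          // LEFT OUTER JOIN on t.id = t2.id: non-matching left rows are NULL-extended.
          List<Integer[]> joined = new ArrayList<>();
          for (int id : leftIds) {
            Integer match = rightIds.contains(id) ? id : null;
            joined.add(new Integer[] {id, match, match});           // {t.id, t2.id, t2.int_col}
          }
          System.out.println("join output rows: " + joined.size()); // 3, incl. NULL-extended rows

          // Re-enforcing the already-assigned equivalence t2.int_col = t2.id above
          // the join (the duplicated inferred predicate p1): with SQL semantics
          // NULL = NULL is not TRUE, so the NULL-extended rows are wrongly rejected.
          long kept = joined.stream()
              .filter(r -> r[1] != null && r[2] != null && r[1].equals(r[2]))
              .count();
          System.out.println("rows after duplicated predicate: " + kept); // 1
        }
      }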
    
    When inferring predicates based on slot equivalence (in
    createEquivConjuncts), we should therefore update partialEquivSlots with
    the previously assigned equivalence predicates, so that a slot
    equivalence is never enforced more than once.
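
    A minimal sketch of that idea, for illustration only (plain ints instead
    of SlotIds and DisjointSet, made-up class name; the real change to
    createEquivConjuncts is in the diff below): seed the partial equivalences
    with the already-assigned equality predicates of the tuple, so a
    candidate conjunct that would enforce an equivalence a second time is
    dropped as redundant.

      public class CreateEquivSketch {
        static int[] parent;

        static int find(int x) { return parent[x] == x ? x : (parent[x] = find(parent[x])); }

        // Returns true iff the union merged two previously distinct sets.
        static boolean union(int a, int b) {
          int ra = find(a), rb = find(b);
          if (ra == rb) return false;
          parent[ra] = rb;
          return true;
        }

        public static void main(String[] args) {
          parent = new int[] {0, 1, 2, 3};

          // s1 = s2 was already inferred and assigned at a descendant plan node
          // (tracked per tuple id by assignedConjunctsByTupleId in the patch).
          int[][] assignedForTuple = {{1, 2}};
          // Candidate conjuncts considered for the current node.
          int[][] candidates = {{1, 2}, {2, 3}};

          // The fix: union the slots of the assigned predicates first ...
          for (int[] p : assignedForTuple) union(p[0], p[1]);
          // ... so an equivalence that is already enforced is recognized as
          // redundant instead of being enforced again higher in the plan.
          for (int[] c : candidates) {
            System.out.println("s" + c[0] + " = s" + c[1]
                + (union(c[0], c[1]) ? "  -> keep" : "  -> redundant, drop"));
          }
        }
      }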
    
    This patch also adds some useful TRACE level logs.
    
    Tests:
     * Add tests for UNIONs in inline-view.test
     * Run all tests locally in CORE exploration strategy
    
    Change-Id: Ida2d5d8149b217e18ebae61e136848162503653e
    Reviewed-on: http://gerrit.cloudera.org:8080/13051
    Reviewed-by: Tim Armstrong <ta...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/analysis/Analyzer.java  |  87 ++++-
 .../apache/impala/analysis/BinaryPredicate.java    |   9 +-
 .../org/apache/impala/analysis/SlotDescriptor.java |  11 +
 .../java/org/apache/impala/analysis/SlotRef.java   |   1 +
 .../apache/impala/planner/SingleNodePlanner.java   |   7 +-
 .../queries/PlannerTest/inline-view.test           | 380 +++++++++++++++++++++
 6 files changed, 482 insertions(+), 13 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
index d20c54e..45b8046 100644
--- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
+++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java
@@ -254,6 +254,12 @@ public class Analyzer {
     // preserve the order in which conjuncts are added.
     public final Map<ExprId, Expr> conjuncts = new LinkedHashMap<>();
 
+    // All assigned equality conjuncts bound by a single tuple id (map from tuple id to
+    // conjuncts). This map is used to make sure that slot equivalences are not enforced
+    // multiple times (e.g. by conjuncts that duplicate previously inferred ones).
+    public final Map<TupleId, List<BinaryPredicate>> assignedConjunctsByTupleId =
+        new HashMap<>();
+
     // all registered conjuncts bound by a single tuple id; used in getBoundPredicates()
     public final List<ExprId> singleTidConjuncts = new ArrayList<>();
 
@@ -1161,7 +1167,7 @@ public class Analyzer {
 
     if (LOG.isTraceEnabled()) {
       LOG.trace("register tuple/slotConjunct: " + Integer.toString(e.getId().asInt())
-      + " " + e.toSql() + " " + e.debugString());
+          + " " + e.toSql() + " " + e.debugString());
     }
 
     if (!(e instanceof BinaryPredicate)) return;
@@ -1214,7 +1220,7 @@ public class Analyzer {
     BinaryPredicate p = new BinaryPredicate(BinaryPredicate.Operator.EQ, lhs, rhs);
     p.setIsAuxExpr();
     if (LOG.isTraceEnabled()) {
-      LOG.trace("register eq predicate: " + p.toSql() + " " + p.debugString());
+      LOG.trace("register auxiliary eq predicate: " + p.toSql() + " " + p.debugString());
     }
     registerConjunct(p);
   }
@@ -1297,6 +1303,16 @@ public class Analyzer {
     return result;
   }
 
+  public String conjunctAssignmentsDebugString() {
+    StringBuilder res = new StringBuilder();
+    for (Expr _e : globalState_.conjuncts.values()) {
+      String state = globalState_.assignedConjuncts.contains(_e.getId()) ? "assigned"
+              : "unassigned";
+      res.append("\n\t" + state + " " + _e.debugString());
+    }
+    return res.toString();
+  }
+
   /**
    * Returns true if 'e' must be evaluated after or by a join node. Note that it may
    * still be safe to evaluate 'e' elsewhere as well, but in any case 'e' must be
@@ -1658,7 +1674,10 @@ public class Analyzer {
                     != globalState_.outerJoinedTupleIds.get(destTid)));
 
           // mark all bound predicates including duplicate ones
-          if (reverseValueTransfer && !evalAfterJoin) markConjunctAssigned(srcConjunct);
+          if (reverseValueTransfer && !evalAfterJoin) {
+            markConjunctAssigned(srcConjunct);
+            if (p != srcConjunct) markConjunctAssigned(p);
+          }
         }
 
         // check if we already created this predicate
@@ -1801,6 +1820,11 @@ public class Analyzer {
   @SuppressWarnings("unchecked")
   public <T extends Expr> void createEquivConjuncts(TupleId tid, List<T> conjuncts,
       Set<SlotId> ignoreSlots) {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(String.format(
+          "createEquivConjuncts: tid=%s, conjuncts=%s, ignoreSlots=%s", tid.toString(),
+          Expr.debugString(conjuncts), ignoreSlots), new Exception("call trace"));
+    }
     // Maps from a slot id to its set of equivalent slots. Used to track equivalences
     // that have been established by 'conjuncts' and the 'ignoredsSlots'.
     DisjointSet<SlotId> partialEquivSlots = new DisjointSet<SlotId>();
@@ -1822,7 +1846,27 @@ public class Analyzer {
       // slots may not be in the same eq class due to outer joins
       if (firstEqClassId != secondEqClassId) continue;
       // update equivalences and remove redundant conjuncts
-      if (!partialEquivSlots.union(eqSlots.first, eqSlots.second)) conjunctIter.remove();
+      if (!partialEquivSlots.union(eqSlots.first, eqSlots.second)) {
+        conjunctIter.remove();
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("Removed redundant conjunct: " + conjunct.debugString());
+        }
+      }
+    }
+    // Union the slots of every previously assigned equality predicate so that slot
+    // equivalences are not enforced multiple times.
+    if (globalState_.assignedConjunctsByTupleId.containsKey(tid)) {
+      List<BinaryPredicate> inferredConjuncts =
+          globalState_.assignedConjunctsByTupleId.get(tid);
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("Previously assigned predicates: " +
+            Expr.debugString(inferredConjuncts));
+      }
+      for (BinaryPredicate conjunct : inferredConjuncts) {
+        Pair<SlotId, SlotId> slots = conjunct.getEqSlots();
+        if (slots == null) continue;
+        partialEquivSlots.union(slots.first, slots.second);
+      }
     }
     // Suppose conjuncts had these predicates belonging to equivalence classes e1 and e2:
     // e1: s1 = s2, s3 = s4, s3 = s5
@@ -1892,6 +1936,9 @@ public class Analyzer {
           result.put(sccId, slotIds);
         }
         slotIds.add(slotDesc.getId());
+        if (LOG.isTraceEnabled()) {
+          LOG.trace(String.format("slot(%s) -> scc(%d)", slotDesc.getId(), sccId));
+        }
       }
     }
     return result;
@@ -2026,11 +2073,19 @@ public class Analyzer {
    * predicates.
    */
   public void computeValueTransferGraph() {
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("All slots: " + SlotDescriptor.debugString(
+          globalState_.descTbl.getSlotDescs()));
+    }
     WritableGraph directValueTransferGraph =
         new WritableGraph(globalState_.descTbl.getMaxSlotId().asInt() + 1);
     constructValueTransfersFromEqPredicates(directValueTransferGraph);
     for (Pair<SlotId, SlotId> p : globalState_.registeredValueTransfers) {
       directValueTransferGraph.addEdge(p.first.asInt(), p.second.asInt());
+      if (LOG.isTraceEnabled()) {
+        LOG.trace("value transfer: from " + p.first.toString() + " to " +
+            p.second.toString());
+      }
     }
     globalState_.valueTransferGraph =
         SccCondensedGraph.condensedReflexiveTransitiveClosure(directValueTransferGraph);
@@ -2251,10 +2306,8 @@ public class Analyzer {
    * Mark predicates as assigned.
    */
   public void markConjunctsAssigned(List<Expr> conjuncts) {
-    if (conjuncts == null) return;
-    for (Expr p: conjuncts) {
-      globalState_.assignedConjuncts.add(p.getId());
-    }
+    if (conjuncts == null || conjuncts.isEmpty()) return;
+    for (Expr p: conjuncts) markConjunctAssigned(p);
   }
 
   /**
@@ -2262,6 +2315,24 @@ public class Analyzer {
    */
   public void markConjunctAssigned(Expr conjunct) {
     globalState_.assignedConjuncts.add(conjunct.getId());
+    if (Predicate.isEquivalencePredicate(conjunct)) {
+      BinaryPredicate binaryPred = (BinaryPredicate) conjunct;
+      List<TupleId> tupleIds = new ArrayList<>();
+      List<SlotId> slotIds = new ArrayList<>();
+      binaryPred.getIds(tupleIds, slotIds);
+      if (tupleIds.size() == 1 && slotIds.size() == 2
+          && binaryPred.getEqSlots() != null) {
+        // Track assigned equality predicates between two slots of a single tuple.
+        TupleId tupleId = tupleIds.get(0);
+        if (!globalState_.assignedConjunctsByTupleId.containsKey(tupleId)) {
+          globalState_.assignedConjunctsByTupleId.put(tupleId, new ArrayList<>());
+        }
+        globalState_.assignedConjunctsByTupleId.get(tupleId).add(binaryPred);
+      }
+    }
+    if (LOG.isTraceEnabled()) {
+      LOG.trace("Assigned " + conjunct.debugString());
+    }
   }
 
   public Set<ExprId> getAssignedConjuncts() {
diff --git a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
index 2bb6625..c7f0e87 100644
--- a/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
+++ b/fe/src/main/java/org/apache/impala/analysis/BinaryPredicate.java
@@ -162,10 +162,11 @@ public class BinaryPredicate extends Predicate {
 
   @Override
   public String debugString() {
-    return Objects.toStringHelper(this)
-        .add("op", op_)
-        .addValue(super.debugString())
-        .toString();
+    Objects.ToStringHelper toStrHelper = Objects.toStringHelper(this);
+    toStrHelper.add("op", op_).addValue(super.debugString());
+    if (isAuxExpr()) toStrHelper.add("isAux", true);
+    if (isInferred_) toStrHelper.add("isInferred", true);
+    return toStrHelper.toString();
   }
 
   @Override
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
index 5be2303..4495f19 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotDescriptor.java
@@ -18,6 +18,7 @@
 package org.apache.impala.analysis;
 
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 
@@ -290,12 +291,22 @@ public class SlotDescriptor {
     return result;
   }
 
+  public static String debugString(Collection<SlotDescriptor> slots) {
+    if (slots == null || slots.isEmpty()) return "";
+    List<String> strings = new ArrayList<>();
+    for (SlotDescriptor slot: slots) {
+      strings.add(slot.debugString());
+    }
+    return Joiner.on("\n").join(strings);
+  }
+
   public String debugString() {
     String pathStr = (path_ == null) ? "null" : path_.toString();
     String typeStr = (type_ == null ? "null" : type_.toString());
     return Objects.toStringHelper(this)
         .add("id", id_.asInt())
         .add("path", pathStr)
+        .add("label", label_)
         .add("type", typeStr)
         .add("materialized", isMaterialized_)
         .add("byteSize", byteSize_)
diff --git a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
index b329b51..7bd3b4d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
+++ b/fe/src/main/java/org/apache/impala/analysis/SlotRef.java
@@ -169,6 +169,7 @@ public class SlotRef extends Expr {
   @Override
   public String debugString() {
     Objects.ToStringHelper toStrHelper = Objects.toStringHelper(this);
+    if (label_ != null) toStrHelper.add("label", label_);
     if (rawPath_ != null) toStrHelper.add("path", Joiner.on('.').join(rawPath_));
     toStrHelper.add("type", type_.toSql());
     String idStr = (desc_ == null ? "null" : Integer.toString(desc_.getId().asInt()));
diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
index 0bbc2b3..9413e11 100644
--- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
+++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java
@@ -355,9 +355,14 @@ public class SingleNodePlanner {
     // No point in adding SelectNode on top of an EmptyNode.
     if (root instanceof EmptySetNode) return root;
     Preconditions.checkNotNull(root);
-    // Gather unassigned conjuncts and generate predicates to enfore
+    // Gather unassigned conjuncts and generate predicates to enforce
     // slot equivalences for each tuple id.
     List<Expr> conjuncts = analyzer.getUnassignedConjuncts(root);
+    if (LOG.isTraceEnabled()) {
+      LOG.trace(String.format("unassigned conjuncts for (Node %s): %s",
+          root.getDisplayLabel(), Expr.debugString(conjuncts)));
+      LOG.trace("all conjuncts: " + analyzer.conjunctAssignmentsDebugString());
+    }
     for (TupleId tid: tupleIds) {
       analyzer.createEquivConjuncts(tid, conjuncts);
     }
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
index e2b7d98..872821d 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/inline-view.test
@@ -1840,3 +1840,383 @@ PLAN-ROOT SINK
    HDFS partitions=4/4 files=4 size=6.32KB
    row-size=8B cardinality=100
 ====
+# IMPALA-7957: Slot equivalences should not be enforced multiple times.
+# Without this patch, the planner incorrectly generates a SELECT node with the wrong
+# predicate "functional.alltypestiny.id = functional.alltypestiny.int_col" on top of the
+# JOIN node, so LEFT JOIN result rows with NULL values are incorrectly rejected.
+SELECT t.id
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+   FROM functional.alltypestiny
+   WHERE int_col = id) t2
+ON (t.id = t2.id)
+UNION ALL
+VALUES (NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=4B cardinality=9
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  row-size=12B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# IMPALA-7957: Slot equivalences should not be enforced multiple times.
+# Coverage for UNION DISTINCT
+SELECT t.id
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+  FROM functional.alltypestiny
+  WHERE int_col = id) t2
+ON (t.id = t2.id)
+UNION DISTINCT
+VALUES (NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+04:AGGREGATE [FINALIZE]
+|  group by: id
+|  row-size=4B cardinality=9
+|
+00:UNION
+|  constant-operands=1
+|  row-size=4B cardinality=9
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  row-size=12B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# IMPALA-7957: Slot equivalences should not be enforced multiple times.
+# The WHERE predicate "t.int_col = t.id" is pushed down to the scan node of t.
+SELECT t.id, t.int_col
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+  FROM functional.alltypestiny) t2
+ON (t.id = t2.id)
+WHERE t.int_col = t.id
+UNION ALL
+VALUES (NULL, NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=8B cardinality=1
+|
+03:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: id = t.id
+|  runtime filters: RF000 <- t.id
+|  row-size=12B cardinality=1
+|
+|--01:SCAN HDFS [functional.alltypestiny t]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: t.int_col = t.id
+|     row-size=8B cardinality=1
+|
+02:SCAN HDFS [functional.alltypestiny]
+   HDFS partitions=4/4 files=4 size=460B
+   runtime filters: RF000 -> id
+   row-size=4B cardinality=8
+====
+# Regression test for IMPALA-7957. Compared to the test above, this test changes the
+# WHERE clause to target the rhs of the LEFT JOIN. The WHERE predicate is correctly
+# duplicated and pushed down to the SCAN node.
+SELECT t2.id, t2.int_col
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+  FROM functional.alltypestiny) t2
+ON (t.id = t2.id)
+WHERE t2.int_col = t2.id
+UNION ALL
+VALUES (NULL, NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=8B cardinality=9
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  other predicates: int_col = id
+|  row-size=12B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: functional.alltypestiny.int_col = functional.alltypestiny.id
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Regression test for IMPALA-7957. The inline view t2 has an ORDER BY LIMIT clause.
+# No behavior changes after the patch.
+SELECT t2.id, t2.int_col
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+  FROM functional.alltypestiny
+  ORDER BY id LIMIT 2) t2
+ON (t.id = t2.id)
+WHERE t2.int_col = t2.id
+UNION ALL
+VALUES (NULL, NULL);
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=8B cardinality=9
+|
+05:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  other predicates: int_col = id
+|  row-size=12B cardinality=8
+|
+|--04:SELECT
+|  |  predicates: id = int_col
+|  |  row-size=8B cardinality=0
+|  |
+|  03:TOP-N [LIMIT=2]
+|  |  order by: id ASC
+|  |  row-size=8B cardinality=2
+|  |
+|  02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     row-size=8B cardinality=8
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Regression test for IMPALA-7957. The inline view t2 has an analytic function.
+# No behavior changes after the patch.
+SELECT t2.id, t2.int_col
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col, count(int_col) over (partition by int_col) int_sum
+  FROM functional.alltypestiny
+  ORDER BY id LIMIT 2) t2
+ON (t.id = t2.id)
+WHERE t2.int_col = t2.id
+UNION ALL
+VALUES (NULL, NULL);
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=8B cardinality=9
+|
+05:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  other predicates: int_col = id
+|  row-size=12B cardinality=8
+|
+|--04:SELECT
+|  |  predicates: id = int_col
+|  |  row-size=8B cardinality=0
+|  |
+|  03:TOP-N [LIMIT=2]
+|  |  order by: id ASC
+|  |  row-size=8B cardinality=2
+|  |
+|  02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     row-size=8B cardinality=8
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Regression test for IMPALA-7957. The first union operand has an ORDER BY LIMIT clause.
+SELECT t.id
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+   FROM functional.alltypestiny
+   WHERE int_col = id) t2
+ON (t.id = t2.id)
+ORDER BY 1 LIMIT 3
+UNION ALL
+VALUES (NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  pass-through-operands: all
+|  row-size=4B cardinality=4
+|
+04:TOP-N [LIMIT=3]
+|  order by: id ASC
+|  row-size=4B cardinality=3
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  row-size=12B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Regression test for IMPALA-7957. The first union operand has a GROUP BY clause.
+SELECT t.id, sum(t.int_col)
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+   FROM functional.alltypestiny
+   WHERE int_col = id) t2
+ON (t.id = t2.id)
+GROUP BY 1
+UNION ALL
+VALUES (NULL, NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  pass-through-operands: all
+|  row-size=12B cardinality=9
+|
+04:AGGREGATE [FINALIZE]
+|  output: sum(t.int_col)
+|  group by: t.id
+|  row-size=12B cardinality=8
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  row-size=16B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=8B cardinality=8
+====
+# IMPALA-7957: Add the same predicate *outside* the left join; it should still be enforced.
+SELECT t.id
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col
+  FROM functional.alltypestiny
+  WHERE int_col = id) t2
+ON (t.id = t2.id) where t2.id = t2.int_col
+UNION ALL
+VALUES (NULL);
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=4B cardinality=9
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  other predicates: id = int_col
+|  row-size=12B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# IMPALA-7957: Same thing except with a predicate on a different column (i.e. adding it
+# to the equivalence class)
+SELECT t.id, t2.id
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col, smallint_col
+  FROM functional.alltypestiny
+  WHERE int_col = id) t2
+ON (t.id = t2.id) where t2.int_col = t2.smallint_col
+UNION ALL
+VALUES (NULL, NULL)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=8B cardinality=9
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  other predicates: int_col = smallint_col
+|  row-size=14B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id, id = smallint_col
+|     row-size=10B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# IMPALA-7957: Multiple predicates that must not be placed above the join
+SELECT t.id, t2.id
+FROM functional.alltypestiny t
+LEFT JOIN
+  (SELECT id, int_col, smallint_col
+  FROM functional.alltypestiny
+  WHERE int_col = id and smallint_col = id and tinyint_col = id) t2
+ON (t.id = t2.id)
+UNION ALL
+VALUES (NULL, NULL);
+---- PLAN
+PLAN-ROOT SINK
+|
+00:UNION
+|  constant-operands=1
+|  row-size=8B cardinality=9
+|
+03:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: t.id = id
+|  row-size=15B cardinality=8
+|
+|--02:SCAN HDFS [functional.alltypestiny]
+|     HDFS partitions=4/4 files=4 size=460B
+|     predicates: int_col = id, smallint_col = id, tinyint_col = id
+|     row-size=11B cardinality=1
+|
+01:SCAN HDFS [functional.alltypestiny t]
+   HDFS partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====