You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2017/07/03 00:29:16 UTC

[6/6] incubator-impala git commit: IMPALA-5547: Rework FK/PK join detection.

IMPALA-5547: Rework FK/PK join detection.

Reworks the FK/PK join detection logic to:
- more accurately recognize many-to-many joins
- avoid dim/dim joins for multi-column PKs

The new detection logic maintains our existing philosophy of generally
assuming a FK/PK join, unless there is strong evidence to the
contrary, as follows.

For each set of simple equi-join conjuncts between two tables, we
compute the joint NDV of the right-hand side columns by
multiplication, and if the joint NDV is significantly smaller than
the right-hand side row count, then we are fairly confident that the
right-hand side is not a PK. Otherwise, we assume the set of conjuncts
could represent a FK/PK relationship.

Extends the explain plan to include the outcome of the FK/PK detection
at EXPLAIN_LEVEL > STANDARD.

Performance testing:
1. Full TPC-DS run on 10TB:
   - Q10 improved by >100x
   - Q72 improved by >25x
   - Q17,Q26,Q29 improved by 2x
   - Q64 regressed by 10x
   - Total runtime: Improved by 2x
   - Geomean: Minor improvement
   The regression of Q64 is understood and we will try to address it
   in follow-on changes. The previous plan was better by accident and
   not because of superior logic.
2. Nightly TPC-H and TPC-DS runs:
   - No perf differences

Testing:
- The existing planner tests cover the changes.
- Code/hdfs run passed.

Change-Id: I49074fe743a28573cff541ef7dbd0edd88892067
Reviewed-on: http://gerrit.cloudera.org:8080/7257
Reviewed-by: Alex Behm <al...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/9f678a74
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/9f678a74
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/9f678a74

Branch: refs/heads/master
Commit: 9f678a74269250bf5c7ae2c5e8afd93c5b3734de
Parents: 931bf49
Author: Alex Behm <al...@cloudera.com>
Authored: Tue Jun 6 16:54:41 2017 -0700
Committer: Impala Public Jenkins <im...@gerrit.cloudera.org>
Committed: Mon Jul 3 00:04:54 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/JoinOperator.java    |   14 +-
 .../org/apache/impala/planner/HashJoinNode.java |   17 +
 .../org/apache/impala/planner/JoinNode.java     |  309 ++--
 .../org/apache/impala/planner/PlannerTest.java  |    8 +
 .../queries/PlannerTest/constant-folding.test   |    1 +
 .../PlannerTest/fk-pk-join-detection.test       |  440 ++++++
 .../queries/PlannerTest/hbase.test              |   24 +-
 .../queries/PlannerTest/joins.test              |   98 +-
 .../queries/PlannerTest/mt-dop-validation.test  |    2 +
 .../PlannerTest/resource-requirements.test      |   46 +-
 .../PlannerTest/spillable-buffer-sizing.test    |   12 +
 .../queries/PlannerTest/tablesample.test        |   14 +-
 .../queries/PlannerTest/tpcds-all.test          | 1335 +++++++++---------
 .../queries/PlannerTest/tpch-nested.test        |  262 ++--
 .../queries/QueryTest/explain-level2.test       |    1 +
 .../queries/QueryTest/explain-level3.test       |    1 +
 16 files changed, 1591 insertions(+), 993 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java b/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java
index f79e490..e2321f0 100644
--- a/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java
+++ b/fe/src/main/java/org/apache/impala/analysis/JoinOperator.java
@@ -44,18 +44,10 @@ public enum JoinOperator {
   }
 
   @Override
-  public String toString() {
-    return description_;
-  }
-
-  public TJoinOp toThrift() {
-    return thriftJoinOp_;
-  }
-
-  public boolean isInnerJoin() {
-    return this == INNER_JOIN;
-  }
+  public String toString() { return description_; }
+  public TJoinOp toThrift() { return thriftJoinOp_; }
 
+  public boolean isInnerJoin() { return this == INNER_JOIN; }
   public boolean isLeftOuterJoin() { return this == LEFT_OUTER_JOIN; }
   public boolean isRightOuterJoin() { return this == RIGHT_OUTER_JOIN; }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java b/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java
index 030f9c5..48492b1 100644
--- a/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/HashJoinNode.java
@@ -37,6 +37,7 @@ import org.apache.impala.thrift.TPlanNodeType;
 import org.apache.impala.thrift.TQueryOptions;
 import org.apache.impala.util.BitUtil;
 
+import com.google.common.base.Joiner;
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
 import com.google.common.collect.Lists;
@@ -163,6 +164,22 @@ public class HashJoinNode extends JoinNode {
         if (i + 1 != eqJoinConjuncts_.size()) output.append(", ");
       }
       output.append("\n");
+
+      // Optionally print FK/PK equi-join conjuncts.
+      if (joinOp_.isInnerJoin() || joinOp_.isOuterJoin()) {
+        if (detailLevel.ordinal() > TExplainLevel.STANDARD.ordinal()) {
+          output.append(detailPrefix + "fk/pk conjuncts: ");
+          if (fkPkEqJoinConjuncts_ == null) {
+            output.append("none");
+          } else if (fkPkEqJoinConjuncts_.isEmpty()) {
+            output.append("assumed fk/pk");
+          } else {
+            output.append(Joiner.on(", ").join(fkPkEqJoinConjuncts_));
+          }
+          output.append("\n");
+        }
+      }
+
       if (!otherJoinConjuncts_.isEmpty()) {
         output.append(detailPrefix + "other join predicates: ")
         .append(getExplainString(otherJoinConjuncts_) + "\n");

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/main/java/org/apache/impala/planner/JoinNode.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/planner/JoinNode.java b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
index 0c983d9..47fa3e5 100644
--- a/fe/src/main/java/org/apache/impala/planner/JoinNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/JoinNode.java
@@ -19,9 +19,7 @@ package org.apache.impala.planner;
 
 import java.util.Collections;
 import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import java.util.Map;
 
 import org.apache.impala.analysis.Analyzer;
 import org.apache.impala.analysis.BinaryPredicate;
@@ -29,12 +27,18 @@ import org.apache.impala.analysis.Expr;
 import org.apache.impala.analysis.JoinOperator;
 import org.apache.impala.analysis.SlotDescriptor;
 import org.apache.impala.analysis.SlotRef;
+import org.apache.impala.analysis.TupleId;
 import org.apache.impala.catalog.ColumnStats;
 import org.apache.impala.catalog.Table;
 import org.apache.impala.common.ImpalaException;
+import org.apache.impala.common.Pair;
 import org.apache.impala.thrift.TJoinDistributionMode;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
 
 /**
  * Logical join operator. Subclasses correspond to implementations of the join operator
@@ -72,6 +76,20 @@ public abstract class JoinNode extends PlanNode {
   // joinTableId_
   protected JoinTableId joinTableId_ = JoinTableId.INVALID;
 
+  // List of equi-join conjuncts believed to be involved in a FK/PK relationship.
+  // The conjuncts are grouped by the tuple ids of the joined base table refs. A conjunct
+  // is only included in this list if it is of the form <SlotRef> = <SlotRef> and the
+  // underlying columns and tables on both sides have stats. See getFkPkEqJoinConjuncts()
+  // for more details on the FK/PK detection logic.
+  // The value of this member represents three different states:
+  // - null: There are eligible join conjuncts and we have high confidence that none of
+  //   them represent a FK/PK relationship.
+  // - non-null and empty: There are no eligible join conjuncts. We assume a FK/PK join.
+  // - non-null and non-empty: There are eligible join conjuncts that could represent
+  //   a FK/PK relationship.
+  // These conjuncts are printed in the explain plan.
+  protected List<EqJoinConjunctScanSlots> fkPkEqJoinConjuncts_;
+
   public enum DistributionMode {
     NONE("NONE"),
     BROADCAST("BROADCAST"),
@@ -178,14 +196,16 @@ public abstract class JoinNode extends PlanNode {
    *
    * We estimate the cardinality based on equality join predicates of the form
    * "L.c = R.d", with L being a table from child(0) and R a table from child(1).
-   * For each such join predicate we try to determine whether it is a foreign/primary
-   * key (FK/PK) join condition, and either use a special FK/PK estimation or a generic
-   * estimation method. We maintain the minimum cardinality for each method separately,
-   * and finally return in order of preference:
-   * - the FK/PK estimate, if there was at least one FP/PK predicate
-   * - the generic estimate, if there was at least one predicate with sufficient stats
-   * - otherwise, we optimistically assume a FK/PK join with a join selectivity of 1,
-   *   and return |child(0)|
+   * For each set of such join predicates between two tables, we try to determine whether
+   * the tables might have a foreign/primary key (FK/PK) relationship, and either use a
+   * special FK/PK estimation or a generic estimation method. Once the estimation method
+   * has been determined we compute the final cardinality based on the single most
+   * selective join predicate. We do not attempt to estimate the joint selectivity of
+   * multiple join predicates to avoid underestimation.
+   * The FK/PK detection logic is based on the assumption that most joins are FK/PK. We
+   * only use the generic estimation method if we have high confidence that there is no
+   * FK/PK relationship. In the absence of relevant stats, we assume FK/PK with a join
+   * selectivity of 1.
    *
    * FK/PK estimation:
    * cardinality = |child(0)| * (|child(1)| / |R|) * (NDV(R.d) / NDV(L.c))
@@ -208,111 +228,206 @@ public abstract class JoinNode extends PlanNode {
    *   might have reduce the cardinality and NDVs
    */
   private long getJoinCardinality(Analyzer analyzer) {
-    Preconditions.checkState(
-        joinOp_ == JoinOperator.INNER_JOIN || joinOp_.isOuterJoin());
+    Preconditions.checkState(joinOp_.isInnerJoin() || joinOp_.isOuterJoin());
+    fkPkEqJoinConjuncts_ = Collections.emptyList();
 
     long lhsCard = getChild(0).cardinality_;
     long rhsCard = getChild(1).cardinality_;
-    if (lhsCard == -1 || rhsCard == -1) return -1;
+    if (lhsCard == -1 || rhsCard == -1) {
+      // Assume FK/PK with a join selectivity of 1.
+      return lhsCard;
+    }
 
-    // Minimum of estimated join cardinalities for FK/PK join conditions.
-    long fkPkJoinCard = -1;
-    // Minimum of estimated join cardinalities for other join conditions.
-    long genericJoinCard = -1;
+    // Collect join conjuncts that are eligible to participate in cardinality estimation.
+    List<EqJoinConjunctScanSlots> eqJoinConjunctSlots = Lists.newArrayList();
     for (Expr eqJoinConjunct: eqJoinConjuncts_) {
-      SlotStats lhsStats = SlotStats.create(eqJoinConjunct.getChild(0));
-      SlotStats rhsStats = SlotStats.create(eqJoinConjunct.getChild(1));
-      // Ignore the equi-join conjunct if we have no relevant table or column stats.
-      if (lhsStats == null || rhsStats == null) continue;
-
-      // We assume a FK/PK join based on the following intuitions:
-      // 1. NDV(L.c) <= NDV(R.d)
-      //    The reasoning is that a FK/PK join is unlikely if the foreign key
-      //    side has a higher NDV than the primary key side. We may miss true
-      //    FK/PK joins due to inaccurate and/or stale stats.
-      // 2. R.d is probably a primary key.
-      //    Requires that NDV(R.d) is very close to |R|.
-      // The idea is that, by default, we assume that every join is a FK/PK join unless
-      // we have compelling evidence that suggests otherwise, so by using || we give the
-      // FK/PK assumption more chances to succeed.
-      if (lhsStats.ndv <= rhsStats.ndv * (1.0 + FK_PK_MAX_STATS_DELTA_PERC) ||
-          Math.abs(rhsStats.numRows - rhsStats.ndv) / (double) rhsStats.numRows
-            <= FK_PK_MAX_STATS_DELTA_PERC) {
-        // Adjust the join selectivity based on the NDV ratio to avoid underestimating
-        // the cardinality if the PK side has a higher NDV than the FK side.
-        double ndvRatio = (double) rhsStats.ndv / (double) lhsStats.ndv;
-        double rhsSelectivity = (double) rhsCard / (double) rhsStats.numRows;
-        long joinCard = (long) Math.ceil(lhsCard * rhsSelectivity * ndvRatio);
-        // FK/PK join cardinality must be <= the lhs cardinality.
-        joinCard = Math.min(lhsCard, joinCard);
-        if (fkPkJoinCard == -1) {
-          fkPkJoinCard = joinCard;
-        } else {
-          fkPkJoinCard = Math.min(fkPkJoinCard, joinCard);
-        }
+      EqJoinConjunctScanSlots slots = EqJoinConjunctScanSlots.create(eqJoinConjunct);
+      if (slots != null) eqJoinConjunctSlots.add(slots);
+    }
+
+    if (eqJoinConjunctSlots.isEmpty()) {
+      // There are no eligible equi-join conjuncts. Optimistically assume FK/PK with a
+      // join selectivity of 1.
+      return lhsCard;
+    }
+
+    fkPkEqJoinConjuncts_ = getFkPkEqJoinConjuncts(eqJoinConjunctSlots);
+    if (fkPkEqJoinConjuncts_ != null) {
+      return getFkPkJoinCardinality(fkPkEqJoinConjuncts_, lhsCard, rhsCard);
+    } else {
+      return getGenericJoinCardinality(eqJoinConjunctSlots, lhsCard, rhsCard);
+    }
+  }
+
+  /**
+   * Returns a list of equi-join conjuncts believed to have a FK/PK relationship based on
+   * whether the right-hand side might be a PK. The conjuncts are grouped by the tuple
+   * ids of the joined base table refs. We prefer to include the conjuncts in the result
+   * unless we have high confidence that a FK/PK relationship is not present. The
+   * right-hand side columns are unlikely to form a PK if their joint NDV is less than
+   * the right-hand side row count. If the joint NDV is close to or higher than the row
+   * count, then it might be a PK.
+   * The given list of eligible join conjuncts must be non-empty.
+   */
+  private List<EqJoinConjunctScanSlots> getFkPkEqJoinConjuncts(
+      List<EqJoinConjunctScanSlots> eqJoinConjunctSlots) {
+    Preconditions.checkState(!eqJoinConjunctSlots.isEmpty());
+    Map<Pair<TupleId, TupleId>, List<EqJoinConjunctScanSlots>> scanSlotsByJoinedTids =
+        EqJoinConjunctScanSlots.groupByJoinedTupleIds(eqJoinConjunctSlots);
+
+    List<EqJoinConjunctScanSlots> result = null;
+    // Iterate over all groups of conjuncts that belong to the same joined tuple id pair.
+    // For each group, we compute the joint NDV of the rhs slots and compare it to the
+    // number of rows in the rhs table.
+    for (List<EqJoinConjunctScanSlots> fkPkCandidate: scanSlotsByJoinedTids.values()) {
+      double jointNdv = 1.0;
+      for (EqJoinConjunctScanSlots slots: fkPkCandidate) jointNdv *= slots.rhsNdv();
+      double rhsNumRows = fkPkCandidate.get(0).rhsNumRows();
+      if (jointNdv >= Math.round(rhsNumRows * (1.0 - FK_PK_MAX_STATS_DELTA_PERC))) {
+        // We cannot disprove that the RHS is a PK.
+        if (result == null) result = Lists.newArrayList();
+        result.addAll(fkPkCandidate);
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Returns the estimated join cardinality of a FK/PK inner or outer join based on the
+   * given list of equi-join conjunct slots and the join input cardinalities.
+   * The returned result is >= 0.
+   * The list of join conjuncts must be non-empty and the cardinalities must be >= 0.
+   */
+  private long getFkPkJoinCardinality(List<EqJoinConjunctScanSlots> eqJoinConjunctSlots,
+      long lhsCard, long rhsCard) {
+    Preconditions.checkState(!eqJoinConjunctSlots.isEmpty());
+    Preconditions.checkState(lhsCard >= 0 && rhsCard >= 0);
+
+    long result = -1;
+    for (EqJoinConjunctScanSlots slots: eqJoinConjunctSlots) {
+      // Adjust the join selectivity based on the NDV ratio to avoid underestimating
+      // the cardinality if the PK side has a higher NDV than the FK side.
+      double ndvRatio = 1.0;
+      if (slots.lhsNdv() > 0) ndvRatio = slots.rhsNdv() / slots.lhsNdv();
+      double rhsSelectivity = Double.MIN_VALUE;
+      if (slots.rhsNumRows() > 0) rhsSelectivity = rhsCard / slots.rhsNumRows();
+      long joinCard = (long) Math.ceil(lhsCard * rhsSelectivity * ndvRatio);
+      if (result == -1) {
+        result = joinCard;
       } else {
-        // Adjust the NDVs on both sides to account for predicates. Intuitively, the NDVs
-        // should only decrease, so we bail if the adjustment would lead to an increase.
-        // TODO: Adjust the NDVs more systematically throughout the plan tree to
-        // get a more accurate NDV at this plan node.
-        if (lhsCard > lhsStats.numRows || rhsCard > rhsStats.numRows) continue;
-        double lhsAdjNdv = lhsStats.ndv * ((double)lhsCard / lhsStats.numRows);
-        double rhsAdjNdv = rhsStats.ndv * ((double)rhsCard / rhsStats.numRows);
-        // Generic join cardinality estimation.
-        long joinCard = (long) Math.ceil(
-            (lhsCard / Math.max(lhsAdjNdv, rhsAdjNdv)) * rhsCard);
-        if (genericJoinCard == -1) {
-          genericJoinCard = joinCard;
-        } else {
-          genericJoinCard = Math.min(genericJoinCard, joinCard);
-        }
+        result = Math.min(result, joinCard);
       }
     }
+    // FK/PK join cardinality must be <= the lhs cardinality.
+    result = Math.min(result, lhsCard);
+    Preconditions.checkState(result >= 0);
+    return result;
+  }
 
-    if (fkPkJoinCard != -1) {
-      return fkPkJoinCard;
-    } else if (genericJoinCard != -1) {
-      return genericJoinCard;
-    } else {
-      // Optimistic FK/PK assumption with join selectivity of 1.
-      return lhsCard;
+  /**
+   * Returns the estimated join cardinality of a generic N:M inner or outer join based
+   * on the given list of equi-join conjunct slots and the join input cardinalities.
+   * The returned result is >= 0.
+   * The list of join conjuncts must be non-empty and the cardinalities must be >= 0.
+   */
+  private long getGenericJoinCardinality(List<EqJoinConjunctScanSlots> eqJoinConjunctSlots,
+      long lhsCard, long rhsCard) {
+    Preconditions.checkState(joinOp_.isInnerJoin() || joinOp_.isOuterJoin());
+    Preconditions.checkState(!eqJoinConjunctSlots.isEmpty());
+    Preconditions.checkState(lhsCard >= 0 && rhsCard >= 0);
+
+    long result = -1;
+    for (EqJoinConjunctScanSlots slots: eqJoinConjunctSlots) {
+      // Adjust the NDVs on both sides to account for predicates. Intuitively, the NDVs
+      // should only decrease. We ignore adjustments that would lead to an increase.
+      double lhsAdjNdv = slots.lhsNdv();
+      if (slots.lhsNumRows() > lhsCard) lhsAdjNdv *= lhsCard / slots.lhsNumRows();
+      double rhsAdjNdv = slots.rhsNdv();
+      if (slots.rhsNumRows() > rhsCard) rhsAdjNdv *= rhsCard / slots.rhsNumRows();
+      long joinCard = Math.round((lhsCard / Math.max(lhsAdjNdv, rhsAdjNdv)) * rhsCard);
+      if (result == -1) {
+        result = joinCard;
+      } else {
+        result = Math.min(result, joinCard);
+      }
     }
+    Preconditions.checkState(result >= 0);
+    return result;
   }
 
   /**
-   * Class combining column and table stats for a particular slot. Contains the NDV
-   * for the slot and the number of rows in the originating table.
+   * Holds the source scan slots of a <SlotRef> = <SlotRef> join predicate.
+   * The underlying table and column on both sides have stats.
    */
-  private static class SlotStats {
-    // Number of distinct values of the slot.
-    public final long ndv;
-    // Number of rows in the originating table.
-    public final long numRows;
-
-    public SlotStats(long ndv, long numRows) {
-      // Cap NDV at num rows of the table.
-      this.ndv = Math.min(ndv, numRows);
-      this.numRows = numRows;
+  public static final class EqJoinConjunctScanSlots {
+    private final Expr eqJoinConjunct_;
+    private final SlotDescriptor lhs_;
+    private final SlotDescriptor rhs_;
+
+    private EqJoinConjunctScanSlots(Expr eqJoinConjunct, SlotDescriptor lhs,
+        SlotDescriptor rhs) {
+      eqJoinConjunct_ = eqJoinConjunct;
+      lhs_ = lhs;
+      rhs_ = rhs;
+    }
+
+    // Convenience functions. They return double to avoid excessive casts in callers.
+    public double lhsNdv() {
+      return Math.min(lhs_.getStats().getNumDistinctValues(), lhsNumRows());
+    }
+    public double rhsNdv() {
+      return Math.min(rhs_.getStats().getNumDistinctValues(), rhsNumRows());
+    }
+    public double lhsNumRows() { return lhs_.getParent().getTable().getNumRows(); }
+    public double rhsNumRows() { return rhs_.getParent().getTable().getNumRows(); }
+
+    public TupleId lhsTid() { return lhs_.getParent().getId(); }
+    public TupleId rhsTid() { return rhs_.getParent().getId(); }
+
+    /**
+     * Returns a new EqJoinConjunctScanSlots for the given equi-join conjunct or null if
+     * the given conjunct is not of the form <SlotRef> = <SlotRef> or if the underlying
+     * table/column of at least one side is missing stats.
+     */
+    public static EqJoinConjunctScanSlots create(Expr eqJoinConjunct) {
+      if (!Expr.IS_EQ_BINARY_PREDICATE.apply(eqJoinConjunct)) return null;
+      SlotDescriptor lhsScanSlot = eqJoinConjunct.getChild(0).findSrcScanSlot();
+      if (lhsScanSlot == null || !hasNumRowsAndNdvStats(lhsScanSlot)) return null;
+      SlotDescriptor rhsScanSlot = eqJoinConjunct.getChild(1).findSrcScanSlot();
+      if (rhsScanSlot == null || !hasNumRowsAndNdvStats(rhsScanSlot)) return null;
+      return new EqJoinConjunctScanSlots(eqJoinConjunct, lhsScanSlot, rhsScanSlot);
+    }
+
+    private static boolean hasNumRowsAndNdvStats(SlotDescriptor slotDesc) {
+      if (slotDesc.getColumn() == null) return false;
+      if (!slotDesc.getStats().hasNumDistinctValues()) return false;
+      Table tbl = slotDesc.getParent().getTable();
+      if (tbl == null || tbl.getNumRows() == -1) return false;
+      return true;
     }
 
     /**
-     * Returns a new SlotStats object from the given expr that is guaranteed
-     * to have valid stats.
-     * Returns null if 'e' is not a SlotRef or a cast SlotRef, or if there are no
-     * valid table/column stats for 'e'.
+     * Groups the given EqJoinConjunctScanSlots by the lhs/rhs tuple combination
+     * and returns the result as a map.
      */
-    public static SlotStats create(Expr e) {
-      // We need both the table and column stats, but 'e' might not directly reference
-      // a scan slot, e.g., if 'e' references a grouping slot of an agg. So we look for
-      // that source scan slot, traversing through materialization points if necessary.
-      SlotDescriptor slotDesc = e.findSrcScanSlot();
-      if (slotDesc == null) return null;
-      Table table = slotDesc.getParent().getTable();
-      if (table == null || table.getNumRows() == -1) return null;
-      if (!slotDesc.getStats().hasNumDistinctValues()) return null;
-      return new SlotStats(
-          slotDesc.getStats().getNumDistinctValues(), table.getNumRows());
+    public static Map<Pair<TupleId, TupleId>, List<EqJoinConjunctScanSlots>>
+        groupByJoinedTupleIds(List<EqJoinConjunctScanSlots> eqJoinConjunctSlots) {
+      Map<Pair<TupleId, TupleId>, List<EqJoinConjunctScanSlots>> scanSlotsByJoinedTids =
+          Maps.newLinkedHashMap();
+      for (EqJoinConjunctScanSlots slots: eqJoinConjunctSlots) {
+        Pair<TupleId, TupleId> tids = Pair.create(slots.lhsTid(), slots.rhsTid());
+        List<EqJoinConjunctScanSlots> scanSlots = scanSlotsByJoinedTids.get(tids);
+        if (scanSlots == null) {
+          scanSlots = Lists.newArrayList();
+          scanSlotsByJoinedTids.put(tids, scanSlots);
+        }
+        scanSlots.add(slots);
+      }
+      return scanSlotsByJoinedTids;
     }
+
+    @Override
+    public String toString() { return eqJoinConjunct_.toSql(); }
   }
 
   /**

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index b920555..d33e678 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -143,6 +143,14 @@ public class PlannerTest extends PlannerTestBase {
   }
 
   @Test
+  public void testFkPkJoinDetection() {
+    TQueryOptions options = defaultQueryOptions();
+    // The FK/PK detection result is included in EXTENDED or higher.
+    options.setExplain_level(TExplainLevel.EXTENDED);
+    runPlannerTestFile("fk-pk-join-detection", options);
+  }
+
+  @Test
   public void testOrder() {
     runPlannerTestFile("order");
   }

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
index 3600aef..82f5c3e 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/constant-folding.test
@@ -130,6 +130,7 @@ PLAN-ROOT SINK
 |
 02:HASH JOIN [LEFT OUTER JOIN]
 |  hash predicates: 2 + a.id = b.id - 2
+|  fk/pk conjuncts: assumed fk/pk
 |  other join predicates: a.int_col <= b.bigint_col + 97, a.int_col >= 0 + b.bigint_col
 |  other predicates: CAST(b.double_col AS DECIMAL(3,2)) > 11.1
 |  mem-estimate=15.68KB mem-reservation=136.00MB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
new file mode 100644
index 0000000..c2065ed
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/fk-pk-join-detection.test
@@ -0,0 +1,440 @@
+# Single-column FK/PK join detection.
+select * from
+tpcds.store_sales inner join tpcds.customer
+on ss_customer_sk = c_customer_sk
+where c_salutation = 'Mrs.'
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_customer_sk = c_customer_sk
+|  fk/pk conjuncts: ss_customer_sk = c_customer_sk
+|  runtime filters: RF000 <- c_customer_sk
+|  mem-estimate=4.46MB mem-reservation=136.00MB
+|  tuple-ids=0,1 row-size=355B cardinality=529700
+|
+|--01:SCAN HDFS [tpcds.customer]
+|     partitions=1/1 files=1 size=12.60MB
+|     predicates: c_salutation = 'Mrs.'
+|     stats-rows=100000 extrapolated-rows=disabled
+|     table stats: rows=100000 size=12.60MB
+|     column stats: all
+|     parquet dictionary predicates: c_salutation = 'Mrs.'
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=255B cardinality=16667
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_customer_sk
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====
+# Single-column FK/PK join detection on left outer join. The join cardinality
+# is not reduced based on the selectivity of the rhs.
+select * from
+tpcds.store_sales left outer join tpcds.customer
+on ss_customer_sk = c_customer_sk
+where c_salutation = 'Mrs.'
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: ss_customer_sk = c_customer_sk
+|  fk/pk conjuncts: ss_customer_sk = c_customer_sk
+|  other predicates: c_salutation = 'Mrs.'
+|  mem-estimate=4.46MB mem-reservation=136.00MB
+|  tuple-ids=0,1N row-size=355B cardinality=2880404
+|
+|--01:SCAN HDFS [tpcds.customer]
+|     partitions=1/1 files=1 size=12.60MB
+|     predicates: c_salutation = 'Mrs.'
+|     stats-rows=100000 extrapolated-rows=disabled
+|     table stats: rows=100000 size=12.60MB
+|     column stats: all
+|     parquet dictionary predicates: c_salutation = 'Mrs.'
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=255B cardinality=16667
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====
+# Single-column FK/PK join detection on right outer join. The join cardinality
+# is reduced based on the selectivity of the rhs.
+select * from
+tpcds.store_sales right outer join tpcds.customer
+on ss_customer_sk = c_customer_sk
+where c_salutation = 'Mrs.'
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: ss_customer_sk = c_customer_sk
+|  fk/pk conjuncts: ss_customer_sk = c_customer_sk
+|  runtime filters: RF000 <- c_customer_sk
+|  mem-estimate=4.46MB mem-reservation=136.00MB
+|  tuple-ids=0N,1 row-size=355B cardinality=529700
+|
+|--01:SCAN HDFS [tpcds.customer]
+|     partitions=1/1 files=1 size=12.60MB
+|     predicates: c_salutation = 'Mrs.'
+|     stats-rows=100000 extrapolated-rows=disabled
+|     table stats: rows=100000 size=12.60MB
+|     column stats: all
+|     parquet dictionary predicates: c_salutation = 'Mrs.'
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=255B cardinality=16667
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_customer_sk
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====
+# Multi-column FK/PK join detection
+select * from
+tpcds.store_sales inner join tpcds.store_returns
+on ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number
+where sr_return_quantity < 10
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_item_sk = sr_item_sk, ss_ticket_number = sr_ticket_number
+|  fk/pk conjuncts: ss_item_sk = sr_item_sk, ss_ticket_number = sr_ticket_number
+|  runtime filters: RF000 <- sr_item_sk, RF001 <- sr_ticket_number
+|  mem-estimate=2.65MB mem-reservation=136.00MB
+|  tuple-ids=0,1 row-size=188B cardinality=211838
+|
+|--01:SCAN HDFS [tpcds.store_returns]
+|     partitions=1/1 files=1 size=31.19MB
+|     predicates: sr_return_quantity < 10
+|     stats-rows=287514 extrapolated-rows=disabled
+|     table stats: rows=287514 size=31.19MB
+|     column stats: all
+|     parquet dictionary predicates: sr_return_quantity < 10
+|     mem-estimate=80.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=88B cardinality=28751
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_item_sk, RF001 -> ss_ticket_number
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====
+# Many-to-many join detection.
+select * from
+tpcds.store_sales inner join tpcds.web_sales
+on ss_sold_time_sk = ws_sold_time_sk
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_sold_time_sk = ws_sold_time_sk
+|  fk/pk conjuncts: none
+|  runtime filters: RF000 <- ws_sold_time_sk
+|  mem-estimate=108.67MB mem-reservation=136.00MB
+|  tuple-ids=0,1 row-size=244B cardinality=44136418
+|
+|--01:SCAN HDFS [tpcds.web_sales]
+|     partitions=1/1 files=1 size=140.07MB
+|     stats-rows=719384 extrapolated-rows=disabled
+|     table stats: rows=719384 size=140.07MB
+|     column stats: all
+|     mem-estimate=160.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=144B cardinality=719384
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_sold_time_sk
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====
+# PK/PK join is detected as FK/PK.
+select * from
+tpcds.date_dim a inner join tpcds.date_dim b
+on a.d_date_sk = b.d_date_sk
+where a.d_holiday = "Y"
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: b.d_date_sk = a.d_date_sk
+|  fk/pk conjuncts: b.d_date_sk = a.d_date_sk
+|  runtime filters: RF000 <- a.d_date_sk
+|  mem-estimate=11.62MB mem-reservation=136.00MB
+|  tuple-ids=1,0 row-size=606B cardinality=36525
+|
+|--00:SCAN HDFS [tpcds.date_dim a]
+|     partitions=1/1 files=1 size=9.84MB
+|     predicates: a.d_holiday = 'Y'
+|     stats-rows=73049 extrapolated-rows=disabled
+|     table stats: rows=73049 size=9.84MB
+|     column stats: all
+|     parquet dictionary predicates: a.d_holiday = 'Y'
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=0 row-size=303B cardinality=36525
+|
+01:SCAN HDFS [tpcds.date_dim b]
+   partitions=1/1 files=1 size=9.84MB
+   runtime filters: RF000 -> b.d_date_sk
+   stats-rows=73049 extrapolated-rows=disabled
+   table stats: rows=73049 size=9.84MB
+   column stats: all
+   mem-estimate=48.00MB mem-reservation=0B
+   tuple-ids=1 row-size=303B cardinality=73049
+====
+# Single query with various join types combined.
+select 1 from
+  tpcds.store_sales, tpcds.store_returns, tpcds.customer,
+  tpcds.date_dim d1, tpcds.date_dim d2
+where ss_item_sk = sr_item_sk and ss_ticket_number = sr_ticket_number
+  and ss_sold_date_sk = d1.d_date_sk
+  and sr_returned_date_sk = d2.d_date_sk
+  and ss_addr_sk = c_current_addr_sk
+  and d1.d_fy_week_seq = 1000
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+08:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_addr_sk = c_current_addr_sk
+|  fk/pk conjuncts: none
+|  runtime filters: RF000 <- c_current_addr_sk
+|  mem-estimate=429.69KB mem-reservation=136.00MB
+|  tuple-ids=1,0,3,4,2 row-size=60B cardinality=19358
+|
+|--02:SCAN HDFS [tpcds.customer]
+|     partitions=1/1 files=1 size=12.60MB
+|     stats-rows=100000 extrapolated-rows=disabled
+|     table stats: rows=100000 size=12.60MB
+|     column stats: all
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=2 row-size=4B cardinality=100000
+|
+07:HASH JOIN [INNER JOIN]
+|  hash predicates: sr_returned_date_sk = d2.d_date_sk
+|  fk/pk conjuncts: sr_returned_date_sk = d2.d_date_sk
+|  runtime filters: RF001 <- d2.d_date_sk
+|  mem-estimate=313.88KB mem-reservation=136.00MB
+|  tuple-ids=1,0,3,4 row-size=56B cardinality=8131
+|
+|--04:SCAN HDFS [tpcds.date_dim d2]
+|     partitions=1/1 files=1 size=9.84MB
+|     stats-rows=73049 extrapolated-rows=disabled
+|     table stats: rows=73049 size=9.84MB
+|     column stats: all
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=4 row-size=4B cardinality=73049
+|
+06:HASH JOIN [INNER JOIN]
+|  hash predicates: sr_item_sk = ss_item_sk, sr_ticket_number = ss_ticket_number
+|  fk/pk conjuncts: sr_item_sk = ss_item_sk, sr_ticket_number = ss_ticket_number
+|  runtime filters: RF002 <- ss_item_sk, RF003 <- ss_ticket_number
+|  mem-estimate=380.02KB mem-reservation=136.00MB
+|  tuple-ids=1,0,3 row-size=52B cardinality=8131
+|
+|--05:HASH JOIN [INNER JOIN]
+|  |  hash predicates: ss_sold_date_sk = d1.d_date_sk
+|  |  fk/pk conjuncts: ss_sold_date_sk = d1.d_date_sk
+|  |  runtime filters: RF004 <- d1.d_date_sk
+|  |  mem-estimate=62B mem-reservation=136.00MB
+|  |  tuple-ids=0,3 row-size=32B cardinality=11055
+|  |
+|  |--03:SCAN HDFS [tpcds.date_dim d1]
+|  |     partitions=1/1 files=1 size=9.84MB
+|  |     predicates: d1.d_fy_week_seq = 1000
+|  |     stats-rows=73049 extrapolated-rows=disabled
+|  |     table stats: rows=73049 size=9.84MB
+|  |     column stats: all
+|  |     parquet dictionary predicates: d1.d_fy_week_seq = 1000
+|  |     mem-estimate=48.00MB mem-reservation=0B
+|  |     tuple-ids=3 row-size=8B cardinality=7
+|  |
+|  00:SCAN HDFS [tpcds.store_sales]
+|     partitions=1824/1824 files=1824 size=326.32MB
+|     runtime filters: RF000 -> ss_addr_sk, RF004 -> ss_sold_date_sk
+|     stats-rows=2880404 extrapolated-rows=disabled
+|     table stats: rows=2880404 size=326.32MB
+|     column stats: all
+|     mem-estimate=128.00MB mem-reservation=0B
+|     tuple-ids=0 row-size=24B cardinality=2880404
+|
+01:SCAN HDFS [tpcds.store_returns]
+   partitions=1/1 files=1 size=31.19MB
+   runtime filters: RF001 -> sr_returned_date_sk, RF002 -> sr_item_sk, RF003 -> sr_ticket_number
+   stats-rows=287514 extrapolated-rows=disabled
+   table stats: rows=287514 size=31.19MB
+   column stats: all
+   mem-estimate=80.00MB mem-reservation=0B
+   tuple-ids=1 row-size=20B cardinality=287514
+====
+# Assumed FK/PK join because of non-trivial equi-join exprs.
+select * from
+tpcds.store_sales inner join tpcds.customer
+on ss_customer_sk % 10 = c_customer_sk / 100
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_customer_sk % 10 = c_customer_sk / 100
+|  fk/pk conjuncts: assumed fk/pk
+|  runtime filters: RF000 <- c_customer_sk / 100
+|  mem-estimate=26.79MB mem-reservation=136.00MB
+|  tuple-ids=0,1 row-size=355B cardinality=2880404
+|
+|--01:SCAN HDFS [tpcds.customer]
+|     partitions=1/1 files=1 size=12.60MB
+|     stats-rows=100000 extrapolated-rows=disabled
+|     table stats: rows=100000 size=12.60MB
+|     column stats: all
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=255B cardinality=100000
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_customer_sk % 10
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====
+# Assumed FK/PK join due to missing stats on the rhs. Join cardinality is equal to
+# the lhs cardinality.
+select 1 from
+tpcds.store_sales inner join tpcds_seq_snap.customer
+on ss_customer_sk = c_customer_sk
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_customer_sk = c_customer_sk
+|  fk/pk conjuncts: assumed fk/pk
+|  runtime filters: RF000 <- c_customer_sk
+|  mem-estimate=2.00GB mem-reservation=136.00MB
+|  tuple-ids=0,1 row-size=8B cardinality=2880404
+|
+|--01:SCAN HDFS [tpcds_seq_snap.customer]
+|     partitions=1/1 files=1 size=8.59MB
+|     stats-rows=unavailable extrapolated-rows=disabled
+|     table stats: rows=unavailable size=unavailable
+|     column stats: unavailable
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=4B cardinality=unavailable
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_customer_sk
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=4B cardinality=2880404
+====
+# Assumed FK/PK join due to missing stats on the lhs. Join cardinality is unknown.
+select /* +straight_join */ 1 from
+tpcds_seq_snap.store_sales inner join tpcds.customer
+on ss_customer_sk = c_customer_sk
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_customer_sk = c_customer_sk
+|  fk/pk conjuncts: assumed fk/pk
+|  runtime filters: RF000 <- c_customer_sk
+|  mem-estimate=429.69KB mem-reservation=136.00MB
+|  tuple-ids=0,1 row-size=8B cardinality=unavailable
+|
+|--01:SCAN HDFS [tpcds.customer]
+|     partitions=1/1 files=1 size=12.60MB
+|     stats-rows=100000 extrapolated-rows=disabled
+|     table stats: rows=100000 size=12.60MB
+|     column stats: all
+|     mem-estimate=48.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=4B cardinality=100000
+|
+00:SCAN HDFS [tpcds_seq_snap.store_sales]
+   partitions=1824/1824 files=1824 size=207.90MB
+   runtime filters: RF000 -> ss_customer_sk
+   stats-rows=unavailable extrapolated-rows=disabled
+   table stats: rows=unavailable size=unavailable
+   column stats: unavailable
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=4B cardinality=unavailable
+====
+# Join is detected as many-to-many even though the rhs join columns
+# are in a group by on the rhs input.
+select * from
+tpcds.store_sales inner join
+(select distinct ws_sold_time_sk from tpcds.web_sales) v
+on ss_sold_time_sk = ws_sold_time_sk
+---- PLAN
+F00:PLAN FRAGMENT [UNPARTITIONED] hosts=1 instances=1
+PLAN-ROOT SINK
+|  mem-estimate=0B mem-reservation=0B
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: ss_sold_time_sk = ws_sold_time_sk
+|  fk/pk conjuncts: none
+|  runtime filters: RF000 <- ws_sold_time_sk
+|  mem-estimate=170.89KB mem-reservation=136.00MB
+|  tuple-ids=0,2 row-size=104B cardinality=2440073
+|
+|--02:AGGREGATE [FINALIZE]
+|  |  group by: ws_sold_time_sk
+|  |  mem-estimate=10.00MB mem-reservation=264.00MB
+|  |  tuple-ids=2 row-size=4B cardinality=39771
+|  |
+|  01:SCAN HDFS [tpcds.web_sales]
+|     partitions=1/1 files=1 size=140.07MB
+|     stats-rows=719384 extrapolated-rows=disabled
+|     table stats: rows=719384 size=140.07MB
+|     column stats: all
+|     mem-estimate=160.00MB mem-reservation=0B
+|     tuple-ids=1 row-size=4B cardinality=719384
+|
+00:SCAN HDFS [tpcds.store_sales]
+   partitions=1824/1824 files=1824 size=326.32MB
+   runtime filters: RF000 -> ss_sold_time_sk
+   stats-rows=2880404 extrapolated-rows=disabled
+   table stats: rows=2880404 size=326.32MB
+   column stats: all
+   mem-estimate=128.00MB mem-reservation=0B
+   tuple-ids=0 row-size=100B cardinality=2880404
+====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
index 91aff74..e0728be 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/hbase.test
@@ -611,16 +611,16 @@ where
 PLAN-ROOT SINK
 |
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: b.int_col = a.int_col
-|
-|--03:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.int_col = c.int_col
-|  |
-|  |--02:SCAN HBASE [functional_hbase.alltypessmall c]
-|  |     predicates: c.month = 4
-|  |
-|  01:SCAN HBASE [functional_hbase.alltypessmall a]
-|
-00:SCAN HBASE [functional_hbase.alltypessmall b]
-   predicates: b.bool_col = FALSE
+|  hash predicates: a.int_col = b.int_col
+|
+|--00:SCAN HBASE [functional_hbase.alltypessmall b]
+|     predicates: b.bool_col = FALSE
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: a.int_col = c.int_col
+|
+|--02:SCAN HBASE [functional_hbase.alltypessmall c]
+|     predicates: c.month = 4
+|
+01:SCAN HBASE [functional_hbase.alltypessmall a]
 ====

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
index 0fdb19d..42493ff 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test
@@ -830,64 +830,64 @@ on (b.int_col = c.int_col and c.bool_col = b.bool_col)
 PLAN-ROOT SINK
 |
 05:HASH JOIN [INNER JOIN]
-|  hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col
-|  runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col
+|  hash predicates: a.bool_col = b.bool_col, a.int_col = b.int_col
+|  runtime filters: RF000 <- b.bool_col, RF001 <- b.int_col
 |
-|--04:HASH JOIN [INNER JOIN]
-|  |  hash predicates: a.bool_col = bool_col, a.int_col = int_col
-|  |  runtime filters: RF002 <- bool_col, RF003 <- int_col
-|  |
-|  |--03:AGGREGATE [FINALIZE]
-|  |  |  output: count(*)
-|  |  |  group by: int_col, bool_col
-|  |  |
-|  |  02:SCAN HDFS [functional.alltypes]
-|  |     partitions=24/24 files=24 size=478.45KB
+|--01:SCAN HDFS [functional.alltypes b]
+|     partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: a.bool_col = bool_col, a.int_col = int_col
+|  runtime filters: RF002 <- bool_col, RF003 <- int_col
+|
+|--03:AGGREGATE [FINALIZE]
+|  |  output: count(*)
+|  |  group by: int_col, bool_col
 |  |
-|  00:SCAN HDFS [functional.alltypes a]
+|  02:SCAN HDFS [functional.alltypes]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF002 -> a.bool_col, RF003 -> a.int_col
+|     runtime filters: RF000 -> functional.alltypes.bool_col, RF001 -> functional.alltypes.int_col
 |
-01:SCAN HDFS [functional.alltypes b]
+00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col
+   runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col, RF002 -> a.bool_col, RF003 -> a.int_col
 ---- DISTRIBUTEDPLAN
 PLAN-ROOT SINK
 |
 10:EXCHANGE [UNPARTITIONED]
 |
 05:HASH JOIN [INNER JOIN, PARTITIONED]
-|  hash predicates: b.bool_col = a.bool_col, b.int_col = a.int_col
-|  runtime filters: RF000 <- a.bool_col, RF001 <- a.int_col
+|  hash predicates: a.bool_col = b.bool_col, a.int_col = b.int_col
+|  runtime filters: RF000 <- b.bool_col, RF001 <- b.int_col
 |
-|--04:HASH JOIN [INNER JOIN, PARTITIONED]
-|  |  hash predicates: a.bool_col = bool_col, a.int_col = int_col
-|  |  runtime filters: RF002 <- bool_col, RF003 <- int_col
+|--09:EXCHANGE [HASH(b.int_col,b.bool_col)]
 |  |
-|  |--07:AGGREGATE [FINALIZE]
-|  |  |  output: count:merge(*)
-|  |  |  group by: int_col, bool_col
-|  |  |
-|  |  06:EXCHANGE [HASH(int_col,bool_col)]
-|  |  |
-|  |  03:AGGREGATE [STREAMING]
-|  |  |  output: count(*)
-|  |  |  group by: int_col, bool_col
-|  |  |
-|  |  02:SCAN HDFS [functional.alltypes]
-|  |     partitions=24/24 files=24 size=478.45KB
+|  01:SCAN HDFS [functional.alltypes b]
+|     partitions=24/24 files=24 size=478.45KB
+|
+04:HASH JOIN [INNER JOIN, PARTITIONED]
+|  hash predicates: a.bool_col = bool_col, a.int_col = int_col
+|  runtime filters: RF002 <- bool_col, RF003 <- int_col
+|
+|--07:AGGREGATE [FINALIZE]
+|  |  output: count:merge(*)
+|  |  group by: int_col, bool_col
 |  |
-|  08:EXCHANGE [HASH(a.int_col,a.bool_col)]
+|  06:EXCHANGE [HASH(int_col,bool_col)]
 |  |
-|  00:SCAN HDFS [functional.alltypes a]
+|  03:AGGREGATE [STREAMING]
+|  |  output: count(*)
+|  |  group by: int_col, bool_col
+|  |
+|  02:SCAN HDFS [functional.alltypes]
 |     partitions=24/24 files=24 size=478.45KB
-|     runtime filters: RF002 -> a.bool_col, RF003 -> a.int_col
+|     runtime filters: RF000 -> functional.alltypes.bool_col, RF001 -> functional.alltypes.int_col
 |
-09:EXCHANGE [HASH(b.int_col,b.bool_col)]
+08:EXCHANGE [HASH(a.int_col,a.bool_col)]
 |
-01:SCAN HDFS [functional.alltypes b]
+00:SCAN HDFS [functional.alltypes a]
    partitions=24/24 files=24 size=478.45KB
-   runtime filters: RF000 -> b.bool_col, RF001 -> b.int_col
+   runtime filters: RF000 -> a.bool_col, RF001 -> a.int_col, RF002 -> a.bool_col, RF003 -> a.int_col
 ====
 # Tests that all predicates from the On-clause are applied (IMPALA-805)
 # and that slot equivalences are enforced at lowest possible plan node.
@@ -987,20 +987,20 @@ where a.id = c.id and b.int_col = c.int_col and b.int_col = c.id
 PLAN-ROOT SINK
 |
 04:HASH JOIN [INNER JOIN]
-|  hash predicates: c.id = a.id
-|  runtime filters: RF000 <- a.id
-|
-|--00:SCAN HDFS [functional.alltypestiny a]
-|     partitions=4/4 files=4 size=460B
-|
-03:HASH JOIN [INNER JOIN]
-|  hash predicates: b.int_col = c.int_col
-|  runtime filters: RF001 <- c.int_col
+|  hash predicates: a.id = c.id
+|  runtime filters: RF000 <- c.id
 |
 |--02:SCAN HDFS [functional.alltypessmall c]
 |     partitions=4/4 files=4 size=6.32KB
 |     predicates: c.id = c.int_col
-|     runtime filters: RF000 -> c.id
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: b.int_col = a.id
+|  runtime filters: RF001 <- a.id
+|
+|--00:SCAN HDFS [functional.alltypestiny a]
+|     partitions=4/4 files=4 size=460B
+|     runtime filters: RF000 -> a.id
 |
 01:SCAN HDFS [functional.alltypes b]
    partitions=24/24 files=24 size=478.45KB

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
index 8147d0c..41b76bc 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/mt-dop-validation.test
@@ -311,6 +311,7 @@ PLAN-ROOT SINK
 |
 |--06:HASH JOIN [INNER JOIN]
 |  |  hash predicates: o1.o_orderkey = o2.o_orderkey + 2
+|  |  fk/pk conjuncts: assumed fk/pk
 |  |  mem-estimate=0B mem-reservation=136.00MB
 |  |  tuple-ids=1,0,2 row-size=286B cardinality=10
 |  |
@@ -358,6 +359,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=9
 |
 |--06:HASH JOIN [INNER JOIN]
 |  |  hash predicates: o1.o_orderkey = o2.o_orderkey + 2
+|  |  fk/pk conjuncts: assumed fk/pk
 |  |  mem-estimate=0B mem-reservation=136.00MB
 |  |  tuple-ids=1,0,2 row-size=286B cardinality=10
 |  |

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
index e49938c..0d527de 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/resource-requirements.test
@@ -14,9 +14,9 @@ PLAN-ROOT SINK
 |
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=263B cardinality=6001215
@@ -34,9 +34,9 @@ PLAN-ROOT SINK
 |
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=263B cardinality=6001215
@@ -272,9 +272,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 |  tuple-ids=1 row-size=16B cardinality=1563438
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=8B cardinality=6001215
@@ -309,9 +309,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 |  tuple-ids=1 row-size=16B cardinality=1563438
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=8B cardinality=6001215
@@ -342,9 +342,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 |  tuple-ids=1 row-size=8B cardinality=1
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=0B mem-reservation=0B
    tuple-ids=0 row-size=0B cardinality=6001215
@@ -372,9 +372,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 |  tuple-ids=1 row-size=8B cardinality=1
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=0B cardinality=6001215
@@ -403,9 +403,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 |  tuple-ids=1 row-size=263B cardinality=6001215
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=263B cardinality=6001215
@@ -429,9 +429,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 |  tuple-ids=1 row-size=263B cardinality=6001215
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=263B cardinality=6001215
@@ -462,9 +462,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 |  tuple-ids=1 row-size=263B cardinality=100
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=263B cardinality=6001215
@@ -489,9 +489,9 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 |  tuple-ids=1 row-size=263B cardinality=100
 |
 00:SCAN HDFS [tpch_parquet.lineitem, RANDOM]
-   partitions=1/1 files=3 size=193.61MB
+   partitions=1/1 files=3 size=193.92MB
    stats-rows=6001215 extrapolated-rows=disabled
-   table stats: rows=6001215 size=193.61MB
+   table stats: rows=6001215 size=193.92MB
    column stats: all
    mem-estimate=80.00MB mem-reservation=0B
    tuple-ids=0 row-size=263B cardinality=6001215
@@ -514,6 +514,7 @@ PLAN-ROOT SINK
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  runtime filters: RF000 <- o_orderkey
 |  mem-estimate=300.41MB mem-reservation=136.00MB
 |  tuple-ids=0,1 row-size=454B cardinality=5757710
@@ -555,6 +556,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash-table-id=00
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  runtime filters: RF000 <- o_orderkey
 |  mem-estimate=300.41MB mem-reservation=136.00MB
 |  tuple-ids=0,1 row-size=454B cardinality=5757710
@@ -882,6 +884,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=3
 |
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: o_custkey = c_custkey
+|  fk/pk conjuncts: o_custkey = c_custkey
 |  runtime filters: RF001 <- c_custkey
 |  mem-estimate=6.61MB mem-reservation=136.00MB
 |  tuple-ids=2,1,0 row-size=108B cardinality=5757710
@@ -901,6 +904,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=3
 |
 05:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  runtime filters: RF002 <- o_orderkey
 |  mem-estimate=26.23MB mem-reservation=136.00MB
 |  tuple-ids=2,1 row-size=66B cardinality=5757710
@@ -1011,6 +1015,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=6
 06:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash-table-id=01
 |  hash predicates: o_custkey = c_custkey
+|  fk/pk conjuncts: o_custkey = c_custkey
 |  runtime filters: RF001 <- c_custkey
 |  mem-estimate=6.61MB mem-reservation=136.00MB
 |  tuple-ids=2,1,0 row-size=108B cardinality=5757710
@@ -1037,6 +1042,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=6
 05:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash-table-id=02
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  runtime filters: RF002 <- o_orderkey
 |  mem-estimate=13.11MB mem-reservation=136.00MB
 |  tuple-ids=2,1 row-size=66B cardinality=5757710

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test b/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
index 3aa2cba..fb30e09 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/spillable-buffer-sizing.test
@@ -17,6 +17,7 @@ PLAN-ROOT SINK
 F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=1
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: c_nationkey = n_nationkey
+|  fk/pk conjuncts: c_nationkey = n_nationkey
 |  runtime filters: RF000 <- n_nationkey
 |  mem-estimate=3.15KB mem-reservation=1.06MB
 |  tuple-ids=0,1 row-size=355B cardinality=150000
@@ -58,6 +59,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=1 instances=2
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash-table-id=00
 |  hash predicates: c_nationkey = n_nationkey
+|  fk/pk conjuncts: c_nationkey = n_nationkey
 |  runtime filters: RF000 <- n_nationkey
 |  mem-estimate=3.15KB mem-reservation=1.06MB
 |  tuple-ids=0,1 row-size=355B cardinality=150000
@@ -109,6 +111,7 @@ PLAN-ROOT SINK
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  mem-estimate=300.41MB mem-reservation=136.00MB
 |  tuple-ids=0,1N row-size=454B cardinality=6001215
 |
@@ -148,6 +151,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
 |  hash-table-id=00
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  mem-estimate=300.41MB mem-reservation=136.00MB
 |  tuple-ids=0,1N row-size=454B cardinality=6001215
 |
@@ -197,6 +201,7 @@ PLAN-ROOT SINK
 F02:PLAN FRAGMENT [HASH(o_custkey)] hosts=2 instances=2
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: o_custkey = c_custkey
+|  fk/pk conjuncts: o_custkey = c_custkey
 |  runtime filters: RF000 <- c_custkey
 |  mem-estimate=18.69MB mem-reservation=34.00MB
 |  tuple-ids=0,1 row-size=428B cardinality=1500000
@@ -243,6 +248,7 @@ F02:PLAN FRAGMENT [HASH(o_custkey)] hosts=2 instances=4
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash-table-id=00
 |  hash predicates: o_custkey = c_custkey
+|  fk/pk conjuncts: o_custkey = c_custkey
 |  runtime filters: RF000 <- c_custkey
 |  mem-estimate=9.35MB mem-reservation=17.00MB
 |  tuple-ids=0,1 row-size=428B cardinality=1500000
@@ -299,6 +305,7 @@ PLAN-ROOT SINK
 F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=2
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash predicates: o_custkey = c_custkey
+|  fk/pk conjuncts: o_custkey = c_custkey
 |  runtime filters: RF000 <- c_custkey
 |  mem-estimate=37.38MB mem-reservation=68.00MB
 |  tuple-ids=0,1 row-size=428B cardinality=1500000
@@ -340,6 +347,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=2 instances=4
 02:HASH JOIN [INNER JOIN, BROADCAST]
 |  hash-table-id=00
 |  hash predicates: o_custkey = c_custkey
+|  fk/pk conjuncts: o_custkey = c_custkey
 |  runtime filters: RF000 <- c_custkey
 |  mem-estimate=37.38MB mem-reservation=68.00MB
 |  tuple-ids=0,1 row-size=428B cardinality=1500000
@@ -393,6 +401,7 @@ PLAN-ROOT SINK
 F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=3
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
 |  hash predicates: alltypes.id = alltypestiny.id
+|  fk/pk conjuncts: assumed fk/pk
 |  mem-estimate=2.00GB mem-reservation=136.00MB
 |  tuple-ids=0,1N row-size=176B cardinality=unavailable
 |
@@ -434,6 +443,7 @@ F00:PLAN FRAGMENT [RANDOM] hosts=3 instances=6
 02:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
 |  hash-table-id=00
 |  hash predicates: alltypes.id = alltypestiny.id
+|  fk/pk conjuncts: assumed fk/pk
 |  mem-estimate=2.00GB mem-reservation=136.00MB
 |  tuple-ids=0,1N row-size=176B cardinality=unavailable
 |
@@ -582,6 +592,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=3
 |
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  runtime filters: RF000 <- o_orderkey
 |  mem-estimate=13.11MB mem-reservation=17.00MB
 |  tuple-ids=0,1 row-size=33B cardinality=5757710
@@ -646,6 +657,7 @@ F02:PLAN FRAGMENT [HASH(l_orderkey)] hosts=3 instances=6
 02:HASH JOIN [INNER JOIN, PARTITIONED]
 |  hash-table-id=00
 |  hash predicates: l_orderkey = o_orderkey
+|  fk/pk conjuncts: l_orderkey = o_orderkey
 |  runtime filters: RF000 <- o_orderkey
 |  mem-estimate=6.56MB mem-reservation=8.50MB
 |  tuple-ids=0,1 row-size=33B cardinality=5757710

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/9f678a74/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
index 6712200..fc8778e 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/tablesample.test
@@ -6,7 +6,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B
 |
 00:SCAN HDFS [functional.alltypes]
-   partitions=3/24 files=3 size=60.02KB
+   partitions=3/24 files=3 size=60.68KB
    stats-rows=7300 extrapolated-rows=disabled
    table stats: rows=7300 size=478.45KB
    column stats: all
@@ -21,7 +21,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B
 |
 00:SCAN HDFS [functional.alltypes]
-   partitions=12/24 files=12 size=239.44KB
+   partitions=12/24 files=12 size=239.26KB
    stats-rows=7300 extrapolated-rows=disabled
    table stats: rows=7300 size=478.45KB
    column stats: all
@@ -38,7 +38,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B
 |
 00:SCAN HDFS [functional.alltypes]
-   partitions=12/24 files=12 size=239.44KB
+   partitions=12/24 files=12 size=239.26KB
    predicates: id < 10
    stats-rows=7300 extrapolated-rows=disabled
    table stats: rows=7300 size=478.45KB
@@ -56,7 +56,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B
 |
 00:SCAN HDFS [functional.alltypes]
-   partitions=6/24 files=6 size=120.46KB
+   partitions=6/24 files=6 size=119.70KB
    stats-rows=3650 extrapolated-rows=disabled
    table stats: rows=7300 size=478.45KB
    column stats: all
@@ -132,7 +132,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B
 |
 00:SCAN HDFS [functional_parquet.alltypes]
-   partitions=3/24 files=3 size=22.05KB
+   partitions=3/24 files=3 size=22.53KB
    stats-rows=unavailable extrapolated-rows=disabled
    table stats: rows=unavailable size=unavailable
    column stats: unavailable
@@ -155,7 +155,7 @@ PLAN-ROOT SINK
 |  tuple-ids=0 row-size=4B cardinality=10
 |
 |--01:SCAN HDFS [functional.alltypessmall t2]
-|     partitions=1/4 files=1 size=1.58KB
+|     partitions=1/4 files=1 size=1.57KB
 |     stats-rows=100 extrapolated-rows=disabled
 |     table stats: rows=100 size=6.32KB
 |     column stats: all
@@ -180,7 +180,7 @@ PLAN-ROOT SINK
 |  mem-estimate=0B mem-reservation=0B
 |
 00:SCAN HDFS [functional.alltypes]
-   partitions=3/24 files=3 size=60.02KB
+   partitions=3/24 files=3 size=60.68KB
    stats-rows=7300 extrapolated-rows=disabled
    table stats: rows=7300 size=478.45KB
    column stats: all