You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/05 14:52:20 UTC
[impala] 03/06: IMPALA-8034: Improve planner tests

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit b08c8e3db2e769609f47d5c0ed87c547d41d1c8b
Author: paul-rogers <pr...@cloudera.com>
AuthorDate: Mon Dec 31 17:47:13 2018 -0800

    IMPALA-8034: Improve planner tests
    
    The FE PlannerTest cases are good, but often unrealistic and overly
    complicated, especially when trying to verify selectivity and
    cardinality. This commit adds new tests that isolate each bit of the
    work for detailed inspection.
    
    The current version of the tests highlighlights a number of bugs to be
    fixed by ongoing work. This commit establishes a clear baseline of
    current behavior, even if that behavior is not quite right. A "Bug:"
    comment explains the expected result.
    
    Tests: These are tests, no production code was changed.
    
    Change-Id: I40e59e08d7ddf2b0391d42e50511aaf95d7275f4
    Reviewed-on: http://gerrit.cloudera.org:8080/12145
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../org/apache/impala/planner/PlannerTest.java     |  32 +
 .../queries/PlannerTest/card-inner-join.test       | 985 +++++++++++++++++++++
 .../queries/PlannerTest/card-multi-join.test       | 275 ++++++
 .../queries/PlannerTest/card-outer-join.test       | 812 +++++++++++++++++
 .../queries/PlannerTest/card-scan.test             | 442 +++++++++
 5 files changed, 2546 insertions(+)

diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
index 4345bfb..cf1b0d8 100644
--- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
+++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java
@@ -49,6 +49,38 @@ import com.google.common.collect.Lists;
 // All planner tests, except for S3 specific tests should go here.
 public class PlannerTest extends PlannerTestBase {
 
+  /**
+   * Scan node cardinality test
+   */
+  @Test
+  public void testScanCardinality() {
+    runPlannerTestFile("card-scan");
+  }
+
+  /**
+   * Inner join cardinality test
+   */
+  @Test
+  public void testInnerJoinCardinality() {
+    runPlannerTestFile("card-inner-join");
+  }
+
+  /**
+   * Outer join cardinality test
+   */
+  @Test
+  public void testOuterJoinCardinality() {
+    runPlannerTestFile("card-outer-join");
+  }
+
+  /**
+   * 3+ table join cardinality test
+   */
+  @Test
+  public void testMultiJoinCardinality() {
+    runPlannerTestFile("card-multi-join");
+  }
+
   @Test
   public void testPredicatePropagation() {
     runPlannerTestFile("predicate-propagation");
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-inner-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-inner-join.test
new file mode 100644
index 0000000..2da3d1a
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-inner-join.test
@@ -0,0 +1,985 @@
+# Join cardinality tests
+# Each has a qualitative description of the quantitative math
+#
+# Simplest join: Cartesian product
+# card = |T1| * |T2|
+select m.id, d.id
+from functional.alltypestiny m,
+     functional.alltypessmall d
+---- PLAN
+PLAN-ROOT SINK
+|
+02:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=8B cardinality=800
+|
+|--00:SCAN HDFS [functional.alltypestiny m]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+01:SCAN HDFS [functional.alltypessmall d]
+   partitions=4/4 files=4 size=6.32KB
+   row-size=4B cardinality=100
+====
+# Cartesian product between table and a no-stats, 0-row table
+select a.id, b.id
+from functional.alltypestiny a,
+     functional.alltypesnopart b
+---- PLAN
+PLAN-ROOT SINK
+|
+02:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=8B cardinality=0
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny a]
+   partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Cartesian product between two empty tables
+select a.id, b.id
+from functional.alltypesnopart a,
+     functional.alltypesnopart b
+---- PLAN
+PLAN-ROOT SINK
+|
+02:NESTED LOOP JOIN [CROSS JOIN]
+|  row-size=8B cardinality=0
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypesnopart a]
+   partitions=1/1 files=0 size=0B
+   row-size=4B cardinality=0
+====
+# Simplest M:1 join, join between a master table (M) and a detail table (D)
+# (small):(tiny)
+# |join| = |D|
+select m.id, d.id
+from functional.alltypestiny m,
+     functional.alltypessmall d
+where m.id = d.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.id = m.id
+|  runtime filters: RF000 <- m.id
+|  row-size=8B cardinality=9
+|
+|--00:SCAN HDFS [functional.alltypestiny m]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+01:SCAN HDFS [functional.alltypessmall d]
+   partitions=4/4 files=4 size=6.32KB
+   runtime filters: RF000 -> d.id
+   row-size=4B cardinality=100
+====
+# Same, but reverse WHERE order
+select m.id, d.id
+from functional.alltypestiny m,
+     functional.alltypessmall d
+where d.id = m.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.id = m.id
+|  runtime filters: RF000 <- m.id
+|  row-size=8B cardinality=9
+|
+|--00:SCAN HDFS [functional.alltypestiny m]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+01:SCAN HDFS [functional.alltypessmall d]
+   partitions=4/4 files=4 size=6.32KB
+   runtime filters: RF000 -> d.id
+   row-size=4B cardinality=100
+====
+# Same, but use JOIN ... ON
+select m.id, d.id
+from functional.alltypestiny m
+join functional.alltypessmall d on m.id = d.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.id = m.id
+|  runtime filters: RF000 <- m.id
+|  row-size=8B cardinality=9
+|
+|--00:SCAN HDFS [functional.alltypestiny m]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+01:SCAN HDFS [functional.alltypessmall d]
+   partitions=4/4 files=4 size=6.32KB
+   runtime filters: RF000 -> d.id
+   row-size=4B cardinality=100
+====
+# Same, reverse ON order
+select m.id, d.id
+from functional.alltypestiny m
+join functional.alltypessmall d on d.id = m.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.id = m.id
+|  runtime filters: RF000 <- m.id
+|  row-size=8B cardinality=9
+|
+|--00:SCAN HDFS [functional.alltypestiny m]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+01:SCAN HDFS [functional.alltypessmall d]
+   partitions=4/4 files=4 size=6.32KB
+   runtime filters: RF000 -> d.id
+   row-size=4B cardinality=100
+====
+# Same, but reverse FROM order
+select m.id, d.id
+from functional.alltypessmall d,
+     functional.alltypestiny m
+where m.id = d.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.id = m.id
+|  runtime filters: RF000 <- m.id
+|  row-size=8B cardinality=9
+|
+|--01:SCAN HDFS [functional.alltypestiny m]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+00:SCAN HDFS [functional.alltypessmall d]
+   partitions=4/4 files=4 size=6.32KB
+   runtime filters: RF000 -> d.id
+   row-size=4B cardinality=100
+====
+# Same, but use JOIN ... ON
+select m.id, d.id
+from functional.alltypessmall m
+join functional.alltypestiny d on m.id = d.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: m.id = d.id
+|  runtime filters: RF000 <- d.id
+|  row-size=8B cardinality=9
+|
+|--01:SCAN HDFS [functional.alltypestiny d]
+|     partitions=4/4 files=4 size=460B
+|     row-size=4B cardinality=8
+|
+00:SCAN HDFS [functional.alltypessmall m]
+   partitions=4/4 files=4 size=6.32KB
+   runtime filters: RF000 -> m.id
+   row-size=4B cardinality=100
+====
+# Similar, but with big tables
+# |M| = |customer| = 150K
+# |M.pk| = |customer.c_custkey| = |customer| = 150K
+# |D| = |orders| = 1.5M
+# |D.fk| = |orders.o_custkey| = 100K
+# |join| = |D| = |orders|
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=24B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=1.50M
+====
+# Simple join between table and a no-stats, 0-row table
+# Bug: expected cardinality = 0
+select a.id, b.id
+from functional.alltypestiny a,
+     functional.alltypesnopart b
+WHERE a.id = b.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  runtime filters: RF000 <- b.id
+|  row-size=8B cardinality=8
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny a]
+   partitions=4/4 files=4 size=460B
+   runtime filters: RF000 -> a.id
+   row-size=4B cardinality=8
+====
+# Cartesian product between two empty tables
+select a.id, b.id
+from functional.alltypesnopart a,
+     functional.alltypesnopart b
+WHERE a.id = b.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  runtime filters: RF000 <- b.id
+|  row-size=8B cardinality=0
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypesnopart a]
+   partitions=1/1 files=0 size=0B
+   runtime filters: RF000 -> a.id
+   row-size=4B cardinality=0
+====
+# Selective master filtering
+# |join| = |D|/|D.fk| = 15
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_name = 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=54B cardinality=16
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_name = 'foo'
+|     row-size=38B cardinality=1
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=1.50M
+====
+# Selective master filtering, column common with detail
+# |join| = |D|/|D.fk| = 15
+# Bug: IMPALA-8014, expected cardinality ~15
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=24B cardinality=1
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_custkey = 10
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_custkey = 10
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=15
+====
+# Selective detail filtering
+# Card = 1: one detail record finds its master record
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and d.o_orderkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: m.c_custkey = d.o_custkey
+|  runtime filters: RF000 <- d.o_custkey
+|  row-size=24B cardinality=1
+|
+|--01:SCAN HDFS [tpch.orders d]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: d.o_orderkey = 10
+|     row-size=16B cardinality=1
+|
+00:SCAN HDFS [tpch.customer m]
+   partitions=1/1 files=1 size=23.08MB
+   runtime filters: RF000 -> m.c_custkey
+   row-size=8B cardinality=150.00K
+====
+# Broad master filtering
+# |M'| = |M| * 0.33 = 50K
+# Each master finds all its details
+# |join|= |M'| * |D|/|D.fk| = 50K * 15 = 750K
+# Though |M.pk| > |D.fk|, we assume that filtering eliminated the unmatched keys
+# Bug: Several, expected cardinality ~750K
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=54B cardinality=228.68K
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_name < 'foo'
+|     row-size=38B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=1.50M
+====
+# Broad master filtering, column common with detail
+# |D'| = |D| * 0.33 = 500K
+# Each detail finds its master
+# |join| = |D'| = 500K
+# Bug: Expected cardinality ~500K
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_custkey < 1234
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=24B cardinality=22.87K
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_custkey < 1234
+|     row-size=8B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_custkey < 1234
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=150.00K
+====
+# Broad detail filtering
+# |D'| = |D| * 0.33 = 500K
+# Every detail finds its master
+# Card = 500K
+# Bug: Expected cardinlity ~500K
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and d.o_orderkey < 1234
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=24B cardinality=150.00K
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_orderkey < 1234
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=150.00K
+====
+# Filtering on join between table and a no-stats, 0-row table
+# Inner join with a zero-sized table produces 0 rows.
+select a.id, b.id
+from functional.alltypestiny a,
+     functional.alltypesnopart b
+WHERE a.id = b.id
+  AND a.id = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  runtime filters: RF000 <- b.id
+|  row-size=8B cardinality=1
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     predicates: b.id = 10
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny a]
+   partitions=4/4 files=4 size=460B
+   predicates: a.id = 10
+   runtime filters: RF000 -> a.id
+   row-size=4B cardinality=1
+====
+# Filtering on join between table and a no-stats, 0-row table
+# Bug: Expected cardinality ~1
+select a.id, b.id
+from functional.alltypestiny a,
+     functional.alltypesnopart b
+WHERE a.id = b.id
+  AND b.id = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  runtime filters: RF000 <- b.id
+|  row-size=8B cardinality=1
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     predicates: b.id = 10
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny a]
+   partitions=4/4 files=4 size=460B
+   predicates: a.id = 10
+   runtime filters: RF000 -> a.id
+   row-size=4B cardinality=1
+====
+# Cartesian product between two empty tables
+select a.id, b.id
+from functional.alltypesnopart a,
+     functional.alltypesnopart b
+WHERE a.id = b.id
+  AND a.id = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  runtime filters: RF000 <- b.id
+|  row-size=8B cardinality=0
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     predicates: b.id = 10
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypesnopart a]
+   partitions=1/1 files=0 size=0B
+   predicates: a.id = 10
+   runtime filters: RF000 -> a.id
+   row-size=4B cardinality=0
+====
+# Selective filtering on both sides
+# |D'| = 1
+# |C'| = 1
+# |D' >< C'| = 0 or 1, depending whether we're lucky
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_name = 'foo'
+  and d.o_orderkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: m.c_custkey = d.o_custkey
+|  runtime filters: RF000 <- d.o_custkey
+|  row-size=54B cardinality=1
+|
+|--01:SCAN HDFS [tpch.orders d]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: d.o_orderkey = 10
+|     row-size=16B cardinality=1
+|
+00:SCAN HDFS [tpch.customer m]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: m.c_name = 'foo'
+   runtime filters: RF000 -> m.c_custkey
+   row-size=38B cardinality=1
+====
+# Correlated filtering on master
+# |join| = |D| / |D.fk|
+# Bug: Expected cardinality 15
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=24B cardinality=1
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_custkey = 10
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_custkey = 10
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=15
+====
+# Correlated filtering on detail
+# |join| = |D| / |D.fk|
+# Bug: Expected cardinality 15
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and d.o_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=24B cardinality=1
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_custkey = 10
+|     row-size=8B cardinality=1
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_custkey = 10
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=15
+====
+# Redundant correlated filtering on both sides
+# Same as above case internally
+# Bug: Expected cardinality ~1
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_name = 'foo'
+  and m.c_custkey = 10
+  and d.o_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=54B cardinality=1
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_custkey = 10, m.c_name = 'foo'
+|     row-size=38B cardinality=1
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_custkey = 10
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=15
+====
+# Selective filtering on master, broad on detail
+# |M'| = 1
+# |D'| = |D| * 0.33, uncorrelated with above
+# |D.fk'| = |D.fk| * 0.33
+# Since we assume containment: that the smaller key set
+# is a subset of the larger one.
+# Card = |D'| / max(1, |D.fk'|) = 5
+# Bug: Expected cardinality ~5
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_name = 'foo'
+  and d.o_orderkey < 1234
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: d.o_custkey = m.c_custkey
+|  runtime filters: RF000 <- m.c_custkey
+|  row-size=54B cardinality=2
+|
+|--00:SCAN HDFS [tpch.customer m]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: m.c_name = 'foo'
+|     row-size=38B cardinality=1
+|
+01:SCAN HDFS [tpch.orders d]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: d.o_orderkey < 1234
+   runtime filters: RF000 -> d.o_custkey
+   row-size=16B cardinality=150.00K
+====
+# Selective filtering on detail, broad on master
+# |M'| = |M| * 0.33
+# |D'| = 1
+# 1/3 chance that the detail finds its master, so card = 0 or 1
+select m.c_custkey, d.o_orderkey
+from tpch.customer m,
+     tpch.orders d
+where m.c_custkey = d.o_custkey
+  and m.c_name < 'foo'
+  and d.o_orderkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: m.c_custkey = d.o_custkey
+|  runtime filters: RF000 <- d.o_custkey
+|  row-size=54B cardinality=1
+|
+|--01:SCAN HDFS [tpch.orders d]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: d.o_orderkey = 10
+|     row-size=16B cardinality=1
+|
+00:SCAN HDFS [tpch.customer m]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: m.c_name < 'foo'
+   runtime filters: RF000 -> m.c_custkey
+   row-size=38B cardinality=15.00K
+====
+# M:N join
+# |partsupp| = 800K
+# |lineitem| = 6M
+# NDV(ps_suppkey) = NDV(l_suppkey) = |suppkey| = 9.7K
+# |lineitem >< partsupp| = (|lineitem| * |partsupp|) / |suppkey|
+#                        = 800K * 6M / 10K = 80 * 600 = 480M
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t2.l_suppkey = t1.ps_suppkey
+|  runtime filters: RF000 <- t1.ps_suppkey
+|  row-size=16B cardinality=494.33M
+|
+|--00:SCAN HDFS [tpch.partsupp t1]
+|     partitions=1/1 files=1 size=112.71MB
+|     row-size=8B cardinality=800.00K
+|
+01:SCAN HDFS [tpch.lineitem t2]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF000 -> t2.l_suppkey
+   row-size=8B cardinality=6.00M
+====
+# M:N join with filtering on smaller table
+# NDV(availqty) = = |availqty| = 10K
+# |partsupp'| = |partsupp| / |availqty|
+#             = 800K / 10K = 80
+# |lineitem >< partsupp'| = |lineitem| * |partsupp'| / |suppkey|
+#                         = 6M * 80 / 10K = 48K
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+  and t1.ps_availqty = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t2.l_suppkey = t1.ps_suppkey
+|  runtime filters: RF000 <- t1.ps_suppkey
+|  row-size=20B cardinality=48.20K
+|
+|--00:SCAN HDFS [tpch.partsupp t1]
+|     partitions=1/1 files=1 size=112.71MB
+|     predicates: t1.ps_availqty = 10
+|     row-size=12B cardinality=78
+|
+01:SCAN HDFS [tpch.lineitem t2]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF000 -> t2.l_suppkey
+   row-size=8B cardinality=6.00M
+====
+# M:N join with filtering on larger table
+# NDV(l_partkey) = |l_partkey| = 200K
+# |lineitem'| = |lineitem| / |l_partkey| = 6M / 200K = 30
+# |lineitem' >< partsupp| = |lineitem'| * |partsupp| / |suppkey|
+#                         = 30 * 800K / 10K = 2400
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+  and t2.l_partkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t1.ps_suppkey = t2.l_suppkey
+|  runtime filters: RF000 <- t2.l_suppkey
+|  row-size=24B cardinality=2.47K
+|
+|--01:SCAN HDFS [tpch.lineitem t2]
+|     partitions=1/1 files=1 size=718.94MB
+|     predicates: t2.l_partkey = 10
+|     row-size=16B cardinality=30
+|
+00:SCAN HDFS [tpch.partsupp t1]
+   partitions=1/1 files=1 size=112.71MB
+   runtime filters: RF000 -> t1.ps_suppkey
+   row-size=8B cardinality=800.00K
+====
+# M:N join with selective filtering on shared column
+# |partsupp'| = |partsupp| / |suppkey| = 800K / 10K = 80
+# But, filtering is done on the join column, so we want:
+# |lineitem >< partsupp'| = |lineitem| * |partsupp'|
+#                         = (80 * 6M) / 10K = 80 * 600 = 48K
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+  and t1.ps_suppkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t2.l_suppkey = t1.ps_suppkey
+|  runtime filters: RF000 <- t1.ps_suppkey
+|  row-size=16B cardinality=50.67K
+|
+|--00:SCAN HDFS [tpch.partsupp t1]
+|     partitions=1/1 files=1 size=112.71MB
+|     predicates: t1.ps_suppkey = 10
+|     row-size=8B cardinality=82
+|
+01:SCAN HDFS [tpch.lineitem t2]
+   partitions=1/1 files=1 size=718.94MB
+   predicates: t2.l_suppkey = 10
+   runtime filters: RF000 -> t2.l_suppkey
+   row-size=8B cardinality=618
+====
+# M:N join with broad filtering on smaller table
+# Most general join case
+# |partsupp'| = |partsupp| * 0.33
+#             = 800K / 3 = 267K
+# |lineietem >< partsupp'| = |lineitem| * |partsupp'| / |suppkey|
+#             = 267K * 6M / 10K = 160M
+# Bug: Expected cardinality ~160M
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+  and t1.ps_availqty < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t2.l_suppkey = t1.ps_suppkey
+|  runtime filters: RF000 <- t1.ps_suppkey
+|  row-size=20B cardinality=49.43M
+|
+|--00:SCAN HDFS [tpch.partsupp t1]
+|     partitions=1/1 files=1 size=112.71MB
+|     predicates: t1.ps_availqty < 10
+|     row-size=12B cardinality=80.00K
+|
+01:SCAN HDFS [tpch.lineitem t2]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF000 -> t2.l_suppkey
+   row-size=8B cardinality=6.00M
+====
+# M:N join with broad filtering on larger table
+# |lineitem'| = |lineitem| * 0.33 = 6M / 3 = 2M
+# |lineitem' >< partsupp| = |lineitem'| * |partsupp| / |suppkey|
+#             = 2M * 800K / 10K = 160M
+# Bug: Expected cardinality ~161M
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+  and t2.l_partkey < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t2.l_suppkey = t1.ps_suppkey
+|  runtime filters: RF000 <- t1.ps_suppkey
+|  row-size=24B cardinality=49.43M
+|
+|--00:SCAN HDFS [tpch.partsupp t1]
+|     partitions=1/1 files=1 size=112.71MB
+|     row-size=8B cardinality=800.00K
+|
+01:SCAN HDFS [tpch.lineitem t2]
+   partitions=1/1 files=1 size=718.94MB
+   predicates: t2.l_partkey < 10
+   runtime filters: RF000 -> t2.l_suppkey
+   row-size=16B cardinality=600.12K
+====
+# M:N join with filtering on both sides
+# Most general join case
+# |lineitem'| = |lineitem| * 0.33
+# |partsup'| = |partsupp| * 0.33
+# |suppkey'| = |suppkey| * 0.33
+# |lineitem' >< partsupp'| = |lineitem'| * |partsupp'| / |suppkey'|
+#              = 800K/3 * 6M/3 / 3.3K = 161M
+# (Recall the containment assumption above.)
+# Bug: Expected cardinality ~161M
+select t1.ps_suppkey, t2.l_suppkey
+from tpch.partsupp t1,
+     tpch.lineitem t2
+where t1.ps_suppkey = t2.l_suppkey
+  and t1.ps_availqty < 10
+  and t2.l_partkey < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: t2.l_suppkey = t1.ps_suppkey
+|  runtime filters: RF000 <- t1.ps_suppkey
+|  row-size=28B cardinality=49.43M
+|
+|--00:SCAN HDFS [tpch.partsupp t1]
+|     partitions=1/1 files=1 size=112.71MB
+|     predicates: t1.ps_availqty < 10
+|     row-size=12B cardinality=80.00K
+|
+01:SCAN HDFS [tpch.lineitem t2]
+   partitions=1/1 files=1 size=718.94MB
+   predicates: t2.l_partkey < 10
+   runtime filters: RF000 -> t2.l_suppkey
+   row-size=16B cardinality=600.12K
+====
+# Join with join-level filter predicates
+# |join| = |orders| * sel(c.nationkey + o.o_shippriority = 10)
+# |orders| = 1.5M
+# sel(c.nationkey + o.o_shippriority = 10) = 0.1 (can't use NDV)
+# |join| = 1.5M * .1 = 150K
+# Bug: Several, expected cardinality ~1.5M
+select c.c_custkey, o.o_orderkey
+from tpch.customer c,
+     tpch.orders o
+where c.c_custkey = o.o_custkey
+  and c.c_nationkey + o.o_shippriority = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=30B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o.o_custkey
+   row-size=20B cardinality=1.50M
+====
+# Join with a table that has no stats and zero rows.
+# The zero rows trumps the stats.
+# Zero-row table is on the build side, |join| = 0
+# Bug: Expected cardinality = 0
+select a.int_col, b.int_col
+from functional.alltypesagg a, functional.alltypesnopart b
+where a.id = b.id
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  runtime filters: RF000 <- b.id
+|  row-size=16B cardinality=11.00K
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     row-size=8B cardinality=0
+|
+00:SCAN HDFS [functional.alltypesagg a]
+   partitions=11/11 files=11 size=814.73KB
+   runtime filters: RF000 -> a.id
+   row-size=8B cardinality=11.00K
+====
+# Adding table and join filters should not affect the result.
+# Bug: Expected cardinality = 0
+select a.int_col, b.int_col
+from functional.alltypesagg a, functional.alltypesnopart b
+where a.id = b.id
+  and a.smallint_col = 10
+  and b.smallint_col = 20
+  and a.int_col + b.int_col > 30
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: a.id = b.id
+|  other predicates: a.int_col + b.int_col > 30
+|  runtime filters: RF000 <- b.id
+|  row-size=20B cardinality=113
+|
+|--01:SCAN HDFS [functional.alltypesnopart b]
+|     partitions=1/1 files=0 size=0B
+|     predicates: b.smallint_col = 20
+|     row-size=10B cardinality=0
+|
+00:SCAN HDFS [functional.alltypesagg a]
+   partitions=11/11 files=11 size=814.73KB
+   predicates: a.smallint_col = 10
+   runtime filters: RF000 -> a.id
+   row-size=10B cardinality=113
+====
+# Join on a computed column
+# Assumes Cartesian product * 0.1
+# |join| = 11K * 7K * 0.1 = 7M
+# Bug: Expected cardinality ~7M
+select a.id, b.id
+from functional.alltypes a, functional.alltypesagg b
+where a.id = b.id + b.int_col
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [INNER JOIN]
+|  hash predicates: b.id + b.int_col = a.id
+|  runtime filters: RF000 <- a.id
+|  row-size=12B cardinality=11.00K
+|
+|--00:SCAN HDFS [functional.alltypes a]
+|     partitions=24/24 files=24 size=478.45KB
+|     row-size=4B cardinality=7.30K
+|
+01:SCAN HDFS [functional.alltypesagg b]
+   partitions=11/11 files=11 size=814.73KB
+   runtime filters: RF000 -> b.id + b.int_col
+   row-size=8B cardinality=11.00K
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-multi-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-multi-join.test
new file mode 100644
index 0000000..02cf737
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-multi-join.test
@@ -0,0 +1,275 @@
+# Three table 1:M join
+# |customer| = 150K
+# |customer.c_custkey| = |customer| = 150K
+# |orders| = 1.5M
+# |orders.o_orderKey| = 100K
+# |lineitem| = 6M
+# |lineitem.l_orderkey| = 1.5M
+# |join| = |lineitem| = 6M
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=66B cardinality=5.76M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=38B cardinality=150.00K
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: i.l_orderkey = o.o_orderkey
+|  runtime filters: RF002 <- o.o_orderkey
+|  row-size=28B cardinality=5.76M
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     runtime filters: RF000 -> o.o_custkey
+|     row-size=16B cardinality=1.50M
+|
+02:SCAN HDFS [tpch.lineitem i]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF002 -> i.l_orderkey
+   row-size=12B cardinality=6.00M
+====
+# Filter on customer table
+# |join| = 1/3 of above = 2M
+# Bug: Expected cardinality ~2M
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+  and c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=66B cardinality=877.79K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=38B cardinality=15.00K
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: i.l_orderkey = o.o_orderkey
+|  runtime filters: RF002 <- o.o_orderkey
+|  row-size=28B cardinality=5.76M
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     runtime filters: RF000 -> o.o_custkey
+|     row-size=16B cardinality=1.50M
+|
+02:SCAN HDFS [tpch.lineitem i]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF002 -> i.l_orderkey
+   row-size=12B cardinality=6.00M
+====
+# Filter on orders table
+# |join| = 1/3 of full join = 2M
+# Bug: Expected cardinality ~2M
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+  and o.o_clerk < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=93B cardinality=575.77K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=38B cardinality=150.00K
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: i.l_orderkey = o.o_orderkey
+|  runtime filters: RF002 <- o.o_orderkey
+|  row-size=55B cardinality=575.77K
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: o.o_clerk < 'foo'
+|     runtime filters: RF000 -> o.o_custkey
+|     row-size=43B cardinality=150.00K
+|
+02:SCAN HDFS [tpch.lineitem i]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF002 -> i.l_orderkey
+   row-size=12B cardinality=6.00M
+====
+# Filter on items table
+# |join| = 1/3 of full join = 2M
+# Bug: Expected cardinality ~2M
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+  and i.l_suppkey < 1234
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=74B cardinality=600.12K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=38B cardinality=150.00K
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_orderkey = i.l_orderkey
+|  runtime filters: RF002 <- i.l_orderkey
+|  row-size=36B cardinality=600.12K
+|
+|--02:SCAN HDFS [tpch.lineitem i]
+|     partitions=1/1 files=1 size=718.94MB
+|     predicates: i.l_suppkey < 1234
+|     row-size=20B cardinality=600.12K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o.o_custkey, RF002 -> o.o_orderkey
+   row-size=16B cardinality=1.50M
+====
+# Correlated filter on two columns
+# |join| = Ave. line items per customer
+# |lineitem| / |o.custkey| = 6M / 100K = 60
+# Bug: Expected cardinality ~60
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+  and c.c_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=66B cardinality=1
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_custkey = 10
+|     row-size=38B cardinality=1
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: i.l_orderkey = o.o_orderkey
+|  runtime filters: RF002 <- o.o_orderkey
+|  row-size=28B cardinality=58
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: o.o_custkey = 10
+|     runtime filters: RF000 -> o.o_custkey
+|     row-size=16B cardinality=15
+|
+02:SCAN HDFS [tpch.lineitem i]
+   partitions=1/1 files=1 size=718.94MB
+   runtime filters: RF002 -> i.l_orderkey
+   row-size=12B cardinality=6.00M
+====
+# Correlated filter on two columns
+# |join| = Ave. line items per customer
+# |lineitem| / |o.orderkey| = 6M / 1.5M = 4
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+  and o.o_orderkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: o.o_orderkey = i.l_orderkey
+|  runtime filters: RF000 <- i.l_orderkey
+|  row-size=66B cardinality=4
+|
+|--02:SCAN HDFS [tpch.lineitem i]
+|     partitions=1/1 files=1 size=718.94MB
+|     predicates: i.l_orderkey = 10
+|     row-size=12B cardinality=4
+|
+03:HASH JOIN [INNER JOIN]
+|  hash predicates: c.c_custkey = o.o_custkey
+|  runtime filters: RF002 <- o.o_custkey
+|  row-size=54B cardinality=1
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: o.o_orderkey = 10
+|     runtime filters: RF000 -> o.o_orderkey
+|     row-size=16B cardinality=1
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   runtime filters: RF002 -> c.c_custkey
+   row-size=38B cardinality=150.00K
+====
+# Correlated filter on two pairs of columns
+# |join| = Ave. line items per customer
+# |lineitem| / |o.orderkey| = 6M / 1.5M = 4
+# Bug: wrong join plan, expected (lineitem >< order) >< customer
+select c.c_name, o.o_orderkey, i.l_linenumber
+from tpch.customer c,
+     tpch.orders o,
+     tpch.lineitem i
+where c.c_custkey = o.o_custkey
+  and o.o_orderkey = i.l_orderkey
+  and o.o_orderkey = 10
+  and c.c_custkey = 20
+---- PLAN
+PLAN-ROOT SINK
+|
+04:HASH JOIN [INNER JOIN]
+|  hash predicates: c.c_custkey = o.o_custkey
+|  runtime filters: RF000 <- o.o_custkey
+|  row-size=66B cardinality=1
+|
+|--03:HASH JOIN [INNER JOIN]
+|  |  hash predicates: i.l_orderkey = o.o_orderkey
+|  |  runtime filters: RF002 <- o.o_orderkey
+|  |  row-size=28B cardinality=1
+|  |
+|  |--01:SCAN HDFS [tpch.orders o]
+|  |     partitions=1/1 files=1 size=162.56MB
+|  |     predicates: o.o_orderkey = 10, o.o_custkey = 20
+|  |     row-size=16B cardinality=1
+|  |
+|  02:SCAN HDFS [tpch.lineitem i]
+|     partitions=1/1 files=1 size=718.94MB
+|     predicates: i.l_orderkey = 10
+|     runtime filters: RF002 -> i.l_orderkey
+|     row-size=12B cardinality=4
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey = 20
+   runtime filters: RF000 -> c.c_custkey
+   row-size=38B cardinality=1
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-outer-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-outer-join.test
new file mode 100644
index 0000000..d54e307
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-outer-join.test
@@ -0,0 +1,812 @@
+# Right outer tests
+#
+# Right outer
+# |join| = |orders| = 1.5M
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  row-size=24B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Right outer that is the inverse of a left-outer test
+# 1/3 of customers have no orders, or 50K
+# |join| = |orders|  + 50K = 1.55M
+select c.c_custkey, o.o_orderkey
+from tpch.orders o
+right outer join tpch.customer c on c.c_custkey = o.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=24B cardinality=1.50M
+|
+|--01:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+00:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o.o_custkey
+   row-size=16B cardinality=1.50M
+====
+# Right outer join with zero-sized right table
+# Bug: Expected cardinality = 0
+select o.id, i.id
+from functional.alltypestiny i
+right outer join functional.alltypesnopart o using (id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: i.id = o.id
+|  runtime filters: RF000 <- o.id
+|  row-size=8B cardinality=8
+|
+|--01:SCAN HDFS [functional.alltypesnopart o]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny i]
+   partitions=4/4 files=4 size=460B
+   runtime filters: RF000 -> i.id
+   row-size=4B cardinality=8
+====
+# Right outer join with zero-sized left table
+select o.id, i.id
+from functional.alltypesnopart i
+right outer join functional.alltypestiny o using (id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.id = i.id
+|  row-size=8B cardinality=8
+|
+|--00:SCAN HDFS [functional.alltypesnopart i]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+01:SCAN HDFS [functional.alltypestiny o]
+   partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Right outer with left predicate
+# |join| = |customers'| * avg. orders per customer = 15
+# predicate removes null rows, so the right outer is a no-op
+# Using wrong NDV so answer is 25. See IMPALA-8045, Outer Join section
+# Bug: Expected cardinality ~25
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_name = 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_name = 'foo'
+|  row-size=54B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name = 'foo'
+|     row-size=38B cardinality=1
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Right outer with broad left predicate
+# |join| = |customers'| * avg. orders per customer = 150K/3 * 15 = 750K
+# predicate removes null rows, so the right outer is a no-op
+# Bug: Expected cardinality ~750
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_name < 'foo'
+|  row-size=54B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=38B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Right outer with right predicate
+# |clerk| = 1K
+# sel(clerk = 'foo') = 1/1000
+# |orders'| = |orders| * sel(clerk = 'foo') = 1.5K
+# |join| = |orders'| = 1.5K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where o.o_clerk = 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: c.c_custkey = o.o_custkey
+|  runtime filters: RF000 <- o.o_custkey
+|  row-size=51B cardinality=1.49K
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: o.o_clerk = 'foo'
+|     row-size=43B cardinality=1.49K
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   runtime filters: RF000 -> c.c_custkey
+   row-size=8B cardinality=150.00K
+====
+# Right outer with broad right predicate
+# |join| = |orders| * sel(left pred) = 500K
+# Bug: Expected cardinality ~500K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where o.o_clerk < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  row-size=51B cardinality=150.00K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: o.o_clerk < 'foo'
+   row-size=43B cardinality=150.00K
+====
+# Right outer with join predicate
+# |join| = |orders| * sel(pred) = 150K
+# Bug: Expected cardinality ~150K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10
+|  row-size=30B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# Right outer with broad join predicate
+# sel(c.c_nationkey + o.o_shippriority < 10) = 0.33
+# |join| = |orders| * sel(pred) = 500K
+# Bug: Expected cardinality ~500K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority < 10
+|  row-size=30B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# Right outer with left, right and join predicate
+# |customers'| = |customers| * sel(left pred) = 50K
+# |orders'| = |orders| * sel(right pred) = 500K
+# max(|key|) = 150K / 3 = 50K
+# sel(join pred) = 0.1
+# |join| = |orders'| * |customers'| / max(|key|) * sel(join pred)
+# = 500K * 50K / 50K * .1 = 50K
+# Since inner has more keys than outer, outer join adds no rows
+# The c.c_name < 'foo' is reapplied after the join
+# Bug: Expected cardinality ~50K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+  and o.o_clerk < 'foo'
+  and c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10, c.c_name < 'foo'
+|  row-size=87B cardinality=150.00K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=40B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: o.o_clerk < 'foo'
+   row-size=47B cardinality=150.00K
+====
+# Left outer tests
+#
+# Left outer
+# |join| = |orders| + unmatched customers = 1.55M
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+left outer join tpch.orders o on c.c_custkey = o.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=24B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o.o_custkey
+   row-size=16B cardinality=1.50M
+====
+# Left outer join which is the inverse of the right outer test
+# |join| = |orders| = 1.5M
+select c.c_custkey, o.o_orderkey
+from tpch.orders o
+left outer join tpch.customer c on c.c_custkey = o.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  row-size=24B cardinality=1.50M
+|
+|--01:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+00:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Left outer join with zero-sized right table
+select o.id, i.id
+from functional.alltypestiny i
+left outer join functional.alltypesnopart o using (id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: i.id = o.id
+|  row-size=8B cardinality=8
+|
+|--01:SCAN HDFS [functional.alltypesnopart o]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny i]
+   partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Left outer join with zero-sized left table
+select o.id, i.id
+from functional.alltypesnopart i
+left outer join functional.alltypestiny o using (id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.id = i.id
+|  runtime filters: RF000 <- i.id
+|  row-size=8B cardinality=0
+|
+|--00:SCAN HDFS [functional.alltypesnopart i]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+01:SCAN HDFS [functional.alltypestiny o]
+   partitions=4/4 files=4 size=460B
+   runtime filters: RF000 -> o.id
+   row-size=4B cardinality=8
+====
+# Left outer with left predicate
+# |customers'| = |customers| * sel(pred) = 50K
+# |join| = |customers'| * avg. orders per customer = 50 * 15 = 750K
+# (Skipping some steps in the math.)
+# Bug: Expected cardinality ~750K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+left outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=54B cardinality=228.68K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=38B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o.o_custkey
+   row-size=16B cardinality=1.50M
+====
+# Left outer with right predicate
+# The filter on the right table works with non-null values.
+# It is applied again after the (outer) join, and will remove any
+# null rows created via the outer join (undoing the join.)
+# While this may be a meaninless query, it is an interesting odd test case.
+# The meaning is thus to match customers who have orders.
+# |join| = |orders'| = |orders| * sel(pred)
+#        = 1.5M * 0.33 = 500K
+# Bug: Expected cardinality ~500K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+left outer join tpch.orders o on c.c_custkey = o.o_custkey
+where o.o_clerk < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: o.o_clerk < 'foo'
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=51B cardinality=150.00K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: o.o_clerk < 'foo'
+   runtime filters: RF000 -> o.o_custkey
+   row-size=43B cardinality=150.00K
+====
+# Left outer with right predicate
+# Predicate matches both right columns and the generated, null
+# outer join rows.
+# Bug: Invalid IS NULL logic uses orders null count to estimate output of outer
+# join. Since orders has no nulls, we incorrectly assume the outer join does not either.
+# See IMPALA-8050
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+left outer join tpch.orders o on c.c_custkey = o.o_custkey
+where o.o_clerk is null
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: o.o_clerk IS NULL
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=51B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   runtime filters: RF000 -> o.o_custkey
+   row-size=43B cardinality=1.50M
+====
+# Left outer with join predicate
+# sel(c.c_nationkey + o.o_shippriority = 10) = 0.1 (guess)
+# |join| = |orders| * sel(pred) = 150K
+# Bug: Expected cardinality ~150K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10
+|  row-size=30B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# As above, but with two predicates since the estimate above happens
+# to also be the customer table cardinality.
+# Remember exponential back-off
+# |join| = 0.31 above, 0.031 net, or ~45K
+# Bug: Expected cardinality ~50K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+  and c.c_nationkey * o.o_shippriority = 100
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey * o.o_shippriority = 100, c.c_nationkey + o.o_shippriority = 10
+|  row-size=30B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# Left outer with broad join predicate
+# |join| = |orders| * sel(pred) = 500K
+# (Skipping some math steps.)
+# Bug: Expected cardinality ~500K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+right outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [LEFT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority < 10
+|  row-size=30B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# Left outer with broad join predicate and right table predicate
+# sel(c.c_nationkey + o.o_shippriority < 10) = 0.33
+# |customers'| = |customers| * 0.33 = 50K
+# |orders'| = |orders| * 0.33 = 500K
+# |join| = same as inner join = 50K
+# Order key < customer key, so no extra rows from outer
+# Bug: Expected cardinality ~50K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+left outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+  and o.o_clerk < 'foo'
+  and c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [RIGHT OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10, o.o_clerk < 'foo'
+|  runtime filters: RF000 <- c.c_custkey
+|  row-size=87B cardinality=150.00K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=40B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: o.o_clerk < 'foo'
+   runtime filters: RF000 -> o.o_custkey
+   row-size=47B cardinality=150.00K
+====
+# Full outer join tests
+#
+# |join| = |left-only| + |inner-join| + |right-only|
+# |customers.c_custKey| = 150K
+# |orders.o_custkey| = 100K
+# |custkey| = max(above) = 150K
+# |left-only| = |left| * (1 - |o_custKey| / |custkey|)
+#             = 150K * (1 - 100K/150K) = 150K * 1/3 = 50K
+# |right-only| = |right| * (1 - |c_custKey| / |custkey|) = 0
+# |inner-join| = |left| * |right| / |custKey| = 1.5M
+# |join| = |left-only| + |inner-join| + |right-only|
+#        = 50K + 1.5M + 0 = 1.55M
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  row-size=24B cardinality=1.65M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Invers of the above
+select c.c_custkey, o.o_orderkey
+from tpch.orders o
+full outer join tpch.customer c on c.c_custkey = o.o_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  row-size=24B cardinality=1.65M
+|
+|--01:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+00:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Full outer join with zero-sized right table
+select o.id, i.id
+from functional.alltypestiny i
+full outer join functional.alltypesnopart o using (id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: i.id = o.id
+|  row-size=8B cardinality=8
+|
+|--01:SCAN HDFS [functional.alltypesnopart o]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+00:SCAN HDFS [functional.alltypestiny i]
+   partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Full outer join with zero-sized left table
+select o.id, i.id
+from functional.alltypesnopart i
+full outer join functional.alltypestiny o using (id)
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.id = i.id
+|  row-size=8B cardinality=8
+|
+|--00:SCAN HDFS [functional.alltypesnopart i]
+|     partitions=1/1 files=0 size=0B
+|     row-size=4B cardinality=0
+|
+01:SCAN HDFS [functional.alltypestiny o]
+   partitions=4/4 files=4 size=460B
+   row-size=4B cardinality=8
+====
+# Full outer with left predicate
+# predicate removes null rows, so the right outer is a no-op
+# Using wrong NDV so answer is 25. See IMPALA-8045, Outer Join section
+# Bug: Expected cardinality ~25
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_name = 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_name = 'foo'
+|  row-size=54B cardinality=1.50M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name = 'foo'
+|     row-size=38B cardinality=1
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Full outer with broad left predicate
+# |join| = |customers'| * avg. orders per customer = 150K/3 * 15 = 750K
+# predicate removes null rows, so the right outer is a no-op
+# Bug: Using wrong NDV, so get answer of 1M, see IMPALA-8045
+# Bug: Expected cardinality ~1M
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_name < 'foo'
+|  row-size=54B cardinality=1.51M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=38B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=16B cardinality=1.50M
+====
+# Full outer with right predicate
+# |clerk| = 1K
+# sel(clerk = 'foo') = 1/1000
+# |orders'| = |orders| * sel(clerk = 'foo') = 1.5K
+# |join| = |orders'| = 1.5K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where o.o_clerk = 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: c.c_custkey = o.o_custkey
+|  other predicates: o.o_clerk = 'foo'
+|  row-size=51B cardinality=151.49K
+|
+|--01:SCAN HDFS [tpch.orders o]
+|     partitions=1/1 files=1 size=162.56MB
+|     predicates: o.o_clerk = 'foo'
+|     row-size=43B cardinality=1.49K
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   row-size=8B cardinality=150.00K
+====
+# Full outer with broad right predicate
+# |join| = |orders| * sel(left pred) = 500K
+# Bug: Expected cardinality ~500K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where o.o_clerk < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: o.o_clerk < 'foo'
+|  row-size=51B cardinality=300.00K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=8B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: o.o_clerk < 'foo'
+   row-size=43B cardinality=150.00K
+====
+# Full outer with join predicate
+# |join| = |orders| * sel(pred) = 150K
+# Bug: Expected cardinality ~150K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10
+|  row-size=30B cardinality=1.65M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# Full outer with broad join predicate
+# sel(c.c_nationkey + o.o_shippriority < 10) = 0.33
+# |join| = |orders| * sel(pred) = 500K
+# Bug: Expected cardinality ~500K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority < 10
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority < 10
+|  row-size=30B cardinality=1.65M
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     row-size=10B cardinality=150.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   row-size=20B cardinality=1.50M
+====
+# Full outer with left, right and join predicate
+# |customers'| = |customers| * sel(left pred) = 50K
+# |orders'| = |orders| * sel(right pred) = 500K
+# |join| = |orders'| * sel(join pred) * sel(children) = 16K
+# sel(children) = 0.33
+# The c.c_name < 'foo' is reapplied after the join
+# Bug: Expected cardinality ~50K
+select c.c_custkey, o.o_orderkey
+from tpch.customer c
+full outer join tpch.orders o on c.c_custkey = o.o_custkey
+where c.c_nationkey + o.o_shippriority = 10
+  and o.o_clerk < 'foo'
+  and c.c_name < 'foo'
+---- PLAN
+PLAN-ROOT SINK
+|
+02:HASH JOIN [FULL OUTER JOIN]
+|  hash predicates: o.o_custkey = c.c_custkey
+|  other predicates: c.c_nationkey + o.o_shippriority = 10, o.o_clerk < 'foo', c.c_name < 'foo'
+|  row-size=87B cardinality=165.00K
+|
+|--00:SCAN HDFS [tpch.customer c]
+|     partitions=1/1 files=1 size=23.08MB
+|     predicates: c.c_name < 'foo'
+|     row-size=40B cardinality=15.00K
+|
+01:SCAN HDFS [tpch.orders o]
+   partitions=1/1 files=1 size=162.56MB
+   predicates: o.o_clerk < 'foo'
+   row-size=47B cardinality=150.00K
+====
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
new file mode 100644
index 0000000..fb21e84
--- /dev/null
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/card-scan.test
@@ -0,0 +1,442 @@
+# Predicate tests
+# In this file, "inequality" means not equals. It is a general
+# term for <, <=, >, >=. The term "not equals" is used for the
+# != (AKA <>) operator.
+# No predicate at all. Establishes cardinality baseline.
+select * from tpch.customer
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer]
+   partitions=1/1 files=1 size=23.08MB
+   row-size=218B cardinality=150.00K
+====
+# Predicate on a single value: card = |T|/ndv
+# Unique key, NDV=|T|
+select *
+from tpch.customer c
+where c.c_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey = 10
+   row-size=218B cardinality=1
+====
+# Predicate on a single value: card = |T|/ndv
+# Non-unique key, NDV=25
+select *
+from tpch.customer c
+where c.c_nationkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_nationkey = 10
+   row-size=218B cardinality=6.00K
+====
+# OR'ed predicate, card = 2/ndv
+select *
+from tpch.customer c
+where c.c_custkey = 10 OR c.c_custkey = 20
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey IN (10, 20)
+   row-size=218B cardinality=2
+====
+# OR'ed predicate, distinct columns
+# card = max(card of each OR term)
+select *
+from tpch.customer c
+where c.c_custkey = 10
+   or c.c_nationkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey = 10 OR c.c_nationkey = 10
+   row-size=218B cardinality=6.00K
+====
+# As above, but with smaller table to see effect
+# NDV(id) = |T| = 8
+# NDV(bool_val) = 2
+# card = max(|T|/NDV(id),|T|/NDV(bool_val)) = max(1, 4)
+# But done by adding cardinalities, which seems right, but is wrong
+# Bug: IMPALA-8038
+select *
+from functional.alltypestiny t
+where t.id = 10
+   or t.bool_col = true
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypestiny t]
+   partitions=4/4 files=4 size=460B
+   predicates: t.id = 10 OR t.bool_col = TRUE
+   row-size=89B cardinality=5
+====
+# IN, card = x/ndv
+select *
+from tpch.customer c
+where c.c_custkey in (10, 20, 30)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey IN (10, 20, 30)
+   row-size=218B cardinality=3
+====
+# IN with duplicate values. Remove dups.
+# Bug: IMPALA-8030
+select *
+from tpch.customer c
+where c.c_custkey in (10, 20, 30, 30, 10, 20)
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey IN (10, 20, 30, 30, 10, 20)
+   row-size=218B cardinality=6
+====
+# OR on same value: card = 1/ndv
+select *
+from tpch.customer c
+where c.c_custkey = 10 OR c.c_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey = 10
+   row-size=218B cardinality=1
+====
+# OR on same value: card = 1/ndv
+# Different expression order
+# Bug: IMPALA-8030
+select *
+from tpch.customer c
+where c.c_custkey = 10 OR 10 = c.c_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey IN (10, 10)
+   row-size=218B cardinality=2
+====
+# AND'ed predicate, card = 0
+select *
+from tpch.customer c
+where c.c_custkey = 10 AND c.c_custkey = 20
+---- PLAN
+PLAN-ROOT SINK
+|
+00:EMPTYSET
+====
+# AND on same value: card = 1/ndv
+select *
+from tpch.customer c
+where c.c_custkey = 10 AND c.c_custkey = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey = 10
+   row-size=218B cardinality=1
+====
+# Not-equal, card = 1 - 1/ndv
+# Use smaller table so effect is clear
+# |T|=8, NDV=8
+# Bug: IMPALA-8039
+# Bug, expected cardinality ~7
+select *
+from functional.alltypestiny
+where id != 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypestiny]
+   partitions=4/4 files=4 size=460B
+   predicates: id != 10
+   row-size=89B cardinality=1
+====
+# Inequality. No useful stats.
+# Bug: IMPALA-8037, Assumes sel = 0.1
+# Bug: Expected cardinality ~49.5K
+select *
+from tpch.customer c
+where c.c_custkey < 1234
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey < 1234
+   row-size=218B cardinality=15.00K
+====
+# Inequality twice on same value. Remove duplicate.
+# Bug: IMPALA-8037
+# Bug: Expected cardinality ~49.5K
+select *
+from tpch.customer c
+where c.c_custkey < 1234
+  and c.c_custkey < 1234
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey < 1234
+   row-size=218B cardinality=15.00K
+====
+# Inequality twice on same value, but reversed "direction". Remove duplicate.
+# Bug: IMPALA-8037
+# Bug: Expected cardinality ~49.5K
+select *
+from tpch.customer c
+where c.c_custkey < 1234
+  and 1234 > c.c_custkey
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey < 1234
+   row-size=218B cardinality=15.00K
+====
+# Two inequalities of the same "direction" Assume only one applies.
+# Bugs: IMPALA-8031, IMPALA-8037
+# Bug: Expected cardinality ~28.5K
+select *
+from tpch.customer c
+where c.c_custkey < 1234
+  and c.c_custkey < 2345
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey < 1234, c.c_custkey < 2345
+   row-size=218B cardinality=15.00K
+====
+# Two inequalities. No useful stats.
+# But with effect of exponential-backoff
+# Bug: IMPALA-8037
+# Bug: Expected cardinality ~28.5K
+select *
+from tpch.customer c
+where c.c_custkey < 1234
+  and c.c_nationkey < 100
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey < 1234, c.c_nationkey < 100
+   row-size=218B cardinality=15.00K
+====
+# Between. No useful stats. Should assume, say 0.16
+# But, gets rewritten before computing selectivity,
+# So same as AND'ed inequalities
+# sel = 0.1, but Ramakrishnan and Gherke suggest 1/4
+# Bug: IMPALA-8037
+# Bug: Expected cardinality ~28.5K
+select *
+from tpch.customer c
+where c.c_custkey between 1234 and 2345
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
+   row-size=218B cardinality=15.00K
+====
+# Compound inequality (< and >) that is the same as BETWEEN.
+# Gets compouted as AND of inequalities.
+# sel = 0.1 (single sel for all)
+# But, these conditions are not independent, so a better estimate
+# would be, say 1/6
+# Bugs: TBD
+# Bug: Expected cardinality ~28.5K
+# |<<< <<< ===| c > x
+# |=== >>> >>>| c < y
+# |<<< === >>>| c > x AND c < y
+select *
+from tpch.customer c
+where c.c_custkey >= 1234 and c.c_custkey <= 2345
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
+   row-size=218B cardinality=15.00K
+====
+# Between and redundant inequality
+# Bug: expected cardinality ~15K
+select *
+from tpch.customer c
+where c.c_custkey between 1234 and 2345
+  and c.c_custkey <= 2345
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_custkey <= 2345, c.c_custkey >= 1234
+   row-size=218B cardinality=15.00K
+====
+# Partitioned scan.
+# |table| = 11K
+# |partition| = 1000
+# Five patitions match
+# |table'| = 5000
+select *
+from functional.alltypesagg a
+where a.day >= 6
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesagg a]
+   partition predicates: a.`day` >= 6
+   partitions=5/11 files=5 size=372.38KB
+   row-size=95B cardinality=5.00K
+====
+# Partitioned table, one partition matches
+select *
+from functional.alltypesagg a
+where a.day = 6
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesagg a]
+   partition predicates: a.`day` = 6
+   partitions=1/11 files=1 size=74.48KB
+   row-size=95B cardinality=1.00K
+====
+# Partitioned table, no partitions match
+select *
+from functional.alltypesagg a
+where a.day = 23
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesagg a]
+   partition predicates: a.`day` = 23
+   partitions=0/11 files=0 size=0B
+   row-size=95B cardinality=0
+====
+# IS NULL
+# Estimated from null count, which is 0 for TPCH tables.
+# TODO: Add tests for ndv stats but no null counts
+# Impala does not suport NOT NULL fields, so we have to assume the
+# field can be null. That means null is one of the distinct values.
+# sel(mktsegment is null) = 1/NDV = 0.2
+# Bug: Many, expected cardinality ~15K
+select *
+from tpch.customer c
+where c.c_mktsegment is null
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_mktsegment IS NULL
+   row-size=218B cardinality=15.00K
+====
+# IS NOT NULL
+# Similar to above.
+# Bug: Expected cardinality ~15K
+select *
+from tpch.customer c
+where c.c_mktsegment is not null
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: c.c_mktsegment IS NOT NULL
+   row-size=218B cardinality=15.00K
+====
+# |alltypesagg| = 11K. Null count of tinyint_col = 2000.
+select *
+from functional.alltypesagg
+where tinyint_col is null
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesagg]
+   partitions=11/11 files=11 size=814.73KB
+   predicates: tinyint_col IS NULL
+   row-size=95B cardinality=2.00K
+====
+# As above.
+select *
+from functional.alltypesagg
+where tinyint_col is not null
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesagg]
+   partitions=11/11 files=11 size=814.73KB
+   predicates: tinyint_col IS NOT NULL
+   row-size=95B cardinality=9.00K
+====
+# IS NULL on an expression. Guess 0.1 selectivity
+select *
+from tpch.customer c
+where concat(c.c_mktsegment, c_comment) is null
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: concat(c.c_mktsegment, c_comment) IS NULL
+   row-size=218B cardinality=15.00K
+====
+# IS NOT NULL.
+# Bug: Should guess 0.9 selectivity, actually guesses 0.1
+# Expected cardinality ~15K
+select *
+from tpch.customer c
+where concat(c.c_mktsegment, c_comment) is not null
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [tpch.customer c]
+   partitions=1/1 files=1 size=23.08MB
+   predicates: concat(c.c_mktsegment, c_comment) IS NOT NULL
+   row-size=218B cardinality=15.00K
+====
+# Scan of a table with no stats and zero rows
+select *
+from functional.alltypesnopart
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesnopart]
+   partitions=1/1 files=0 size=0B
+   row-size=72B cardinality=0
+====
+# Filter on the no-stats table
+select *
+from functional.alltypesnopart
+where int_col = 10
+---- PLAN
+PLAN-ROOT SINK
+|
+00:SCAN HDFS [functional.alltypesnopart]
+   partitions=1/1 files=0 size=0B
+   predicates: int_col = 10
+   row-size=72B cardinality=0
+====