You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2017/11/29 06:13:10 UTC

[1/4] calcite git commit: Add ImmutableBitSet.set(int, boolean)

Repository: calcite
Updated Branches:
  refs/heads/master cd493efd3 -> 9bb54006a


Add ImmutableBitSet.set(int, boolean)


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/01eb6b5e
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/01eb6b5e
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/01eb6b5e

Branch: refs/heads/master
Commit: 01eb6b5ec28985eb64892fec0918c4a0a7c4aad0
Parents: cd493ef
Author: Julian Hyde <jh...@apache.org>
Authored: Thu Feb 2 14:41:40 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Mon Nov 27 16:33:31 2017 -0800

----------------------------------------------------------------------
 .../main/java/org/apache/calcite/util/ImmutableBitSet.java  | 9 +++++++++
 .../java/org/apache/calcite/util/ImmutableBitSetTest.java   | 9 +++++++++
 2 files changed, 18 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/01eb6b5e/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
index 568fe3e..4f514bb 100644
--- a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
+++ b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
@@ -843,6 +843,15 @@ public class ImmutableBitSet
     return union(ImmutableBitSet.of(i));
   }
 
+  /** Returns a bit set the same as this but with a given bit set (if b is
+   * true) or unset (if b is false). */
+  public ImmutableBitSet set(int i, boolean b) {
+    if (get(i) == b) {
+      return this;
+    }
+    return b ? set(i) : clear(i);
+  }
+
   /** Returns a bit set the same as this but with a given bit set if condition
    * is true. */
   public ImmutableBitSet setIf(int bit, boolean condition) {

http://git-wip-us.apache.org/repos/asf/calcite/blob/01eb6b5e/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
index 8894012..9ae81d0 100644
--- a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
+++ b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
@@ -33,6 +33,7 @@ import java.util.SortedMap;
 
 import static org.hamcrest.CoreMatchers.equalTo;
 import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.sameInstance;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
@@ -490,6 +491,14 @@ public class ImmutableBitSetTest {
     assertThat(bitSet.clearIf(29, true), equalTo(bitSet2));
   }
 
+  @Test public void testSet2() {
+    final ImmutableBitSet bitSet = ImmutableBitSet.of(29, 4, 1969);
+    final ImmutableBitSet bitSet2 = ImmutableBitSet.of(29, 4, 1969, 30);
+    assertThat(bitSet.set(30, false), sameInstance(bitSet));
+    assertThat(bitSet.set(30, true), equalTo(bitSet2));
+    assertThat(bitSet.set(29, true), sameInstance(bitSet));
+  }
+
   @Test public void testShift() {
     final ImmutableBitSet bitSet = ImmutableBitSet.of(29, 4, 1969);
     assertThat(bitSet.shift(0), is(bitSet));


[4/4] calcite git commit: [CALCITE-2005] Test failures on Windows

Posted by jh...@apache.org.
[CALCITE-2005] Test failures on Windows


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/9bb54006
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/9bb54006
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/9bb54006

Branch: refs/heads/master
Commit: 9bb54006aa5cfd7765f791ccaef5e25afacaed86
Parents: dad5818
Author: Julian Hyde <jh...@apache.org>
Authored: Tue Oct 10 10:14:37 2017 -0700
Committer: Julian Hyde <jh...@apache.org>
Committed: Tue Nov 28 10:23:37 2017 -0800

----------------------------------------------------------------------
 .../org/apache/calcite/interpreter/Interpreter.java     |  3 ++-
 .../java/org/apache/calcite/test/CalciteAssert.java     |  1 +
 .../java/org/apache/calcite/test/RelMetadataTest.java   | 12 ++++++++++++
 .../src/test/java/org/apache/calcite/util/UtilTest.java |  2 +-
 file/pom.xml                                            |  6 ++++++
 .../org/apache/calcite/adapter/file/FileReaderTest.java |  4 +++-
 6 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java b/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
index 2d754fa..9360294 100644
--- a/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
+++ b/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
@@ -326,7 +326,8 @@ public class Interpreter extends AbstractEnumerable<Object[]>
       this.rel = rel;
       this.sink = sink;
       this.rowEnumerable = rowEnumerable;
-      assert (sink != null) != (rowEnumerable != null) : "one or the other";
+      Preconditions.checkArgument((sink == null) != (rowEnumerable == null),
+          "one or the other");
     }
   }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
index 06e351a..70a415b 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
@@ -1343,6 +1343,7 @@ public class CalciteAssert {
             hooks, checker, null, null);
         return this;
       } catch (Exception e) {
+        e.printStackTrace();
         throw new RuntimeException(
             "exception while executing [" + sql + "]", e);
       }

http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index a09de16..09c8a50 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -100,9 +100,12 @@ import com.google.common.collect.Sets;
 import org.hamcrest.CoreMatchers;
 import org.hamcrest.CustomTypeSafeMatcher;
 import org.hamcrest.Matcher;
+import org.hamcrest.core.Is;
+import org.junit.Assume;
 import org.junit.Ignore;
 import org.junit.Test;
 
+import java.io.File;
 import java.lang.reflect.Method;
 import java.math.BigDecimal;
 import java.util.ArrayList;
@@ -1444,6 +1447,15 @@ public class RelMetadataTest extends SqlToRelTestBase {
    * columns</a>. Since this is a performance problem, the test result does not
    * change, but takes over 15 minutes before the fix and 6 seconds after. */
   @Test(timeout = 20_000) public void testPullUpPredicatesForExprsItr() {
+    // If we're running Windows, we are probably in a VM and the test may
+    // exceed timeout by a small margin.
+    Assume.assumeThat("Too slow on Windows", File.separatorChar, Is.is('/'));
+    testPullUpPredicatesForExprsItrNoTimeout();
+  }
+
+  /** As {@link #testPullUpPredicatesForExprsItr} but no timeout; can run on
+   * all platforms, even slow VMs. */
+  @Test public void testPullUpPredicatesForExprsItrNoTimeout() {
     final String sql = "select a.EMPNO, a.ENAME\n"
         + "from (select * from sales.emp ) a\n"
         + "join (select * from sales.emp  ) b\n"

http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/test/java/org/apache/calcite/util/UtilTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/UtilTest.java b/core/src/test/java/org/apache/calcite/util/UtilTest.java
index d7053c9..1fe2d22 100644
--- a/core/src/test/java/org/apache/calcite/util/UtilTest.java
+++ b/core/src/test/java/org/apache/calcite/util/UtilTest.java
@@ -2042,7 +2042,7 @@ public class UtilTest {
         + "\t\t\tline 4 with no ending\n"
         + "\t</someText>\n"
         + "</root>\n";
-    assertThat(s, is(expected));
+    assertThat(Util.toLinux(s), is(expected));
   }
 }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/file/pom.xml
----------------------------------------------------------------------
diff --git a/file/pom.xml b/file/pom.xml
index 6ea7f08..17a6a1e 100644
--- a/file/pom.xml
+++ b/file/pom.xml
@@ -41,6 +41,12 @@ limitations under the License.
     </dependency>
     <dependency>
       <groupId>org.apache.calcite</groupId>
+      <artifactId>calcite-core</artifactId>
+      <type>test-jar</type>
+      <scope>test</scope>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.calcite</groupId>
       <artifactId>calcite-linq4j</artifactId>
     </dependency>
     <dependency>

http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
----------------------------------------------------------------------
diff --git a/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java b/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
index ec1034f..a9118f1 100644
--- a/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
+++ b/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
@@ -18,6 +18,7 @@ package org.apache.calcite.adapter.file;
 
 import org.apache.calcite.util.Source;
 import org.apache.calcite.util.Sources;
+import org.apache.calcite.util.TestUtil;
 
 import org.jsoup.select.Elements;
 
@@ -200,6 +201,7 @@ public class FileReaderTest {
    * NPE in planner</a>. */
   @Test public void testCsvFile() throws Exception {
     Properties info = new Properties();
+    final String path = resourcePath("sales-csv");
     final String model = "inline:"
         + "{\n"
         + "  \"version\": \"1.0\",\n"
@@ -210,7 +212,7 @@ public class FileReaderTest {
         + "      \"type\": \"custom\",\n"
         + "      \"factory\": \"org.apache.calcite.adapter.file.FileSchemaFactory\",\n"
         + "      \"operand\": {\n"
-        + "        \"directory\": \"" + resourcePath("sales-csv") + "\"\n"
+        + "        \"directory\": " + TestUtil.escapeString(path) + "\n"
         + "      }\n"
         + "    }\n"
         + "  ]\n"


[3/4] calcite git commit: [CALCITE-1616] Data profiler

Posted by jh...@apache.org.
[CALCITE-1616] Data profiler

Profiler does not work on JDK 7, because it relies on
com.yahoo.datasketches:sketches-core:0.9.0, which needs JDK 8 or
higher. Therefore some tests are disabled on JDK 7.

Change interface LatticeStatisticProvider to estimate joint rather
than singleton cardinality.

Bind LatticeStatisticProvider to a Lattice, and create
LatticeStatisticProvider.Factory to do the binding.

Keep results only if they are sufficiently "surprising" (informative).
The "surprise queue" is FIFO and holds the top N accepted values,
after a warm-up period

PartiallyOrderedSet: find "hypothetical" parents and children of
elements not in the set.


Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/dad58186
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/dad58186
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/dad58186

Branch: refs/heads/master
Commit: dad581868b815510389f61936b0c583be93d5dc5
Parents: 01eb6b5
Author: Julian Hyde <jh...@apache.org>
Authored: Wed Feb 1 19:43:46 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Tue Nov 28 10:20:25 2017 -0800

----------------------------------------------------------------------
 core/pom.xml                                    |   4 +
 .../CachingLatticeStatisticProvider.java        |  40 +-
 .../DelegatingLatticeStatisticProvider.java     |   6 +-
 .../org/apache/calcite/materialize/Lattice.java | 196 +++--
 .../materialize/LatticeStatisticProvider.java   |  14 +-
 .../apache/calcite/materialize/Lattices.java    |  16 +-
 .../ProfilerLatticeStatisticProvider.java       | 105 +++
 .../SqlLatticeStatisticProvider.java            |  40 +-
 .../org/apache/calcite/profile/Profiler.java    | 294 +++++++
 .../apache/calcite/profile/ProfilerImpl.java    | 809 +++++++++++++++++++
 .../apache/calcite/profile/SimpleProfiler.java  | 341 ++++++++
 .../apache/calcite/profile/package-info.java    |  26 +
 .../calcite/util/PartiallyOrderedSet.java       | 215 +++--
 .../apache/calcite/profile/ProfilerTest.java    | 682 ++++++++++++++++
 .../org/apache/calcite/test/CalciteSuite.java   |   2 +
 .../test/FoodMartLatticeStatisticProvider.java  |  30 +-
 .../org/apache/calcite/test/LatticeTest.java    | 117 ++-
 .../java/org/apache/calcite/test/Matchers.java  |  81 ++
 .../calcite/util/PartiallyOrderedSetTest.java   |  78 +-
 .../java/org/apache/calcite/util/TestUtil.java  |  17 +
 .../apache/calcite/adapter/tpch/TpchTest.java   |   6 +-
 pom.xml                                         |  12 +-
 22 files changed, 2928 insertions(+), 203 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 73c1866..5526122 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -89,6 +89,10 @@ limitations under the License.
       <scope>test</scope>
     </dependency>
     <dependency>
+      <groupId>com.yahoo.datasketches</groupId>
+      <artifactId>sketches-core</artifactId>
+    </dependency>
+    <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
       <scope>test</scope>

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
index e408335..21c6fed 100644
--- a/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
@@ -16,43 +16,51 @@
  */
 package org.apache.calcite.materialize;
 
-import org.apache.calcite.util.Pair;
 import org.apache.calcite.util.Util;
 
 import com.google.common.cache.CacheBuilder;
 import com.google.common.cache.CacheLoader;
 import com.google.common.cache.LoadingCache;
+import com.google.common.collect.ImmutableList;
 import com.google.common.util.concurrent.UncheckedExecutionException;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.concurrent.ExecutionException;
+import javax.annotation.Nonnull;
 
 /**
- * Implementation of {@link LatticeStatisticProvider} that gets statistics by
- * executing "SELECT COUNT(DISTINCT ...) ..." SQL queries.
+ * Implementation of {@link LatticeStatisticProvider} that caches single-column
+ * statistics and computes multi-column statistics from these.
  */
 class CachingLatticeStatisticProvider implements LatticeStatisticProvider {
-  private final LoadingCache<Pair<Lattice, Lattice.Column>, Integer> cache;
+  private final Lattice lattice;
+  private final LoadingCache<Lattice.Column, Double> cache;
 
   /** Creates a CachingStatisticProvider. */
-  CachingLatticeStatisticProvider(
+  CachingLatticeStatisticProvider(final Lattice lattice,
       final LatticeStatisticProvider provider) {
-    cache = CacheBuilder.<Pair<Lattice, Lattice.Column>>newBuilder()
+    this.lattice = lattice;
+    cache = CacheBuilder.<Lattice.Column>newBuilder()
         .build(
-            new CacheLoader<Pair<Lattice, Lattice.Column>, Integer>() {
-              public Integer load(Pair<Lattice, Lattice.Column> key)
-                  throws Exception {
-                return provider.cardinality(key.left, key.right);
+            new CacheLoader<Lattice.Column, Double>() {
+              public Double load(@Nonnull Lattice.Column key) throws Exception {
+                return provider.cardinality(ImmutableList.of(key));
               }
             });
   }
 
-  public int cardinality(Lattice lattice, Lattice.Column column) {
-    try {
-      return cache.get(Pair.of(lattice, column));
-    } catch (UncheckedExecutionException | ExecutionException e) {
-      Util.throwIfUnchecked(e.getCause());
-      throw new RuntimeException(e.getCause());
+  public double cardinality(List<Lattice.Column> columns) {
+    final List<Double> counts = new ArrayList<>();
+    for (Lattice.Column column : columns) {
+      try {
+        counts.add(cache.get(column));
+      } catch (UncheckedExecutionException | ExecutionException e) {
+        Util.throwIfUnchecked(e.getCause());
+        throw new RuntimeException(e.getCause());
+      }
     }
+    return (int) Lattice.getRowCount(lattice.getFactRowCount(), counts);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
index e1ec6c5..f889dae 100644
--- a/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
@@ -16,6 +16,8 @@
  */
 package org.apache.calcite.materialize;
 
+import java.util.List;
+
 /**
  * Implementation of {@link LatticeStatisticProvider} that delegates
  * to an underlying provider.
@@ -33,8 +35,8 @@ public class DelegatingLatticeStatisticProvider
     this.provider = provider;
   }
 
-  public int cardinality(Lattice lattice, Lattice.Column column) {
-    return provider.cardinality(lattice, column);
+  public double cardinality(List<Lattice.Column> columns) {
+    return provider.cardinality(columns);
   }
 }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/Lattice.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/Lattice.java b/core/src/main/java/org/apache/calcite/materialize/Lattice.java
index 20a28a7..3828416 100644
--- a/core/src/main/java/org/apache/calcite/materialize/Lattice.java
+++ b/core/src/main/java/org/apache/calcite/materialize/Lattice.java
@@ -19,6 +19,7 @@ package org.apache.calcite.materialize;
 import org.apache.calcite.avatica.AvaticaUtils;
 import org.apache.calcite.jdbc.CalcitePrepare;
 import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.linq4j.tree.Primitive;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.prepare.CalcitePrepareImpl;
 import org.apache.calcite.rel.RelNode;
@@ -60,7 +61,7 @@ import com.google.common.collect.Maps;
 import com.google.common.collect.Ordering;
 import com.google.common.collect.Sets;
 
-import java.math.BigInteger;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -113,16 +114,15 @@ public class Lattice {
 
   private Lattice(CalciteSchema rootSchema, ImmutableList<Node> nodes,
       boolean auto, boolean algorithm, long algorithmMaxMillis,
-      LatticeStatisticProvider statisticProvider, Double rowCountEstimate,
-      ImmutableList<Column> columns, ImmutableList<Measure> defaultMeasures,
-      ImmutableList<Tile> tiles) {
+      LatticeStatisticProvider.Factory statisticProviderFactory,
+      Double rowCountEstimate, ImmutableList<Column> columns,
+      ImmutableList<Measure> defaultMeasures, ImmutableList<Tile> tiles) {
     this.rootSchema = rootSchema;
     this.nodes = Preconditions.checkNotNull(nodes);
     this.columns = Preconditions.checkNotNull(columns);
     this.auto = auto;
     this.algorithm = algorithm;
     this.algorithmMaxMillis = algorithmMaxMillis;
-    this.statisticProvider = Preconditions.checkNotNull(statisticProvider);
     this.defaultMeasures = Preconditions.checkNotNull(defaultMeasures);
     this.tiles = Preconditions.checkNotNull(tiles);
 
@@ -151,6 +151,8 @@ public class Lattice {
     }
     Preconditions.checkArgument(rowCountEstimate > 0d);
     this.rowCountEstimate = rowCountEstimate;
+    this.statisticProvider =
+        Preconditions.checkNotNull(statisticProviderFactory.apply(this));
   }
 
   /** Creates a Lattice. */
@@ -233,74 +235,92 @@ public class Lattice {
     throw new AssertionError("input not found");
   }
 
+  /** Generates a SQL query to populate a tile of the lattice specified by a
+   * given set of columns and measures. */
   public String sql(ImmutableBitSet groupSet, List<Measure> aggCallList) {
-    final ImmutableBitSet.Builder columnSetBuilder = groupSet.rebuild();
-    for (Measure call : aggCallList) {
-      for (Column arg : call.args) {
-        columnSetBuilder.set(arg.ordinal);
+    return sql(groupSet, true, aggCallList);
+  }
+
+  /** Generates a SQL query to populate a tile of the lattice specified by a
+   * given set of columns and measures, optionally grouping. */
+  public String sql(ImmutableBitSet groupSet, boolean group,
+      List<Measure> aggCallList) {
+    final List<Node> usedNodes = new ArrayList<>();
+    if (group) {
+      final ImmutableBitSet.Builder columnSetBuilder = groupSet.rebuild();
+      for (Measure call : aggCallList) {
+        for (Column arg : call.args) {
+          columnSetBuilder.set(arg.ordinal);
+        }
       }
-    }
-    final ImmutableBitSet columnSet = columnSetBuilder.build();
+      final ImmutableBitSet columnSet = columnSetBuilder.build();
 
-    // Figure out which nodes are needed. Use a node if its columns are used
-    // or if has a child whose columns are used.
-    List<Node> usedNodes = Lists.newArrayList();
-    for (Node node : nodes) {
-      if (ImmutableBitSet.range(node.startCol, node.endCol)
-          .intersects(columnSet)) {
-        use(usedNodes, node);
+      // Figure out which nodes are needed. Use a node if its columns are used
+      // or if has a child whose columns are used.
+      for (Node node : nodes) {
+        if (ImmutableBitSet.range(node.startCol, node.endCol)
+            .intersects(columnSet)) {
+          use(usedNodes, node);
+        }
       }
+      if (usedNodes.isEmpty()) {
+        usedNodes.add(nodes.get(0));
+      }
+    } else {
+      usedNodes.addAll(nodes);
     }
-    if (usedNodes.isEmpty()) {
-      usedNodes.add(nodes.get(0));
-    }
+
     final SqlDialect dialect = SqlDialect.DatabaseProduct.CALCITE.getDialect();
     final StringBuilder buf = new StringBuilder("SELECT ");
     final StringBuilder groupBuf = new StringBuilder("\nGROUP BY ");
     int k = 0;
     final Set<String> columnNames = Sets.newHashSet();
-    for (int i : groupSet) {
-      if (k++ > 0) {
-        buf.append(", ");
-        groupBuf.append(", ");
+    if (groupSet != null) {
+      for (int i : groupSet) {
+        if (k++ > 0) {
+          buf.append(", ");
+          groupBuf.append(", ");
+        }
+        final Column column = columns.get(i);
+        dialect.quoteIdentifier(buf, column.identifiers());
+        dialect.quoteIdentifier(groupBuf, column.identifiers());
+        final String fieldName = uniqueColumnNames.get(i);
+        columnNames.add(fieldName);
+        if (!column.alias.equals(fieldName)) {
+          buf.append(" AS ");
+          dialect.quoteIdentifier(buf, fieldName);
+        }
       }
-      final Column column = columns.get(i);
-      dialect.quoteIdentifier(buf, column.identifiers());
-      dialect.quoteIdentifier(groupBuf, column.identifiers());
-      final String fieldName = uniqueColumnNames.get(i);
-      columnNames.add(fieldName);
-      if (!column.alias.equals(fieldName)) {
-        buf.append(" AS ");
-        dialect.quoteIdentifier(buf, fieldName);
+      if (groupSet.isEmpty()) {
+        groupBuf.append("()");
       }
-    }
-    if (groupSet.isEmpty()) {
-      groupBuf.append("()");
-    }
-    int m = 0;
-    for (Measure measure : aggCallList) {
-      if (k++ > 0) {
-        buf.append(", ");
-      }
-      buf.append(measure.agg.getName())
-          .append("(");
-      if (measure.args.isEmpty()) {
-        buf.append("*");
-      } else {
-        int z = 0;
-        for (Column arg : measure.args) {
-          if (z++ > 0) {
-            buf.append(", ");
+      int m = 0;
+      for (Measure measure : aggCallList) {
+        if (k++ > 0) {
+          buf.append(", ");
+        }
+        buf.append(measure.agg.getName())
+            .append("(");
+        if (measure.args.isEmpty()) {
+          buf.append("*");
+        } else {
+          int z = 0;
+          for (Column arg : measure.args) {
+            if (z++ > 0) {
+              buf.append(", ");
+            }
+            dialect.quoteIdentifier(buf, arg.identifiers());
           }
-          dialect.quoteIdentifier(buf, arg.identifiers());
         }
+        buf.append(") AS ");
+        String measureName;
+        while (!columnNames.add(measureName = "m" + m)) {
+          ++m;
+        }
+        dialect.quoteIdentifier(buf, measureName);
       }
-      buf.append(") AS ");
-      String measureName;
-      while (!columnNames.add(measureName = "m" + m)) {
-        ++m;
-      }
-      dialect.quoteIdentifier(buf, measureName);
+    } else {
+      buf.append("*");
     }
     buf.append("\nFROM ");
     for (Node node : usedNodes) {
@@ -329,7 +349,9 @@ public class Lattice {
       System.out.println("Lattice SQL:\n"
           + buf);
     }
-    buf.append(groupBuf);
+    if (group) {
+      buf.append(groupBuf);
+    }
     return buf.toString();
   }
 
@@ -381,30 +403,40 @@ public class Lattice {
   /** Returns an estimate of the number of rows in the tile with the given
    * dimensions. */
   public double getRowCount(List<Column> columns) {
+    return statisticProvider.cardinality(columns);
+  }
+
+  /** Returns an estimate of the number of rows in the tile with the given
+   * dimensions. */
+  public static double getRowCount(double factCount, double... columnCounts) {
+    return getRowCount(factCount, Primitive.asList(columnCounts));
+  }
+
+  /** Returns an estimate of the number of rows in the tile with the given
+   * dimensions. */
+  public static double getRowCount(double factCount,
+      List<Double> columnCounts) {
     // The expected number of distinct values when choosing p values
     // with replacement from n integers is n . (1 - ((n - 1) / n) ^ p).
     //
     // If we have several uniformly distributed attributes A1 ... Am
     // with N1 ... Nm distinct values, they behave as one uniformly
     // distributed attribute with N1 * ... * Nm distinct values.
-    BigInteger n = BigInteger.ONE;
-    for (Column column : columns) {
-      final int cardinality = statisticProvider.cardinality(this, column);
-      if (cardinality > 1) {
-        n = n.multiply(BigInteger.valueOf(cardinality));
+    double n = 1d;
+    for (Double columnCount : columnCounts) {
+      if (columnCount > 1d) {
+        n *= columnCount;
       }
     }
-    final double nn = n.doubleValue();
-    final double f = getFactRowCount();
-    final double a = (nn - 1d) / nn;
+    final double a = (n - 1d) / n;
     if (a == 1d) {
       // A under-flows if nn is large.
-      return f;
+      return factCount;
     }
-    final double v = nn * (1d - Math.pow(a, f));
+    final double v = n * (1d - Math.pow(a, factCount));
     // Cap at fact-row-count, because numerical artifacts can cause it
     // to go a few % over.
-    return Math.min(v, f);
+    return Math.min(v, factCount);
   }
 
   /** Source relation of a lattice.
@@ -535,6 +567,15 @@ public class Lattice {
       this.alias = Preconditions.checkNotNull(alias);
     }
 
+    /** Converts a list of columns to a bit set of their ordinals. */
+    static ImmutableBitSet toBitSet(List<Column> columns) {
+      final ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+      for (Column column : columns) {
+        builder.set(column.ordinal);
+      }
+      return builder.build();
+    }
+
     public int compareTo(Column column) {
       return Utilities.compare(ordinal, column.ordinal);
     }
@@ -690,9 +731,10 @@ public class Lattice {
 
     /** Builds a lattice. */
     public Lattice build() {
-      LatticeStatisticProvider statisticProvider =
+      LatticeStatisticProvider.Factory statisticProvider =
           this.statisticProvider != null
-              ? AvaticaUtils.instantiatePlugin(LatticeStatisticProvider.class,
+              ? AvaticaUtils.instantiatePlugin(
+                  LatticeStatisticProvider.Factory.class,
                   this.statisticProvider)
               : Lattices.CACHED_SQL;
       Preconditions.checkArgument(rootSchema.isRoot(), "must be root schema");
@@ -815,15 +857,11 @@ public class Lattice {
 
     public Tile(ImmutableList<Measure> measures,
         ImmutableList<Column> dimensions) {
-      this.measures = measures;
-      this.dimensions = dimensions;
+      this.measures = Preconditions.checkNotNull(measures);
+      this.dimensions = Preconditions.checkNotNull(dimensions);
       assert Ordering.natural().isStrictlyOrdered(dimensions);
       assert Ordering.natural().isStrictlyOrdered(measures);
-      final ImmutableBitSet.Builder bitSetBuilder = ImmutableBitSet.builder();
-      for (Column dimension : dimensions) {
-        bitSetBuilder.set(dimension.ordinal);
-      }
-      bitSet = bitSetBuilder.build();
+      bitSet = Column.toBitSet(dimensions);
     }
 
     public static TileBuilder builder() {

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
index 46668cc..d16e903 100644
--- a/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
@@ -16,12 +16,22 @@
  */
 package org.apache.calcite.materialize;
 
+import com.google.common.base.Function;
+
+import java.util.List;
+
 /**
  * Estimates row counts for a lattice and its attributes.
  */
 public interface LatticeStatisticProvider {
-  /** Returns an estimate of the number of distinct values in a column. */
-  int cardinality(Lattice lattice, Lattice.Column column);
+  /** Returns an estimate of the number of distinct values in a column
+   * or list of columns. */
+  double cardinality(List<Lattice.Column> columns);
+
+  /** Creates a {@link LatticeStatisticProvider} for a given
+   * {@link org.apache.calcite.materialize.Lattice}. */
+  interface Factory extends Function<Lattice, LatticeStatisticProvider> {
+  }
 }
 
 // End LatticeStatisticProvider.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/Lattices.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/Lattices.java b/core/src/main/java/org/apache/calcite/materialize/Lattices.java
index cf70719..2a0a7ea 100644
--- a/core/src/main/java/org/apache/calcite/materialize/Lattices.java
+++ b/core/src/main/java/org/apache/calcite/materialize/Lattices.java
@@ -23,18 +23,16 @@ public class Lattices {
   private Lattices() {}
 
   /** Statistics provider that uses SQL. */
-  public static final LatticeStatisticProvider SQL =
-      SqlLatticeStatisticProvider.INSTANCE;
+  public static final LatticeStatisticProvider.Factory SQL =
+      SqlLatticeStatisticProvider.FACTORY;
 
   /** Statistics provider that uses SQL then stores the results in a cache. */
-  public static final LatticeStatisticProvider CACHED_SQL =
-      cache(SqlLatticeStatisticProvider.INSTANCE);
+  public static final LatticeStatisticProvider.Factory CACHED_SQL =
+      SqlLatticeStatisticProvider.CACHED_FACTORY;
 
-  /** Wraps a statistic provider in a cache. */
-  public static LatticeStatisticProvider cache(
-      LatticeStatisticProvider provider) {
-    return new CachingLatticeStatisticProvider(provider);
-  }
+  /** Statistics provider that uses a profiler. */
+  public static final LatticeStatisticProvider.Factory PROFILER =
+      ProfilerLatticeStatisticProvider.FACTORY;
 }
 
 // End Lattices.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java
new file mode 100644
index 0000000..39e0b29
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.materialize;
+
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.function.Function1;
+import org.apache.calcite.profile.Profiler;
+import org.apache.calcite.profile.ProfilerImpl;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.schema.ScannableTable;
+import org.apache.calcite.schema.Table;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Implementation of {@link LatticeStatisticProvider} that uses a
+ * {@link org.apache.calcite.profile.Profiler}.
+ */
+class ProfilerLatticeStatisticProvider implements LatticeStatisticProvider {
+  static final Factory FACTORY =
+      new Factory() {
+        public LatticeStatisticProvider apply(Lattice lattice) {
+          return new ProfilerLatticeStatisticProvider(lattice);
+        }
+      };
+
+  /** Converts an array of values to a list of {@link Comparable} values,
+   * converting null values to sentinels. */
+  private static final Function1<Object[], List<Comparable>> TO_LIST =
+      new Function1<Object[], List<Comparable>>() {
+        public List<Comparable> apply(Object[] values) {
+          for (int i = 0; i < values.length; i++) {
+            if (values[i] == null) {
+              values[i] = NullSentinel.INSTANCE;
+            }
+          }
+          //noinspection unchecked
+          return (List) Arrays.asList(values);
+        }
+      };
+
+  private final Lattice lattice;
+  private final Supplier<Profiler.Profile> profile =
+      Suppliers.memoize(new Supplier<Profiler.Profile>() {
+        public Profiler.Profile get() {
+          final ProfilerImpl profiler =
+              ProfilerImpl.builder()
+                  .withPassSize(200)
+                  .withMinimumSurprise(0.3D)
+                  .build();
+          final List<Profiler.Column> columns = new ArrayList<>();
+          for (Lattice.Column column : lattice.columns) {
+            columns.add(new Profiler.Column(column.ordinal, column.alias));
+          }
+          final String sql =
+              lattice.sql(ImmutableBitSet.range(lattice.columns.size()),
+                  false, ImmutableList.<Lattice.Measure>of());
+          final Table table =
+              new MaterializationService.DefaultTableFactory()
+                  .createTable(lattice.rootSchema, sql,
+                      ImmutableList.<String>of());
+          final ImmutableList<ImmutableBitSet> initialGroups =
+              ImmutableList.of();
+          final Enumerable<List<Comparable>> rows =
+              ((ScannableTable) table).scan(null).select(TO_LIST);
+          return profiler.profile(rows, columns, initialGroups);
+        }
+      });
+
+  /** Creates a ProfilerLatticeStatisticProvider. */
+  private ProfilerLatticeStatisticProvider(Lattice lattice) {
+    this.lattice = Preconditions.checkNotNull(lattice);
+  }
+
+  public double cardinality(List<Lattice.Column> columns) {
+    final ImmutableBitSet build = Lattice.Column.toBitSet(columns);
+    final double cardinality = profile.get().cardinality(build);
+//    System.out.println(columns + ": " + cardinality);
+    return cardinality;
+  }
+}
+
+// End ProfilerLatticeStatisticProvider.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
index 4a94847..8e5b19d 100644
--- a/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
@@ -20,28 +20,56 @@ import org.apache.calcite.schema.ScannableTable;
 import org.apache.calcite.schema.Table;
 import org.apache.calcite.util.ImmutableBitSet;
 
+import com.google.common.base.Preconditions;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Iterables;
 
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * Implementation of {@link LatticeStatisticProvider} that gets statistics by
  * executing "SELECT COUNT(DISTINCT ...) ..." SQL queries.
  */
 class SqlLatticeStatisticProvider implements LatticeStatisticProvider {
-  static final SqlLatticeStatisticProvider INSTANCE =
-      new SqlLatticeStatisticProvider();
+  static final Factory FACTORY =
+      new LatticeStatisticProvider.Factory() {
+        public LatticeStatisticProvider apply(Lattice lattice) {
+          return new SqlLatticeStatisticProvider(lattice);
+        }
+      };
+
+  static final Factory CACHED_FACTORY =
+      new LatticeStatisticProvider.Factory() {
+        public LatticeStatisticProvider apply(Lattice lattice) {
+          LatticeStatisticProvider provider = FACTORY.apply(lattice);
+          return new CachingLatticeStatisticProvider(lattice, provider);
+        }
+      };
+
+  private final Lattice lattice;
 
-  /** Creates an SqlLatticeStatisticProvider. */
-  private SqlLatticeStatisticProvider() {}
+  /** Creates a SqlLatticeStatisticProvider. */
+  private SqlLatticeStatisticProvider(Lattice lattice) {
+    this.lattice = Preconditions.checkNotNull(lattice);
+  }
+
+  public double cardinality(List<Lattice.Column> columns) {
+    final List<Double> counts = new ArrayList<>();
+    for (Lattice.Column column : columns) {
+      counts.add(cardinality(lattice, column));
+    }
+    return (int) Lattice.getRowCount(lattice.getFactRowCount(), counts);
+  }
 
-  @Override public int cardinality(Lattice lattice, Lattice.Column column) {
+  private double cardinality(Lattice lattice, Lattice.Column column) {
     final String sql = lattice.countSql(ImmutableBitSet.of(column.ordinal));
     final Table table =
         new MaterializationService.DefaultTableFactory()
             .createTable(lattice.rootSchema, sql, ImmutableList.<String>of());
     final Object[] values =
         Iterables.getOnlyElement(((ScannableTable) table).scan(null));
-    return ((Number) values[0]).intValue();
+    return ((Number) values[0]).doubleValue();
   }
 }
 

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/Profiler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/Profiler.java b/core/src/main/java/org/apache/calcite/profile/Profiler.java
new file mode 100644
index 0000000..851f40b
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/Profiler.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.materialize.Lattice;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.JsonBuilder;
+import org.apache.calcite.util.Util;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSortedSet;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableSet;
+import java.util.SortedSet;
+import javax.annotation.Nonnull;
+
+/**
+ * Analyzes data sets.
+ */
+public interface Profiler {
+  /** Creates a profile of a data set.
+   *
+   * @param rows List of rows. Can be iterated over more than once (maybe not
+   *             cheaply)
+   * @param columns Column definitions
+   *
+   * @param initialGroups List of combinations of columns that should be
+   *                     profiled early, because they may be interesting
+   *
+   * @return A profile describing relationships within the data set
+   */
+  Profile profile(Iterable<List<Comparable>> rows, List<Column> columns,
+      Collection<ImmutableBitSet> initialGroups);
+
+  /** Column. */
+  class Column implements Comparable<Column> {
+    public final int ordinal;
+    public final String name;
+
+    /** Creates a Column.
+     *
+     * @param ordinal Unique and contiguous within a particular data set
+     * @param name Name of the column
+     */
+    public Column(int ordinal, String name) {
+      this.ordinal = ordinal;
+      this.name = name;
+    }
+
+    static ImmutableBitSet toOrdinals(Iterable<Column> columns) {
+      final ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+      for (Column column : columns) {
+        builder.set(column.ordinal);
+      }
+      return builder.build();
+    }
+
+    @Override public int hashCode() {
+      return ordinal;
+    }
+
+    @Override public boolean equals(Object o) {
+      return this == o
+          || o instanceof Column
+          && ordinal == ((Column) o).ordinal;
+    }
+
+    @Override public int compareTo(@Nonnull Column column) {
+      return Integer.compare(ordinal, column.ordinal);
+    }
+
+    @Override public String toString() {
+      return name;
+    }
+  }
+
+  /** Statistic produced by the profiler. */
+  interface Statistic {
+    Object toMap(JsonBuilder jsonBuilder);
+  }
+
+  /** Whole data set. */
+  class RowCount implements Statistic {
+    final int rowCount;
+
+    public RowCount(int rowCount) {
+      this.rowCount = rowCount;
+    }
+
+    public Object toMap(JsonBuilder jsonBuilder) {
+      final Map<String, Object> map = jsonBuilder.map();
+      map.put("type", "rowCount");
+      map.put("rowCount", rowCount);
+      return map;
+    }
+  }
+
+  /** Unique key. */
+  class Unique implements Statistic {
+    final NavigableSet<Column> columns;
+
+    public Unique(SortedSet<Column> columns) {
+      this.columns = ImmutableSortedSet.copyOf(columns);
+    }
+
+    public Object toMap(JsonBuilder jsonBuilder) {
+      final Map<String, Object> map = jsonBuilder.map();
+      map.put("type", "unique");
+      map.put("columns", FunctionalDependency.getObjects(jsonBuilder, columns));
+      return map;
+    }
+  }
+
+  /** Functional dependency. */
+  class FunctionalDependency implements Statistic {
+    final NavigableSet<Column> columns;
+    final Column dependentColumn;
+
+    FunctionalDependency(SortedSet<Column> columns, Column dependentColumn) {
+      this.columns = ImmutableSortedSet.copyOf(columns);
+      this.dependentColumn = dependentColumn;
+    }
+
+    public Object toMap(JsonBuilder jsonBuilder) {
+      final Map<String, Object> map = jsonBuilder.map();
+      map.put("type", "fd");
+      map.put("columns", getObjects(jsonBuilder, columns));
+      map.put("dependentColumn", dependentColumn.name);
+      return map;
+    }
+
+    private static List<Object> getObjects(JsonBuilder jsonBuilder,
+        NavigableSet<Column> columns) {
+      final List<Object> list = jsonBuilder.list();
+      for (Column column : columns) {
+        list.add(column.name);
+      }
+      return list;
+    }
+  }
+
+  /** Value distribution, including cardinality and optionally values, of a
+   * column or set of columns. If the set of columns is empty, it describes
+   * the number of rows in the entire data set. */
+  class Distribution implements Statistic {
+    final NavigableSet<Column> columns;
+    final NavigableSet<Comparable> values;
+    final double cardinality;
+    final int nullCount;
+    final double expectedCardinality;
+    final boolean minimal;
+
+    /** Creates a Distribution.
+     *
+     * @param columns Column or columns being described
+     * @param values Values of columns, or null if there are too many
+     * @param cardinality Number of distinct values
+     * @param nullCount Number of rows where this column had a null value;
+     * @param expectedCardinality Expected cardinality
+     * @param minimal Whether the distribution is not implied by a unique
+     *   or functional dependency
+     */
+    public Distribution(SortedSet<Column> columns, SortedSet<Comparable> values,
+        double cardinality, int nullCount, double expectedCardinality,
+        boolean minimal) {
+      this.columns = ImmutableSortedSet.copyOf(columns);
+      this.values = values == null ? null : ImmutableSortedSet.copyOf(values);
+      this.cardinality = cardinality;
+      this.nullCount = nullCount;
+      this.expectedCardinality = expectedCardinality;
+      this.minimal = minimal;
+    }
+
+    public Object toMap(JsonBuilder jsonBuilder) {
+      final Map<String, Object> map = jsonBuilder.map();
+      map.put("type", "distribution");
+      map.put("columns", FunctionalDependency.getObjects(jsonBuilder, columns));
+      if (values != null) {
+        List<Object> list = jsonBuilder.list();
+        for (Comparable value : values) {
+          if (value instanceof java.sql.Date) {
+            value = value.toString();
+          }
+          list.add(value);
+        }
+        map.put("values", list);
+      }
+      map.put("cardinality", cardinality);
+      if (nullCount > 0) {
+        map.put("nullCount", nullCount);
+      }
+      map.put("expectedCardinality", expectedCardinality);
+      map.put("surprise", surprise());
+      return map;
+    }
+
+    ImmutableBitSet columnOrdinals() {
+      return Column.toOrdinals(columns);
+    }
+
+    double surprise() {
+      return SimpleProfiler.surprise(expectedCardinality, cardinality);
+    }
+  }
+
+  /** The result of profiling, contains various statistics about the
+   * data in a table. */
+  class Profile {
+    public final RowCount rowCount;
+    public final List<FunctionalDependency> functionalDependencyList;
+    public final List<Distribution> distributionList;
+    public final List<Unique> uniqueList;
+
+    private final Map<ImmutableBitSet, Distribution> distributionMap;
+    private final List<Distribution> singletonDistributionList;
+
+    Profile(List<Column> columns, RowCount rowCount,
+        Iterable<FunctionalDependency> functionalDependencyList,
+        Iterable<Distribution> distributionList, Iterable<Unique> uniqueList) {
+      this.rowCount = rowCount;
+      this.functionalDependencyList =
+          ImmutableList.copyOf(functionalDependencyList);
+      this.distributionList = ImmutableList.copyOf(distributionList);
+      this.uniqueList = ImmutableList.copyOf(uniqueList);
+
+      final ImmutableMap.Builder<ImmutableBitSet, Distribution> m =
+          ImmutableMap.builder();
+      for (Distribution distribution : distributionList) {
+        m.put(distribution.columnOrdinals(), distribution);
+      }
+      distributionMap = m.build();
+
+      final ImmutableList.Builder<Distribution> b = ImmutableList.builder();
+      for (int i = 0; i < columns.size(); i++) {
+        b.add(distributionMap.get(ImmutableBitSet.of(i)));
+      }
+      singletonDistributionList = b.build();
+    }
+
+    public List<Statistic> statistics() {
+      return ImmutableList.<Statistic>builder()
+          .add(rowCount)
+          .addAll(functionalDependencyList)
+          .addAll(distributionList)
+          .addAll(uniqueList)
+          .build();
+    }
+
+    public double cardinality(ImmutableBitSet columnOrdinals) {
+      final ImmutableBitSet originalOrdinals = columnOrdinals;
+      for (;;) {
+        final Distribution distribution = distributionMap.get(columnOrdinals);
+        if (distribution != null) {
+          if (columnOrdinals == originalOrdinals) {
+            return distribution.cardinality;
+          } else {
+            final List<Double> cardinalityList = new ArrayList<>();
+            cardinalityList.add(distribution.cardinality);
+            for (int ordinal : originalOrdinals.except(columnOrdinals)) {
+              final Distribution d = singletonDistributionList.get(ordinal);
+              cardinalityList.add(d.cardinality);
+            }
+            return Lattice.getRowCount(rowCount.rowCount, cardinalityList);
+          }
+        }
+        // Clear the last bit and iterate.
+        // Better would be to combine all of our nearest ancestors.
+        final List<Integer> list = columnOrdinals.asList();
+        columnOrdinals = columnOrdinals.clear(Util.last(list));
+      }
+    }
+  }
+}
+
+// End Profiler.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java b/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java
new file mode 100644
index 0000000..139f13d
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java
@@ -0,0 +1,809 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.linq4j.tree.Primitive;
+import org.apache.calcite.materialize.Lattice;
+import org.apache.calcite.prepare.CalcitePrepareImpl;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.FlatLists;
+import org.apache.calcite.runtime.PredicateImpl;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.PartiallyOrderedSet;
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Ordering;
+import com.yahoo.sketches.hll.HllSketch;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import static org.apache.calcite.profile.ProfilerImpl.CompositeCollector.OF;
+
+/**
+ * Implementation of {@link Profiler} that only investigates "interesting"
+ * combinations of columns.
+ */
+public class ProfilerImpl implements Profiler {
+  /** The number of combinations to consider per pass.
+   * The number is determined by memory, but a value of 1,000 is typical.
+   * You need 2KB memory per sketch, and one sketch for each combination. */
+  private final int combinationsPerPass;
+
+  /** The minimum number of combinations considered "interesting". After that,
+   * a combination is only considered "interesting" if its surprise is greater
+   * than the median surprise. */
+  private final int interestingCount;
+
+  /** Whether a successor is considered interesting enough to analyze. */
+  private final Predicate<Pair<Space, Column>> predicate;
+
+  public static Builder builder() {
+    return new Builder();
+  }
+
+  /**
+   * Creates a {@code ProfilerImpl}.
+   *
+   * @param combinationsPerPass Maximum number of columns (or combinations of
+   *   columns) to compute each pass
+   * @param interestingCount Minimum number of combinations considered
+   *   interesting
+   * @param predicate Whether a successor is considered interesting enough to
+   *   analyze
+   */
+  ProfilerImpl(int combinationsPerPass,
+      int interestingCount, Predicate<Pair<Space, Column>> predicate) {
+    Preconditions.checkArgument(combinationsPerPass > 2);
+    Preconditions.checkArgument(interestingCount > 2);
+    this.combinationsPerPass = combinationsPerPass;
+    this.interestingCount = interestingCount;
+    this.predicate = predicate;
+  }
+
+  public Profile profile(Iterable<List<Comparable>> rows,
+      final List<Column> columns, Collection<ImmutableBitSet> initialGroups) {
+    return new Run(columns, initialGroups).profile(rows);
+  }
+
+  /** A run of the profiler. */
+  class Run {
+    private final List<Column> columns;
+    final PartiallyOrderedSet<ImmutableBitSet> keyPoset =
+        new PartiallyOrderedSet<>(
+            PartiallyOrderedSet.BIT_SET_INCLUSION_ORDERING);
+    final Map<ImmutableBitSet, Distribution> distributions = new HashMap<>();
+    /** List of spaces that have one column. */
+    final List<Space> singletonSpaces;
+    /** Combinations of columns that we have computed but whose successors have
+     * not yet been computed. We may add some of those successors to
+     * {@link #spaceQueue}. */
+    final Queue<Space> doneQueue =
+        new PriorityQueue<>(100,
+          new Comparator<Space>() {
+            public int compare(Space s0, Space s1) {
+              // The space with 0 columns is more interesting than
+              // any space with 1 column, and so forth.
+              // For spaces with 2 or more columns we compare "surprise":
+              // how many fewer values did it have than expected?
+              int c = Integer.compare(s0.columns.size(), s1.columns.size());
+              if (c == 0) {
+                c = Double.compare(s0.surprise(), s1.surprise());
+              }
+              return c;
+            }
+          });
+    final SurpriseQueue surprises;
+
+    /** Combinations of columns that we will compute next pass. */
+    final Deque<ImmutableBitSet> spaceQueue = new ArrayDeque<>();
+    final List<Unique> uniques = new ArrayList<>();
+    final List<FunctionalDependency> functionalDependencies = new ArrayList<>();
+    /** Column ordinals that have ever been placed on {@link #spaceQueue}.
+     * Ensures that we do not calculate the same combination more than once,
+     * even though we generate a column set from multiple parents. */
+    final Set<ImmutableBitSet> resultSet = new HashSet<>();
+    final PartiallyOrderedSet<Space> results = new PartiallyOrderedSet<>(
+        new PartiallyOrderedSet.Ordering<Space>() {
+          public boolean lessThan(Space e1, Space e2) {
+            return e2.columnOrdinals.contains(e1.columnOrdinals);
+          }
+        });
+    private final List<ImmutableBitSet> keyOrdinalLists =
+        new ArrayList<>();
+    final Function<Integer, Column> get =
+        new Function<Integer, Column>() {
+          public Column apply(Integer input) {
+            return columns.get(input);
+          }
+        };
+    private int rowCount;
+
+    /**
+     * Creates a Run.
+     *
+     * @param columns List of columns
+     *
+     * @param initialGroups List of combinations of columns that should be
+     *                     profiled early, because they may be interesting
+     */
+    Run(final List<Column> columns, Collection<ImmutableBitSet> initialGroups) {
+      this.columns = ImmutableList.copyOf(columns);
+      for (Ord<Column> column : Ord.zip(columns)) {
+        if (column.e.ordinal != column.i) {
+          throw new IllegalArgumentException();
+        }
+      }
+      this.singletonSpaces =
+          new ArrayList<>(Collections.nCopies(columns.size(), (Space) null));
+      if (combinationsPerPass > Math.pow(2D, columns.size())) {
+        // There are not many columns. We can compute all combinations in the
+        // first pass.
+        for (ImmutableBitSet ordinals
+            : ImmutableBitSet.range(columns.size()).powerSet()) {
+          spaceQueue.add(ordinals);
+        }
+      } else {
+        // We will need to take multiple passes.
+        // Pass 0, just put the empty combination on the queue.
+        // Next pass, we will do its successors, the singleton combinations.
+        spaceQueue.add(ImmutableBitSet.of());
+        spaceQueue.addAll(initialGroups);
+        if (columns.size() < combinationsPerPass) {
+          // There are not very many columns. Compute the singleton
+          // groups in pass 0.
+          for (Column column : columns) {
+            spaceQueue.add(ImmutableBitSet.of(column.ordinal));
+          }
+        }
+      }
+      // The surprise queue must have enough room for all singleton groups
+      // plus all initial groups.
+      surprises = new SurpriseQueue(1 + columns.size() + initialGroups.size(),
+          interestingCount);
+    }
+
+    Profile profile(Iterable<List<Comparable>> rows) {
+      int pass = 0;
+      for (;;) {
+        final List<Space> spaces = nextBatch(pass);
+        if (spaces.isEmpty()) {
+          break;
+        }
+        pass(pass++, spaces, rows);
+      }
+
+      for (Space s : singletonSpaces) {
+        for (ImmutableBitSet dependent : s.dependents) {
+          functionalDependencies.add(
+              new FunctionalDependency(toColumns(dependent),
+                  Iterables.getOnlyElement(s.columns)));
+        }
+      }
+      return new Profile(columns, new RowCount(rowCount),
+          functionalDependencies, distributions.values(), uniques);
+    }
+
+    /** Populates {@code spaces} with the next batch.
+     * Returns an empty list if done. */
+    List<Space> nextBatch(int pass) {
+      final List<Space> spaces = new ArrayList<>();
+    loop:
+      for (;;) {
+        if (spaces.size() >= combinationsPerPass) {
+          // We have enough for the next pass.
+          return spaces;
+        }
+        // First, see if there is a space we did have room for last pass.
+        final ImmutableBitSet ordinals = spaceQueue.poll();
+        if (ordinals != null) {
+          final Space space = new Space(this, ordinals, toColumns(ordinals));
+          spaces.add(space);
+          if (ordinals.cardinality() == 1) {
+            singletonSpaces.set(ordinals.nth(0), space);
+          }
+        } else {
+          // Next, take a space that was done last time, generate its
+          // successors, and add the interesting ones to the space queue.
+          for (;;) {
+            final Space doneSpace = doneQueue.poll();
+            if (doneSpace == null) {
+              // There are no more done spaces. We're done.
+              return spaces;
+            }
+            if (doneSpace.columnOrdinals.cardinality() > 4) {
+              // Do not generate successors for groups with lots of columns,
+              // probably initial groups
+              continue;
+            }
+            for (Column column : columns) {
+              if (!doneSpace.columnOrdinals.get(column.ordinal)) {
+                if (pass == 0
+                    || doneSpace.columnOrdinals.cardinality() == 0
+                    || !containsKey(
+                        doneSpace.columnOrdinals.set(column.ordinal))
+                    && predicate.apply(Pair.of(doneSpace, column))) {
+                  final ImmutableBitSet nextOrdinals =
+                      doneSpace.columnOrdinals.set(column.ordinal);
+                  if (resultSet.add(nextOrdinals)) {
+                    spaceQueue.add(nextOrdinals);
+                  }
+                }
+              }
+            }
+            // We've converted at a space into at least one interesting
+            // successor.
+            if (!spaceQueue.isEmpty()) {
+              continue loop;
+            }
+          }
+        }
+      }
+    }
+
+    private boolean containsKey(ImmutableBitSet ordinals) {
+      for (ImmutableBitSet keyOrdinals : keyOrdinalLists) {
+        if (ordinals.contains(keyOrdinals)) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    void pass(int pass, List<Space> spaces, Iterable<List<Comparable>> rows) {
+      if (CalcitePrepareImpl.DEBUG) {
+        System.out.println("pass: " + pass
+            + ", spaces.size: " + spaces.size()
+            + ", distributions.size: " + distributions.size());
+      }
+
+      for (Space space : spaces) {
+        space.collector = Collector.create(space, 1000);
+      }
+
+      int rowCount = 0;
+      for (final List<Comparable> row : rows) {
+        ++rowCount;
+        for (Space space : spaces) {
+          space.collector.add(row);
+        }
+      }
+
+      // Populate unique keys.
+      // If [x, y] is a key,
+      // then [x, y, z] is a non-minimal key (therefore not interesting),
+      // and [x, y] => [a] is a functional dependency but not interesting,
+      // and [x, y, z] is not an interesting distribution.
+      for (Space space : spaces) {
+        space.collector.finish();
+        space.collector = null;
+//        results.add(space);
+
+        int nonMinimal = 0;
+      dependents:
+        for (Space s : results.getDescendants(space)) {
+          if (s.cardinality == space.cardinality) {
+            // We have discovered a sub-set that has the same cardinality.
+            // The column(s) that are not in common are functionally
+            // dependent.
+            final ImmutableBitSet dependents =
+                space.columnOrdinals.except(s.columnOrdinals);
+            for (int i : s.columnOrdinals) {
+              final Space s1 = singletonSpaces.get(i);
+              final ImmutableBitSet rest = s.columnOrdinals.clear(i);
+              for (ImmutableBitSet dependent : s1.dependents) {
+                if (rest.contains(dependent)) {
+                  // The "key" of this functional dependency is not minimal.
+                  // For instance, if we know that
+                  //   (a) -> x
+                  // then
+                  //   (a, b, x) -> y
+                  // is not minimal; we could say the same with a smaller key:
+                  //   (a, b) -> y
+                  ++nonMinimal;
+                  continue dependents;
+                }
+              }
+            }
+            for (int dependent : dependents) {
+              final Space s1 = singletonSpaces.get(dependent);
+              for (ImmutableBitSet d : s1.dependents) {
+                if (s.columnOrdinals.contains(d)) {
+                  ++nonMinimal;
+                  continue dependents;
+                }
+              }
+            }
+            space.dependencies.or(dependents.toBitSet());
+            for (int d : dependents) {
+              singletonSpaces.get(d).dependents.add(s.columnOrdinals);
+            }
+          }
+        }
+        if (nonMinimal > 0) {
+          continue;
+        }
+        final String s = space.columns.toString(); // for debug
+        Util.discard(s);
+        double expectedCardinality =
+            expectedCardinality(rowCount, space.columnOrdinals);
+
+        final boolean minimal = nonMinimal == 0
+            && !space.unique
+            && !containsKey(space.columnOrdinals);
+        space.expectedCardinality = expectedCardinality;
+        if (minimal) {
+          final Distribution distribution =
+              new Distribution(space.columns, space.valueSet, space.cardinality,
+                  space.nullCount, expectedCardinality, minimal);
+          final double surprise = distribution.surprise();
+          if (CalcitePrepareImpl.DEBUG && surprise > 0.1d) {
+            System.out.println(distribution.columnOrdinals()
+                + " " + distribution.columns
+                + ", cardinality: " + distribution.cardinality
+                + ", expected: " + distribution.expectedCardinality
+                + ", surprise: " + distribution.surprise());
+          }
+          if (surprises.offer(surprise)) {
+            distributions.put(space.columnOrdinals, distribution);
+            keyPoset.add(space.columnOrdinals);
+            doneQueue.add(space);
+          }
+        }
+
+        if (space.cardinality == rowCount) {
+          // We have discovered a new key. It is not a super-set of a key.
+          uniques.add(new Unique(space.columns));
+          keyOrdinalLists.add(space.columnOrdinals);
+          space.unique = true;
+        }
+      }
+
+      if (pass == 0) {
+        this.rowCount = rowCount;
+      }
+    }
+
+    /** Estimates the cardinality of a collection of columns represented by
+     * {@code columnOrdinals}, drawing on existing distributions. */
+    private double cardinality(double rowCount, ImmutableBitSet columns) {
+      final Distribution distribution = distributions.get(columns);
+      if (distribution != null) {
+        return distribution.cardinality;
+      } else {
+        return expectedCardinality(rowCount, columns);
+      }
+    }
+
+    /** Estimates the cardinality of a collection of columns represented by
+     * {@code columnOrdinals}, drawing on existing distributions. Does not
+     * look in the distribution map for this column set. */
+    private double expectedCardinality(double rowCount,
+        ImmutableBitSet columns) {
+      switch (columns.cardinality()) {
+      case 0:
+        return 1d;
+      case 1:
+        return rowCount;
+      default:
+        double c = rowCount;
+        for (ImmutableBitSet bitSet : keyPoset.getParents(columns, true)) {
+          if (bitSet.isEmpty()) {
+            // If the parent is the empty group (i.e. "GROUP BY ()", the grand
+            // total) we cannot improve on the estimate.
+            continue;
+          }
+          final Distribution d1 = distributions.get(bitSet);
+          final double c2 = cardinality(rowCount, columns.except(bitSet));
+          final double d = Lattice.getRowCount(rowCount, d1.cardinality, c2);
+          c = Math.min(c, d);
+        }
+        for (ImmutableBitSet bitSet : keyPoset.getChildren(columns, true)) {
+          final Distribution d1 = distributions.get(bitSet);
+          c = Math.min(c, d1.cardinality);
+        }
+        return c;
+      }
+    }
+
+
+    private ImmutableSortedSet<Column> toColumns(Iterable<Integer> ordinals) {
+      return ImmutableSortedSet.copyOf(Iterables.transform(ordinals, get));
+    }
+  }
+
+  /** Work space for a particular combination of columns. */
+  static class Space {
+    private final Run run;
+    final ImmutableBitSet columnOrdinals;
+    final ImmutableSortedSet<Column> columns;
+    boolean unique;
+    final BitSet dependencies = new BitSet();
+    final Set<ImmutableBitSet> dependents = new HashSet<>();
+    double expectedCardinality;
+    Collector collector;
+    /** Assigned by {@link Collector#finish()}. */
+    int nullCount;
+    /** Number of distinct values. Null is counted as a value, if present.
+     * Assigned by {@link Collector#finish()}. */
+    int cardinality;
+    /** Assigned by {@link Collector#finish()}. */
+    SortedSet<Comparable> valueSet;
+
+    Space(Run run, ImmutableBitSet columnOrdinals, Iterable<Column> columns) {
+      this.run = run;
+      this.columnOrdinals = columnOrdinals;
+      this.columns = ImmutableSortedSet.copyOf(columns);
+    }
+
+    @Override public int hashCode() {
+      return columnOrdinals.hashCode();
+    }
+
+    @Override public boolean equals(Object o) {
+      return o == this
+          || o instanceof Space
+          && columnOrdinals.equals(((Space) o).columnOrdinals);
+    }
+
+    /** Returns the distribution created from this space, or null if no
+     * distribution has been registered yet. */
+    public Distribution distribution() {
+      return run.distributions.get(columnOrdinals);
+    }
+
+    double surprise() {
+      return SimpleProfiler.surprise(expectedCardinality, cardinality);
+    }
+  }
+
+  /** Builds a {@link org.apache.calcite.profile.ProfilerImpl}. */
+  public static class Builder {
+    int combinationsPerPass = 100;
+    Predicate<Pair<Space, Column>> predicate = Predicates.alwaysTrue();
+
+    public ProfilerImpl build() {
+      return new ProfilerImpl(combinationsPerPass, 200, predicate);
+    }
+
+    public Builder withPassSize(int passSize) {
+      this.combinationsPerPass = passSize;
+      return this;
+    }
+
+    public Builder withMinimumSurprise(double v) {
+      predicate =
+          new PredicateImpl<Pair<Space, Column>>() {
+            public boolean test(Pair<Space, Column> spaceColumnPair) {
+              final Space space = spaceColumnPair.left;
+              return false;
+            }
+          };
+      return this;
+    }
+  }
+
+  /** Collects values of a column or columns. */
+  abstract static class Collector {
+    protected final Space space;
+
+    Collector(Space space) {
+      this.space = space;
+    }
+
+    abstract void add(List<Comparable> row);
+    abstract void finish();
+
+    /** Creates an initial collector of the appropriate kind. */
+    public static Collector create(Space space, int sketchThreshold) {
+      final List<Integer> columnOrdinalList = space.columnOrdinals.asList();
+      if (columnOrdinalList.size() == 1) {
+        return new SingletonCollector(space, columnOrdinalList.get(0),
+            sketchThreshold);
+      } else {
+        return new CompositeCollector(space,
+            (int[]) Primitive.INT.toArray(columnOrdinalList), sketchThreshold);
+      }
+    }
+  }
+
+  /** Collector that collects values of a single column. */
+  static class SingletonCollector extends Collector {
+    final SortedSet<Comparable> values = new TreeSet<>();
+    final int columnOrdinal;
+    final int sketchThreshold;
+    int nullCount = 0;
+
+    SingletonCollector(Space space, int columnOrdinal, int sketchThreshold) {
+      super(space);
+      this.columnOrdinal = columnOrdinal;
+      this.sketchThreshold = sketchThreshold;
+    }
+
+    public void add(List<Comparable> row) {
+      final Comparable v = row.get(columnOrdinal);
+      if (v == NullSentinel.INSTANCE) {
+        nullCount++;
+      } else {
+        if (values.add(v) && values.size() == sketchThreshold) {
+          // Too many values. Switch to a sketch collector.
+          final HllSingletonCollector collector =
+              new HllSingletonCollector(space, columnOrdinal);
+          for (Comparable value : values) {
+            collector.add(value);
+          }
+          space.collector = collector;
+        }
+      }
+    }
+
+    public void finish() {
+      space.nullCount = nullCount;
+      space.cardinality = values.size() + (nullCount > 0 ? 1 : 0);
+      space.valueSet = values.size() < 20 ? values : null;
+    }
+  }
+
+  /** Collector that collects two or more column values in a tree set. */
+  static class CompositeCollector extends Collector {
+    protected static final ImmutableBitSet OF = ImmutableBitSet.of(2, 13);
+    final Set<FlatLists.ComparableList> values = new HashSet<>();
+    final int[] columnOrdinals;
+    final Comparable[] columnValues;
+    int nullCount = 0;
+    private final int sketchThreshold;
+
+    CompositeCollector(Space space, int[] columnOrdinals, int sketchThreshold) {
+      super(space);
+      this.columnOrdinals = columnOrdinals;
+      this.columnValues = new Comparable[columnOrdinals.length];
+      this.sketchThreshold = sketchThreshold;
+    }
+
+    public void add(List<Comparable> row) {
+      if (space.columnOrdinals.equals(OF)) {
+        Util.discard(0);
+      }
+      int nullCountThisRow = 0;
+      for (int i = 0, length = columnOrdinals.length; i < length; i++) {
+        final Comparable value = row.get(columnOrdinals[i]);
+        if (value == NullSentinel.INSTANCE) {
+          if (nullCountThisRow++ == 0) {
+            nullCount++;
+          }
+        }
+        columnValues[i] = value;
+      }
+      //noinspection unchecked
+      if (((Set) values).add(FlatLists.copyOf(columnValues))
+          && values.size() == sketchThreshold) {
+        // Too many values. Switch to a sketch collector.
+        final HllCompositeCollector collector =
+            new HllCompositeCollector(space, columnOrdinals);
+        final List<Comparable> list =
+            new ArrayList<>(
+                Collections.nCopies(columnOrdinals[columnOrdinals.length - 1]
+                        + 1,
+                    (Comparable) null));
+        for (FlatLists.ComparableList value : this.values) {
+          for (int i = 0; i < value.size(); i++) {
+            Comparable c = (Comparable) value.get(i);
+            list.set(columnOrdinals[i], c);
+          }
+          collector.add(list);
+        }
+        space.collector = collector;
+      }
+    }
+
+    public void finish() {
+      // number of input rows (not distinct values)
+      // that were null or partially null
+      space.nullCount = nullCount;
+      space.cardinality = values.size() + (nullCount > 0 ? 1 : 0);
+      space.valueSet = null;
+    }
+
+  }
+
+  /** Collector that collects two or more column values into a HyperLogLog
+   * sketch. */
+  abstract static class HllCollector extends Collector {
+    final HllSketch sketch;
+    int nullCount = 0;
+
+    static final long[] NULL_BITS = {0x9f77d57e93167a16L};
+
+    HllCollector(Space space) {
+      super(space);
+      this.sketch = HllSketch.builder().build();
+    }
+
+    protected void add(Comparable value) {
+      if (value == NullSentinel.INSTANCE) {
+        sketch.update(NULL_BITS);
+      } else if (value instanceof String) {
+        sketch.update((String) value);
+      } else if (value instanceof Double) {
+        sketch.update((Double) value);
+      } else if (value instanceof Float) {
+        sketch.update((Float) value);
+      } else if (value instanceof Long) {
+        sketch.update((Long) value);
+      } else if (value instanceof Number) {
+        sketch.update(((Number) value).longValue());
+      } else {
+        sketch.update(value.toString());
+      }
+    }
+
+    public void finish() {
+      space.nullCount = nullCount;
+      space.cardinality = (int) sketch.getEstimate();
+      space.valueSet = null;
+    }
+  }
+
+  /** Collector that collects one column value into a HyperLogLog sketch. */
+  static class HllSingletonCollector extends HllCollector {
+    final int columnOrdinal;
+
+    HllSingletonCollector(Space space, int columnOrdinal) {
+      super(space);
+      this.columnOrdinal = columnOrdinal;
+    }
+
+    public void add(List<Comparable> row) {
+      final Comparable value = row.get(columnOrdinal);
+      if (value == NullSentinel.INSTANCE) {
+        nullCount++;
+        sketch.update(NULL_BITS);
+      } else {
+        add(value);
+      }
+    }
+  }
+
+  /** Collector that collects two or more column values into a HyperLogLog
+   * sketch. */
+  static class HllCompositeCollector extends HllCollector {
+    private final int[] columnOrdinals;
+    private final ByteBuffer buf = ByteBuffer.allocate(1024);
+
+    HllCompositeCollector(Space space, int[] columnOrdinals) {
+      super(space);
+      this.columnOrdinals = columnOrdinals;
+    }
+
+    public void add(List<Comparable> row) {
+      if (space.columnOrdinals.equals(OF)) {
+        Util.discard(0);
+      }
+      int nullCountThisRow = 0;
+      buf.clear();
+      for (int columnOrdinal : columnOrdinals) {
+        final Comparable value = row.get(columnOrdinal);
+        if (value == NullSentinel.INSTANCE) {
+          if (nullCountThisRow++ == 0) {
+            nullCount++;
+          }
+          buf.put((byte) 0);
+        } else if (value instanceof String) {
+          buf.put((byte) 1)
+              .put(((String) value).getBytes(StandardCharsets.UTF_8));
+        } else if (value instanceof Double) {
+          buf.put((byte) 2).putDouble((Double) value);
+        } else if (value instanceof Float) {
+          buf.put((byte) 3).putFloat((Float) value);
+        } else if (value instanceof Long) {
+          buf.put((byte) 4).putLong((Long) value);
+        } else if (value instanceof Integer) {
+          buf.put((byte) 5).putInt((Integer) value);
+        } else if (value instanceof Boolean) {
+          buf.put((Boolean) value ? (byte) 6 : (byte) 7);
+        } else {
+          buf.put((byte) 8)
+              .put(value.toString().getBytes(StandardCharsets.UTF_8));
+        }
+      }
+      sketch.update(Arrays.copyOf(buf.array(), buf.position()));
+    }
+  }
+
+  /** A priority queue of the last N surprise values. Accepts a new value if
+   * the queue is not yet full, or if its value is greater than the median value
+   * over the last N. */
+  static class SurpriseQueue {
+    private final int warmUpCount;
+    private final int size;
+    int count = 0;
+    final Deque<Double> deque = new ArrayDeque<>();
+    final PriorityQueue<Double> priorityQueue =
+        new PriorityQueue<>(11, Ordering.<Double>natural());
+
+    SurpriseQueue(int warmUpCount, int size) {
+      this.warmUpCount = warmUpCount;
+      this.size = size;
+      Preconditions.checkArgument(warmUpCount > 3);
+      Preconditions.checkArgument(size > 0);
+    }
+
+    @Override public String toString() {
+      return "min: " + priorityQueue.peek()
+          + ", contents: " + deque.toString();
+    }
+
+    boolean isValid() {
+      if (CalcitePrepareImpl.DEBUG) {
+        System.out.println(toString());
+      }
+      assert deque.size() == priorityQueue.size();
+      if (count > size) {
+        assert deque.size() == size;
+      }
+      return true;
+    }
+
+    boolean offer(double d) {
+      boolean b;
+      if (count++ < warmUpCount || d > priorityQueue.peek()) {
+        if (priorityQueue.size() >= size) {
+          priorityQueue.remove(deque.pop());
+        }
+        priorityQueue.add(d);
+        deque.add(d);
+        b = true;
+      } else {
+        b = false;
+      }
+      if (CalcitePrepareImpl.DEBUG) {
+        System.out.println("offer " + d
+            + " min " + priorityQueue.peek()
+            + " accepted " + b);
+      }
+      return b;
+    }
+  }
+}
+
+// End ProfilerImpl.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java b/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java
new file mode 100644
index 0000000..c9b00d0
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.materialize.Lattice;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.FlatLists;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.PartiallyOrderedSet;
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.Iterables;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import javax.annotation.Nonnull;
+
+/**
+ * Basic implementation of {@link Profiler}.
+ */
+public class SimpleProfiler implements Profiler {
+  private static final Function<List<Comparable>, Comparable> ONLY =
+      new Function<List<Comparable>, Comparable>() {
+        public Comparable apply(List<Comparable> input) {
+          return Iterables.getOnlyElement(input);
+        }
+      };
+
+  public Profile profile(Iterable<List<Comparable>> rows,
+      final List<Column> columns, Collection<ImmutableBitSet> initialGroups) {
+    Util.discard(initialGroups); // this profiler ignores initial groups
+    return new Run(columns).profile(rows);
+  }
+
+  /** Returns a measure of how much an actual value differs from expected.
+   * The formula is {@code abs(expected - actual) / (expected + actual)}.
+   *
+   * <p>Examples:<ul>
+   *   <li>surprise(e, a) is always between 0 and 1;
+   *   <li>surprise(e, a) is 0 if e = a;
+   *   <li>surprise(e, 0) is 1 if e &gt; 0;
+   *   <li>surprise(0, a) is 1 if a &gt; 0;
+   *   <li>surprise(5, 0) is 100%;
+   *   <li>surprise(5, 3) is 25%;
+   *   <li>surprise(5, 4) is 11%;
+   *   <li>surprise(5, 5) is 0%;
+   *   <li>surprise(5, 6) is 9%;
+   *   <li>surprise(5, 16) is 52%;
+   *   <li>surprise(5, 100) is 90%;
+   * </ul>
+   *
+   * @param expected Expected value
+   * @param actual Actual value
+   * @return Measure of how much expected deviates from actual
+   */
+  public static double surprise(double expected, double actual) {
+    if (expected == actual) {
+      return 0d;
+    }
+    final double sum = expected + actual;
+    if (sum <= 0d) {
+      return 1d;
+    }
+    return Math.abs(expected - actual) / sum;
+  }
+
+  /** A run of the profiler. */
+  static class Run {
+    private final List<Column> columns;
+    final List<Space> spaces = new ArrayList<>();
+    final List<Space> singletonSpaces;
+    final List<Statistic> statistics = new ArrayList<>();
+    final PartiallyOrderedSet.Ordering<Space> ordering =
+        new PartiallyOrderedSet.Ordering<Space>() {
+          public boolean lessThan(Space e1, Space e2) {
+            return e2.columnOrdinals.contains(e1.columnOrdinals);
+          }
+        };
+    final PartiallyOrderedSet<Space> results =
+        new PartiallyOrderedSet<>(ordering);
+    final PartiallyOrderedSet<Space> keyResults =
+        new PartiallyOrderedSet<>(ordering);
+    private final List<ImmutableBitSet> keyOrdinalLists =
+        new ArrayList<>();
+    final Function<Integer, Column> get =
+        new Function<Integer, Column>() {
+          public Column apply(Integer input) {
+            return columns.get(input);
+          }
+        };
+
+    Run(final List<Column> columns) {
+      for (Ord<Column> column : Ord.zip(columns)) {
+        if (column.e.ordinal != column.i) {
+          throw new IllegalArgumentException();
+        }
+      }
+      this.columns = columns;
+      this.singletonSpaces =
+          new ArrayList<>(Collections.nCopies(columns.size(), (Space) null));
+      for (ImmutableBitSet ordinals
+          : ImmutableBitSet.range(columns.size()).powerSet()) {
+        final Space space = new Space(ordinals, toColumns(ordinals));
+        spaces.add(space);
+        if (ordinals.cardinality() == 1) {
+          singletonSpaces.set(ordinals.nth(0), space);
+        }
+      }
+    }
+
+    Profile profile(Iterable<List<Comparable>> rows) {
+      final List<Comparable> values = new ArrayList<>();
+      int rowCount = 0;
+      for (final List<Comparable> row : rows) {
+        ++rowCount;
+      joint:
+        for (Space space : spaces) {
+          values.clear();
+          for (Column column : space.columns) {
+            final Comparable value = row.get(column.ordinal);
+            values.add(value);
+            if (value == NullSentinel.INSTANCE) {
+              space.nullCount++;
+              continue joint;
+            }
+          }
+          space.values.add(FlatLists.ofComparable(values));
+        }
+      }
+
+      // Populate unique keys
+      // If [x, y] is a key,
+      // then [x, y, z] is a key but not intersecting,
+      // and [x, y] => [a] is a functional dependency but not interesting,
+      // and [x, y, z] is not an interesting distribution.
+      final Map<ImmutableBitSet, Distribution> distributions = new HashMap<>();
+      for (Space space : spaces) {
+        if (space.values.size() == rowCount
+            && !containsKey(space.columnOrdinals, false)) {
+          // We have discovered a new key.
+          // It is not an existing key or a super-set of a key.
+          statistics.add(new Unique(space.columns));
+          space.unique = true;
+          keyOrdinalLists.add(space.columnOrdinals);
+        }
+
+        int nonMinimal = 0;
+      dependents:
+        for (Space s : results.getDescendants(space)) {
+          if (s.cardinality() == space.cardinality()) {
+            // We have discovered a sub-set that has the same cardinality.
+            // The column(s) that are not in common are functionally
+            // dependent.
+            final ImmutableBitSet dependents =
+                space.columnOrdinals.except(s.columnOrdinals);
+            for (int i : s.columnOrdinals) {
+              final Space s1 = singletonSpaces.get(i);
+              final ImmutableBitSet rest = s.columnOrdinals.clear(i);
+              for (ImmutableBitSet dependent : s1.dependents) {
+                if (rest.contains(dependent)) {
+                  // The "key" of this functional dependency is not minimal.
+                  // For instance, if we know that
+                  //   (a) -> x
+                  // then
+                  //   (a, b, x) -> y
+                  // is not minimal; we could say the same with a smaller key:
+                  //   (a, b) -> y
+                  ++nonMinimal;
+                  continue dependents;
+                }
+              }
+            }
+            for (int dependent : dependents) {
+              final Space s1 = singletonSpaces.get(dependent);
+              for (ImmutableBitSet d : s1.dependents) {
+                if (s.columnOrdinals.contains(d)) {
+                  ++nonMinimal;
+                  continue dependents;
+                }
+              }
+            }
+            space.dependencies.or(dependents.toBitSet());
+            for (int d : dependents) {
+              singletonSpaces.get(d).dependents.add(s.columnOrdinals);
+            }
+          }
+        }
+
+        int nullCount;
+        final SortedSet<Comparable> valueSet;
+        if (space.columns.size() == 1) {
+          nullCount = space.nullCount;
+          valueSet = ImmutableSortedSet.copyOf(
+              Iterables.transform(space.values, ONLY));
+        } else {
+          nullCount = -1;
+          valueSet = null;
+        }
+        double expectedCardinality;
+        final double cardinality = space.cardinality();
+        switch (space.columns.size()) {
+        case 0:
+          expectedCardinality = 1d;
+          break;
+        case 1:
+          expectedCardinality = rowCount;
+          break;
+        default:
+          expectedCardinality = rowCount;
+          for (Column column : space.columns) {
+            final Distribution d1 =
+                distributions.get(ImmutableBitSet.of(column.ordinal));
+            final Distribution d2 =
+                distributions.get(space.columnOrdinals.clear(column.ordinal));
+            final double d =
+                Lattice.getRowCount(rowCount, d1.cardinality, d2.cardinality);
+            expectedCardinality = Math.min(expectedCardinality, d);
+          }
+        }
+        final boolean minimal = nonMinimal == 0
+            && !space.unique
+            && !containsKey(space.columnOrdinals, true);
+        final Distribution distribution =
+            new Distribution(space.columns, valueSet, cardinality, nullCount,
+                expectedCardinality, minimal);
+        statistics.add(distribution);
+        distributions.put(space.columnOrdinals, distribution);
+
+        if (distribution.minimal) {
+          results.add(space);
+        }
+      }
+
+      for (Space s : singletonSpaces) {
+        for (ImmutableBitSet dependent : s.dependents) {
+          if (!containsKey(dependent, false)
+              && !hasNull(dependent)) {
+            statistics.add(
+                new FunctionalDependency(toColumns(dependent),
+                    Iterables.getOnlyElement(s.columns)));
+          }
+        }
+      }
+      return new Profile(columns, new RowCount(rowCount),
+          Iterables.filter(statistics, FunctionalDependency.class),
+          Iterables.filter(statistics, Distribution.class),
+          Iterables.filter(statistics, Unique.class));
+    }
+
+    /** Returns whether a set of column ordinals
+     * matches or contains a unique key.
+     * If {@code strict}, it must contain a unique key. */
+    private boolean containsKey(ImmutableBitSet ordinals, boolean strict) {
+      for (ImmutableBitSet keyOrdinals : keyOrdinalLists) {
+        if (ordinals.contains(keyOrdinals)) {
+          return !(strict && keyOrdinals.equals(ordinals));
+        }
+      }
+      return false;
+    }
+
+    private boolean hasNull(ImmutableBitSet columnOrdinals) {
+      for (Integer columnOrdinal : columnOrdinals) {
+        if (singletonSpaces.get(columnOrdinal).nullCount > 0) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    private ImmutableSortedSet<Column> toColumns(Iterable<Integer> ordinals) {
+      return ImmutableSortedSet.copyOf(Iterables.transform(ordinals, get));
+    }
+  }
+
+  /** Work space for a particular combination of columns. */
+  static class Space implements Comparable<Space> {
+    final ImmutableBitSet columnOrdinals;
+    final ImmutableSortedSet<Column> columns;
+    int nullCount;
+    final SortedSet<FlatLists.ComparableList<Comparable>> values =
+        new TreeSet<>();
+    boolean unique;
+    final BitSet dependencies = new BitSet();
+    final Set<ImmutableBitSet> dependents = new HashSet<>();
+
+    Space(ImmutableBitSet columnOrdinals, Iterable<Column> columns) {
+      this.columnOrdinals = columnOrdinals;
+      this.columns = ImmutableSortedSet.copyOf(columns);
+    }
+
+    @Override public int hashCode() {
+      return columnOrdinals.hashCode();
+    }
+
+    @Override public boolean equals(Object o) {
+      return o == this
+          || o instanceof Space
+          && columnOrdinals.equals(((Space) o).columnOrdinals);
+    }
+
+    public int compareTo(@Nonnull Space o) {
+      return columnOrdinals.equals(o.columnOrdinals) ? 0
+          : columnOrdinals.contains(o.columnOrdinals) ? 1
+              : -1;
+    }
+
+    /** Number of distinct values. Null is counted as a value, if present. */
+    public double cardinality() {
+      return values.size() + (nullCount > 0 ? 1 : 0);
+    }
+  }
+}
+
+// End SimpleProfiler.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/package-info.java b/core/src/main/java/org/apache/calcite/profile/package-info.java
new file mode 100644
index 0000000..c30e9e0
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utilities to analyze data sets.
+ */
+@PackageMarker
+package org.apache.calcite.profile;
+
+import org.apache.calcite.avatica.util.PackageMarker;
+
+// End package-info.java


[2/4] calcite git commit: [CALCITE-1616] Data profiler

Posted by jh...@apache.org.
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
index ad808a9..a3db6db 100644
--- a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
+++ b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
@@ -16,7 +16,11 @@
  */
 package org.apache.calcite.util;
 
-import java.util.AbstractList;
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
 import java.util.AbstractSet;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
@@ -61,7 +65,21 @@ import java.util.Set;
  * @param <E> Element type
  */
 public class PartiallyOrderedSet<E> extends AbstractSet<E> {
+  /** Ordering that orders bit sets by inclusion.
+   *
+   * <p>For example, the children of 14 (1110) are 12 (1100), 10 (1010) and
+   * 6 (0110).
+   */
+  public static final Ordering<ImmutableBitSet> BIT_SET_INCLUSION_ORDERING =
+      new Ordering<ImmutableBitSet>() {
+        public boolean lessThan(ImmutableBitSet e1, ImmutableBitSet e2) {
+          return e1.contains(e2);
+        }
+      };
+
   private final Map<E, Node<E>> map;
+  private final Function<E, Iterable<E>> parentFunction;
+  private final Function<E, Iterable<E>> childFunction;
   private final Ordering<E> ordering;
 
   /**
@@ -71,7 +89,9 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
   private final Node<E> topNode;
   private final Node<E> bottomNode;
 
-  private static final boolean DEBUG = Math.random() >= 0;
+  /** Whether to check internal consistency all the time.
+   * False unless you specify "-Dcalcite.debug" on the command line. */
+  private static final boolean DEBUG = Util.getBooleanProperty("calcite.debug");
 
   /**
    * Creates a partially-ordered set.
@@ -79,7 +99,19 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * @param ordering Ordering relation
    */
   public PartiallyOrderedSet(Ordering<E> ordering) {
-    this(ordering, new HashMap<E, Node<E>>());
+    this(ordering, new HashMap<E, Node<E>>(), null, null);
+  }
+
+  /**
+   * Creates a partially-ordered set with a parent-generating function.
+   *
+   * @param ordering Ordering relation
+   * @param parentFunction Function to compute parents of a node; may be null
+   */
+  public PartiallyOrderedSet(Ordering<E> ordering,
+      Function<E, Iterable<E>> childFunction,
+      Function<E, Iterable<E>> parentFunction) {
+    this(ordering, new HashMap<E, Node<E>>(), childFunction, parentFunction);
   }
 
   /**
@@ -90,7 +122,8 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * @param collection Initial contents of partially-ordered set
    */
   public PartiallyOrderedSet(Ordering<E> ordering, Collection<E> collection) {
-    this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2));
+    this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2), null,
+        null);
     addAll(collection);
   }
 
@@ -99,10 +132,15 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    *
    * @param ordering Ordering relation
    * @param map Map from values to nodes
+   * @param parentFunction Function to compute parents of a node; may be null
    */
-  private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map) {
+  private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map,
+      Function<E, Iterable<E>> childFunction,
+      Function<E, Iterable<E>> parentFunction) {
     this.ordering = ordering;
     this.map = map;
+    this.childFunction = childFunction;
+    this.parentFunction = parentFunction;
     this.topNode = new TopBottomNode<>(true);
     this.bottomNode = new TopBottomNode<>(false);
     this.topNode.childList.add(bottomNode);
@@ -528,7 +566,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * Returns the values in this partially-ordered set that are less-than
    * a given value and there are no intervening values.
    *
-   * <p>If the value is not in this set, returns the empty list.</p>
+   * <p>If the value is not in this set, returns null.
    *
    * @see #getDescendants
    *
@@ -537,15 +575,33 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    *   value
    */
   public List<E> getChildren(E e) {
+    return getChildren(e, false);
+  }
+
+  /**
+   * Returns the values in this partially-ordered set that are less-than
+   * a given value and there are no intervening values.
+   *
+   * <p>If the value is not in this set, returns null if {@code hypothetical}
+   * is false.
+   *
+   * @see #getDescendants
+   *
+   * @param e Value
+   * @param hypothetical Whether to generate a list if value is not in the set
+   * @return List of values in this set that are directly less than the given
+   *   value
+   */
+  public List<E> getChildren(E e, boolean hypothetical) {
     final Node<E> node = map.get(e);
     if (node == null) {
-      return null;
-    } else if (node.childList.get(0).e == null) {
-      // child list contains bottom element, so officially there are no
-      // children
-      return Collections.emptyList();
+      if (hypothetical) {
+        return strip(findChildren(e));
+      } else {
+        return null;
+      }
     } else {
-      return new StripList<>(node.childList);
+      return strip(node.childList);
     }
   }
 
@@ -553,7 +609,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    * Returns the values in this partially-ordered set that are greater-than
    * a given value and there are no intervening values.
    *
-   * <p>If the value is not in this set, returns the empty list.</p>
+   * <p>If the value is not in this set, returns null.
    *
    * @see #getAncestors
    *
@@ -562,32 +618,61 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
    *   given value
    */
   public List<E> getParents(E e) {
+    return getParents(e, false);
+  }
+
+  /**
+   * Returns the values in this partially-ordered set that are greater-than
+   * a given value and there are no intervening values.
+   *
+   * <p>If the value is not in this set, returns {@code null} if
+   * {@code hypothetical} is false.
+   *
+   * @see #getAncestors
+   *
+   * @param e Value
+   * @param hypothetical Whether to generate a list if value is not in the set
+   * @return List of values in this set that are directly greater than the
+   *   given value
+   */
+  public List<E> getParents(E e, boolean hypothetical) {
     final Node<E> node = map.get(e);
     if (node == null) {
-      return null;
-    } else if (node.parentList.get(0).e == null) {
-      // parent list contains top element, so officially there are no
-      // parents
-      return Collections.emptyList();
+      if (hypothetical) {
+        if (parentFunction != null) {
+          final List<E> list = new ArrayList<>();
+          closure(parentFunction, e, list, new HashSet<E>());
+          return list;
+        } else {
+          return ImmutableList.copyOf(strip(findParents(e)));
+        }
+      } else {
+        return null;
+      }
     } else {
-      return new StripList<>(node.parentList);
+      return strip(node.parentList);
     }
   }
 
-  public List<E> getNonChildren() {
-    if (topNode.childList.size() == 1
-        && topNode.childList.get(0).e == null) {
-      return Collections.emptyList();
+  private void closure(Function<E, Iterable<E>> generator, E e, List<E> list,
+      Set<E> set) {
+    for (E p : Preconditions.checkNotNull(generator.apply(e))) {
+      if (set.add(e)) {
+        if (map.containsKey(p)) {
+          list.add(p);
+        } else {
+          closure(generator, p, list, set);
+        }
+      }
     }
-    return new StripList<>(topNode.childList);
+  }
+
+  public List<E> getNonChildren() {
+    return strip(topNode.childList);
   }
 
   public List<E> getNonParents() {
-    if (bottomNode.parentList.size() == 1
-        && bottomNode.parentList.get(0).e == null) {
-      return Collections.emptyList();
-    }
-    return new StripList<>(bottomNode.parentList);
+    return strip(bottomNode.parentList);
   }
 
   @Override public void clear() {
@@ -612,6 +697,57 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
     return descendants(e, true);
   }
 
+  /** Returns a list, backed by a list of
+   * {@link org.apache.calcite.util.PartiallyOrderedSet.Node}s, that strips
+   * away the node and returns the element inside.
+   *
+   * @param <E> Element type
+   */
+  public static <E> List<E> strip(List<Node<E>> list) {
+    if (list.size() == 1
+        && list.get(0).e == null) {
+      // If parent list contains top element, a node whose element is null,
+      // officially there are no parents.
+      // Similarly child list and bottom element.
+      return ImmutableList.of();
+    }
+    return Lists.transform(list,
+      new Function<Node<E>, E>() {
+        public E apply(Node<E> node) {
+          return node.e;
+        }
+      });
+  }
+
+  /** Converts an iterable of nodes into the list of the elements inside.
+   * If there is one node whose element is null, it represents a list
+   * containing either the top or bottom element, so we return the empty list.
+   *
+   * @param <E> Element type
+   */
+  private static <E> ImmutableList<E> strip(Iterable<Node<E>> iterable) {
+    final Iterator<Node<E>> iterator = iterable.iterator();
+    if (!iterator.hasNext()) {
+      return ImmutableList.of();
+    }
+    Node<E> node = iterator.next();
+    if (!iterator.hasNext()) {
+      if (node.e == null) {
+        return ImmutableList.of();
+      } else {
+        return ImmutableList.of(node.e);
+      }
+    }
+    final ImmutableList.Builder<E> builder = ImmutableList.builder();
+    for (;;) {
+      builder.add(node.e);
+      if (!iterator.hasNext()) {
+        return builder.build();
+      }
+      node = iterator.next();
+    }
+  }
+
   /**
    * Returns a list of values in the set that are less-than a given value.
    * The list is in topological order but order is otherwise
@@ -725,27 +861,6 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
      */
     boolean lessThan(E e1, E e2);
   }
-
-  /** List, backed by a list of {@link Node}s, that strips away the
-   * node and returns the element inside.
-   *
-   * @param <E> Element type
-   */
-  private static class StripList<E> extends AbstractList<E> {
-    private final List<Node<E>> list;
-
-    StripList(List<Node<E>> list) {
-      this.list = list;
-    }
-
-    @Override public E get(int index) {
-      return list.get(index).e;
-    }
-
-    @Override public int size() {
-      return list.size();
-    }
-  }
 }
 
 // End PartiallyOrderedSet.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
new file mode 100644
index 0000000..e45db8b
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
@@ -0,0 +1,682 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.linq4j.AbstractEnumerable;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.PredicateImpl;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.Matchers;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.JsonBuilder;
+import org.apache.calcite.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.base.Supplier;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Ordering;
+
+import org.hamcrest.Matcher;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Unit tests for {@link Profiler}.
+ */
+public class ProfilerTest {
+  @Test public void testProfileZeroRows() throws Exception {
+    final String sql = "select * from \"scott\".dept where false";
+    sql(sql).unordered(
+        "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:0.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:0.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:0.0}",
+        "{type:distribution,columns:[DEPTNO],values:[],cardinality:0.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:0.0}",
+        "{type:distribution,columns:[DNAME],values:[],cardinality:0.0}",
+        "{type:distribution,columns:[LOC],values:[],cardinality:0.0}",
+        "{type:distribution,columns:[],cardinality:0.0}",
+        "{type:rowCount,rowCount:0}",
+        "{type:unique,columns:[]}");
+  }
+
+  @Test public void testProfileOneRow() throws Exception {
+    final String sql = "select * from \"scott\".dept where deptno = 10";
+    sql(sql).unordered(
+        "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:1.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:1.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:1.0}",
+        "{type:distribution,columns:[DEPTNO],values:[10],cardinality:1.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:1.0}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING],cardinality:1.0}",
+        "{type:distribution,columns:[LOC],values:[NEWYORK],cardinality:1.0}",
+        "{type:distribution,columns:[],cardinality:1.0}",
+        "{type:rowCount,rowCount:1}",
+        "{type:unique,columns:[]}");
+  }
+
+  @Test public void testProfileTwoRows() throws Exception {
+    final String sql = "select * from \"scott\".dept where deptno in (10, 20)";
+    sql(sql).unordered(
+        "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:2.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:2.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:2.0}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20],cardinality:2.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:2.0}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH],cardinality:2.0}",
+        "{type:distribution,columns:[LOC],values:[DALLAS,NEWYORK],cardinality:2.0}",
+        "{type:distribution,columns:[],cardinality:1.0}",
+        "{type:rowCount,rowCount:2}",
+        "{type:unique,columns:[DEPTNO]}",
+        "{type:unique,columns:[DNAME]}",
+        "{type:unique,columns:[LOC]}");
+  }
+
+  @Test public void testProfileScott() throws Exception {
+    final String sql = "select * from \"scott\".emp\n"
+        + "join \"scott\".dept using (deptno)";
+    sql(sql)
+        .where(new PredicateImpl<Profiler.Statistic>() {
+          public boolean test(Profiler.Statistic statistic) {
+            return !(statistic instanceof Profiler.Distribution)
+                || ((Profiler.Distribution) statistic).cardinality < 14
+                && ((Profiler.Distribution) statistic).minimal;
+          }
+        }).unordered(
+        "{type:distribution,columns:[COMM,DEPTNO0],cardinality:5.0}",
+        "{type:distribution,columns:[COMM,DEPTNO],cardinality:5.0}",
+        "{type:distribution,columns:[COMM,DNAME],cardinality:5.0}",
+        "{type:distribution,columns:[COMM,LOC],cardinality:5.0}",
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:3.0}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0}",
+        "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0}",
+        "{type:distribution,columns:[JOB,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[JOB,DEPTNO0],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,DEPTNO],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,DNAME],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,LOC],cardinality:9.0}",
+        "{type:distribution,columns:[JOB,MGR,DEPTNO0],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR,DEPTNO],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR,DNAME],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR,LOC],cardinality:10.0}",
+        "{type:distribution,columns:[JOB,MGR],cardinality:8.0}",
+        "{type:distribution,columns:[JOB,SAL],cardinality:12.0}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0}",
+        "{type:distribution,columns:[MGR,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[MGR,DEPTNO0],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,DEPTNO],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,DNAME],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,LOC],cardinality:9.0}",
+        "{type:distribution,columns:[MGR,SAL],cardinality:12.0}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1}",
+        "{type:distribution,columns:[SAL,COMM],cardinality:5.0}",
+        "{type:distribution,columns:[SAL,DEPTNO0],cardinality:12.0}",
+        "{type:distribution,columns:[SAL,DEPTNO],cardinality:12.0}",
+        "{type:distribution,columns:[SAL,DNAME],cardinality:12.0}",
+        "{type:distribution,columns:[SAL,LOC],cardinality:12.0}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0}",
+        "{type:distribution,columns:[],cardinality:1.0}",
+        "{type:fd,columns:[DEPTNO0],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[DEPTNO0],dependentColumn:DNAME}",
+        "{type:fd,columns:[DEPTNO0],dependentColumn:LOC}",
+        "{type:fd,columns:[DEPTNO],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[DEPTNO],dependentColumn:DNAME}",
+        "{type:fd,columns:[DEPTNO],dependentColumn:LOC}",
+        "{type:fd,columns:[DNAME],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[DNAME],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[DNAME],dependentColumn:LOC}",
+        "{type:fd,columns:[JOB],dependentColumn:COMM}",
+        "{type:fd,columns:[LOC],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[LOC],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[LOC],dependentColumn:DNAME}",
+        "{type:fd,columns:[SAL],dependentColumn:DEPTNO0}",
+        "{type:fd,columns:[SAL],dependentColumn:DEPTNO}",
+        "{type:fd,columns:[SAL],dependentColumn:DNAME}",
+        "{type:fd,columns:[SAL],dependentColumn:JOB}",
+        "{type:fd,columns:[SAL],dependentColumn:LOC}",
+        "{type:fd,columns:[SAL],dependentColumn:MGR}",
+        "{type:rowCount,rowCount:14}",
+        "{type:unique,columns:[EMPNO]}",
+        "{type:unique,columns:[ENAME]}",
+        "{type:unique,columns:[HIREDATE,DEPTNO0]}",
+        "{type:unique,columns:[HIREDATE,DEPTNO]}",
+        "{type:unique,columns:[HIREDATE,DNAME]}",
+        "{type:unique,columns:[HIREDATE,LOC]}",
+        "{type:unique,columns:[HIREDATE,SAL]}",
+        "{type:unique,columns:[JOB,HIREDATE]}");
+  }
+
+  /** As {@link #testProfileScott()}, but prints only the most surprising
+   * distributions. */
+  @Test public void testProfileScott2() throws Exception {
+    scott().factory(Fluid.SIMPLE_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0,expectedCardinality:12.682618485430247,surprise:0.4344728973121492}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR,COMM],cardinality:5.0,expectedCardinality:11.675074674157162,surprise:0.400302535646339}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL,COMM],cardinality:5.0,expectedCardinality:12.579960871109892,surprise:0.43117052004174}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** As {@link #testProfileScott2()}, but uses the breadth-first profiler.
+   * Results should be the same, but are slightly different (extra EMPNO
+   * and ENAME distributions). */
+  @Test public void testProfileScott3() throws Exception {
+    scott().factory(Fluid.BETTER_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** As {@link #testProfileScott3()}, but uses the breadth-first profiler
+   * and deems everything uninteresting. Only first-level combinations (those
+   * consisting of a single column) are computed. */
+  @Test public void testProfileScott4() throws Exception {
+    scott().factory(Fluid.INCURIOUS_PROFILER_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** As {@link #testProfileScott3()}, but uses the breadth-first profiler. */
+  @Ignore
+  @Test public void testProfileScott5() throws Exception {
+    scott().factory(Fluid.PROFILER_FACTORY).unordered(
+        "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+        "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+        "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+        "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+        "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+        "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+        "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** Profiles a star-join query on the Foodmart schema using the breadth-first
+   * profiler. */
+  @Ignore
+  @Test public void testProfileFoodmart() throws Exception {
+    foodmart().factory(Fluid.PROFILER_FACTORY).unordered(
+        "{type:distribution,columns:[brand_name],cardinality:111.0,expectedCardinality:86837.0,surprise:0.9974467497814786}",
+        "{type:distribution,columns:[cases_per_pallet],values:[5,6,7,8,9,10,11,12,13,14],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+        "{type:distribution,columns:[day_of_month],cardinality:30.0,expectedCardinality:86837.0,surprise:0.9993092889129359}",
+        "{type:distribution,columns:[fiscal_period],values:[],cardinality:1.0,nullCount:86837,expectedCardinality:86837.0,surprise:0.999976968608213}",
+        "{type:distribution,columns:[low_fat],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+        "{type:distribution,columns:[month_of_year],values:[1,2,3,4,5,6,7,8,9,10,11,12],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+        "{type:distribution,columns:[product_category],cardinality:45.0,expectedCardinality:86837.0,surprise:0.9989641122441932}",
+        "{type:distribution,columns:[product_class_id0,product_subcategory,product_category,product_department,product_family],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[product_class_id0],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[product_class_id],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[product_department],cardinality:22.0,expectedCardinality:86837.0,surprise:0.9994934318838578}",
+        "{type:distribution,columns:[product_family],values:[Drink,Food,Non-Consumable],cardinality:3.0,expectedCardinality:86837.0,surprise:0.9999309074159374}",
+        "{type:distribution,columns:[product_subcategory],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+        "{type:distribution,columns:[quarter],values:[Q1,Q2,Q3,Q4],cardinality:4.0,expectedCardinality:86837.0,surprise:0.9999078776154121}",
+        "{type:distribution,columns:[recyclable_package],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+        "{type:distribution,columns:[store_cost,fiscal_period],cardinality:10601.0,nullCount:86724,expectedCardinality:10.0,surprise:0.9981151635095655}",
+        "{type:distribution,columns:[store_cost,low_fat],cardinality:17673.0,expectedCardinality:20.0,surprise:0.99773921890013}",
+        "{type:distribution,columns:[store_cost,product_family],cardinality:19453.0,expectedCardinality:30.0,surprise:0.9969203921367346}",
+        "{type:distribution,columns:[store_cost,quarter],cardinality:29590.0,expectedCardinality:40.0,surprise:0.9973000337495781}",
+        "{type:distribution,columns:[store_cost,recyclable_package],cardinality:17847.0,expectedCardinality:20.0,surprise:0.9977612357978396}",
+        "{type:distribution,columns:[store_cost,the_year],cardinality:10944.0,expectedCardinality:10.0,surprise:0.9981741829468688}",
+        "{type:distribution,columns:[store_cost],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+        "{type:distribution,columns:[store_id],values:[2,3,6,7,11,13,14,15,16,17,22,23,24],cardinality:13.0,expectedCardinality:86837.0,surprise:0.9997006332757629}",
+        "{type:distribution,columns:[store_sales],cardinality:21.0,expectedCardinality:86837.0,surprise:0.999516452140275}",
+        "{type:distribution,columns:[the_day],values:[Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday],cardinality:7.0,expectedCardinality:86837.0,surprise:0.9998387913960665}",
+        "{type:distribution,columns:[the_month],values:[April,August,December,February,January,July,June,March,May,November,October,September],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+        "{type:distribution,columns:[the_year],values:[1997],cardinality:1.0,expectedCardinality:86837.0,surprise:0.999976968608213}",
+        "{type:distribution,columns:[unit_sales],values:[1.0000,2.0000,3.0000,4.0000,5.0000,6.0000],cardinality:6.0,expectedCardinality:86837.0,surprise:0.999861819605495}",
+        "{type:distribution,columns:[units_per_case],cardinality:36.0,expectedCardinality:86837.0,surprise:0.9991712039413857}",
+        "{type:distribution,columns:[week_of_year],cardinality:52.0,expectedCardinality:86837.0,surprise:0.9988030705843087}",
+        "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+  }
+
+  /** Tests
+   * {@link org.apache.calcite.profile.ProfilerImpl.SurpriseQueue}. */
+  @Test public void testSurpriseQueue() {
+    ProfilerImpl.SurpriseQueue q = new ProfilerImpl.SurpriseQueue(4, 3);
+    assertThat(q.offer(2), is(true));
+    assertThat(q.toString(), is("min: 2.0, contents: [2.0]"));
+    assertThat(q.isValid(), is(true));
+
+    assertThat(q.offer(4), is(true));
+    assertThat(q.toString(), is("min: 2.0, contents: [2.0, 4.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Since we're in the warm-up period, a value lower than the minimum is
+    // accepted.
+    assertThat(q.offer(1), is(true));
+    assertThat(q.toString(), is("min: 1.0, contents: [2.0, 4.0, 1.0]"));
+    assertThat(q.isValid(), is(true));
+
+    assertThat(q.offer(5), is(true));
+    assertThat(q.toString(), is("min: 1.0, contents: [4.0, 1.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    assertThat(q.offer(3), is(true));
+    assertThat(q.toString(), is("min: 1.0, contents: [1.0, 5.0, 3.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Duplicate entry
+    assertThat(q.offer(5), is(true));
+    assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Now that the list is full, a value below the minimum is refused.
+    // "offer" returns false, and the value is not added to the queue.
+    // Thus the median never decreases.
+    assertThat(q.offer(2), is(false));
+    assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Same applies for a value equal to the minimum.
+    assertThat(q.offer(3), is(false));
+    assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+    assertThat(q.isValid(), is(true));
+
+    // Add a value that is above the minimum.
+    assertThat(q.offer(4.5), is(true));
+    assertThat(q.toString(), is("min: 3.0, contents: [3.0, 5.0, 4.5]"));
+    assertThat(q.isValid(), is(true));
+  }
+
+  private Fluid scott() throws Exception {
+    final String sql = "select * from \"scott\".emp\n"
+        + "join \"scott\".dept using (deptno)";
+    return sql(sql)
+        .where(Fluid.STATISTIC_PREDICATE)
+        .sort(Fluid.ORDERING.reverse())
+        .limit(30)
+        .project(Fluid.EXTENDED_COLUMNS);
+  }
+
+  private Fluid foodmart() throws Exception {
+    final String sql = "select \"s\".*, \"p\".*, \"t\".*, \"pc\".*\n"
+        + "from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+        + "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
+        + "join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")\n"
+        + "join \"foodmart\".\"product_class\" as \"pc\"\n"
+        + "  on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"\n";
+    return sql(sql)
+        .config(CalciteAssert.Config.JDBC_FOODMART)
+        .where(Fluid.STATISTIC_PREDICATE)
+        .sort(Fluid.ORDERING.reverse())
+        .limit(30)
+        .project(Fluid.EXTENDED_COLUMNS);
+  }
+
+  private static Fluid sql(String sql) {
+    return new Fluid(CalciteAssert.Config.SCOTT, sql, Fluid.SIMPLE_FACTORY,
+        Predicates.<Profiler.Statistic>alwaysTrue(), null, -1,
+        Fluid.DEFAULT_COLUMNS);
+  }
+
+  /** Fluid interface for writing profiler test cases. */
+  private static class Fluid {
+    static final Supplier<Profiler> SIMPLE_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            return new SimpleProfiler();
+          }
+        };
+
+    static final Supplier<Profiler> BETTER_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>>
+                predicate = Predicates.alwaysTrue();
+            return new ProfilerImpl(600, 200, predicate);
+          }
+        };
+
+    static final Ordering<Profiler.Statistic> ORDERING =
+        new Ordering<Profiler.Statistic>() {
+          public int compare(Profiler.Statistic left,
+              Profiler.Statistic right) {
+            int c = left.getClass().getSimpleName()
+                .compareTo(right.getClass().getSimpleName());
+            if (c == 0
+                && left instanceof Profiler.Distribution
+                && right instanceof Profiler.Distribution) {
+              final Profiler.Distribution d0 = (Profiler.Distribution) left;
+              final Profiler.Distribution d1 = (Profiler.Distribution) right;
+              c = Double.compare(d0.surprise(), d1.surprise());
+              if (c == 0) {
+                c = d0.columns.toString().compareTo(d1.columns.toString());
+              }
+            }
+            return c;
+          }
+        };
+
+    static final Predicate<Profiler.Statistic> STATISTIC_PREDICATE =
+        new PredicateImpl<Profiler.Statistic>() {
+          public boolean test(Profiler.Statistic statistic) {
+            // Include distributions of zero columns (the grand total)
+            // and singleton columns, plus "surprising" distributions
+            // (with significantly higher NDVs than predicted from their
+            // constituent columns).
+            return statistic instanceof Profiler.Distribution
+                && (((Profiler.Distribution) statistic).columns.size() < 2
+                || ((Profiler.Distribution) statistic).surprise() > 0.4D)
+                && ((Profiler.Distribution) statistic).minimal;
+          }
+        };
+
+    static final List<String> DEFAULT_COLUMNS =
+        ImmutableList.of("type", "distribution", "columns", "cardinality",
+            "values", "nullCount", "dependentColumn", "rowCount");
+
+    static final List<String> EXTENDED_COLUMNS =
+        ImmutableList.<String>builder().addAll(DEFAULT_COLUMNS)
+            .add("expectedCardinality", "surprise")
+            .build();
+
+    private static final Supplier<Profiler> PROFILER_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            return new ProfilerImpl(7500, 100,
+                new PredicateImpl<Pair<ProfilerImpl.Space, Profiler.Column>>() {
+                  public boolean test(
+                      Pair<ProfilerImpl.Space, Profiler.Column> p) {
+                    final Profiler.Distribution distribution =
+                        p.left.distribution();
+                    if (distribution == null) {
+                      // We don't have a distribution yet, because this space
+                      // has not yet been evaluated. Let's do it anyway.
+                      return true;
+                    }
+                    return distribution.surprise() >= 0.3D;
+                  }
+                });
+          }
+        };
+
+    private static final Supplier<Profiler> INCURIOUS_PROFILER_FACTORY =
+        new Supplier<Profiler>() {
+          public Profiler get() {
+            final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>> p =
+                Predicates.alwaysFalse();
+            return new ProfilerImpl(10, 200, p);
+          }
+        };
+
+    private final String sql;
+    private final List<String> columns;
+    private final Comparator<Profiler.Statistic> comparator;
+    private final int limit;
+    private final Predicate<Profiler.Statistic> predicate;
+    private final Supplier<Profiler> factory;
+    private final CalciteAssert.Config config;
+
+    Fluid(CalciteAssert.Config config, String sql, Supplier<Profiler> factory,
+        Predicate<Profiler.Statistic> predicate,
+        Comparator<Profiler.Statistic> comparator, int limit,
+        List<String> columns) {
+      this.sql = Preconditions.checkNotNull(sql);
+      this.factory = Preconditions.checkNotNull(factory);
+      this.columns = ImmutableList.copyOf(columns);
+      this.predicate = Preconditions.checkNotNull(predicate);
+      this.comparator = comparator; // null means sort on JSON representation
+      this.limit = limit;
+      this.config = config;
+    }
+
+    Fluid config(CalciteAssert.Config config) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid factory(Supplier<Profiler> factory) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid project(List<String> columns) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid sort(Ordering<Profiler.Statistic> comparator) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid limit(int limit) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid where(Predicate<Profiler.Statistic> predicate) {
+      return new Fluid(config, sql, factory, predicate, comparator, limit,
+          columns);
+    }
+
+    Fluid unordered(String... lines) throws Exception {
+      return check(Matchers.equalsUnordered(lines));
+    }
+
+    public Fluid check(final Matcher<Iterable<String>> matcher)
+        throws Exception {
+      CalciteAssert.that(config)
+          .doWithConnection(new Function<CalciteConnection, Void>() {
+            public Void apply(CalciteConnection c) {
+              try (PreparedStatement s = c.prepareStatement(sql)) {
+                final ResultSetMetaData m = s.getMetaData();
+                final List<Profiler.Column> columns = new ArrayList<>();
+                final int columnCount = m.getColumnCount();
+                for (int i = 0; i < columnCount; i++) {
+                  columns.add(new Profiler.Column(i, m.getColumnLabel(i + 1)));
+                }
+
+                // Create an initial group for each table in the query.
+                // Columns in the same table will tend to have the same
+                // cardinality as the table, and as the table's primary key.
+                final Multimap<String, Integer> groups = HashMultimap.create();
+                for (int i = 0; i < m.getColumnCount(); i++) {
+                  groups.put(m.getTableName(i + 1), i);
+                }
+                final SortedSet<ImmutableBitSet> initialGroups =
+                    new TreeSet<>();
+                for (Collection<Integer> integers : groups.asMap().values()) {
+                  initialGroups.add(ImmutableBitSet.of(integers));
+                }
+                final Profiler p = factory.get();
+                final Enumerable<List<Comparable>> rows = getRows(s);
+                final Profiler.Profile profile =
+                    p.profile(rows, columns, initialGroups);
+                final List<Profiler.Statistic> statistics =
+                    ImmutableList.copyOf(
+                        Iterables.filter(profile.statistics(), predicate));
+
+                // If no comparator specified, use the function that converts to
+                // JSON strings
+                final Function<Profiler.Statistic, String> toJson =
+                    toJsonFunction();
+                Ordering<Profiler.Statistic> comp = comparator != null
+                    ? Ordering.from(comparator)
+                    : Ordering.natural().onResultOf(toJson);
+                ImmutableList<Profiler.Statistic> statistics2 =
+                    comp.immutableSortedCopy(statistics);
+                if (limit >= 0 && limit < statistics2.size()) {
+                  statistics2 = statistics2.subList(0, limit);
+                }
+
+                final List<String> strings =
+                    Lists.transform(statistics2, toJson);
+                assertThat(strings, matcher);
+              } catch (SQLException e) {
+                throw new RuntimeException(e);
+              }
+              return null;
+            }
+          });
+      return this;
+    }
+
+    /** Returns a function that converts a statistic to a JSON string. */
+    Function<Profiler.Statistic, String> toJsonFunction() {
+      return new Function<Profiler.Statistic, String>() {
+        final JsonBuilder jb = new JsonBuilder();
+
+        public String apply(Profiler.Statistic statistic) {
+          Object map = statistic.toMap(jb);
+          if (map instanceof Map) {
+            @SuppressWarnings("unchecked")
+            final Map<String, Object> map1 = (Map) map;
+            map1.keySet().retainAll(Fluid.this.columns);
+          }
+          final String json = jb.toJsonString(map);
+          return json.replaceAll("\n", "").replaceAll(" ", "")
+              .replaceAll("\"", "");
+        }
+      };
+    }
+
+    private Enumerable<List<Comparable>> getRows(final PreparedStatement s) {
+      return new AbstractEnumerable<List<Comparable>>() {
+        public Enumerator<List<Comparable>> enumerator() {
+          try {
+            final ResultSet r = s.executeQuery();
+            return getListEnumerator(r, r.getMetaData().getColumnCount());
+          } catch (SQLException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+    }
+
+    private Enumerator<List<Comparable>> getListEnumerator(
+        final ResultSet r, final int columnCount) {
+      return new Enumerator<List<Comparable>>() {
+        final Comparable[] values = new Comparable[columnCount];
+
+        public List<Comparable> current() {
+          for (int i = 0; i < columnCount; i++) {
+            try {
+              final Comparable value = (Comparable) r.getObject(i + 1);
+              values[i] = NullSentinel.mask(value);
+            } catch (SQLException e) {
+              throw new RuntimeException(e);
+            }
+          }
+          return ImmutableList.copyOf(values);
+        }
+
+        public boolean moveNext() {
+          try {
+            return r.next();
+          } catch (SQLException e) {
+            throw new RuntimeException(e);
+          }
+        }
+
+        public void reset() {
+        }
+
+        public void close() {
+          try {
+            r.close();
+          } catch (SQLException e) {
+            throw new RuntimeException(e);
+          }
+        }
+      };
+    }
+  }
+}
+
+// End ProfilerTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
index d577057..585e8ed 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
@@ -28,6 +28,7 @@ import org.apache.calcite.plan.volcano.TraitPropagationTest;
 import org.apache.calcite.plan.volcano.VolcanoPlannerTest;
 import org.apache.calcite.plan.volcano.VolcanoPlannerTraitTest;
 import org.apache.calcite.prepare.LookupOperatorOverloadsTest;
+import org.apache.calcite.profile.ProfilerTest;
 import org.apache.calcite.rel.RelCollationTest;
 import org.apache.calcite.rel.RelDistributionTest;
 import org.apache.calcite.rel.rel2sql.RelToSqlConverterTest;
@@ -156,6 +157,7 @@ import org.junit.runners.Suite;
     LinqFrontJdbcBackTest.class,
     JdbcFrontJdbcBackLinqMiddleTest.class,
     CalciteSqlOperatorTest.class,
+    ProfilerTest.class,
     LatticeTest.class,
     ReflectiveSchemaTest.class,
     JdbcTest.class,

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
index 3f36bf9..4eae872 100644
--- a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
+++ b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
@@ -23,6 +23,8 @@ import org.apache.calcite.materialize.Lattices;
 
 import com.google.common.collect.ImmutableMap;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 
 /**
@@ -33,10 +35,15 @@ import java.util.Map;
  */
 public class FoodMartLatticeStatisticProvider
     extends DelegatingLatticeStatisticProvider {
-  public static final FoodMartLatticeStatisticProvider INSTANCE =
-      new FoodMartLatticeStatisticProvider(Lattices.CACHED_SQL);
+  public static final FoodMartLatticeStatisticProvider.Factory FACTORY =
+      new Factory() {
+        public LatticeStatisticProvider apply(Lattice lattice) {
+          return new FoodMartLatticeStatisticProvider(lattice,
+              Lattices.CACHED_SQL.apply(lattice));
+        }
+      };
 
-  public static final Map<String, Integer> CARDINALITY_MAP =
+  private static final Map<String, Integer> CARDINALITY_MAP =
       ImmutableMap.<String, Integer>builder()
           .put("brand_name", 111)
           .put("cases_per_pallet", 10)
@@ -75,18 +82,29 @@ public class FoodMartLatticeStatisticProvider
           .put("week_of_year", 52)
           .build();
 
-  private FoodMartLatticeStatisticProvider(LatticeStatisticProvider provider) {
+  private final Lattice lattice;
+
+  private FoodMartLatticeStatisticProvider(Lattice lattice,
+      LatticeStatisticProvider provider) {
     super(provider);
+    this.lattice = lattice;
   }
 
-  /** Returns an estimate of the number of distinct values in a column. */
-  public int cardinality(Lattice lattice, Lattice.Column column) {
+  private int cardinality(Lattice.Column column) {
     final Integer integer = CARDINALITY_MAP.get(column.alias);
     if (integer != null && integer > 0) {
       return integer;
     }
     return column.alias.length();
   }
+
+  @Override public double cardinality(List<Lattice.Column> columns) {
+    final List<Double> cardinalityList = new ArrayList<>();
+    for (Lattice.Column column : columns) {
+      cardinalityList.add((double) cardinality(column));
+    }
+    return Lattice.getRowCount(lattice.getFactRowCount(), cardinalityList);
+  }
 }
 
 // End FoodMartLatticeStatisticProvider.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/LatticeTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
index 54f4ea7..473e5f5 100644
--- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
@@ -16,11 +16,16 @@
  */
 package org.apache.calcite.test;
 
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.materialize.Lattice;
 import org.apache.calcite.materialize.Lattices;
 import org.apache.calcite.materialize.MaterializationService;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.TestUtil;
 import org.apache.calcite.util.Util;
 
@@ -29,6 +34,7 @@ import com.google.common.base.Throwables;
 import com.google.common.collect.ImmutableList;
 import com.google.common.collect.Lists;
 
+import org.junit.Assume;
 import org.junit.Ignore;
 import org.junit.Test;
 
@@ -39,11 +45,15 @@ import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Map;
 import java.util.concurrent.atomic.AtomicInteger;
 
+import static org.apache.calcite.test.Matchers.within;
+
 import static org.hamcrest.CoreMatchers.anyOf;
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertThat;
 
@@ -110,8 +120,8 @@ public class LatticeTest {
       + "  } ]\n"
       + "}\n";
 
-  private CalciteAssert.AssertThat modelWithLattice(String name, String sql,
-      String... extras) {
+  private static CalciteAssert.AssertThat modelWithLattice(String name,
+      String sql, String... extras) {
     final StringBuilder buf = new StringBuilder("{ name: '")
         .append(name)
         .append("', sql: ")
@@ -123,7 +133,8 @@ public class LatticeTest {
     return modelWithLattices(buf.toString());
   }
 
-  private CalciteAssert.AssertThat modelWithLattices(String... lattices) {
+  private static CalciteAssert.AssertThat modelWithLattices(
+      String... lattices) {
     final Class<JdbcTest.EmpDeptTableFactory> clazz =
         JdbcTest.EmpDeptTableFactory.class;
     return CalciteAssert.model(""
@@ -153,6 +164,36 @@ public class LatticeTest {
 
   /** Tests that it's OK for a lattice to have the same name as a table in the
    * schema. */
+  @Test public void testLatticeSql() throws Exception {
+    modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
+        .doWithConnection(new Function<CalciteConnection, Void>() {
+          public Void apply(CalciteConnection c) {
+            final SchemaPlus schema = c.getRootSchema();
+            final SchemaPlus adhoc = schema.getSubSchema("adhoc");
+            assertThat(adhoc.getTableNames().contains("EMPLOYEES"), is(true));
+            final Map.Entry<String, CalciteSchema.LatticeEntry> entry =
+                adhoc.unwrap(CalciteSchema.class).getLatticeMap().firstEntry();
+            final Lattice lattice = entry.getValue().getLattice();
+            final String sql = "SELECT \"days\".\"day\"\n"
+                + "FROM \"foodmart\".\"days\" AS \"days\"\n"
+                + "GROUP BY \"days\".\"day\"";
+            assertThat(
+                lattice.sql(ImmutableBitSet.of(0),
+                    ImmutableList.<Lattice.Measure>of()), is(sql));
+            final String sql2 = "SELECT"
+                + " \"days\".\"day\", \"days\".\"week_day\"\n"
+                + "FROM \"foodmart\".\"days\" AS \"days\"";
+            assertThat(
+                lattice.sql(ImmutableBitSet.of(0, 1), false,
+                    ImmutableList.<Lattice.Measure>of()),
+                is(sql2));
+            return null;
+          }
+        });
+  }
+
+  /** Tests that it's OK for a lattice to have the same name as a table in the
+   * schema. */
   @Test public void testLatticeWithSameNameAsTable() {
     modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
         .query("select count(*) from EMPLOYEES")
@@ -377,28 +418,57 @@ public class LatticeTest {
    * Use optimization algorithm to suggest which tiles of a lattice to
    * materialize</a>. */
   @Test public void testTileAlgorithm() {
-    checkTileAlgorithm(FoodMartLatticeStatisticProvider.class.getCanonicalName(),
-        "EnumerableAggregate(group=[{2, 3}])\n"
-            + "  EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31}]])");
+    final String explain = "EnumerableAggregate(group=[{2, 3}])\n"
+        + "  EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31, 32, 37}]])";
+    checkTileAlgorithm(
+        FoodMartLatticeStatisticProvider.class.getCanonicalName() + "#FACTORY",
+        explain);
   }
 
+  /** As {@link #testTileAlgorithm()}, but uses the
+   * {@link Lattices#CACHED_SQL} statistics provider. */
   @Test public void testTileAlgorithm2() {
     // Different explain than above, but note that it still selects columns
     // (27, 31).
+    final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+        + "  EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
     checkTileAlgorithm(Lattices.class.getCanonicalName() + "#CACHED_SQL",
-        "EnumerableAggregate(group=[{0, 1}])\n"
-            + "  EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]");
+        explain);
+  }
+
+  /** As {@link #testTileAlgorithm()}, but uses the
+   * {@link Lattices#PROFILER} statistics provider. */
+  @Test public void testTileAlgorithm3() {
+    Assume.assumeTrue("Yahoo sketches requires JDK 8 or higher",
+        TestUtil.getJavaMajorVersion() >= 8);
+    final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+        + "  EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
+    checkTileAlgorithm(Lattices.class.getCanonicalName() + "#PROFILER",
+        explain);
   }
 
   private void checkTileAlgorithm(String statisticProvider,
       String expectedExplain) {
     MaterializationService.setThreadLocal();
     MaterializationService.instance().clear();
-    foodmartModel(
-        " auto: false,\n"
+    foodmartLatticeModel(statisticProvider)
+        .query("select distinct t.\"the_year\", t.\"quarter\"\n"
+            + "from \"foodmart\".\"sales_fact_1997\" as s\n"
+            + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
+        .enableMaterializations(true)
+        .explainContains(expectedExplain)
+        .returnsUnordered("the_year=1997; quarter=Q1",
+            "the_year=1997; quarter=Q2",
+            "the_year=1997; quarter=Q3",
+            "the_year=1997; quarter=Q4");
+  }
+
+  private static CalciteAssert.AssertThat foodmartLatticeModel(
+      String statisticProvider) {
+    return foodmartModel(" auto: false,\n"
         + "  algorithm: true,\n"
         + "  algorithmMaxMillis: -1,\n"
-        + "  rowCountEstimate: 86000,\n"
+        + "  rowCountEstimate: 87000,\n"
         + "  defaultMeasures: [ {\n"
         + "      agg: 'sum',\n"
         + "      args: 'unit_sales'\n"
@@ -414,17 +484,7 @@ public class LatticeTest {
         + "  tiles: [ {\n"
         + "    dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
         + "    measures: [ ]\n"
-        + "  } ]\n")
-        .query("select distinct t.\"the_year\", t.\"quarter\"\n"
-            + "from \"foodmart\".\"sales_fact_1997\" as s\n"
-            + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
-        .enableMaterializations(true)
-        .explainContains(expectedExplain)
-        .returnsUnordered("the_year=1997; quarter=Q1",
-            "the_year=1997; quarter=Q2",
-            "the_year=1997; quarter=Q3",
-            "the_year=1997; quarter=Q4")
-        .returnsCount(4);
+        + "  } ]\n");
   }
 
   /** Tests a query that is created within {@link #testTileAlgorithm()}. */
@@ -704,7 +764,7 @@ public class LatticeTest {
         .returns("EXPR$0=1\n");
   }
 
-  private CalciteAssert.AssertThat foodmartModel(String... extras) {
+  private static CalciteAssert.AssertThat foodmartModel(String... extras) {
     return modelWithLattice("star",
         "select 1 from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
             + "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
@@ -741,6 +801,17 @@ public class LatticeTest {
     System.out.println(CalciteAssert.toString(resultSet));
     connection.close();
   }
+
+  /** Unit test for {@link Lattice#getRowCount(double, List)}. */
+  @Test public void testColumnCount() {
+    assertThat(Lattice.getRowCount(10, 2, 3), within(5.03D, 0.01D));
+    assertThat(Lattice.getRowCount(10, 9, 8), within(9.4D, 0.01D));
+    assertThat(Lattice.getRowCount(100, 9, 8), within(54.2D, 0.1D));
+    assertThat(Lattice.getRowCount(1000, 9, 8), within(72D, 0.01D));
+    assertThat(Lattice.getRowCount(1000, 1, 1), is(1D));
+    assertThat(Lattice.getRowCount(1, 3, 5), within(1D, 0.01D));
+    assertThat(Lattice.getRowCount(1, 3, 5, 13, 4831), within(1D, 0.01D));
+  }
 }
 
 // End LatticeTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/Matchers.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/Matchers.java b/core/src/test/java/org/apache/calcite/test/Matchers.java
index fc9e0fd..08733a2 100644
--- a/core/src/test/java/org/apache/calcite/test/Matchers.java
+++ b/core/src/test/java/org/apache/calcite/test/Matchers.java
@@ -16,10 +16,16 @@
  */
 package org.apache.calcite.test;
 
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Functions;
+import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 
+import org.hamcrest.BaseMatcher;
 import org.hamcrest.CustomTypeSafeMatcher;
 import org.hamcrest.Description;
+import org.hamcrest.Factory;
 import org.hamcrest.Matcher;
 
 import java.sql.ResultSet;
@@ -79,6 +85,81 @@ public class Matchers {
       }
     };
   }
+
+  public static <E extends Comparable> Matcher<Iterable<E>> equalsUnordered(
+      E... lines) {
+    final List<String> expectedList =
+        Lists.newArrayList(toStringList(Arrays.asList(lines)));
+    Collections.sort(expectedList);
+    final String description = Util.lines(expectedList);
+    return new CustomTypeSafeMatcher<Iterable<E>>(description) {
+      @Override protected void describeMismatchSafely(Iterable<E> actuals,
+          Description description) {
+        final List<String> actualList =
+            Lists.newArrayList(toStringList(actuals));
+        Collections.sort(actualList);
+        description.appendText("was ")
+            .appendValue(Util.lines(actualList));
+      }
+
+      protected boolean matchesSafely(Iterable<E> actuals) {
+        final List<String> actualList =
+            Lists.newArrayList(toStringList(actuals));
+        Collections.sort(actualList);
+        return actualList.equals(expectedList);
+      }
+    };
+  }
+
+  private static <E> Iterable<String> toStringList(Iterable<E> items) {
+    return Iterables.transform(items, Functions.toStringFunction());
+  }
+
+  /**
+   * Creates a matcher that matches when the examined object is within
+   * {@code epsilon} of the specified <code>operand</code>.
+   */
+  @Factory
+  public static <T extends Number> Matcher<T> within(T value, double epsilon) {
+    return new IsWithin<T>(value, epsilon);
+  }
+
+  /**
+   * Is the numeric value within a given difference another value?
+   *
+   * @param <T> Value type
+   */
+  public static class IsWithin<T extends Number> extends BaseMatcher<T> {
+    private final T expectedValue;
+    private final double epsilon;
+
+    public IsWithin(T expectedValue, double epsilon) {
+      this.expectedValue = expectedValue;
+      this.epsilon = epsilon;
+    }
+
+    public boolean matches(Object actualValue) {
+      return areEqual(actualValue, expectedValue, epsilon);
+    }
+
+    public void describeTo(Description description) {
+      description.appendValue(expectedValue + " +/-" + epsilon);
+    }
+
+    private static boolean areEqual(Object actual, Number expected,
+        double epsilon) {
+      if (actual == null) {
+        return expected == null;
+      }
+      if (actual.equals(expected)) {
+        return true;
+      }
+      final double a = ((Number) actual).doubleValue();
+      final double min = expected.doubleValue() - epsilon;
+      final double max = expected.doubleValue() + epsilon;
+      return min <= a && a <= max;
+    }
+  }
 }
 
 // End Matchers.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
index d37c223..8a6a9da 100644
--- a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
+++ b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
@@ -18,9 +18,13 @@ package org.apache.calcite.util;
 
 import org.apache.calcite.test.CalciteAssert;
 
+import com.google.common.base.Function;
+
+import org.junit.Assume;
 import org.junit.Test;
 
 import java.util.AbstractList;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.List;
@@ -28,7 +32,10 @@ import java.util.Random;
 import java.util.Set;
 import java.util.TreeSet;
 
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.core.Is.is;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
 import static org.junit.Assert.assertTrue;
 
 /**
@@ -133,6 +140,13 @@ public class PartiallyOrderedSetTest {
 
     // "bcd" is child of "abcd" and parent of ""
     final String bcd = "'bcd'";
+    assertEquals("['abcd']", poset.getParents(bcd, true).toString());
+    assertThat(poset.getParents(bcd, false), nullValue());
+    assertThat(poset.getParents(bcd), nullValue());
+    assertEquals("['']", poset.getChildren(bcd, true).toString());
+    assertThat(poset.getChildren(bcd, false), nullValue());
+    assertThat(poset.getChildren(bcd), nullValue());
+
     poset.add(bcd);
     printValidate(poset);
     assertTrue(poset.isValid(false));
@@ -210,6 +224,66 @@ public class PartiallyOrderedSetTest {
     printValidate(poset);
   }
 
+  @Test public void testPosetBitsLarge() {
+    final PartiallyOrderedSet<Integer> poset =
+        new PartiallyOrderedSet<>(IS_BIT_SUPERSET);
+    checkPosetBitsLarge(poset, 30000, 2921, 164782);
+  }
+
+  @Test public void testPosetBitsLarge2() {
+    Assume.assumeTrue("too slow to run every day", CalciteAssert.ENABLE_SLOW);
+    final int n = 30000;
+    final PartiallyOrderedSet<Integer> poset =
+        new PartiallyOrderedSet<>(IS_BIT_SUPERSET,
+          new Function<Integer, Iterable<Integer>>() {
+            public Iterable<Integer> apply(Integer input) {
+              final int i = input;
+              int r = i; // bits not yet cleared
+              final List<Integer> list = new ArrayList<>();
+              for (int z = 1; r != 0; z <<= 1) {
+                if ((i & z) != 0) {
+                  list.add(i ^ z);
+                  r ^= z;
+                }
+              }
+              return list;
+            }
+          },
+          new Function<Integer, Iterable<Integer>>() {
+            public Iterable<Integer> apply(Integer input) {
+              final int i = input;
+              final List<Integer> list = new ArrayList<>();
+              for (int z = 1; z <= n; z <<= 1) {
+                if ((i & z) == 0) {
+                  list.add(i | z);
+                }
+              }
+              return list;
+            }
+          });
+    checkPosetBitsLarge(poset, n, 2921, 11961);
+  }
+
+  void checkPosetBitsLarge(PartiallyOrderedSet<Integer> poset, int n,
+      int expectedSize, int expectedParentCount) {
+    final Random random = new Random(1);
+    int count = 0;
+    int parentCount = 0;
+    for (int i = 0; i < n; i++) {
+      if (random.nextInt(10) == 0) {
+        if (poset.add(random.nextInt(n * 2))) {
+          ++count;
+        }
+      }
+      final List<Integer> parents =
+          poset.getParents(random.nextInt(n * 2), true);
+      parentCount += parents.size();
+    }
+    assertThat(poset.size(), is(count));
+    assertThat(poset.size(), is(expectedSize));
+    assertThat(parentCount, is(expectedParentCount));
+  }
+
   @Test public void testPosetBitsRemoveParent() {
     final PartiallyOrderedSet<Integer> poset =
         new PartiallyOrderedSet<Integer>(IS_BIT_SUPERSET);
@@ -223,9 +297,6 @@ public class PartiallyOrderedSetTest {
   }
 
   @Test public void testDivisorPoset() {
-    if (!CalciteAssert.ENABLE_SLOW) {
-      return;
-    }
     PartiallyOrderedSet<Integer> integers =
         new PartiallyOrderedSet<Integer>(IS_DIVISOR, range(1, 1000));
     assertEquals(
@@ -405,6 +476,7 @@ public class PartiallyOrderedSetTest {
         expected,
         new TreeSet<String>(ss).toString());
   }
+
 }
 
 // End PartiallyOrderedSetTest.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/TestUtil.java b/core/src/test/java/org/apache/calcite/util/TestUtil.java
index 5a283d2..b530d3b 100644
--- a/core/src/test/java/org/apache/calcite/util/TestUtil.java
+++ b/core/src/test/java/org/apache/calcite/util/TestUtil.java
@@ -34,6 +34,9 @@ public abstract class TestUtil {
   private static final String LINE_BREAK =
       "\\\\n\"" + Util.LINE_SEPARATOR + " + \"";
 
+  private static final String JAVA_VERSION =
+      System.getProperties().getProperty("java.version");
+
   //~ Methods ----------------------------------------------------------------
 
   public static void assertEqualsVerbose(
@@ -190,6 +193,20 @@ public abstract class TestUtil {
         .replaceAll("\\[", "\\\\[")
         .replaceAll("\\]", "\\\\]");
   }
+
+  /** Returns the Java major version: 7 for JDK 1.7, 8 for JDK 8, 10 for
+   * JDK 10, etc. */
+  public static int getJavaMajorVersion() {
+    if (JAVA_VERSION.startsWith("1.7")) {
+      return 7;
+    } else if (JAVA_VERSION.startsWith("1.8")) {
+      return 8;
+    } else if (JAVA_VERSION.startsWith("1.9")) {
+      return 9;
+    } else {
+      return 10;
+    }
+  }
 }
 
 // End TestUtil.java

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
----------------------------------------------------------------------
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
index 7dee05d..eaf9c91 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
@@ -19,6 +19,7 @@ package org.apache.calcite.adapter.tpch;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.util.TestUtil;
 import org.apache.calcite.util.Util;
 
 import com.google.common.base.Function;
@@ -40,11 +41,8 @@ import static org.junit.Assert.assertThat;
  * if {@code -Dcalcite.test.slow} is specified on the command-line.
  * (See {@link org.apache.calcite.test.CalciteAssert#ENABLE_SLOW}.)</p> */
 public class TpchTest {
-  public static final String JAVA_VERSION =
-      System.getProperties().getProperty("java.version");
-
   public static final boolean ENABLE =
-      CalciteAssert.ENABLE_SLOW && JAVA_VERSION.compareTo("1.7") >= 0;
+      CalciteAssert.ENABLE_SLOW && TestUtil.getJavaMajorVersion() >= 7;
 
   private static String schema(String name, String scaleFactor) {
     return "     {\n"

http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1feec71..9ea9ecc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,6 +129,7 @@ limitations under the License.
     <sqlline.version>1.3.0</sqlline.version>
     <xalan.version>2.7.1</xalan.version>
     <xerces.version>2.9.1</xerces.version>
+    <sketches.version>0.9.0</sketches.version>
   </properties>
 
   <issueManagement>
@@ -259,6 +260,11 @@ limitations under the License.
         <version>${guava.version}</version>
       </dependency>
       <dependency>
+        <groupId>com.h2database</groupId>
+        <artifactId>h2</artifactId>
+        <version>${h2.version}</version>
+      </dependency>
+      <dependency>
         <groupId>com.joestelmach</groupId>
         <artifactId>natty</artifactId>
         <version>${natty.version}</version>
@@ -269,9 +275,9 @@ limitations under the License.
         <version>${oracle-jdbc6-driver.version}</version>
       </dependency>
       <dependency>
-        <groupId>com.h2database</groupId>
-        <artifactId>h2</artifactId>
-        <version>${h2.version}</version>
+        <groupId>com.yahoo.datasketches</groupId>
+        <artifactId>sketches-core</artifactId>
+        <version>${sketches.version}</version>
       </dependency>
       <dependency>
         <groupId>javax.servlet</groupId>