You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2017/11/29 06:13:10 UTC
[1/4] calcite git commit: Add ImmutableBitSet.set(int, boolean)
Repository: calcite
Updated Branches:
refs/heads/master cd493efd3 -> 9bb54006a
Add ImmutableBitSet.set(int, boolean)
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/01eb6b5e
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/01eb6b5e
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/01eb6b5e
Branch: refs/heads/master
Commit: 01eb6b5ec28985eb64892fec0918c4a0a7c4aad0
Parents: cd493ef
Author: Julian Hyde <jh...@apache.org>
Authored: Thu Feb 2 14:41:40 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Mon Nov 27 16:33:31 2017 -0800
----------------------------------------------------------------------
.../main/java/org/apache/calcite/util/ImmutableBitSet.java | 9 +++++++++
.../java/org/apache/calcite/util/ImmutableBitSetTest.java | 9 +++++++++
2 files changed, 18 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/01eb6b5e/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
index 568fe3e..4f514bb 100644
--- a/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
+++ b/core/src/main/java/org/apache/calcite/util/ImmutableBitSet.java
@@ -843,6 +843,15 @@ public class ImmutableBitSet
return union(ImmutableBitSet.of(i));
}
+ /** Returns a bit set the same as this but with a given bit set (if b is
+ * true) or unset (if b is false). */
+ public ImmutableBitSet set(int i, boolean b) {
+ if (get(i) == b) {
+ return this;
+ }
+ return b ? set(i) : clear(i);
+ }
+
/** Returns a bit set the same as this but with a given bit set if condition
* is true. */
public ImmutableBitSet setIf(int bit, boolean condition) {
http://git-wip-us.apache.org/repos/asf/calcite/blob/01eb6b5e/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
index 8894012..9ae81d0 100644
--- a/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
+++ b/core/src/test/java/org/apache/calcite/util/ImmutableBitSetTest.java
@@ -33,6 +33,7 @@ import java.util.SortedMap;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.sameInstance;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
@@ -490,6 +491,14 @@ public class ImmutableBitSetTest {
assertThat(bitSet.clearIf(29, true), equalTo(bitSet2));
}
+ @Test public void testSet2() {
+ final ImmutableBitSet bitSet = ImmutableBitSet.of(29, 4, 1969);
+ final ImmutableBitSet bitSet2 = ImmutableBitSet.of(29, 4, 1969, 30);
+ assertThat(bitSet.set(30, false), sameInstance(bitSet));
+ assertThat(bitSet.set(30, true), equalTo(bitSet2));
+ assertThat(bitSet.set(29, true), sameInstance(bitSet));
+ }
+
@Test public void testShift() {
final ImmutableBitSet bitSet = ImmutableBitSet.of(29, 4, 1969);
assertThat(bitSet.shift(0), is(bitSet));
[4/4] calcite git commit: [CALCITE-2005] Test failures on Windows
Posted by jh...@apache.org.
[CALCITE-2005] Test failures on Windows
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/9bb54006
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/9bb54006
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/9bb54006
Branch: refs/heads/master
Commit: 9bb54006aa5cfd7765f791ccaef5e25afacaed86
Parents: dad5818
Author: Julian Hyde <jh...@apache.org>
Authored: Tue Oct 10 10:14:37 2017 -0700
Committer: Julian Hyde <jh...@apache.org>
Committed: Tue Nov 28 10:23:37 2017 -0800
----------------------------------------------------------------------
.../org/apache/calcite/interpreter/Interpreter.java | 3 ++-
.../java/org/apache/calcite/test/CalciteAssert.java | 1 +
.../java/org/apache/calcite/test/RelMetadataTest.java | 12 ++++++++++++
.../src/test/java/org/apache/calcite/util/UtilTest.java | 2 +-
file/pom.xml | 6 ++++++
.../org/apache/calcite/adapter/file/FileReaderTest.java | 4 +++-
6 files changed, 25 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java b/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
index 2d754fa..9360294 100644
--- a/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
+++ b/core/src/main/java/org/apache/calcite/interpreter/Interpreter.java
@@ -326,7 +326,8 @@ public class Interpreter extends AbstractEnumerable<Object[]>
this.rel = rel;
this.sink = sink;
this.rowEnumerable = rowEnumerable;
- assert (sink != null) != (rowEnumerable != null) : "one or the other";
+ Preconditions.checkArgument((sink == null) != (rowEnumerable == null),
+ "one or the other");
}
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
index 06e351a..70a415b 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteAssert.java
@@ -1343,6 +1343,7 @@ public class CalciteAssert {
hooks, checker, null, null);
return this;
} catch (Exception e) {
+ e.printStackTrace();
throw new RuntimeException(
"exception while executing [" + sql + "]", e);
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
index a09de16..09c8a50 100644
--- a/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
+++ b/core/src/test/java/org/apache/calcite/test/RelMetadataTest.java
@@ -100,9 +100,12 @@ import com.google.common.collect.Sets;
import org.hamcrest.CoreMatchers;
import org.hamcrest.CustomTypeSafeMatcher;
import org.hamcrest.Matcher;
+import org.hamcrest.core.Is;
+import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Test;
+import java.io.File;
import java.lang.reflect.Method;
import java.math.BigDecimal;
import java.util.ArrayList;
@@ -1444,6 +1447,15 @@ public class RelMetadataTest extends SqlToRelTestBase {
* columns</a>. Since this is a performance problem, the test result does not
* change, but takes over 15 minutes before the fix and 6 seconds after. */
@Test(timeout = 20_000) public void testPullUpPredicatesForExprsItr() {
+ // If we're running Windows, we are probably in a VM and the test may
+ // exceed timeout by a small margin.
+ Assume.assumeThat("Too slow on Windows", File.separatorChar, Is.is('/'));
+ testPullUpPredicatesForExprsItrNoTimeout();
+ }
+
+ /** As {@link #testPullUpPredicatesForExprsItr} but no timeout; can run on
+ * all platforms, even slow VMs. */
+ @Test public void testPullUpPredicatesForExprsItrNoTimeout() {
final String sql = "select a.EMPNO, a.ENAME\n"
+ "from (select * from sales.emp ) a\n"
+ "join (select * from sales.emp ) b\n"
http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/core/src/test/java/org/apache/calcite/util/UtilTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/UtilTest.java b/core/src/test/java/org/apache/calcite/util/UtilTest.java
index d7053c9..1fe2d22 100644
--- a/core/src/test/java/org/apache/calcite/util/UtilTest.java
+++ b/core/src/test/java/org/apache/calcite/util/UtilTest.java
@@ -2042,7 +2042,7 @@ public class UtilTest {
+ "\t\t\tline 4 with no ending\n"
+ "\t</someText>\n"
+ "</root>\n";
- assertThat(s, is(expected));
+ assertThat(Util.toLinux(s), is(expected));
}
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/file/pom.xml
----------------------------------------------------------------------
diff --git a/file/pom.xml b/file/pom.xml
index 6ea7f08..17a6a1e 100644
--- a/file/pom.xml
+++ b/file/pom.xml
@@ -41,6 +41,12 @@ limitations under the License.
</dependency>
<dependency>
<groupId>org.apache.calcite</groupId>
+ <artifactId>calcite-core</artifactId>
+ <type>test-jar</type>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.calcite</groupId>
<artifactId>calcite-linq4j</artifactId>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/calcite/blob/9bb54006/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
----------------------------------------------------------------------
diff --git a/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java b/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
index ec1034f..a9118f1 100644
--- a/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
+++ b/file/src/test/java/org/apache/calcite/adapter/file/FileReaderTest.java
@@ -18,6 +18,7 @@ package org.apache.calcite.adapter.file;
import org.apache.calcite.util.Source;
import org.apache.calcite.util.Sources;
+import org.apache.calcite.util.TestUtil;
import org.jsoup.select.Elements;
@@ -200,6 +201,7 @@ public class FileReaderTest {
* NPE in planner</a>. */
@Test public void testCsvFile() throws Exception {
Properties info = new Properties();
+ final String path = resourcePath("sales-csv");
final String model = "inline:"
+ "{\n"
+ " \"version\": \"1.0\",\n"
@@ -210,7 +212,7 @@ public class FileReaderTest {
+ " \"type\": \"custom\",\n"
+ " \"factory\": \"org.apache.calcite.adapter.file.FileSchemaFactory\",\n"
+ " \"operand\": {\n"
- + " \"directory\": \"" + resourcePath("sales-csv") + "\"\n"
+ + " \"directory\": " + TestUtil.escapeString(path) + "\n"
+ " }\n"
+ " }\n"
+ " ]\n"
[3/4] calcite git commit: [CALCITE-1616] Data profiler
Posted by jh...@apache.org.
[CALCITE-1616] Data profiler
Profiler does not work on JDK 7, because it relies on
com.yahoo.datasketches:sketches-core:0.9.0, which needs JDK 8 or
higher. Therefore some tests are disabled on JDK 7.
Change interface LatticeStatisticProvider to estimate joint rather
than singleton cardinality.
Bind LatticeStatisticProvider to a Lattice, and create
LatticeStatisticProvider.Factory to do the binding.
Keep results only if they are sufficiently "surprising" (informative).
The "surprise queue" is FIFO and holds the top N accepted values,
after a warm-up period
PartiallyOrderedSet: find "hypothetical" parents and children of
elements not in the set.
Project: http://git-wip-us.apache.org/repos/asf/calcite/repo
Commit: http://git-wip-us.apache.org/repos/asf/calcite/commit/dad58186
Tree: http://git-wip-us.apache.org/repos/asf/calcite/tree/dad58186
Diff: http://git-wip-us.apache.org/repos/asf/calcite/diff/dad58186
Branch: refs/heads/master
Commit: dad581868b815510389f61936b0c583be93d5dc5
Parents: 01eb6b5
Author: Julian Hyde <jh...@apache.org>
Authored: Wed Feb 1 19:43:46 2017 -0800
Committer: Julian Hyde <jh...@apache.org>
Committed: Tue Nov 28 10:20:25 2017 -0800
----------------------------------------------------------------------
core/pom.xml | 4 +
.../CachingLatticeStatisticProvider.java | 40 +-
.../DelegatingLatticeStatisticProvider.java | 6 +-
.../org/apache/calcite/materialize/Lattice.java | 196 +++--
.../materialize/LatticeStatisticProvider.java | 14 +-
.../apache/calcite/materialize/Lattices.java | 16 +-
.../ProfilerLatticeStatisticProvider.java | 105 +++
.../SqlLatticeStatisticProvider.java | 40 +-
.../org/apache/calcite/profile/Profiler.java | 294 +++++++
.../apache/calcite/profile/ProfilerImpl.java | 809 +++++++++++++++++++
.../apache/calcite/profile/SimpleProfiler.java | 341 ++++++++
.../apache/calcite/profile/package-info.java | 26 +
.../calcite/util/PartiallyOrderedSet.java | 215 +++--
.../apache/calcite/profile/ProfilerTest.java | 682 ++++++++++++++++
.../org/apache/calcite/test/CalciteSuite.java | 2 +
.../test/FoodMartLatticeStatisticProvider.java | 30 +-
.../org/apache/calcite/test/LatticeTest.java | 117 ++-
.../java/org/apache/calcite/test/Matchers.java | 81 ++
.../calcite/util/PartiallyOrderedSetTest.java | 78 +-
.../java/org/apache/calcite/util/TestUtil.java | 17 +
.../apache/calcite/adapter/tpch/TpchTest.java | 6 +-
pom.xml | 12 +-
22 files changed, 2928 insertions(+), 203 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 73c1866..5526122 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -89,6 +89,10 @@ limitations under the License.
<scope>test</scope>
</dependency>
<dependency>
+ <groupId>com.yahoo.datasketches</groupId>
+ <artifactId>sketches-core</artifactId>
+ </dependency>
+ <dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<scope>test</scope>
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
index e408335..21c6fed 100644
--- a/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/CachingLatticeStatisticProvider.java
@@ -16,43 +16,51 @@
*/
package org.apache.calcite.materialize;
-import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
+import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.UncheckedExecutionException;
+import java.util.ArrayList;
+import java.util.List;
import java.util.concurrent.ExecutionException;
+import javax.annotation.Nonnull;
/**
- * Implementation of {@link LatticeStatisticProvider} that gets statistics by
- * executing "SELECT COUNT(DISTINCT ...) ..." SQL queries.
+ * Implementation of {@link LatticeStatisticProvider} that caches single-column
+ * statistics and computes multi-column statistics from these.
*/
class CachingLatticeStatisticProvider implements LatticeStatisticProvider {
- private final LoadingCache<Pair<Lattice, Lattice.Column>, Integer> cache;
+ private final Lattice lattice;
+ private final LoadingCache<Lattice.Column, Double> cache;
/** Creates a CachingStatisticProvider. */
- CachingLatticeStatisticProvider(
+ CachingLatticeStatisticProvider(final Lattice lattice,
final LatticeStatisticProvider provider) {
- cache = CacheBuilder.<Pair<Lattice, Lattice.Column>>newBuilder()
+ this.lattice = lattice;
+ cache = CacheBuilder.<Lattice.Column>newBuilder()
.build(
- new CacheLoader<Pair<Lattice, Lattice.Column>, Integer>() {
- public Integer load(Pair<Lattice, Lattice.Column> key)
- throws Exception {
- return provider.cardinality(key.left, key.right);
+ new CacheLoader<Lattice.Column, Double>() {
+ public Double load(@Nonnull Lattice.Column key) throws Exception {
+ return provider.cardinality(ImmutableList.of(key));
}
});
}
- public int cardinality(Lattice lattice, Lattice.Column column) {
- try {
- return cache.get(Pair.of(lattice, column));
- } catch (UncheckedExecutionException | ExecutionException e) {
- Util.throwIfUnchecked(e.getCause());
- throw new RuntimeException(e.getCause());
+ public double cardinality(List<Lattice.Column> columns) {
+ final List<Double> counts = new ArrayList<>();
+ for (Lattice.Column column : columns) {
+ try {
+ counts.add(cache.get(column));
+ } catch (UncheckedExecutionException | ExecutionException e) {
+ Util.throwIfUnchecked(e.getCause());
+ throw new RuntimeException(e.getCause());
+ }
}
+ return (int) Lattice.getRowCount(lattice.getFactRowCount(), counts);
}
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
index e1ec6c5..f889dae 100644
--- a/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/DelegatingLatticeStatisticProvider.java
@@ -16,6 +16,8 @@
*/
package org.apache.calcite.materialize;
+import java.util.List;
+
/**
* Implementation of {@link LatticeStatisticProvider} that delegates
* to an underlying provider.
@@ -33,8 +35,8 @@ public class DelegatingLatticeStatisticProvider
this.provider = provider;
}
- public int cardinality(Lattice lattice, Lattice.Column column) {
- return provider.cardinality(lattice, column);
+ public double cardinality(List<Lattice.Column> columns) {
+ return provider.cardinality(columns);
}
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/Lattice.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/Lattice.java b/core/src/main/java/org/apache/calcite/materialize/Lattice.java
index 20a28a7..3828416 100644
--- a/core/src/main/java/org/apache/calcite/materialize/Lattice.java
+++ b/core/src/main/java/org/apache/calcite/materialize/Lattice.java
@@ -19,6 +19,7 @@ package org.apache.calcite.materialize;
import org.apache.calcite.avatica.AvaticaUtils;
import org.apache.calcite.jdbc.CalcitePrepare;
import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.linq4j.tree.Primitive;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.prepare.CalcitePrepareImpl;
import org.apache.calcite.rel.RelNode;
@@ -60,7 +61,7 @@ import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.collect.Sets;
-import java.math.BigInteger;
+import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
@@ -113,16 +114,15 @@ public class Lattice {
private Lattice(CalciteSchema rootSchema, ImmutableList<Node> nodes,
boolean auto, boolean algorithm, long algorithmMaxMillis,
- LatticeStatisticProvider statisticProvider, Double rowCountEstimate,
- ImmutableList<Column> columns, ImmutableList<Measure> defaultMeasures,
- ImmutableList<Tile> tiles) {
+ LatticeStatisticProvider.Factory statisticProviderFactory,
+ Double rowCountEstimate, ImmutableList<Column> columns,
+ ImmutableList<Measure> defaultMeasures, ImmutableList<Tile> tiles) {
this.rootSchema = rootSchema;
this.nodes = Preconditions.checkNotNull(nodes);
this.columns = Preconditions.checkNotNull(columns);
this.auto = auto;
this.algorithm = algorithm;
this.algorithmMaxMillis = algorithmMaxMillis;
- this.statisticProvider = Preconditions.checkNotNull(statisticProvider);
this.defaultMeasures = Preconditions.checkNotNull(defaultMeasures);
this.tiles = Preconditions.checkNotNull(tiles);
@@ -151,6 +151,8 @@ public class Lattice {
}
Preconditions.checkArgument(rowCountEstimate > 0d);
this.rowCountEstimate = rowCountEstimate;
+ this.statisticProvider =
+ Preconditions.checkNotNull(statisticProviderFactory.apply(this));
}
/** Creates a Lattice. */
@@ -233,74 +235,92 @@ public class Lattice {
throw new AssertionError("input not found");
}
+ /** Generates a SQL query to populate a tile of the lattice specified by a
+ * given set of columns and measures. */
public String sql(ImmutableBitSet groupSet, List<Measure> aggCallList) {
- final ImmutableBitSet.Builder columnSetBuilder = groupSet.rebuild();
- for (Measure call : aggCallList) {
- for (Column arg : call.args) {
- columnSetBuilder.set(arg.ordinal);
+ return sql(groupSet, true, aggCallList);
+ }
+
+ /** Generates a SQL query to populate a tile of the lattice specified by a
+ * given set of columns and measures, optionally grouping. */
+ public String sql(ImmutableBitSet groupSet, boolean group,
+ List<Measure> aggCallList) {
+ final List<Node> usedNodes = new ArrayList<>();
+ if (group) {
+ final ImmutableBitSet.Builder columnSetBuilder = groupSet.rebuild();
+ for (Measure call : aggCallList) {
+ for (Column arg : call.args) {
+ columnSetBuilder.set(arg.ordinal);
+ }
}
- }
- final ImmutableBitSet columnSet = columnSetBuilder.build();
+ final ImmutableBitSet columnSet = columnSetBuilder.build();
- // Figure out which nodes are needed. Use a node if its columns are used
- // or if has a child whose columns are used.
- List<Node> usedNodes = Lists.newArrayList();
- for (Node node : nodes) {
- if (ImmutableBitSet.range(node.startCol, node.endCol)
- .intersects(columnSet)) {
- use(usedNodes, node);
+ // Figure out which nodes are needed. Use a node if its columns are used
+ // or if has a child whose columns are used.
+ for (Node node : nodes) {
+ if (ImmutableBitSet.range(node.startCol, node.endCol)
+ .intersects(columnSet)) {
+ use(usedNodes, node);
+ }
}
+ if (usedNodes.isEmpty()) {
+ usedNodes.add(nodes.get(0));
+ }
+ } else {
+ usedNodes.addAll(nodes);
}
- if (usedNodes.isEmpty()) {
- usedNodes.add(nodes.get(0));
- }
+
final SqlDialect dialect = SqlDialect.DatabaseProduct.CALCITE.getDialect();
final StringBuilder buf = new StringBuilder("SELECT ");
final StringBuilder groupBuf = new StringBuilder("\nGROUP BY ");
int k = 0;
final Set<String> columnNames = Sets.newHashSet();
- for (int i : groupSet) {
- if (k++ > 0) {
- buf.append(", ");
- groupBuf.append(", ");
+ if (groupSet != null) {
+ for (int i : groupSet) {
+ if (k++ > 0) {
+ buf.append(", ");
+ groupBuf.append(", ");
+ }
+ final Column column = columns.get(i);
+ dialect.quoteIdentifier(buf, column.identifiers());
+ dialect.quoteIdentifier(groupBuf, column.identifiers());
+ final String fieldName = uniqueColumnNames.get(i);
+ columnNames.add(fieldName);
+ if (!column.alias.equals(fieldName)) {
+ buf.append(" AS ");
+ dialect.quoteIdentifier(buf, fieldName);
+ }
}
- final Column column = columns.get(i);
- dialect.quoteIdentifier(buf, column.identifiers());
- dialect.quoteIdentifier(groupBuf, column.identifiers());
- final String fieldName = uniqueColumnNames.get(i);
- columnNames.add(fieldName);
- if (!column.alias.equals(fieldName)) {
- buf.append(" AS ");
- dialect.quoteIdentifier(buf, fieldName);
+ if (groupSet.isEmpty()) {
+ groupBuf.append("()");
}
- }
- if (groupSet.isEmpty()) {
- groupBuf.append("()");
- }
- int m = 0;
- for (Measure measure : aggCallList) {
- if (k++ > 0) {
- buf.append(", ");
- }
- buf.append(measure.agg.getName())
- .append("(");
- if (measure.args.isEmpty()) {
- buf.append("*");
- } else {
- int z = 0;
- for (Column arg : measure.args) {
- if (z++ > 0) {
- buf.append(", ");
+ int m = 0;
+ for (Measure measure : aggCallList) {
+ if (k++ > 0) {
+ buf.append(", ");
+ }
+ buf.append(measure.agg.getName())
+ .append("(");
+ if (measure.args.isEmpty()) {
+ buf.append("*");
+ } else {
+ int z = 0;
+ for (Column arg : measure.args) {
+ if (z++ > 0) {
+ buf.append(", ");
+ }
+ dialect.quoteIdentifier(buf, arg.identifiers());
}
- dialect.quoteIdentifier(buf, arg.identifiers());
}
+ buf.append(") AS ");
+ String measureName;
+ while (!columnNames.add(measureName = "m" + m)) {
+ ++m;
+ }
+ dialect.quoteIdentifier(buf, measureName);
}
- buf.append(") AS ");
- String measureName;
- while (!columnNames.add(measureName = "m" + m)) {
- ++m;
- }
- dialect.quoteIdentifier(buf, measureName);
+ } else {
+ buf.append("*");
}
buf.append("\nFROM ");
for (Node node : usedNodes) {
@@ -329,7 +349,9 @@ public class Lattice {
System.out.println("Lattice SQL:\n"
+ buf);
}
- buf.append(groupBuf);
+ if (group) {
+ buf.append(groupBuf);
+ }
return buf.toString();
}
@@ -381,30 +403,40 @@ public class Lattice {
/** Returns an estimate of the number of rows in the tile with the given
* dimensions. */
public double getRowCount(List<Column> columns) {
+ return statisticProvider.cardinality(columns);
+ }
+
+ /** Returns an estimate of the number of rows in the tile with the given
+ * dimensions. */
+ public static double getRowCount(double factCount, double... columnCounts) {
+ return getRowCount(factCount, Primitive.asList(columnCounts));
+ }
+
+ /** Returns an estimate of the number of rows in the tile with the given
+ * dimensions. */
+ public static double getRowCount(double factCount,
+ List<Double> columnCounts) {
// The expected number of distinct values when choosing p values
// with replacement from n integers is n . (1 - ((n - 1) / n) ^ p).
//
// If we have several uniformly distributed attributes A1 ... Am
// with N1 ... Nm distinct values, they behave as one uniformly
// distributed attribute with N1 * ... * Nm distinct values.
- BigInteger n = BigInteger.ONE;
- for (Column column : columns) {
- final int cardinality = statisticProvider.cardinality(this, column);
- if (cardinality > 1) {
- n = n.multiply(BigInteger.valueOf(cardinality));
+ double n = 1d;
+ for (Double columnCount : columnCounts) {
+ if (columnCount > 1d) {
+ n *= columnCount;
}
}
- final double nn = n.doubleValue();
- final double f = getFactRowCount();
- final double a = (nn - 1d) / nn;
+ final double a = (n - 1d) / n;
if (a == 1d) {
// A under-flows if nn is large.
- return f;
+ return factCount;
}
- final double v = nn * (1d - Math.pow(a, f));
+ final double v = n * (1d - Math.pow(a, factCount));
// Cap at fact-row-count, because numerical artifacts can cause it
// to go a few % over.
- return Math.min(v, f);
+ return Math.min(v, factCount);
}
/** Source relation of a lattice.
@@ -535,6 +567,15 @@ public class Lattice {
this.alias = Preconditions.checkNotNull(alias);
}
+ /** Converts a list of columns to a bit set of their ordinals. */
+ static ImmutableBitSet toBitSet(List<Column> columns) {
+ final ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (Column column : columns) {
+ builder.set(column.ordinal);
+ }
+ return builder.build();
+ }
+
public int compareTo(Column column) {
return Utilities.compare(ordinal, column.ordinal);
}
@@ -690,9 +731,10 @@ public class Lattice {
/** Builds a lattice. */
public Lattice build() {
- LatticeStatisticProvider statisticProvider =
+ LatticeStatisticProvider.Factory statisticProvider =
this.statisticProvider != null
- ? AvaticaUtils.instantiatePlugin(LatticeStatisticProvider.class,
+ ? AvaticaUtils.instantiatePlugin(
+ LatticeStatisticProvider.Factory.class,
this.statisticProvider)
: Lattices.CACHED_SQL;
Preconditions.checkArgument(rootSchema.isRoot(), "must be root schema");
@@ -815,15 +857,11 @@ public class Lattice {
public Tile(ImmutableList<Measure> measures,
ImmutableList<Column> dimensions) {
- this.measures = measures;
- this.dimensions = dimensions;
+ this.measures = Preconditions.checkNotNull(measures);
+ this.dimensions = Preconditions.checkNotNull(dimensions);
assert Ordering.natural().isStrictlyOrdered(dimensions);
assert Ordering.natural().isStrictlyOrdered(measures);
- final ImmutableBitSet.Builder bitSetBuilder = ImmutableBitSet.builder();
- for (Column dimension : dimensions) {
- bitSetBuilder.set(dimension.ordinal);
- }
- bitSet = bitSetBuilder.build();
+ bitSet = Column.toBitSet(dimensions);
}
public static TileBuilder builder() {
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
index 46668cc..d16e903 100644
--- a/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/LatticeStatisticProvider.java
@@ -16,12 +16,22 @@
*/
package org.apache.calcite.materialize;
+import com.google.common.base.Function;
+
+import java.util.List;
+
/**
* Estimates row counts for a lattice and its attributes.
*/
public interface LatticeStatisticProvider {
- /** Returns an estimate of the number of distinct values in a column. */
- int cardinality(Lattice lattice, Lattice.Column column);
+ /** Returns an estimate of the number of distinct values in a column
+ * or list of columns. */
+ double cardinality(List<Lattice.Column> columns);
+
+ /** Creates a {@link LatticeStatisticProvider} for a given
+ * {@link org.apache.calcite.materialize.Lattice}. */
+ interface Factory extends Function<Lattice, LatticeStatisticProvider> {
+ }
}
// End LatticeStatisticProvider.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/Lattices.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/Lattices.java b/core/src/main/java/org/apache/calcite/materialize/Lattices.java
index cf70719..2a0a7ea 100644
--- a/core/src/main/java/org/apache/calcite/materialize/Lattices.java
+++ b/core/src/main/java/org/apache/calcite/materialize/Lattices.java
@@ -23,18 +23,16 @@ public class Lattices {
private Lattices() {}
/** Statistics provider that uses SQL. */
- public static final LatticeStatisticProvider SQL =
- SqlLatticeStatisticProvider.INSTANCE;
+ public static final LatticeStatisticProvider.Factory SQL =
+ SqlLatticeStatisticProvider.FACTORY;
/** Statistics provider that uses SQL then stores the results in a cache. */
- public static final LatticeStatisticProvider CACHED_SQL =
- cache(SqlLatticeStatisticProvider.INSTANCE);
+ public static final LatticeStatisticProvider.Factory CACHED_SQL =
+ SqlLatticeStatisticProvider.CACHED_FACTORY;
- /** Wraps a statistic provider in a cache. */
- public static LatticeStatisticProvider cache(
- LatticeStatisticProvider provider) {
- return new CachingLatticeStatisticProvider(provider);
- }
+ /** Statistics provider that uses a profiler. */
+ public static final LatticeStatisticProvider.Factory PROFILER =
+ ProfilerLatticeStatisticProvider.FACTORY;
}
// End Lattices.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java
new file mode 100644
index 0000000..39e0b29
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/materialize/ProfilerLatticeStatisticProvider.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.materialize;
+
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.function.Function1;
+import org.apache.calcite.profile.Profiler;
+import org.apache.calcite.profile.ProfilerImpl;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.schema.ScannableTable;
+import org.apache.calcite.schema.Table;
+import org.apache.calcite.util.ImmutableBitSet;
+
+import com.google.common.base.Preconditions;
+import com.google.common.base.Supplier;
+import com.google.common.base.Suppliers;
+import com.google.common.collect.ImmutableList;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Implementation of {@link LatticeStatisticProvider} that uses a
+ * {@link org.apache.calcite.profile.Profiler}.
+ */
+class ProfilerLatticeStatisticProvider implements LatticeStatisticProvider {
+ static final Factory FACTORY =
+ new Factory() {
+ public LatticeStatisticProvider apply(Lattice lattice) {
+ return new ProfilerLatticeStatisticProvider(lattice);
+ }
+ };
+
+ /** Converts an array of values to a list of {@link Comparable} values,
+ * converting null values to sentinels. */
+ private static final Function1<Object[], List<Comparable>> TO_LIST =
+ new Function1<Object[], List<Comparable>>() {
+ public List<Comparable> apply(Object[] values) {
+ for (int i = 0; i < values.length; i++) {
+ if (values[i] == null) {
+ values[i] = NullSentinel.INSTANCE;
+ }
+ }
+ //noinspection unchecked
+ return (List) Arrays.asList(values);
+ }
+ };
+
+ private final Lattice lattice;
+ private final Supplier<Profiler.Profile> profile =
+ Suppliers.memoize(new Supplier<Profiler.Profile>() {
+ public Profiler.Profile get() {
+ final ProfilerImpl profiler =
+ ProfilerImpl.builder()
+ .withPassSize(200)
+ .withMinimumSurprise(0.3D)
+ .build();
+ final List<Profiler.Column> columns = new ArrayList<>();
+ for (Lattice.Column column : lattice.columns) {
+ columns.add(new Profiler.Column(column.ordinal, column.alias));
+ }
+ final String sql =
+ lattice.sql(ImmutableBitSet.range(lattice.columns.size()),
+ false, ImmutableList.<Lattice.Measure>of());
+ final Table table =
+ new MaterializationService.DefaultTableFactory()
+ .createTable(lattice.rootSchema, sql,
+ ImmutableList.<String>of());
+ final ImmutableList<ImmutableBitSet> initialGroups =
+ ImmutableList.of();
+ final Enumerable<List<Comparable>> rows =
+ ((ScannableTable) table).scan(null).select(TO_LIST);
+ return profiler.profile(rows, columns, initialGroups);
+ }
+ });
+
+ /** Creates a ProfilerLatticeStatisticProvider. */
+ private ProfilerLatticeStatisticProvider(Lattice lattice) {
+ this.lattice = Preconditions.checkNotNull(lattice);
+ }
+
+ public double cardinality(List<Lattice.Column> columns) {
+ final ImmutableBitSet build = Lattice.Column.toBitSet(columns);
+ final double cardinality = profile.get().cardinality(build);
+// System.out.println(columns + ": " + cardinality);
+ return cardinality;
+ }
+}
+
+// End ProfilerLatticeStatisticProvider.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java b/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
index 4a94847..8e5b19d 100644
--- a/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
+++ b/core/src/main/java/org/apache/calcite/materialize/SqlLatticeStatisticProvider.java
@@ -20,28 +20,56 @@ import org.apache.calcite.schema.ScannableTable;
import org.apache.calcite.schema.Table;
import org.apache.calcite.util.ImmutableBitSet;
+import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* Implementation of {@link LatticeStatisticProvider} that gets statistics by
* executing "SELECT COUNT(DISTINCT ...) ..." SQL queries.
*/
class SqlLatticeStatisticProvider implements LatticeStatisticProvider {
- static final SqlLatticeStatisticProvider INSTANCE =
- new SqlLatticeStatisticProvider();
+ static final Factory FACTORY =
+ new LatticeStatisticProvider.Factory() {
+ public LatticeStatisticProvider apply(Lattice lattice) {
+ return new SqlLatticeStatisticProvider(lattice);
+ }
+ };
+
+ static final Factory CACHED_FACTORY =
+ new LatticeStatisticProvider.Factory() {
+ public LatticeStatisticProvider apply(Lattice lattice) {
+ LatticeStatisticProvider provider = FACTORY.apply(lattice);
+ return new CachingLatticeStatisticProvider(lattice, provider);
+ }
+ };
+
+ private final Lattice lattice;
- /** Creates an SqlLatticeStatisticProvider. */
- private SqlLatticeStatisticProvider() {}
+ /** Creates a SqlLatticeStatisticProvider. */
+ private SqlLatticeStatisticProvider(Lattice lattice) {
+ this.lattice = Preconditions.checkNotNull(lattice);
+ }
+
+ public double cardinality(List<Lattice.Column> columns) {
+ final List<Double> counts = new ArrayList<>();
+ for (Lattice.Column column : columns) {
+ counts.add(cardinality(lattice, column));
+ }
+ return (int) Lattice.getRowCount(lattice.getFactRowCount(), counts);
+ }
- @Override public int cardinality(Lattice lattice, Lattice.Column column) {
+ private double cardinality(Lattice lattice, Lattice.Column column) {
final String sql = lattice.countSql(ImmutableBitSet.of(column.ordinal));
final Table table =
new MaterializationService.DefaultTableFactory()
.createTable(lattice.rootSchema, sql, ImmutableList.<String>of());
final Object[] values =
Iterables.getOnlyElement(((ScannableTable) table).scan(null));
- return ((Number) values[0]).intValue();
+ return ((Number) values[0]).doubleValue();
}
}
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/Profiler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/Profiler.java b/core/src/main/java/org/apache/calcite/profile/Profiler.java
new file mode 100644
index 0000000..851f40b
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/Profiler.java
@@ -0,0 +1,294 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.materialize.Lattice;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.JsonBuilder;
+import org.apache.calcite.util.Util;
+
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.ImmutableSortedSet;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.NavigableSet;
+import java.util.SortedSet;
+import javax.annotation.Nonnull;
+
+/**
+ * Analyzes data sets.
+ */
+public interface Profiler {
+ /** Creates a profile of a data set.
+ *
+ * @param rows List of rows. Can be iterated over more than once (maybe not
+ * cheaply)
+ * @param columns Column definitions
+ *
+ * @param initialGroups List of combinations of columns that should be
+ * profiled early, because they may be interesting
+ *
+ * @return A profile describing relationships within the data set
+ */
+ Profile profile(Iterable<List<Comparable>> rows, List<Column> columns,
+ Collection<ImmutableBitSet> initialGroups);
+
+ /** Column. */
+ class Column implements Comparable<Column> {
+ public final int ordinal;
+ public final String name;
+
+ /** Creates a Column.
+ *
+ * @param ordinal Unique and contiguous within a particular data set
+ * @param name Name of the column
+ */
+ public Column(int ordinal, String name) {
+ this.ordinal = ordinal;
+ this.name = name;
+ }
+
+ static ImmutableBitSet toOrdinals(Iterable<Column> columns) {
+ final ImmutableBitSet.Builder builder = ImmutableBitSet.builder();
+ for (Column column : columns) {
+ builder.set(column.ordinal);
+ }
+ return builder.build();
+ }
+
+ @Override public int hashCode() {
+ return ordinal;
+ }
+
+ @Override public boolean equals(Object o) {
+ return this == o
+ || o instanceof Column
+ && ordinal == ((Column) o).ordinal;
+ }
+
+ @Override public int compareTo(@Nonnull Column column) {
+ return Integer.compare(ordinal, column.ordinal);
+ }
+
+ @Override public String toString() {
+ return name;
+ }
+ }
+
+ /** Statistic produced by the profiler. */
+ interface Statistic {
+ Object toMap(JsonBuilder jsonBuilder);
+ }
+
+ /** Whole data set. */
+ class RowCount implements Statistic {
+ final int rowCount;
+
+ public RowCount(int rowCount) {
+ this.rowCount = rowCount;
+ }
+
+ public Object toMap(JsonBuilder jsonBuilder) {
+ final Map<String, Object> map = jsonBuilder.map();
+ map.put("type", "rowCount");
+ map.put("rowCount", rowCount);
+ return map;
+ }
+ }
+
+ /** Unique key. */
+ class Unique implements Statistic {
+ final NavigableSet<Column> columns;
+
+ public Unique(SortedSet<Column> columns) {
+ this.columns = ImmutableSortedSet.copyOf(columns);
+ }
+
+ public Object toMap(JsonBuilder jsonBuilder) {
+ final Map<String, Object> map = jsonBuilder.map();
+ map.put("type", "unique");
+ map.put("columns", FunctionalDependency.getObjects(jsonBuilder, columns));
+ return map;
+ }
+ }
+
+ /** Functional dependency. */
+ class FunctionalDependency implements Statistic {
+ final NavigableSet<Column> columns;
+ final Column dependentColumn;
+
+ FunctionalDependency(SortedSet<Column> columns, Column dependentColumn) {
+ this.columns = ImmutableSortedSet.copyOf(columns);
+ this.dependentColumn = dependentColumn;
+ }
+
+ public Object toMap(JsonBuilder jsonBuilder) {
+ final Map<String, Object> map = jsonBuilder.map();
+ map.put("type", "fd");
+ map.put("columns", getObjects(jsonBuilder, columns));
+ map.put("dependentColumn", dependentColumn.name);
+ return map;
+ }
+
+ private static List<Object> getObjects(JsonBuilder jsonBuilder,
+ NavigableSet<Column> columns) {
+ final List<Object> list = jsonBuilder.list();
+ for (Column column : columns) {
+ list.add(column.name);
+ }
+ return list;
+ }
+ }
+
+ /** Value distribution, including cardinality and optionally values, of a
+ * column or set of columns. If the set of columns is empty, it describes
+ * the number of rows in the entire data set. */
+ class Distribution implements Statistic {
+ final NavigableSet<Column> columns;
+ final NavigableSet<Comparable> values;
+ final double cardinality;
+ final int nullCount;
+ final double expectedCardinality;
+ final boolean minimal;
+
+ /** Creates a Distribution.
+ *
+ * @param columns Column or columns being described
+ * @param values Values of columns, or null if there are too many
+ * @param cardinality Number of distinct values
+ * @param nullCount Number of rows where this column had a null value;
+ * @param expectedCardinality Expected cardinality
+ * @param minimal Whether the distribution is not implied by a unique
+ * or functional dependency
+ */
+ public Distribution(SortedSet<Column> columns, SortedSet<Comparable> values,
+ double cardinality, int nullCount, double expectedCardinality,
+ boolean minimal) {
+ this.columns = ImmutableSortedSet.copyOf(columns);
+ this.values = values == null ? null : ImmutableSortedSet.copyOf(values);
+ this.cardinality = cardinality;
+ this.nullCount = nullCount;
+ this.expectedCardinality = expectedCardinality;
+ this.minimal = minimal;
+ }
+
+ public Object toMap(JsonBuilder jsonBuilder) {
+ final Map<String, Object> map = jsonBuilder.map();
+ map.put("type", "distribution");
+ map.put("columns", FunctionalDependency.getObjects(jsonBuilder, columns));
+ if (values != null) {
+ List<Object> list = jsonBuilder.list();
+ for (Comparable value : values) {
+ if (value instanceof java.sql.Date) {
+ value = value.toString();
+ }
+ list.add(value);
+ }
+ map.put("values", list);
+ }
+ map.put("cardinality", cardinality);
+ if (nullCount > 0) {
+ map.put("nullCount", nullCount);
+ }
+ map.put("expectedCardinality", expectedCardinality);
+ map.put("surprise", surprise());
+ return map;
+ }
+
+ ImmutableBitSet columnOrdinals() {
+ return Column.toOrdinals(columns);
+ }
+
+ double surprise() {
+ return SimpleProfiler.surprise(expectedCardinality, cardinality);
+ }
+ }
+
+ /** The result of profiling, contains various statistics about the
+ * data in a table. */
+ class Profile {
+ public final RowCount rowCount;
+ public final List<FunctionalDependency> functionalDependencyList;
+ public final List<Distribution> distributionList;
+ public final List<Unique> uniqueList;
+
+ private final Map<ImmutableBitSet, Distribution> distributionMap;
+ private final List<Distribution> singletonDistributionList;
+
+ Profile(List<Column> columns, RowCount rowCount,
+ Iterable<FunctionalDependency> functionalDependencyList,
+ Iterable<Distribution> distributionList, Iterable<Unique> uniqueList) {
+ this.rowCount = rowCount;
+ this.functionalDependencyList =
+ ImmutableList.copyOf(functionalDependencyList);
+ this.distributionList = ImmutableList.copyOf(distributionList);
+ this.uniqueList = ImmutableList.copyOf(uniqueList);
+
+ final ImmutableMap.Builder<ImmutableBitSet, Distribution> m =
+ ImmutableMap.builder();
+ for (Distribution distribution : distributionList) {
+ m.put(distribution.columnOrdinals(), distribution);
+ }
+ distributionMap = m.build();
+
+ final ImmutableList.Builder<Distribution> b = ImmutableList.builder();
+ for (int i = 0; i < columns.size(); i++) {
+ b.add(distributionMap.get(ImmutableBitSet.of(i)));
+ }
+ singletonDistributionList = b.build();
+ }
+
+ public List<Statistic> statistics() {
+ return ImmutableList.<Statistic>builder()
+ .add(rowCount)
+ .addAll(functionalDependencyList)
+ .addAll(distributionList)
+ .addAll(uniqueList)
+ .build();
+ }
+
+ public double cardinality(ImmutableBitSet columnOrdinals) {
+ final ImmutableBitSet originalOrdinals = columnOrdinals;
+ for (;;) {
+ final Distribution distribution = distributionMap.get(columnOrdinals);
+ if (distribution != null) {
+ if (columnOrdinals == originalOrdinals) {
+ return distribution.cardinality;
+ } else {
+ final List<Double> cardinalityList = new ArrayList<>();
+ cardinalityList.add(distribution.cardinality);
+ for (int ordinal : originalOrdinals.except(columnOrdinals)) {
+ final Distribution d = singletonDistributionList.get(ordinal);
+ cardinalityList.add(d.cardinality);
+ }
+ return Lattice.getRowCount(rowCount.rowCount, cardinalityList);
+ }
+ }
+ // Clear the last bit and iterate.
+ // Better would be to combine all of our nearest ancestors.
+ final List<Integer> list = columnOrdinals.asList();
+ columnOrdinals = columnOrdinals.clear(Util.last(list));
+ }
+ }
+ }
+}
+
+// End Profiler.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java b/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java
new file mode 100644
index 0000000..139f13d
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/ProfilerImpl.java
@@ -0,0 +1,809 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.linq4j.tree.Primitive;
+import org.apache.calcite.materialize.Lattice;
+import org.apache.calcite.prepare.CalcitePrepareImpl;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.FlatLists;
+import org.apache.calcite.runtime.PredicateImpl;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.Pair;
+import org.apache.calcite.util.PartiallyOrderedSet;
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Ordering;
+import com.yahoo.sketches.hll.HllSketch;
+
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayDeque;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.Deque;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.Queue;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import static org.apache.calcite.profile.ProfilerImpl.CompositeCollector.OF;
+
+/**
+ * Implementation of {@link Profiler} that only investigates "interesting"
+ * combinations of columns.
+ */
+public class ProfilerImpl implements Profiler {
+ /** The number of combinations to consider per pass.
+ * The number is determined by memory, but a value of 1,000 is typical.
+ * You need 2KB memory per sketch, and one sketch for each combination. */
+ private final int combinationsPerPass;
+
+ /** The minimum number of combinations considered "interesting". After that,
+ * a combination is only considered "interesting" if its surprise is greater
+ * than the median surprise. */
+ private final int interestingCount;
+
+ /** Whether a successor is considered interesting enough to analyze. */
+ private final Predicate<Pair<Space, Column>> predicate;
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ /**
+ * Creates a {@code ProfilerImpl}.
+ *
+ * @param combinationsPerPass Maximum number of columns (or combinations of
+ * columns) to compute each pass
+ * @param interestingCount Minimum number of combinations considered
+ * interesting
+ * @param predicate Whether a successor is considered interesting enough to
+ * analyze
+ */
+ ProfilerImpl(int combinationsPerPass,
+ int interestingCount, Predicate<Pair<Space, Column>> predicate) {
+ Preconditions.checkArgument(combinationsPerPass > 2);
+ Preconditions.checkArgument(interestingCount > 2);
+ this.combinationsPerPass = combinationsPerPass;
+ this.interestingCount = interestingCount;
+ this.predicate = predicate;
+ }
+
+ public Profile profile(Iterable<List<Comparable>> rows,
+ final List<Column> columns, Collection<ImmutableBitSet> initialGroups) {
+ return new Run(columns, initialGroups).profile(rows);
+ }
+
+ /** A run of the profiler. */
+ class Run {
+ private final List<Column> columns;
+ final PartiallyOrderedSet<ImmutableBitSet> keyPoset =
+ new PartiallyOrderedSet<>(
+ PartiallyOrderedSet.BIT_SET_INCLUSION_ORDERING);
+ final Map<ImmutableBitSet, Distribution> distributions = new HashMap<>();
+ /** List of spaces that have one column. */
+ final List<Space> singletonSpaces;
+ /** Combinations of columns that we have computed but whose successors have
+ * not yet been computed. We may add some of those successors to
+ * {@link #spaceQueue}. */
+ final Queue<Space> doneQueue =
+ new PriorityQueue<>(100,
+ new Comparator<Space>() {
+ public int compare(Space s0, Space s1) {
+ // The space with 0 columns is more interesting than
+ // any space with 1 column, and so forth.
+ // For spaces with 2 or more columns we compare "surprise":
+ // how many fewer values did it have than expected?
+ int c = Integer.compare(s0.columns.size(), s1.columns.size());
+ if (c == 0) {
+ c = Double.compare(s0.surprise(), s1.surprise());
+ }
+ return c;
+ }
+ });
+ final SurpriseQueue surprises;
+
+ /** Combinations of columns that we will compute next pass. */
+ final Deque<ImmutableBitSet> spaceQueue = new ArrayDeque<>();
+ final List<Unique> uniques = new ArrayList<>();
+ final List<FunctionalDependency> functionalDependencies = new ArrayList<>();
+ /** Column ordinals that have ever been placed on {@link #spaceQueue}.
+ * Ensures that we do not calculate the same combination more than once,
+ * even though we generate a column set from multiple parents. */
+ final Set<ImmutableBitSet> resultSet = new HashSet<>();
+ final PartiallyOrderedSet<Space> results = new PartiallyOrderedSet<>(
+ new PartiallyOrderedSet.Ordering<Space>() {
+ public boolean lessThan(Space e1, Space e2) {
+ return e2.columnOrdinals.contains(e1.columnOrdinals);
+ }
+ });
+ private final List<ImmutableBitSet> keyOrdinalLists =
+ new ArrayList<>();
+ final Function<Integer, Column> get =
+ new Function<Integer, Column>() {
+ public Column apply(Integer input) {
+ return columns.get(input);
+ }
+ };
+ private int rowCount;
+
+ /**
+ * Creates a Run.
+ *
+ * @param columns List of columns
+ *
+ * @param initialGroups List of combinations of columns that should be
+ * profiled early, because they may be interesting
+ */
+ Run(final List<Column> columns, Collection<ImmutableBitSet> initialGroups) {
+ this.columns = ImmutableList.copyOf(columns);
+ for (Ord<Column> column : Ord.zip(columns)) {
+ if (column.e.ordinal != column.i) {
+ throw new IllegalArgumentException();
+ }
+ }
+ this.singletonSpaces =
+ new ArrayList<>(Collections.nCopies(columns.size(), (Space) null));
+ if (combinationsPerPass > Math.pow(2D, columns.size())) {
+ // There are not many columns. We can compute all combinations in the
+ // first pass.
+ for (ImmutableBitSet ordinals
+ : ImmutableBitSet.range(columns.size()).powerSet()) {
+ spaceQueue.add(ordinals);
+ }
+ } else {
+ // We will need to take multiple passes.
+ // Pass 0, just put the empty combination on the queue.
+ // Next pass, we will do its successors, the singleton combinations.
+ spaceQueue.add(ImmutableBitSet.of());
+ spaceQueue.addAll(initialGroups);
+ if (columns.size() < combinationsPerPass) {
+ // There are not very many columns. Compute the singleton
+ // groups in pass 0.
+ for (Column column : columns) {
+ spaceQueue.add(ImmutableBitSet.of(column.ordinal));
+ }
+ }
+ }
+ // The surprise queue must have enough room for all singleton groups
+ // plus all initial groups.
+ surprises = new SurpriseQueue(1 + columns.size() + initialGroups.size(),
+ interestingCount);
+ }
+
+ Profile profile(Iterable<List<Comparable>> rows) {
+ int pass = 0;
+ for (;;) {
+ final List<Space> spaces = nextBatch(pass);
+ if (spaces.isEmpty()) {
+ break;
+ }
+ pass(pass++, spaces, rows);
+ }
+
+ for (Space s : singletonSpaces) {
+ for (ImmutableBitSet dependent : s.dependents) {
+ functionalDependencies.add(
+ new FunctionalDependency(toColumns(dependent),
+ Iterables.getOnlyElement(s.columns)));
+ }
+ }
+ return new Profile(columns, new RowCount(rowCount),
+ functionalDependencies, distributions.values(), uniques);
+ }
+
+ /** Populates {@code spaces} with the next batch.
+ * Returns an empty list if done. */
+ List<Space> nextBatch(int pass) {
+ final List<Space> spaces = new ArrayList<>();
+ loop:
+ for (;;) {
+ if (spaces.size() >= combinationsPerPass) {
+ // We have enough for the next pass.
+ return spaces;
+ }
+ // First, see if there is a space we did have room for last pass.
+ final ImmutableBitSet ordinals = spaceQueue.poll();
+ if (ordinals != null) {
+ final Space space = new Space(this, ordinals, toColumns(ordinals));
+ spaces.add(space);
+ if (ordinals.cardinality() == 1) {
+ singletonSpaces.set(ordinals.nth(0), space);
+ }
+ } else {
+ // Next, take a space that was done last time, generate its
+ // successors, and add the interesting ones to the space queue.
+ for (;;) {
+ final Space doneSpace = doneQueue.poll();
+ if (doneSpace == null) {
+ // There are no more done spaces. We're done.
+ return spaces;
+ }
+ if (doneSpace.columnOrdinals.cardinality() > 4) {
+ // Do not generate successors for groups with lots of columns,
+ // probably initial groups
+ continue;
+ }
+ for (Column column : columns) {
+ if (!doneSpace.columnOrdinals.get(column.ordinal)) {
+ if (pass == 0
+ || doneSpace.columnOrdinals.cardinality() == 0
+ || !containsKey(
+ doneSpace.columnOrdinals.set(column.ordinal))
+ && predicate.apply(Pair.of(doneSpace, column))) {
+ final ImmutableBitSet nextOrdinals =
+ doneSpace.columnOrdinals.set(column.ordinal);
+ if (resultSet.add(nextOrdinals)) {
+ spaceQueue.add(nextOrdinals);
+ }
+ }
+ }
+ }
+ // We've converted at a space into at least one interesting
+ // successor.
+ if (!spaceQueue.isEmpty()) {
+ continue loop;
+ }
+ }
+ }
+ }
+ }
+
+ private boolean containsKey(ImmutableBitSet ordinals) {
+ for (ImmutableBitSet keyOrdinals : keyOrdinalLists) {
+ if (ordinals.contains(keyOrdinals)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ void pass(int pass, List<Space> spaces, Iterable<List<Comparable>> rows) {
+ if (CalcitePrepareImpl.DEBUG) {
+ System.out.println("pass: " + pass
+ + ", spaces.size: " + spaces.size()
+ + ", distributions.size: " + distributions.size());
+ }
+
+ for (Space space : spaces) {
+ space.collector = Collector.create(space, 1000);
+ }
+
+ int rowCount = 0;
+ for (final List<Comparable> row : rows) {
+ ++rowCount;
+ for (Space space : spaces) {
+ space.collector.add(row);
+ }
+ }
+
+ // Populate unique keys.
+ // If [x, y] is a key,
+ // then [x, y, z] is a non-minimal key (therefore not interesting),
+ // and [x, y] => [a] is a functional dependency but not interesting,
+ // and [x, y, z] is not an interesting distribution.
+ for (Space space : spaces) {
+ space.collector.finish();
+ space.collector = null;
+// results.add(space);
+
+ int nonMinimal = 0;
+ dependents:
+ for (Space s : results.getDescendants(space)) {
+ if (s.cardinality == space.cardinality) {
+ // We have discovered a sub-set that has the same cardinality.
+ // The column(s) that are not in common are functionally
+ // dependent.
+ final ImmutableBitSet dependents =
+ space.columnOrdinals.except(s.columnOrdinals);
+ for (int i : s.columnOrdinals) {
+ final Space s1 = singletonSpaces.get(i);
+ final ImmutableBitSet rest = s.columnOrdinals.clear(i);
+ for (ImmutableBitSet dependent : s1.dependents) {
+ if (rest.contains(dependent)) {
+ // The "key" of this functional dependency is not minimal.
+ // For instance, if we know that
+ // (a) -> x
+ // then
+ // (a, b, x) -> y
+ // is not minimal; we could say the same with a smaller key:
+ // (a, b) -> y
+ ++nonMinimal;
+ continue dependents;
+ }
+ }
+ }
+ for (int dependent : dependents) {
+ final Space s1 = singletonSpaces.get(dependent);
+ for (ImmutableBitSet d : s1.dependents) {
+ if (s.columnOrdinals.contains(d)) {
+ ++nonMinimal;
+ continue dependents;
+ }
+ }
+ }
+ space.dependencies.or(dependents.toBitSet());
+ for (int d : dependents) {
+ singletonSpaces.get(d).dependents.add(s.columnOrdinals);
+ }
+ }
+ }
+ if (nonMinimal > 0) {
+ continue;
+ }
+ final String s = space.columns.toString(); // for debug
+ Util.discard(s);
+ double expectedCardinality =
+ expectedCardinality(rowCount, space.columnOrdinals);
+
+ final boolean minimal = nonMinimal == 0
+ && !space.unique
+ && !containsKey(space.columnOrdinals);
+ space.expectedCardinality = expectedCardinality;
+ if (minimal) {
+ final Distribution distribution =
+ new Distribution(space.columns, space.valueSet, space.cardinality,
+ space.nullCount, expectedCardinality, minimal);
+ final double surprise = distribution.surprise();
+ if (CalcitePrepareImpl.DEBUG && surprise > 0.1d) {
+ System.out.println(distribution.columnOrdinals()
+ + " " + distribution.columns
+ + ", cardinality: " + distribution.cardinality
+ + ", expected: " + distribution.expectedCardinality
+ + ", surprise: " + distribution.surprise());
+ }
+ if (surprises.offer(surprise)) {
+ distributions.put(space.columnOrdinals, distribution);
+ keyPoset.add(space.columnOrdinals);
+ doneQueue.add(space);
+ }
+ }
+
+ if (space.cardinality == rowCount) {
+ // We have discovered a new key. It is not a super-set of a key.
+ uniques.add(new Unique(space.columns));
+ keyOrdinalLists.add(space.columnOrdinals);
+ space.unique = true;
+ }
+ }
+
+ if (pass == 0) {
+ this.rowCount = rowCount;
+ }
+ }
+
+ /** Estimates the cardinality of a collection of columns represented by
+ * {@code columnOrdinals}, drawing on existing distributions. */
+ private double cardinality(double rowCount, ImmutableBitSet columns) {
+ final Distribution distribution = distributions.get(columns);
+ if (distribution != null) {
+ return distribution.cardinality;
+ } else {
+ return expectedCardinality(rowCount, columns);
+ }
+ }
+
+ /** Estimates the cardinality of a collection of columns represented by
+ * {@code columnOrdinals}, drawing on existing distributions. Does not
+ * look in the distribution map for this column set. */
+ private double expectedCardinality(double rowCount,
+ ImmutableBitSet columns) {
+ switch (columns.cardinality()) {
+ case 0:
+ return 1d;
+ case 1:
+ return rowCount;
+ default:
+ double c = rowCount;
+ for (ImmutableBitSet bitSet : keyPoset.getParents(columns, true)) {
+ if (bitSet.isEmpty()) {
+ // If the parent is the empty group (i.e. "GROUP BY ()", the grand
+ // total) we cannot improve on the estimate.
+ continue;
+ }
+ final Distribution d1 = distributions.get(bitSet);
+ final double c2 = cardinality(rowCount, columns.except(bitSet));
+ final double d = Lattice.getRowCount(rowCount, d1.cardinality, c2);
+ c = Math.min(c, d);
+ }
+ for (ImmutableBitSet bitSet : keyPoset.getChildren(columns, true)) {
+ final Distribution d1 = distributions.get(bitSet);
+ c = Math.min(c, d1.cardinality);
+ }
+ return c;
+ }
+ }
+
+
+ private ImmutableSortedSet<Column> toColumns(Iterable<Integer> ordinals) {
+ return ImmutableSortedSet.copyOf(Iterables.transform(ordinals, get));
+ }
+ }
+
+ /** Work space for a particular combination of columns. */
+ static class Space {
+ private final Run run;
+ final ImmutableBitSet columnOrdinals;
+ final ImmutableSortedSet<Column> columns;
+ boolean unique;
+ final BitSet dependencies = new BitSet();
+ final Set<ImmutableBitSet> dependents = new HashSet<>();
+ double expectedCardinality;
+ Collector collector;
+ /** Assigned by {@link Collector#finish()}. */
+ int nullCount;
+ /** Number of distinct values. Null is counted as a value, if present.
+ * Assigned by {@link Collector#finish()}. */
+ int cardinality;
+ /** Assigned by {@link Collector#finish()}. */
+ SortedSet<Comparable> valueSet;
+
+ Space(Run run, ImmutableBitSet columnOrdinals, Iterable<Column> columns) {
+ this.run = run;
+ this.columnOrdinals = columnOrdinals;
+ this.columns = ImmutableSortedSet.copyOf(columns);
+ }
+
+ @Override public int hashCode() {
+ return columnOrdinals.hashCode();
+ }
+
+ @Override public boolean equals(Object o) {
+ return o == this
+ || o instanceof Space
+ && columnOrdinals.equals(((Space) o).columnOrdinals);
+ }
+
+ /** Returns the distribution created from this space, or null if no
+ * distribution has been registered yet. */
+ public Distribution distribution() {
+ return run.distributions.get(columnOrdinals);
+ }
+
+ double surprise() {
+ return SimpleProfiler.surprise(expectedCardinality, cardinality);
+ }
+ }
+
+ /** Builds a {@link org.apache.calcite.profile.ProfilerImpl}. */
+ public static class Builder {
+ int combinationsPerPass = 100;
+ Predicate<Pair<Space, Column>> predicate = Predicates.alwaysTrue();
+
+ public ProfilerImpl build() {
+ return new ProfilerImpl(combinationsPerPass, 200, predicate);
+ }
+
+ public Builder withPassSize(int passSize) {
+ this.combinationsPerPass = passSize;
+ return this;
+ }
+
+ public Builder withMinimumSurprise(double v) {
+ predicate =
+ new PredicateImpl<Pair<Space, Column>>() {
+ public boolean test(Pair<Space, Column> spaceColumnPair) {
+ final Space space = spaceColumnPair.left;
+ return false;
+ }
+ };
+ return this;
+ }
+ }
+
+ /** Collects values of a column or columns. */
+ abstract static class Collector {
+ protected final Space space;
+
+ Collector(Space space) {
+ this.space = space;
+ }
+
+ abstract void add(List<Comparable> row);
+ abstract void finish();
+
+ /** Creates an initial collector of the appropriate kind. */
+ public static Collector create(Space space, int sketchThreshold) {
+ final List<Integer> columnOrdinalList = space.columnOrdinals.asList();
+ if (columnOrdinalList.size() == 1) {
+ return new SingletonCollector(space, columnOrdinalList.get(0),
+ sketchThreshold);
+ } else {
+ return new CompositeCollector(space,
+ (int[]) Primitive.INT.toArray(columnOrdinalList), sketchThreshold);
+ }
+ }
+ }
+
+ /** Collector that collects values of a single column. */
+ static class SingletonCollector extends Collector {
+ final SortedSet<Comparable> values = new TreeSet<>();
+ final int columnOrdinal;
+ final int sketchThreshold;
+ int nullCount = 0;
+
+ SingletonCollector(Space space, int columnOrdinal, int sketchThreshold) {
+ super(space);
+ this.columnOrdinal = columnOrdinal;
+ this.sketchThreshold = sketchThreshold;
+ }
+
+ public void add(List<Comparable> row) {
+ final Comparable v = row.get(columnOrdinal);
+ if (v == NullSentinel.INSTANCE) {
+ nullCount++;
+ } else {
+ if (values.add(v) && values.size() == sketchThreshold) {
+ // Too many values. Switch to a sketch collector.
+ final HllSingletonCollector collector =
+ new HllSingletonCollector(space, columnOrdinal);
+ for (Comparable value : values) {
+ collector.add(value);
+ }
+ space.collector = collector;
+ }
+ }
+ }
+
+ public void finish() {
+ space.nullCount = nullCount;
+ space.cardinality = values.size() + (nullCount > 0 ? 1 : 0);
+ space.valueSet = values.size() < 20 ? values : null;
+ }
+ }
+
+ /** Collector that collects two or more column values in a tree set. */
+ static class CompositeCollector extends Collector {
+ protected static final ImmutableBitSet OF = ImmutableBitSet.of(2, 13);
+ final Set<FlatLists.ComparableList> values = new HashSet<>();
+ final int[] columnOrdinals;
+ final Comparable[] columnValues;
+ int nullCount = 0;
+ private final int sketchThreshold;
+
+ CompositeCollector(Space space, int[] columnOrdinals, int sketchThreshold) {
+ super(space);
+ this.columnOrdinals = columnOrdinals;
+ this.columnValues = new Comparable[columnOrdinals.length];
+ this.sketchThreshold = sketchThreshold;
+ }
+
+ public void add(List<Comparable> row) {
+ if (space.columnOrdinals.equals(OF)) {
+ Util.discard(0);
+ }
+ int nullCountThisRow = 0;
+ for (int i = 0, length = columnOrdinals.length; i < length; i++) {
+ final Comparable value = row.get(columnOrdinals[i]);
+ if (value == NullSentinel.INSTANCE) {
+ if (nullCountThisRow++ == 0) {
+ nullCount++;
+ }
+ }
+ columnValues[i] = value;
+ }
+ //noinspection unchecked
+ if (((Set) values).add(FlatLists.copyOf(columnValues))
+ && values.size() == sketchThreshold) {
+ // Too many values. Switch to a sketch collector.
+ final HllCompositeCollector collector =
+ new HllCompositeCollector(space, columnOrdinals);
+ final List<Comparable> list =
+ new ArrayList<>(
+ Collections.nCopies(columnOrdinals[columnOrdinals.length - 1]
+ + 1,
+ (Comparable) null));
+ for (FlatLists.ComparableList value : this.values) {
+ for (int i = 0; i < value.size(); i++) {
+ Comparable c = (Comparable) value.get(i);
+ list.set(columnOrdinals[i], c);
+ }
+ collector.add(list);
+ }
+ space.collector = collector;
+ }
+ }
+
+ public void finish() {
+ // number of input rows (not distinct values)
+ // that were null or partially null
+ space.nullCount = nullCount;
+ space.cardinality = values.size() + (nullCount > 0 ? 1 : 0);
+ space.valueSet = null;
+ }
+
+ }
+
+ /** Collector that collects two or more column values into a HyperLogLog
+ * sketch. */
+ abstract static class HllCollector extends Collector {
+ final HllSketch sketch;
+ int nullCount = 0;
+
+ static final long[] NULL_BITS = {0x9f77d57e93167a16L};
+
+ HllCollector(Space space) {
+ super(space);
+ this.sketch = HllSketch.builder().build();
+ }
+
+ protected void add(Comparable value) {
+ if (value == NullSentinel.INSTANCE) {
+ sketch.update(NULL_BITS);
+ } else if (value instanceof String) {
+ sketch.update((String) value);
+ } else if (value instanceof Double) {
+ sketch.update((Double) value);
+ } else if (value instanceof Float) {
+ sketch.update((Float) value);
+ } else if (value instanceof Long) {
+ sketch.update((Long) value);
+ } else if (value instanceof Number) {
+ sketch.update(((Number) value).longValue());
+ } else {
+ sketch.update(value.toString());
+ }
+ }
+
+ public void finish() {
+ space.nullCount = nullCount;
+ space.cardinality = (int) sketch.getEstimate();
+ space.valueSet = null;
+ }
+ }
+
+ /** Collector that collects one column value into a HyperLogLog sketch. */
+ static class HllSingletonCollector extends HllCollector {
+ final int columnOrdinal;
+
+ HllSingletonCollector(Space space, int columnOrdinal) {
+ super(space);
+ this.columnOrdinal = columnOrdinal;
+ }
+
+ public void add(List<Comparable> row) {
+ final Comparable value = row.get(columnOrdinal);
+ if (value == NullSentinel.INSTANCE) {
+ nullCount++;
+ sketch.update(NULL_BITS);
+ } else {
+ add(value);
+ }
+ }
+ }
+
+ /** Collector that collects two or more column values into a HyperLogLog
+ * sketch. */
+ static class HllCompositeCollector extends HllCollector {
+ private final int[] columnOrdinals;
+ private final ByteBuffer buf = ByteBuffer.allocate(1024);
+
+ HllCompositeCollector(Space space, int[] columnOrdinals) {
+ super(space);
+ this.columnOrdinals = columnOrdinals;
+ }
+
+ public void add(List<Comparable> row) {
+ if (space.columnOrdinals.equals(OF)) {
+ Util.discard(0);
+ }
+ int nullCountThisRow = 0;
+ buf.clear();
+ for (int columnOrdinal : columnOrdinals) {
+ final Comparable value = row.get(columnOrdinal);
+ if (value == NullSentinel.INSTANCE) {
+ if (nullCountThisRow++ == 0) {
+ nullCount++;
+ }
+ buf.put((byte) 0);
+ } else if (value instanceof String) {
+ buf.put((byte) 1)
+ .put(((String) value).getBytes(StandardCharsets.UTF_8));
+ } else if (value instanceof Double) {
+ buf.put((byte) 2).putDouble((Double) value);
+ } else if (value instanceof Float) {
+ buf.put((byte) 3).putFloat((Float) value);
+ } else if (value instanceof Long) {
+ buf.put((byte) 4).putLong((Long) value);
+ } else if (value instanceof Integer) {
+ buf.put((byte) 5).putInt((Integer) value);
+ } else if (value instanceof Boolean) {
+ buf.put((Boolean) value ? (byte) 6 : (byte) 7);
+ } else {
+ buf.put((byte) 8)
+ .put(value.toString().getBytes(StandardCharsets.UTF_8));
+ }
+ }
+ sketch.update(Arrays.copyOf(buf.array(), buf.position()));
+ }
+ }
+
+ /** A priority queue of the last N surprise values. Accepts a new value if
+ * the queue is not yet full, or if its value is greater than the median value
+ * over the last N. */
+ static class SurpriseQueue {
+ private final int warmUpCount;
+ private final int size;
+ int count = 0;
+ final Deque<Double> deque = new ArrayDeque<>();
+ final PriorityQueue<Double> priorityQueue =
+ new PriorityQueue<>(11, Ordering.<Double>natural());
+
+ SurpriseQueue(int warmUpCount, int size) {
+ this.warmUpCount = warmUpCount;
+ this.size = size;
+ Preconditions.checkArgument(warmUpCount > 3);
+ Preconditions.checkArgument(size > 0);
+ }
+
+ @Override public String toString() {
+ return "min: " + priorityQueue.peek()
+ + ", contents: " + deque.toString();
+ }
+
+ boolean isValid() {
+ if (CalcitePrepareImpl.DEBUG) {
+ System.out.println(toString());
+ }
+ assert deque.size() == priorityQueue.size();
+ if (count > size) {
+ assert deque.size() == size;
+ }
+ return true;
+ }
+
+ boolean offer(double d) {
+ boolean b;
+ if (count++ < warmUpCount || d > priorityQueue.peek()) {
+ if (priorityQueue.size() >= size) {
+ priorityQueue.remove(deque.pop());
+ }
+ priorityQueue.add(d);
+ deque.add(d);
+ b = true;
+ } else {
+ b = false;
+ }
+ if (CalcitePrepareImpl.DEBUG) {
+ System.out.println("offer " + d
+ + " min " + priorityQueue.peek()
+ + " accepted " + b);
+ }
+ return b;
+ }
+ }
+}
+
+// End ProfilerImpl.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java b/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java
new file mode 100644
index 0000000..c9b00d0
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/SimpleProfiler.java
@@ -0,0 +1,341 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.linq4j.Ord;
+import org.apache.calcite.materialize.Lattice;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.FlatLists;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.PartiallyOrderedSet;
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Function;
+import com.google.common.collect.ImmutableSortedSet;
+import com.google.common.collect.Iterables;
+
+import java.util.ArrayList;
+import java.util.BitSet;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.SortedSet;
+import java.util.TreeSet;
+import javax.annotation.Nonnull;
+
+/**
+ * Basic implementation of {@link Profiler}.
+ */
+public class SimpleProfiler implements Profiler {
+ private static final Function<List<Comparable>, Comparable> ONLY =
+ new Function<List<Comparable>, Comparable>() {
+ public Comparable apply(List<Comparable> input) {
+ return Iterables.getOnlyElement(input);
+ }
+ };
+
+ public Profile profile(Iterable<List<Comparable>> rows,
+ final List<Column> columns, Collection<ImmutableBitSet> initialGroups) {
+ Util.discard(initialGroups); // this profiler ignores initial groups
+ return new Run(columns).profile(rows);
+ }
+
+ /** Returns a measure of how much an actual value differs from expected.
+ * The formula is {@code abs(expected - actual) / (expected + actual)}.
+ *
+ * <p>Examples:<ul>
+ * <li>surprise(e, a) is always between 0 and 1;
+ * <li>surprise(e, a) is 0 if e = a;
+ * <li>surprise(e, 0) is 1 if e > 0;
+ * <li>surprise(0, a) is 1 if a > 0;
+ * <li>surprise(5, 0) is 100%;
+ * <li>surprise(5, 3) is 25%;
+ * <li>surprise(5, 4) is 11%;
+ * <li>surprise(5, 5) is 0%;
+ * <li>surprise(5, 6) is 9%;
+ * <li>surprise(5, 16) is 52%;
+ * <li>surprise(5, 100) is 90%;
+ * </ul>
+ *
+ * @param expected Expected value
+ * @param actual Actual value
+ * @return Measure of how much expected deviates from actual
+ */
+ public static double surprise(double expected, double actual) {
+ if (expected == actual) {
+ return 0d;
+ }
+ final double sum = expected + actual;
+ if (sum <= 0d) {
+ return 1d;
+ }
+ return Math.abs(expected - actual) / sum;
+ }
+
+ /** A run of the profiler. */
+ static class Run {
+ private final List<Column> columns;
+ final List<Space> spaces = new ArrayList<>();
+ final List<Space> singletonSpaces;
+ final List<Statistic> statistics = new ArrayList<>();
+ final PartiallyOrderedSet.Ordering<Space> ordering =
+ new PartiallyOrderedSet.Ordering<Space>() {
+ public boolean lessThan(Space e1, Space e2) {
+ return e2.columnOrdinals.contains(e1.columnOrdinals);
+ }
+ };
+ final PartiallyOrderedSet<Space> results =
+ new PartiallyOrderedSet<>(ordering);
+ final PartiallyOrderedSet<Space> keyResults =
+ new PartiallyOrderedSet<>(ordering);
+ private final List<ImmutableBitSet> keyOrdinalLists =
+ new ArrayList<>();
+ final Function<Integer, Column> get =
+ new Function<Integer, Column>() {
+ public Column apply(Integer input) {
+ return columns.get(input);
+ }
+ };
+
+ Run(final List<Column> columns) {
+ for (Ord<Column> column : Ord.zip(columns)) {
+ if (column.e.ordinal != column.i) {
+ throw new IllegalArgumentException();
+ }
+ }
+ this.columns = columns;
+ this.singletonSpaces =
+ new ArrayList<>(Collections.nCopies(columns.size(), (Space) null));
+ for (ImmutableBitSet ordinals
+ : ImmutableBitSet.range(columns.size()).powerSet()) {
+ final Space space = new Space(ordinals, toColumns(ordinals));
+ spaces.add(space);
+ if (ordinals.cardinality() == 1) {
+ singletonSpaces.set(ordinals.nth(0), space);
+ }
+ }
+ }
+
+ Profile profile(Iterable<List<Comparable>> rows) {
+ final List<Comparable> values = new ArrayList<>();
+ int rowCount = 0;
+ for (final List<Comparable> row : rows) {
+ ++rowCount;
+ joint:
+ for (Space space : spaces) {
+ values.clear();
+ for (Column column : space.columns) {
+ final Comparable value = row.get(column.ordinal);
+ values.add(value);
+ if (value == NullSentinel.INSTANCE) {
+ space.nullCount++;
+ continue joint;
+ }
+ }
+ space.values.add(FlatLists.ofComparable(values));
+ }
+ }
+
+ // Populate unique keys
+ // If [x, y] is a key,
+ // then [x, y, z] is a key but not intersecting,
+ // and [x, y] => [a] is a functional dependency but not interesting,
+ // and [x, y, z] is not an interesting distribution.
+ final Map<ImmutableBitSet, Distribution> distributions = new HashMap<>();
+ for (Space space : spaces) {
+ if (space.values.size() == rowCount
+ && !containsKey(space.columnOrdinals, false)) {
+ // We have discovered a new key.
+ // It is not an existing key or a super-set of a key.
+ statistics.add(new Unique(space.columns));
+ space.unique = true;
+ keyOrdinalLists.add(space.columnOrdinals);
+ }
+
+ int nonMinimal = 0;
+ dependents:
+ for (Space s : results.getDescendants(space)) {
+ if (s.cardinality() == space.cardinality()) {
+ // We have discovered a sub-set that has the same cardinality.
+ // The column(s) that are not in common are functionally
+ // dependent.
+ final ImmutableBitSet dependents =
+ space.columnOrdinals.except(s.columnOrdinals);
+ for (int i : s.columnOrdinals) {
+ final Space s1 = singletonSpaces.get(i);
+ final ImmutableBitSet rest = s.columnOrdinals.clear(i);
+ for (ImmutableBitSet dependent : s1.dependents) {
+ if (rest.contains(dependent)) {
+ // The "key" of this functional dependency is not minimal.
+ // For instance, if we know that
+ // (a) -> x
+ // then
+ // (a, b, x) -> y
+ // is not minimal; we could say the same with a smaller key:
+ // (a, b) -> y
+ ++nonMinimal;
+ continue dependents;
+ }
+ }
+ }
+ for (int dependent : dependents) {
+ final Space s1 = singletonSpaces.get(dependent);
+ for (ImmutableBitSet d : s1.dependents) {
+ if (s.columnOrdinals.contains(d)) {
+ ++nonMinimal;
+ continue dependents;
+ }
+ }
+ }
+ space.dependencies.or(dependents.toBitSet());
+ for (int d : dependents) {
+ singletonSpaces.get(d).dependents.add(s.columnOrdinals);
+ }
+ }
+ }
+
+ int nullCount;
+ final SortedSet<Comparable> valueSet;
+ if (space.columns.size() == 1) {
+ nullCount = space.nullCount;
+ valueSet = ImmutableSortedSet.copyOf(
+ Iterables.transform(space.values, ONLY));
+ } else {
+ nullCount = -1;
+ valueSet = null;
+ }
+ double expectedCardinality;
+ final double cardinality = space.cardinality();
+ switch (space.columns.size()) {
+ case 0:
+ expectedCardinality = 1d;
+ break;
+ case 1:
+ expectedCardinality = rowCount;
+ break;
+ default:
+ expectedCardinality = rowCount;
+ for (Column column : space.columns) {
+ final Distribution d1 =
+ distributions.get(ImmutableBitSet.of(column.ordinal));
+ final Distribution d2 =
+ distributions.get(space.columnOrdinals.clear(column.ordinal));
+ final double d =
+ Lattice.getRowCount(rowCount, d1.cardinality, d2.cardinality);
+ expectedCardinality = Math.min(expectedCardinality, d);
+ }
+ }
+ final boolean minimal = nonMinimal == 0
+ && !space.unique
+ && !containsKey(space.columnOrdinals, true);
+ final Distribution distribution =
+ new Distribution(space.columns, valueSet, cardinality, nullCount,
+ expectedCardinality, minimal);
+ statistics.add(distribution);
+ distributions.put(space.columnOrdinals, distribution);
+
+ if (distribution.minimal) {
+ results.add(space);
+ }
+ }
+
+ for (Space s : singletonSpaces) {
+ for (ImmutableBitSet dependent : s.dependents) {
+ if (!containsKey(dependent, false)
+ && !hasNull(dependent)) {
+ statistics.add(
+ new FunctionalDependency(toColumns(dependent),
+ Iterables.getOnlyElement(s.columns)));
+ }
+ }
+ }
+ return new Profile(columns, new RowCount(rowCount),
+ Iterables.filter(statistics, FunctionalDependency.class),
+ Iterables.filter(statistics, Distribution.class),
+ Iterables.filter(statistics, Unique.class));
+ }
+
+ /** Returns whether a set of column ordinals
+ * matches or contains a unique key.
+ * If {@code strict}, it must contain a unique key. */
+ private boolean containsKey(ImmutableBitSet ordinals, boolean strict) {
+ for (ImmutableBitSet keyOrdinals : keyOrdinalLists) {
+ if (ordinals.contains(keyOrdinals)) {
+ return !(strict && keyOrdinals.equals(ordinals));
+ }
+ }
+ return false;
+ }
+
+ private boolean hasNull(ImmutableBitSet columnOrdinals) {
+ for (Integer columnOrdinal : columnOrdinals) {
+ if (singletonSpaces.get(columnOrdinal).nullCount > 0) {
+ return true;
+ }
+ }
+ return false;
+ }
+
+ private ImmutableSortedSet<Column> toColumns(Iterable<Integer> ordinals) {
+ return ImmutableSortedSet.copyOf(Iterables.transform(ordinals, get));
+ }
+ }
+
+ /** Work space for a particular combination of columns. */
+ static class Space implements Comparable<Space> {
+ final ImmutableBitSet columnOrdinals;
+ final ImmutableSortedSet<Column> columns;
+ int nullCount;
+ final SortedSet<FlatLists.ComparableList<Comparable>> values =
+ new TreeSet<>();
+ boolean unique;
+ final BitSet dependencies = new BitSet();
+ final Set<ImmutableBitSet> dependents = new HashSet<>();
+
+ Space(ImmutableBitSet columnOrdinals, Iterable<Column> columns) {
+ this.columnOrdinals = columnOrdinals;
+ this.columns = ImmutableSortedSet.copyOf(columns);
+ }
+
+ @Override public int hashCode() {
+ return columnOrdinals.hashCode();
+ }
+
+ @Override public boolean equals(Object o) {
+ return o == this
+ || o instanceof Space
+ && columnOrdinals.equals(((Space) o).columnOrdinals);
+ }
+
+ public int compareTo(@Nonnull Space o) {
+ return columnOrdinals.equals(o.columnOrdinals) ? 0
+ : columnOrdinals.contains(o.columnOrdinals) ? 1
+ : -1;
+ }
+
+ /** Number of distinct values. Null is counted as a value, if present. */
+ public double cardinality() {
+ return values.size() + (nullCount > 0 ? 1 : 0);
+ }
+ }
+}
+
+// End SimpleProfiler.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/profile/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/profile/package-info.java b/core/src/main/java/org/apache/calcite/profile/package-info.java
new file mode 100644
index 0000000..c30e9e0
--- /dev/null
+++ b/core/src/main/java/org/apache/calcite/profile/package-info.java
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Utilities to analyze data sets.
+ */
+@PackageMarker
+package org.apache.calcite.profile;
+
+import org.apache.calcite.avatica.util.PackageMarker;
+
+// End package-info.java
[2/4] calcite git commit: [CALCITE-1616] Data profiler
Posted by jh...@apache.org.
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
index ad808a9..a3db6db 100644
--- a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
+++ b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
@@ -16,7 +16,11 @@
*/
package org.apache.calcite.util;
-import java.util.AbstractList;
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
import java.util.AbstractSet;
import java.util.ArrayDeque;
import java.util.ArrayList;
@@ -61,7 +65,21 @@ import java.util.Set;
* @param <E> Element type
*/
public class PartiallyOrderedSet<E> extends AbstractSet<E> {
+ /** Ordering that orders bit sets by inclusion.
+ *
+ * <p>For example, the children of 14 (1110) are 12 (1100), 10 (1010) and
+ * 6 (0110).
+ */
+ public static final Ordering<ImmutableBitSet> BIT_SET_INCLUSION_ORDERING =
+ new Ordering<ImmutableBitSet>() {
+ public boolean lessThan(ImmutableBitSet e1, ImmutableBitSet e2) {
+ return e1.contains(e2);
+ }
+ };
+
private final Map<E, Node<E>> map;
+ private final Function<E, Iterable<E>> parentFunction;
+ private final Function<E, Iterable<E>> childFunction;
private final Ordering<E> ordering;
/**
@@ -71,7 +89,9 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
private final Node<E> topNode;
private final Node<E> bottomNode;
- private static final boolean DEBUG = Math.random() >= 0;
+ /** Whether to check internal consistency all the time.
+ * False unless you specify "-Dcalcite.debug" on the command line. */
+ private static final boolean DEBUG = Util.getBooleanProperty("calcite.debug");
/**
* Creates a partially-ordered set.
@@ -79,7 +99,19 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* @param ordering Ordering relation
*/
public PartiallyOrderedSet(Ordering<E> ordering) {
- this(ordering, new HashMap<E, Node<E>>());
+ this(ordering, new HashMap<E, Node<E>>(), null, null);
+ }
+
+ /**
+ * Creates a partially-ordered set with a parent-generating function.
+ *
+ * @param ordering Ordering relation
+ * @param parentFunction Function to compute parents of a node; may be null
+ */
+ public PartiallyOrderedSet(Ordering<E> ordering,
+ Function<E, Iterable<E>> childFunction,
+ Function<E, Iterable<E>> parentFunction) {
+ this(ordering, new HashMap<E, Node<E>>(), childFunction, parentFunction);
}
/**
@@ -90,7 +122,8 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* @param collection Initial contents of partially-ordered set
*/
public PartiallyOrderedSet(Ordering<E> ordering, Collection<E> collection) {
- this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2));
+ this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2), null,
+ null);
addAll(collection);
}
@@ -99,10 +132,15 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
*
* @param ordering Ordering relation
* @param map Map from values to nodes
+ * @param parentFunction Function to compute parents of a node; may be null
*/
- private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map) {
+ private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map,
+ Function<E, Iterable<E>> childFunction,
+ Function<E, Iterable<E>> parentFunction) {
this.ordering = ordering;
this.map = map;
+ this.childFunction = childFunction;
+ this.parentFunction = parentFunction;
this.topNode = new TopBottomNode<>(true);
this.bottomNode = new TopBottomNode<>(false);
this.topNode.childList.add(bottomNode);
@@ -528,7 +566,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* Returns the values in this partially-ordered set that are less-than
* a given value and there are no intervening values.
*
- * <p>If the value is not in this set, returns the empty list.</p>
+ * <p>If the value is not in this set, returns null.
*
* @see #getDescendants
*
@@ -537,15 +575,33 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* value
*/
public List<E> getChildren(E e) {
+ return getChildren(e, false);
+ }
+
+ /**
+ * Returns the values in this partially-ordered set that are less-than
+ * a given value and there are no intervening values.
+ *
+ * <p>If the value is not in this set, returns null if {@code hypothetical}
+ * is false.
+ *
+ * @see #getDescendants
+ *
+ * @param e Value
+ * @param hypothetical Whether to generate a list if value is not in the set
+ * @return List of values in this set that are directly less than the given
+ * value
+ */
+ public List<E> getChildren(E e, boolean hypothetical) {
final Node<E> node = map.get(e);
if (node == null) {
- return null;
- } else if (node.childList.get(0).e == null) {
- // child list contains bottom element, so officially there are no
- // children
- return Collections.emptyList();
+ if (hypothetical) {
+ return strip(findChildren(e));
+ } else {
+ return null;
+ }
} else {
- return new StripList<>(node.childList);
+ return strip(node.childList);
}
}
@@ -553,7 +609,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* Returns the values in this partially-ordered set that are greater-than
* a given value and there are no intervening values.
*
- * <p>If the value is not in this set, returns the empty list.</p>
+ * <p>If the value is not in this set, returns null.
*
* @see #getAncestors
*
@@ -562,32 +618,61 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* given value
*/
public List<E> getParents(E e) {
+ return getParents(e, false);
+ }
+
+ /**
+ * Returns the values in this partially-ordered set that are greater-than
+ * a given value and there are no intervening values.
+ *
+ * <p>If the value is not in this set, returns {@code null} if
+ * {@code hypothetical} is false.
+ *
+ * @see #getAncestors
+ *
+ * @param e Value
+ * @param hypothetical Whether to generate a list if value is not in the set
+ * @return List of values in this set that are directly greater than the
+ * given value
+ */
+ public List<E> getParents(E e, boolean hypothetical) {
final Node<E> node = map.get(e);
if (node == null) {
- return null;
- } else if (node.parentList.get(0).e == null) {
- // parent list contains top element, so officially there are no
- // parents
- return Collections.emptyList();
+ if (hypothetical) {
+ if (parentFunction != null) {
+ final List<E> list = new ArrayList<>();
+ closure(parentFunction, e, list, new HashSet<E>());
+ return list;
+ } else {
+ return ImmutableList.copyOf(strip(findParents(e)));
+ }
+ } else {
+ return null;
+ }
} else {
- return new StripList<>(node.parentList);
+ return strip(node.parentList);
}
}
- public List<E> getNonChildren() {
- if (topNode.childList.size() == 1
- && topNode.childList.get(0).e == null) {
- return Collections.emptyList();
+ private void closure(Function<E, Iterable<E>> generator, E e, List<E> list,
+ Set<E> set) {
+ for (E p : Preconditions.checkNotNull(generator.apply(e))) {
+ if (set.add(e)) {
+ if (map.containsKey(p)) {
+ list.add(p);
+ } else {
+ closure(generator, p, list, set);
+ }
+ }
}
- return new StripList<>(topNode.childList);
+ }
+
+ public List<E> getNonChildren() {
+ return strip(topNode.childList);
}
public List<E> getNonParents() {
- if (bottomNode.parentList.size() == 1
- && bottomNode.parentList.get(0).e == null) {
- return Collections.emptyList();
- }
- return new StripList<>(bottomNode.parentList);
+ return strip(bottomNode.parentList);
}
@Override public void clear() {
@@ -612,6 +697,57 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
return descendants(e, true);
}
+ /** Returns a list, backed by a list of
+ * {@link org.apache.calcite.util.PartiallyOrderedSet.Node}s, that strips
+ * away the node and returns the element inside.
+ *
+ * @param <E> Element type
+ */
+ public static <E> List<E> strip(List<Node<E>> list) {
+ if (list.size() == 1
+ && list.get(0).e == null) {
+ // If parent list contains top element, a node whose element is null,
+ // officially there are no parents.
+ // Similarly child list and bottom element.
+ return ImmutableList.of();
+ }
+ return Lists.transform(list,
+ new Function<Node<E>, E>() {
+ public E apply(Node<E> node) {
+ return node.e;
+ }
+ });
+ }
+
+ /** Converts an iterable of nodes into the list of the elements inside.
+ * If there is one node whose element is null, it represents a list
+ * containing either the top or bottom element, so we return the empty list.
+ *
+ * @param <E> Element type
+ */
+ private static <E> ImmutableList<E> strip(Iterable<Node<E>> iterable) {
+ final Iterator<Node<E>> iterator = iterable.iterator();
+ if (!iterator.hasNext()) {
+ return ImmutableList.of();
+ }
+ Node<E> node = iterator.next();
+ if (!iterator.hasNext()) {
+ if (node.e == null) {
+ return ImmutableList.of();
+ } else {
+ return ImmutableList.of(node.e);
+ }
+ }
+ final ImmutableList.Builder<E> builder = ImmutableList.builder();
+ for (;;) {
+ builder.add(node.e);
+ if (!iterator.hasNext()) {
+ return builder.build();
+ }
+ node = iterator.next();
+ }
+ }
+
/**
* Returns a list of values in the set that are less-than a given value.
* The list is in topological order but order is otherwise
@@ -725,27 +861,6 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
*/
boolean lessThan(E e1, E e2);
}
-
- /** List, backed by a list of {@link Node}s, that strips away the
- * node and returns the element inside.
- *
- * @param <E> Element type
- */
- private static class StripList<E> extends AbstractList<E> {
- private final List<Node<E>> list;
-
- StripList(List<Node<E>> list) {
- this.list = list;
- }
-
- @Override public E get(int index) {
- return list.get(index).e;
- }
-
- @Override public int size() {
- return list.size();
- }
- }
}
// End PartiallyOrderedSet.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
new file mode 100644
index 0000000..e45db8b
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
@@ -0,0 +1,682 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.linq4j.AbstractEnumerable;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.PredicateImpl;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.Matchers;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.JsonBuilder;
+import org.apache.calcite.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.base.Supplier;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Ordering;
+
+import org.hamcrest.Matcher;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Unit tests for {@link Profiler}.
+ */
+public class ProfilerTest {
+ @Test public void testProfileZeroRows() throws Exception {
+ final String sql = "select * from \"scott\".dept where false";
+ sql(sql).unordered(
+ "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:0.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:0.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:0.0}",
+ "{type:distribution,columns:[DEPTNO],values:[],cardinality:0.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:0.0}",
+ "{type:distribution,columns:[DNAME],values:[],cardinality:0.0}",
+ "{type:distribution,columns:[LOC],values:[],cardinality:0.0}",
+ "{type:distribution,columns:[],cardinality:0.0}",
+ "{type:rowCount,rowCount:0}",
+ "{type:unique,columns:[]}");
+ }
+
+ @Test public void testProfileOneRow() throws Exception {
+ final String sql = "select * from \"scott\".dept where deptno = 10";
+ sql(sql).unordered(
+ "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:1.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:1.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:1.0}",
+ "{type:distribution,columns:[DEPTNO],values:[10],cardinality:1.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:1.0}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING],cardinality:1.0}",
+ "{type:distribution,columns:[LOC],values:[NEWYORK],cardinality:1.0}",
+ "{type:distribution,columns:[],cardinality:1.0}",
+ "{type:rowCount,rowCount:1}",
+ "{type:unique,columns:[]}");
+ }
+
+ @Test public void testProfileTwoRows() throws Exception {
+ final String sql = "select * from \"scott\".dept where deptno in (10, 20)";
+ sql(sql).unordered(
+ "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:2.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:2.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:2.0}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20],cardinality:2.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:2.0}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH],cardinality:2.0}",
+ "{type:distribution,columns:[LOC],values:[DALLAS,NEWYORK],cardinality:2.0}",
+ "{type:distribution,columns:[],cardinality:1.0}",
+ "{type:rowCount,rowCount:2}",
+ "{type:unique,columns:[DEPTNO]}",
+ "{type:unique,columns:[DNAME]}",
+ "{type:unique,columns:[LOC]}");
+ }
+
+ @Test public void testProfileScott() throws Exception {
+ final String sql = "select * from \"scott\".emp\n"
+ + "join \"scott\".dept using (deptno)";
+ sql(sql)
+ .where(new PredicateImpl<Profiler.Statistic>() {
+ public boolean test(Profiler.Statistic statistic) {
+ return !(statistic instanceof Profiler.Distribution)
+ || ((Profiler.Distribution) statistic).cardinality < 14
+ && ((Profiler.Distribution) statistic).minimal;
+ }
+ }).unordered(
+ "{type:distribution,columns:[COMM,DEPTNO0],cardinality:5.0}",
+ "{type:distribution,columns:[COMM,DEPTNO],cardinality:5.0}",
+ "{type:distribution,columns:[COMM,DNAME],cardinality:5.0}",
+ "{type:distribution,columns:[COMM,LOC],cardinality:5.0}",
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:3.0}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0}",
+ "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0}",
+ "{type:distribution,columns:[JOB,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[JOB,DEPTNO0],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,DEPTNO],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,DNAME],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,LOC],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,MGR,DEPTNO0],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR,DEPTNO],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR,DNAME],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR,LOC],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR],cardinality:8.0}",
+ "{type:distribution,columns:[JOB,SAL],cardinality:12.0}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0}",
+ "{type:distribution,columns:[MGR,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[MGR,DEPTNO0],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,DEPTNO],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,DNAME],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,LOC],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,SAL],cardinality:12.0}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1}",
+ "{type:distribution,columns:[SAL,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[SAL,DEPTNO0],cardinality:12.0}",
+ "{type:distribution,columns:[SAL,DEPTNO],cardinality:12.0}",
+ "{type:distribution,columns:[SAL,DNAME],cardinality:12.0}",
+ "{type:distribution,columns:[SAL,LOC],cardinality:12.0}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0}",
+ "{type:distribution,columns:[],cardinality:1.0}",
+ "{type:fd,columns:[DEPTNO0],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[DEPTNO0],dependentColumn:DNAME}",
+ "{type:fd,columns:[DEPTNO0],dependentColumn:LOC}",
+ "{type:fd,columns:[DEPTNO],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[DEPTNO],dependentColumn:DNAME}",
+ "{type:fd,columns:[DEPTNO],dependentColumn:LOC}",
+ "{type:fd,columns:[DNAME],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[DNAME],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[DNAME],dependentColumn:LOC}",
+ "{type:fd,columns:[JOB],dependentColumn:COMM}",
+ "{type:fd,columns:[LOC],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[LOC],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[LOC],dependentColumn:DNAME}",
+ "{type:fd,columns:[SAL],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[SAL],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[SAL],dependentColumn:DNAME}",
+ "{type:fd,columns:[SAL],dependentColumn:JOB}",
+ "{type:fd,columns:[SAL],dependentColumn:LOC}",
+ "{type:fd,columns:[SAL],dependentColumn:MGR}",
+ "{type:rowCount,rowCount:14}",
+ "{type:unique,columns:[EMPNO]}",
+ "{type:unique,columns:[ENAME]}",
+ "{type:unique,columns:[HIREDATE,DEPTNO0]}",
+ "{type:unique,columns:[HIREDATE,DEPTNO]}",
+ "{type:unique,columns:[HIREDATE,DNAME]}",
+ "{type:unique,columns:[HIREDATE,LOC]}",
+ "{type:unique,columns:[HIREDATE,SAL]}",
+ "{type:unique,columns:[JOB,HIREDATE]}");
+ }
+
+ /** As {@link #testProfileScott()}, but prints only the most surprising
+ * distributions. */
+ @Test public void testProfileScott2() throws Exception {
+ scott().factory(Fluid.SIMPLE_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0,expectedCardinality:12.682618485430247,surprise:0.4344728973121492}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR,COMM],cardinality:5.0,expectedCardinality:11.675074674157162,surprise:0.400302535646339}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL,COMM],cardinality:5.0,expectedCardinality:12.579960871109892,surprise:0.43117052004174}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** As {@link #testProfileScott2()}, but uses the breadth-first profiler.
+ * Results should be the same, but are slightly different (extra EMPNO
+ * and ENAME distributions). */
+ @Test public void testProfileScott3() throws Exception {
+ scott().factory(Fluid.BETTER_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** As {@link #testProfileScott3()}, but uses the breadth-first profiler
+ * and deems everything uninteresting. Only first-level combinations (those
+ * consisting of a single column) are computed. */
+ @Test public void testProfileScott4() throws Exception {
+ scott().factory(Fluid.INCURIOUS_PROFILER_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** As {@link #testProfileScott3()}, but uses the breadth-first profiler. */
+ @Ignore
+ @Test public void testProfileScott5() throws Exception {
+ scott().factory(Fluid.PROFILER_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** Profiles a star-join query on the Foodmart schema using the breadth-first
+ * profiler. */
+ @Ignore
+ @Test public void testProfileFoodmart() throws Exception {
+ foodmart().factory(Fluid.PROFILER_FACTORY).unordered(
+ "{type:distribution,columns:[brand_name],cardinality:111.0,expectedCardinality:86837.0,surprise:0.9974467497814786}",
+ "{type:distribution,columns:[cases_per_pallet],values:[5,6,7,8,9,10,11,12,13,14],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+ "{type:distribution,columns:[day_of_month],cardinality:30.0,expectedCardinality:86837.0,surprise:0.9993092889129359}",
+ "{type:distribution,columns:[fiscal_period],values:[],cardinality:1.0,nullCount:86837,expectedCardinality:86837.0,surprise:0.999976968608213}",
+ "{type:distribution,columns:[low_fat],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+ "{type:distribution,columns:[month_of_year],values:[1,2,3,4,5,6,7,8,9,10,11,12],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+ "{type:distribution,columns:[product_category],cardinality:45.0,expectedCardinality:86837.0,surprise:0.9989641122441932}",
+ "{type:distribution,columns:[product_class_id0,product_subcategory,product_category,product_department,product_family],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[product_class_id0],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[product_class_id],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[product_department],cardinality:22.0,expectedCardinality:86837.0,surprise:0.9994934318838578}",
+ "{type:distribution,columns:[product_family],values:[Drink,Food,Non-Consumable],cardinality:3.0,expectedCardinality:86837.0,surprise:0.9999309074159374}",
+ "{type:distribution,columns:[product_subcategory],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[quarter],values:[Q1,Q2,Q3,Q4],cardinality:4.0,expectedCardinality:86837.0,surprise:0.9999078776154121}",
+ "{type:distribution,columns:[recyclable_package],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+ "{type:distribution,columns:[store_cost,fiscal_period],cardinality:10601.0,nullCount:86724,expectedCardinality:10.0,surprise:0.9981151635095655}",
+ "{type:distribution,columns:[store_cost,low_fat],cardinality:17673.0,expectedCardinality:20.0,surprise:0.99773921890013}",
+ "{type:distribution,columns:[store_cost,product_family],cardinality:19453.0,expectedCardinality:30.0,surprise:0.9969203921367346}",
+ "{type:distribution,columns:[store_cost,quarter],cardinality:29590.0,expectedCardinality:40.0,surprise:0.9973000337495781}",
+ "{type:distribution,columns:[store_cost,recyclable_package],cardinality:17847.0,expectedCardinality:20.0,surprise:0.9977612357978396}",
+ "{type:distribution,columns:[store_cost,the_year],cardinality:10944.0,expectedCardinality:10.0,surprise:0.9981741829468688}",
+ "{type:distribution,columns:[store_cost],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+ "{type:distribution,columns:[store_id],values:[2,3,6,7,11,13,14,15,16,17,22,23,24],cardinality:13.0,expectedCardinality:86837.0,surprise:0.9997006332757629}",
+ "{type:distribution,columns:[store_sales],cardinality:21.0,expectedCardinality:86837.0,surprise:0.999516452140275}",
+ "{type:distribution,columns:[the_day],values:[Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday],cardinality:7.0,expectedCardinality:86837.0,surprise:0.9998387913960665}",
+ "{type:distribution,columns:[the_month],values:[April,August,December,February,January,July,June,March,May,November,October,September],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+ "{type:distribution,columns:[the_year],values:[1997],cardinality:1.0,expectedCardinality:86837.0,surprise:0.999976968608213}",
+ "{type:distribution,columns:[unit_sales],values:[1.0000,2.0000,3.0000,4.0000,5.0000,6.0000],cardinality:6.0,expectedCardinality:86837.0,surprise:0.999861819605495}",
+ "{type:distribution,columns:[units_per_case],cardinality:36.0,expectedCardinality:86837.0,surprise:0.9991712039413857}",
+ "{type:distribution,columns:[week_of_year],cardinality:52.0,expectedCardinality:86837.0,surprise:0.9988030705843087}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** Tests
+ * {@link org.apache.calcite.profile.ProfilerImpl.SurpriseQueue}. */
+ @Test public void testSurpriseQueue() {
+ ProfilerImpl.SurpriseQueue q = new ProfilerImpl.SurpriseQueue(4, 3);
+ assertThat(q.offer(2), is(true));
+ assertThat(q.toString(), is("min: 2.0, contents: [2.0]"));
+ assertThat(q.isValid(), is(true));
+
+ assertThat(q.offer(4), is(true));
+ assertThat(q.toString(), is("min: 2.0, contents: [2.0, 4.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Since we're in the warm-up period, a value lower than the minimum is
+ // accepted.
+ assertThat(q.offer(1), is(true));
+ assertThat(q.toString(), is("min: 1.0, contents: [2.0, 4.0, 1.0]"));
+ assertThat(q.isValid(), is(true));
+
+ assertThat(q.offer(5), is(true));
+ assertThat(q.toString(), is("min: 1.0, contents: [4.0, 1.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ assertThat(q.offer(3), is(true));
+ assertThat(q.toString(), is("min: 1.0, contents: [1.0, 5.0, 3.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Duplicate entry
+ assertThat(q.offer(5), is(true));
+ assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Now that the list is full, a value below the minimum is refused.
+ // "offer" returns false, and the value is not added to the queue.
+ // Thus the median never decreases.
+ assertThat(q.offer(2), is(false));
+ assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Same applies for a value equal to the minimum.
+ assertThat(q.offer(3), is(false));
+ assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Add a value that is above the minimum.
+ assertThat(q.offer(4.5), is(true));
+ assertThat(q.toString(), is("min: 3.0, contents: [3.0, 5.0, 4.5]"));
+ assertThat(q.isValid(), is(true));
+ }
+
+ private Fluid scott() throws Exception {
+ final String sql = "select * from \"scott\".emp\n"
+ + "join \"scott\".dept using (deptno)";
+ return sql(sql)
+ .where(Fluid.STATISTIC_PREDICATE)
+ .sort(Fluid.ORDERING.reverse())
+ .limit(30)
+ .project(Fluid.EXTENDED_COLUMNS);
+ }
+
+ private Fluid foodmart() throws Exception {
+ final String sql = "select \"s\".*, \"p\".*, \"t\".*, \"pc\".*\n"
+ + "from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+ + "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
+ + "join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")\n"
+ + "join \"foodmart\".\"product_class\" as \"pc\"\n"
+ + " on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"\n";
+ return sql(sql)
+ .config(CalciteAssert.Config.JDBC_FOODMART)
+ .where(Fluid.STATISTIC_PREDICATE)
+ .sort(Fluid.ORDERING.reverse())
+ .limit(30)
+ .project(Fluid.EXTENDED_COLUMNS);
+ }
+
+ private static Fluid sql(String sql) {
+ return new Fluid(CalciteAssert.Config.SCOTT, sql, Fluid.SIMPLE_FACTORY,
+ Predicates.<Profiler.Statistic>alwaysTrue(), null, -1,
+ Fluid.DEFAULT_COLUMNS);
+ }
+
+ /** Fluid interface for writing profiler test cases. */
+ private static class Fluid {
+ static final Supplier<Profiler> SIMPLE_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ return new SimpleProfiler();
+ }
+ };
+
+ static final Supplier<Profiler> BETTER_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>>
+ predicate = Predicates.alwaysTrue();
+ return new ProfilerImpl(600, 200, predicate);
+ }
+ };
+
+ static final Ordering<Profiler.Statistic> ORDERING =
+ new Ordering<Profiler.Statistic>() {
+ public int compare(Profiler.Statistic left,
+ Profiler.Statistic right) {
+ int c = left.getClass().getSimpleName()
+ .compareTo(right.getClass().getSimpleName());
+ if (c == 0
+ && left instanceof Profiler.Distribution
+ && right instanceof Profiler.Distribution) {
+ final Profiler.Distribution d0 = (Profiler.Distribution) left;
+ final Profiler.Distribution d1 = (Profiler.Distribution) right;
+ c = Double.compare(d0.surprise(), d1.surprise());
+ if (c == 0) {
+ c = d0.columns.toString().compareTo(d1.columns.toString());
+ }
+ }
+ return c;
+ }
+ };
+
+ static final Predicate<Profiler.Statistic> STATISTIC_PREDICATE =
+ new PredicateImpl<Profiler.Statistic>() {
+ public boolean test(Profiler.Statistic statistic) {
+ // Include distributions of zero columns (the grand total)
+ // and singleton columns, plus "surprising" distributions
+ // (with significantly higher NDVs than predicted from their
+ // constituent columns).
+ return statistic instanceof Profiler.Distribution
+ && (((Profiler.Distribution) statistic).columns.size() < 2
+ || ((Profiler.Distribution) statistic).surprise() > 0.4D)
+ && ((Profiler.Distribution) statistic).minimal;
+ }
+ };
+
+ static final List<String> DEFAULT_COLUMNS =
+ ImmutableList.of("type", "distribution", "columns", "cardinality",
+ "values", "nullCount", "dependentColumn", "rowCount");
+
+ static final List<String> EXTENDED_COLUMNS =
+ ImmutableList.<String>builder().addAll(DEFAULT_COLUMNS)
+ .add("expectedCardinality", "surprise")
+ .build();
+
+ private static final Supplier<Profiler> PROFILER_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ return new ProfilerImpl(7500, 100,
+ new PredicateImpl<Pair<ProfilerImpl.Space, Profiler.Column>>() {
+ public boolean test(
+ Pair<ProfilerImpl.Space, Profiler.Column> p) {
+ final Profiler.Distribution distribution =
+ p.left.distribution();
+ if (distribution == null) {
+ // We don't have a distribution yet, because this space
+ // has not yet been evaluated. Let's do it anyway.
+ return true;
+ }
+ return distribution.surprise() >= 0.3D;
+ }
+ });
+ }
+ };
+
+ private static final Supplier<Profiler> INCURIOUS_PROFILER_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>> p =
+ Predicates.alwaysFalse();
+ return new ProfilerImpl(10, 200, p);
+ }
+ };
+
+ private final String sql;
+ private final List<String> columns;
+ private final Comparator<Profiler.Statistic> comparator;
+ private final int limit;
+ private final Predicate<Profiler.Statistic> predicate;
+ private final Supplier<Profiler> factory;
+ private final CalciteAssert.Config config;
+
+ Fluid(CalciteAssert.Config config, String sql, Supplier<Profiler> factory,
+ Predicate<Profiler.Statistic> predicate,
+ Comparator<Profiler.Statistic> comparator, int limit,
+ List<String> columns) {
+ this.sql = Preconditions.checkNotNull(sql);
+ this.factory = Preconditions.checkNotNull(factory);
+ this.columns = ImmutableList.copyOf(columns);
+ this.predicate = Preconditions.checkNotNull(predicate);
+ this.comparator = comparator; // null means sort on JSON representation
+ this.limit = limit;
+ this.config = config;
+ }
+
+ Fluid config(CalciteAssert.Config config) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid factory(Supplier<Profiler> factory) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid project(List<String> columns) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid sort(Ordering<Profiler.Statistic> comparator) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid limit(int limit) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid where(Predicate<Profiler.Statistic> predicate) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid unordered(String... lines) throws Exception {
+ return check(Matchers.equalsUnordered(lines));
+ }
+
+ public Fluid check(final Matcher<Iterable<String>> matcher)
+ throws Exception {
+ CalciteAssert.that(config)
+ .doWithConnection(new Function<CalciteConnection, Void>() {
+ public Void apply(CalciteConnection c) {
+ try (PreparedStatement s = c.prepareStatement(sql)) {
+ final ResultSetMetaData m = s.getMetaData();
+ final List<Profiler.Column> columns = new ArrayList<>();
+ final int columnCount = m.getColumnCount();
+ for (int i = 0; i < columnCount; i++) {
+ columns.add(new Profiler.Column(i, m.getColumnLabel(i + 1)));
+ }
+
+ // Create an initial group for each table in the query.
+ // Columns in the same table will tend to have the same
+ // cardinality as the table, and as the table's primary key.
+ final Multimap<String, Integer> groups = HashMultimap.create();
+ for (int i = 0; i < m.getColumnCount(); i++) {
+ groups.put(m.getTableName(i + 1), i);
+ }
+ final SortedSet<ImmutableBitSet> initialGroups =
+ new TreeSet<>();
+ for (Collection<Integer> integers : groups.asMap().values()) {
+ initialGroups.add(ImmutableBitSet.of(integers));
+ }
+ final Profiler p = factory.get();
+ final Enumerable<List<Comparable>> rows = getRows(s);
+ final Profiler.Profile profile =
+ p.profile(rows, columns, initialGroups);
+ final List<Profiler.Statistic> statistics =
+ ImmutableList.copyOf(
+ Iterables.filter(profile.statistics(), predicate));
+
+ // If no comparator specified, use the function that converts to
+ // JSON strings
+ final Function<Profiler.Statistic, String> toJson =
+ toJsonFunction();
+ Ordering<Profiler.Statistic> comp = comparator != null
+ ? Ordering.from(comparator)
+ : Ordering.natural().onResultOf(toJson);
+ ImmutableList<Profiler.Statistic> statistics2 =
+ comp.immutableSortedCopy(statistics);
+ if (limit >= 0 && limit < statistics2.size()) {
+ statistics2 = statistics2.subList(0, limit);
+ }
+
+ final List<String> strings =
+ Lists.transform(statistics2, toJson);
+ assertThat(strings, matcher);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ return null;
+ }
+ });
+ return this;
+ }
+
+ /** Returns a function that converts a statistic to a JSON string. */
+ Function<Profiler.Statistic, String> toJsonFunction() {
+ return new Function<Profiler.Statistic, String>() {
+ final JsonBuilder jb = new JsonBuilder();
+
+ public String apply(Profiler.Statistic statistic) {
+ Object map = statistic.toMap(jb);
+ if (map instanceof Map) {
+ @SuppressWarnings("unchecked")
+ final Map<String, Object> map1 = (Map) map;
+ map1.keySet().retainAll(Fluid.this.columns);
+ }
+ final String json = jb.toJsonString(map);
+ return json.replaceAll("\n", "").replaceAll(" ", "")
+ .replaceAll("\"", "");
+ }
+ };
+ }
+
+ private Enumerable<List<Comparable>> getRows(final PreparedStatement s) {
+ return new AbstractEnumerable<List<Comparable>>() {
+ public Enumerator<List<Comparable>> enumerator() {
+ try {
+ final ResultSet r = s.executeQuery();
+ return getListEnumerator(r, r.getMetaData().getColumnCount());
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+
+ private Enumerator<List<Comparable>> getListEnumerator(
+ final ResultSet r, final int columnCount) {
+ return new Enumerator<List<Comparable>>() {
+ final Comparable[] values = new Comparable[columnCount];
+
+ public List<Comparable> current() {
+ for (int i = 0; i < columnCount; i++) {
+ try {
+ final Comparable value = (Comparable) r.getObject(i + 1);
+ values[i] = NullSentinel.mask(value);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return ImmutableList.copyOf(values);
+ }
+
+ public boolean moveNext() {
+ try {
+ return r.next();
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public void reset() {
+ }
+
+ public void close() {
+ try {
+ r.close();
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+ }
+}
+
+// End ProfilerTest.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
index d577057..585e8ed 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
@@ -28,6 +28,7 @@ import org.apache.calcite.plan.volcano.TraitPropagationTest;
import org.apache.calcite.plan.volcano.VolcanoPlannerTest;
import org.apache.calcite.plan.volcano.VolcanoPlannerTraitTest;
import org.apache.calcite.prepare.LookupOperatorOverloadsTest;
+import org.apache.calcite.profile.ProfilerTest;
import org.apache.calcite.rel.RelCollationTest;
import org.apache.calcite.rel.RelDistributionTest;
import org.apache.calcite.rel.rel2sql.RelToSqlConverterTest;
@@ -156,6 +157,7 @@ import org.junit.runners.Suite;
LinqFrontJdbcBackTest.class,
JdbcFrontJdbcBackLinqMiddleTest.class,
CalciteSqlOperatorTest.class,
+ ProfilerTest.class,
LatticeTest.class,
ReflectiveSchemaTest.class,
JdbcTest.class,
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
index 3f36bf9..4eae872 100644
--- a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
+++ b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
@@ -23,6 +23,8 @@ import org.apache.calcite.materialize.Lattices;
import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
/**
@@ -33,10 +35,15 @@ import java.util.Map;
*/
public class FoodMartLatticeStatisticProvider
extends DelegatingLatticeStatisticProvider {
- public static final FoodMartLatticeStatisticProvider INSTANCE =
- new FoodMartLatticeStatisticProvider(Lattices.CACHED_SQL);
+ public static final FoodMartLatticeStatisticProvider.Factory FACTORY =
+ new Factory() {
+ public LatticeStatisticProvider apply(Lattice lattice) {
+ return new FoodMartLatticeStatisticProvider(lattice,
+ Lattices.CACHED_SQL.apply(lattice));
+ }
+ };
- public static final Map<String, Integer> CARDINALITY_MAP =
+ private static final Map<String, Integer> CARDINALITY_MAP =
ImmutableMap.<String, Integer>builder()
.put("brand_name", 111)
.put("cases_per_pallet", 10)
@@ -75,18 +82,29 @@ public class FoodMartLatticeStatisticProvider
.put("week_of_year", 52)
.build();
- private FoodMartLatticeStatisticProvider(LatticeStatisticProvider provider) {
+ private final Lattice lattice;
+
+ private FoodMartLatticeStatisticProvider(Lattice lattice,
+ LatticeStatisticProvider provider) {
super(provider);
+ this.lattice = lattice;
}
- /** Returns an estimate of the number of distinct values in a column. */
- public int cardinality(Lattice lattice, Lattice.Column column) {
+ private int cardinality(Lattice.Column column) {
final Integer integer = CARDINALITY_MAP.get(column.alias);
if (integer != null && integer > 0) {
return integer;
}
return column.alias.length();
}
+
+ @Override public double cardinality(List<Lattice.Column> columns) {
+ final List<Double> cardinalityList = new ArrayList<>();
+ for (Lattice.Column column : columns) {
+ cardinalityList.add((double) cardinality(column));
+ }
+ return Lattice.getRowCount(lattice.getFactRowCount(), cardinalityList);
+ }
}
// End FoodMartLatticeStatisticProvider.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/LatticeTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
index 54f4ea7..473e5f5 100644
--- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
@@ -16,11 +16,16 @@
*/
package org.apache.calcite.test;
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.materialize.Lattice;
import org.apache.calcite.materialize.Lattices;
import org.apache.calcite.materialize.MaterializationService;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.TestUtil;
import org.apache.calcite.util.Util;
@@ -29,6 +34,7 @@ import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Test;
@@ -39,11 +45,15 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
+import static org.apache.calcite.test.Matchers.within;
+
import static org.hamcrest.CoreMatchers.anyOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
@@ -110,8 +120,8 @@ public class LatticeTest {
+ " } ]\n"
+ "}\n";
- private CalciteAssert.AssertThat modelWithLattice(String name, String sql,
- String... extras) {
+ private static CalciteAssert.AssertThat modelWithLattice(String name,
+ String sql, String... extras) {
final StringBuilder buf = new StringBuilder("{ name: '")
.append(name)
.append("', sql: ")
@@ -123,7 +133,8 @@ public class LatticeTest {
return modelWithLattices(buf.toString());
}
- private CalciteAssert.AssertThat modelWithLattices(String... lattices) {
+ private static CalciteAssert.AssertThat modelWithLattices(
+ String... lattices) {
final Class<JdbcTest.EmpDeptTableFactory> clazz =
JdbcTest.EmpDeptTableFactory.class;
return CalciteAssert.model(""
@@ -153,6 +164,36 @@ public class LatticeTest {
/** Tests that it's OK for a lattice to have the same name as a table in the
* schema. */
+ @Test public void testLatticeSql() throws Exception {
+ modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
+ .doWithConnection(new Function<CalciteConnection, Void>() {
+ public Void apply(CalciteConnection c) {
+ final SchemaPlus schema = c.getRootSchema();
+ final SchemaPlus adhoc = schema.getSubSchema("adhoc");
+ assertThat(adhoc.getTableNames().contains("EMPLOYEES"), is(true));
+ final Map.Entry<String, CalciteSchema.LatticeEntry> entry =
+ adhoc.unwrap(CalciteSchema.class).getLatticeMap().firstEntry();
+ final Lattice lattice = entry.getValue().getLattice();
+ final String sql = "SELECT \"days\".\"day\"\n"
+ + "FROM \"foodmart\".\"days\" AS \"days\"\n"
+ + "GROUP BY \"days\".\"day\"";
+ assertThat(
+ lattice.sql(ImmutableBitSet.of(0),
+ ImmutableList.<Lattice.Measure>of()), is(sql));
+ final String sql2 = "SELECT"
+ + " \"days\".\"day\", \"days\".\"week_day\"\n"
+ + "FROM \"foodmart\".\"days\" AS \"days\"";
+ assertThat(
+ lattice.sql(ImmutableBitSet.of(0, 1), false,
+ ImmutableList.<Lattice.Measure>of()),
+ is(sql2));
+ return null;
+ }
+ });
+ }
+
+ /** Tests that it's OK for a lattice to have the same name as a table in the
+ * schema. */
@Test public void testLatticeWithSameNameAsTable() {
modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
.query("select count(*) from EMPLOYEES")
@@ -377,28 +418,57 @@ public class LatticeTest {
* Use optimization algorithm to suggest which tiles of a lattice to
* materialize</a>. */
@Test public void testTileAlgorithm() {
- checkTileAlgorithm(FoodMartLatticeStatisticProvider.class.getCanonicalName(),
- "EnumerableAggregate(group=[{2, 3}])\n"
- + " EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31}]])");
+ final String explain = "EnumerableAggregate(group=[{2, 3}])\n"
+ + " EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31, 32, 37}]])";
+ checkTileAlgorithm(
+ FoodMartLatticeStatisticProvider.class.getCanonicalName() + "#FACTORY",
+ explain);
}
+ /** As {@link #testTileAlgorithm()}, but uses the
+ * {@link Lattices#CACHED_SQL} statistics provider. */
@Test public void testTileAlgorithm2() {
// Different explain than above, but note that it still selects columns
// (27, 31).
+ final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+ + " EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
checkTileAlgorithm(Lattices.class.getCanonicalName() + "#CACHED_SQL",
- "EnumerableAggregate(group=[{0, 1}])\n"
- + " EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]");
+ explain);
+ }
+
+ /** As {@link #testTileAlgorithm()}, but uses the
+ * {@link Lattices#PROFILER} statistics provider. */
+ @Test public void testTileAlgorithm3() {
+ Assume.assumeTrue("Yahoo sketches requires JDK 8 or higher",
+ TestUtil.getJavaMajorVersion() >= 8);
+ final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+ + " EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
+ checkTileAlgorithm(Lattices.class.getCanonicalName() + "#PROFILER",
+ explain);
}
private void checkTileAlgorithm(String statisticProvider,
String expectedExplain) {
MaterializationService.setThreadLocal();
MaterializationService.instance().clear();
- foodmartModel(
- " auto: false,\n"
+ foodmartLatticeModel(statisticProvider)
+ .query("select distinct t.\"the_year\", t.\"quarter\"\n"
+ + "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
+ .enableMaterializations(true)
+ .explainContains(expectedExplain)
+ .returnsUnordered("the_year=1997; quarter=Q1",
+ "the_year=1997; quarter=Q2",
+ "the_year=1997; quarter=Q3",
+ "the_year=1997; quarter=Q4");
+ }
+
+ private static CalciteAssert.AssertThat foodmartLatticeModel(
+ String statisticProvider) {
+ return foodmartModel(" auto: false,\n"
+ " algorithm: true,\n"
+ " algorithmMaxMillis: -1,\n"
- + " rowCountEstimate: 86000,\n"
+ + " rowCountEstimate: 87000,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'unit_sales'\n"
@@ -414,17 +484,7 @@ public class LatticeTest {
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
- + " } ]\n")
- .query("select distinct t.\"the_year\", t.\"quarter\"\n"
- + "from \"foodmart\".\"sales_fact_1997\" as s\n"
- + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
- .enableMaterializations(true)
- .explainContains(expectedExplain)
- .returnsUnordered("the_year=1997; quarter=Q1",
- "the_year=1997; quarter=Q2",
- "the_year=1997; quarter=Q3",
- "the_year=1997; quarter=Q4")
- .returnsCount(4);
+ + " } ]\n");
}
/** Tests a query that is created within {@link #testTileAlgorithm()}. */
@@ -704,7 +764,7 @@ public class LatticeTest {
.returns("EXPR$0=1\n");
}
- private CalciteAssert.AssertThat foodmartModel(String... extras) {
+ private static CalciteAssert.AssertThat foodmartModel(String... extras) {
return modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+ "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
@@ -741,6 +801,17 @@ public class LatticeTest {
System.out.println(CalciteAssert.toString(resultSet));
connection.close();
}
+
+ /** Unit test for {@link Lattice#getRowCount(double, List)}. */
+ @Test public void testColumnCount() {
+ assertThat(Lattice.getRowCount(10, 2, 3), within(5.03D, 0.01D));
+ assertThat(Lattice.getRowCount(10, 9, 8), within(9.4D, 0.01D));
+ assertThat(Lattice.getRowCount(100, 9, 8), within(54.2D, 0.1D));
+ assertThat(Lattice.getRowCount(1000, 9, 8), within(72D, 0.01D));
+ assertThat(Lattice.getRowCount(1000, 1, 1), is(1D));
+ assertThat(Lattice.getRowCount(1, 3, 5), within(1D, 0.01D));
+ assertThat(Lattice.getRowCount(1, 3, 5, 13, 4831), within(1D, 0.01D));
+ }
}
// End LatticeTest.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/Matchers.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/Matchers.java b/core/src/test/java/org/apache/calcite/test/Matchers.java
index fc9e0fd..08733a2 100644
--- a/core/src/test/java/org/apache/calcite/test/Matchers.java
+++ b/core/src/test/java/org/apache/calcite/test/Matchers.java
@@ -16,10 +16,16 @@
*/
package org.apache.calcite.test;
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Functions;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
+import org.hamcrest.BaseMatcher;
import org.hamcrest.CustomTypeSafeMatcher;
import org.hamcrest.Description;
+import org.hamcrest.Factory;
import org.hamcrest.Matcher;
import java.sql.ResultSet;
@@ -79,6 +85,81 @@ public class Matchers {
}
};
}
+
+ public static <E extends Comparable> Matcher<Iterable<E>> equalsUnordered(
+ E... lines) {
+ final List<String> expectedList =
+ Lists.newArrayList(toStringList(Arrays.asList(lines)));
+ Collections.sort(expectedList);
+ final String description = Util.lines(expectedList);
+ return new CustomTypeSafeMatcher<Iterable<E>>(description) {
+ @Override protected void describeMismatchSafely(Iterable<E> actuals,
+ Description description) {
+ final List<String> actualList =
+ Lists.newArrayList(toStringList(actuals));
+ Collections.sort(actualList);
+ description.appendText("was ")
+ .appendValue(Util.lines(actualList));
+ }
+
+ protected boolean matchesSafely(Iterable<E> actuals) {
+ final List<String> actualList =
+ Lists.newArrayList(toStringList(actuals));
+ Collections.sort(actualList);
+ return actualList.equals(expectedList);
+ }
+ };
+ }
+
+ private static <E> Iterable<String> toStringList(Iterable<E> items) {
+ return Iterables.transform(items, Functions.toStringFunction());
+ }
+
+ /**
+ * Creates a matcher that matches when the examined object is within
+ * {@code epsilon} of the specified <code>operand</code>.
+ */
+ @Factory
+ public static <T extends Number> Matcher<T> within(T value, double epsilon) {
+ return new IsWithin<T>(value, epsilon);
+ }
+
+ /**
+ * Is the numeric value within a given difference another value?
+ *
+ * @param <T> Value type
+ */
+ public static class IsWithin<T extends Number> extends BaseMatcher<T> {
+ private final T expectedValue;
+ private final double epsilon;
+
+ public IsWithin(T expectedValue, double epsilon) {
+ this.expectedValue = expectedValue;
+ this.epsilon = epsilon;
+ }
+
+ public boolean matches(Object actualValue) {
+ return areEqual(actualValue, expectedValue, epsilon);
+ }
+
+ public void describeTo(Description description) {
+ description.appendValue(expectedValue + " +/-" + epsilon);
+ }
+
+ private static boolean areEqual(Object actual, Number expected,
+ double epsilon) {
+ if (actual == null) {
+ return expected == null;
+ }
+ if (actual.equals(expected)) {
+ return true;
+ }
+ final double a = ((Number) actual).doubleValue();
+ final double min = expected.doubleValue() - epsilon;
+ final double max = expected.doubleValue() + epsilon;
+ return min <= a && a <= max;
+ }
+ }
}
// End Matchers.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
index d37c223..8a6a9da 100644
--- a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
+++ b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
@@ -18,9 +18,13 @@ package org.apache.calcite.util;
import org.apache.calcite.test.CalciteAssert;
+import com.google.common.base.Function;
+
+import org.junit.Assume;
import org.junit.Test;
import java.util.AbstractList;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
@@ -28,7 +32,10 @@ import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
/**
@@ -133,6 +140,13 @@ public class PartiallyOrderedSetTest {
// "bcd" is child of "abcd" and parent of ""
final String bcd = "'bcd'";
+ assertEquals("['abcd']", poset.getParents(bcd, true).toString());
+ assertThat(poset.getParents(bcd, false), nullValue());
+ assertThat(poset.getParents(bcd), nullValue());
+ assertEquals("['']", poset.getChildren(bcd, true).toString());
+ assertThat(poset.getChildren(bcd, false), nullValue());
+ assertThat(poset.getChildren(bcd), nullValue());
+
poset.add(bcd);
printValidate(poset);
assertTrue(poset.isValid(false));
@@ -210,6 +224,66 @@ public class PartiallyOrderedSetTest {
printValidate(poset);
}
+ @Test public void testPosetBitsLarge() {
+ final PartiallyOrderedSet<Integer> poset =
+ new PartiallyOrderedSet<>(IS_BIT_SUPERSET);
+ checkPosetBitsLarge(poset, 30000, 2921, 164782);
+ }
+
+ @Test public void testPosetBitsLarge2() {
+ Assume.assumeTrue("too slow to run every day", CalciteAssert.ENABLE_SLOW);
+ final int n = 30000;
+ final PartiallyOrderedSet<Integer> poset =
+ new PartiallyOrderedSet<>(IS_BIT_SUPERSET,
+ new Function<Integer, Iterable<Integer>>() {
+ public Iterable<Integer> apply(Integer input) {
+ final int i = input;
+ int r = i; // bits not yet cleared
+ final List<Integer> list = new ArrayList<>();
+ for (int z = 1; r != 0; z <<= 1) {
+ if ((i & z) != 0) {
+ list.add(i ^ z);
+ r ^= z;
+ }
+ }
+ return list;
+ }
+ },
+ new Function<Integer, Iterable<Integer>>() {
+ public Iterable<Integer> apply(Integer input) {
+ final int i = input;
+ final List<Integer> list = new ArrayList<>();
+ for (int z = 1; z <= n; z <<= 1) {
+ if ((i & z) == 0) {
+ list.add(i | z);
+ }
+ }
+ return list;
+ }
+ });
+ checkPosetBitsLarge(poset, n, 2921, 11961);
+ }
+
+ void checkPosetBitsLarge(PartiallyOrderedSet<Integer> poset, int n,
+ int expectedSize, int expectedParentCount) {
+ final Random random = new Random(1);
+ int count = 0;
+ int parentCount = 0;
+ for (int i = 0; i < n; i++) {
+ if (random.nextInt(10) == 0) {
+ if (poset.add(random.nextInt(n * 2))) {
+ ++count;
+ }
+ }
+ final List<Integer> parents =
+ poset.getParents(random.nextInt(n * 2), true);
+ parentCount += parents.size();
+ }
+ assertThat(poset.size(), is(count));
+ assertThat(poset.size(), is(expectedSize));
+ assertThat(parentCount, is(expectedParentCount));
+ }
+
@Test public void testPosetBitsRemoveParent() {
final PartiallyOrderedSet<Integer> poset =
new PartiallyOrderedSet<Integer>(IS_BIT_SUPERSET);
@@ -223,9 +297,6 @@ public class PartiallyOrderedSetTest {
}
@Test public void testDivisorPoset() {
- if (!CalciteAssert.ENABLE_SLOW) {
- return;
- }
PartiallyOrderedSet<Integer> integers =
new PartiallyOrderedSet<Integer>(IS_DIVISOR, range(1, 1000));
assertEquals(
@@ -405,6 +476,7 @@ public class PartiallyOrderedSetTest {
expected,
new TreeSet<String>(ss).toString());
}
+
}
// End PartiallyOrderedSetTest.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/TestUtil.java b/core/src/test/java/org/apache/calcite/util/TestUtil.java
index 5a283d2..b530d3b 100644
--- a/core/src/test/java/org/apache/calcite/util/TestUtil.java
+++ b/core/src/test/java/org/apache/calcite/util/TestUtil.java
@@ -34,6 +34,9 @@ public abstract class TestUtil {
private static final String LINE_BREAK =
"\\\\n\"" + Util.LINE_SEPARATOR + " + \"";
+ private static final String JAVA_VERSION =
+ System.getProperties().getProperty("java.version");
+
//~ Methods ----------------------------------------------------------------
public static void assertEqualsVerbose(
@@ -190,6 +193,20 @@ public abstract class TestUtil {
.replaceAll("\\[", "\\\\[")
.replaceAll("\\]", "\\\\]");
}
+
+ /** Returns the Java major version: 7 for JDK 1.7, 8 for JDK 8, 10 for
+ * JDK 10, etc. */
+ public static int getJavaMajorVersion() {
+ if (JAVA_VERSION.startsWith("1.7")) {
+ return 7;
+ } else if (JAVA_VERSION.startsWith("1.8")) {
+ return 8;
+ } else if (JAVA_VERSION.startsWith("1.9")) {
+ return 9;
+ } else {
+ return 10;
+ }
+ }
}
// End TestUtil.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
----------------------------------------------------------------------
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
index 7dee05d..eaf9c91 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
@@ -19,6 +19,7 @@ package org.apache.calcite.adapter.tpch;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.util.TestUtil;
import org.apache.calcite.util.Util;
import com.google.common.base.Function;
@@ -40,11 +41,8 @@ import static org.junit.Assert.assertThat;
* if {@code -Dcalcite.test.slow} is specified on the command-line.
* (See {@link org.apache.calcite.test.CalciteAssert#ENABLE_SLOW}.)</p> */
public class TpchTest {
- public static final String JAVA_VERSION =
- System.getProperties().getProperty("java.version");
-
public static final boolean ENABLE =
- CalciteAssert.ENABLE_SLOW && JAVA_VERSION.compareTo("1.7") >= 0;
+ CalciteAssert.ENABLE_SLOW && TestUtil.getJavaMajorVersion() >= 7;
private static String schema(String name, String scaleFactor) {
return " {\n"
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1feec71..9ea9ecc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,6 +129,7 @@ limitations under the License.
<sqlline.version>1.3.0</sqlline.version>
<xalan.version>2.7.1</xalan.version>
<xerces.version>2.9.1</xerces.version>
+ <sketches.version>0.9.0</sketches.version>
</properties>
<issueManagement>
@@ -259,6 +260,11 @@ limitations under the License.
<version>${guava.version}</version>
</dependency>
<dependency>
+ <groupId>com.h2database</groupId>
+ <artifactId>h2</artifactId>
+ <version>${h2.version}</version>
+ </dependency>
+ <dependency>
<groupId>com.joestelmach</groupId>
<artifactId>natty</artifactId>
<version>${natty.version}</version>
@@ -269,9 +275,9 @@ limitations under the License.
<version>${oracle-jdbc6-driver.version}</version>
</dependency>
<dependency>
- <groupId>com.h2database</groupId>
- <artifactId>h2</artifactId>
- <version>${h2.version}</version>
+ <groupId>com.yahoo.datasketches</groupId>
+ <artifactId>sketches-core</artifactId>
+ <version>${sketches.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>