You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2017/11/29 06:13:11 UTC
[2/4] calcite git commit: [CALCITE-1616] Data profiler
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
index ad808a9..a3db6db 100644
--- a/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
+++ b/core/src/main/java/org/apache/calcite/util/PartiallyOrderedSet.java
@@ -16,7 +16,11 @@
*/
package org.apache.calcite.util;
-import java.util.AbstractList;
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Lists;
+
import java.util.AbstractSet;
import java.util.ArrayDeque;
import java.util.ArrayList;
@@ -61,7 +65,21 @@ import java.util.Set;
* @param <E> Element type
*/
public class PartiallyOrderedSet<E> extends AbstractSet<E> {
+ /** Ordering that orders bit sets by inclusion.
+ *
+ * <p>For example, the children of 14 (1110) are 12 (1100), 10 (1010) and
+ * 6 (0110).
+ */
+ public static final Ordering<ImmutableBitSet> BIT_SET_INCLUSION_ORDERING =
+ new Ordering<ImmutableBitSet>() {
+ public boolean lessThan(ImmutableBitSet e1, ImmutableBitSet e2) {
+ return e1.contains(e2);
+ }
+ };
+
private final Map<E, Node<E>> map;
+ private final Function<E, Iterable<E>> parentFunction;
+ private final Function<E, Iterable<E>> childFunction;
private final Ordering<E> ordering;
/**
@@ -71,7 +89,9 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
private final Node<E> topNode;
private final Node<E> bottomNode;
- private static final boolean DEBUG = Math.random() >= 0;
+ /** Whether to check internal consistency all the time.
+ * False unless you specify "-Dcalcite.debug" on the command line. */
+ private static final boolean DEBUG = Util.getBooleanProperty("calcite.debug");
/**
* Creates a partially-ordered set.
@@ -79,7 +99,19 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* @param ordering Ordering relation
*/
public PartiallyOrderedSet(Ordering<E> ordering) {
- this(ordering, new HashMap<E, Node<E>>());
+ this(ordering, new HashMap<E, Node<E>>(), null, null);
+ }
+
+ /**
+ * Creates a partially-ordered set with a parent-generating function.
+ *
+ * @param ordering Ordering relation
+ * @param parentFunction Function to compute parents of a node; may be null
+ */
+ public PartiallyOrderedSet(Ordering<E> ordering,
+ Function<E, Iterable<E>> childFunction,
+ Function<E, Iterable<E>> parentFunction) {
+ this(ordering, new HashMap<E, Node<E>>(), childFunction, parentFunction);
}
/**
@@ -90,7 +122,8 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* @param collection Initial contents of partially-ordered set
*/
public PartiallyOrderedSet(Ordering<E> ordering, Collection<E> collection) {
- this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2));
+ this(ordering, new HashMap<E, Node<E>>(collection.size() * 3 / 2), null,
+ null);
addAll(collection);
}
@@ -99,10 +132,15 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
*
* @param ordering Ordering relation
* @param map Map from values to nodes
+ * @param parentFunction Function to compute parents of a node; may be null
*/
- private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map) {
+ private PartiallyOrderedSet(Ordering<E> ordering, Map<E, Node<E>> map,
+ Function<E, Iterable<E>> childFunction,
+ Function<E, Iterable<E>> parentFunction) {
this.ordering = ordering;
this.map = map;
+ this.childFunction = childFunction;
+ this.parentFunction = parentFunction;
this.topNode = new TopBottomNode<>(true);
this.bottomNode = new TopBottomNode<>(false);
this.topNode.childList.add(bottomNode);
@@ -528,7 +566,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* Returns the values in this partially-ordered set that are less-than
* a given value and there are no intervening values.
*
- * <p>If the value is not in this set, returns the empty list.</p>
+ * <p>If the value is not in this set, returns null.
*
* @see #getDescendants
*
@@ -537,15 +575,33 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* value
*/
public List<E> getChildren(E e) {
+ return getChildren(e, false);
+ }
+
+ /**
+ * Returns the values in this partially-ordered set that are less-than
+ * a given value and there are no intervening values.
+ *
+ * <p>If the value is not in this set, returns null if {@code hypothetical}
+ * is false.
+ *
+ * @see #getDescendants
+ *
+ * @param e Value
+ * @param hypothetical Whether to generate a list if value is not in the set
+ * @return List of values in this set that are directly less than the given
+ * value
+ */
+ public List<E> getChildren(E e, boolean hypothetical) {
final Node<E> node = map.get(e);
if (node == null) {
- return null;
- } else if (node.childList.get(0).e == null) {
- // child list contains bottom element, so officially there are no
- // children
- return Collections.emptyList();
+ if (hypothetical) {
+ return strip(findChildren(e));
+ } else {
+ return null;
+ }
} else {
- return new StripList<>(node.childList);
+ return strip(node.childList);
}
}
@@ -553,7 +609,7 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* Returns the values in this partially-ordered set that are greater-than
* a given value and there are no intervening values.
*
- * <p>If the value is not in this set, returns the empty list.</p>
+ * <p>If the value is not in this set, returns null.
*
* @see #getAncestors
*
@@ -562,32 +618,61 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
* given value
*/
public List<E> getParents(E e) {
+ return getParents(e, false);
+ }
+
+ /**
+ * Returns the values in this partially-ordered set that are greater-than
+ * a given value and there are no intervening values.
+ *
+ * <p>If the value is not in this set, returns {@code null} if
+ * {@code hypothetical} is false.
+ *
+ * @see #getAncestors
+ *
+ * @param e Value
+ * @param hypothetical Whether to generate a list if value is not in the set
+ * @return List of values in this set that are directly greater than the
+ * given value
+ */
+ public List<E> getParents(E e, boolean hypothetical) {
final Node<E> node = map.get(e);
if (node == null) {
- return null;
- } else if (node.parentList.get(0).e == null) {
- // parent list contains top element, so officially there are no
- // parents
- return Collections.emptyList();
+ if (hypothetical) {
+ if (parentFunction != null) {
+ final List<E> list = new ArrayList<>();
+ closure(parentFunction, e, list, new HashSet<E>());
+ return list;
+ } else {
+ return ImmutableList.copyOf(strip(findParents(e)));
+ }
+ } else {
+ return null;
+ }
} else {
- return new StripList<>(node.parentList);
+ return strip(node.parentList);
}
}
- public List<E> getNonChildren() {
- if (topNode.childList.size() == 1
- && topNode.childList.get(0).e == null) {
- return Collections.emptyList();
+ private void closure(Function<E, Iterable<E>> generator, E e, List<E> list,
+ Set<E> set) {
+ for (E p : Preconditions.checkNotNull(generator.apply(e))) {
+ if (set.add(e)) {
+ if (map.containsKey(p)) {
+ list.add(p);
+ } else {
+ closure(generator, p, list, set);
+ }
+ }
}
- return new StripList<>(topNode.childList);
+ }
+
+ public List<E> getNonChildren() {
+ return strip(topNode.childList);
}
public List<E> getNonParents() {
- if (bottomNode.parentList.size() == 1
- && bottomNode.parentList.get(0).e == null) {
- return Collections.emptyList();
- }
- return new StripList<>(bottomNode.parentList);
+ return strip(bottomNode.parentList);
}
@Override public void clear() {
@@ -612,6 +697,57 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
return descendants(e, true);
}
+ /** Returns a list, backed by a list of
+ * {@link org.apache.calcite.util.PartiallyOrderedSet.Node}s, that strips
+ * away the node and returns the element inside.
+ *
+ * @param <E> Element type
+ */
+ public static <E> List<E> strip(List<Node<E>> list) {
+ if (list.size() == 1
+ && list.get(0).e == null) {
+ // If parent list contains top element, a node whose element is null,
+ // officially there are no parents.
+ // Similarly child list and bottom element.
+ return ImmutableList.of();
+ }
+ return Lists.transform(list,
+ new Function<Node<E>, E>() {
+ public E apply(Node<E> node) {
+ return node.e;
+ }
+ });
+ }
+
+ /** Converts an iterable of nodes into the list of the elements inside.
+ * If there is one node whose element is null, it represents a list
+ * containing either the top or bottom element, so we return the empty list.
+ *
+ * @param <E> Element type
+ */
+ private static <E> ImmutableList<E> strip(Iterable<Node<E>> iterable) {
+ final Iterator<Node<E>> iterator = iterable.iterator();
+ if (!iterator.hasNext()) {
+ return ImmutableList.of();
+ }
+ Node<E> node = iterator.next();
+ if (!iterator.hasNext()) {
+ if (node.e == null) {
+ return ImmutableList.of();
+ } else {
+ return ImmutableList.of(node.e);
+ }
+ }
+ final ImmutableList.Builder<E> builder = ImmutableList.builder();
+ for (;;) {
+ builder.add(node.e);
+ if (!iterator.hasNext()) {
+ return builder.build();
+ }
+ node = iterator.next();
+ }
+ }
+
/**
* Returns a list of values in the set that are less-than a given value.
* The list is in topological order but order is otherwise
@@ -725,27 +861,6 @@ public class PartiallyOrderedSet<E> extends AbstractSet<E> {
*/
boolean lessThan(E e1, E e2);
}
-
- /** List, backed by a list of {@link Node}s, that strips away the
- * node and returns the element inside.
- *
- * @param <E> Element type
- */
- private static class StripList<E> extends AbstractList<E> {
- private final List<Node<E>> list;
-
- StripList(List<Node<E>> list) {
- this.list = list;
- }
-
- @Override public E get(int index) {
- return list.get(index).e;
- }
-
- @Override public int size() {
- return list.size();
- }
- }
}
// End PartiallyOrderedSet.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
new file mode 100644
index 0000000..e45db8b
--- /dev/null
+++ b/core/src/test/java/org/apache/calcite/profile/ProfilerTest.java
@@ -0,0 +1,682 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.calcite.profile;
+
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.linq4j.AbstractEnumerable;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Enumerator;
+import org.apache.calcite.rel.metadata.NullSentinel;
+import org.apache.calcite.runtime.PredicateImpl;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.test.Matchers;
+import org.apache.calcite.util.ImmutableBitSet;
+import org.apache.calcite.util.JsonBuilder;
+import org.apache.calcite.util.Pair;
+
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Predicate;
+import com.google.common.base.Predicates;
+import com.google.common.base.Supplier;
+import com.google.common.collect.HashMultimap;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
+import com.google.common.collect.Ordering;
+
+import org.hamcrest.Matcher;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.sql.PreparedStatement;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Map;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+import static org.hamcrest.core.Is.is;
+import static org.junit.Assert.assertThat;
+
+/**
+ * Unit tests for {@link Profiler}.
+ */
+public class ProfilerTest {
+ @Test public void testProfileZeroRows() throws Exception {
+ final String sql = "select * from \"scott\".dept where false";
+ sql(sql).unordered(
+ "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:0.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:0.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:0.0}",
+ "{type:distribution,columns:[DEPTNO],values:[],cardinality:0.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:0.0}",
+ "{type:distribution,columns:[DNAME],values:[],cardinality:0.0}",
+ "{type:distribution,columns:[LOC],values:[],cardinality:0.0}",
+ "{type:distribution,columns:[],cardinality:0.0}",
+ "{type:rowCount,rowCount:0}",
+ "{type:unique,columns:[]}");
+ }
+
+ @Test public void testProfileOneRow() throws Exception {
+ final String sql = "select * from \"scott\".dept where deptno = 10";
+ sql(sql).unordered(
+ "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:1.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:1.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:1.0}",
+ "{type:distribution,columns:[DEPTNO],values:[10],cardinality:1.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:1.0}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING],cardinality:1.0}",
+ "{type:distribution,columns:[LOC],values:[NEWYORK],cardinality:1.0}",
+ "{type:distribution,columns:[],cardinality:1.0}",
+ "{type:rowCount,rowCount:1}",
+ "{type:unique,columns:[]}");
+ }
+
+ @Test public void testProfileTwoRows() throws Exception {
+ final String sql = "select * from \"scott\".dept where deptno in (10, 20)";
+ sql(sql).unordered(
+ "{type:distribution,columns:[DEPTNO,DNAME,LOC],cardinality:2.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:2.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:2.0}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20],cardinality:2.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:2.0}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH],cardinality:2.0}",
+ "{type:distribution,columns:[LOC],values:[DALLAS,NEWYORK],cardinality:2.0}",
+ "{type:distribution,columns:[],cardinality:1.0}",
+ "{type:rowCount,rowCount:2}",
+ "{type:unique,columns:[DEPTNO]}",
+ "{type:unique,columns:[DNAME]}",
+ "{type:unique,columns:[LOC]}");
+ }
+
+ @Test public void testProfileScott() throws Exception {
+ final String sql = "select * from \"scott\".emp\n"
+ + "join \"scott\".dept using (deptno)";
+ sql(sql)
+ .where(new PredicateImpl<Profiler.Statistic>() {
+ public boolean test(Profiler.Statistic statistic) {
+ return !(statistic instanceof Profiler.Distribution)
+ || ((Profiler.Distribution) statistic).cardinality < 14
+ && ((Profiler.Distribution) statistic).minimal;
+ }
+ }).unordered(
+ "{type:distribution,columns:[COMM,DEPTNO0],cardinality:5.0}",
+ "{type:distribution,columns:[COMM,DEPTNO],cardinality:5.0}",
+ "{type:distribution,columns:[COMM,DNAME],cardinality:5.0}",
+ "{type:distribution,columns:[COMM,LOC],cardinality:5.0}",
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:3.0}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0}",
+ "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0}",
+ "{type:distribution,columns:[JOB,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[JOB,DEPTNO0],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,DEPTNO],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,DNAME],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,LOC],cardinality:9.0}",
+ "{type:distribution,columns:[JOB,MGR,DEPTNO0],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR,DEPTNO],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR,DNAME],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR,LOC],cardinality:10.0}",
+ "{type:distribution,columns:[JOB,MGR],cardinality:8.0}",
+ "{type:distribution,columns:[JOB,SAL],cardinality:12.0}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0}",
+ "{type:distribution,columns:[MGR,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[MGR,DEPTNO0],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,DEPTNO],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,DNAME],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,LOC],cardinality:9.0}",
+ "{type:distribution,columns:[MGR,SAL],cardinality:12.0}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1}",
+ "{type:distribution,columns:[SAL,COMM],cardinality:5.0}",
+ "{type:distribution,columns:[SAL,DEPTNO0],cardinality:12.0}",
+ "{type:distribution,columns:[SAL,DEPTNO],cardinality:12.0}",
+ "{type:distribution,columns:[SAL,DNAME],cardinality:12.0}",
+ "{type:distribution,columns:[SAL,LOC],cardinality:12.0}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0}",
+ "{type:distribution,columns:[],cardinality:1.0}",
+ "{type:fd,columns:[DEPTNO0],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[DEPTNO0],dependentColumn:DNAME}",
+ "{type:fd,columns:[DEPTNO0],dependentColumn:LOC}",
+ "{type:fd,columns:[DEPTNO],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[DEPTNO],dependentColumn:DNAME}",
+ "{type:fd,columns:[DEPTNO],dependentColumn:LOC}",
+ "{type:fd,columns:[DNAME],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[DNAME],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[DNAME],dependentColumn:LOC}",
+ "{type:fd,columns:[JOB],dependentColumn:COMM}",
+ "{type:fd,columns:[LOC],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[LOC],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[LOC],dependentColumn:DNAME}",
+ "{type:fd,columns:[SAL],dependentColumn:DEPTNO0}",
+ "{type:fd,columns:[SAL],dependentColumn:DEPTNO}",
+ "{type:fd,columns:[SAL],dependentColumn:DNAME}",
+ "{type:fd,columns:[SAL],dependentColumn:JOB}",
+ "{type:fd,columns:[SAL],dependentColumn:LOC}",
+ "{type:fd,columns:[SAL],dependentColumn:MGR}",
+ "{type:rowCount,rowCount:14}",
+ "{type:unique,columns:[EMPNO]}",
+ "{type:unique,columns:[ENAME]}",
+ "{type:unique,columns:[HIREDATE,DEPTNO0]}",
+ "{type:unique,columns:[HIREDATE,DEPTNO]}",
+ "{type:unique,columns:[HIREDATE,DNAME]}",
+ "{type:unique,columns:[HIREDATE,LOC]}",
+ "{type:unique,columns:[HIREDATE,SAL]}",
+ "{type:unique,columns:[JOB,HIREDATE]}");
+ }
+
+ /** As {@link #testProfileScott()}, but prints only the most surprising
+ * distributions. */
+ @Test public void testProfileScott2() throws Exception {
+ scott().factory(Fluid.SIMPLE_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[HIREDATE,COMM],cardinality:5.0,expectedCardinality:12.682618485430247,surprise:0.4344728973121492}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR,COMM],cardinality:5.0,expectedCardinality:11.675074674157162,surprise:0.400302535646339}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL,COMM],cardinality:5.0,expectedCardinality:12.579960871109892,surprise:0.43117052004174}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** As {@link #testProfileScott2()}, but uses the breadth-first profiler.
+ * Results should be the same, but are slightly different (extra EMPNO
+ * and ENAME distributions). */
+ @Test public void testProfileScott3() throws Exception {
+ scott().factory(Fluid.BETTER_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** As {@link #testProfileScott3()}, but uses the breadth-first profiler
+ * and deems everything uninteresting. Only first-level combinations (those
+ * consisting of a single column) are computed. */
+ @Test public void testProfileScott4() throws Exception {
+ scott().factory(Fluid.INCURIOUS_PROFILER_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** As {@link #testProfileScott3()}, but uses the breadth-first profiler. */
+ @Ignore
+ @Test public void testProfileScott5() throws Exception {
+ scott().factory(Fluid.PROFILER_FACTORY).unordered(
+ "{type:distribution,columns:[COMM],values:[0.00,300.00,500.00,1400.00],cardinality:5.0,nullCount:10,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DEPTNO0],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,DNAME],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO,LOC],cardinality:3.0,expectedCardinality:7.269756624410332,surprise:0.41576025416819384}",
+ "{type:distribution,columns:[DEPTNO0,DNAME,LOC],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO0],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DEPTNO],values:[10,20,30],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[DNAME],values:[ACCOUNTING,RESEARCH,SALES],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[EMPNO],values:[7369,7499,7521,7566,7654,7698,7782,7788,7839,7844,7876,7900,7902,7934],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[ENAME],values:[ADAMS,ALLEN,BLAKE,CLARK,FORD,JAMES,JONES,KING,MARTIN,MILLER,SCOTT,SMITH,TURNER,WARD],cardinality:14.0,expectedCardinality:14.0,surprise:0.0}",
+ "{type:distribution,columns:[HIREDATE],values:[1980-12-17,1981-01-05,1981-02-04,1981-02-20,1981-02-22,1981-06-09,1981-09-08,1981-09-28,1981-11-17,1981-12-03,1982-01-23,1987-04-19,1987-05-23],cardinality:13.0,expectedCardinality:14.0,surprise:0.037037037037037035}",
+ "{type:distribution,columns:[JOB],values:[ANALYST,CLERK,MANAGER,PRESIDENT,SALESMAN],cardinality:5.0,expectedCardinality:14.0,surprise:0.47368421052631576}",
+ "{type:distribution,columns:[LOC],values:[CHICAGO,DALLAS,NEWYORK],cardinality:3.0,expectedCardinality:14.0,surprise:0.6470588235294118}",
+ "{type:distribution,columns:[MGR],values:[7566,7698,7782,7788,7839,7902],cardinality:7.0,nullCount:1,expectedCardinality:14.0,surprise:0.3333333333333333}",
+ "{type:distribution,columns:[SAL],values:[800.00,950.00,1100.00,1250.00,1300.00,1500.00,1600.00,2450.00,2850.00,2975.00,3000.00,5000.00],cardinality:12.0,expectedCardinality:14.0,surprise:0.07692307692307693}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** Profiles a star-join query on the Foodmart schema using the breadth-first
+ * profiler. */
+ @Ignore
+ @Test public void testProfileFoodmart() throws Exception {
+ foodmart().factory(Fluid.PROFILER_FACTORY).unordered(
+ "{type:distribution,columns:[brand_name],cardinality:111.0,expectedCardinality:86837.0,surprise:0.9974467497814786}",
+ "{type:distribution,columns:[cases_per_pallet],values:[5,6,7,8,9,10,11,12,13,14],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+ "{type:distribution,columns:[day_of_month],cardinality:30.0,expectedCardinality:86837.0,surprise:0.9993092889129359}",
+ "{type:distribution,columns:[fiscal_period],values:[],cardinality:1.0,nullCount:86837,expectedCardinality:86837.0,surprise:0.999976968608213}",
+ "{type:distribution,columns:[low_fat],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+ "{type:distribution,columns:[month_of_year],values:[1,2,3,4,5,6,7,8,9,10,11,12],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+ "{type:distribution,columns:[product_category],cardinality:45.0,expectedCardinality:86837.0,surprise:0.9989641122441932}",
+ "{type:distribution,columns:[product_class_id0,product_subcategory,product_category,product_department,product_family],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[product_class_id0],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[product_class_id],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[product_department],cardinality:22.0,expectedCardinality:86837.0,surprise:0.9994934318838578}",
+ "{type:distribution,columns:[product_family],values:[Drink,Food,Non-Consumable],cardinality:3.0,expectedCardinality:86837.0,surprise:0.9999309074159374}",
+ "{type:distribution,columns:[product_subcategory],cardinality:102.0,expectedCardinality:86837.0,surprise:0.997653527185728}",
+ "{type:distribution,columns:[quarter],values:[Q1,Q2,Q3,Q4],cardinality:4.0,expectedCardinality:86837.0,surprise:0.9999078776154121}",
+ "{type:distribution,columns:[recyclable_package],values:[false,true],cardinality:2.0,expectedCardinality:86837.0,surprise:0.9999539377468649}",
+ "{type:distribution,columns:[store_cost,fiscal_period],cardinality:10601.0,nullCount:86724,expectedCardinality:10.0,surprise:0.9981151635095655}",
+ "{type:distribution,columns:[store_cost,low_fat],cardinality:17673.0,expectedCardinality:20.0,surprise:0.99773921890013}",
+ "{type:distribution,columns:[store_cost,product_family],cardinality:19453.0,expectedCardinality:30.0,surprise:0.9969203921367346}",
+ "{type:distribution,columns:[store_cost,quarter],cardinality:29590.0,expectedCardinality:40.0,surprise:0.9973000337495781}",
+ "{type:distribution,columns:[store_cost,recyclable_package],cardinality:17847.0,expectedCardinality:20.0,surprise:0.9977612357978396}",
+ "{type:distribution,columns:[store_cost,the_year],cardinality:10944.0,expectedCardinality:10.0,surprise:0.9981741829468688}",
+ "{type:distribution,columns:[store_cost],cardinality:10.0,expectedCardinality:86837.0,surprise:0.9997697099496816}",
+ "{type:distribution,columns:[store_id],values:[2,3,6,7,11,13,14,15,16,17,22,23,24],cardinality:13.0,expectedCardinality:86837.0,surprise:0.9997006332757629}",
+ "{type:distribution,columns:[store_sales],cardinality:21.0,expectedCardinality:86837.0,surprise:0.999516452140275}",
+ "{type:distribution,columns:[the_day],values:[Friday,Monday,Saturday,Sunday,Thursday,Tuesday,Wednesday],cardinality:7.0,expectedCardinality:86837.0,surprise:0.9998387913960665}",
+ "{type:distribution,columns:[the_month],values:[April,August,December,February,January,July,June,March,May,November,October,September],cardinality:12.0,expectedCardinality:86837.0,surprise:0.9997236583034923}",
+ "{type:distribution,columns:[the_year],values:[1997],cardinality:1.0,expectedCardinality:86837.0,surprise:0.999976968608213}",
+ "{type:distribution,columns:[unit_sales],values:[1.0000,2.0000,3.0000,4.0000,5.0000,6.0000],cardinality:6.0,expectedCardinality:86837.0,surprise:0.999861819605495}",
+ "{type:distribution,columns:[units_per_case],cardinality:36.0,expectedCardinality:86837.0,surprise:0.9991712039413857}",
+ "{type:distribution,columns:[week_of_year],cardinality:52.0,expectedCardinality:86837.0,surprise:0.9988030705843087}",
+ "{type:distribution,columns:[],cardinality:1.0,expectedCardinality:1.0,surprise:0.0}");
+ }
+
+ /** Tests
+ * {@link org.apache.calcite.profile.ProfilerImpl.SurpriseQueue}. */
+ @Test public void testSurpriseQueue() {
+ ProfilerImpl.SurpriseQueue q = new ProfilerImpl.SurpriseQueue(4, 3);
+ assertThat(q.offer(2), is(true));
+ assertThat(q.toString(), is("min: 2.0, contents: [2.0]"));
+ assertThat(q.isValid(), is(true));
+
+ assertThat(q.offer(4), is(true));
+ assertThat(q.toString(), is("min: 2.0, contents: [2.0, 4.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Since we're in the warm-up period, a value lower than the minimum is
+ // accepted.
+ assertThat(q.offer(1), is(true));
+ assertThat(q.toString(), is("min: 1.0, contents: [2.0, 4.0, 1.0]"));
+ assertThat(q.isValid(), is(true));
+
+ assertThat(q.offer(5), is(true));
+ assertThat(q.toString(), is("min: 1.0, contents: [4.0, 1.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ assertThat(q.offer(3), is(true));
+ assertThat(q.toString(), is("min: 1.0, contents: [1.0, 5.0, 3.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Duplicate entry
+ assertThat(q.offer(5), is(true));
+ assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Now that the list is full, a value below the minimum is refused.
+ // "offer" returns false, and the value is not added to the queue.
+ // Thus the median never decreases.
+ assertThat(q.offer(2), is(false));
+ assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Same applies for a value equal to the minimum.
+ assertThat(q.offer(3), is(false));
+ assertThat(q.toString(), is("min: 3.0, contents: [5.0, 3.0, 5.0]"));
+ assertThat(q.isValid(), is(true));
+
+ // Add a value that is above the minimum.
+ assertThat(q.offer(4.5), is(true));
+ assertThat(q.toString(), is("min: 3.0, contents: [3.0, 5.0, 4.5]"));
+ assertThat(q.isValid(), is(true));
+ }
+
+ private Fluid scott() throws Exception {
+ final String sql = "select * from \"scott\".emp\n"
+ + "join \"scott\".dept using (deptno)";
+ return sql(sql)
+ .where(Fluid.STATISTIC_PREDICATE)
+ .sort(Fluid.ORDERING.reverse())
+ .limit(30)
+ .project(Fluid.EXTENDED_COLUMNS);
+ }
+
+ private Fluid foodmart() throws Exception {
+ final String sql = "select \"s\".*, \"p\".*, \"t\".*, \"pc\".*\n"
+ + "from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+ + "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
+ + "join \"foodmart\".\"time_by_day\" as \"t\" using (\"time_id\")\n"
+ + "join \"foodmart\".\"product_class\" as \"pc\"\n"
+ + " on \"p\".\"product_class_id\" = \"pc\".\"product_class_id\"\n";
+ return sql(sql)
+ .config(CalciteAssert.Config.JDBC_FOODMART)
+ .where(Fluid.STATISTIC_PREDICATE)
+ .sort(Fluid.ORDERING.reverse())
+ .limit(30)
+ .project(Fluid.EXTENDED_COLUMNS);
+ }
+
+ private static Fluid sql(String sql) {
+ return new Fluid(CalciteAssert.Config.SCOTT, sql, Fluid.SIMPLE_FACTORY,
+ Predicates.<Profiler.Statistic>alwaysTrue(), null, -1,
+ Fluid.DEFAULT_COLUMNS);
+ }
+
+ /** Fluid interface for writing profiler test cases. */
+ private static class Fluid {
+ static final Supplier<Profiler> SIMPLE_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ return new SimpleProfiler();
+ }
+ };
+
+ static final Supplier<Profiler> BETTER_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>>
+ predicate = Predicates.alwaysTrue();
+ return new ProfilerImpl(600, 200, predicate);
+ }
+ };
+
+ static final Ordering<Profiler.Statistic> ORDERING =
+ new Ordering<Profiler.Statistic>() {
+ public int compare(Profiler.Statistic left,
+ Profiler.Statistic right) {
+ int c = left.getClass().getSimpleName()
+ .compareTo(right.getClass().getSimpleName());
+ if (c == 0
+ && left instanceof Profiler.Distribution
+ && right instanceof Profiler.Distribution) {
+ final Profiler.Distribution d0 = (Profiler.Distribution) left;
+ final Profiler.Distribution d1 = (Profiler.Distribution) right;
+ c = Double.compare(d0.surprise(), d1.surprise());
+ if (c == 0) {
+ c = d0.columns.toString().compareTo(d1.columns.toString());
+ }
+ }
+ return c;
+ }
+ };
+
+ static final Predicate<Profiler.Statistic> STATISTIC_PREDICATE =
+ new PredicateImpl<Profiler.Statistic>() {
+ public boolean test(Profiler.Statistic statistic) {
+ // Include distributions of zero columns (the grand total)
+ // and singleton columns, plus "surprising" distributions
+ // (with significantly higher NDVs than predicted from their
+ // constituent columns).
+ return statistic instanceof Profiler.Distribution
+ && (((Profiler.Distribution) statistic).columns.size() < 2
+ || ((Profiler.Distribution) statistic).surprise() > 0.4D)
+ && ((Profiler.Distribution) statistic).minimal;
+ }
+ };
+
+ static final List<String> DEFAULT_COLUMNS =
+ ImmutableList.of("type", "distribution", "columns", "cardinality",
+ "values", "nullCount", "dependentColumn", "rowCount");
+
+ static final List<String> EXTENDED_COLUMNS =
+ ImmutableList.<String>builder().addAll(DEFAULT_COLUMNS)
+ .add("expectedCardinality", "surprise")
+ .build();
+
+ private static final Supplier<Profiler> PROFILER_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ return new ProfilerImpl(7500, 100,
+ new PredicateImpl<Pair<ProfilerImpl.Space, Profiler.Column>>() {
+ public boolean test(
+ Pair<ProfilerImpl.Space, Profiler.Column> p) {
+ final Profiler.Distribution distribution =
+ p.left.distribution();
+ if (distribution == null) {
+ // We don't have a distribution yet, because this space
+ // has not yet been evaluated. Let's do it anyway.
+ return true;
+ }
+ return distribution.surprise() >= 0.3D;
+ }
+ });
+ }
+ };
+
+ private static final Supplier<Profiler> INCURIOUS_PROFILER_FACTORY =
+ new Supplier<Profiler>() {
+ public Profiler get() {
+ final Predicate<Pair<ProfilerImpl.Space, Profiler.Column>> p =
+ Predicates.alwaysFalse();
+ return new ProfilerImpl(10, 200, p);
+ }
+ };
+
+ private final String sql;
+ private final List<String> columns;
+ private final Comparator<Profiler.Statistic> comparator;
+ private final int limit;
+ private final Predicate<Profiler.Statistic> predicate;
+ private final Supplier<Profiler> factory;
+ private final CalciteAssert.Config config;
+
+ Fluid(CalciteAssert.Config config, String sql, Supplier<Profiler> factory,
+ Predicate<Profiler.Statistic> predicate,
+ Comparator<Profiler.Statistic> comparator, int limit,
+ List<String> columns) {
+ this.sql = Preconditions.checkNotNull(sql);
+ this.factory = Preconditions.checkNotNull(factory);
+ this.columns = ImmutableList.copyOf(columns);
+ this.predicate = Preconditions.checkNotNull(predicate);
+ this.comparator = comparator; // null means sort on JSON representation
+ this.limit = limit;
+ this.config = config;
+ }
+
+ Fluid config(CalciteAssert.Config config) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid factory(Supplier<Profiler> factory) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid project(List<String> columns) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid sort(Ordering<Profiler.Statistic> comparator) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid limit(int limit) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid where(Predicate<Profiler.Statistic> predicate) {
+ return new Fluid(config, sql, factory, predicate, comparator, limit,
+ columns);
+ }
+
+ Fluid unordered(String... lines) throws Exception {
+ return check(Matchers.equalsUnordered(lines));
+ }
+
+ public Fluid check(final Matcher<Iterable<String>> matcher)
+ throws Exception {
+ CalciteAssert.that(config)
+ .doWithConnection(new Function<CalciteConnection, Void>() {
+ public Void apply(CalciteConnection c) {
+ try (PreparedStatement s = c.prepareStatement(sql)) {
+ final ResultSetMetaData m = s.getMetaData();
+ final List<Profiler.Column> columns = new ArrayList<>();
+ final int columnCount = m.getColumnCount();
+ for (int i = 0; i < columnCount; i++) {
+ columns.add(new Profiler.Column(i, m.getColumnLabel(i + 1)));
+ }
+
+ // Create an initial group for each table in the query.
+ // Columns in the same table will tend to have the same
+ // cardinality as the table, and as the table's primary key.
+ final Multimap<String, Integer> groups = HashMultimap.create();
+ for (int i = 0; i < m.getColumnCount(); i++) {
+ groups.put(m.getTableName(i + 1), i);
+ }
+ final SortedSet<ImmutableBitSet> initialGroups =
+ new TreeSet<>();
+ for (Collection<Integer> integers : groups.asMap().values()) {
+ initialGroups.add(ImmutableBitSet.of(integers));
+ }
+ final Profiler p = factory.get();
+ final Enumerable<List<Comparable>> rows = getRows(s);
+ final Profiler.Profile profile =
+ p.profile(rows, columns, initialGroups);
+ final List<Profiler.Statistic> statistics =
+ ImmutableList.copyOf(
+ Iterables.filter(profile.statistics(), predicate));
+
+ // If no comparator specified, use the function that converts to
+ // JSON strings
+ final Function<Profiler.Statistic, String> toJson =
+ toJsonFunction();
+ Ordering<Profiler.Statistic> comp = comparator != null
+ ? Ordering.from(comparator)
+ : Ordering.natural().onResultOf(toJson);
+ ImmutableList<Profiler.Statistic> statistics2 =
+ comp.immutableSortedCopy(statistics);
+ if (limit >= 0 && limit < statistics2.size()) {
+ statistics2 = statistics2.subList(0, limit);
+ }
+
+ final List<String> strings =
+ Lists.transform(statistics2, toJson);
+ assertThat(strings, matcher);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ return null;
+ }
+ });
+ return this;
+ }
+
+ /** Returns a function that converts a statistic to a JSON string. */
+ Function<Profiler.Statistic, String> toJsonFunction() {
+ return new Function<Profiler.Statistic, String>() {
+ final JsonBuilder jb = new JsonBuilder();
+
+ public String apply(Profiler.Statistic statistic) {
+ Object map = statistic.toMap(jb);
+ if (map instanceof Map) {
+ @SuppressWarnings("unchecked")
+ final Map<String, Object> map1 = (Map) map;
+ map1.keySet().retainAll(Fluid.this.columns);
+ }
+ final String json = jb.toJsonString(map);
+ return json.replaceAll("\n", "").replaceAll(" ", "")
+ .replaceAll("\"", "");
+ }
+ };
+ }
+
+ private Enumerable<List<Comparable>> getRows(final PreparedStatement s) {
+ return new AbstractEnumerable<List<Comparable>>() {
+ public Enumerator<List<Comparable>> enumerator() {
+ try {
+ final ResultSet r = s.executeQuery();
+ return getListEnumerator(r, r.getMetaData().getColumnCount());
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+
+ private Enumerator<List<Comparable>> getListEnumerator(
+ final ResultSet r, final int columnCount) {
+ return new Enumerator<List<Comparable>>() {
+ final Comparable[] values = new Comparable[columnCount];
+
+ public List<Comparable> current() {
+ for (int i = 0; i < columnCount; i++) {
+ try {
+ final Comparable value = (Comparable) r.getObject(i + 1);
+ values[i] = NullSentinel.mask(value);
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ return ImmutableList.copyOf(values);
+ }
+
+ public boolean moveNext() {
+ try {
+ return r.next();
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public void reset() {
+ }
+
+ public void close() {
+ try {
+ r.close();
+ } catch (SQLException e) {
+ throw new RuntimeException(e);
+ }
+ }
+ };
+ }
+ }
+}
+
+// End ProfilerTest.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
index d577057..585e8ed 100644
--- a/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
+++ b/core/src/test/java/org/apache/calcite/test/CalciteSuite.java
@@ -28,6 +28,7 @@ import org.apache.calcite.plan.volcano.TraitPropagationTest;
import org.apache.calcite.plan.volcano.VolcanoPlannerTest;
import org.apache.calcite.plan.volcano.VolcanoPlannerTraitTest;
import org.apache.calcite.prepare.LookupOperatorOverloadsTest;
+import org.apache.calcite.profile.ProfilerTest;
import org.apache.calcite.rel.RelCollationTest;
import org.apache.calcite.rel.RelDistributionTest;
import org.apache.calcite.rel.rel2sql.RelToSqlConverterTest;
@@ -156,6 +157,7 @@ import org.junit.runners.Suite;
LinqFrontJdbcBackTest.class,
JdbcFrontJdbcBackLinqMiddleTest.class,
CalciteSqlOperatorTest.class,
+ ProfilerTest.class,
LatticeTest.class,
ReflectiveSchemaTest.class,
JdbcTest.class,
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
index 3f36bf9..4eae872 100644
--- a/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
+++ b/core/src/test/java/org/apache/calcite/test/FoodMartLatticeStatisticProvider.java
@@ -23,6 +23,8 @@ import org.apache.calcite.materialize.Lattices;
import com.google.common.collect.ImmutableMap;
+import java.util.ArrayList;
+import java.util.List;
import java.util.Map;
/**
@@ -33,10 +35,15 @@ import java.util.Map;
*/
public class FoodMartLatticeStatisticProvider
extends DelegatingLatticeStatisticProvider {
- public static final FoodMartLatticeStatisticProvider INSTANCE =
- new FoodMartLatticeStatisticProvider(Lattices.CACHED_SQL);
+ public static final FoodMartLatticeStatisticProvider.Factory FACTORY =
+ new Factory() {
+ public LatticeStatisticProvider apply(Lattice lattice) {
+ return new FoodMartLatticeStatisticProvider(lattice,
+ Lattices.CACHED_SQL.apply(lattice));
+ }
+ };
- public static final Map<String, Integer> CARDINALITY_MAP =
+ private static final Map<String, Integer> CARDINALITY_MAP =
ImmutableMap.<String, Integer>builder()
.put("brand_name", 111)
.put("cases_per_pallet", 10)
@@ -75,18 +82,29 @@ public class FoodMartLatticeStatisticProvider
.put("week_of_year", 52)
.build();
- private FoodMartLatticeStatisticProvider(LatticeStatisticProvider provider) {
+ private final Lattice lattice;
+
+ private FoodMartLatticeStatisticProvider(Lattice lattice,
+ LatticeStatisticProvider provider) {
super(provider);
+ this.lattice = lattice;
}
- /** Returns an estimate of the number of distinct values in a column. */
- public int cardinality(Lattice lattice, Lattice.Column column) {
+ private int cardinality(Lattice.Column column) {
final Integer integer = CARDINALITY_MAP.get(column.alias);
if (integer != null && integer > 0) {
return integer;
}
return column.alias.length();
}
+
+ @Override public double cardinality(List<Lattice.Column> columns) {
+ final List<Double> cardinalityList = new ArrayList<>();
+ for (Lattice.Column column : columns) {
+ cardinalityList.add((double) cardinality(column));
+ }
+ return Lattice.getRowCount(lattice.getFactRowCount(), cardinalityList);
+ }
}
// End FoodMartLatticeStatisticProvider.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/LatticeTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/LatticeTest.java b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
index 54f4ea7..473e5f5 100644
--- a/core/src/test/java/org/apache/calcite/test/LatticeTest.java
+++ b/core/src/test/java/org/apache/calcite/test/LatticeTest.java
@@ -16,11 +16,16 @@
*/
package org.apache.calcite.test;
+import org.apache.calcite.jdbc.CalciteConnection;
+import org.apache.calcite.jdbc.CalciteSchema;
+import org.apache.calcite.materialize.Lattice;
import org.apache.calcite.materialize.Lattices;
import org.apache.calcite.materialize.MaterializationService;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.runtime.Hook;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.TestUtil;
import org.apache.calcite.util.Util;
@@ -29,6 +34,7 @@ import com.google.common.base.Throwables;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
+import org.junit.Assume;
import org.junit.Ignore;
import org.junit.Test;
@@ -39,11 +45,15 @@ import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.Arrays;
import java.util.List;
+import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
+import static org.apache.calcite.test.Matchers.within;
+
import static org.hamcrest.CoreMatchers.anyOf;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertThat;
@@ -110,8 +120,8 @@ public class LatticeTest {
+ " } ]\n"
+ "}\n";
- private CalciteAssert.AssertThat modelWithLattice(String name, String sql,
- String... extras) {
+ private static CalciteAssert.AssertThat modelWithLattice(String name,
+ String sql, String... extras) {
final StringBuilder buf = new StringBuilder("{ name: '")
.append(name)
.append("', sql: ")
@@ -123,7 +133,8 @@ public class LatticeTest {
return modelWithLattices(buf.toString());
}
- private CalciteAssert.AssertThat modelWithLattices(String... lattices) {
+ private static CalciteAssert.AssertThat modelWithLattices(
+ String... lattices) {
final Class<JdbcTest.EmpDeptTableFactory> clazz =
JdbcTest.EmpDeptTableFactory.class;
return CalciteAssert.model(""
@@ -153,6 +164,36 @@ public class LatticeTest {
/** Tests that it's OK for a lattice to have the same name as a table in the
* schema. */
+ @Test public void testLatticeSql() throws Exception {
+ modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
+ .doWithConnection(new Function<CalciteConnection, Void>() {
+ public Void apply(CalciteConnection c) {
+ final SchemaPlus schema = c.getRootSchema();
+ final SchemaPlus adhoc = schema.getSubSchema("adhoc");
+ assertThat(adhoc.getTableNames().contains("EMPLOYEES"), is(true));
+ final Map.Entry<String, CalciteSchema.LatticeEntry> entry =
+ adhoc.unwrap(CalciteSchema.class).getLatticeMap().firstEntry();
+ final Lattice lattice = entry.getValue().getLattice();
+ final String sql = "SELECT \"days\".\"day\"\n"
+ + "FROM \"foodmart\".\"days\" AS \"days\"\n"
+ + "GROUP BY \"days\".\"day\"";
+ assertThat(
+ lattice.sql(ImmutableBitSet.of(0),
+ ImmutableList.<Lattice.Measure>of()), is(sql));
+ final String sql2 = "SELECT"
+ + " \"days\".\"day\", \"days\".\"week_day\"\n"
+ + "FROM \"foodmart\".\"days\" AS \"days\"";
+ assertThat(
+ lattice.sql(ImmutableBitSet.of(0, 1), false,
+ ImmutableList.<Lattice.Measure>of()),
+ is(sql2));
+ return null;
+ }
+ });
+ }
+
+ /** Tests that it's OK for a lattice to have the same name as a table in the
+ * schema. */
@Test public void testLatticeWithSameNameAsTable() {
modelWithLattice("EMPLOYEES", "select * from \"foodmart\".\"days\"")
.query("select count(*) from EMPLOYEES")
@@ -377,28 +418,57 @@ public class LatticeTest {
* Use optimization algorithm to suggest which tiles of a lattice to
* materialize</a>. */
@Test public void testTileAlgorithm() {
- checkTileAlgorithm(FoodMartLatticeStatisticProvider.class.getCanonicalName(),
- "EnumerableAggregate(group=[{2, 3}])\n"
- + " EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31}]])");
+ final String explain = "EnumerableAggregate(group=[{2, 3}])\n"
+ + " EnumerableTableScan(table=[[adhoc, m{16, 17, 27, 31, 32, 37}]])";
+ checkTileAlgorithm(
+ FoodMartLatticeStatisticProvider.class.getCanonicalName() + "#FACTORY",
+ explain);
}
+ /** As {@link #testTileAlgorithm()}, but uses the
+ * {@link Lattices#CACHED_SQL} statistics provider. */
@Test public void testTileAlgorithm2() {
// Different explain than above, but note that it still selects columns
// (27, 31).
+ final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+ + " EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
checkTileAlgorithm(Lattices.class.getCanonicalName() + "#CACHED_SQL",
- "EnumerableAggregate(group=[{0, 1}])\n"
- + " EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]");
+ explain);
+ }
+
+ /** As {@link #testTileAlgorithm()}, but uses the
+ * {@link Lattices#PROFILER} statistics provider. */
+ @Test public void testTileAlgorithm3() {
+ Assume.assumeTrue("Yahoo sketches requires JDK 8 or higher",
+ TestUtil.getJavaMajorVersion() >= 8);
+ final String explain = "EnumerableAggregate(group=[{0, 1}])\n"
+ + " EnumerableTableScan(table=[[adhoc, m{27, 31, 32, 36, 37}]";
+ checkTileAlgorithm(Lattices.class.getCanonicalName() + "#PROFILER",
+ explain);
}
private void checkTileAlgorithm(String statisticProvider,
String expectedExplain) {
MaterializationService.setThreadLocal();
MaterializationService.instance().clear();
- foodmartModel(
- " auto: false,\n"
+ foodmartLatticeModel(statisticProvider)
+ .query("select distinct t.\"the_year\", t.\"quarter\"\n"
+ + "from \"foodmart\".\"sales_fact_1997\" as s\n"
+ + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
+ .enableMaterializations(true)
+ .explainContains(expectedExplain)
+ .returnsUnordered("the_year=1997; quarter=Q1",
+ "the_year=1997; quarter=Q2",
+ "the_year=1997; quarter=Q3",
+ "the_year=1997; quarter=Q4");
+ }
+
+ private static CalciteAssert.AssertThat foodmartLatticeModel(
+ String statisticProvider) {
+ return foodmartModel(" auto: false,\n"
+ " algorithm: true,\n"
+ " algorithmMaxMillis: -1,\n"
- + " rowCountEstimate: 86000,\n"
+ + " rowCountEstimate: 87000,\n"
+ " defaultMeasures: [ {\n"
+ " agg: 'sum',\n"
+ " args: 'unit_sales'\n"
@@ -414,17 +484,7 @@ public class LatticeTest {
+ " tiles: [ {\n"
+ " dimensions: [ 'the_year', ['t', 'quarter'] ],\n"
+ " measures: [ ]\n"
- + " } ]\n")
- .query("select distinct t.\"the_year\", t.\"quarter\"\n"
- + "from \"foodmart\".\"sales_fact_1997\" as s\n"
- + "join \"foodmart\".\"time_by_day\" as t using (\"time_id\")\n")
- .enableMaterializations(true)
- .explainContains(expectedExplain)
- .returnsUnordered("the_year=1997; quarter=Q1",
- "the_year=1997; quarter=Q2",
- "the_year=1997; quarter=Q3",
- "the_year=1997; quarter=Q4")
- .returnsCount(4);
+ + " } ]\n");
}
/** Tests a query that is created within {@link #testTileAlgorithm()}. */
@@ -704,7 +764,7 @@ public class LatticeTest {
.returns("EXPR$0=1\n");
}
- private CalciteAssert.AssertThat foodmartModel(String... extras) {
+ private static CalciteAssert.AssertThat foodmartModel(String... extras) {
return modelWithLattice("star",
"select 1 from \"foodmart\".\"sales_fact_1997\" as \"s\"\n"
+ "join \"foodmart\".\"product\" as \"p\" using (\"product_id\")\n"
@@ -741,6 +801,17 @@ public class LatticeTest {
System.out.println(CalciteAssert.toString(resultSet));
connection.close();
}
+
+ /** Unit test for {@link Lattice#getRowCount(double, List)}. */
+ @Test public void testColumnCount() {
+ assertThat(Lattice.getRowCount(10, 2, 3), within(5.03D, 0.01D));
+ assertThat(Lattice.getRowCount(10, 9, 8), within(9.4D, 0.01D));
+ assertThat(Lattice.getRowCount(100, 9, 8), within(54.2D, 0.1D));
+ assertThat(Lattice.getRowCount(1000, 9, 8), within(72D, 0.01D));
+ assertThat(Lattice.getRowCount(1000, 1, 1), is(1D));
+ assertThat(Lattice.getRowCount(1, 3, 5), within(1D, 0.01D));
+ assertThat(Lattice.getRowCount(1, 3, 5, 13, 4831), within(1D, 0.01D));
+ }
}
// End LatticeTest.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/test/Matchers.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/test/Matchers.java b/core/src/test/java/org/apache/calcite/test/Matchers.java
index fc9e0fd..08733a2 100644
--- a/core/src/test/java/org/apache/calcite/test/Matchers.java
+++ b/core/src/test/java/org/apache/calcite/test/Matchers.java
@@ -16,10 +16,16 @@
*/
package org.apache.calcite.test;
+import org.apache.calcite.util.Util;
+
+import com.google.common.base.Functions;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
+import org.hamcrest.BaseMatcher;
import org.hamcrest.CustomTypeSafeMatcher;
import org.hamcrest.Description;
+import org.hamcrest.Factory;
import org.hamcrest.Matcher;
import java.sql.ResultSet;
@@ -79,6 +85,81 @@ public class Matchers {
}
};
}
+
+ public static <E extends Comparable> Matcher<Iterable<E>> equalsUnordered(
+ E... lines) {
+ final List<String> expectedList =
+ Lists.newArrayList(toStringList(Arrays.asList(lines)));
+ Collections.sort(expectedList);
+ final String description = Util.lines(expectedList);
+ return new CustomTypeSafeMatcher<Iterable<E>>(description) {
+ @Override protected void describeMismatchSafely(Iterable<E> actuals,
+ Description description) {
+ final List<String> actualList =
+ Lists.newArrayList(toStringList(actuals));
+ Collections.sort(actualList);
+ description.appendText("was ")
+ .appendValue(Util.lines(actualList));
+ }
+
+ protected boolean matchesSafely(Iterable<E> actuals) {
+ final List<String> actualList =
+ Lists.newArrayList(toStringList(actuals));
+ Collections.sort(actualList);
+ return actualList.equals(expectedList);
+ }
+ };
+ }
+
+ private static <E> Iterable<String> toStringList(Iterable<E> items) {
+ return Iterables.transform(items, Functions.toStringFunction());
+ }
+
+ /**
+ * Creates a matcher that matches when the examined object is within
+ * {@code epsilon} of the specified <code>operand</code>.
+ */
+ @Factory
+ public static <T extends Number> Matcher<T> within(T value, double epsilon) {
+ return new IsWithin<T>(value, epsilon);
+ }
+
+ /**
+ * Is the numeric value within a given difference another value?
+ *
+ * @param <T> Value type
+ */
+ public static class IsWithin<T extends Number> extends BaseMatcher<T> {
+ private final T expectedValue;
+ private final double epsilon;
+
+ public IsWithin(T expectedValue, double epsilon) {
+ this.expectedValue = expectedValue;
+ this.epsilon = epsilon;
+ }
+
+ public boolean matches(Object actualValue) {
+ return areEqual(actualValue, expectedValue, epsilon);
+ }
+
+ public void describeTo(Description description) {
+ description.appendValue(expectedValue + " +/-" + epsilon);
+ }
+
+ private static boolean areEqual(Object actual, Number expected,
+ double epsilon) {
+ if (actual == null) {
+ return expected == null;
+ }
+ if (actual.equals(expected)) {
+ return true;
+ }
+ final double a = ((Number) actual).doubleValue();
+ final double min = expected.doubleValue() - epsilon;
+ final double max = expected.doubleValue() + epsilon;
+ return min <= a && a <= max;
+ }
+ }
}
// End Matchers.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
index d37c223..8a6a9da 100644
--- a/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
+++ b/core/src/test/java/org/apache/calcite/util/PartiallyOrderedSetTest.java
@@ -18,9 +18,13 @@ package org.apache.calcite.util;
import org.apache.calcite.test.CalciteAssert;
+import com.google.common.base.Function;
+
+import org.junit.Assume;
import org.junit.Test;
import java.util.AbstractList;
+import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.List;
@@ -28,7 +32,10 @@ import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
+import static org.hamcrest.CoreMatchers.nullValue;
+import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
/**
@@ -133,6 +140,13 @@ public class PartiallyOrderedSetTest {
// "bcd" is child of "abcd" and parent of ""
final String bcd = "'bcd'";
+ assertEquals("['abcd']", poset.getParents(bcd, true).toString());
+ assertThat(poset.getParents(bcd, false), nullValue());
+ assertThat(poset.getParents(bcd), nullValue());
+ assertEquals("['']", poset.getChildren(bcd, true).toString());
+ assertThat(poset.getChildren(bcd, false), nullValue());
+ assertThat(poset.getChildren(bcd), nullValue());
+
poset.add(bcd);
printValidate(poset);
assertTrue(poset.isValid(false));
@@ -210,6 +224,66 @@ public class PartiallyOrderedSetTest {
printValidate(poset);
}
+ @Test public void testPosetBitsLarge() {
+ final PartiallyOrderedSet<Integer> poset =
+ new PartiallyOrderedSet<>(IS_BIT_SUPERSET);
+ checkPosetBitsLarge(poset, 30000, 2921, 164782);
+ }
+
+ @Test public void testPosetBitsLarge2() {
+ Assume.assumeTrue("too slow to run every day", CalciteAssert.ENABLE_SLOW);
+ final int n = 30000;
+ final PartiallyOrderedSet<Integer> poset =
+ new PartiallyOrderedSet<>(IS_BIT_SUPERSET,
+ new Function<Integer, Iterable<Integer>>() {
+ public Iterable<Integer> apply(Integer input) {
+ final int i = input;
+ int r = i; // bits not yet cleared
+ final List<Integer> list = new ArrayList<>();
+ for (int z = 1; r != 0; z <<= 1) {
+ if ((i & z) != 0) {
+ list.add(i ^ z);
+ r ^= z;
+ }
+ }
+ return list;
+ }
+ },
+ new Function<Integer, Iterable<Integer>>() {
+ public Iterable<Integer> apply(Integer input) {
+ final int i = input;
+ final List<Integer> list = new ArrayList<>();
+ for (int z = 1; z <= n; z <<= 1) {
+ if ((i & z) == 0) {
+ list.add(i | z);
+ }
+ }
+ return list;
+ }
+ });
+ checkPosetBitsLarge(poset, n, 2921, 11961);
+ }
+
+ void checkPosetBitsLarge(PartiallyOrderedSet<Integer> poset, int n,
+ int expectedSize, int expectedParentCount) {
+ final Random random = new Random(1);
+ int count = 0;
+ int parentCount = 0;
+ for (int i = 0; i < n; i++) {
+ if (random.nextInt(10) == 0) {
+ if (poset.add(random.nextInt(n * 2))) {
+ ++count;
+ }
+ }
+ final List<Integer> parents =
+ poset.getParents(random.nextInt(n * 2), true);
+ parentCount += parents.size();
+ }
+ assertThat(poset.size(), is(count));
+ assertThat(poset.size(), is(expectedSize));
+ assertThat(parentCount, is(expectedParentCount));
+ }
+
@Test public void testPosetBitsRemoveParent() {
final PartiallyOrderedSet<Integer> poset =
new PartiallyOrderedSet<Integer>(IS_BIT_SUPERSET);
@@ -223,9 +297,6 @@ public class PartiallyOrderedSetTest {
}
@Test public void testDivisorPoset() {
- if (!CalciteAssert.ENABLE_SLOW) {
- return;
- }
PartiallyOrderedSet<Integer> integers =
new PartiallyOrderedSet<Integer>(IS_DIVISOR, range(1, 1000));
assertEquals(
@@ -405,6 +476,7 @@ public class PartiallyOrderedSetTest {
expected,
new TreeSet<String>(ss).toString());
}
+
}
// End PartiallyOrderedSetTest.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/core/src/test/java/org/apache/calcite/util/TestUtil.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/calcite/util/TestUtil.java b/core/src/test/java/org/apache/calcite/util/TestUtil.java
index 5a283d2..b530d3b 100644
--- a/core/src/test/java/org/apache/calcite/util/TestUtil.java
+++ b/core/src/test/java/org/apache/calcite/util/TestUtil.java
@@ -34,6 +34,9 @@ public abstract class TestUtil {
private static final String LINE_BREAK =
"\\\\n\"" + Util.LINE_SEPARATOR + " + \"";
+ private static final String JAVA_VERSION =
+ System.getProperties().getProperty("java.version");
+
//~ Methods ----------------------------------------------------------------
public static void assertEqualsVerbose(
@@ -190,6 +193,20 @@ public abstract class TestUtil {
.replaceAll("\\[", "\\\\[")
.replaceAll("\\]", "\\\\]");
}
+
+ /** Returns the Java major version: 7 for JDK 1.7, 8 for JDK 8, 10 for
+ * JDK 10, etc. */
+ public static int getJavaMajorVersion() {
+ if (JAVA_VERSION.startsWith("1.7")) {
+ return 7;
+ } else if (JAVA_VERSION.startsWith("1.8")) {
+ return 8;
+ } else if (JAVA_VERSION.startsWith("1.9")) {
+ return 9;
+ } else {
+ return 10;
+ }
+ }
}
// End TestUtil.java
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
----------------------------------------------------------------------
diff --git a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
index 7dee05d..eaf9c91 100644
--- a/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
+++ b/plus/src/test/java/org/apache/calcite/adapter/tpch/TpchTest.java
@@ -19,6 +19,7 @@ package org.apache.calcite.adapter.tpch;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.util.TestUtil;
import org.apache.calcite.util.Util;
import com.google.common.base.Function;
@@ -40,11 +41,8 @@ import static org.junit.Assert.assertThat;
* if {@code -Dcalcite.test.slow} is specified on the command-line.
* (See {@link org.apache.calcite.test.CalciteAssert#ENABLE_SLOW}.)</p> */
public class TpchTest {
- public static final String JAVA_VERSION =
- System.getProperties().getProperty("java.version");
-
public static final boolean ENABLE =
- CalciteAssert.ENABLE_SLOW && JAVA_VERSION.compareTo("1.7") >= 0;
+ CalciteAssert.ENABLE_SLOW && TestUtil.getJavaMajorVersion() >= 7;
private static String schema(String name, String scaleFactor) {
return " {\n"
http://git-wip-us.apache.org/repos/asf/calcite/blob/dad58186/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 1feec71..9ea9ecc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -129,6 +129,7 @@ limitations under the License.
<sqlline.version>1.3.0</sqlline.version>
<xalan.version>2.7.1</xalan.version>
<xerces.version>2.9.1</xerces.version>
+ <sketches.version>0.9.0</sketches.version>
</properties>
<issueManagement>
@@ -259,6 +260,11 @@ limitations under the License.
<version>${guava.version}</version>
</dependency>
<dependency>
+ <groupId>com.h2database</groupId>
+ <artifactId>h2</artifactId>
+ <version>${h2.version}</version>
+ </dependency>
+ <dependency>
<groupId>com.joestelmach</groupId>
<artifactId>natty</artifactId>
<version>${natty.version}</version>
@@ -269,9 +275,9 @@ limitations under the License.
<version>${oracle-jdbc6-driver.version}</version>
</dependency>
<dependency>
- <groupId>com.h2database</groupId>
- <artifactId>h2</artifactId>
- <version>${h2.version}</version>
+ <groupId>com.yahoo.datasketches</groupId>
+ <artifactId>sketches-core</artifactId>
+ <version>${sketches.version}</version>
</dependency>
<dependency>
<groupId>javax.servlet</groupId>