You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@calcite.apache.org by jh...@apache.org on 2014/08/01 00:14:54 UTC

[2/3] git commit: Fix cartesian products in OptimizeBushyJoinRule.

Fix cartesian products in OptimizeBushyJoinRule.


Project: http://git-wip-us.apache.org/repos/asf/incubator-optiq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-optiq/commit/50914222
Tree: http://git-wip-us.apache.org/repos/asf/incubator-optiq/tree/50914222
Diff: http://git-wip-us.apache.org/repos/asf/incubator-optiq/diff/50914222

Branch: refs/heads/master
Commit: 50914222eb1c4dd210f0f96eb4876dee085bf832
Parents: 249b00c
Author: Julian Hyde <jh...@apache.org>
Authored: Wed Jul 30 17:09:21 2014 -0700
Committer: Julian Hyde <jh...@apache.org>
Committed: Wed Jul 30 17:09:21 2014 -0700

----------------------------------------------------------------------
 .../rel/rules/OptimizeBushyJoinRule.java        | 50 +++++++++-----------
 .../net/hydromatic/optiq/tools/PlannerTest.java | 33 +++++++++++++
 2 files changed, 56 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-optiq/blob/50914222/core/src/main/java/org/eigenbase/rel/rules/OptimizeBushyJoinRule.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/eigenbase/rel/rules/OptimizeBushyJoinRule.java b/core/src/main/java/org/eigenbase/rel/rules/OptimizeBushyJoinRule.java
index 1c6c4fa..135a105 100644
--- a/core/src/main/java/org/eigenbase/rel/rules/OptimizeBushyJoinRule.java
+++ b/core/src/main/java/org/eigenbase/rel/rules/OptimizeBushyJoinRule.java
@@ -115,22 +115,31 @@ public class OptimizeBushyJoinRule extends RelOptRule {
         };
 
     final List<LoptMultiJoin.Edge> usedEdges = Lists.newArrayList();
-    while (!unusedEdges.isEmpty()) {
+    for (;;) {
       final int edgeOrdinal = chooseBestEdge(unusedEdges, edgeComparator);
       if (pw != null) {
         trace(vertexes, unusedEdges, usedEdges, edgeOrdinal, pw);
       }
-      final LoptMultiJoin.Edge bestEdge = remove(unusedEdges, edgeOrdinal);
-      usedEdges.add(bestEdge);
-
-      // For now, assume that the edge is between precisely two factors.
-      // 1-factor conditions have probably been pushed down,
-      // and 3-or-more-factor conditions are advanced. (TODO:)
-      // Therefore, for now, the factors that are merged are exactly the factors
-      // on this edge.
-      BitSet merged = bestEdge.factors;
-      assert merged.cardinality() == 2;
-      final int[] factors = BitSets.toArray(merged);
+      final int[] factors;
+      if (edgeOrdinal == -1) {
+        // No more edges. Are there any un-joined vertexes?
+        final Vertex lastVertex = Util.last(vertexes);
+        final int z = lastVertex.factors.previousClearBit(lastVertex.id - 1);
+        if (z < 0) {
+          break;
+        }
+        factors = new int[] {z, lastVertex.id};
+      } else {
+        final LoptMultiJoin.Edge bestEdge = unusedEdges.get(edgeOrdinal);
+
+        // For now, assume that the edge is between precisely two factors.
+        // 1-factor conditions have probably been pushed down,
+        // and 3-or-more-factor conditions are advanced. (TODO:)
+        // Therefore, for now, the factors that are merged are exactly the
+        // factors on this edge.
+        assert bestEdge.factors.cardinality() == 2;
+        factors = BitSets.toArray(bestEdge.factors);
+      }
 
       // Determine which factor is to be on the LHS of the join.
       final int majorFactor;
@@ -149,7 +158,7 @@ public class OptimizeBushyJoinRule extends RelOptRule {
       // the join can now be used.
       final BitSet newFactors =
           BitSets.union(majorVertex.factors, minorVertex.factors);
-      final List<RexNode> conditions = Lists.newArrayList(bestEdge.condition);
+      final List<RexNode> conditions = Lists.newArrayList();
       final Iterator<LoptMultiJoin.Edge> edgeIterator = unusedEdges.iterator();
       while (edgeIterator.hasNext()) {
         LoptMultiJoin.Edge edge = edgeIterator.next();
@@ -179,6 +188,7 @@ public class OptimizeBushyJoinRule extends RelOptRule {
       // This vertex has fewer rows (1k rows) -- a fact that is critical to
       // decisions made later. (Hence "greedy" algorithm not "simple".)
       // The adjacent edges are modified.
+      final BitSet merged = BitSets.of(minorFactor, majorFactor);
       for (int i = 0; i < unusedEdges.size(); i++) {
         final LoptMultiJoin.Edge edge = unusedEdges.get(i);
         if (edge.factors.intersects(merged)) {
@@ -270,20 +280,6 @@ public class OptimizeBushyJoinRule extends RelOptRule {
     pw.flush();
   }
 
-  /** Removes the element of a list at a given ordinal, moving the last element
-   * into its place. This is an efficient means of removing an element from an
-   * array list if you do not mind the order of the list changing. */
-  private static <E> E remove(List<E> list, int ordinal) {
-    final int lastOrdinal = list.size() - 1;
-    final E last = list.remove(lastOrdinal);
-    if (ordinal == lastOrdinal) {
-      return last;
-    }
-    final E e = list.get(ordinal);
-    list.set(ordinal, last);
-    return e;
-  }
-
   int chooseBestEdge(List<LoptMultiJoin.Edge> edges,
       Comparator<LoptMultiJoin.Edge> comparator) {
     return minPos(edges, comparator);

http://git-wip-us.apache.org/repos/asf/incubator-optiq/blob/50914222/core/src/test/java/net/hydromatic/optiq/tools/PlannerTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/net/hydromatic/optiq/tools/PlannerTest.java b/core/src/test/java/net/hydromatic/optiq/tools/PlannerTest.java
index d544858..e96bf84 100644
--- a/core/src/test/java/net/hydromatic/optiq/tools/PlannerTest.java
+++ b/core/src/test/java/net/hydromatic/optiq/tools/PlannerTest.java
@@ -526,6 +526,39 @@ public class PlannerTest {
         + "          EnumerableTableAccessRel(table=[[foodmart2, sales_fact_1997]])\n");
   }
 
+  /** Tests the bushy join algorithm where one table does not join to
+   * anything. */
+  @Test public void testBushyCrossJoin() throws Exception {
+    checkBushy("select * from \"sales_fact_1997\"\n"
+        + "join \"customer\" using (\"customer_id\")\n"
+        + "cross join \"department\"",
+        "EnumerableProjectRel(product_id=[$0], time_id=[$1], customer_id=[$2], promotion_id=[$3], store_id=[$4], store_sales=[$5], store_cost=[$6], unit_sales=[$7], customer_id0=[$8], account_num=[$9], lname=[$10], fname=[$11], mi=[$12], address1=[$13], address2=[$14], address3=[$15], address4=[$16], city=[$17], state_province=[$18], postal_code=[$19], country=[$20], customer_region_id=[$21], phone1=[$22], phone2=[$23], birthdate=[$24], marital_status=[$25], yearly_income=[$26], gender=[$27], total_children=[$28], num_children_at_home=[$29], education=[$30], date_accnt_opened=[$31], member_card=[$32], occupation=[$33], houseowner=[$34], num_cars_owned=[$35], fullname=[$36], department_id=[$37], department_description=[$38])\n"
+        + "  EnumerableProjectRel($f0=[$31], $f1=[$32], $f2=[$33], $f3=[$34], $f4=[$35], $f5=[$36], $f6=[$37], $f7=[$38], $f8=[$2], $f9=[$3], $f10=[$4], $f11=[$5], $f12=[$6], $f13=[$7], $f14=[$8], $f15=[$9], $f16=[$10], $f17=[$11], $f18=[$12], $f19=[$13], $f20=[$14], $f21=[$15], $f22=[$16], $f23=[$17], $f24=[$18], $f25=[$19], $f26=[$20], $f27=[$21], $f28=[$22], $f29=[$23], $f30=[$24], $f31=[$25], $f32=[$26], $f33=[$27], $f34=[$28], $f35=[$29], $f36=[$30], $f37=[$0], $f38=[$1])\n"
+        + "    EnumerableJoinRel(condition=[true], joinType=[inner])\n"
+        + "      EnumerableTableAccessRel(table=[[foodmart2, department]])\n"
+        + "      EnumerableJoinRel(condition=[=($31, $0)], joinType=[inner])\n"
+        + "        EnumerableTableAccessRel(table=[[foodmart2, customer]])\n"
+        + "        EnumerableTableAccessRel(table=[[foodmart2, sales_fact_1997]])");
+  }
+
+  /** Tests the bushy join algorithm against a query where not all tables have a
+   * join condition to the others. */
+  @Test public void testBushyCrossJoin2() throws Exception {
+    checkBushy("select * from \"sales_fact_1997\"\n"
+        + "join \"customer\" using (\"customer_id\")\n"
+        + "cross join \"department\"\n"
+        + "join \"employee\" using (\"department_id\")",
+        "EnumerableProjectRel(product_id=[$0], time_id=[$1], customer_id=[$2], promotion_id=[$3], store_id=[$4], store_sales=[$5], store_cost=[$6], unit_sales=[$7], customer_id0=[$8], account_num=[$9], lname=[$10], fname=[$11], mi=[$12], address1=[$13], address2=[$14], address3=[$15], address4=[$16], city=[$17], state_province=[$18], postal_code=[$19], country=[$20], customer_region_id=[$21], phone1=[$22], phone2=[$23], birthdate=[$24], marital_status=[$25], yearly_income=[$26], gender=[$27], total_children=[$28], num_children_at_home=[$29], education=[$30], date_accnt_opened=[$31], member_card=[$32], occupation=[$33], houseowner=[$34], num_cars_owned=[$35], fullname=[$36], department_id=[$37], department_description=[$38], employee_id=[$39], full_name=[$40], first_name=[$41], last_name=[$42], position_id=[$43], position_title=[$44], store_id0=[$45], department_id0=[$46], birth_date=[$47], hire_date=[$48], end_date=[$49], salary=[$50], supervisor_id=[$51], education_level=[$52], mar
 ital_status0=[$53], gender0=[$54], management_role=[$55])\n"
+        + "  EnumerableProjectRel($f0=[$48], $f1=[$49], $f2=[$50], $f3=[$51], $f4=[$52], $f5=[$53], $f6=[$54], $f7=[$55], $f8=[$19], $f9=[$20], $f10=[$21], $f11=[$22], $f12=[$23], $f13=[$24], $f14=[$25], $f15=[$26], $f16=[$27], $f17=[$28], $f18=[$29], $f19=[$30], $f20=[$31], $f21=[$32], $f22=[$33], $f23=[$34], $f24=[$35], $f25=[$36], $f26=[$37], $f27=[$38], $f28=[$39], $f29=[$40], $f30=[$41], $f31=[$42], $f32=[$43], $f33=[$44], $f34=[$45], $f35=[$46], $f36=[$47], $f37=[$0], $f38=[$1], $f39=[$2], $f40=[$3], $f41=[$4], $f42=[$5], $f43=[$6], $f44=[$7], $f45=[$8], $f46=[$9], $f47=[$10], $f48=[$11], $f49=[$12], $f50=[$13], $f51=[$14], $f52=[$15], $f53=[$16], $f54=[$17], $f55=[$18])\n"
+        + "    EnumerableJoinRel(condition=[true], joinType=[inner])\n"
+        + "      EnumerableJoinRel(condition=[=($0, $9)], joinType=[inner])\n"
+        + "        EnumerableTableAccessRel(table=[[foodmart2, department]])\n"
+        + "        EnumerableTableAccessRel(table=[[foodmart2, employee]])\n"
+        + "      EnumerableJoinRel(condition=[=($31, $0)], joinType=[inner])\n"
+        + "        EnumerableTableAccessRel(table=[[foodmart2, customer]])\n"
+        + "        EnumerableTableAccessRel(table=[[foodmart2, sales_fact_1997]])\n");
+  }
+
   /** Checks that a query returns a particular plan, using a planner with
    * OptimizeBushyJoinRule enabled. */
   private void checkBushy(String sql, String expected) throws Exception {