You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2015/09/10 08:48:20 UTC

tajo git commit: TAJO-1600: Invalid query planning for distinct group-by.

Repository: tajo
Updated Branches:
  refs/heads/master 7f7f4d119 -> fe99e0fff


TAJO-1600: Invalid query planning for distinct group-by.

Closes #750


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fe99e0ff
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fe99e0ff
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fe99e0ff

Branch: refs/heads/master
Commit: fe99e0fff2fcdd0d0341d2f3d22b5d03fa4bea56
Parents: 7f7f4d1
Author: Hyunsik Choi <hy...@apache.org>
Authored: Thu Sep 10 15:34:48 2015 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Thu Sep 10 15:34:48 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |  2 +
 .../tajo/engine/query/TestCaseByCases.java      |  7 +++
 .../queries/TestCaseByCases/testTAJO_1600.sql   | 10 ++++
 .../results/TestCaseByCases/testTAJO_1600.plan  | 21 ++++++++
 .../TestCaseByCases/testTAJO_1600.result        |  7 +++
 .../org/apache/tajo/plan/LogicalPlanner.java    | 46 +++++++++++-----
 .../apache/tajo/plan/logical/GroupbyNode.java   | 12 +++++
 .../rewrite/rules/ProjectionPushDownRule.java   | 55 +++++++++++++-------
 8 files changed, 128 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 9dc0ca5..ccbca88 100644
--- a/CHANGES
+++ b/CHANGES
@@ -257,6 +257,8 @@ Release 0.11.0 - unreleased
 
   BUG FIXES
 
+    TAJO-1600: Invalid query planning for distinct group-by. (hyunsik)
+
     TAJO-1782: Check ON_ERROR_STOP flag in TSQL when error is occured. 
     (Contributed by Dongkyu Hwangbo, Committed by jihoon)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
index bcf00f8..b32ba65 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
@@ -180,4 +180,11 @@ public class TestCaseByCases extends QueryTestCaseBase {
     assertResultSet(res);
     cleanupQuery(res);
   }
+
+  @Test
+  @Option(withExplain = true)
+  @SimpleTest
+  public final void testTAJO_1600() throws Exception {
+    runSimpleTests();
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql b/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql
new file mode 100644
index 0000000..0861283
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql
@@ -0,0 +1,10 @@
+SELECT DISTINCT
+  c_custkey,
+  orders.o_orderkey,
+  orders.o_orderstatus,
+  orders.o_orderdate
+from
+  customer left outer join orders on c_custkey = o_orderkey
+order by
+  c_custkey,
+  o_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan
new file mode 100644
index 0000000..12921cd
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan
@@ -0,0 +1,21 @@
+explain
+-------------------------------
+SORT(3)
+  => Sort Keys: default.customer.c_custkey (INT4) (asc),default.orders.o_orderkey (INT4) (asc)
+   GROUP_BY(5)(c_custkey,o_orderkey,o_orderstatus,o_orderdate)
+     => target list: default.customer.c_custkey (INT4), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT), default.orders.o_orderdate (TEXT)
+     => out schema:{(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+     => in schema:{(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+      JOIN(7)(LEFT_OUTER)
+        => Join Cond: default.customer.c_custkey (INT4) = default.orders.o_orderkey (INT4)
+        => target list: default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)
+        => out schema: {(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+        => in schema: {(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+         SCAN(1) on default.orders
+           => target list: default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)
+           => out schema: {(3) default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+           => in schema: {(9) default.orders.o_clerk (TEXT), default.orders.o_comment (TEXT), default.orders.o_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderpriority (TEXT), default.orders.o_orderstatus (TEXT), default.orders.o_shippriority (INT4), default.orders.o_totalprice (FLOAT8)}
+         SCAN(0) on default.customer
+           => target list: default.customer.c_custkey (INT4)
+           => out schema: {(1) default.customer.c_custkey (INT4)}
+           => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)}

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result
new file mode 100644
index 0000000..7cb5166
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result
@@ -0,0 +1,7 @@
+c_custkey,o_orderkey,o_orderstatus,o_orderdate
+-------------------------------
+1,1,O,1996-01-02
+2,2,O,1996-12-01
+3,3,F,1993-10-14
+4,null,null,null
+5,null,null,null

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
index 6d40cda..24dcfd5 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
@@ -323,18 +323,40 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex
     LogicalPlan plan = context.plan;
     QueryBlock block = context.queryBlock;
 
-    Schema outSchema = projectionNode.getOutSchema();
-    GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class);
-    dupRemoval.setChild(child);
-    dupRemoval.setInSchema(projectionNode.getInSchema());
-    dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema));
-    dupRemoval.setGroupingColumns(outSchema.toArray());
-
-    block.registerNode(dupRemoval);
-    postHook(context, stack, null, dupRemoval);
-
-    projectionNode.setChild(dupRemoval);
-    projectionNode.setInSchema(dupRemoval.getOutSchema());
+    if (child.getType() == NodeType.SORT) {
+      SortNode sortNode = (SortNode) child;
+
+      GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class);
+      dupRemoval.setForDistinctBlock();
+      dupRemoval.setChild(sortNode.getChild());
+      dupRemoval.setInSchema(sortNode.getInSchema());
+      dupRemoval.setTargets(PlannerUtil.schemaToTargets(sortNode.getInSchema()));
+      dupRemoval.setGroupingColumns(sortNode.getInSchema().toArray());
+
+      block.registerNode(dupRemoval);
+      postHook(context, stack, null, dupRemoval);
+
+      sortNode.setChild(dupRemoval);
+      sortNode.setInSchema(dupRemoval.getOutSchema());
+      sortNode.setOutSchema(dupRemoval.getOutSchema());
+      projectionNode.setInSchema(sortNode.getOutSchema());
+      projectionNode.setChild(sortNode);
+
+    } else {
+      Schema outSchema = projectionNode.getOutSchema();
+      GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class);
+      dupRemoval.setForDistinctBlock();
+      dupRemoval.setChild(child);
+      dupRemoval.setInSchema(projectionNode.getInSchema());
+      dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema));
+      dupRemoval.setGroupingColumns(outSchema.toArray());
+
+      block.registerNode(dupRemoval);
+      postHook(context, stack, null, dupRemoval);
+
+      projectionNode.setChild(dupRemoval);
+      projectionNode.setInSchema(dupRemoval.getOutSchema());
+    }
   }
 
   private Pair<String [], ExprNormalizer.WindowSpecReferences []> doProjectionPrephase(PlanContext context,

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
index 23a9154..3966606 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
@@ -41,6 +41,10 @@ public class GroupbyNode extends UnaryNode implements Projectable, Cloneable {
    * */
   @Expose private Target [] targets;
   @Expose private boolean hasDistinct = false;
+  /**
+   * A flag to indicate if this groupby is for distinct block (i.e., SELECT DISTINCT x,y,z, ...)
+   */
+  @Expose private boolean forDistinctBlock = false;
 
   public GroupbyNode(int pid) {
     super(pid, NodeType.GROUP_BY);
@@ -70,6 +74,14 @@ public class GroupbyNode extends UnaryNode implements Projectable, Cloneable {
     hasDistinct = distinct;
   }
 
+  public final void setForDistinctBlock() {
+    forDistinctBlock = true;
+  }
+
+  public boolean isForDistinctBlock() {
+    return forDistinctBlock;
+  }
+
   public boolean hasAggFunctions() {
     return aggrFunctions.length > 0;
   }

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
index 5322868..408f2d2 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
@@ -714,36 +714,51 @@ public class ProjectionPushDownRule extends
                              Stack<LogicalNode> stack) throws TajoException {
     Context newContext = new Context(context);
 
-    // Getting grouping key names
-    final int groupingKeyNum = node.getGroupingColumns().length;
+    int groupingKeyNum = node.getGroupingColumns().length;
     LinkedHashSet<String> groupingKeyNames = null;
-    if (groupingKeyNum > 0) {
-      groupingKeyNames = Sets.newLinkedHashSet();
-      for (int i = 0; i < groupingKeyNum; i++) {
-        FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
-        groupingKeyNames.add(newContext.addExpr(fieldEval));
+    String[] aggEvalNames = null;
+
+    // if this query block is distinct, this groupby node have the same target to that of its above operator.
+    // So, it does not need to add new expression to newContext.
+    if (!node.isForDistinctBlock()) {
+      // Getting grouping key names
+      if (groupingKeyNum > 0) {
+        groupingKeyNames = Sets.newLinkedHashSet();
+        for (int i = 0; i < groupingKeyNum; i++) {
+          FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
+          groupingKeyNames.add(newContext.addExpr(fieldEval));
+        }
       }
-    }
 
-    // Getting eval names
-
-    final String [] aggEvalNames;
-    if (node.hasAggFunctions()) {
-      final int evalNum = node.getAggFunctions().length;
-      aggEvalNames = new String[evalNum];
-      for (int evalIdx = 0, targetIdx = groupingKeyNum; targetIdx < node.getTargets().length; evalIdx++, targetIdx++) {
-        Target target = node.getTargets()[targetIdx];
-        EvalNode evalNode = node.getAggFunctions()[evalIdx];
-        aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName()));
+      // Getting eval names
+      if (node.hasAggFunctions()) {
+        final int evalNum = node.getAggFunctions().length;
+        aggEvalNames = new String[evalNum];
+        for (int evalIdx = 0, targetIdx = node.getGroupingColumns().length; targetIdx < node.getTargets().length;
+             evalIdx++, targetIdx++) {
+          Target target = node.getTargets()[targetIdx];
+          EvalNode evalNode = node.getAggFunctions()[evalIdx];
+          aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName()));
+        }
       }
-    } else {
-      aggEvalNames = null;
     }
 
     // visit a child node
     LogicalNode child = super.visitGroupBy(newContext, plan, block, node, stack);
 
     node.setInSchema(child.getOutSchema());
+    if (node.isForDistinctBlock()) { // the grouping columns should be updated according to the schema of child node.
+      node.setGroupingColumns(child.getOutSchema().toArray());
+      node.setTargets(PlannerUtil.schemaToTargets(child.getOutSchema()));
+
+      // Because it updates grouping columns and targets, it should refresh grouping key num and names.
+      groupingKeyNum = node.getGroupingColumns().length;
+      groupingKeyNames = Sets.newLinkedHashSet();
+      for (int i = 0; i < groupingKeyNum; i++) {
+        FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
+        groupingKeyNames.add(newContext.addExpr(fieldEval));
+      }
+    }
 
     List<Target> targets = Lists.newArrayList();
     if (groupingKeyNum > 0 && groupingKeyNames != null) {