You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2015/09/10 08:48:20 UTC
tajo git commit: TAJO-1600: Invalid query planning for distinct
group-by.
Repository: tajo
Updated Branches:
refs/heads/master 7f7f4d119 -> fe99e0fff
TAJO-1600: Invalid query planning for distinct group-by.
Closes #750
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fe99e0ff
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fe99e0ff
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fe99e0ff
Branch: refs/heads/master
Commit: fe99e0fff2fcdd0d0341d2f3d22b5d03fa4bea56
Parents: 7f7f4d1
Author: Hyunsik Choi <hy...@apache.org>
Authored: Thu Sep 10 15:34:48 2015 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Thu Sep 10 15:34:48 2015 +0900
----------------------------------------------------------------------
CHANGES | 2 +
.../tajo/engine/query/TestCaseByCases.java | 7 +++
.../queries/TestCaseByCases/testTAJO_1600.sql | 10 ++++
.../results/TestCaseByCases/testTAJO_1600.plan | 21 ++++++++
.../TestCaseByCases/testTAJO_1600.result | 7 +++
.../org/apache/tajo/plan/LogicalPlanner.java | 46 +++++++++++-----
.../apache/tajo/plan/logical/GroupbyNode.java | 12 +++++
.../rewrite/rules/ProjectionPushDownRule.java | 55 +++++++++++++-------
8 files changed, 128 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 9dc0ca5..ccbca88 100644
--- a/CHANGES
+++ b/CHANGES
@@ -257,6 +257,8 @@ Release 0.11.0 - unreleased
BUG FIXES
+ TAJO-1600: Invalid query planning for distinct group-by. (hyunsik)
+
TAJO-1782: Check ON_ERROR_STOP flag in TSQL when error is occured.
(Contributed by Dongkyu Hwangbo, Committed by jihoon)
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
index bcf00f8..b32ba65 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
@@ -180,4 +180,11 @@ public class TestCaseByCases extends QueryTestCaseBase {
assertResultSet(res);
cleanupQuery(res);
}
+
+ @Test
+ @Option(withExplain = true)
+ @SimpleTest
+ public final void testTAJO_1600() throws Exception {
+ runSimpleTests();
+ }
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql b/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql
new file mode 100644
index 0000000..0861283
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestCaseByCases/testTAJO_1600.sql
@@ -0,0 +1,10 @@
+SELECT DISTINCT
+ c_custkey,
+ orders.o_orderkey,
+ orders.o_orderstatus,
+ orders.o_orderdate
+from
+ customer left outer join orders on c_custkey = o_orderkey
+order by
+ c_custkey,
+ o_orderkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan
new file mode 100644
index 0000000..12921cd
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.plan
@@ -0,0 +1,21 @@
+explain
+-------------------------------
+SORT(3)
+ => Sort Keys: default.customer.c_custkey (INT4) (asc),default.orders.o_orderkey (INT4) (asc)
+ GROUP_BY(5)(c_custkey,o_orderkey,o_orderstatus,o_orderdate)
+ => target list: default.customer.c_custkey (INT4), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT), default.orders.o_orderdate (TEXT)
+ => out schema:{(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+ => in schema:{(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+ JOIN(7)(LEFT_OUTER)
+ => Join Cond: default.customer.c_custkey (INT4) = default.orders.o_orderkey (INT4)
+ => target list: default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)
+ => out schema: {(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+ => in schema: {(4) default.customer.c_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+ SCAN(1) on default.orders
+ => target list: default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)
+ => out schema: {(3) default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderstatus (TEXT)}
+ => in schema: {(9) default.orders.o_clerk (TEXT), default.orders.o_comment (TEXT), default.orders.o_custkey (INT4), default.orders.o_orderdate (TEXT), default.orders.o_orderkey (INT4), default.orders.o_orderpriority (TEXT), default.orders.o_orderstatus (TEXT), default.orders.o_shippriority (INT4), default.orders.o_totalprice (FLOAT8)}
+ SCAN(0) on default.customer
+ => target list: default.customer.c_custkey (INT4)
+ => out schema: {(1) default.customer.c_custkey (INT4)}
+ => in schema: {(8) default.customer.c_acctbal (FLOAT8), default.customer.c_address (TEXT), default.customer.c_comment (TEXT), default.customer.c_custkey (INT4), default.customer.c_mktsegment (TEXT), default.customer.c_name (TEXT), default.customer.c_nationkey (INT4), default.customer.c_phone (TEXT)}
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result
new file mode 100644
index 0000000..7cb5166
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestCaseByCases/testTAJO_1600.result
@@ -0,0 +1,7 @@
+c_custkey,o_orderkey,o_orderstatus,o_orderdate
+-------------------------------
+1,1,O,1996-01-02
+2,2,O,1996-12-01
+3,3,F,1993-10-14
+4,null,null,null
+5,null,null,null
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
index 6d40cda..24dcfd5 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/LogicalPlanner.java
@@ -323,18 +323,40 @@ public class LogicalPlanner extends BaseAlgebraVisitor<LogicalPlanner.PlanContex
LogicalPlan plan = context.plan;
QueryBlock block = context.queryBlock;
- Schema outSchema = projectionNode.getOutSchema();
- GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class);
- dupRemoval.setChild(child);
- dupRemoval.setInSchema(projectionNode.getInSchema());
- dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema));
- dupRemoval.setGroupingColumns(outSchema.toArray());
-
- block.registerNode(dupRemoval);
- postHook(context, stack, null, dupRemoval);
-
- projectionNode.setChild(dupRemoval);
- projectionNode.setInSchema(dupRemoval.getOutSchema());
+ if (child.getType() == NodeType.SORT) {
+ SortNode sortNode = (SortNode) child;
+
+ GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class);
+ dupRemoval.setForDistinctBlock();
+ dupRemoval.setChild(sortNode.getChild());
+ dupRemoval.setInSchema(sortNode.getInSchema());
+ dupRemoval.setTargets(PlannerUtil.schemaToTargets(sortNode.getInSchema()));
+ dupRemoval.setGroupingColumns(sortNode.getInSchema().toArray());
+
+ block.registerNode(dupRemoval);
+ postHook(context, stack, null, dupRemoval);
+
+ sortNode.setChild(dupRemoval);
+ sortNode.setInSchema(dupRemoval.getOutSchema());
+ sortNode.setOutSchema(dupRemoval.getOutSchema());
+ projectionNode.setInSchema(sortNode.getOutSchema());
+ projectionNode.setChild(sortNode);
+
+ } else {
+ Schema outSchema = projectionNode.getOutSchema();
+ GroupbyNode dupRemoval = context.plan.createNode(GroupbyNode.class);
+ dupRemoval.setForDistinctBlock();
+ dupRemoval.setChild(child);
+ dupRemoval.setInSchema(projectionNode.getInSchema());
+ dupRemoval.setTargets(PlannerUtil.schemaToTargets(outSchema));
+ dupRemoval.setGroupingColumns(outSchema.toArray());
+
+ block.registerNode(dupRemoval);
+ postHook(context, stack, null, dupRemoval);
+
+ projectionNode.setChild(dupRemoval);
+ projectionNode.setInSchema(dupRemoval.getOutSchema());
+ }
}
private Pair<String [], ExprNormalizer.WindowSpecReferences []> doProjectionPrephase(PlanContext context,
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
index 23a9154..3966606 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/logical/GroupbyNode.java
@@ -41,6 +41,10 @@ public class GroupbyNode extends UnaryNode implements Projectable, Cloneable {
* */
@Expose private Target [] targets;
@Expose private boolean hasDistinct = false;
+ /**
+ * A flag to indicate if this groupby is for distinct block (i.e., SELECT DISTINCT x,y,z, ...)
+ */
+ @Expose private boolean forDistinctBlock = false;
public GroupbyNode(int pid) {
super(pid, NodeType.GROUP_BY);
@@ -70,6 +74,14 @@ public class GroupbyNode extends UnaryNode implements Projectable, Cloneable {
hasDistinct = distinct;
}
+ public final void setForDistinctBlock() {
+ forDistinctBlock = true;
+ }
+
+ public boolean isForDistinctBlock() {
+ return forDistinctBlock;
+ }
+
public boolean hasAggFunctions() {
return aggrFunctions.length > 0;
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/fe99e0ff/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
index 5322868..408f2d2 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/rewrite/rules/ProjectionPushDownRule.java
@@ -714,36 +714,51 @@ public class ProjectionPushDownRule extends
Stack<LogicalNode> stack) throws TajoException {
Context newContext = new Context(context);
- // Getting grouping key names
- final int groupingKeyNum = node.getGroupingColumns().length;
+ int groupingKeyNum = node.getGroupingColumns().length;
LinkedHashSet<String> groupingKeyNames = null;
- if (groupingKeyNum > 0) {
- groupingKeyNames = Sets.newLinkedHashSet();
- for (int i = 0; i < groupingKeyNum; i++) {
- FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
- groupingKeyNames.add(newContext.addExpr(fieldEval));
+ String[] aggEvalNames = null;
+
+ // if this query block is distinct, this groupby node have the same target to that of its above operator.
+ // So, it does not need to add new expression to newContext.
+ if (!node.isForDistinctBlock()) {
+ // Getting grouping key names
+ if (groupingKeyNum > 0) {
+ groupingKeyNames = Sets.newLinkedHashSet();
+ for (int i = 0; i < groupingKeyNum; i++) {
+ FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
+ groupingKeyNames.add(newContext.addExpr(fieldEval));
+ }
}
- }
- // Getting eval names
-
- final String [] aggEvalNames;
- if (node.hasAggFunctions()) {
- final int evalNum = node.getAggFunctions().length;
- aggEvalNames = new String[evalNum];
- for (int evalIdx = 0, targetIdx = groupingKeyNum; targetIdx < node.getTargets().length; evalIdx++, targetIdx++) {
- Target target = node.getTargets()[targetIdx];
- EvalNode evalNode = node.getAggFunctions()[evalIdx];
- aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName()));
+ // Getting eval names
+ if (node.hasAggFunctions()) {
+ final int evalNum = node.getAggFunctions().length;
+ aggEvalNames = new String[evalNum];
+ for (int evalIdx = 0, targetIdx = node.getGroupingColumns().length; targetIdx < node.getTargets().length;
+ evalIdx++, targetIdx++) {
+ Target target = node.getTargets()[targetIdx];
+ EvalNode evalNode = node.getAggFunctions()[evalIdx];
+ aggEvalNames[evalIdx] = newContext.addExpr(new Target(evalNode, target.getCanonicalName()));
+ }
}
- } else {
- aggEvalNames = null;
}
// visit a child node
LogicalNode child = super.visitGroupBy(newContext, plan, block, node, stack);
node.setInSchema(child.getOutSchema());
+ if (node.isForDistinctBlock()) { // the grouping columns should be updated according to the schema of child node.
+ node.setGroupingColumns(child.getOutSchema().toArray());
+ node.setTargets(PlannerUtil.schemaToTargets(child.getOutSchema()));
+
+ // Because it updates grouping columns and targets, it should refresh grouping key num and names.
+ groupingKeyNum = node.getGroupingColumns().length;
+ groupingKeyNames = Sets.newLinkedHashSet();
+ for (int i = 0; i < groupingKeyNum; i++) {
+ FieldEval fieldEval = new FieldEval(node.getGroupingColumns()[i]);
+ groupingKeyNames.add(newContext.addExpr(fieldEval));
+ }
+ }
List<Target> targets = Lists.newArrayList();
if (groupingKeyNum > 0 && groupingKeyNames != null) {