You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/05/20 20:46:42 UTC
[44/48] TAJO-801: Multiple distinct should be supported. (Hyoungjun Kim via hyunsik)
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
index 629889d..4c4227f 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
@@ -47,13 +47,13 @@ public class SortAggregateExec extends AggregationExec {
public SortAggregateExec(TaskAttemptContext context, GroupbyNode plan, PhysicalExec child) throws IOException {
super(context, plan, child);
- contexts = new FunctionContext[plan.getAggFunctions().length];
+ contexts = new FunctionContext[plan.getAggFunctions() == null ? 0 : plan.getAggFunctions().length];
}
@Override
public Tuple next() throws IOException {
Tuple currentKey;
- Tuple tuple;
+ Tuple tuple = null;
Tuple outputTuple = null;
while(!context.isStopped() && (tuple = child.next()) != null) {
@@ -101,6 +101,10 @@ public class SortAggregateExec extends AggregationExec {
}
} // while loop
+ if (tuple == null && lastKey == null) {
+ finished = true;
+ return null;
+ }
if (!finished) {
outputTuple = new VTuple(outSchema.size());
int tupleIdx = 0;
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/main/proto/TajoWorkerProtocol.proto
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/proto/TajoWorkerProtocol.proto b/tajo-core/src/main/proto/TajoWorkerProtocol.proto
index e3d3028..dbff67f 100644
--- a/tajo-core/src/main/proto/TajoWorkerProtocol.proto
+++ b/tajo-core/src/main/proto/TajoWorkerProtocol.proto
@@ -195,6 +195,7 @@ message EnforceProperty {
SORT = 4;
BROADCAST = 5;
COLUMN_PARTITION = 6;
+ DISTINCT_GROUP_BY = 7;
}
// Identifies which field is filled in.
@@ -208,6 +209,7 @@ message EnforceProperty {
optional SortEnforce sort = 6;
optional BroadcastEnforce broadcast = 7;
optional ColumnPartitionEnforcer columnPartition = 8;
+ optional DistinctGroupbyEnforcer distinct = 9;
}
message SortedInputEnforce {
@@ -266,6 +268,21 @@ message ColumnPartitionEnforcer {
required ColumnPartitionAlgorithm algorithm = 2;
}
+message DistinctGroupbyEnforcer {
+ enum DistinctAggregationAlgorithm {
+ HASH_AGGREGATION = 0;
+ SORT_AGGREGATION = 1;
+ }
+
+ message SortSpecArray {
+ required int32 pid = 1;
+ repeated SortSpecProto sortSpecs = 2;
+ }
+ required int32 pid = 1;
+ required DistinctAggregationAlgorithm algorithm = 2;
+ repeated SortSpecArray sortSpecArrays = 3;
+}
+
message EnforcerProto {
repeated EnforceProperty properties = 1;
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 1f8cb2a..91993a1 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -179,7 +179,7 @@ public class TestGroupByQuery extends QueryTestCaseBase {
@Test
public final void testDistinctAggregation6() throws Exception {
- // select count(distinct l_orderkey), sum(l_orderkey), sum(l_linenumber), count(*) as v4 from lineitem
+ // select count(distinct l_orderkey) v0, sum(l_orderkey) v1, sum(l_linenumber) v2, count(*) as v4 from lineitem
// group by l_orderkey;
ResultSet res = executeQuery();
assertResultSet(res);
@@ -187,6 +187,15 @@ public class TestGroupByQuery extends QueryTestCaseBase {
}
@Test
+ public final void testDistinctAggregation7() throws Exception {
+ // select count(*), count(distinct c_nationkey), count(distinct c_mktsegment) from customer
+ // tpch scale 1000: 15000000 25 5
+ ResultSet res = executeQuery();
+ assertResultSet(res);
+ cleanupQuery(res);
+ }
+
+ @Test
public final void testDistinctAggregationWithHaving1() throws Exception {
// select l_linenumber, count(*), count(distinct l_orderkey), sum(distinct l_orderkey) from lineitem
// group by l_linenumber having sum(distinct l_orderkey) >= 6;
@@ -205,6 +214,44 @@ public class TestGroupByQuery extends QueryTestCaseBase {
}
@Test
+ public final void testDistinctAggregationCasebyCase() throws Exception {
+ ResultSet res;
+
+ // one groupby, distinct, aggregation
+ res = executeFile("testDistinctAggregation_case1.sql");
+ assertResultSet(res, "testDistinctAggregation_case1.result");
+ res.close();
+
+ // one groupby, two distinct, one aggregation
+ res = executeFile("testDistinctAggregation_case2.sql");
+ assertResultSet(res, "testDistinctAggregation_case2.result");
+ res.close();
+
+ // one groupby, two distinct, two aggregation(no alias)
+ res = executeFile("testDistinctAggregation_case3.sql");
+ assertResultSet(res, "testDistinctAggregation_case3.result");
+ res.close();
+
+ // two groupby, two distinct, two aggregation
+ res = executeFile("testDistinctAggregation_case4.sql");
+ assertResultSet(res, "testDistinctAggregation_case4.result");
+ res.close();
+
+ // two groupby, two distinct, two aggregation with subquery
+ res = executeFile("testDistinctAggregation_case5.sql");
+ assertResultSet(res, "testDistinctAggregation_case5.result");
+ res.close();
+
+ res = executeFile("testDistinctAggregation_case6.sql");
+ assertResultSet(res, "testDistinctAggregation_case6.result");
+ res.close();
+
+ res = executeFile("testDistinctAggregation_case7.sql");
+ assertResultSet(res, "testDistinctAggregation_case7.result");
+ res.close();
+ }
+
+ @Test
public final void testComplexParameter() throws Exception {
// select sum(l_extendedprice*l_discount) as revenue from lineitem;
ResultSet res = executeQuery();
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql
new file mode 100644
index 0000000..75b3eea
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql
@@ -0,0 +1,6 @@
+select
+ count(*),
+ count(distinct c_nationkey),
+ count(distinct c_mktsegment)
+from
+ customer
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql
new file mode 100644
index 0000000..4f01904
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql
@@ -0,0 +1,7 @@
+select
+ sum(l_quantity) as quantity,
+ count(distinct l_suppkey) suppkey,
+ l_returnflag
+from
+ lineitem
+group by l_returnflag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql
new file mode 100644
index 0000000..10f1970
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql
@@ -0,0 +1,8 @@
+select
+ sum(l_quantity) as quantity,
+ count(distinct l_partkey) as partkey,
+ count(distinct l_suppkey) as suppkey,
+ l_returnflag
+from
+ lineitem
+group by l_returnflag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql
new file mode 100644
index 0000000..e585d42
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql
@@ -0,0 +1,9 @@
+select
+ sum(l_quantity) as quantity,
+ count(distinct l_partkey) as partkey,
+ count(distinct l_suppkey),
+ max(l_quantity),
+ l_returnflag
+from
+ lineitem
+group by l_returnflag
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql
new file mode 100644
index 0000000..e8f05de
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql
@@ -0,0 +1,10 @@
+select
+ l_orderkey,
+ sum(l_quantity) as quantity,
+ count(distinct l_partkey) as partkey,
+ count(distinct l_suppkey),
+ max(l_quantity),
+ l_returnflag
+from
+ lineitem
+group by l_returnflag, l_orderkey
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql
new file mode 100644
index 0000000..9a4d4eb
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql
@@ -0,0 +1,10 @@
+select
+ l_orderkey,
+ sum(l_quantity) as quantity,
+ count(distinct l_partkey) as partkey,
+ count(distinct l_suppkey),
+ max(l_quantity),
+ l_returnflag
+from
+ (select * from lineitem) as litem
+group by l_returnflag, l_orderkey
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql
new file mode 100644
index 0000000..0a30fe3
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql
@@ -0,0 +1,12 @@
+select * from (
+select
+ l_orderkey,
+ sum(l_quantity) as quantity,
+ count(distinct l_partkey) as partkey,
+ count(distinct l_suppkey),
+ max(l_quantity),
+ l_returnflag
+from
+ lineitem
+group by l_returnflag, l_orderkey
+) a
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql
new file mode 100644
index 0000000..e08c829
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql
@@ -0,0 +1,9 @@
+select
+ count(distinct l_orderkey),
+ sum(l_quantity) as quantity,
+ count(distinct l_partkey) as partkey,
+ count(distinct l_suppkey),
+ max(l_quantity)
+from
+ lineitem
+group by l_orderkey
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result
new file mode 100644
index 0000000..4173b04
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result
@@ -0,0 +1,3 @@
+?count,?count_1,?count_2
+-------------------------------
+5,5,4
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result
new file mode 100644
index 0000000..9296346
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result
@@ -0,0 +1,4 @@
+quantity,suppkey,l_returnflag
+-------------------------------
+91.0,3,N
+94.0,2,R
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result
new file mode 100644
index 0000000..7af127f
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result
@@ -0,0 +1,4 @@
+quantity,partkey,suppkey,l_returnflag
+-------------------------------
+91.0,2,3,N
+94.0,2,2,R
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result
new file mode 100644
index 0000000..31905fa
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result
@@ -0,0 +1,4 @@
+quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+91.0,2,3,38.0,N
+94.0,2,2,49.0,R
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result
new file mode 100644
index 0000000..8afda65
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result
@@ -0,0 +1,5 @@
+l_orderkey,quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+1,53.0,1,2,36.0,N
+2,38.0,1,1,38.0,N
+3,94.0,2,2,49.0,R
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result
new file mode 100644
index 0000000..8afda65
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result
@@ -0,0 +1,5 @@
+l_orderkey,quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+1,53.0,1,2,36.0,N
+2,38.0,1,1,38.0,N
+3,94.0,2,2,49.0,R
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result
new file mode 100644
index 0000000..8afda65
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result
@@ -0,0 +1,5 @@
+l_orderkey,quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+1,53.0,1,2,36.0,N
+2,38.0,1,1,38.0,N
+3,94.0,2,2,49.0,R
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result
new file mode 100644
index 0000000..03cdf1e
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result
@@ -0,0 +1,5 @@
+?count,quantity,partkey,?count_1,?max_2
+-------------------------------
+1,53.0,1,2,36.0
+1,38.0,1,1,38.0
+1,94.0,2,2,49.0
\ No newline at end of file