You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/05/20 20:46:42 UTC

[44/48] TAJO-801: Multiple distinct should be supported. (Hyoungjun Kim via hyunsik)

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
index 629889d..4c4227f 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/SortAggregateExec.java
@@ -47,13 +47,13 @@ public class SortAggregateExec extends AggregationExec {
 
   public SortAggregateExec(TaskAttemptContext context, GroupbyNode plan, PhysicalExec child) throws IOException {
     super(context, plan, child);
-    contexts = new FunctionContext[plan.getAggFunctions().length];
+    contexts = new FunctionContext[plan.getAggFunctions() == null ? 0 : plan.getAggFunctions().length];
   }
 
   @Override
   public Tuple next() throws IOException {
     Tuple currentKey;
-    Tuple tuple;
+    Tuple tuple = null;
     Tuple outputTuple = null;
 
     while(!context.isStopped() && (tuple = child.next()) != null) {
@@ -101,6 +101,10 @@ public class SortAggregateExec extends AggregationExec {
       }
     } // while loop
 
+    if (tuple == null && lastKey == null) {
+      finished = true;
+      return null;
+    }
     if (!finished) {
       outputTuple = new VTuple(outSchema.size());
       int tupleIdx = 0;

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/main/proto/TajoWorkerProtocol.proto
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/proto/TajoWorkerProtocol.proto b/tajo-core/src/main/proto/TajoWorkerProtocol.proto
index e3d3028..dbff67f 100644
--- a/tajo-core/src/main/proto/TajoWorkerProtocol.proto
+++ b/tajo-core/src/main/proto/TajoWorkerProtocol.proto
@@ -195,6 +195,7 @@ message EnforceProperty {
     SORT = 4;
     BROADCAST = 5;
     COLUMN_PARTITION = 6;
+    DISTINCT_GROUP_BY = 7;
   }
 
   // Identifies which field is filled in.
@@ -208,6 +209,7 @@ message EnforceProperty {
   optional SortEnforce sort = 6;
   optional BroadcastEnforce broadcast = 7;
   optional ColumnPartitionEnforcer columnPartition = 8;
+  optional DistinctGroupbyEnforcer distinct = 9;
 }
 
 message SortedInputEnforce {
@@ -266,6 +268,21 @@ message ColumnPartitionEnforcer {
   required ColumnPartitionAlgorithm algorithm = 2;
 }
 
+message DistinctGroupbyEnforcer {
+  enum DistinctAggregationAlgorithm {
+    HASH_AGGREGATION = 0;
+    SORT_AGGREGATION = 1;
+  }
+
+  message SortSpecArray {
+    required int32 pid = 1;
+    repeated SortSpecProto sortSpecs = 2;
+  }
+  required int32 pid = 1;
+  required DistinctAggregationAlgorithm algorithm = 2;
+  repeated SortSpecArray sortSpecArrays = 3;
+}
+
 message EnforcerProto {
   repeated EnforceProperty properties = 1;
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 1f8cb2a..91993a1 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -179,7 +179,7 @@ public class TestGroupByQuery extends QueryTestCaseBase {
 
   @Test
   public final void testDistinctAggregation6() throws Exception {
-    // select count(distinct l_orderkey), sum(l_orderkey), sum(l_linenumber), count(*) as v4 from lineitem
+    // select count(distinct l_orderkey) v0, sum(l_orderkey) v1, sum(l_linenumber) v2, count(*) as v4 from lineitem
     // group by l_orderkey;
     ResultSet res = executeQuery();
     assertResultSet(res);
@@ -187,6 +187,15 @@ public class TestGroupByQuery extends QueryTestCaseBase {
   }
 
   @Test
+  public final void testDistinctAggregation7() throws Exception {
+    // select count(*), count(distinct c_nationkey), count(distinct c_mktsegment) from customer
+    // tpch scale 1000: 15000000	25	5
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
   public final void testDistinctAggregationWithHaving1() throws Exception {
     // select l_linenumber, count(*), count(distinct l_orderkey), sum(distinct l_orderkey) from lineitem
     // group by l_linenumber having sum(distinct l_orderkey) >= 6;
@@ -205,6 +214,44 @@ public class TestGroupByQuery extends QueryTestCaseBase {
   }
 
   @Test
+  public final void testDistinctAggregationCasebyCase() throws Exception {
+    ResultSet res;
+
+    // one groupby, distinct, aggregation
+    res = executeFile("testDistinctAggregation_case1.sql");
+    assertResultSet(res, "testDistinctAggregation_case1.result");
+    res.close();
+
+    // one groupby, two distinct, one aggregation
+    res = executeFile("testDistinctAggregation_case2.sql");
+    assertResultSet(res, "testDistinctAggregation_case2.result");
+    res.close();
+
+    // one groupby, two distinct, two aggregation(no alias)
+    res = executeFile("testDistinctAggregation_case3.sql");
+    assertResultSet(res, "testDistinctAggregation_case3.result");
+    res.close();
+
+    // two groupby, two distinct, two aggregation
+    res = executeFile("testDistinctAggregation_case4.sql");
+    assertResultSet(res, "testDistinctAggregation_case4.result");
+    res.close();
+
+    // two groupby, two distinct, two aggregation with subquery
+    res = executeFile("testDistinctAggregation_case5.sql");
+    assertResultSet(res, "testDistinctAggregation_case5.result");
+    res.close();
+
+    res = executeFile("testDistinctAggregation_case6.sql");
+    assertResultSet(res, "testDistinctAggregation_case6.result");
+    res.close();
+
+    res = executeFile("testDistinctAggregation_case7.sql");
+    assertResultSet(res, "testDistinctAggregation_case7.result");
+    res.close();
+  }
+
+  @Test
   public final void testComplexParameter() throws Exception {
     // select sum(l_extendedprice*l_discount) as revenue from lineitem;
     ResultSet res = executeQuery();

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql
new file mode 100644
index 0000000..75b3eea
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation7.sql
@@ -0,0 +1,6 @@
+select
+    count(*),
+    count(distinct c_nationkey),
+    count(distinct c_mktsegment)
+from
+    customer
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql
new file mode 100644
index 0000000..4f01904
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case1.sql
@@ -0,0 +1,7 @@
+select
+    sum(l_quantity) as quantity,
+    count(distinct l_suppkey) suppkey,
+    l_returnflag
+from
+    lineitem
+group by l_returnflag
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql
new file mode 100644
index 0000000..10f1970
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case2.sql
@@ -0,0 +1,8 @@
+select
+    sum(l_quantity) as quantity,
+    count(distinct l_partkey) as partkey,
+    count(distinct l_suppkey) as suppkey,
+    l_returnflag
+from
+    lineitem
+group by l_returnflag
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql
new file mode 100644
index 0000000..e585d42
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case3.sql
@@ -0,0 +1,9 @@
+select
+    sum(l_quantity) as quantity,
+    count(distinct l_partkey) as partkey,
+    count(distinct l_suppkey),
+    max(l_quantity),
+    l_returnflag
+from
+    lineitem
+group by l_returnflag
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql
new file mode 100644
index 0000000..e8f05de
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case4.sql
@@ -0,0 +1,10 @@
+select
+    l_orderkey,
+    sum(l_quantity) as quantity,
+    count(distinct l_partkey) as partkey,
+    count(distinct l_suppkey),
+    max(l_quantity),
+    l_returnflag
+from
+    lineitem
+group by l_returnflag, l_orderkey
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql
new file mode 100644
index 0000000..9a4d4eb
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case5.sql
@@ -0,0 +1,10 @@
+select
+    l_orderkey,
+    sum(l_quantity) as quantity,
+    count(distinct l_partkey) as partkey,
+    count(distinct l_suppkey),
+    max(l_quantity),
+    l_returnflag
+from
+    (select * from lineitem) as litem
+group by l_returnflag, l_orderkey
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql
new file mode 100644
index 0000000..0a30fe3
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case6.sql
@@ -0,0 +1,12 @@
+select * from (
+select
+    l_orderkey,
+    sum(l_quantity) as quantity,
+    count(distinct l_partkey) as partkey,
+    count(distinct l_suppkey),
+    max(l_quantity),
+    l_returnflag
+from
+    lineitem
+group by l_returnflag, l_orderkey
+) a
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql
new file mode 100644
index 0000000..e08c829
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testDistinctAggregation_case7.sql
@@ -0,0 +1,9 @@
+select
+    count(distinct l_orderkey),
+    sum(l_quantity) as quantity,
+    count(distinct l_partkey) as partkey,
+    count(distinct l_suppkey),
+    max(l_quantity)
+from
+    lineitem
+group by l_orderkey
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result
new file mode 100644
index 0000000..4173b04
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation7.result
@@ -0,0 +1,3 @@
+?count,?count_1,?count_2
+-------------------------------
+5,5,4
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result
new file mode 100644
index 0000000..9296346
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case1.result
@@ -0,0 +1,4 @@
+quantity,suppkey,l_returnflag
+-------------------------------
+91.0,3,N
+94.0,2,R
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result
new file mode 100644
index 0000000..7af127f
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case2.result
@@ -0,0 +1,4 @@
+quantity,partkey,suppkey,l_returnflag
+-------------------------------
+91.0,2,3,N
+94.0,2,2,R
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result
new file mode 100644
index 0000000..31905fa
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case3.result
@@ -0,0 +1,4 @@
+quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+91.0,2,3,38.0,N
+94.0,2,2,49.0,R
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result
new file mode 100644
index 0000000..8afda65
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case4.result
@@ -0,0 +1,5 @@
+l_orderkey,quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+1,53.0,1,2,36.0,N
+2,38.0,1,1,38.0,N
+3,94.0,2,2,49.0,R
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result
new file mode 100644
index 0000000..8afda65
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case5.result
@@ -0,0 +1,5 @@
+l_orderkey,quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+1,53.0,1,2,36.0,N
+2,38.0,1,1,38.0,N
+3,94.0,2,2,49.0,R
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result
new file mode 100644
index 0000000..8afda65
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case6.result
@@ -0,0 +1,5 @@
+l_orderkey,quantity,partkey,?count,?max_1,l_returnflag
+-------------------------------
+1,53.0,1,2,36.0,N
+2,38.0,1,1,38.0,N
+3,94.0,2,2,49.0,R
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/9350a802/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result
new file mode 100644
index 0000000..03cdf1e
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testDistinctAggregation_case7.result
@@ -0,0 +1,5 @@
+?count,quantity,partkey,?count_1,?max_2
+-------------------------------
+1,53.0,1,2,36.0
+1,38.0,1,1,38.0
+1,94.0,2,2,49.0
\ No newline at end of file