You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by ji...@apache.org on 2014/08/01 07:58:54 UTC

[01/14] git commit: TAJO-972: Broadcast join with left outer join returns duplicated rows. (Hyoungjun Kim via jaehwa)

Repository: tajo
Updated Branches:
  refs/heads/index_support c9b8f511e -> 5c0277fd8


TAJO-972: Broadcast join with left outer join returns duplicated rows. (Hyoungjun Kim via jaehwa)

Closes #89


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/a5de8372
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/a5de8372
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/a5de8372

Branch: refs/heads/index_support
Commit: a5de837209a8d6d9685ad1aa8132b3b4ecd99727
Parents: 2a6b38e
Author: blrunner <bl...@apache.org>
Authored: Wed Jul 23 11:26:42 2014 +0900
Committer: blrunner <bl...@apache.org>
Committed: Wed Jul 23 11:26:42 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 +
 .../engine/planner/global/GlobalPlanner.java    |  4 +-
 .../planner/global/TestBroadcastJoinPlan.java   | 94 ++++++++++++--------
 .../tajo/engine/query/TestJoinBroadcast.java    | 47 +++++++++-
 4 files changed, 104 insertions(+), 44 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/a5de8372/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 8a1aae6..2be9b26 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,9 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-972: Broadcast join with left outer join returns duplicated rows.
+    (Hyoungjun Kim via jaehwa)
+
     TAJO-666: java.nio.BufferOverflowException occurs when the query includes an order by 
     clause on a TEXT column. (Mai Hai Thanh via jihoon)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/a5de8372/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/GlobalPlanner.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/GlobalPlanner.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/GlobalPlanner.java
index 69ecd02..2daf799 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/GlobalPlanner.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/GlobalPlanner.java
@@ -293,7 +293,7 @@ public class GlobalPlanner {
       // Checking Left Side of Join
       if (ScanNode.isScanNode(leftNode)) {
         ScanNode scanNode = (ScanNode)leftNode;
-        if (getTableVolume(scanNode) >= broadcastThreshold) {
+        if (joinNode.getJoinType() == JoinType.LEFT_OUTER || getTableVolume(scanNode) >= broadcastThreshold) {
           numLargeTables++;
         } else {
           leftBroadcast = true;
@@ -306,7 +306,7 @@ public class GlobalPlanner {
       // Checking Right Side OF Join
       if (ScanNode.isScanNode(rightNode)) {
         ScanNode scanNode = (ScanNode)rightNode;
-        if (getTableVolume(scanNode) >= broadcastThreshold) {
+        if (joinNode.getJoinType() == JoinType.RIGHT_OUTER || getTableVolume(scanNode) >= broadcastThreshold) {
           numLargeTables++;
         } else {
           rightBroadcast = true;

http://git-wip-us.apache.org/repos/asf/tajo/blob/a5de8372/tajo-core/src/test/java/org/apache/tajo/engine/planner/global/TestBroadcastJoinPlan.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/global/TestBroadcastJoinPlan.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/global/TestBroadcastJoinPlan.java
index fd07ae4..ec39609 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/global/TestBroadcastJoinPlan.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/global/TestBroadcastJoinPlan.java
@@ -495,11 +495,13 @@ public class TestBroadcastJoinPlan {
 
     // ((((default.small1 ⟕ default.small2) ⟕ default.small3) ⟕ default.large1) ⟕ default.large2)
     /*
-    |-eb_1402495213549_0000_000007
-       |-eb_1402495213549_0000_000006       (GROUP BY)
-          |-eb_1402495213549_0000_000005    (JOIN)
-             |-eb_1402495213549_0000_000004 (LEAF, large2)
-             |-eb_1402495213549_0000_000003 (LEAF, broadcast JOIN small1, small2, small3, large1)
+    |-eb_1406022243130_0000_000009
+       |-eb_1406022243130_0000_000008
+          |-eb_1406022243130_0000_000007       (join)
+             |-eb_1406022243130_0000_000006    (scan large2)
+             |-eb_1406022243130_0000_000005    (join)
+                |-eb_1406022243130_0000_000004 (scan large1)
+                |-eb_1406022243130_0000_000003 (scan small1, broadcast join small2, small3)
      */
 
     ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
@@ -508,9 +510,9 @@ public class TestBroadcastJoinPlan {
       ExecutionBlock eb = ebCursor.nextBlock();
       if(index == 0) {
         Collection<String> broadcastTables = eb.getBroadcastTables();
-        assertEquals(3, broadcastTables.size());
+        assertEquals(2, broadcastTables.size());
 
-        assertTrue(broadcastTables.contains("default.small1"));
+        assertTrue(!broadcastTables.contains("default.small1"));
         assertTrue(broadcastTables.contains("default.small2"));
         assertTrue(broadcastTables.contains("default.small3"));
       } else if(index == 1 || index == 2 || index == 3) {
@@ -520,7 +522,7 @@ public class TestBroadcastJoinPlan {
       index++;
     }
 
-    assertEquals(5, index);
+    assertEquals(7, index);
   }
 
   @Test
@@ -712,9 +714,9 @@ public class TestBroadcastJoinPlan {
     globalPlanner.build(masterPlan);
 
     /*
-    |-eb_1402500846700_0000_000007
-       |-eb_1402500846700_0000_000006
-          |-eb_1402500846700_0000_000005 (LEAF, broadcast join small1, small2, small3)
+    |-eb_1406022971444_0000_000005
+       |-eb_1406022971444_0000_000004     (group by)
+          |-eb_1406022971444_0000_000003  (scan small1, broadcast join small2, small3)
     */
 
     ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
@@ -735,7 +737,10 @@ public class TestBroadcastJoinPlan {
         assertEquals("default.small2", scanNode.getCanonicalName());
 
         Collection<String> broadcastTables = eb.getBroadcastTables();
-        assertEquals(3, broadcastTables.size());
+        assertEquals(2, broadcastTables.size());
+
+        assertTrue(broadcastTables.contains("default.small2"));
+        assertTrue(broadcastTables.contains("default.small3"));
       } else if(index == 1) {
         Collection<String> broadcastTables = eb.getBroadcastTables();
         assertEquals(0, broadcastTables.size());
@@ -769,9 +774,11 @@ public class TestBroadcastJoinPlan {
 
     //(((default.small1 ⟕ default.small2) ⟕ default.large1) ⟕ default.small3)
     /*
-     |-eb_1402642709028_0000_000005
-       |-eb_1402642709028_0000_000004    (GROUP BY)
-          |-eb_1402642709028_0000_000003 (LEAF, broadcast JOIN small1, small2, small3, large1)
+    |-eb_1406023347983_0000_000007
+       |-eb_1406023347983_0000_000006
+          |-eb_1406023347983_0000_000005    (join, broadcast small3)
+             |-eb_1406023347983_0000_000004 (scan large1)
+             |-eb_1406023347983_0000_000003 (scan small1, broadcast join small2)
      */
 
     ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
@@ -780,19 +787,20 @@ public class TestBroadcastJoinPlan {
       ExecutionBlock eb = ebCursor.nextBlock();
       if(index == 0) {
         Collection<String> broadcastTables = eb.getBroadcastTables();
-        assertEquals(3, broadcastTables.size());
-
-        assertTrue(broadcastTables.contains("default.small1"));
+        assertEquals(1, broadcastTables.size());
         assertTrue(broadcastTables.contains("default.small2"));
+      } else if (index == 2) {
+        Collection<String> broadcastTables = eb.getBroadcastTables();
+        assertEquals(1, broadcastTables.size());
         assertTrue(broadcastTables.contains("default.small3"));
-      } else if(index == 1 || index == 2 || index == 3) {
+      } else if(index == 1 || index == 3) {
         Collection<String> broadcastTables = eb.getBroadcastTables();
         assertEquals(0, broadcastTables.size());
       }
       index++;
     }
 
-    assertEquals(3, index);
+    assertEquals(5, index);
   }
 
   @Test
@@ -820,11 +828,13 @@ public class TestBroadcastJoinPlan {
     // ((((default.small1 ⟕ default.small2) ⟕ default.large1) ⟕ default.large2) ⟕ default.small3)
 
     /*
-    |-eb_1404125948432_0000_000007
-       |-eb_1404125948432_0000_000006
-          |-eb_1404125948432_0000_000005     (JOIN broadcast small3)
-             |-eb_1404125948432_0000_000004  (LEAF, scan large2)
-             |-eb_1404125948432_0000_000003  (LEAF, scan large1, broadcast small1, small2)
+    |-eb_1406023537578_0000_000009
+       |-eb_1406023537578_0000_000008
+          |-eb_1406023537578_0000_000007        (join, broadcast small3)
+             |-eb_1406023537578_0000_000006     (scan large2)
+             |-eb_1406023537578_0000_000005     (join)
+                |-eb_1406023537578_0000_000004  (scan large1)
+                |-eb_1406023537578_0000_000003  (scan small1, broadcast join small2)
     */
     ExecutionBlockCursor ebCursor = new ExecutionBlockCursor(masterPlan);
     int index = 0;
@@ -835,26 +845,34 @@ public class TestBroadcastJoinPlan {
         assertEquals(NodeType.JOIN, node.getType());
         JoinNode joinNode = (JoinNode)node;
 
-        JoinNode joinNode2 = joinNode.getLeftChild();
+        ScanNode scanNode1 = joinNode.getLeftChild();
         ScanNode scanNode2 = joinNode.getRightChild();
-        assertEquals("default.large1", scanNode2.getCanonicalName());
-
-        ScanNode scanNode3 = joinNode2.getLeftChild();
-        ScanNode scanNode4 = joinNode2.getRightChild();
-        assertEquals("default.small1", scanNode3.getCanonicalName());
-        assertEquals("default.small2", scanNode4.getCanonicalName());
+        assertEquals("default.small1", scanNode1.getCanonicalName());
+        assertEquals("default.small2", scanNode2.getCanonicalName());
 
         Collection<String> broadcastTables = eb.getBroadcastTables();
-        assertEquals(2, broadcastTables.size());
+        assertEquals(1, broadcastTables.size());
+        assertTrue(broadcastTables.contains("default.small2"));
       } else if (index == 1) {
         LogicalNode node = eb.getPlan();
         assertEquals(NodeType.SCAN, node.getType());
-        ScanNode scanNode = (ScanNode)node;
+        ScanNode scanNode = (ScanNode) node;
+        assertEquals("default.large1", scanNode.getCanonicalName());
+
+        Collection<String> broadcastTables = eb.getBroadcastTables();
+        assertEquals(0, broadcastTables.size());
+      } else if (index == 2) {
+        LogicalNode node = eb.getPlan();
+        assertEquals(NodeType.JOIN, node.getType());
+      } else if (index == 3) {
+        LogicalNode node = eb.getPlan();
+        assertEquals(NodeType.SCAN, node.getType());
+        ScanNode scanNode = (ScanNode) node;
         assertEquals("default.large2", scanNode.getCanonicalName());
 
         Collection<String> broadcastTables = eb.getBroadcastTables();
         assertEquals(0, broadcastTables.size());
-      } else if(index == 2) {
+      } else if(index == 4) {
         LogicalNode node = eb.getPlan();
         assertEquals(NodeType.GROUP_BY, node.getType());
 
@@ -866,8 +884,8 @@ public class TestBroadcastJoinPlan {
 
         ScanNode scanNode2 = joinNode1.getLeftChild();
         ScanNode scanNode3 = joinNode1.getRightChild();
-        assertTrue(scanNode2.getCanonicalName().indexOf("0000_000003") > 0);
-        assertTrue(scanNode3.getCanonicalName().indexOf("0000_000004") > 0);
+        assertTrue(scanNode2.getCanonicalName().indexOf("0000_000005") > 0);
+        assertTrue(scanNode3.getCanonicalName().indexOf("0000_000006") > 0);
 
         Collection<String> broadcastTables = eb.getBroadcastTables();
         assertEquals(1, broadcastTables.size());
@@ -875,7 +893,7 @@ public class TestBroadcastJoinPlan {
       index++;
     }
 
-    assertEquals(5, index);
+    assertEquals(7, index);
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/tajo/blob/a5de8372/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
index e01b3c5..9cc65bc 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
@@ -23,6 +23,7 @@ import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.tajo.*;
 import org.apache.tajo.catalog.*;
+import org.apache.tajo.common.TajoDataTypes.Type;
 import org.apache.tajo.conf.TajoConf;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.datum.Int4Datum;
@@ -32,11 +33,9 @@ import org.apache.tajo.engine.planner.global.MasterPlan;
 import org.apache.tajo.engine.planner.logical.NodeType;
 import org.apache.tajo.jdbc.TajoResultSet;
 import org.apache.tajo.master.querymaster.QueryMasterTask;
-import org.apache.tajo.storage.Appender;
-import org.apache.tajo.storage.StorageManagerFactory;
-import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.storage.VTuple;
+import org.apache.tajo.storage.*;
 import org.apache.tajo.util.FileUtil;
+import org.apache.tajo.util.KeyValueSet;
 import org.apache.tajo.worker.TajoWorker;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
@@ -576,4 +575,44 @@ public class TestJoinBroadcast extends QueryTestCaseBase {
     appender.flush();
     appender.close();
   }
+
+  @Test
+  public final void testLeftOuterJoinLeftSideSmallTable() throws Exception {
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.put(StorageConstants.CSVFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.put(StorageConstants.CSVFILE_NULL, "\\\\N");
+
+    Schema schema = new Schema();
+    schema.addColumn("id", Type.INT4);
+    schema.addColumn("name", Type.TEXT);
+    String[] data = new String[]{ "1000000|a", "1000001|b", "2|c", "3|d", "4|e" };
+    TajoTestingCluster.createTable("table1", schema, tableOptions, data, 1);
+
+    data = new String[10000];
+    for (int i = 0; i < data.length; i++) {
+      data[i] = i + "|" + "this is testLeftOuterJoinLeftSideSmallTabletestLeftOuterJoinLeftSideSmallTable" + i;
+    }
+    TajoTestingCluster.createTable("table_large", schema, tableOptions, data, 2);
+
+    try {
+      ResultSet res = executeString(
+          "select a.id, b.name from table1 a left outer join table_large b on a.id = b.id order by a.id"
+      );
+
+      String expected = "id,name\n" +
+          "-------------------------------\n" +
+          "2,this is testLeftOuterJoinLeftSideSmallTabletestLeftOuterJoinLeftSideSmallTable2\n" +
+          "3,this is testLeftOuterJoinLeftSideSmallTabletestLeftOuterJoinLeftSideSmallTable3\n" +
+          "4,this is testLeftOuterJoinLeftSideSmallTabletestLeftOuterJoinLeftSideSmallTable4\n" +
+          "1000000,null\n" +
+          "1000001,null\n";
+
+      assertEquals(expected, resultSetToString(res));
+
+      cleanupQuery(res);
+    } finally {
+      executeString("DROP TABLE table1 PURGE").close();
+      executeString("DROP TABLE table_large PURGE").close();
+    }
+  }
 }


[02/14] git commit: TAJO-969: Distributed sort on a large data set may result in incorrect results.

Posted by ji...@apache.org.
TAJO-969: Distributed sort on a large data set may result in incorrect results.

Closes #87


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/45559ce6
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/45559ce6
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/45559ce6

Branch: refs/heads/index_support
Commit: 45559ce60c1da55ac7be1a5900afab45cc9deb06
Parents: a5de837
Author: Hyunsik Choi <hy...@apache.org>
Authored: Wed Jul 23 18:13:44 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Wed Jul 23 18:13:56 2014 +0900

----------------------------------------------------------------------
 .../tajo/master/querymaster/QueryUnit.java      | 10 +++++++-
 .../tajo/master/querymaster/Repartitioner.java  | 10 ++++++--
 .../java/org/apache/tajo/worker/FetchImpl.java  | 24 +++++++++++++++++++-
 3 files changed, 40 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/45559ce6/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java
index 806c0f1..8c953bd 100644
--- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java
+++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/QueryUnit.java
@@ -626,7 +626,7 @@ public class QueryUnit implements EventHandler<TaskEvent> {
     return this.intermediateData;
   }
 
-  public static class PullHost {
+  public static class PullHost implements Cloneable {
     String host;
     int port;
     public PullHost(String pullServerAddr, int pullServerPort){
@@ -659,6 +659,14 @@ public class QueryUnit implements EventHandler<TaskEvent> {
 
       return false;
     }
+
+    @Override
+    public PullHost clone() throws CloneNotSupportedException {
+      PullHost newPullHost = (PullHost) super.clone();
+      newPullHost.host = host;
+      newPullHost.port = port;
+      return newPullHost;
+    }
   }
 
   public static class IntermediateEntry {

http://git-wip-us.apache.org/repos/asf/tajo/blob/45559ce6/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
index 055e9a2..31c520f 100644
--- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
+++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
@@ -636,8 +636,14 @@ public class Repartitioner {
         for (FetchImpl fetch: fetches) {
           String rangeParam =
               TupleUtil.rangeToQuery(ranges[i], ascendingFirstKey ? i == (ranges.length - 1) : i == 0, encoder);
-          fetch.setRangeParams(rangeParam);
-          fetchSet.add(fetch);
+          FetchImpl copy = null;
+          try {
+            copy = fetch.clone();
+          } catch (CloneNotSupportedException e) {
+            throw new RuntimeException(e);
+          }
+          copy.setRangeParams(rangeParam);
+          fetchSet.add(copy);
         }
         map.put(ranges[i], fetchSet);
       }

http://git-wip-us.apache.org/repos/asf/tajo/blob/45559ce6/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java b/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java
index 9d1f428..869c106 100644
--- a/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java
+++ b/tajo-core/src/main/java/org/apache/tajo/worker/FetchImpl.java
@@ -20,6 +20,7 @@ package org.apache.tajo.worker;
 
 import com.google.common.base.Objects;
 import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
 import org.apache.tajo.ExecutionBlockId;
 import org.apache.tajo.common.ProtoObject;
 import org.apache.tajo.ipc.TajoWorkerProtocol;
@@ -33,7 +34,7 @@ import java.util.List;
 /**
  * <code>FetchImpl</code> information to indicate the locations of intermediate data.
  */
-public class FetchImpl implements ProtoObject<TajoWorkerProtocol.FetchProto> {
+public class FetchImpl implements ProtoObject<TajoWorkerProtocol.FetchProto>, Cloneable {
   private TajoWorkerProtocol.FetchProto.Builder builder = null;
 
   private QueryUnit.PullHost host;             // The pull server host information
@@ -110,6 +111,7 @@ public class FetchImpl implements ProtoObject<TajoWorkerProtocol.FetchProto> {
     builder.setPartitionId(partitionId);
     builder.setHasNext(hasNext);
     builder.setName(name);
+
     if (rangeParams != null && !rangeParams.isEmpty()) {
       builder.setRangeParams(rangeParams);
     }
@@ -198,4 +200,24 @@ public class FetchImpl implements ProtoObject<TajoWorkerProtocol.FetchProto> {
   public List<Integer> getAttemptIds() {
     return attemptIds;
   }
+
+  public FetchImpl clone() throws CloneNotSupportedException {
+    FetchImpl newFetchImpl = (FetchImpl) super.clone();
+
+    newFetchImpl.builder = TajoWorkerProtocol.FetchProto.newBuilder();
+    newFetchImpl.host = host.clone();
+    newFetchImpl.type = type;
+    newFetchImpl.executionBlockId = executionBlockId;
+    newFetchImpl.partitionId = partitionId;
+    newFetchImpl.name = name;
+    newFetchImpl.rangeParams = rangeParams;
+    newFetchImpl.hasNext = hasNext;
+    if (taskIds != null) {
+      newFetchImpl.taskIds = Lists.newArrayList(taskIds);
+    }
+    if (attemptIds != null) {
+      newFetchImpl.attemptIds = Lists.newArrayList(attemptIds);
+    }
+    return newFetchImpl;
+  }
 }


[09/14] git commit: TAJO-980: execution page in Web UI broken.

Posted by ji...@apache.org.
TAJO-980: execution page in Web UI broken.

Closes #97


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/fe870851
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/fe870851
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/fe870851

Branch: refs/heads/index_support
Commit: fe870851934e76cde745f84b0b8061480031a7d1
Parents: b49ff30
Author: Hyunsik Choi <hy...@apache.org>
Authored: Tue Jul 29 11:39:23 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Tue Jul 29 11:39:23 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |   2 +
 .../tajo/webapp/QueryExecutorServlet.java       | 200 ++++++++++++-------
 2 files changed, 134 insertions(+), 68 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/fe870851/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 95747a1..4390177 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,8 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-980: execution page in Web UI broken. (hyunsik)
+
     TAJO-952: Wrong default partition volume config. (Mai Hai Thanh via jihoon)
 
     TAJO-974: Eliminate unexpected case condition in SubQuery. (Hyoungjun Kim 

http://git-wip-us.apache.org/repos/asf/tajo/blob/fe870851/tajo-core/src/main/java/org/apache/tajo/webapp/QueryExecutorServlet.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/webapp/QueryExecutorServlet.java b/tajo-core/src/main/java/org/apache/tajo/webapp/QueryExecutorServlet.java
index 3cb7d25..8b849ca 100644
--- a/tajo-core/src/main/java/org/apache/tajo/webapp/QueryExecutorServlet.java
+++ b/tajo-core/src/main/java/org/apache/tajo/webapp/QueryExecutorServlet.java
@@ -1,9 +1,11 @@
 package org.apache.tajo.webapp;
 
+import com.google.protobuf.ServiceException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.tajo.QueryId;
+import org.apache.tajo.QueryIdFactory;
 import org.apache.tajo.TajoProtos;
 import org.apache.tajo.catalog.CatalogUtil;
 import org.apache.tajo.catalog.TableDesc;
@@ -26,6 +28,7 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
 import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -159,7 +162,7 @@ public class QueryExecutorServlet extends HttpServlet {
             return;
           }
           returnValue.put("numOfRows", queryRunner.numOfRows);
-          returnValue.put("resultSize", queryRunner.resultSize);
+          returnValue.put("resultSize", queryRunner.resultRows);
           returnValue.put("resultData", queryRunner.queryResult);
           returnValue.put("resultColumns", queryRunner.columnNames);
           returnValue.put("runningTime", JSPUtil.getElapsedTime(queryRunner.startTime, queryRunner.finishTime));
@@ -239,12 +242,12 @@ public class QueryExecutorServlet extends HttpServlet {
 
     String queryRunnerId;
 
-    ClientProtos.SubmitQueryResponse queryRespons;
+    ClientProtos.SubmitQueryResponse response;
     AtomicBoolean running = new AtomicBoolean(true);
     AtomicBoolean stop = new AtomicBoolean(false);
     QueryId queryId;
     String query;
-    long resultSize;
+    long resultRows;
     int sizeLimit;
     long numOfRows;
     Exception error;
@@ -268,60 +271,110 @@ public class QueryExecutorServlet extends HttpServlet {
     public void run() {
       startTime = System.currentTimeMillis();
       try {
-        queryRespons = tajoClient.executeQuery(query);
-        if (queryRespons.getResultCode() == ClientProtos.ResultCode.OK) {
-          QueryId queryId = null;
-          try {
-            queryId = new QueryId(queryRespons.getQueryId());
+        response = tajoClient.executeQuery(query);
+
+        if (response == null) {
+          LOG.error("Internal Error: SubmissionResponse is NULL");
+          error = new Exception("Internal Error: SubmissionResponse is NULL");
+
+        } else if (response.getResultCode() == ClientProtos.ResultCode.OK) {
+          if (response.getIsForwarded()) {
+            queryId = new QueryId(response.getQueryId());
             getQueryResult(queryId);
-          } finally {
-            if (queryId != null) {
-              tajoClient.closeQuery(queryId);
+          } else {
+            if (!response.hasTableDesc() && !response.hasResultSet()) {
+            } else {
+              getSimpleQueryResult(response);
             }
+
+            progress.set(100);
           }
-        } else {
-          LOG.error("queryRespons.getResultCode() not OK:" + queryRespons.getResultCode());
-          error = new Exception("queryRespons.getResultCode() not OK:" + queryRespons.getResultCode());
         }
       } catch (Exception e) {
         LOG.error(e.getMessage(), e);
         error = e;
       } finally {
         running.set(false);
+
         finishTime = System.currentTimeMillis();
+
+        if (queryId != null) {
+          tajoClient.closeQuery(queryId);
+        }
       }
     }
 
-    private void getQueryResult(QueryId tajoQueryId) {
-      // query execute
+    private void getSimpleQueryResult(ClientProtos.SubmitQueryResponse response) {
+      ResultSet res = null;
       try {
-        QueryStatus status = null;
+        QueryId queryId = new QueryId(response.getQueryId());
+        TableDesc desc = new TableDesc(response.getTableDesc());
 
-        while (!stop.get()) {
+        if (response.getMaxRowNum() < 0 && queryId.equals(QueryIdFactory.NULL_QUERY_ID)) {
+          // non-forwarded INSERT INTO query does not have any query id.
+          // In this case, it just returns succeeded query information without printing the query results.
+        } else {
+          res = TajoClient.createResultSet(tajoClient, response);
+          MakeResultText(res, desc);
+        }
+        progress.set(100);
+      } catch (Exception e) {
+        LOG.error(e.getMessage(), e);
+        error = e;
+      } finally {
+        if (res != null) {
           try {
-            Thread.sleep(1000);
-          } catch(InterruptedException e) {
-            break;
-          }
-          status = tajoClient.getQueryStatus(tajoQueryId);
-          if (status.getState() == TajoProtos.QueryState.QUERY_MASTER_INIT
-              || status.getState() == TajoProtos.QueryState.QUERY_MASTER_LAUNCHED) {
-            continue;
+            res.close();
+          } catch (SQLException e) {
           }
+        }
+      }
+    }
 
-          if (status.getState() == TajoProtos.QueryState.QUERY_RUNNING
-              || status.getState() == TajoProtos.QueryState.QUERY_SUCCEEDED) {
-            int progressValue = (int) (status.getProgress() * 100.0f);
-            if(progressValue == 100)  {
-              progressValue = 99;
-            }
-            progress.set(progressValue);
-          }
-          if (status.getState() != TajoProtos.QueryState.QUERY_RUNNING
-              && status.getState() != TajoProtos.QueryState.QUERY_NOT_ASSIGNED) {
-            break;
+    private QueryStatus waitForComplete(QueryId queryid) throws ServiceException {
+      QueryStatus status = null;
+
+      while (!stop.get()) {
+
+        try {
+          Thread.sleep(150);
+        } catch(InterruptedException e) {
+          break;
+        }
+
+        status = tajoClient.getQueryStatus(queryid);
+        if (status.getState() == TajoProtos.QueryState.QUERY_MASTER_INIT
+            || status.getState() == TajoProtos.QueryState.QUERY_MASTER_LAUNCHED) {
+          continue;
+        }
+
+        if (status.getState() == TajoProtos.QueryState.QUERY_RUNNING
+            || status.getState() == TajoProtos.QueryState.QUERY_SUCCEEDED) {
+          int progressValue = (int) (status.getProgress() * 100.0f);
+          if(progressValue == 100)  {
+            progressValue = 99;
           }
+          progress.set(progressValue);
         }
+        if (status.getState() != TajoProtos.QueryState.QUERY_RUNNING
+            && status.getState() != TajoProtos.QueryState.QUERY_NOT_ASSIGNED) {
+          break;
+        }
+
+        try {
+          Thread.sleep(100);
+        } catch(InterruptedException e) {
+          break;
+        }
+      }
+
+      return status;
+    }
+
+    private void getQueryResult(QueryId tajoQueryId) {
+      // query execute
+      try {
+        QueryStatus status = waitForComplete(tajoQueryId);
 
         if(status == null) {
           LOG.error("Query Status is null");
@@ -344,44 +397,21 @@ public class QueryExecutorServlet extends HttpServlet {
                 tajoClient.getConf().setVar(TajoConf.ConfVars.USERNAME, response.getTajoUserName());
                 res = new TajoResultSet(tajoClient, queryId, tajoClient.getConf(), desc);
 
-                ResultSetMetaData rsmd = res.getMetaData();
-                resultSize = desc.getStats().getNumBytes();
-                LOG.info("Tajo Query Result: " + desc.getPath() + "\n");
-
-                int numOfColumns = rsmd.getColumnCount();
-                for(int i = 0; i < numOfColumns; i++) {
-                  columnNames.add(rsmd.getColumnName(i + 1));
-                }
-                queryResult = new ArrayList<List<Object>>();
-
-                if(sizeLimit < resultSize) {
-                    numOfRows = (long)((float)(desc.getStats().getNumRows()) * ((float)sizeLimit / (float)resultSize));
-                } else {
-                    numOfRows = desc.getStats().getNumRows();
-                }
-                int rowCount = 0;
-                boolean hasMoreData = false;
-                while (res.next()) {
-                  if(rowCount > numOfRows) {
-                    hasMoreData = true;
-                    break;
-                  }
-                  List<Object> row = new ArrayList<Object>();
-                  for(int i = 0; i < numOfColumns; i++) {
-                    row.add(res.getObject(i + 1).toString());
-                  }
-                  queryResult.add(row);
-                  rowCount++;
+                MakeResultText(res, desc);
 
-                }
               } finally {
                 if (res != null) {
                   res.close();
                 }
                 progress.set(100);
               }
-            } else {
-              error = new Exception(queryId + " no result");
+            } else { // CTAS or INSERT (OVERWRITE) INTO
+              progress.set(100);
+              try {
+                tajoClient.closeQuery(queryId);
+              } catch (Exception e) {
+                LOG.warn(e);
+              }
             }
           }
         }
@@ -390,5 +420,39 @@ public class QueryExecutorServlet extends HttpServlet {
         error = e;
       }
     }
+
+    private void MakeResultText(ResultSet res, TableDesc desc) throws SQLException {
+      ResultSetMetaData rsmd = res.getMetaData();
+      resultRows = desc.getStats() == null ? 0 : desc.getStats().getNumRows();
+      if (resultRows == 0) {
+        resultRows = 1000;
+      }
+      LOG.info("Tajo Query Result: " + desc.getPath() + "\n");
+
+      int numOfColumns = rsmd.getColumnCount();
+      for(int i = 0; i < numOfColumns; i++) {
+        columnNames.add(rsmd.getColumnName(i + 1));
+      }
+      queryResult = new ArrayList<List<Object>>();
+
+      if(sizeLimit < resultRows) {
+        numOfRows = (long)((float)(resultRows) * ((float)sizeLimit / (float) resultRows));
+      } else {
+        numOfRows = resultRows;
+      }
+
+      int rowCount = 0;
+      while (res.next()) {
+        if(rowCount > numOfRows) {
+          break;
+        }
+        List<Object> row = new ArrayList<Object>();
+        for(int i = 0; i < numOfColumns; i++) {
+          row.add(res.getObject(i + 1).toString());
+        }
+        queryResult.add(row);
+        rowCount++;
+      }
+    }
   }
 }


[11/14] git commit: TAJO-976: HashPartitioner doesn't make desired number of partitions infrequently. (Fixed a build failure due to 1.7 API)

Posted by ji...@apache.org.
TAJO-976: HashPartitioner doesn't make desired number of partitions infrequently. (Fixed a build failure due to 1.7 API)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/b6374168
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/b6374168
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/b6374168

Branch: refs/heads/index_support
Commit: b637416834951443e816fec930a8a08d8f2e17b7
Parents: 8be501f
Author: Hyunsik Choi <hy...@apache.org>
Authored: Tue Jul 29 13:38:02 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Tue Jul 29 13:38:02 2014 +0900

----------------------------------------------------------------------
 .../test/java/org/apache/tajo/engine/query/TestGroupByQuery.java | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/b6374168/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 72759c0..79efd92 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -600,7 +600,9 @@ public class TestGroupByQuery extends QueryTestCaseBase {
       Collections.sort(qmTasks, new Comparator<QueryMasterTask>() {
         @Override
         public int compare(QueryMasterTask o1, QueryMasterTask o2) {
-          return Long.compare(o1.getQuerySubmitTime(), o2.getQuerySubmitTime());
+          long l1 = o1.getQuerySubmitTime();
+          long l2 = o2.getQuerySubmitTime();
+          return l1 < l2 ? - 1 : (l1 > l2 ? 1 : 0);
         }
       });
 


[04/14] git commit: TAJO-968: Self-Join query (including partitioned table) doesn't run unexpectedly using auto broadcast join. (jaehwa)

Posted by ji...@apache.org.
TAJO-968: Self-Join query (including partitioned table) doesn't run unexpectedly using auto broadcast join. (jaehwa)

Closes #88


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/72808e06
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/72808e06
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/72808e06

Branch: refs/heads/index_support
Commit: 72808e06f02cbb0bd7d9cf345544c60205cf34b0
Parents: 326be45
Author: blrunner <bl...@apache.org>
Authored: Thu Jul 24 11:27:18 2014 +0900
Committer: blrunner <bl...@apache.org>
Committed: Thu Jul 24 11:27:18 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 ++
 .../apache/tajo/worker/TaskAttemptContext.java  | 26 +++++++++++-
 .../tajo/engine/query/TestJoinBroadcast.java    | 42 +++++++++++++++++---
 3 files changed, 64 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/72808e06/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index a67625d..08cf60a 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,9 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-968: Self-Join query (including partitioned table) doesn't run unexpectedly 
+    using auto broadcast join. (jaehwa)
+
     TAJO-914: join queries with constant values can cause schema mismatch in
     logical plan. (hyunsik)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/72808e06/tajo-core/src/main/java/org/apache/tajo/worker/TaskAttemptContext.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/worker/TaskAttemptContext.java b/tajo-core/src/main/java/org/apache/tajo/worker/TaskAttemptContext.java
index 1f0c410..db4af45 100644
--- a/tajo-core/src/main/java/org/apache/tajo/worker/TaskAttemptContext.java
+++ b/tajo-core/src/main/java/org/apache/tajo/worker/TaskAttemptContext.java
@@ -31,8 +31,10 @@ import org.apache.tajo.conf.TajoConf;
 import org.apache.tajo.engine.planner.enforce.Enforcer;
 import org.apache.tajo.engine.planner.global.DataChannel;
 import org.apache.tajo.engine.query.QueryContext;
+import org.apache.tajo.storage.fragment.FileFragment;
 import org.apache.tajo.storage.fragment.Fragment;
 import org.apache.tajo.storage.fragment.FragmentConvertor;
+import org.apache.tajo.util.TUtil;
 
 import java.io.File;
 import java.util.*;
@@ -234,10 +236,30 @@ public class TaskAttemptContext {
       tableFragments = new ArrayList<FragmentProto>();
     }
 
+    List<Path> paths = fragmentToPath(tableFragments);
+
     for (FragmentProto eachFragment: fragments) {
-      tableFragments.add(eachFragment);
+      FileFragment fileFragment = FragmentConvertor.convert(FileFragment.class, eachFragment);
+      // If current attempt already has same path, we don't need to add it to fragments.
+      if (!paths.contains(fileFragment.getPath())) {
+        tableFragments.add(eachFragment);
+      }
     }
-    fragmentMap.put(tableId, tableFragments);
+
+    if (tableFragments.size() > 0) {
+      fragmentMap.put(tableId, tableFragments);
+    }
+  }
+
+  private List<Path> fragmentToPath(List<FragmentProto> tableFragments) {
+    List<Path> list = TUtil.newList();
+
+    for (FragmentProto proto : tableFragments) {
+      FileFragment fragment = FragmentConvertor.convert(FileFragment.class, proto);
+      list.add(fragment.getPath());
+    }
+
+    return list;
   }
 
   public Path getWorkDir() {

http://git-wip-us.apache.org/repos/asf/tajo/blob/72808e06/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
index 9cc65bc..5df6f24 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
@@ -18,8 +18,6 @@
 
 package org.apache.tajo.engine.query;
 
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.tajo.*;
 import org.apache.tajo.catalog.*;
@@ -43,13 +41,11 @@ import org.junit.experimental.categories.Category;
 import java.io.File;
 import java.sql.ResultSet;
 
-import static junit.framework.TestCase.*;
 import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME;
-import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.*;
 
 @Category(IntegrationTest.class)
 public class TestJoinBroadcast extends QueryTestCaseBase {
-  private static final Log LOG = LogFactory.getLog(TestJoinBroadcast.class);
   public TestJoinBroadcast() throws Exception {
     super(TajoConstants.DEFAULT_DATABASE_NAME);
     testingCluster.setAllTajoDaemonConfValue(TajoConf.ConfVars.DIST_QUERY_BROADCAST_JOIN_AUTO.varname, "true");
@@ -615,4 +611,40 @@ public class TestJoinBroadcast extends QueryTestCaseBase {
       executeString("DROP TABLE table_large PURGE").close();
     }
   }
+
+
+  @Test
+  public final void testSelfJoin() throws Exception {
+    String tableName = CatalogUtil.normalizeIdentifier("paritioned_nation");
+    ResultSet res = executeString(
+        "create table " + tableName + " (n_name text,"
+            + "  n_comment text, n_regionkey int8) USING csv "
+            + "WITH ('csvfile.delimiter'='|')"
+            + "PARTITION BY column(n_nationkey int8)");
+    res.close();
+    assertTrue(catalog.existsTable(DEFAULT_DATABASE_NAME, tableName));
+
+    res = executeString(
+        "insert overwrite into " + tableName
+            + " select n_name, n_comment, n_regionkey, n_nationkey from nation");
+    res.close();
+
+    res = executeString(
+      "select a.n_nationkey, a.n_name from nation a join nation b on a.n_nationkey = b.n_nationkey"
+      + " where a.n_nationkey in (1)");
+    String expected = resultSetToString(res);
+    res.close();
+
+    res = executeString(
+      "select a.n_nationkey, a.n_name from " + tableName + " a join "+tableName +
+      " b on a.n_nationkey = b.n_nationkey "
+      + " where a.n_nationkey in (1)");
+    String resultSetData = resultSetToString(res);
+    res.close();
+
+    assertEquals(expected, resultSetData);
+
+  }
+
+
 }


[06/14] git commit: TAJO-974: Eliminate unexpected case condition in SubQuery. (Hyoungjun Kim via hyunsik)

Posted by ji...@apache.org.
TAJO-974: Eliminate unexpected case condition in SubQuery. (Hyoungjun Kim via hyunsik)

Closes #974


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/1f6b5b38
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/1f6b5b38
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/1f6b5b38

Branch: refs/heads/index_support
Commit: 1f6b5b38752f499ee6d70ea1be399df34442b4f3
Parents: 9880f06
Author: Hyunsik Choi <hy...@apache.org>
Authored: Mon Jul 28 11:19:54 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Mon Jul 28 11:19:54 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |   7 +-
 .../tajo/master/querymaster/Repartitioner.java  |  11 +-
 .../tajo/master/querymaster/SubQuery.java       |  23 +++--
 .../tajo/engine/query/TestGroupByQuery.java     | 103 ++++++++++++++++++-
 4 files changed, 131 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/1f6b5b38/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 2e530af..3ac13a9 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,9 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-974: Eliminate unexpected case condition in SubQuery. (Hyoungjun Kim 
+    via hyunsik)
+
     TAJO-977: INSERT into a partitioned table as SELECT statement uses a wrong 
     schema. (Hyoungjun Kim via hyunsik)
 
@@ -112,8 +115,8 @@ Release 0.9.0 - unreleased
     TAJO-972: Broadcast join with left outer join returns duplicated rows.
     (Hyoungjun Kim via jaehwa)
 
-    TAJO-666: java.nio.BufferOverflowException occurs when the query includes an order by 
-    clause on a TEXT column. (Mai Hai Thanh via jihoon)
+    TAJO-666: java.nio.BufferOverflowException occurs when the query includes 
+    an order by clause on a TEXT column. (Mai Hai Thanh via jihoon)
 
     TAJO-939: Refactoring the column resolver in LogicalPlan. (hyunsik)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/1f6b5b38/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
index 31c520f..6eebbde 100644
--- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
+++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
@@ -726,12 +726,21 @@ public class Repartitioner {
       }
     }
 
+    int groupingColumns = 0;
     GroupbyNode groupby = PlannerUtil.findMostBottomNode(subQuery.getBlock().getPlan(), NodeType.GROUP_BY);
+    if (groupby != null) {
+      groupingColumns = groupby.getGroupingColumns().length;
+    } else {
+      DistinctGroupbyNode dGroupby = PlannerUtil.findMostBottomNode(subQuery.getBlock().getPlan(), NodeType.DISTINCT_GROUP_BY);
+      if (dGroupby != null) {
+        groupingColumns = dGroupby.getGroupingColumns().length;
+      }
+    }
     // get a proper number of tasks
     int determinedTaskNum = Math.min(maxNum, finalFetches.size());
     LOG.info(subQuery.getId() + ", ScheduleHashShuffledFetches - Max num=" + maxNum + ", finalFetchURI=" + finalFetches.size());
 
-    if (groupby != null && groupby.getGroupingColumns().length == 0) {
+    if (groupingColumns == 0) {
       determinedTaskNum = 1;
       LOG.info(subQuery.getId() + ", No Grouping Column - determinedTaskNum is set to 1");
     } else {

http://git-wip-us.apache.org/repos/asf/tajo/blob/1f6b5b38/tajo-core/src/main/java/org/apache/tajo/master/querymaster/SubQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/SubQuery.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/SubQuery.java
index 94f8b32..f2e9dd5 100644
--- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/SubQuery.java
+++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/SubQuery.java
@@ -48,10 +48,7 @@ import org.apache.tajo.engine.planner.PlannerUtil;
 import org.apache.tajo.engine.planner.global.DataChannel;
 import org.apache.tajo.engine.planner.global.ExecutionBlock;
 import org.apache.tajo.engine.planner.global.MasterPlan;
-import org.apache.tajo.engine.planner.logical.GroupbyNode;
-import org.apache.tajo.engine.planner.logical.NodeType;
-import org.apache.tajo.engine.planner.logical.ScanNode;
-import org.apache.tajo.engine.planner.logical.StoreTableNode;
+import org.apache.tajo.engine.planner.logical.*;
 import org.apache.tajo.ipc.TajoMasterProtocol;
 import org.apache.tajo.master.*;
 import org.apache.tajo.master.TaskRunnerGroupEvent.EventType;
@@ -716,9 +713,12 @@ public class SubQuery implements EventHandler<SubQueryEvent> {
       MasterPlan masterPlan = subQuery.getMasterPlan();
       ExecutionBlock parent = masterPlan.getParent(subQuery.getBlock());
 
-      GroupbyNode grpNode = null;
+      LogicalNode grpNode = null;
       if (parent != null) {
         grpNode = PlannerUtil.findMostBottomNode(parent.getPlan(), NodeType.GROUP_BY);
+        if (grpNode == null) {
+          grpNode = PlannerUtil.findMostBottomNode(parent.getPlan(), NodeType.DISTINCT_GROUP_BY);
+        }
       }
 
       // We assume this execution block the first stage of join if two or more tables are included in this block,
@@ -779,8 +779,13 @@ public class SubQuery implements EventHandler<SubQueryEvent> {
         return taskNum;
         // Is this subquery the first step of group-by?
       } else if (grpNode != null) {
-
-        if (grpNode.getGroupingColumns().length == 0) {
+        boolean hasGroupColumns = true;
+        if (grpNode.getType() == NodeType.GROUP_BY) {
+          hasGroupColumns = ((GroupbyNode)grpNode).getGroupingColumns().length > 0;
+        } else if (grpNode.getType() == NodeType.DISTINCT_GROUP_BY) {
+          hasGroupColumns = ((DistinctGroupbyNode)grpNode).getGroupingColumns().length > 0;
+        }
+        if (!hasGroupColumns) {
           return 1;
         } else {
           long volume = getInputVolume(subQuery.masterPlan, subQuery.context, subQuery.block);
@@ -836,10 +841,10 @@ public class SubQuery implements EventHandler<SubQueryEvent> {
       long volume = getInputVolume(subQuery.getMasterPlan(), subQuery.context, subQuery.getBlock());
 
       int mb = (int) Math.ceil((double)volume / 1048576);
-      LOG.info("Table's volume is approximately " + mb + " MB");
+      LOG.info(subQuery.getId() + ", Table's volume is approximately " + mb + " MB");
       // determine the number of task per 64MB
       int maxTaskNum = Math.max(1, (int) Math.ceil((double)mb / 64));
-      LOG.info("The determined number of non-leaf tasks is " + maxTaskNum);
+      LOG.info(subQuery.getId() + ", The determined number of non-leaf tasks is " + maxTaskNum);
       return maxTaskNum;
     }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/1f6b5b38/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 41ffa06..0ffcf11 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -18,23 +18,34 @@
 
 package org.apache.tajo.engine.query;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.tajo.IntegrationTest;
 import org.apache.tajo.QueryTestCaseBase;
 import org.apache.tajo.TajoConstants;
 import org.apache.tajo.TajoTestingCluster;
 import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.common.TajoDataTypes.Type;
+import org.apache.tajo.conf.TajoConf.ConfVars;
+import org.apache.tajo.ipc.TajoWorkerProtocol.ShuffleFileOutput;
+import org.apache.tajo.master.querymaster.Query;
+import org.apache.tajo.master.querymaster.QueryMasterTask;
+import org.apache.tajo.master.querymaster.QueryUnit;
+import org.apache.tajo.master.querymaster.SubQuery;
 import org.apache.tajo.storage.StorageConstants;
 import org.apache.tajo.util.KeyValueSet;
+import org.apache.tajo.worker.TajoWorker;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
 import java.sql.ResultSet;
+import java.util.*;
 
-import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.*;
 
 @Category(IntegrationTest.class)
 public class TestGroupByQuery extends QueryTestCaseBase {
+  private static final Log LOG = LogFactory.getLog(TestGroupByQuery.class);
 
   public TestGroupByQuery() throws Exception {
     super(TajoConstants.DEFAULT_DATABASE_NAME);
@@ -529,4 +540,94 @@ public class TestGroupByQuery extends QueryTestCaseBase {
     assertResultSet(res);
     cleanupQuery(res);
   }
+
+  @Test
+  public final void testNumShufflePartition() throws Exception {
+    KeyValueSet tableOptions = new KeyValueSet();
+    tableOptions.put(StorageConstants.CSVFILE_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);
+    tableOptions.put(StorageConstants.CSVFILE_NULL, "\\\\N");
+
+    Schema schema = new Schema();
+    schema.addColumn("col1", Type.TEXT);
+    schema.addColumn("col2", Type.TEXT);
+
+    List<String> data = new ArrayList<String>();
+    int totalBytes = 0;
+    Random rand = new Random(System.currentTimeMillis());
+    String col1Prefix = "Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1" +
+        "Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1" +
+        "Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1Column-1";
+
+    Set<Integer> uniqKeys = new HashSet<Integer>();
+    while(true) {
+      int col1RandomValue = rand.nextInt(1000000);
+      uniqKeys.add(col1RandomValue);
+      String str = (col1Prefix + "-" + col1RandomValue) + "|col2-" + rand.nextInt(1000000);
+      data.add(str);
+
+      totalBytes += str.getBytes().length;
+
+      if (totalBytes > 3 * 1024 * 1024) {
+        break;
+      }
+    }
+    TajoTestingCluster.createTable("testnumshufflepartition", schema, tableOptions, data.toArray(new String[]{}), 3);
+
+    try {
+      testingCluster.setAllTajoDaemonConfValue(ConfVars.DIST_QUERY_GROUPBY_PARTITION_VOLUME.varname, "2");
+      ResultSet res = executeString(
+          "select col1 \n" +
+              ",count(distinct col2) as cnt1\n" +
+              "from testnumshufflepartition \n" +
+              "group by col1"
+      );
+
+      int numRows = 0;
+      while (res.next()) {
+        numRows++;
+      }
+      assertEquals(uniqKeys.size(), numRows);
+
+      // find last QueryMasterTask
+      List<QueryMasterTask> qmTasks = new ArrayList<QueryMasterTask>();
+
+      for(TajoWorker worker: testingCluster.getTajoWorkers()) {
+        qmTasks.addAll(worker.getWorkerContext().getQueryMaster().getFinishedQueryMasterTasks());
+      }
+
+      assertTrue(!qmTasks.isEmpty());
+
+      Collections.sort(qmTasks, new Comparator<QueryMasterTask>() {
+        @Override
+        public int compare(QueryMasterTask o1, QueryMasterTask o2) {
+          return Long.compare(o1.getQuerySubmitTime(), o2.getQuerySubmitTime());
+        }
+      });
+
+      // Getting the number of partitions. It should be 2.
+      Set<Integer> partitionIds = new HashSet<Integer>();
+
+      Query query = qmTasks.get(qmTasks.size() - 1).getQuery();
+      Collection<SubQuery> subQueries = query.getSubQueries();
+      assertNotNull(subQueries);
+      assertTrue(!subQueries.isEmpty());
+      for (SubQuery subQuery: subQueries) {
+        if (subQuery.getId().toStringNoPrefix().endsWith("_000001")) {
+          QueryUnit[] queryUnits = subQuery.getQueryUnits();
+          assertNotNull(queryUnits);
+          for (QueryUnit eachQueryUnit: queryUnits) {
+            for (ShuffleFileOutput output: eachQueryUnit.getShuffleFileOutputs()) {
+              partitionIds.add(output.getPartId());
+            }
+          }
+        }
+      }
+
+      assertEquals(2, partitionIds.size());
+      executeString("DROP TABLE testnumshufflepartition PURGE").close();
+    } finally {
+      testingCluster.setAllTajoDaemonConfValue(ConfVars.DIST_QUERY_GROUPBY_PARTITION_VOLUME.varname,
+          ConfVars.DIST_QUERY_GROUPBY_PARTITION_VOLUME.defaultVal);
+    }
+  }
 }


[08/14] git commit: TAJO-952: Wrong default partition volume config. (Mai Hai Thanh via jihoon)

Posted by ji...@apache.org.
TAJO-952: Wrong default partition volume config. (Mai Hai Thanh via jihoon)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/b49ff30b
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/b49ff30b
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/b49ff30b

Branch: refs/heads/index_support
Commit: b49ff30b9a3da97d347e8e08798affceac79e6bb
Parents: 3cfc198
Author: Jihoon Son <ji...@apache.org>
Authored: Tue Jul 29 11:13:14 2014 +0900
Committer: Jihoon Son <ji...@apache.org>
Committed: Tue Jul 29 11:13:14 2014 +0900

----------------------------------------------------------------------
 CHANGES                                                          | 2 ++
 tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java     | 3 +--
 .../java/org/apache/tajo/master/querymaster/Repartitioner.java   | 4 ++--
 3 files changed, 5 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/b49ff30b/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 9dac2c7..95747a1 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,8 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-952: Wrong default partition volume config. (Mai Hai Thanh via jihoon)
+
     TAJO-974: Eliminate unexpected case condition in SubQuery. (Hyoungjun Kim 
     via hyunsik) 
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/b49ff30b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index 83ff9ed..a8e6d8d 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -249,8 +249,7 @@ public class TajoConf extends Configuration {
     DIST_QUERY_SORT_PARTITION_VOLUME("tajo.dist-query.sort.partition-volume-mb", 256),
     DIST_QUERY_GROUPBY_PARTITION_VOLUME("tajo.dist-query.groupby.partition-volume-mb", 256),
 
-    DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb",
-        256 * 1024 * 1024),
+    DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb", 256),
 
     //////////////////////////////////
     // Physical Executors

http://git-wip-us.apache.org/repos/asf/tajo/blob/b49ff30b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
index 6eebbde..f86106f 100644
--- a/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
+++ b/tajo-core/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
@@ -773,8 +773,8 @@ public class Repartitioner {
        SubQuery subQuery, Map<ExecutionBlockId, List<IntermediateEntry>> intermediates,
        String tableName) {
     int i = 0;
-    int splitVolume =   subQuery.getContext().getConf().
-        getIntVar(ConfVars.DIST_QUERY_TABLE_PARTITION_VOLUME);
+    long splitVolume = ((long) 1048576) * subQuery.getContext().getConf().
+        getIntVar(ConfVars.DIST_QUERY_TABLE_PARTITION_VOLUME); // in bytes
 
     long sumNumBytes = 0L;
     Map<Integer, List<FetchImpl>> fetches = new HashMap<Integer, List<FetchImpl>>();


[03/14] git commit: TAJO-914: join queries with constant values can cause schema mismatch in logical plan.

Posted by ji...@apache.org.
TAJO-914: join queries with constant values can cause schema mismatch in logical plan.

Closes #78


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/326be451
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/326be451
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/326be451

Branch: refs/heads/index_support
Commit: 326be451ded71099b88bdb6a50c40946df0af206
Parents: 45559ce
Author: Hyunsik Choi <hy...@apache.org>
Authored: Wed Jul 23 18:26:15 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Wed Jul 23 18:26:15 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  6 ++++
 .../apache/tajo/engine/utils/SchemaUtil.java    | 20 +++++++++--
 .../tajo/engine/query/TestCaseByCases.java      | 38 ++++++++++++++++++--
 .../TestCaseByCases/testTAJO914Case1.sql        | 10 ++++++
 .../TestCaseByCases/testTAJO914Case2.sql        |  9 +++++
 .../TestCaseByCases/testTAJO914Case3.sql        | 10 ++++++
 .../TestCaseByCases/testTAJO914Case4.sql        | 10 ++++++
 .../TestCaseByCases/testTAJO914Case1.result     |  5 +++
 .../TestCaseByCases/testTAJO914Case2.result     |  5 +++
 .../TestCaseByCases/testTAJO914Case3.result     |  5 +++
 .../TestCaseByCases/testTAJO914Case4.result     |  5 +++
 11 files changed, 118 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 2be9b26..a67625d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,12 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-914: join queries with constant values can cause schema mismatch in
+    logical plan. (hyunsik)
+
+    TAJO-969: Distributed sort on a large data set may result in incorrect
+    results. (hyunsik)
+
     TAJO-972: Broadcast join with left outer join returns duplicated rows.
     (Hyoungjun Kim via jaehwa)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
index c882607..981b572 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
@@ -23,6 +23,16 @@ import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.catalog.TableDesc;
 
 public class SchemaUtil {
+  // See TAJO-914 bug.
+  //
+  // The essential problem is that a constant value is evaluated multiple times at each scan.
+  // As a result, join nodes can take child nodes which have fields with the same name.
+  // Because the current schema does not allow duplicate names and ignores duplicated columns,
+  // this finally causes an in-out schema mismatch between the parent and child nodes.
+  //
+  // tmpColumnSeq is a hack to avoid the above problem by keeping duplicated constant values as differently named fields.
+  // The essential solution would be https://issues.apache.org/jira/browse/TAJO-895.
+  static int tmpColumnSeq = 0;
   public static Schema merge(Schema left, Schema right) {
     Schema merged = new Schema();
     for(Column col : left.getColumns()) {
@@ -31,11 +41,17 @@ public class SchemaUtil {
       }
     }
     for(Column col : right.getColumns()) {
-      if (!merged.containsByQualifiedName(col.getQualifiedName())) {
+      if (merged.containsByQualifiedName(col.getQualifiedName())) {
+        merged.addColumn("?fake" + (tmpColumnSeq++), col.getDataType());
+      } else {
         merged.addColumn(col);
       }
     }
-    
+
+    // Reset the sequence to 0 if incrementing it has overflowed to a negative value.
+    if (tmpColumnSeq < 0) {
+      tmpColumnSeq = 0;
+    }
     return merged;
   }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
index 459a4c1..846c290 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
@@ -71,7 +71,7 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testTAJO880_1() throws Exception {
+  public final void testTAJO880Case1() throws Exception {
     //TAJO-880: NULL in CASE clause occurs Exception.
     ResultSet res = executeString(
         "select case when l_returnflag != 'R' then l_orderkey else null end from lineitem"
@@ -91,7 +91,7 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testTAJO880_2() throws Exception {
+  public final void testTAJO880Case2() throws Exception {
     //TAJO-880: NULL in CASE clause occurs Exception.
     ResultSet res = executeString(
         "select case when l_returnflag != 'R' then null else l_orderkey end from lineitem"
@@ -111,7 +111,7 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testTAJO880_3() throws Exception {
+  public final void testTAJO880Case3() throws Exception {
     //TAJO-880: NULL in CASE clause occurs Exception.
     ResultSet res = executeString(
         "select case " +
@@ -135,6 +135,38 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
+  public final void testTAJO914Case1() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void testTAJO914Case2() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void testTAJO914Case3() throws Exception {
+    executeString("CREATE TABLE T3 (l_orderkey bigint, col1 text);").close();
+    ResultSet res = executeQuery();
+    res.close();
+
+    res = executeString("select * from T3;");
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void testTAJO914Case4() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
   public final void testTAJO917Case1() throws Exception {
     ResultSet res = executeQuery();
     assertResultSet(res);

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql
new file mode 100644
index 0000000..765110a
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql
@@ -0,0 +1,10 @@
+select
+  l_orderkey,
+  '##' as col1
+from
+  lineitem
+  join orders on l_orderkey = o_orderkey
+group by
+  l_orderkey
+order by
+  l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql
new file mode 100644
index 0000000..f404088
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql
@@ -0,0 +1,9 @@
+select
+  l_orderkey,
+  '##' as col1
+from
+  lineitem join orders on l_orderkey = o_orderkey
+group by
+  l_orderkey, col1
+order by
+  l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql
new file mode 100644
index 0000000..f9ee355
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql
@@ -0,0 +1,10 @@
+insert OVERWRITE into T3
+  select
+    l_orderkey,
+    '##' as col1
+  from
+    lineitem join orders on l_orderkey = o_orderkey
+  group by
+    l_orderkey, col1
+  order by
+    l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql
new file mode 100644
index 0000000..ba99b70
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql
@@ -0,0 +1,10 @@
+  select
+    l_orderkey,
+    '##' as col1,
+    sum(l_orderkey) as s1
+  from
+    lineitem join orders o1 on l_orderkey = o1.o_orderkey join orders o2 on l_orderkey = o2.o_orderkey
+  group by
+    l_orderkey, col1
+  order by
+    l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result
new file mode 100644
index 0000000..01e467c
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result
@@ -0,0 +1,5 @@
+l_orderkey,col1
+-------------------------------
+1,##
+2,##
+3,##
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result
new file mode 100644
index 0000000..01e467c
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result
@@ -0,0 +1,5 @@
+l_orderkey,col1
+-------------------------------
+1,##
+2,##
+3,##
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result
new file mode 100644
index 0000000..01e467c
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result
@@ -0,0 +1,5 @@
+l_orderkey,col1
+-------------------------------
+1,##
+2,##
+3,##
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result
new file mode 100644
index 0000000..d3b79d1
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result
@@ -0,0 +1,5 @@
+l_orderkey,col1,s1
+-------------------------------
+1,##,2
+2,##,2
+3,##,6
\ No newline at end of file


[13/14] git commit: TAJO-957: ROUND should be support INT parameter. (Mai Hai Thanh via hyunsik)

Posted by ji...@apache.org.
TAJO-957: ROUND should be support INT parameter. (Mai Hai Thanh via hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/8024f6ab
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/8024f6ab
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/8024f6ab

Branch: refs/heads/index_support
Commit: 8024f6ab324e9e3ba2c1968386292f9243f44b97
Parents: b637416
Author: Hyunsik Choi <hy...@apache.org>
Authored: Thu Jul 31 19:41:02 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Fri Aug 1 14:23:58 2014 +0900

----------------------------------------------------------------------
 CHANGES                                                 |  2 ++
 .../org/apache/tajo/engine/function/math/Round.java     |  7 +++++--
 .../apache/tajo/engine/function/TestMathFunctions.java  | 12 ++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/8024f6ab/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 4390177..f423548 100644
--- a/CHANGES
+++ b/CHANGES
@@ -96,6 +96,8 @@ Release 0.9.0 - unreleased
     (Hyoungjun Kim via hyunsik)
 
   BUG FIXES
+    
+    TAJO-957: ROUND should be support INT parameter. (Mai Hai Thanh via hyunsik)
 
     TAJO-980: execution page in Web UI broken. (hyunsik)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/8024f6ab/tajo-core/src/main/java/org/apache/tajo/engine/function/math/Round.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/function/math/Round.java b/tajo-core/src/main/java/org/apache/tajo/engine/function/math/Round.java
index 9740888..e457791 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/function/math/Round.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/function/math/Round.java
@@ -39,8 +39,11 @@ import org.apache.tajo.storage.Tuple;
   example = "> SELECT round(42.4)\n"
           + "42",
   returnType = TajoDataTypes.Type.INT8,
-  paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.FLOAT4}),
-          @ParamTypes(paramTypes = {TajoDataTypes.Type.FLOAT8})}
+    paramTypes = {@ParamTypes(paramTypes = {TajoDataTypes.Type.FLOAT4}),
+        @ParamTypes(paramTypes = {TajoDataTypes.Type.FLOAT8}),
+        @ParamTypes(paramTypes = {TajoDataTypes.Type.INT4}),
+        @ParamTypes(paramTypes = {TajoDataTypes.Type.INT8}),
+    }
 )
 public class Round extends GeneralFunction {
   public Round() {

http://git-wip-us.apache.org/repos/asf/tajo/blob/8024f6ab/tajo-core/src/test/java/org/apache/tajo/engine/function/TestMathFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestMathFunctions.java b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestMathFunctions.java
index 99c31c6..41fcbfe 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/function/TestMathFunctions.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/function/TestMathFunctions.java
@@ -23,8 +23,10 @@ import org.apache.tajo.engine.eval.ExprTestBase;
 import org.junit.Test;
 
 import java.io.IOException;
+import java.sql.ResultSet;
 
 import static org.apache.tajo.common.TajoDataTypes.Type.*;
+import static org.junit.Assert.assertEquals;
 
 public class TestMathFunctions extends ExprTestBase {
   @Test
@@ -44,6 +46,16 @@ public class TestMathFunctions extends ExprTestBase {
 
     testEval(schema, "table1", "1.0, 0.2, 0.4", "select round(col1 + col2 + col3) from table1",
         new String[]{"2"});
+
+    Schema schema2 = new Schema();
+    schema2.addColumn("col1", INT4);
+    schema2.addColumn("col2", INT8);
+    schema2.addColumn("col3", FLOAT4);
+    schema2.addColumn("col4", FLOAT8);
+
+    testEval(schema2, "table1", "9,9,9.5,9.5",
+        "select round(col1), round (col2), round(col3), round(col4) from table1",
+        new String [] {"9", "9", "10", "10"});
   }
 
   @Test


[10/14] git commit: TAJO-976: HashPartitioner doesn't make desired number of partitions infrequently. (Hyoungjun Kim via hyunsik)

Posted by ji...@apache.org.
TAJO-976: HashPartitioner doesn't make desired number of partitions infrequently. (Hyoungjun Kim via hyunsik)

Closes #94


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/8be501f4
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/8be501f4
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/8be501f4

Branch: refs/heads/index_support
Commit: 8be501f43fc1b20c2a57790259aceafb02987df5
Parents: fe87085
Author: Hyunsik Choi <hy...@apache.org>
Authored: Tue Jul 29 12:26:12 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Tue Jul 29 13:27:00 2014 +0900

----------------------------------------------------------------------
 .../java/org/apache/tajo/datum/Float4Datum.java |   3 +-
 .../java/org/apache/tajo/datum/Float8Datum.java |   3 +-
 .../java/org/apache/tajo/datum/Int2Datum.java   |   3 +-
 .../java/org/apache/tajo/datum/Int4Datum.java   |   3 +-
 .../java/org/apache/tajo/datum/Int8Datum.java   |   3 +-
 .../java/org/apache/tajo/datum/TextDatum.java   |   4 +-
 .../java/org/apache/tajo/util/MurmurHash.java   | 213 +++++++++++++++++++
 .../planner/physical/HashPartitioner.java       |   3 +-
 .../planner/physical/TestHashPartitioner.java   |  39 +++-
 .../tajo/engine/query/TestGroupByQuery.java     |   4 +-
 .../tajo/engine/query/TestJoinBroadcast.java    |   2 +-
 .../testGroupByWithSameConstantKeys1.sql        |   2 +-
 .../testGroupByWithSameExprs1.sql               |   2 +
 .../testGroupByWithSameExprs2.sql               |   4 +-
 .../testHavingWithNamedTarget.sql               |  14 +-
 .../TestGroupByQuery/testGroupBy2.result        |   4 +-
 .../testGroupByWithSameConstantKeys1.result     |   4 +-
 .../TestGroupByQuery/testGroupbyWithJson.result |   4 +-
 .../testHavingWithNamedTarget.result            |   4 +-
 ...estBroadcastMultiColumnPartitionTable.result |   4 +-
 20 files changed, 294 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java
index e24bce4..610ea95 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/Float4Datum.java
@@ -22,6 +22,7 @@ import com.google.gson.annotations.Expose;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.exception.InvalidCastException;
 import org.apache.tajo.exception.InvalidOperationException;
+import org.apache.tajo.util.MurmurHash;
 import org.apache.tajo.util.NumberUtil;
 import org.apache.tajo.util.datetime.DateTimeUtil;
 import org.apache.tajo.util.datetime.TimeMeta;
@@ -106,7 +107,7 @@ public class Float4Datum extends NumericDatum {
 
   @Override
   public int hashCode() {
-    return (int) val;
+    return MurmurHash.hash(val);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/datum/Float8Datum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Float8Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Float8Datum.java
index 0542148..90adcc7 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/Float8Datum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/Float8Datum.java
@@ -22,6 +22,7 @@ import com.google.gson.annotations.Expose;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.exception.InvalidOperationException;
 import org.apache.tajo.util.Bytes;
+import org.apache.tajo.util.MurmurHash;
 import org.apache.tajo.util.NumberUtil;
 import org.apache.tajo.util.datetime.DateTimeUtil;
 import org.apache.tajo.util.datetime.TimeMeta;
@@ -96,7 +97,7 @@ public class Float8Datum extends NumericDatum {
 
   @Override
   public int hashCode() {
-    return (int) val;
+    return MurmurHash.hash(val);
   }
 
   public boolean equals(Object obj) {

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java
index 38cf019..ab17bdc 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/Int2Datum.java
@@ -21,6 +21,7 @@ package org.apache.tajo.datum;
 import com.google.gson.annotations.Expose;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.exception.InvalidOperationException;
+import org.apache.tajo.util.MurmurHash;
 import org.apache.tajo.util.NumberUtil;
 import org.apache.tajo.util.datetime.DateTimeUtil;
 import org.apache.tajo.util.datetime.TimeMeta;
@@ -97,7 +98,7 @@ public class Int2Datum extends NumericDatum {
 
   @Override
   public int hashCode() {
-    return val;
+    return MurmurHash.hash(val);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/datum/Int4Datum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Int4Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Int4Datum.java
index d26b6b2..9a60863 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/Int4Datum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/Int4Datum.java
@@ -21,6 +21,7 @@ package org.apache.tajo.datum;
 import com.google.gson.annotations.Expose;
 import org.apache.tajo.common.TajoDataTypes.Type;
 import org.apache.tajo.exception.InvalidOperationException;
+import org.apache.tajo.util.MurmurHash;
 import org.apache.tajo.util.NumberUtil;
 import org.apache.tajo.util.datetime.DateTimeUtil;
 import org.apache.tajo.util.datetime.TimeMeta;
@@ -102,7 +103,7 @@ public class Int4Datum extends NumericDatum {
 
   @Override
   public int hashCode() {
-    return val;
+    return MurmurHash.hash(val);
   }
 
   public boolean equals(Object obj) {

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java b/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java
index 46a1353..db8a12b 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/Int8Datum.java
@@ -22,6 +22,7 @@ import com.google.gson.annotations.Expose;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.exception.InvalidCastException;
 import org.apache.tajo.exception.InvalidOperationException;
+import org.apache.tajo.util.MurmurHash;
 import org.apache.tajo.util.NumberUtil;
 import org.apache.tajo.util.datetime.DateTimeUtil;
 import org.apache.tajo.util.datetime.TimeMeta;
@@ -108,7 +109,7 @@ public class Int8Datum extends NumericDatum {
   
   @Override
   public int hashCode() {
-    return (int) val;
+    return MurmurHash.hash(val);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
index 49f09f6..e8424b3 100644
--- a/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
+++ b/tajo-common/src/main/java/org/apache/tajo/datum/TextDatum.java
@@ -23,8 +23,8 @@ import com.google.gson.annotations.Expose;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.exception.InvalidCastException;
 import org.apache.tajo.exception.InvalidOperationException;
+import org.apache.tajo.util.MurmurHash;
 
-import java.util.Arrays;
 import java.util.Comparator;
 
 public class TextDatum extends Datum {
@@ -140,7 +140,7 @@ public class TextDatum extends Datum {
 
   @Override
   public int hashCode() {
-    return Arrays.hashCode(bytes);
+    return MurmurHash.hash(bytes);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-common/src/main/java/org/apache/tajo/util/MurmurHash.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/util/MurmurHash.java b/tajo-common/src/main/java/org/apache/tajo/util/MurmurHash.java
new file mode 100644
index 0000000..b60df9c
--- /dev/null
+++ b/tajo-common/src/main/java/org/apache/tajo/util/MurmurHash.java
@@ -0,0 +1,213 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.util;
+
+/**
+ * This class is borrowed from the following source code
+ * https://github.com/addthis/stream-lib/blob/master/src/main/java/com/clearspring/analytics/hash/MurmurHash.java
+ */
+public class MurmurHash {
+  public static int hash(Object o) {
+    if (o == null) {
+      return 0;
+    }
+    if (o instanceof Long) {
+      return hashLong((Long) o);
+    }
+    if (o instanceof Integer) {
+      return hashLong((Integer) o);
+    }
+    if (o instanceof Double) {
+      return hashLong(Double.doubleToRawLongBits((Double) o));
+    }
+    if (o instanceof Float) {
+      return hashLong(Float.floatToRawIntBits((Float) o));
+    }
+    if (o instanceof String) {
+      return hash(((String) o).getBytes());
+    }
+    if (o instanceof byte[]) {
+      return hash((byte[]) o);
+    }
+    return hash(o.toString());
+  }
+
+  public static int hash(byte[] data) {
+    return hash(data, data.length, -1);
+  }
+
+  public static int hash(byte[] data, int seed) {
+    return hash(data, data.length, seed);
+  }
+
+  public static int hash(byte[] data, int length, int seed) {
+    int m = 0x5bd1e995;
+    int r = 24;
+
+    int h = seed ^ length;
+
+    int len_4 = length >> 2;
+
+    for (int i = 0; i < len_4; i++) {
+      int i_4 = i << 2;
+      int k = data[i_4 + 3];
+      k = k << 8;
+      k = k | (data[i_4 + 2] & 0xff);
+      k = k << 8;
+      k = k | (data[i_4 + 1] & 0xff);
+      k = k << 8;
+      k = k | (data[i_4 + 0] & 0xff);
+      k *= m;
+      k ^= k >>> r;
+      k *= m;
+      h *= m;
+      h ^= k;
+    }
+
+    // avoid calculating modulo
+    int len_m = len_4 << 2;
+    int left = length - len_m;
+
+    if (left != 0) {
+      if (left >= 3) {
+        h ^= (int) data[length - 3] << 16;
+      }
+      if (left >= 2) {
+        h ^= (int) data[length - 2] << 8;
+      }
+      if (left >= 1) {
+        h ^= (int) data[length - 1];
+      }
+
+      h *= m;
+    }
+
+    h ^= h >>> 13;
+    h *= m;
+    h ^= h >>> 15;
+
+    return h;
+  }
+
+  public static int hashLong(long data) {
+    int m = 0x5bd1e995;
+    int r = 24;
+
+    int h = 0;
+
+    int k = (int) data * m;
+    k ^= k >>> r;
+    h ^= k * m;
+
+    k = (int) (data >> 32) * m;
+    k ^= k >>> r;
+    h *= m;
+    h ^= k * m;
+
+    h ^= h >>> 13;
+    h *= m;
+    h ^= h >>> 15;
+
+    return h;
+  }
+
+  public static long hash64(Object o) {
+    if (o == null) {
+      return 0l;
+    } else if (o instanceof String) {
+      final byte[] bytes = ((String) o).getBytes();
+      return hash64(bytes, bytes.length);
+    } else if (o instanceof byte[]) {
+      final byte[] bytes = (byte[]) o;
+      return hash64(bytes, bytes.length);
+    }
+    return hash64(o.toString());
+  }
+
+  // 64 bit implementation copied from here:  https://github.com/tnm/murmurhash-java
+
+  /**
+   * Generates 64 bit hash from byte array with default seed value.
+   *
+   * @param data   byte array to hash
+   * @param length length of the array to hash
+   * @return 64 bit hash of the given string
+   */
+  public static long hash64(final byte[] data, int length) {
+    return hash64(data, length, 0xe17a1465);
+  }
+
+
+  /**
+   * Generates 64 bit hash from byte array of the given length and seed.
+   *
+   * @param data   byte array to hash
+   * @param length length of the array to hash
+   * @param seed   initial seed value
+   * @return 64 bit hash of the given array
+   */
+  public static long hash64(final byte[] data, int length, int seed) {
+    final long m = 0xc6a4a7935bd1e995L;
+    final int r = 47;
+
+    long h = (seed & 0xffffffffl) ^ (length * m);
+
+    int length8 = length / 8;
+
+    for (int i = 0; i < length8; i++) {
+      final int i8 = i * 8;
+      long k = ((long) data[i8 + 0] & 0xff) + (((long) data[i8 + 1] & 0xff) << 8)
+          + (((long) data[i8 + 2] & 0xff) << 16) + (((long) data[i8 + 3] & 0xff) << 24)
+          + (((long) data[i8 + 4] & 0xff) << 32) + (((long) data[i8 + 5] & 0xff) << 40)
+          + (((long) data[i8 + 6] & 0xff) << 48) + (((long) data[i8 + 7] & 0xff) << 56);
+
+      k *= m;
+      k ^= k >>> r;
+      k *= m;
+
+      h ^= k;
+      h *= m;
+    }
+
+    switch (length % 8) {
+      case 7:
+        h ^= (long) (data[(length & ~7) + 6] & 0xff) << 48;
+      case 6:
+        h ^= (long) (data[(length & ~7) + 5] & 0xff) << 40;
+      case 5:
+        h ^= (long) (data[(length & ~7) + 4] & 0xff) << 32;
+      case 4:
+        h ^= (long) (data[(length & ~7) + 3] & 0xff) << 24;
+      case 3:
+        h ^= (long) (data[(length & ~7) + 2] & 0xff) << 16;
+      case 2:
+        h ^= (long) (data[(length & ~7) + 1] & 0xff) << 8;
+      case 1:
+        h ^= (long) (data[length & ~7] & 0xff);
+        h *= m;
+    }
+    ;
+
+    h ^= h >>> r;
+    h *= m;
+    h ^= h >>> r;
+
+    return h;
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashPartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashPartitioner.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashPartitioner.java
index 3ae53d9..233d6ec 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashPartitioner.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/HashPartitioner.java
@@ -43,7 +43,6 @@ public class HashPartitioner extends Partitioner {
     for (int i = 0; i < partitionKeyIds.length; i++) {
       keyTuple.put(i, tuple.get(partitionKeyIds[i]));
     }
-    return (keyTuple.hashCode() & Integer.MAX_VALUE) %
-        (numPartitions == 32 ? numPartitions-1 : numPartitions);
+    return (keyTuple.hashCode() & Integer.MAX_VALUE) % numPartitions;
   }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/TestHashPartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/TestHashPartitioner.java b/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/TestHashPartitioner.java
index f0d846c..2241870 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/TestHashPartitioner.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/planner/physical/TestHashPartitioner.java
@@ -18,15 +18,21 @@
 
 package org.apache.tajo.engine.planner.physical;
 
-import org.junit.After;
-import org.junit.Before;
-import org.junit.Test;
 import org.apache.tajo.datum.Datum;
 import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.TextDatum;
 import org.apache.tajo.storage.Tuple;
 import org.apache.tajo.storage.VTuple;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.Set;
+import java.util.TreeSet;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 public class TestHashPartitioner {
 
@@ -81,4 +87,31 @@ public class TestHashPartitioner {
     int part2 = p.getPartition(tuple4);
     assertEquals(part2, p.getPartition(tuple5));    
   }
+
+  @Test
+  public final void testGetPartition2() {
+    // https://issues.apache.org/jira/browse/TAJO-976
+    Random rand = new Random();
+    String[][] data = new String[1000][];
+
+    for (int i = 0; i < 1000; i++) {
+      data[i] = new String[]{ String.valueOf(rand.nextInt(1000)), String.valueOf(rand.nextInt(1000)), String.valueOf(rand.nextInt(1000))};
+    }
+
+    int[] testNumPartitions = new int[]{31, 62, 124, 32, 63, 125};
+    for (int index = 0; index <  testNumPartitions.length; index++) {
+      Partitioner p = new HashPartitioner(new int[]{0, 1, 2}, testNumPartitions[index]);
+
+      Set<Integer> ids = new TreeSet<Integer>();
+      for (int i = 0; i < data.length; i++) {
+        Tuple tuple = new VTuple(
+            new Datum[]{new TextDatum(data[i][0]), new TextDatum(data[i][1]), new TextDatum(data[i][2])});
+
+        ids.add(p.getPartition(tuple));
+      }
+
+      // Not every partition is necessarily hit, so allow up to 5 fewer distinct ids than requested.
+      assertTrue(ids.size() + 5 >= testNumPartitions[index]);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
index 0ffcf11..72759c0 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestGroupByQuery.java
@@ -143,7 +143,7 @@ public class TestGroupByQuery extends QueryTestCaseBase {
 
   @Test
   public final void testGroupByWithSameConstantKeys1() throws Exception {
-    // select l_partkey as a, '##' as b, '##' as c, count(*) d from lineitem group by a, b, c;
+    // select l_partkey as a, '##' as b, '##' as c, count(*) d from lineitem group by a, b, c order by a;
     ResultSet res = executeQuery();
     assertResultSet(res);
     cleanupQuery(res);
@@ -408,7 +408,7 @@ public class TestGroupByQuery extends QueryTestCaseBase {
   @Test
   public final void testHavingWithNamedTarget() throws Exception {
     // select l_orderkey, avg(l_partkey) total, sum(l_linenumber) as num from lineitem group by l_orderkey
-    // having total >= 2 or num = 3;
+    // having total >= 2 or num = 3 order by l_orderkey, total;
     ResultSet res = executeQuery();
     assertResultSet(res);
     cleanupQuery(res);

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
index 5df6f24..349ad0c 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestJoinBroadcast.java
@@ -451,7 +451,7 @@ public class TestJoinBroadcast extends QueryTestCaseBase {
     res = executeString(
         "select distinct a.col3 from " + tableName + " as a " +
             "left outer join lineitem_large b " +
-            "on a.col1 = b.l_orderkey"
+            "on a.col1 = b.l_orderkey order by a.col3"
     );
 
     assertResultSet(res);

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameConstantKeys1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameConstantKeys1.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameConstantKeys1.sql
index 47a7832..c8c3db7 100644
--- a/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameConstantKeys1.sql
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameConstantKeys1.sql
@@ -1 +1 @@
-select l_partkey as a, '##' as b, '##' as c, count(*) d from lineitem group by a, b, c;
\ No newline at end of file
+select l_partkey as a, '##' as b, '##' as c, count(*) d from lineitem group by a, b, c order by a;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs1.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs1.sql
index 17c88c5..a04745e 100644
--- a/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs1.sql
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs1.sql
@@ -3,4 +3,6 @@ select
 from
   lineitem
 group by
+  l_orderkey + l_partkey
+order by
   l_orderkey + l_partkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs2.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs2.sql
index a0a1c11..074e252 100644
--- a/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs2.sql
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testGroupByWithSameExprs2.sql
@@ -3,4 +3,6 @@ select
 from
   lineitem
 group by
-  l_orderkey + l_partkey;
\ No newline at end of file
+  l_orderkey + l_partkey
+order by
+  total1, total2;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/queries/TestGroupByQuery/testHavingWithNamedTarget.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestGroupByQuery/testHavingWithNamedTarget.sql b/tajo-core/src/test/resources/queries/TestGroupByQuery/testHavingWithNamedTarget.sql
index 79467be..87a784f 100644
--- a/tajo-core/src/test/resources/queries/TestGroupByQuery/testHavingWithNamedTarget.sql
+++ b/tajo-core/src/test/resources/queries/TestGroupByQuery/testHavingWithNamedTarget.sql
@@ -1 +1,13 @@
-select l_orderkey, avg(l_partkey) total, sum(l_linenumber) as num from lineitem group by l_orderkey having total >= 2 or num = 3;
\ No newline at end of file
+select
+  l_orderkey,
+  avg(l_partkey) total,
+  sum(l_linenumber) as num
+from
+  lineitem
+group by
+  l_orderkey
+having
+  total >= 2 or num = 3
+order by
+  l_orderkey,
+  total;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupBy2.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupBy2.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupBy2.result
index 23efdb7..6afdd23 100644
--- a/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupBy2.result
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupBy2.result
@@ -1,4 +1,4 @@
 unique_key
 -------------------------------
-3
-2
\ No newline at end of file
+2
+3
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupByWithSameConstantKeys1.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupByWithSameConstantKeys1.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupByWithSameConstantKeys1.result
index b08b1bc..a8fbe00 100644
--- a/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupByWithSameConstantKeys1.result
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupByWithSameConstantKeys1.result
@@ -1,5 +1,5 @@
 a,b,c,d
 -------------------------------
 1,##,##,2
-3,##,##,1
-2,##,##,2
\ No newline at end of file
+2,##,##,2
+3,##,##,1
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupbyWithJson.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupbyWithJson.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupbyWithJson.result
index 627db72..366b76e 100644
--- a/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupbyWithJson.result
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testGroupbyWithJson.result
@@ -1,5 +1,5 @@
 l_orderkey,total,num
 -------------------------------
 3,2.5,3
-1,1.0,3
-2,2.0,1
\ No newline at end of file
+2,2.0,1
+1,1.0,3
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result b/tajo-core/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
index 627db72..81f1bfd 100644
--- a/tajo-core/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
+++ b/tajo-core/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
@@ -1,5 +1,5 @@
 l_orderkey,total,num
 -------------------------------
-3,2.5,3
 1,1.0,3
-2,2.0,1
\ No newline at end of file
+2,2.0,1
+3,2.5,3
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/8be501f4/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTable.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTable.result b/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTable.result
index df3c7bc..9ef26b4 100644
--- a/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTable.result
+++ b/tajo-core/src/test/resources/results/TestJoinBroadcast/testBroadcastMultiColumnPartitionTable.result
@@ -1,5 +1,5 @@
 col3
 -------------------------------
 01
-10
-12
\ No newline at end of file
+12
+10
\ No newline at end of file


[05/14] git commit: TAJO-977: INSERT into a partitioned table as SELECT statement uses a wrong schema. (Hyoungjun Kim via hyunsik)

Posted by ji...@apache.org.
TAJO-977: INSERT into a partitioned table as SELECT statement uses a wrong schema. (Hyoungjun Kim via hyunsik)

Closes #95


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/9880f06f
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/9880f06f
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/9880f06f

Branch: refs/heads/index_support
Commit: 9880f06fd9e13402593a48a834df972e5b170fe2
Parents: 72808e0
Author: Hyunsik Choi <hy...@apache.org>
Authored: Sat Jul 26 21:02:25 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Sat Jul 26 21:02:25 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  7 +-
 .../planner/physical/ColPartitionStoreExec.java |  3 +-
 .../tajo/engine/query/TestInsertQuery.java      | 70 ++++++++++++++++++++
 3 files changed, 77 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/9880f06f/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 08cf60a..2e530af 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,8 +97,11 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
-    TAJO-968: Self-Join query (including partitioned table) doesn't run unexpectedly 
-    using auto broadcast join. (jaewha)
+    TAJO-977: INSERT into a partitioned table as SELECT statement uses a wrong 
+    schema. (Hyoungjun Kim via hyunsik)
+
+    TAJO-968: Self-Join query (including partitioned table) doesn't run 
+    unexpectedly using auto broadcast join. (jaewha)
 
     TAJO-914: join queries with constant values can cause schema mismatch in
     logical plan. (hyunsik)

http://git-wip-us.apache.org/repos/asf/tajo/blob/9880f06f/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ColPartitionStoreExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ColPartitionStoreExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ColPartitionStoreExec.java
index d292437..e90baff 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ColPartitionStoreExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/ColPartitionStoreExec.java
@@ -38,7 +38,6 @@ import org.apache.tajo.storage.StorageUtil;
 import org.apache.tajo.worker.TaskAttemptContext;
 
 import java.io.IOException;
-import java.text.NumberFormat;
 
 public abstract class ColPartitionStoreExec extends UnaryPhysicalExec {
   private static Log LOG = LogFactory.getLog(ColPartitionStoreExec.class);
@@ -57,6 +56,8 @@ public abstract class ColPartitionStoreExec extends UnaryPhysicalExec {
 
     if (plan.getType() == NodeType.CREATE_TABLE) {
       this.outSchema = ((CreateTableNode)plan).getTableSchema();
+    } else if (plan.getType() == NodeType.INSERT) {
+      this.outSchema = ((InsertNode)plan).getTableSchema();
     }
 
     // set table meta

http://git-wip-us.apache.org/repos/asf/tajo/blob/9880f06f/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java
index 4b48182..0d309c7 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestInsertQuery.java
@@ -507,6 +507,76 @@ public class TestInsertQuery extends QueryTestCaseBase {
   }
 
   @Test
+  public final void testInsertOverwriteIntoParquet() throws Exception {
+    if (!testingCluster.isHCatalogStoreRunning()) {
+      executeString("create table parquet_table " +
+          "(l_orderkey int4, l_shipdate text, l_shipdate_function text) using parquet").close();
+
+      CatalogService catalog = testingCluster.getMaster().getCatalog();
+      assertTrue(catalog.existsTable(getCurrentDatabase(), "parquet_table"));
+
+      executeString(
+          "insert overwrite into parquet_table  " +
+              "select l_orderkey, l_shipdate, substr(l_shipdate, 1, 10) from default.lineitem").close();
+
+      TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), "parquet_table");
+      if (!testingCluster.isHCatalogStoreRunning()) {
+        assertEquals(5, desc.getStats().getNumRows().intValue());
+      }
+
+      ResultSet res = executeString("select l_orderkey, l_shipdate, l_shipdate_function " +
+          "from parquet_table ");
+
+      String expected = "l_orderkey,l_shipdate,l_shipdate_function\n" +
+          "-------------------------------\n" +
+          "1,1996-03-13,1996-03-13\n" +
+          "1,1996-04-12,1996-04-12\n" +
+          "2,1997-01-28,1997-01-28\n" +
+          "3,1994-02-02,1994-02-02\n" +
+          "3,1993-11-09,1993-11-09\n";
+
+      assertEquals(expected, resultSetToString(res));
+
+      executeString("DROP TABLE parquet_table PURGE");
+    }
+  }
+
+  @Test
+  public final void testInsertOverwriteIntoPartitionedParquet() throws Exception {
+    if (!testingCluster.isHCatalogStoreRunning()) {
+      executeString("create table parquet_table " +
+          "(l_orderkey int4, l_shipdate_function text) using parquet partition by column (l_shipdate text)").close();
+
+      CatalogService catalog = testingCluster.getMaster().getCatalog();
+      assertTrue(catalog.existsTable(getCurrentDatabase(), "parquet_table"));
+
+      executeString(
+          "insert overwrite into parquet_table  " +
+              "select l_orderkey, substr(l_shipdate, 1, 10), l_shipdate from default.lineitem").close();
+
+      TableDesc desc = catalog.getTableDesc(getCurrentDatabase(), "parquet_table");
+      if (!testingCluster.isHCatalogStoreRunning()) {
+        assertEquals(5, desc.getStats().getNumRows().intValue());
+      }
+
+      ResultSet res = executeString("select l_orderkey, l_shipdate, l_shipdate_function " +
+          "from parquet_table ");
+
+      String expected = "l_orderkey,l_shipdate,l_shipdate_function\n" +
+          "-------------------------------\n" +
+          "3,1993-11-09,1993-11-09\n" +
+          "3,1994-02-02,1994-02-02\n" +
+          "1,1996-03-13,1996-03-13\n" +
+          "1,1996-04-12,1996-04-12\n" +
+          "2,1997-01-28,1997-01-28\n";
+
+      assertEquals(expected, resultSetToString(res));
+
+      executeString("DROP TABLE parquet_table PURGE");
+    }
+  }
+
+  @Test
   public final void testInsertOverwriteWithDatabase() throws Exception {
     ResultSet res = executeFile("table1_ddl.sql");
     res.close();


[07/14] git commit: TAJO-974: Eliminate unexpected case condition in SubQuery. (missed ticket close)

Posted by ji...@apache.org.
TAJO-974: Eliminate unexpected case condition in SubQuery. (missed ticket close)

Closes #93


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/3cfc1987
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/3cfc1987
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/3cfc1987

Branch: refs/heads/index_support
Commit: 3cfc1987e69130fa0232d09765c5c3582914884c
Parents: 1f6b5b3
Author: Hyunsik Choi <hy...@apache.org>
Authored: Mon Jul 28 11:29:30 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Mon Jul 28 11:29:30 2014 +0900

----------------------------------------------------------------------
 CHANGES | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/3cfc1987/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 3ac13a9..9dac2c7 100644
--- a/CHANGES
+++ b/CHANGES
@@ -98,7 +98,7 @@ Release 0.9.0 - unreleased
   BUG FIXES
 
     TAJO-974: Eliminate unexpected case condition in SubQuery. (Hyoungjun Kim 
-    via hyunsik)
+    via hyunsik) 
 
     TAJO-977: INSERT into a partitioned table as SELECT statement uses a wrong 
     schema. (Hyoungjun Kim via hyunsik)


[14/14] git commit: Merge branch 'master' of http://git-wip-us.apache.org/repos/asf/tajo into index_support

Posted by ji...@apache.org.
Merge branch 'master' of http://git-wip-us.apache.org/repos/asf/tajo into index_support


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/5c0277fd
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/5c0277fd
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/5c0277fd

Branch: refs/heads/index_support
Commit: 5c0277fd87c896d675522a3ecee88d2e813f6aac
Parents: 645d306 8024f6a
Author: Jihoon Son <ji...@apache.org>
Authored: Fri Aug 1 14:58:33 2014 +0900
Committer: Jihoon Son <ji...@apache.org>
Committed: Fri Aug 1 14:58:33 2014 +0900

----------------------------------------------------------------------
 CHANGES                                                 |  2 ++
 .../org/apache/tajo/engine/function/math/Round.java     |  7 +++++--
 .../apache/tajo/engine/function/TestMathFunctions.java  | 12 ++++++++++++
 3 files changed, 19 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/5c0277fd/CHANGES
----------------------------------------------------------------------


[12/14] git commit: Merge branch 'master' of http://git-wip-us.apache.org/repos/asf/tajo into index_support

Posted by ji...@apache.org.
Merge branch 'master' of http://git-wip-us.apache.org/repos/asf/tajo into index_support


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/645d306a
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/645d306a
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/645d306a

Branch: refs/heads/index_support
Commit: 645d306a525dca62acfbfc966774755408ed7660
Parents: c9b8f51 b637416
Author: Jihoon Son <ji...@apache.org>
Authored: Fri Aug 1 10:39:20 2014 +0900
Committer: Jihoon Son <ji...@apache.org>
Committed: Fri Aug 1 10:39:20 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  26 ++-
 .../java/org/apache/tajo/conf/TajoConf.java     |   3 +-
 .../java/org/apache/tajo/datum/Float4Datum.java |   3 +-
 .../java/org/apache/tajo/datum/Float8Datum.java |   3 +-
 .../java/org/apache/tajo/datum/Int2Datum.java   |   3 +-
 .../java/org/apache/tajo/datum/Int4Datum.java   |   3 +-
 .../java/org/apache/tajo/datum/Int8Datum.java   |   3 +-
 .../java/org/apache/tajo/datum/TextDatum.java   |   4 +-
 .../java/org/apache/tajo/util/MurmurHash.java   | 213 +++++++++++++++++++
 .../engine/planner/global/GlobalPlanner.java    |   4 +-
 .../planner/physical/ColPartitionStoreExec.java |   3 +-
 .../planner/physical/HashPartitioner.java       |   3 +-
 .../apache/tajo/engine/utils/SchemaUtil.java    |  20 +-
 .../tajo/master/querymaster/QueryUnit.java      |  10 +-
 .../tajo/master/querymaster/Repartitioner.java  |  25 ++-
 .../tajo/master/querymaster/SubQuery.java       |  23 +-
 .../tajo/webapp/QueryExecutorServlet.java       | 200 +++++++++++------
 .../java/org/apache/tajo/worker/FetchImpl.java  |  24 ++-
 .../apache/tajo/worker/TaskAttemptContext.java  |  26 ++-
 .../planner/global/TestBroadcastJoinPlan.java   |  94 ++++----
 .../planner/physical/TestHashPartitioner.java   |  39 +++-
 .../tajo/engine/query/TestCaseByCases.java      |  38 +++-
 .../tajo/engine/query/TestGroupByQuery.java     | 109 +++++++++-
 .../tajo/engine/query/TestInsertQuery.java      |  70 ++++++
 .../tajo/engine/query/TestJoinBroadcast.java    |  91 +++++++-
 .../TestCaseByCases/testTAJO914Case1.sql        |  10 +
 .../TestCaseByCases/testTAJO914Case2.sql        |   9 +
 .../TestCaseByCases/testTAJO914Case3.sql        |  10 +
 .../TestCaseByCases/testTAJO914Case4.sql        |  10 +
 .../testGroupByWithSameConstantKeys1.sql        |   2 +-
 .../testGroupByWithSameExprs1.sql               |   2 +
 .../testGroupByWithSameExprs2.sql               |   4 +-
 .../testHavingWithNamedTarget.sql               |  14 +-
 .../TestCaseByCases/testTAJO914Case1.result     |   5 +
 .../TestCaseByCases/testTAJO914Case2.result     |   5 +
 .../TestCaseByCases/testTAJO914Case3.result     |   5 +
 .../TestCaseByCases/testTAJO914Case4.result     |   5 +
 .../TestGroupByQuery/testGroupBy2.result        |   4 +-
 .../testGroupByWithSameConstantKeys1.result     |   4 +-
 .../TestGroupByQuery/testGroupbyWithJson.result |   4 +-
 .../testHavingWithNamedTarget.result            |   4 +-
 ...estBroadcastMultiColumnPartitionTable.result |   4 +-
 42 files changed, 967 insertions(+), 174 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/645d306a/CHANGES
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tajo/blob/645d306a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tajo/blob/645d306a/tajo-core/src/main/java/org/apache/tajo/engine/planner/global/GlobalPlanner.java
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/tajo/blob/645d306a/tajo-core/src/main/java/org/apache/tajo/worker/TaskAttemptContext.java
----------------------------------------------------------------------