You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/03/28 15:35:06 UTC

git commit: TAJO-691: HashJoin or HashAggregation is too slow if there are many unique keys. (hyoungjunkim via hyunsik)

Repository: tajo
Updated Branches:
  refs/heads/master 5d94b031b -> 36007d779


TAJO-691: HashJoin or HashAggregation is too slow if there are many unique keys. (hyoungjunkim via hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/36007d77
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/36007d77
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/36007d77

Branch: refs/heads/master
Commit: 36007d779142b48194ff8eab5610db29390ab9d2
Parents: 5d94b03
Author: Hyunsik Choi <hy...@apache.org>
Authored: Fri Mar 28 23:10:49 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Fri Mar 28 23:10:49 2014 +0900

----------------------------------------------------------------------
 CHANGES.txt                                            |  5 ++++-
 .../queries/TestBuiltinFunctions/testAvgDouble.sql     |  2 +-
 .../queries/TestCaseByCases/testTAJO415Case.sql        |  5 ++++-
 .../queries/TestGroupByQuery/testGroupBy4.sql          | 10 +++++++++-
 .../testJoinCoReferredEvalsWithSameExprs1.sql          |  2 +-
 .../testJoinCoReferredEvalsWithSameExprs2.sql          |  2 +-
 .../resources/queries/TestNetTypes/testGroupby.sql     |  9 ++++++++-
 .../resources/queries/TestNetTypes/testGroupby2.sql    | 10 +++++++++-
 .../results/TestBuiltinFunctions/testAvgDouble.result  |  4 ++--
 .../results/TestBuiltinFunctions/testRandom.result     |  1 -
 .../results/TestCaseByCases/testTAJO415Case.result     |  2 +-
 .../results/TestGroupByQuery/testGroupBy4.result       |  4 ++--
 .../TestGroupByQuery/testHavingWithNamedTarget.result  |  2 +-
 .../testJoinCoReferredEvalsWithSameExprs1.result       |  6 +++---
 .../testJoinCoReferredEvalsWithSameExprs2.result       |  2 +-
 .../resources/results/TestNetTypes/testGroupby.result  |  6 +++---
 .../resources/results/TestNetTypes/testGroupby2.result |  4 ++--
 .../main/java/org/apache/tajo/rpc/NettyClientBase.java |  3 ++-
 .../main/java/org/apache/tajo/storage/LazyTuple.java   | 12 +-----------
 .../src/main/java/org/apache/tajo/storage/VTuple.java  | 13 ++-----------
 20 files changed, 57 insertions(+), 47 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index bc8be9b..57a2f93 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -138,7 +138,10 @@ Release 0.8.0 - unreleased
 
   IMPROVEMENTS
 
-    TAJO-685: Add prerequisite to the document of network functions and operators. 
+    TAJO-691: HashJoin or HashAggregation is too slow if there is many unique keys.
+    (hyoungjunkim via hyunsik)
+
+    TAJO-685: Add prerequisite to the document of network functions and operators.
     (jihoon)
 
     TAJO-644: Support quoted identifiers. (hyunsik)

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
index 1c8c749..a9afc5c 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
@@ -1 +1 @@
-select l_orderkey, avg(l_discount) as revenue from lineitem group by l_orderkey;
\ No newline at end of file
+select l_orderkey, avg(l_discount) as revenue from lineitem group by l_orderkey order by l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
index 4a73b04..4b915d3 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
@@ -30,4 +30,7 @@ from (
   group by
     o_orderkey
 
-) b on (a.c_custkey = b.o_orderkey);
\ No newline at end of file
+) b on (a.c_custkey = b.o_orderkey)
+
+order by
+  c_custkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
index df354d4..4f2c63a 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
@@ -1 +1,9 @@
-select l_orderkey as gkey, count(1) as unique_key from lineitem group by lineitem.l_orderkey;
\ No newline at end of file
+select
+  l_orderkey as gkey,
+  count(1) as unique_key
+from
+  lineitem
+group by
+  lineitem.l_orderkey
+order by
+  gkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
index edd69ff..3638393 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
@@ -11,4 +11,4 @@ from
 where
   r_regionkey = n_regionkey and r_regionkey > 0
 order by
-  n_regionkey + n_nationkey;
\ No newline at end of file
+  n_regionkey + n_nationkey, n_regionkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
index 03b7f8e..a5b75d4 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
@@ -19,4 +19,4 @@ group by
   ((r_regionkey + n_regionkey) / 2)
 
 order by
-  n_regionkey + n_nationkey;
\ No newline at end of file
+  n_regionkey + n_nationkey, n_regionkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
index e3e61d8..27353a9 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
@@ -1 +1,8 @@
-select name, addr, count(1) from table1 group by name, addr;
\ No newline at end of file
+select
+  name, addr, count(1)
+from
+  table1
+group by
+  name, addr
+order by
+  name, addr;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
index c39c3b9..6c3c357 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
@@ -1 +1,9 @@
-select addr, count(*) from table1 group by addr;
\ No newline at end of file
+select
+  addr,
+  count(*)
+from
+  table1
+group by
+  addr
+order by
+  addr;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
index 33b954e..bd2a69f 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
@@ -1,5 +1,5 @@
 l_orderkey,revenue
 -------------------------------
 1,0.065
-3,0.08
-2,0.0
\ No newline at end of file
+2,0.0
+3,0.08
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result
deleted file mode 100644
index 9f2a456..0000000
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result
+++ /dev/null
@@ -1 +0,0 @@
-select l_orderkey, random(3) as rndnum from lineitem group by l_orderkey, rndnum;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
index 4b02873..675037b 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
@@ -1,7 +1,7 @@
 c_custkey,o_orderkey,cnt
 -------------------------------
 1,1,1
-3,3,1
 2,2,1
+3,3,1
 4,0,1
 5,0,1
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
index 714e73d..22c6664 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
@@ -1,5 +1,5 @@
 gkey,unique_key
 -------------------------------
 1,2
-3,2
-2,1
\ No newline at end of file
+2,1
+3,2
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
index 8e6eabd..627db72 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
@@ -1,5 +1,5 @@
 l_orderkey,total,num
 -------------------------------
-1,1.0,3
 3,2.5,3
+1,1.0,3
 2,2.0,1
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
index 2b07dba..23776b6 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
@@ -5,11 +5,11 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result
 4,1,1,2,2,1
 8,4,4,8,8,4
 9,3,3,6,6,3
-10,3,3,6,6,3
 10,2,2,4,4,2
+10,3,3,6,6,3
 11,2,2,4,4,2
-14,4,4,8,8,4
 14,2,2,4,4,2
+14,4,4,8,8,4
 15,4,4,8,8,4
 17,4,4,8,8,4
 18,1,1,2,2,1
@@ -17,6 +17,6 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result
 22,3,3,6,6,3
 23,2,2,4,4,2
 24,4,4,8,8,4
-25,3,3,6,6,3
 25,1,1,2,2,1
+25,3,3,6,6,3
 26,3,3,6,6,3
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
index 91bb289..0e31362 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
@@ -5,8 +5,8 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result,total
 4,1,1,2,2,1,2
 8,4,4,8,8,4,8
 9,3,3,6,6,3,6
-10,3,3,6,6,3,6
 10,2,2,4,4,2,4
+10,3,3,6,6,3,6
 11,2,2,4,4,2,4
 14,2,2,4,4,2,4
 14,4,4,8,8,4,8

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
index 5bedfc2..95847a6 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
@@ -1,7 +1,7 @@
 name,addr,?count
 -------------------------------
+ooo,127.0.0.1,1
 ppp,127.0.1.1,1
 qqq,127.0.0.8,1
-xxx,127.0.1.1,1
-ooo,127.0.0.1,1
-rrr,127.0.0.1,1
\ No newline at end of file
+rrr,127.0.0.1,1
+xxx,127.0.1.1,1
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
index 25cd91f..ffd1d9a 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
@@ -1,5 +1,5 @@
 addr,?count
 -------------------------------
-127.0.1.1,2
 127.0.0.1,2
-127.0.0.8,1
\ No newline at end of file
+127.0.0.8,1
+127.0.1.1,2
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
----------------------------------------------------------------------
diff --git a/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java b/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
index 8373c37..60d3aa6 100644
--- a/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
+++ b/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
@@ -45,6 +45,7 @@ public abstract class NettyClientBase implements Closeable {
   public void init(InetSocketAddress addr, ChannelPipelineFactory pipeFactory, ClientSocketChannelFactory factory)
       throws IOException {
     try {
+
       this.bootstrap = new ClientBootstrap(factory);
       this.bootstrap.setPipelineFactory(pipeFactory);
       // TODO - should be configurable
@@ -57,7 +58,7 @@ public abstract class NettyClientBase implements Closeable {
       connect(addr);
     } catch (Throwable t) {
       close();
-      throw new IOException(t.getCause());
+      throw new IOException("Connect error to " + addr + " cause " + t.getMessage(), t.getCause());
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java b/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
index 3e7ca5f..27d2691 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
@@ -215,17 +215,7 @@ public class LazyTuple implements Tuple, Cloneable {
 
   @Override
   public int hashCode() {
-    int hashCode = 37;
-    for (int i = 0; i < values.length; i++) {
-      Datum d = get(i);
-      if (d != null) {
-        hashCode ^= (d.hashCode() * 41);
-      } else {
-        hashCode = hashCode ^ (i + 17);
-      }
-    }
-
-    return hashCode;
+    return Arrays.hashCode(values);
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java b/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
index 72a4566..22d4fd9 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
@@ -198,19 +198,10 @@ public class VTuple implements Tuple, Cloneable {
 		str.append(")");
 		return str.toString();
 	}
-	
+
 	@Override
 	public int hashCode() {
-	  int hashCode = 37;
-	  for (int i=0; i < values.length; i++) {
-	    if(values[i] != null) {
-        hashCode ^= (values[i].hashCode() * 41);
-	    } else {
-	      hashCode = hashCode ^ (i + 17);
-	    }
-	  }
-	  
-	  return hashCode;
+	  return Arrays.hashCode(values);
 	}
 
   @Override