You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2014/03/28 15:35:06 UTC
git commit: TAJO-691: HashJoin or HashAggregation is too slow if
there are many unique keys. (hyoungjunkim via hyunsik)
Repository: tajo
Updated Branches:
refs/heads/master 5d94b031b -> 36007d779
TAJO-691: HashJoin or HashAggregation is too slow if there are many unique keys. (hyoungjunkim via hyunsik)
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/36007d77
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/36007d77
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/36007d77
Branch: refs/heads/master
Commit: 36007d779142b48194ff8eab5610db29390ab9d2
Parents: 5d94b03
Author: Hyunsik Choi <hy...@apache.org>
Authored: Fri Mar 28 23:10:49 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Fri Mar 28 23:10:49 2014 +0900
----------------------------------------------------------------------
CHANGES.txt | 5 ++++-
.../queries/TestBuiltinFunctions/testAvgDouble.sql | 2 +-
.../queries/TestCaseByCases/testTAJO415Case.sql | 5 ++++-
.../queries/TestGroupByQuery/testGroupBy4.sql | 10 +++++++++-
.../testJoinCoReferredEvalsWithSameExprs1.sql | 2 +-
.../testJoinCoReferredEvalsWithSameExprs2.sql | 2 +-
.../resources/queries/TestNetTypes/testGroupby.sql | 9 ++++++++-
.../resources/queries/TestNetTypes/testGroupby2.sql | 10 +++++++++-
.../results/TestBuiltinFunctions/testAvgDouble.result | 4 ++--
.../results/TestBuiltinFunctions/testRandom.result | 1 -
.../results/TestCaseByCases/testTAJO415Case.result | 2 +-
.../results/TestGroupByQuery/testGroupBy4.result | 4 ++--
.../TestGroupByQuery/testHavingWithNamedTarget.result | 2 +-
.../testJoinCoReferredEvalsWithSameExprs1.result | 6 +++---
.../testJoinCoReferredEvalsWithSameExprs2.result | 2 +-
.../resources/results/TestNetTypes/testGroupby.result | 6 +++---
.../resources/results/TestNetTypes/testGroupby2.result | 4 ++--
.../main/java/org/apache/tajo/rpc/NettyClientBase.java | 3 ++-
.../main/java/org/apache/tajo/storage/LazyTuple.java | 12 +-----------
.../src/main/java/org/apache/tajo/storage/VTuple.java | 13 ++-----------
20 files changed, 57 insertions(+), 47 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index bc8be9b..57a2f93 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -138,7 +138,10 @@ Release 0.8.0 - unreleased
IMPROVEMENTS
- TAJO-685: Add prerequisite to the document of network functions and operators.
+ TAJO-691: HashJoin or HashAggregation is too slow if there is many unique keys.
+ (hyoungjunkim via hyunsik)
+
+ TAJO-685: Add prerequisite to the document of network functions and operators.
(jihoon)
TAJO-644: Support quoted identifiers. (hyunsik)
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
index 1c8c749..a9afc5c 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestBuiltinFunctions/testAvgDouble.sql
@@ -1 +1 @@
-select l_orderkey, avg(l_discount) as revenue from lineitem group by l_orderkey;
\ No newline at end of file
+select l_orderkey, avg(l_discount) as revenue from lineitem group by l_orderkey order by l_orderkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
index 4a73b04..4b915d3 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestCaseByCases/testTAJO415Case.sql
@@ -30,4 +30,7 @@ from (
group by
o_orderkey
-) b on (a.c_custkey = b.o_orderkey);
\ No newline at end of file
+) b on (a.c_custkey = b.o_orderkey)
+
+order by
+ c_custkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
index df354d4..4f2c63a 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestGroupByQuery/testGroupBy4.sql
@@ -1 +1,9 @@
-select l_orderkey as gkey, count(1) as unique_key from lineitem group by lineitem.l_orderkey;
\ No newline at end of file
+select
+ l_orderkey as gkey,
+ count(1) as unique_key
+from
+ lineitem
+group by
+ lineitem.l_orderkey
+order by
+ gkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
index edd69ff..3638393 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.sql
@@ -11,4 +11,4 @@ from
where
r_regionkey = n_regionkey and r_regionkey > 0
order by
- n_regionkey + n_nationkey;
\ No newline at end of file
+ n_regionkey + n_nationkey, n_regionkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
index 03b7f8e..a5b75d4 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.sql
@@ -19,4 +19,4 @@ group by
((r_regionkey + n_regionkey) / 2)
order by
- n_regionkey + n_nationkey;
\ No newline at end of file
+ n_regionkey + n_nationkey, n_regionkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
index e3e61d8..27353a9 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby.sql
@@ -1 +1,8 @@
-select name, addr, count(1) from table1 group by name, addr;
\ No newline at end of file
+select
+ name, addr, count(1)
+from
+ table1
+group by
+ name, addr
+order by
+ name, addr;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
index c39c3b9..6c3c357 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestNetTypes/testGroupby2.sql
@@ -1 +1,9 @@
-select addr, count(*) from table1 group by addr;
\ No newline at end of file
+select
+ addr,
+ count(*)
+from
+ table1
+group by
+ addr
+order by
+ addr;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
index 33b954e..bd2a69f 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testAvgDouble.result
@@ -1,5 +1,5 @@
l_orderkey,revenue
-------------------------------
1,0.065
-3,0.08
-2,0.0
\ No newline at end of file
+2,0.0
+3,0.08
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result
deleted file mode 100644
index 9f2a456..0000000
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestBuiltinFunctions/testRandom.result
+++ /dev/null
@@ -1 +0,0 @@
-select l_orderkey, random(3) as rndnum from lineitem group by l_orderkey, rndnum;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
index 4b02873..675037b 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestCaseByCases/testTAJO415Case.result
@@ -1,7 +1,7 @@
c_custkey,o_orderkey,cnt
-------------------------------
1,1,1
-3,3,1
2,2,1
+3,3,1
4,0,1
5,0,1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
index 714e73d..22c6664 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testGroupBy4.result
@@ -1,5 +1,5 @@
gkey,unique_key
-------------------------------
1,2
-3,2
-2,1
\ No newline at end of file
+2,1
+3,2
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
index 8e6eabd..627db72 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestGroupByQuery/testHavingWithNamedTarget.result
@@ -1,5 +1,5 @@
l_orderkey,total,num
-------------------------------
-1,1.0,3
3,2.5,3
+1,1.0,3
2,2.0,1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
index 2b07dba..23776b6 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs1.result
@@ -5,11 +5,11 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result
4,1,1,2,2,1
8,4,4,8,8,4
9,3,3,6,6,3
-10,3,3,6,6,3
10,2,2,4,4,2
+10,3,3,6,6,3
11,2,2,4,4,2
-14,4,4,8,8,4
14,2,2,4,4,2
+14,4,4,8,8,4
15,4,4,8,8,4
17,4,4,8,8,4
18,1,1,2,2,1
@@ -17,6 +17,6 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result
22,3,3,6,6,3
23,2,2,4,4,2
24,4,4,8,8,4
-25,3,3,6,6,3
25,1,1,2,2,1
+25,3,3,6,6,3
26,3,3,6,6,3
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
index 91bb289..0e31362 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testJoinCoReferredEvalsWithSameExprs2.result
@@ -5,8 +5,8 @@ v1,n_regionkey,r_regionkey,plus1,plus2,result,total
4,1,1,2,2,1,2
8,4,4,8,8,4,8
9,3,3,6,6,3,6
-10,3,3,6,6,3,6
10,2,2,4,4,2,4
+10,3,3,6,6,3,6
11,2,2,4,4,2,4
14,2,2,4,4,2,4
14,4,4,8,8,4,8
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
index 5bedfc2..95847a6 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby.result
@@ -1,7 +1,7 @@
name,addr,?count
-------------------------------
+ooo,127.0.0.1,1
ppp,127.0.1.1,1
qqq,127.0.0.8,1
-xxx,127.0.1.1,1
-ooo,127.0.0.1,1
-rrr,127.0.0.1,1
\ No newline at end of file
+rrr,127.0.0.1,1
+xxx,127.0.1.1,1
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
index 25cd91f..ffd1d9a 100644
--- a/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestNetTypes/testGroupby2.result
@@ -1,5 +1,5 @@
addr,?count
-------------------------------
-127.0.1.1,2
127.0.0.1,2
-127.0.0.8,1
\ No newline at end of file
+127.0.0.8,1
+127.0.1.1,2
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
----------------------------------------------------------------------
diff --git a/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java b/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
index 8373c37..60d3aa6 100644
--- a/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
+++ b/tajo-rpc/src/main/java/org/apache/tajo/rpc/NettyClientBase.java
@@ -45,6 +45,7 @@ public abstract class NettyClientBase implements Closeable {
public void init(InetSocketAddress addr, ChannelPipelineFactory pipeFactory, ClientSocketChannelFactory factory)
throws IOException {
try {
+
this.bootstrap = new ClientBootstrap(factory);
this.bootstrap.setPipelineFactory(pipeFactory);
// TODO - should be configurable
@@ -57,7 +58,7 @@ public abstract class NettyClientBase implements Closeable {
connect(addr);
} catch (Throwable t) {
close();
- throw new IOException(t.getCause());
+ throw new IOException("Connect error to " + addr + " cause " + t.getMessage(), t.getCause());
}
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java b/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
index 3e7ca5f..27d2691 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
@@ -215,17 +215,7 @@ public class LazyTuple implements Tuple, Cloneable {
@Override
public int hashCode() {
- int hashCode = 37;
- for (int i = 0; i < values.length; i++) {
- Datum d = get(i);
- if (d != null) {
- hashCode ^= (d.hashCode() * 41);
- } else {
- hashCode = hashCode ^ (i + 17);
- }
- }
-
- return hashCode;
+ return Arrays.hashCode(values);
}
@Override
http://git-wip-us.apache.org/repos/asf/tajo/blob/36007d77/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java b/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
index 72a4566..22d4fd9 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/VTuple.java
@@ -198,19 +198,10 @@ public class VTuple implements Tuple, Cloneable {
str.append(")");
return str.toString();
}
-
+
@Override
public int hashCode() {
- int hashCode = 37;
- for (int i=0; i < values.length; i++) {
- if(values[i] != null) {
- hashCode ^= (values[i].hashCode() * 41);
- } else {
- hashCode = hashCode ^ (i + 17);
- }
- }
-
- return hashCode;
+ return Arrays.hashCode(values);
}
@Override