You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by bl...@apache.org on 2014/02/27 05:45:22 UTC
git commit: TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa)
Repository: incubator-tajo
Updated Branches:
refs/heads/master 56fbd99ac -> db5c017d4
TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa)
Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/db5c017d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/db5c017d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/db5c017d
Branch: refs/heads/master
Commit: db5c017d416561c9dac08c093558d7344aa53be1
Parents: 56fbd99
Author: blrunner <jh...@gruter.com>
Authored: Thu Feb 27 13:45:02 2014 +0900
Committer: blrunner <jh...@gruter.com>
Committed: Thu Feb 27 13:45:02 2014 +0900
----------------------------------------------------------------------
CHANGES.txt | 2 ++
.../java/org/apache/tajo/benchmark/TPCH.java | 8 +++++++
.../tajo/master/querymaster/Repartitioner.java | 22 ++++++++++++++++++--
.../apache/tajo/engine/query/TestJoinQuery.java | 8 +++++++
.../testInnerJoinWithEmptyTable.sql | 8 +++++++
.../testInnerJoinWithEmptyTable.result | 1 +
6 files changed, 47 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1d86479..eef17d2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -263,6 +263,8 @@ Release 0.8.0 - unreleased
BUG FIXES
+ TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa)
+
TAJO-628: The second stage of distinct aggregation can be scheduled to
only one node. (hyunsik)
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
index 2e12b1d..5e9c9d3 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
@@ -46,6 +46,8 @@ public class TPCH extends BenchmarkSet {
public static final String ORDERS = "orders";
public static final String PARTSUPP = "partsupp";
public static final String SUPPLIER = "supplier";
+ public static final String EMPTY_ORDERS = "empty_orders";
+
public static final Map<String, Long> tableVolumes = Maps.newHashMap();
@@ -58,6 +60,8 @@ public class TPCH extends BenchmarkSet {
tableVolumes.put(ORDERS, 171952161L);
tableVolumes.put(PARTSUPP, 118984616L);
tableVolumes.put(SUPPLIER, 1409184L);
+ tableVolumes.put(EMPTY_ORDERS, 0L);
+
}
@Override
@@ -131,6 +135,8 @@ public class TPCH extends BenchmarkSet {
.addColumn("o_shippriority", Type.INT4) // 7
.addColumn("o_comment", Type.TEXT); // 8
schemas.put(ORDERS, orders);
+ schemas.put(EMPTY_ORDERS, orders);
+
Schema partsupp = new Schema()
.addColumn("ps_partkey", Type.INT4) // 0
@@ -177,6 +183,8 @@ public class TPCH extends BenchmarkSet {
loadTable(ORDERS);
loadTable(PARTSUPP) ;
loadTable(SUPPLIER);
+ loadTable(EMPTY_ORDERS);
+
}
private void loadTable(String tableName) throws ServiceException {
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
index 0d3f95e..4a7976f 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
@@ -23,6 +23,7 @@ import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
+import org.apache.tajo.algebra.JoinType;
import org.apache.tajo.ExecutionBlockId;
import org.apache.tajo.catalog.*;
import org.apache.tajo.catalog.statistics.StatisticsUtil;
@@ -100,13 +101,30 @@ public class Repartitioner {
} catch (PlanningException e) {
throw new IOException(e);
}
- fragments[i] = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(),
- tablePath).get(0);
+
+ // If the table has no data, storageManager returns an empty list of FileFragments.
+ // Calling get(0) on an empty list would throw IndexOutOfBoundsException,
+ // so check the list size first and fall back to a placeholder fragment.
+ List<FileFragment> fileFragments = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(), tablePath);
+ if (fileFragments.size() > 0) {
+ fragments[i] = fileFragments.get(0);
+ } else {
+ fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
+ }
}
}
LOG.info(String.format("Left Volume: %d, Right Volume: %d", stats[0], stats[1]));
+ // If either table of an inner join has no input data, the join yields
+ // zero rows, so return here without assigning fragments or fetch urls.
+ JoinNode joinNode = PlannerUtil.findMostBottomNode(execBlock.getPlan(), NodeType.JOIN);
+ if (joinNode != null) {
+ if ( (joinNode.getJoinType().equals(JoinType.INNER)) && (stats[0] == 0 || stats[1] == 0)) {
+ return;
+ }
+ }
+
// Assigning either fragments or fetch urls to query units
boolean leftSmall = execBlock.isBroadcastTable(scans[0].getCanonicalName());
boolean rightSmall = execBlock.isBroadcastTable(scans[1].getCanonicalName());
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
index 3a95724..0e925f1 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
@@ -207,4 +207,12 @@ public class TestJoinQuery extends QueryTestCaseBase {
assertResultSet(res);
cleanupQuery(res);
}
+
+ @Test
+ public final void testInnerJoinWithEmptyTable() throws Exception {
+ ResultSet res = executeQuery();
+ assertResultSet(res);
+ cleanupQuery(res);
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql
new file mode 100644
index 0000000..00c7884
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql
@@ -0,0 +1,8 @@
+select
+ c_custkey,
+ orders.o_orderkey
+from
+ customer, empty_orders
+where c_custkey = o_orderkey
+order by
+ c_custkey, o_orderkey;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result
new file mode 100644
index 0000000..c6036d7
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result
@@ -0,0 +1 @@
+-------------------------------
\ No newline at end of file