You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by bl...@apache.org on 2014/02/27 05:45:22 UTC

git commit: TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa)

Repository: incubator-tajo
Updated Branches:
  refs/heads/master 56fbd99ac -> db5c017d4


TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa)


Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/db5c017d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/db5c017d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/db5c017d

Branch: refs/heads/master
Commit: db5c017d416561c9dac08c093558d7344aa53be1
Parents: 56fbd99
Author: blrunner <jh...@gruter.com>
Authored: Thu Feb 27 13:45:02 2014 +0900
Committer: blrunner <jh...@gruter.com>
Committed: Thu Feb 27 13:45:02 2014 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 ++
 .../java/org/apache/tajo/benchmark/TPCH.java    |  8 +++++++
 .../tajo/master/querymaster/Repartitioner.java  | 22 ++++++++++++++++++--
 .../apache/tajo/engine/query/TestJoinQuery.java |  8 +++++++
 .../testInnerJoinWithEmptyTable.sql             |  8 +++++++
 .../testInnerJoinWithEmptyTable.result          |  1 +
 6 files changed, 47 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 1d86479..eef17d2 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -263,6 +263,8 @@ Release 0.8.0 - unreleased
 
   BUG FIXES
 
+    TAJO-620: A join query can cause IndexOutOfBoundsException if one of tables is empty. (jaehwa)
+
     TAJO-628: The second stage of distinct aggregation can be scheduled to
     only one node. (hyunsik)
 

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
index 2e12b1d..5e9c9d3 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/benchmark/TPCH.java
@@ -46,6 +46,8 @@ public class TPCH extends BenchmarkSet {
   public static final String ORDERS = "orders";
   public static final String PARTSUPP = "partsupp";
   public static final String SUPPLIER = "supplier";
+  public static final String EMPTY_ORDERS = "empty_orders";
+
 
   public static final Map<String, Long> tableVolumes = Maps.newHashMap();
 
@@ -58,6 +60,8 @@ public class TPCH extends BenchmarkSet {
     tableVolumes.put(ORDERS, 171952161L);
     tableVolumes.put(PARTSUPP, 118984616L);
     tableVolumes.put(SUPPLIER, 1409184L);
+    tableVolumes.put(EMPTY_ORDERS, 0L);
+
   }
 
   @Override
@@ -131,6 +135,8 @@ public class TPCH extends BenchmarkSet {
         .addColumn("o_shippriority", Type.INT4) // 7
         .addColumn("o_comment", Type.TEXT); // 8
     schemas.put(ORDERS, orders);
+    schemas.put(EMPTY_ORDERS, orders);
+
 
     Schema partsupp = new Schema()
         .addColumn("ps_partkey", Type.INT4) // 0
@@ -177,6 +183,8 @@ public class TPCH extends BenchmarkSet {
     loadTable(ORDERS);
     loadTable(PARTSUPP) ;
     loadTable(SUPPLIER);
+    loadTable(EMPTY_ORDERS);
+
   }
 
   private void loadTable(String tableName) throws ServiceException {

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
index 0d3f95e..4a7976f 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/querymaster/Repartitioner.java
@@ -23,6 +23,7 @@ import com.google.common.collect.Lists;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
+import org.apache.tajo.algebra.JoinType;
 import org.apache.tajo.ExecutionBlockId;
 import org.apache.tajo.catalog.*;
 import org.apache.tajo.catalog.statistics.StatisticsUtil;
@@ -100,13 +101,30 @@ public class Repartitioner {
         } catch (PlanningException e) {
           throw new IOException(e);
         }
-        fragments[i] = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(),
-            tablePath).get(0);
+
+        // If the table has no data, storageManager returns an empty list of FileFragments.
+        // So we must check the list size before taking the first fragment;
+        // otherwise get(0) would throw an IndexOutOfBoundsException.
+        List<FileFragment> fileFragments = storageManager.getSplits(scans[i].getCanonicalName(), tableDesc.getMeta(), tableDesc.getSchema(), tablePath);
+        if (fileFragments.size() > 0) {
+          fragments[i] = fileFragments.get(0);
+        } else {
+          fragments[i] = new FileFragment(scans[i].getCanonicalName(), tablePath, 0, 0, new String[]{UNKNOWN_HOST});
+        }
       }
     }
 
     LOG.info(String.format("Left Volume: %d, Right Volume: %d", stats[0], stats[1]));
 
+    // If either table of an inner join has no input data,
+    // the join yields zero rows, so return early.
+    JoinNode joinNode = PlannerUtil.findMostBottomNode(execBlock.getPlan(), NodeType.JOIN);
+    if (joinNode != null) {
+      if ( (joinNode.getJoinType().equals(JoinType.INNER)) && (stats[0] == 0 || stats[1] == 0)) {
+        return;
+      }
+    }
+
     // Assigning either fragments or fetch urls to query units
     boolean leftSmall = execBlock.isBroadcastTable(scans[0].getCanonicalName());
     boolean rightSmall = execBlock.isBroadcastTable(scans[1].getCanonicalName());

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
index 3a95724..0e925f1 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/query/TestJoinQuery.java
@@ -207,4 +207,12 @@ public class TestJoinQuery extends QueryTestCaseBase {
     assertResultSet(res);
     cleanupQuery(res);
   }
+
+  @Test
+  public final void testInnerJoinWithEmptyTable() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
 }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql
new file mode 100644
index 0000000..00c7884
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/queries/TestJoinQuery/testInnerJoinWithEmptyTable.sql
@@ -0,0 +1,8 @@
+select
+  c_custkey,
+  orders.o_orderkey
+from
+  customer, empty_orders
+where c_custkey = o_orderkey
+order by
+  c_custkey, o_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/db5c017d/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result
new file mode 100644
index 0000000..c6036d7
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/test/resources/results/TestJoinQuery/testInnerJoinWithEmptyTable.result
@@ -0,0 +1 @@
+-------------------------------
\ No newline at end of file