You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2016/02/17 06:20:17 UTC

tajo git commit: TAJO-2077: Join condition causes incorrect result, when a table has an empty row file.

Repository: tajo
Updated Branches:
  refs/heads/master 294104d28 -> 81ab265fd


TAJO-2077: Join condition causes incorrect result, when a table has an empty row file.

Closes #963


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/81ab265f
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/81ab265f
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/81ab265f

Branch: refs/heads/master
Commit: 81ab265fdb77a2eedc0c9b4dccc2826b461daf56
Parents: 294104d
Author: Jinho Kim <jh...@apache.org>
Authored: Wed Feb 17 14:19:23 2016 +0900
Committer: Jinho Kim <jh...@apache.org>
Committed: Wed Feb 17 14:19:23 2016 +0900

----------------------------------------------------------------------
 CHANGES                                         |   3 ++
 .../tajo/engine/query/TestInnerJoinQuery.java   |  14 ++++++++++
 .../TestJoinQuery/customer_parquet/customer     | Bin 0 -> 506 bytes
 .../TestJoinQuery/customer_parquet/customer1    | Bin 0 -> 506 bytes
 .../TestJoinQuery/customer_parquet/customer2    | Bin 0 -> 506 bytes
 .../TestJoinQuery/customer_parquet/customer3    | Bin 0 -> 506 bytes
 .../customer_parquet/customer_5rows             | Bin 0 -> 2311 bytes
 .../dataset/TestJoinQuery/nation_parquet/nation | Bin 0 -> 314 bytes
 .../TestJoinQuery/nation_parquet/nation1        | Bin 0 -> 314 bytes
 .../TestJoinQuery/nation_parquet/nation2        | Bin 0 -> 314 bytes
 .../TestJoinQuery/nation_parquet/nation3        | Bin 0 -> 314 bytes
 .../TestJoinQuery/nation_parquet/nation_24rows  | Bin 0 -> 3266 bytes
 .../testBroadcastJoinWithEmptyRows.sql          |  13 +++++++++
 .../create_customer_parquet_ddl.sql             |   4 +++
 .../TestJoinQuery/create_nation_parquet_ddl.sql |   6 ++++
 .../testBroadcastJoinWithEmptyRows.result       |   7 +++++
 .../engine/planner/physical/MergeJoinExec.java  |   4 +++
 .../org/apache/tajo/storage/MergeScanner.java   |  28 +++++++++----------
 18 files changed, 65 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index ff4ca46..b474205 100644
--- a/CHANGES
+++ b/CHANGES
@@ -106,6 +106,9 @@ Release 0.12.0 - unreleased
 
   BUG FIXES
 
+    TAJO-2077: Join condition causes incorrect result, when a table has an empty 
+    row file. (jinho)
+
     TAJO-2072: The constructor of RegionSizeCalculator changes for HBase 1.0.0 
     compatibility. (Byunghwa Yun via jinho)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
index 42d8b48..2b92d80 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
@@ -266,4 +266,18 @@ public class TestInnerJoinQuery extends TestJoinQuery {
   public final void testBroadcastTwoPartJoin() throws Exception {
     runSimpleTests();
   }
+
+  @Test
+  @Option(withExplain = false, withExplainGlobal = false, parameterized = true)
+  @SimpleTest()
+  public void testBroadcastJoinWithEmptyRows() throws Exception {
+    executeDDL("create_customer_parquet_ddl.sql", "customer_parquet");
+    executeDDL("create_nation_parquet_ddl.sql", "nation_parquet");
+    try {
+      runSimpleTests();
+    } finally {
+      executeString("DROP TABLE customer_parquet");
+      executeString("DROP TABLE nation_parquet");
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows
new file mode 100644
index 0000000..3f6fac9
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows
new file mode 100644
index 0000000..d6a27aa
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows differ

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql b/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql
new file mode 100644
index 0000000..318758f
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql
@@ -0,0 +1,13 @@
+select
+    c_custkey,
+    c_name,
+    n_name
+from
+    customer_parquet,
+    nation_parquet
+where
+    c_nationkey = n_nationkey
+order by
+    c_custkey,
+    c_name,
+    n_name
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql
new file mode 100644
index 0000000..1c43a40
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql
@@ -0,0 +1,4 @@
+create external table if not exists customer_parquet (
+    c_custkey INT4, c_name TEXT, c_address TEXT, c_nationkey INT4,
+    c_phone TEXT, c_acctbal FLOAT8, c_mktsegment TEXT, c_comment TEXT)
+using parquet location ${table.path};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql
new file mode 100644
index 0000000..65bd56f
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql
@@ -0,0 +1,6 @@
+create external table if not exists nation_parquet (
+    n_nationkey int,
+    n_name text,
+    n_regionkey int,
+    n_comment text)
+    using parquet location ${table.path};
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result b/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result
new file mode 100644
index 0000000..7932c52
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result
@@ -0,0 +1,7 @@
+c_custkey,c_name,n_name
+-------------------------------
+1,Customer#000000001,MOROCCO
+2,Customer#000000002,JORDAN
+3,Customer#000000003,ARGENTINA
+4,Customer#000000004,EGYPT
+5,Customer#000000005,CANADA

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
index ab831b5..80c10f6 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
@@ -92,6 +92,10 @@ public class MergeJoinExec extends CommonJoinExec {
         outerTupleSlots.clear();
         innerTupleSlots.clear();
 
+        if (innerTuple == null || outerTuple == null) {
+          return null;
+        }
+
         int cmp;
         while ((cmp = joincomparator.compare(outerTuple, innerTuple)) != 0) {
           if (cmp > 0) {

http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
index 27ff589..04d7fed 100644
--- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
+++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
@@ -37,14 +37,12 @@ import java.util.Iterator;
 import java.util.List;
 
 public class MergeScanner implements Scanner {
-  private Configuration conf;
   private TableMeta meta;
   private Schema schema;
   private List<Fragment> fragments;
   private Iterator<Fragment> iterator;
   private Fragment currentFragment;
   private Scanner currentScanner;
-  private Tuple tuple;
   private boolean projectable = false;
   private boolean selectable = false;
   private Schema target;
@@ -59,7 +57,6 @@ public class MergeScanner implements Scanner {
   public MergeScanner(Configuration conf, Schema schema, TableMeta meta, List<Fragment> rawFragmentList,
                       Schema target)
       throws IOException {
-    this.conf = conf;
     this.schema = schema;
     this.meta = meta;
     this.target = target;
@@ -101,12 +98,15 @@ public class MergeScanner implements Scanner {
 
   @Override
   public Tuple next() throws IOException {
-    if (currentScanner != null)
+    Tuple tuple;
+    while (currentScanner != null) {
       tuple = currentScanner.next();
 
-    if (tuple != null) {
-      return tuple;
-    } else {
+      if (tuple != null) {
+        return tuple;
+      }
+
+      // since read tuple is null, close the current scanner.
       if (currentScanner != null) {
         currentScanner.close();
         TableStats scannerTableStsts = currentScanner.getInputStats();
@@ -114,13 +114,13 @@ public class MergeScanner implements Scanner {
           tableStats.setReadBytes(tableStats.getReadBytes() + scannerTableStsts.getReadBytes());
           tableStats.setNumRows(tableStats.getNumRows() + scannerTableStsts.getNumRows());
         }
+        currentScanner = null;
       }
+
       currentScanner = getNextScanner();
-      if (currentScanner != null) {
-        tuple = currentScanner.next();
-      }
     }
-    return tuple;
+
+    return null;
   }
 
   @Override
@@ -201,10 +201,10 @@ public class MergeScanner implements Scanner {
         currentScannerReadBytes = scannerTableStsts.getReadBytes();
       }
 
-      return (float)(tableStats.getReadBytes() + currentScannerReadBytes) / (float)tableStats.getNumBytes();
-    } else {
-      return progress;
+      progress = (float)(tableStats.getReadBytes() + currentScannerReadBytes) / (float)tableStats.getNumBytes();
     }
+
+    return progress;
   }
 
   @Override