You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2016/02/17 06:20:17 UTC
tajo git commit: TAJO-2077: Join condition causes incorrect result, when a table has an empty row file.
Repository: tajo
Updated Branches:
refs/heads/master 294104d28 -> 81ab265fd
TAJO-2077: Join condition causes incorrect result, when a table has an empty row file.
Closes #963
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/81ab265f
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/81ab265f
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/81ab265f
Branch: refs/heads/master
Commit: 81ab265fdb77a2eedc0c9b4dccc2826b461daf56
Parents: 294104d
Author: Jinho Kim <jh...@apache.org>
Authored: Wed Feb 17 14:19:23 2016 +0900
Committer: Jinho Kim <jh...@apache.org>
Committed: Wed Feb 17 14:19:23 2016 +0900
----------------------------------------------------------------------
CHANGES | 3 ++
.../tajo/engine/query/TestInnerJoinQuery.java | 14 ++++++++++
.../TestJoinQuery/customer_parquet/customer | Bin 0 -> 506 bytes
.../TestJoinQuery/customer_parquet/customer1 | Bin 0 -> 506 bytes
.../TestJoinQuery/customer_parquet/customer2 | Bin 0 -> 506 bytes
.../TestJoinQuery/customer_parquet/customer3 | Bin 0 -> 506 bytes
.../customer_parquet/customer_5rows | Bin 0 -> 2311 bytes
.../dataset/TestJoinQuery/nation_parquet/nation | Bin 0 -> 314 bytes
.../TestJoinQuery/nation_parquet/nation1 | Bin 0 -> 314 bytes
.../TestJoinQuery/nation_parquet/nation2 | Bin 0 -> 314 bytes
.../TestJoinQuery/nation_parquet/nation3 | Bin 0 -> 314 bytes
.../TestJoinQuery/nation_parquet/nation_24rows | Bin 0 -> 3266 bytes
.../testBroadcastJoinWithEmptyRows.sql | 13 +++++++++
.../create_customer_parquet_ddl.sql | 4 +++
.../TestJoinQuery/create_nation_parquet_ddl.sql | 6 ++++
.../testBroadcastJoinWithEmptyRows.result | 7 +++++
.../engine/planner/physical/MergeJoinExec.java | 4 +++
.../org/apache/tajo/storage/MergeScanner.java | 28 +++++++++----------
18 files changed, 65 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index ff4ca46..b474205 100644
--- a/CHANGES
+++ b/CHANGES
@@ -106,6 +106,9 @@ Release 0.12.0 - unreleased
BUG FIXES
+ TAJO-2077: Join condition causes incorrect result, when a table has an empty
+ row file. (jinho)
+
TAJO-2072: The constructor of RegionSizeCalculator changes for HBase 1.0.0
compatibility. (Byunghwa Yun via jinho)
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
index 42d8b48..2b92d80 100644
--- a/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
+++ b/tajo-core-tests/src/test/java/org/apache/tajo/engine/query/TestInnerJoinQuery.java
@@ -266,4 +266,18 @@ public class TestInnerJoinQuery extends TestJoinQuery {
public final void testBroadcastTwoPartJoin() throws Exception {
runSimpleTests();
}
+
+ @Test
+ @Option(withExplain = false, withExplainGlobal = false, parameterized = true)
+ @SimpleTest()
+ public void testBroadcastJoinWithEmptyRows() throws Exception {
+ executeDDL("create_customer_parquet_ddl.sql", "customer_parquet");
+ executeDDL("create_nation_parquet_ddl.sql", "nation_parquet");
+ try {
+ runSimpleTests();
+ } finally {
+ executeString("DROP TABLE customer_parquet");
+ executeString("DROP TABLE nation_parquet");
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer1 differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer2 differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3
new file mode 100644
index 0000000..2ed9752
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer3 differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows
new file mode 100644
index 0000000..3f6fac9
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/customer_parquet/customer_5rows differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation1 differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation2 differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3
new file mode 100644
index 0000000..401ecf0
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation3 differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows
new file mode 100644
index 0000000..d6a27aa
Binary files /dev/null and b/tajo-core-tests/src/test/resources/dataset/TestJoinQuery/nation_parquet/nation_24rows differ
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql b/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql
new file mode 100644
index 0000000..318758f
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.sql
@@ -0,0 +1,13 @@
+select
+ c_custkey,
+ c_name,
+ n_name
+from
+ customer_parquet,
+ nation_parquet
+where
+ c_nationkey = n_nationkey
+order by
+ c_custkey,
+ c_name,
+ n_name
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql
new file mode 100644
index 0000000..1c43a40
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_customer_parquet_ddl.sql
@@ -0,0 +1,4 @@
+create external table if not exists customer_parquet (
+ c_custkey INT4, c_name TEXT, c_address TEXT, c_nationkey INT4,
+ c_phone TEXT, c_acctbal FLOAT8, c_mktsegment TEXT, c_comment TEXT)
+using parquet location ${table.path};
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql
new file mode 100644
index 0000000..65bd56f
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/queries/TestJoinQuery/create_nation_parquet_ddl.sql
@@ -0,0 +1,6 @@
+create external table if not exists nation_parquet (
+ n_nationkey int,
+ n_name text,
+ n_regionkey int,
+ n_comment text)
+ using parquet location ${table.path};
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result
----------------------------------------------------------------------
diff --git a/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result b/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result
new file mode 100644
index 0000000..7932c52
--- /dev/null
+++ b/tajo-core-tests/src/test/resources/results/TestInnerJoinQuery/testBroadcastJoinWithEmptyRows.result
@@ -0,0 +1,7 @@
+c_custkey,c_name,n_name
+-------------------------------
+1,Customer#000000001,MOROCCO
+2,Customer#000000002,JORDAN
+3,Customer#000000003,ARGENTINA
+4,Customer#000000004,EGYPT
+5,Customer#000000005,CANADA
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
index ab831b5..80c10f6 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/MergeJoinExec.java
@@ -92,6 +92,10 @@ public class MergeJoinExec extends CommonJoinExec {
outerTupleSlots.clear();
innerTupleSlots.clear();
+ if (innerTuple == null || outerTuple == null) {
+ return null;
+ }
+
int cmp;
while ((cmp = joincomparator.compare(outerTuple, innerTuple)) != 0) {
if (cmp > 0) {
http://git-wip-us.apache.org/repos/asf/tajo/blob/81ab265f/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
index 27ff589..04d7fed 100644
--- a/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
+++ b/tajo-storage/tajo-storage-common/src/main/java/org/apache/tajo/storage/MergeScanner.java
@@ -37,14 +37,12 @@ import java.util.Iterator;
import java.util.List;
public class MergeScanner implements Scanner {
- private Configuration conf;
private TableMeta meta;
private Schema schema;
private List<Fragment> fragments;
private Iterator<Fragment> iterator;
private Fragment currentFragment;
private Scanner currentScanner;
- private Tuple tuple;
private boolean projectable = false;
private boolean selectable = false;
private Schema target;
@@ -59,7 +57,6 @@ public class MergeScanner implements Scanner {
public MergeScanner(Configuration conf, Schema schema, TableMeta meta, List<Fragment> rawFragmentList,
Schema target)
throws IOException {
- this.conf = conf;
this.schema = schema;
this.meta = meta;
this.target = target;
@@ -101,12 +98,15 @@ public class MergeScanner implements Scanner {
@Override
public Tuple next() throws IOException {
- if (currentScanner != null)
+ Tuple tuple;
+ while (currentScanner != null) {
tuple = currentScanner.next();
- if (tuple != null) {
- return tuple;
- } else {
+ if (tuple != null) {
+ return tuple;
+ }
+
+ // since read tuple is null, close the current scanner.
if (currentScanner != null) {
currentScanner.close();
TableStats scannerTableStsts = currentScanner.getInputStats();
@@ -114,13 +114,13 @@ public class MergeScanner implements Scanner {
tableStats.setReadBytes(tableStats.getReadBytes() + scannerTableStsts.getReadBytes());
tableStats.setNumRows(tableStats.getNumRows() + scannerTableStsts.getNumRows());
}
+ currentScanner = null;
}
+
currentScanner = getNextScanner();
- if (currentScanner != null) {
- tuple = currentScanner.next();
- }
}
- return tuple;
+
+ return null;
}
@Override
@@ -201,10 +201,10 @@ public class MergeScanner implements Scanner {
currentScannerReadBytes = scannerTableStsts.getReadBytes();
}
- return (float)(tableStats.getReadBytes() + currentScannerReadBytes) / (float)tableStats.getNumBytes();
- } else {
- return progress;
+ progress = (float)(tableStats.getReadBytes() + currentScannerReadBytes) / (float)tableStats.getNumBytes();
}
+
+ return progress;
}
@Override