You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by ji...@apache.org on 2014/08/01 07:58:56 UTC

[03/14] git commit: TAJO-914: join queries with constant values can cause schema mismatch in logical plan.

TAJO-914: join queries with constant values can cause schema mismatch in logical plan.

Closes #78


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/326be451
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/326be451
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/326be451

Branch: refs/heads/index_support
Commit: 326be451ded71099b88bdb6a50c40946df0af206
Parents: 45559ce
Author: Hyunsik Choi <hy...@apache.org>
Authored: Wed Jul 23 18:26:15 2014 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Wed Jul 23 18:26:15 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  6 ++++
 .../apache/tajo/engine/utils/SchemaUtil.java    | 20 +++++++++--
 .../tajo/engine/query/TestCaseByCases.java      | 38 ++++++++++++++++++--
 .../TestCaseByCases/testTAJO914Case1.sql        | 10 ++++++
 .../TestCaseByCases/testTAJO914Case2.sql        |  9 +++++
 .../TestCaseByCases/testTAJO914Case3.sql        | 10 ++++++
 .../TestCaseByCases/testTAJO914Case4.sql        | 10 ++++++
 .../TestCaseByCases/testTAJO914Case1.result     |  5 +++
 .../TestCaseByCases/testTAJO914Case2.result     |  5 +++
 .../TestCaseByCases/testTAJO914Case3.result     |  5 +++
 .../TestCaseByCases/testTAJO914Case4.result     |  5 +++
 11 files changed, 118 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 2be9b26..a67625d 100644
--- a/CHANGES
+++ b/CHANGES
@@ -97,6 +97,12 @@ Release 0.9.0 - unreleased
 
   BUG FIXES
 
+    TAJO-914: join queries with constant values can cause schema mismatch in
+    logical plan. (hyunsik)
+
+    TAJO-969: Distributed sort on a large data set may result in incorrect
+    results. (hyunsik)
+
     TAJO-972: Broadcast join with left outer join returns duplicated rows.
     (Hyoungjun Kim via jaehwa)
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java b/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
index c882607..981b572 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/utils/SchemaUtil.java
@@ -23,6 +23,16 @@ import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.catalog.TableDesc;
 
 public class SchemaUtil {
+  // See TAJO-914 bug.
+  //
+  // The underlying problem is that a constant value is evaluated multiple times, once at each scan.
+  // As a result, a join node can receive child nodes that have fields with the same name.
+  // Because the current schema does not allow duplicate names and silently ignores the duplicates,
+  // this ultimately causes an in-out schema mismatch between the parent and child nodes.
+  //
+  // tmpColumnSeq is a hack to avoid the above problem by keeping duplicated constant values as differently named fields.
+  // The proper solution would be https://issues.apache.org/jira/browse/TAJO-895.
+  static int tmpColumnSeq = 0;
   public static Schema merge(Schema left, Schema right) {
     Schema merged = new Schema();
     for(Column col : left.getColumns()) {
@@ -31,11 +41,17 @@ public class SchemaUtil {
       }
     }
     for(Column col : right.getColumns()) {
-      if (!merged.containsByQualifiedName(col.getQualifiedName())) {
+      if (merged.containsByQualifiedName(col.getQualifiedName())) {
+        merged.addColumn("?fake" + (tmpColumnSeq++), col.getDataType());
+      } else {
         merged.addColumn(col);
       }
     }
-    
+
+    // Reset the sequence to zero if the counter has overflowed to a negative value.
+    if (tmpColumnSeq < 0) {
+      tmpColumnSeq = 0;
+    }
     return merged;
   }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
index 459a4c1..846c290 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCaseByCases.java
@@ -71,7 +71,7 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testTAJO880_1() throws Exception {
+  public final void testTAJO880Case1() throws Exception {
     //TAJO-880: NULL in CASE clause occurs Exception.
     ResultSet res = executeString(
         "select case when l_returnflag != 'R' then l_orderkey else null end from lineitem"
@@ -91,7 +91,7 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testTAJO880_2() throws Exception {
+  public final void testTAJO880Case2() throws Exception {
     //TAJO-880: NULL in CASE clause occurs Exception.
     ResultSet res = executeString(
         "select case when l_returnflag != 'R' then null else l_orderkey end from lineitem"
@@ -111,7 +111,7 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
-  public final void testTAJO880_3() throws Exception {
+  public final void testTAJO880Case3() throws Exception {
     //TAJO-880: NULL in CASE clause occurs Exception.
     ResultSet res = executeString(
         "select case " +
@@ -135,6 +135,38 @@ public class TestCaseByCases extends QueryTestCaseBase {
   }
 
   @Test
+  public final void testTAJO914Case1() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void testTAJO914Case2() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void testTAJO914Case3() throws Exception {
+    executeString("CREATE TABLE T3 (l_orderkey bigint, col1 text);").close();
+    ResultSet res = executeQuery();
+    res.close();
+
+    res = executeString("select * from T3;");
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
+  public final void testTAJO914Case4() throws Exception {
+    ResultSet res = executeQuery();
+    assertResultSet(res);
+    cleanupQuery(res);
+  }
+
+  @Test
   public final void testTAJO917Case1() throws Exception {
     ResultSet res = executeQuery();
     assertResultSet(res);

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql
new file mode 100644
index 0000000..765110a
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case1.sql
@@ -0,0 +1,10 @@
+select
+  l_orderkey,
+  '##' as col1
+from
+  lineitem
+  join orders on l_orderkey = o_orderkey
+group by
+  l_orderkey
+order by
+  l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql
new file mode 100644
index 0000000..f404088
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case2.sql
@@ -0,0 +1,9 @@
+select
+  l_orderkey,
+  '##' as col1
+from
+  lineitem join orders on l_orderkey = o_orderkey
+group by
+  l_orderkey, col1
+order by
+  l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql
new file mode 100644
index 0000000..f9ee355
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case3.sql
@@ -0,0 +1,10 @@
+insert OVERWRITE into T3
+  select
+    l_orderkey,
+    '##' as col1
+  from
+    lineitem join orders on l_orderkey = o_orderkey
+  group by
+    l_orderkey, col1
+  order by
+    l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql
new file mode 100644
index 0000000..ba99b70
--- /dev/null
+++ b/tajo-core/src/test/resources/queries/TestCaseByCases/testTAJO914Case4.sql
@@ -0,0 +1,10 @@
+  select
+    l_orderkey,
+    '##' as col1,
+    sum(l_orderkey) as s1
+  from
+    lineitem join orders o1 on l_orderkey = o1.o_orderkey join orders o2 on l_orderkey = o2.o_orderkey
+  group by
+    l_orderkey, col1
+  order by
+    l_orderkey;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result
new file mode 100644
index 0000000..01e467c
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case1.result
@@ -0,0 +1,5 @@
+l_orderkey,col1
+-------------------------------
+1,##
+2,##
+3,##
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result
new file mode 100644
index 0000000..01e467c
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case2.result
@@ -0,0 +1,5 @@
+l_orderkey,col1
+-------------------------------
+1,##
+2,##
+3,##
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result
new file mode 100644
index 0000000..01e467c
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case3.result
@@ -0,0 +1,5 @@
+l_orderkey,col1
+-------------------------------
+1,##
+2,##
+3,##
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/tajo/blob/326be451/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result
new file mode 100644
index 0000000..d3b79d1
--- /dev/null
+++ b/tajo-core/src/test/resources/results/TestCaseByCases/testTAJO914Case4.result
@@ -0,0 +1,5 @@
+l_orderkey,col1,s1
+-------------------------------
+1,##,2
+2,##,2
+3,##,6
\ No newline at end of file