You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by bl...@apache.org on 2015/06/26 04:12:34 UTC

tajo git commit: TAJO-1644: When inserting empty data into a partitioned table, existing data would be removed. (jaehwa)

Repository: tajo
Updated Branches:
  refs/heads/master aa49dc4a8 -> f57d6c43f


TAJO-1644: When inserting empty data into a partitioned table, existing data would be removed. (jaehwa)

Closes #601


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f57d6c43
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f57d6c43
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f57d6c43

Branch: refs/heads/master
Commit: f57d6c43fd201326fef2a695b1d1e798d0f814e3
Parents: aa49dc4
Author: JaeHwa Jung <bl...@apache.org>
Authored: Fri Jun 26 11:11:09 2015 +0900
Committer: JaeHwa Jung <bl...@apache.org>
Committed: Fri Jun 26 11:11:09 2015 +0900

----------------------------------------------------------------------
 CHANGES                                         |   3 +
 .../main/java/org/apache/tajo/SessionVars.java  |   4 +
 .../java/org/apache/tajo/conf/TajoConf.java     |   7 +-
 .../tajo/engine/query/TestOuterJoinQuery.java   |   2 +-
 .../tajo/engine/query/TestTablePartitions.java  | 106 ++++++++-----------
 .../TestTajoCli/testHelpSessionVars.result      |   1 +
 .../org/apache/tajo/storage/FileTablespace.java |   7 +-
 7 files changed, 66 insertions(+), 64 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 9412179..e0cbd47 100644
--- a/CHANGES
+++ b/CHANGES
@@ -163,6 +163,9 @@ Release 0.11.0 - unreleased
 
   BUG FIXES
 
+    TAJO-1644: When inserting empty data into a partitioned table,
+    existing data would be removed. (jaehwa)
+
     TAJO-1642: CatalogServer need to check meta table first. (jaehwa)
 
     TAJO-1650: TestQueryResource.testGetAllQueries() occasionally fails.

http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
index 98c2f3e..28fdb0b 100644
--- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
+++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
@@ -126,6 +126,10 @@ public enum SessionVars implements ConfigKey {
   NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT),
   CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT),
 
+  PARTITION_NO_RESULT_OVERWRITE_ENABLED(ConfVars.$PARTITION_NO_RESULT_OVERWRITE_ENABLED,
+    "If True, a partitioned table is overwritten even if a sub query leads to no result. "
+    + "Otherwise, the table data will be kept if there is no result", DEFAULT),
+
   // Behavior Control ---------------------------------------------------------
   ARITHABORT(ConfVars.$BEHAVIOR_ARITHMETIC_ABORT,
       "If true, a running query will be terminated when an overflow or divide-by-zero occurs.", DEFAULT),

http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index ba777c1..14cfb11 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -323,7 +323,6 @@ public class TajoConf extends Configuration {
     $DIST_QUERY_JOIN_TASK_VOLUME("tajo.dist-query.join.task-volume-mb", 128),
     $DIST_QUERY_SORT_TASK_VOLUME("tajo.dist-query.sort.task-volume-mb", 128),
     $DIST_QUERY_GROUPBY_TASK_VOLUME("tajo.dist-query.groupby.task-volume-mb", 128),
-
     $DIST_QUERY_JOIN_PARTITION_VOLUME("tajo.dist-query.join.partition-volume-mb", 128, Validators.min("1")),
     $DIST_QUERY_GROUPBY_PARTITION_VOLUME("tajo.dist-query.groupby.partition-volume-mb", 256, Validators.min("1")),
     $DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb", 256, Validators.min("1")),
@@ -376,7 +375,11 @@ public class TajoConf extends Configuration {
     // Behavior Control ---------------------------------------------------------
     $BEHAVIOR_ARITHMETIC_ABORT("tajo.behavior.arithmetic-abort", false),
 
-    // ResultSet ---------------------------------------------------------
+    // If True, a partitioned table is overwritten even if a sub query leads to no result.
+    // Otherwise, the table data will be kept if there is no result
+    $PARTITION_NO_RESULT_OVERWRITE_ENABLED("tajo.partition.overwrite.even-if-no-result", false),
+
+      // ResultSet ---------------------------------------------------------
     $RESULT_SET_FETCH_ROWNUM("tajo.resultset.fetch.rownum", 200),
     $RESULT_SET_BLOCK_WAIT("tajo.resultset.block.wait", true),
     ;

http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
index 9445557..9d0e0bc 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
@@ -251,7 +251,7 @@ public class TestOuterJoinQuery extends TestJoinQuery {
   }
 
   @Test
-  @Option(withExplain = true, withExplainGlobal = true, parameterized = true)
+  @Option(withExplain = true, withExplainGlobal = true, parameterized = true, sort = true)
   @SimpleTest(queries = {
       @QuerySpec("select t1.id, t1.name, t2.id, t3.id, t4.id\n" +
           "from jointable11 t1\n" +

http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
index 397b9ef..ef57356 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
@@ -56,6 +56,7 @@ import java.util.*;
 import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME;
 import static org.apache.tajo.plan.serder.PlanProto.ShuffleType.SCATTERED_HASH_SHUFFLE;
 import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
 
 @RunWith(Parameterized.class)
 public class TestTablePartitions extends QueryTestCaseBase {
@@ -437,18 +438,7 @@ public class TestTablePartitions extends QueryTestCaseBase {
     Path path = new Path(desc.getUri());
 
     FileSystem fs = FileSystem.get(conf);
-    assertTrue(fs.isDirectory(path));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
+    verifyDirectoriesForThreeColumns(fs, path, 1);
     if (!testingCluster.isHiveCatalogStoreRunning()) {
       assertEquals(5, desc.getStats().getNumRows().intValue());
     }
@@ -488,22 +478,11 @@ public class TestTablePartitions extends QueryTestCaseBase {
     desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
     path = new Path(desc.getUri());
 
-    assertTrue(fs.isDirectory(path));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
-
+    verifyDirectoriesForThreeColumns(fs, path, 2);
     if (!testingCluster.isHiveCatalogStoreRunning()) {
       assertEquals(5, desc.getStats().getNumRows().intValue());
     }
+
     String expected = "N\n" +
         "N\n" +
         "N\n" +
@@ -548,54 +527,61 @@ public class TestTablePartitions extends QueryTestCaseBase {
         + " where l_orderkey = 1 and l_partkey = 1 and  l_linenumber = 1");
     res.close();
 
-    desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
-    assertTrue(fs.isDirectory(path));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
-    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
-
+    verifyDirectoriesForThreeColumns(fs, path, 3);
     if (!testingCluster.isHiveCatalogStoreRunning()) {
       // TODO: If there is existing another partition directory, we must add its rows number to result row numbers.
+      // desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
       // assertEquals(6, desc.getStats().getNumRows().intValue());
     }
 
-    res = executeString("select * from " + tableName + " where col2 = 1");
-    resultSetData = resultSetToString(res);
-    res.close();
-    expected = "col4,col1,col2,col3\n" +
-        "-------------------------------\n" +
-        "N,1,1,17.0\n" +
-        "N,1,1,17.0\n" +
-        "N,1,1,30.0\n" +
-        "N,1,1,36.0\n" +
-        "N,1,1,36.0\n";
-
-    assertEquals(expected, resultSetData);
+    verifyKeptExistingData(res, tableName);
 
     // insert overwrite empty result to partitioned table
     res = executeString("insert overwrite into " + tableName
-      + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey" +
-      " > 100");
+      + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey > 100");
     res.close();
 
-    desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
+    verifyDirectoriesForThreeColumns(fs, path, 4);
+    verifyKeptExistingData(res, tableName);
 
-    ContentSummary summary = fs.getContentSummary(new Path(desc.getUri()));
+    executeString("DROP TABLE " + tableName + " PURGE").close();
+  }
 
-    assertEquals(summary.getDirectoryCount(), 1L);
-    assertEquals(summary.getFileCount(), 0L);
-    assertEquals(summary.getLength(), 0L);
+  private final void verifyKeptExistingData(ResultSet res, String tableName) throws Exception {
+    res = executeString("select * from " + tableName + " where col2 = 1");
+    String resultSetData = resultSetToString(res);
+    res.close();
+    String expected = "col4,col1,col2,col3\n" +
+      "-------------------------------\n" +
+      "N,1,1,17.0\n" +
+      "N,1,1,17.0\n" +
+      "N,1,1,30.0\n" +
+      "N,1,1,36.0\n" +
+      "N,1,1,36.0\n";
 
-    executeString("DROP TABLE " + tableName + " PURGE").close();
+    assertEquals(expected, resultSetData);
+  }
+
+  private final void verifyDirectoriesForThreeColumns(FileSystem fs, Path path, int step) throws Exception {
+    assertTrue(fs.isDirectory(path));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
+
+    if (step == 1 || step == 2) {
+      assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
+    } else {
+      assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
+      assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0")));
+    }
+
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
+    assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
index 137b0de..5c2ffe3 100644
--- a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
+++ b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
@@ -35,6 +35,7 @@ Available Session Variables:
 \set MAX_OUTPUT_FILE_SIZE [int value] - Maximum per-output file size (mb). 0 means infinite.
 \set NULL_CHAR [text value] - null char of text file output
 \set CODEGEN [true or false] - Runtime code generation enabled (experiment)
+\set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If True, a partitioned table is overwritten even if a sub query leads to no result. Otherwise, the table data will be kept if there is no result
 \set ARITHABORT [true or false] - If true, a running query will be terminated when an overflow or divide-by-zero occurs.
 \set FETCH_ROWNUM [int value] - Sets the number of rows at a time from Master
 \set BLOCK_ON_RESULT [true or false] - Whether to block result set on query execution

http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
index 3b63012..e8a6c12 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
@@ -981,7 +981,12 @@ public class FileTablespace extends Tablespace {
           Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME);
           ContentSummary summary = fs.getContentSummary(stagingResultDir);
 
-          if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty() && summary.getFileCount() > 0L) {
+          // When inserting empty data into a partitioned table, check if keep existing data need to be remove or not.
+          boolean overwriteEnabled = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED);
+
+          // If existing data doesn't need to keep, check if there are some files.
+          if ( (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty())
+            && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) {
             // This is a map for existing non-leaf directory to rename. A key is current directory and a value is
             // renaming directory.
             Map<Path, Path> renameDirs = TUtil.newHashMap();