You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by bl...@apache.org on 2015/06/26 04:12:34 UTC
tajo git commit: TAJO-1644: When inserting empty data into a
partitioned table, existing data would be removed. (jaehwa)
Repository: tajo
Updated Branches:
refs/heads/master aa49dc4a8 -> f57d6c43f
TAJO-1644: When inserting empty data into a partitioned table, existing data would be removed. (jaehwa)
Closes #601
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f57d6c43
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f57d6c43
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f57d6c43
Branch: refs/heads/master
Commit: f57d6c43fd201326fef2a695b1d1e798d0f814e3
Parents: aa49dc4
Author: JaeHwa Jung <bl...@apache.org>
Authored: Fri Jun 26 11:11:09 2015 +0900
Committer: JaeHwa Jung <bl...@apache.org>
Committed: Fri Jun 26 11:11:09 2015 +0900
----------------------------------------------------------------------
CHANGES | 3 +
.../main/java/org/apache/tajo/SessionVars.java | 4 +
.../java/org/apache/tajo/conf/TajoConf.java | 7 +-
.../tajo/engine/query/TestOuterJoinQuery.java | 2 +-
.../tajo/engine/query/TestTablePartitions.java | 106 ++++++++-----------
.../TestTajoCli/testHelpSessionVars.result | 1 +
.../org/apache/tajo/storage/FileTablespace.java | 7 +-
7 files changed, 66 insertions(+), 64 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 9412179..e0cbd47 100644
--- a/CHANGES
+++ b/CHANGES
@@ -163,6 +163,9 @@ Release 0.11.0 - unreleased
BUG FIXES
+ TAJO-1644: When inserting empty data into a partitioned table,
+ existing data would be removed. (jaehwa)
+
TAJO-1642: CatalogServer need to check meta table first. (jaehwa)
TAJO-1650: TestQueryResource.testGetAllQueries() occasionally fails.
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
index 98c2f3e..28fdb0b 100644
--- a/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
+++ b/tajo-common/src/main/java/org/apache/tajo/SessionVars.java
@@ -126,6 +126,10 @@ public enum SessionVars implements ConfigKey {
NULL_CHAR(ConfVars.$TEXT_NULL, "null char of text file output", DEFAULT),
CODEGEN(ConfVars.$CODEGEN, "Runtime code generation enabled (experiment)", DEFAULT),
+ PARTITION_NO_RESULT_OVERWRITE_ENABLED(ConfVars.$PARTITION_NO_RESULT_OVERWRITE_ENABLED,
+ "If True, a partitioned table is overwritten even if a sub query leads to no result. "
+ + "Otherwise, the table data will be kept if there is no result", DEFAULT),
+
// Behavior Control ---------------------------------------------------------
ARITHABORT(ConfVars.$BEHAVIOR_ARITHMETIC_ABORT,
"If true, a running query will be terminated when an overflow or divide-by-zero occurs.", DEFAULT),
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index ba777c1..14cfb11 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -323,7 +323,6 @@ public class TajoConf extends Configuration {
$DIST_QUERY_JOIN_TASK_VOLUME("tajo.dist-query.join.task-volume-mb", 128),
$DIST_QUERY_SORT_TASK_VOLUME("tajo.dist-query.sort.task-volume-mb", 128),
$DIST_QUERY_GROUPBY_TASK_VOLUME("tajo.dist-query.groupby.task-volume-mb", 128),
-
$DIST_QUERY_JOIN_PARTITION_VOLUME("tajo.dist-query.join.partition-volume-mb", 128, Validators.min("1")),
$DIST_QUERY_GROUPBY_PARTITION_VOLUME("tajo.dist-query.groupby.partition-volume-mb", 256, Validators.min("1")),
$DIST_QUERY_TABLE_PARTITION_VOLUME("tajo.dist-query.table-partition.task-volume-mb", 256, Validators.min("1")),
@@ -376,7 +375,11 @@ public class TajoConf extends Configuration {
// Behavior Control ---------------------------------------------------------
$BEHAVIOR_ARITHMETIC_ABORT("tajo.behavior.arithmetic-abort", false),
- // ResultSet ---------------------------------------------------------
+ // If true, a partitioned table is overwritten even if the subquery produces no result.
+ // Otherwise, the existing table data is kept when there is no result.
+ $PARTITION_NO_RESULT_OVERWRITE_ENABLED("tajo.partition.overwrite.even-if-no-result", false),
+
+ // ResultSet ---------------------------------------------------------
$RESULT_SET_FETCH_ROWNUM("tajo.resultset.fetch.rownum", 200),
$RESULT_SET_BLOCK_WAIT("tajo.resultset.block.wait", true),
;
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
index 9445557..9d0e0bc 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestOuterJoinQuery.java
@@ -251,7 +251,7 @@ public class TestOuterJoinQuery extends TestJoinQuery {
}
@Test
- @Option(withExplain = true, withExplainGlobal = true, parameterized = true)
+ @Option(withExplain = true, withExplainGlobal = true, parameterized = true, sort = true)
@SimpleTest(queries = {
@QuerySpec("select t1.id, t1.name, t2.id, t3.id, t4.id\n" +
"from jointable11 t1\n" +
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
index 397b9ef..ef57356 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestTablePartitions.java
@@ -56,6 +56,7 @@ import java.util.*;
import static org.apache.tajo.TajoConstants.DEFAULT_DATABASE_NAME;
import static org.apache.tajo.plan.serder.PlanProto.ShuffleType.SCATTERED_HASH_SHUFFLE;
import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
@RunWith(Parameterized.class)
public class TestTablePartitions extends QueryTestCaseBase {
@@ -437,18 +438,7 @@ public class TestTablePartitions extends QueryTestCaseBase {
Path path = new Path(desc.getUri());
FileSystem fs = FileSystem.get(conf);
- assertTrue(fs.isDirectory(path));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
+ verifyDirectoriesForThreeColumns(fs, path, 1);
if (!testingCluster.isHiveCatalogStoreRunning()) {
assertEquals(5, desc.getStats().getNumRows().intValue());
}
@@ -488,22 +478,11 @@ public class TestTablePartitions extends QueryTestCaseBase {
desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
path = new Path(desc.getUri());
- assertTrue(fs.isDirectory(path));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
-
+ verifyDirectoriesForThreeColumns(fs, path, 2);
if (!testingCluster.isHiveCatalogStoreRunning()) {
assertEquals(5, desc.getStats().getNumRows().intValue());
}
+
String expected = "N\n" +
"N\n" +
"N\n" +
@@ -548,54 +527,61 @@ public class TestTablePartitions extends QueryTestCaseBase {
+ " where l_orderkey = 1 and l_partkey = 1 and l_linenumber = 1");
res.close();
- desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
- assertTrue(fs.isDirectory(path));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
- assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
-
+ verifyDirectoriesForThreeColumns(fs, path, 3);
if (!testingCluster.isHiveCatalogStoreRunning()) {
// TODO: If another partition directory already exists, its row count must be added to the resulting number of rows.
+ // desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
// assertEquals(6, desc.getStats().getNumRows().intValue());
}
- res = executeString("select * from " + tableName + " where col2 = 1");
- resultSetData = resultSetToString(res);
- res.close();
- expected = "col4,col1,col2,col3\n" +
- "-------------------------------\n" +
- "N,1,1,17.0\n" +
- "N,1,1,17.0\n" +
- "N,1,1,30.0\n" +
- "N,1,1,36.0\n" +
- "N,1,1,36.0\n";
-
- assertEquals(expected, resultSetData);
+ verifyKeptExistingData(res, tableName);
// insert overwrite empty result to partitioned table
res = executeString("insert overwrite into " + tableName
- + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey" +
- " > 100");
+ + " select l_returnflag, l_orderkey, l_partkey, l_quantity from lineitem where l_orderkey > 100");
res.close();
- desc = catalog.getTableDesc(DEFAULT_DATABASE_NAME, tableName);
+ verifyDirectoriesForThreeColumns(fs, path, 4);
+ verifyKeptExistingData(res, tableName);
- ContentSummary summary = fs.getContentSummary(new Path(desc.getUri()));
+ executeString("DROP TABLE " + tableName + " PURGE").close();
+ }
- assertEquals(summary.getDirectoryCount(), 1L);
- assertEquals(summary.getFileCount(), 0L);
- assertEquals(summary.getLength(), 0L);
+ private final void verifyKeptExistingData(ResultSet res, String tableName) throws Exception {
+ res = executeString("select * from " + tableName + " where col2 = 1");
+ String resultSetData = resultSetToString(res);
+ res.close();
+ String expected = "col4,col1,col2,col3\n" +
+ "-------------------------------\n" +
+ "N,1,1,17.0\n" +
+ "N,1,1,17.0\n" +
+ "N,1,1,30.0\n" +
+ "N,1,1,36.0\n" +
+ "N,1,1,36.0\n";
- executeString("DROP TABLE " + tableName + " PURGE").close();
+ assertEquals(expected, resultSetData);
+ }
+
+ private final void verifyDirectoriesForThreeColumns(FileSystem fs, Path path, int step) throws Exception {
+ assertTrue(fs.isDirectory(path));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1")));
+
+ if (step == 1 || step == 2) {
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
+ } else {
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=17.0")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=1/col2=1/col3=30.0")));
+ }
+
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=2/col2=2/col3=38.0")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=2/col3=45.0")));
+ assertTrue(fs.isDirectory(new Path(path.toUri() + "/col1=3/col2=3/col3=49.0")));
}
@Test
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
index 137b0de..5c2ffe3 100644
--- a/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
+++ b/tajo-core/src/test/resources/results/TestTajoCli/testHelpSessionVars.result
@@ -35,6 +35,7 @@ Available Session Variables:
\set MAX_OUTPUT_FILE_SIZE [int value] - Maximum per-output file size (mb). 0 means infinite.
\set NULL_CHAR [text value] - null char of text file output
\set CODEGEN [true or false] - Runtime code generation enabled (experiment)
+\set PARTITION_NO_RESULT_OVERWRITE_ENABLED [true or false] - If True, a partitioned table is overwritten even if a sub query leads to no result. Otherwise, the table data will be kept if there is no result
\set ARITHABORT [true or false] - If true, a running query will be terminated when an overflow or divide-by-zero occurs.
\set FETCH_ROWNUM [int value] - Sets the number of rows at a time from Master
\set BLOCK_ON_RESULT [true or false] - Whether to block result set on query execution
http://git-wip-us.apache.org/repos/asf/tajo/blob/f57d6c43/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
index 3b63012..e8a6c12 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/FileTablespace.java
@@ -981,7 +981,12 @@ public class FileTablespace extends Tablespace {
Path oldTableDir = new Path(stagingDir, TajoConstants.INSERT_OVERWIRTE_OLD_TABLE_NAME);
ContentSummary summary = fs.getContentSummary(stagingResultDir);
- if (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty() && summary.getFileCount() > 0L) {
+ // When inserting empty data into a partitioned table, check whether the existing data should be removed or kept.
+ boolean overwriteEnabled = queryContext.getBool(SessionVars.PARTITION_NO_RESULT_OVERWRITE_ENABLED);
+
+ // If the existing data does not need to be kept, check whether there are any result files.
+ if ( (!queryContext.get(QueryVars.OUTPUT_PARTITIONS, "").isEmpty())
+ && (!overwriteEnabled || (overwriteEnabled && summary.getFileCount() > 0L))) {
// This is a map for existing non-leaf directory to rename. A key is current directory and a value is
// renaming directory.
Map<Path, Path> renameDirs = TUtil.newHashMap();