You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by lp...@apache.org on 2020/03/17 08:49:23 UTC
[hive] branch master updated: HIVE-23023: MR compaction ignores
column schema evolution (Karen Coppage, reviewed by Laszlo Pinter)
This is an automated email from the ASF dual-hosted git repository.
lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 4daa57c HIVE-23023: MR compaction ignores column schema evolution (Karen Coppage, reviewed by Laszlo Pinter)
4daa57c is described below
commit 4daa57c95e0027253977f56b7332bc7ed12cb8a4
Author: Karen Coppage <ka...@cloudera.com>
AuthorDate: Tue Mar 17 09:48:28 2020 +0100
HIVE-23023: MR compaction ignores column schema evolution (Karen Coppage, reviewed by Laszlo Pinter)
---
.../hadoop/hive/ql/txn/compactor/CompactorMR.java | 3 +-
.../apache/hadoop/hive/ql/TestTxnCommands2.java | 51 ++++++++++++++++++++++
2 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index c44f2b50..543ec0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -149,7 +149,8 @@ public class CompactorMR {
job.setQueueName(queueName);
}
- setColumnTypes(job, sd.getCols());
+ // have to use table columns since partition SD isn't updated if these are altered
+ setColumnTypes(job, t.getSd().getCols());
//with feature on, multiple tasks may get into conflict creating/using TMP_LOCATION and if we were
//to generate the target dir in the Map task, there is no easy way to pass it to OutputCommitter
//to do the final move
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 79dfb02..f3834cc 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -1978,6 +1978,57 @@ public class TestTxnCommands2 {
}
/**
+ * Create a table with schema evolution, and verify that no data is lost during (MR major)
+ * compaction.
+ *
+ * @throws Exception if a query fails
+ */
+ @Test
+ public void testSchemaEvolutionCompaction() throws Exception {
+ String tblName = "schemaevolutioncompaction";
+ runStatementOnDriver("drop table if exists " + tblName);
+ runStatementOnDriver("CREATE TABLE " + tblName + "(a INT) " +
+ " PARTITIONED BY(part string)" +
+ " STORED AS ORC TBLPROPERTIES ('transactional'='true')");
+
+ // First INSERT round.
+ runStatementOnDriver("insert into " + tblName + " partition (part='aa') values (1)");
+
+ // ALTER TABLE ... ADD COLUMNS
+ runStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(b int)");
+
+ // Second INSERT round.
+ runStatementOnDriver("insert into " + tblName + " partition (part='aa') values (2, 2000)");
+
+ // Validate data
+ List<String> res = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a");
+ Assert.assertEquals(2, res.size());
+ Assert.assertEquals("1\tNULL\taa", res.get(0));
+ Assert.assertEquals("2\t2000\taa", res.get(1));
+
+ // Compact
+ TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+ CompactionRequest compactionRequest =
+ new CompactionRequest("default", tblName, CompactionType.MAJOR);
+ compactionRequest.setPartitionname("part=aa");
+ txnHandler.compact(compactionRequest);
+ runWorker(hiveConf);
+ runCleaner(hiveConf);
+
+ // Verify successful compaction
+ List<ShowCompactResponseElement> compacts =
+ txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
+ Assert.assertEquals(1, compacts.size());
+ Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
+
+ // Validate data after compaction
+ res = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a");
+ Assert.assertEquals(2, res.size());
+ Assert.assertEquals("1\tNULL\taa", res.get(0));
+ Assert.assertEquals("2\t2000\taa", res.get(1));
+ }
+
+ /**
* Test cleaner for TXN_TO_WRITE_ID table.
* @throws Exception
*/