You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by lp...@apache.org on 2020/03/17 08:49:23 UTC

[hive] branch master updated: HIVE-23023: MR compaction ignores column schema evolution (Karen Coppage, reviewed by Laszlo Pinter)

This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 4daa57c  HIVE-23023: MR compaction ignores column schema evolution (Karen Coppage, reviewed by Laszlo Pinter)
4daa57c is described below

commit 4daa57c95e0027253977f56b7332bc7ed12cb8a4
Author: Karen Coppage <ka...@cloudera.com>
AuthorDate: Tue Mar 17 09:48:28 2020 +0100

    HIVE-23023: MR compaction ignores column schema evolution (Karen Coppage, reviewed by Laszlo Pinter)
---
 .../hadoop/hive/ql/txn/compactor/CompactorMR.java  |  3 +-
 .../apache/hadoop/hive/ql/TestTxnCommands2.java    | 51 ++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index c44f2b50..543ec0b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -149,7 +149,8 @@ public class CompactorMR {
       job.setQueueName(queueName);
     }
 
-    setColumnTypes(job, sd.getCols());
+    // have to use table columns since partition SD isn't updated if these are altered
+    setColumnTypes(job, t.getSd().getCols());
     //with feature on, multiple tasks may get into conflict creating/using TMP_LOCATION and if we were
     //to generate the target dir in the Map task, there is no easy way to pass it to OutputCommitter
     //to do the final move
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
index 79dfb02..f3834cc 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands2.java
@@ -1978,6 +1978,57 @@ public class TestTxnCommands2 {
   }
 
   /**
+   * Evolve the schema of a partitioned transactional ORC table (ADD COLUMNS) between two inserts,
+   * and verify that a major compaction (MR) of that partition loses no data.
+   *
+   * @throws Exception if a query fails
+   */
+  @Test
+  public void testSchemaEvolutionCompaction() throws Exception {
+    String tblName = "schemaevolutioncompaction";
+    runStatementOnDriver("drop table if exists " + tblName);
+    runStatementOnDriver("CREATE TABLE " + tblName + "(a INT) " +
+        " PARTITIONED BY(part string)" +
+        " STORED AS ORC TBLPROPERTIES ('transactional'='true')");
+
+    // First INSERT round: the table has only column a at this point.
+    runStatementOnDriver("insert into " + tblName + " partition (part='aa') values (1)");
+
+    // Schema evolution: add column b with an ALTER TABLE that names no partition.
+    runStatementOnDriver("ALTER TABLE " + tblName + " ADD COLUMNS(b int)");
+
+    // Second INSERT round: same partition, now supplying both a and b.
+    runStatementOnDriver("insert into " + tblName + " partition (part='aa') values (2, 2000)");
+
+    // Validate data before compaction: the row inserted before ADD COLUMNS reads b as NULL.
+    List<String> res = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a");
+    Assert.assertEquals(2, res.size());
+    Assert.assertEquals("1\tNULL\taa", res.get(0));
+    Assert.assertEquals("2\t2000\taa", res.get(1));
+
+    // Request a major compaction of partition part=aa, then run the worker and the cleaner.
+    TxnStore txnHandler = TxnUtils.getTxnStore(hiveConf);
+    CompactionRequest compactionRequest =
+        new CompactionRequest("default", tblName, CompactionType.MAJOR);
+    compactionRequest.setPartitionname("part=aa");
+    txnHandler.compact(compactionRequest);
+    runWorker(hiveConf);
+    runCleaner(hiveConf);
+
+    // Verify that exactly one compaction is listed and that it succeeded.
+    List<ShowCompactResponseElement> compacts =
+        txnHandler.showCompact(new ShowCompactRequest()).getCompacts();
+    Assert.assertEquals(1, compacts.size());
+    Assert.assertEquals(TxnStore.SUCCEEDED_RESPONSE, compacts.get(0).getState());
+
+    // Validate data after compaction: results must match the pre-compaction results exactly.
+    res = runStatementOnDriver("SELECT * FROM " + tblName + " ORDER BY a");
+    Assert.assertEquals(2, res.size());
+    Assert.assertEquals("1\tNULL\taa", res.get(0));
+    Assert.assertEquals("2\t2000\taa", res.get(1));
+  }
+
+  /**
    * Test cleaner for TXN_TO_WRITE_ID table.
    * @throws Exception
    */