You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kl...@apache.org on 2020/08/13 15:57:14 UTC

[hive] branch master updated: HIVE-24015: Disable query-based compaction on MR execution engine (Karen Coppage, reviewed by Laszlo Pinter)

This is an automated email from the ASF dual-hosted git repository.

klcopp pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 29b39c6  HIVE-24015: Disable query-based compaction on MR execution engine (Karen Coppage, reviewed by Laszlo Pinter)
29b39c6 is described below

commit 29b39c651fd7690d60048071bf6c8e704d1d840c
Author: Karen Coppage <ka...@gmail.com>
AuthorDate: Thu Aug 13 17:56:57 2020 +0200

    HIVE-24015: Disable query-based compaction on MR execution engine (Karen Coppage, reviewed by Laszlo Pinter)
    
    Closes #1375.
---
 .../hive/ql/txn/compactor/CompactorOnTezTest.java  |  1 -
 .../ql/txn/compactor/TestCrudCompactorOnMr.java    | 30 ---------
 .../ql/txn/compactor/TestCrudCompactorOnTez.java   | 72 ++++------------------
 .../ql/txn/compactor/QueryCompactorFactory.java    | 15 +++--
 4 files changed, 21 insertions(+), 97 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java
index 08b9039..95d437e 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorOnTezTest.java
@@ -54,7 +54,6 @@ public class CompactorOnTezTest {
   protected HiveConf conf;
   protected IMetaStoreClient msClient;
   protected IDriver driver;
-  protected boolean runsOnTez = true;
   protected boolean mmCompaction = false;
 
   @Before
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnMr.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnMr.java
deleted file mode 100644
index c60ff1d..0000000
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnMr.java
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.hadoop.hive.ql.txn.compactor;
-
-import org.apache.hadoop.hive.conf.HiveConf;
-import org.junit.Before;
-
-@SuppressWarnings("deprecation")
-public class TestCrudCompactorOnMr extends TestCrudCompactorOnTez {
-  @Before
-  public void setMr() {
-    driver.getConf().setVar(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE, "mr");
-    runsOnTez = false;
-  }
-}
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
index 04d7663..cebe830 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCrudCompactorOnTez.java
@@ -185,17 +185,9 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
     List<String> preCompactionRsBucket0 = testDataProvider.getBucketData(tblName, "536870912");
     List<String> preCompactionRsBucket1 = testDataProvider.getBucketData(tblName, "536936448");
     List<String> preCompactionRsBucket2 = testDataProvider.getBucketData(tblName, "537001984");
-    if (runsOnTez) { // Check bucket contents
-      Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0, preCompactionRsBucket0);
-      Assert.assertEquals("pre-compaction bucket 1", expectedRsBucket1, preCompactionRsBucket1);
-      Assert.assertEquals("pre-compaction bucket 2", expectedRsBucket2, preCompactionRsBucket2);
-    } else {
-      // MR sometimes inserts rows in the opposite order from Tez, so rowids won't match. so we
-      // just check whether the bucket contents change during compaction.
-      expectedRsBucket0 = preCompactionRsBucket0;
-      expectedRsBucket1 = preCompactionRsBucket1;
-      expectedRsBucket2 = preCompactionRsBucket2;
-    }
+    Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0, preCompactionRsBucket0);
+    Assert.assertEquals("pre-compaction bucket 1", expectedRsBucket1, preCompactionRsBucket1);
+    Assert.assertEquals("pre-compaction bucket 2", expectedRsBucket2, preCompactionRsBucket2);
 
     // Run major compaction and cleaner
     CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true);
@@ -269,14 +261,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
         "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":1}\t6\t2\ttoday",
         "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":2}\t6\t3\ttoday",
         "{\"writeid\":4,\"bucketid\":536870912,\"rowid\":3}\t6\t4\ttoday"));
-    if (runsOnTez) { // Check bucket contents
-      Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0,
-          testDataProvider.getBucketData(tblName, "536870912"));
-    } else {
-      // MR sometimes inserts rows in the opposite order from Tez, so rowids won't match. so we
-      // just check whether the bucket contents change during compaction.
-      expectedRsBucket0 = testDataProvider.getBucketData(tblName, "536870912");
-    }
+    Assert.assertEquals("pre-compaction bucket 0", expectedRsBucket0,
+        testDataProvider.getBucketData(tblName, "536870912"));
 
     // Run major compaction and cleaner for all 3 partitions
     CompactorTestUtil.runCompaction(conf, dbName, tblName, CompactionType.MAJOR, true,
@@ -361,15 +347,8 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
         "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":2}\t6\t3\ttoday",
         "{\"writeid\":4,\"bucketid\":536936448,\"rowid\":3}\t6\t4\ttoday");
     List<String> rsBucket1 = dataProvider.getBucketData(tableName, "536936448");
-    if (runsOnTez) {
-      Assert.assertEquals(expectedRsBucket0, rsBucket0);
-      Assert.assertEquals(expectedRsBucket1, rsBucket1);
-    } else {
-      // MR sometimes inserts rows in the opposite order from Tez, so rowids won't match. so we
-      // just check whether the bucket contents change during compaction.
-      expectedRsBucket0 = rsBucket0;
-      expectedRsBucket1 = rsBucket1;
-    }
+    Assert.assertEquals(expectedRsBucket0, rsBucket0);
+    Assert.assertEquals(expectedRsBucket1, rsBucket1);
 
     // Run a compaction
     CompactorTestUtil
@@ -429,7 +408,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionNotPartitionedWithoutBuckets() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -503,7 +481,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWithoutBuckets() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction_wobuckets_1";
     String tempTableName = "tmp_txt_table_1";
@@ -526,7 +503,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWithoutBucketsInsertOverwrite() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction_wobuckets_2";
     String tempTableName = "tmp_txt_table_2";
@@ -571,7 +547,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
       boolean insertOverWrite, List<String> expectedDeltas, List<String> expectedDeleteDeltas,
       String expectedCompactedDeltaDirName, CompactionType compactionType) throws Exception {
 
-    Assume.assumeTrue(runsOnTez);
     TestDataProvider dataProvider = new TestDataProvider();
     dataProvider.createTableWithoutBucketWithMultipleSplits(dbName, tableName, tempTableName, true, true,
         insertOverWrite);
@@ -656,7 +631,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorAndMajorCompactionWithoutBuckets() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction_wobuckets_5";
     String tempTableName = "tmp_txt_table_5";
@@ -762,7 +736,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionNotPartitionedWithBuckets() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -840,7 +813,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionPartitionedWithoutBuckets() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -925,7 +897,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionPartitionedWithBuckets() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -1011,7 +982,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompaction10DeltaDirs() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -1069,7 +1039,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMultipleMinorCompactions() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -1121,7 +1090,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWhileStreaming() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
@@ -1159,7 +1127,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWhileStreamingAfterAbort() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
@@ -1189,7 +1156,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWhileStreamingWithAbort() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
@@ -1216,7 +1182,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWhileStreamingWithAbortInMiddle() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
@@ -1254,7 +1219,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMajorCompactionAfterMinor() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     // Create test table
@@ -1306,7 +1270,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionAfterMajor() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompactionAfterMajor";
     // Create test table
@@ -1361,7 +1324,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
 
   @Test
   public void testMinorCompactionWhileStreamingWithSplitUpdate() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testMinorCompaction";
     executeStatementOnDriver("drop table if exists " + tableName, driver);
@@ -1430,17 +1392,10 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
     List<String> rsBucket0PtnToday = executeStatementOnDriverAndReturnResults("select ROW__ID, * from  "
         + tblName + " where ROW__ID.bucketid = 536870912 and ds='today' order by a,b", driver);
     // Bucket 1, partition 'today'
-    List<String> rsBucket1PtnToday = executeStatementOnDriverAndReturnResults("select ROW__ID, * from  "
-        + tblName + " where ROW__ID.bucketid = 536936448 and ds='today' order by a,b", driver);
-    if (runsOnTez) {
-      Assert.assertEquals("pre-compaction read", expectedRsBucket0PtnToday, rsBucket0PtnToday);
-      Assert.assertEquals("pre-compaction read", expectedRsBucket1PtnToday, rsBucket1PtnToday);
-    } else {
-      // MR sometimes inserts rows in the opposite order from Tez, so rowids won't match. so we
-      // just check whether the bucket contents change during compaction.
-      expectedRsBucket0PtnToday = rsBucket0PtnToday;
-      expectedRsBucket1PtnToday = rsBucket1PtnToday;
-    }
+    List<String> rsBucket1PtnToday = executeStatementOnDriverAndReturnResults("select ROW__ID, * from  " + tblName
+        + " where ROW__ID.bucketid = 536936448 and ds='today' order by a,b", driver);
+    Assert.assertEquals("pre-compaction read", expectedRsBucket0PtnToday, rsBucket0PtnToday);
+    Assert.assertEquals("pre-compaction read", expectedRsBucket1PtnToday, rsBucket1PtnToday);
 
     //  Run major compaction and cleaner
     CompactorTestUtil
@@ -1512,7 +1467,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
   }
 
   @Test public void testMinorCompactionDb() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     testCompactionDb(CompactionType.MINOR, "delta_0000001_0000005_v0000011");
   }
 
@@ -1520,7 +1474,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
    * Minor compaction on a table with no deletes shouldn't result in any delete deltas.
    */
   @Test public void testJustInserts() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testJustInserts";
     // Create test table
@@ -1558,7 +1511,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
    * Minor compaction on a table with no insert deltas should result in just a delete delta.
    */
   @Test public void testJustDeletes() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testJustDeletes";
     // Create test table
@@ -1601,7 +1553,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
    * compaction was resulting in deltas named delta_1_y.
    */
   @Test public void testIowMinorMajor() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     String dbName = "default";
     String tableName = "testIowMinorMajor";
     // Create test table
@@ -1715,7 +1666,6 @@ public class TestCrudCompactorOnTez extends CompactorOnTezTest {
   }
 
   @Test public void testVectorizationOff() throws Exception {
-    Assume.assumeTrue(runsOnTez);
     conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, false);
     testMinorCompactionAfterMajor();
   }
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/QueryCompactorFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/QueryCompactorFactory.java
index 93dd85b..5bac881 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/QueryCompactorFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/QueryCompactorFactory.java
@@ -21,11 +21,14 @@ import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.txn.CompactionInfo;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Simple factory class, which returns an instance of {@link QueryCompactor}.
  */
 final class QueryCompactorFactory {
+  static final private Logger LOG = LoggerFactory.getLogger(QueryCompactorFactory.class.getName());
 
   /**
    * Factory class, no need to expose constructor.
@@ -51,13 +54,15 @@ final class QueryCompactorFactory {
    * @return {@link QueryCompactor} or null.
    */
   static QueryCompactor getQueryCompactor(Table table, HiveConf configuration, CompactionInfo compactionInfo) {
-    if (!AcidUtils.isInsertOnlyTable(table.getParameters()) && HiveConf
-        .getBoolVar(configuration, HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED)) {
+    if (!AcidUtils.isInsertOnlyTable(table.getParameters())
+        && HiveConf.getBoolVar(configuration, HiveConf.ConfVars.COMPACTOR_CRUD_QUERY_BASED)) {
+      if (!"tez".equalsIgnoreCase(HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
+        LOG.info("Query-based compaction is only supported on tez. Falling back to MR compaction.");
+        return null;
+      }
       if (compactionInfo.isMajorCompaction()) {
         return new MajorQueryCompactor();
-      } else if (!compactionInfo.isMajorCompaction() && "tez"
-          .equalsIgnoreCase(HiveConf.getVar(configuration, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
-        // query based minor compactigenerateAddMmTaskson is only supported on tez
+      } else {
         return new MinorQueryCompactor();
       }
     }