You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by lp...@apache.org on 2022/04/26 14:07:06 UTC
[hive] branch master updated: HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)

This is an automated email from the ASF dual-hosted git repository.

lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new dbdcf00dd63 HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
dbdcf00dd63 is described below

commit dbdcf00dd6334acaded4369fc0c1ccbdd142255e
Author: László Pintér <47...@users.noreply.github.com>
AuthorDate: Tue Apr 26 16:06:55 2022 +0200

    HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java     | 18 ++++++++++++++----
 .../hive/HiveIcebergStorageHandlerWithEngineBase.java  |  3 +--
 .../apache/iceberg/mr/hive/TestHiveIcebergSelects.java |  2 +-
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index f23b0d16c10..6fdddb9b343 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -78,6 +78,7 @@ import org.apache.hadoop.mapred.JobID;
 import org.apache.hadoop.mapred.JobStatus;
 import org.apache.hadoop.mapred.OutputCommitter;
 import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.iceberg.FileFormat;
 import org.apache.iceberg.ManifestFile;
 import org.apache.iceberg.PartitionSpecParser;
 import org.apache.iceberg.Schema;
@@ -167,14 +168,14 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> map) {
     overlayTableProperties(conf, tableDesc, map);
     // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables
-    fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+    fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
   }
 
   @Override
   public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
     overlayTableProperties(conf, tableDesc, map);
     // Until the vectorized reader can handle delete files, let's fall back to non-vector mode for V2 tables
-    fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+    fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
     // For Tez, setting the committer here is enough to make sure it'll be part of the jobConf
     map.put("mapred.output.committer.class", HiveIcebergNoJobCommitter.class.getName());
     // For MR, the jobConf is set only in configureJobConf, so we're setting the write key here to detect it over there
@@ -744,8 +745,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return column;
   }
 
-  private void fallbackToNonVectorizedModeForV2(Properties tableProps) {
-    if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION))) {
+  /**
+   * If any of the following checks is true we fall back to non vectorized mode:
+   * <ul>
+   *   <li>iceberg format-version is "2"</li>
+   *   <li>fileformat is set to avro</li>
+   * </ul>
+   * @param tableProps table properties, must be not null
+   */
+  private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) {
+    if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) ||
+        FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
     }
   }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
index 95f03cdade0..6de80dfd32e 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
@@ -112,8 +112,7 @@ public abstract class HiveIcebergStorageHandlerWithEngineBase {
         if (javaVersion.equals("1.8")) {
           testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false});
           // test for vectorization=ON in case of ORC and PARQUET format with Tez engine
-          if ((fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) &&
-               "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
+          if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
             testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, true});
           }
         }
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
index 29051d0f5b0..ff54a9b0e2a 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
@@ -63,7 +63,6 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB
   @Test
   public void testCBOWithSelectedColumnsNonOverlapJoin() throws IOException {
     shell.setHiveSessionValue("hive.cbo.enable", true);
-
     testTables.createTable(shell, "products", PRODUCT_SCHEMA, fileFormat, PRODUCT_RECORDS);
     testTables.createTable(shell, "orders", ORDER_SCHEMA, fileFormat, ORDER_RECORDS);
 
@@ -190,6 +189,7 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB
 
   @Test
   public void testScanTableCaseInsensitive() throws IOException {
+    shell.setHiveSessionValue(InputFormatConfig.CASE_SENSITIVE, false);
     testTables.createTable(shell, "customers",
         HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA_WITH_UPPERCASE, fileFormat,
         HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);