You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by lp...@apache.org on 2022/04/26 14:07:06 UTC
[hive] branch master updated: HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
This is an automated email from the ASF dual-hosted git repository.
lpinter pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new dbdcf00dd63 HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
dbdcf00dd63 is described below
commit dbdcf00dd6334acaded4369fc0c1ccbdd142255e
Author: László Pintér <47...@users.noreply.github.com>
AuthorDate: Tue Apr 26 16:06:55 2022 +0200
HIVE-26169: Set non-vectorized mode as default when accessing iceberg tables in avro fileformat. (#3236) (Laszlo Pinter, reviewed by Marton Bod)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 18 ++++++++++++++----
.../hive/HiveIcebergStorageHandlerWithEngineBase.java | 3 +--
.../apache/iceberg/mr/hive/TestHiveIcebergSelects.java | 2 +-
3 files changed, 16 insertions(+), 7 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index f23b0d16c10..6fdddb9b343 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -78,6 +78,7 @@ import org.apache.hadoop.mapred.JobID;
import org.apache.hadoop.mapred.JobStatus;
import org.apache.hadoop.mapred.OutputCommitter;
import org.apache.hadoop.mapred.OutputFormat;
+import org.apache.iceberg.FileFormat;
import org.apache.iceberg.ManifestFile;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
@@ -167,14 +168,14 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
public void configureInputJobProperties(TableDesc tableDesc, Map<String, String> map) {
overlayTableProperties(conf, tableDesc, map);
// Fall back to non-vectorized mode for V2 tables (until the vectorized reader can handle delete files) and for Avro tables
- fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+ fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
}
@Override
public void configureOutputJobProperties(TableDesc tableDesc, Map<String, String> map) {
overlayTableProperties(conf, tableDesc, map);
// Fall back to non-vectorized mode for V2 tables (until the vectorized reader can handle delete files) and for Avro tables
- fallbackToNonVectorizedModeForV2(tableDesc.getProperties());
+ fallbackToNonVectorizedModeBasedOnProperties(tableDesc.getProperties());
// For Tez, setting the committer here is enough to make sure it'll be part of the jobConf
map.put("mapred.output.committer.class", HiveIcebergNoJobCommitter.class.getName());
// For MR, the jobConf is set only in configureJobConf, so we're setting the write key here to detect it over there
@@ -744,8 +745,17 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
return column;
}
- private void fallbackToNonVectorizedModeForV2(Properties tableProps) {
- if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION))) {
+ /**
+ * If any of the following checks hold, we fall back to non-vectorized mode:
+ * <ul>
+ * <li>iceberg format-version is "2"</li>
+ * <li>fileformat is set to avro</li>
+ * </ul>
+ * @param tableProps table properties, must not be null
+ */
+ private void fallbackToNonVectorizedModeBasedOnProperties(Properties tableProps) {
+ if ("2".equals(tableProps.get(TableProperties.FORMAT_VERSION)) ||
+ FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
}
}
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
index 95f03cdade0..6de80dfd32e 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandlerWithEngineBase.java
@@ -112,8 +112,7 @@ public abstract class HiveIcebergStorageHandlerWithEngineBase {
if (javaVersion.equals("1.8")) {
testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, false});
// test for vectorization=ON for every file format except METADATA with Tez engine
- if ((fileFormat == FileFormat.ORC || fileFormat == FileFormat.PARQUET) &&
- "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
+ if (fileFormat != FileFormat.METADATA && "tez".equals(engine) && MetastoreUtil.hive3PresentOnClasspath()) {
testParams.add(new Object[] {fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG, true});
}
}
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
index 29051d0f5b0..ff54a9b0e2a 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergSelects.java
@@ -63,7 +63,6 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB
@Test
public void testCBOWithSelectedColumnsNonOverlapJoin() throws IOException {
shell.setHiveSessionValue("hive.cbo.enable", true);
-
testTables.createTable(shell, "products", PRODUCT_SCHEMA, fileFormat, PRODUCT_RECORDS);
testTables.createTable(shell, "orders", ORDER_SCHEMA, fileFormat, ORDER_RECORDS);
@@ -190,6 +189,7 @@ public class TestHiveIcebergSelects extends HiveIcebergStorageHandlerWithEngineB
@Test
public void testScanTableCaseInsensitive() throws IOException {
+ shell.setHiveSessionValue(InputFormatConfig.CASE_SENSITIVE, false);
testTables.createTable(shell, "customers",
HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA_WITH_UPPERCASE, fileFormat,
HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);