You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sz...@apache.org on 2022/07/20 06:55:23 UTC
[hive] branch master updated: HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new aa8891ad648 HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
aa8891ad648 is described below
commit aa8891ad6480b4a50c91a60a2eacd1871128482b
Author: Adam Szita <40...@users.noreply.github.com>
AuthorDate: Wed Jul 20 08:55:16 2022 +0200
HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
---
.../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java | 14 ++++++++------
.../org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java | 2 --
2 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 5f1c9158aab..25881408a63 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -818,7 +819,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT)) ||
(tableProps.containsKey("metaTable") && isValidMetadataTable(tableProps.getProperty("metaTable"))) ||
hasOrcTimeInSchema(tableProps, tableSchema) ||
- !hasParquetListColumnSupport(tableProps, tableSchema)) {
+ !hasParquetNestedTypeWithinListOrMap(tableProps, tableSchema)) {
conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
}
}
@@ -839,20 +840,21 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
}
/**
- * Vectorized reads of parquet files from columns with list type is only supported if the element is a primitive type
- * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details
+ * Vectorized reads of parquet files from columns with list or map type are only supported if the nested types are
+ * of primitive type category.
+ * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details on nested types under lists
* @param tableProps iceberg table properties
* @param tableSchema iceberg table schema
* @return
*/
- private static boolean hasParquetListColumnSupport(Properties tableProps, Schema tableSchema) {
+ private static boolean hasParquetNestedTypeWithinListOrMap(Properties tableProps, Schema tableSchema) {
if (!FileFormat.PARQUET.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
return true;
}
for (Types.NestedField field : tableSchema.columns()) {
- if (field.type().isListType()) {
- for (Types.NestedField nestedField : field.type().asListType().fields()) {
+ if (field.type().isListType() || field.type().isMapType()) {
+ for (Types.NestedField nestedField : field.type().asNestedType().fields()) {
if (!nestedField.type().isPrimitiveType()) {
return false;
}
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
index efb08c36d95..31a589a7c96 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
@@ -334,8 +334,6 @@ public class TestHiveIcebergInserts extends HiveIcebergStorageHandlerWithEngineB
@Test
public void testStructMapWithNull() throws IOException {
- Assume.assumeTrue("Vectorized parquet read throws class cast exception",
- !(fileFormat == FileFormat.PARQUET && isVectorized));
Schema schema = new Schema(required(1, "id", Types.LongType.get()),
required(2, "mapofstructs", Types.MapType.ofRequired(3, 4, Types.StringType.get(),
Types.StructType.of(required(5, "something", Types.StringType.get()),