Posted to commits@hive.apache.org by sz...@apache.org on 2022/07/20 06:55:23 UTC

[hive] branch master updated: HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)

This is an automated email from the ASF dual-hosted git repository.

szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new aa8891ad648 HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
aa8891ad648 is described below

commit aa8891ad6480b4a50c91a60a2eacd1871128482b
Author: Adam Szita <40...@users.noreply.github.com>
AuthorDate: Wed Jul 20 08:55:16 2022 +0200

    HIVE-26410: Reading nested types within maps in Parquet Iceberg is not supported with vectorization (#3455) (Adam Szita, reviewed by Laszlo Pinter)
---
 .../apache/iceberg/mr/hive/HiveIcebergStorageHandler.java  | 14 ++++++++------
 .../org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java |  2 --
 2 files changed, 8 insertions(+), 8 deletions(-)
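
In short, the storage handler now keeps vectorization disabled for Parquet-backed Iceberg tables whose map columns (not just list columns) carry non-primitive nested types, such as a map of structs. The sketch below is a simplified, standalone restatement of that check against the Iceberg schema API; the class and method names are illustrative only, and the real guard in HiveIcebergStorageHandler additionally looks at the table's default file format, Avro tables, metadata tables and ORC time columns, as the first hunk shows.

    import org.apache.iceberg.Schema;
    import org.apache.iceberg.types.Types;

    import static org.apache.iceberg.types.Types.NestedField.required;

    public class NestedTypeCheckSketch {

      // Same idea as the new hasParquetNestedTypeWithinListOrMap(): every list or map
      // column may only contain primitive nested types (element, key, value) for the
      // vectorized Parquet reader to be usable. Map columns were previously not inspected.
      static boolean onlyPrimitiveNestedTypesUnderListsAndMaps(Schema schema) {
        for (Types.NestedField field : schema.columns()) {
          if (field.type().isListType() || field.type().isMapType()) {
            for (Types.NestedField nested : field.type().asNestedType().fields()) {
              if (!nested.type().isPrimitiveType()) {
                return false;   // e.g. map<string, struct<...>> forces row-by-row reads
              }
            }
          }
        }
        return true;
      }

      public static void main(String[] args) {
        // Schema shaped like the one in testStructMapWithNull below: id + map of structs.
        Schema schema = new Schema(
            required(1, "id", Types.LongType.get()),
            required(2, "mapofstructs", Types.MapType.ofRequired(3, 4, Types.StringType.get(),
                Types.StructType.of(required(5, "something", Types.StringType.get())))));

        System.out.println("vectorization possible: " + onlyPrimitiveNestedTypesUnderListsAndMaps(schema));
        // prints: vectorization possible: false
      }
    }
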

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 5f1c9158aab..25881408a63 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -49,6 +49,7 @@ import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -818,7 +819,7 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
         FileFormat.AVRO.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT)) ||
         (tableProps.containsKey("metaTable") && isValidMetadataTable(tableProps.getProperty("metaTable"))) ||
         hasOrcTimeInSchema(tableProps, tableSchema) ||
-        !hasParquetListColumnSupport(tableProps, tableSchema)) {
+        !hasParquetNestedTypeWithinListOrMap(tableProps, tableSchema)) {
       conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
     }
   }
@@ -839,20 +840,21 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
   }
 
   /**
-   * Vectorized reads of parquet files from columns with list type is only supported if the element is a primitive type
-   * check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details
+   * Vectorized reads of parquet files from columns with list or map type are only supported if the nested types are
+   * of primitive type category.
+   * Check {@link VectorizedParquetRecordReader#checkListColumnSupport} for details on nested types under lists.
    * @param tableProps iceberg table properties
    * @param tableSchema iceberg table schema
    * @return
    */
-  private static boolean hasParquetListColumnSupport(Properties tableProps, Schema tableSchema) {
+  private static boolean hasParquetNestedTypeWithinListOrMap(Properties tableProps, Schema tableSchema) {
     if (!FileFormat.PARQUET.name().equalsIgnoreCase(tableProps.getProperty(TableProperties.DEFAULT_FILE_FORMAT))) {
       return true;
     }
 
     for (Types.NestedField field : tableSchema.columns()) {
-      if (field.type().isListType()) {
-        for (Types.NestedField nestedField : field.type().asListType().fields()) {
+      if (field.type().isListType() || field.type().isMapType()) {
+        for (Types.NestedField nestedField : field.type().asNestedType().fields()) {
           if (!nestedField.type().isPrimitiveType()) {
             return false;
           }
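
As the first hunk shows, a failed check does not make the query error out; the handler just turns vectorized execution off for that job. The following minimal, hypothetical sketch only illustrates that fallback path; the boolean flag stands in for the result of the nested-type check above.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    public class VectorizationFallbackSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();

        // Hypothetical result of the nested-type check for a map<string, struct<...>> column.
        boolean nestedTypesVectorizable = false;

        if (!nestedTypesVectorizable) {
          // Same flag the storage handler sets above: the query falls back to the
          // non-vectorized Parquet readers instead of throwing at read time.
          conf.setBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, false);
        }

        System.out.println(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname + " = " +
            conf.getBoolean(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED.varname, true));
      }
    }
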
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
index efb08c36d95..31a589a7c96 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergInserts.java
@@ -334,8 +334,6 @@ public class TestHiveIcebergInserts extends HiveIcebergStorageHandlerWithEngineB
 
   @Test
   public void testStructMapWithNull() throws IOException {
-    Assume.assumeTrue("Vectorized parquet read throws class cast exception",
-        !(fileFormat == FileFormat.PARQUET && isVectorized));
     Schema schema = new Schema(required(1, "id", Types.LongType.get()),
         required(2, "mapofstructs", Types.MapType.ofRequired(3, 4, Types.StringType.get(),
             Types.StructType.of(required(5, "something", Types.StringType.get()),