You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2022/12/23 12:59:00 UTC

[hive] branch master updated: HIVE-26885: Iceberg: Parquet Vectorized V2 read fails with NPE. (#3892). (Ayush Saxena, reviewed by Denys Kuzmenko)

This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new da42736483c HIVE-26885: Iceberg: Parquet Vectorized V2 reads fails with NPE. (#3892). (Ayush Saxena, reviewed by Denys Kuzmenko)
da42736483c is described below

commit da42736483ce110997f924a746c63b4bc8a8d5b7
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Fri Dec 23 18:28:48 2022 +0530

    HIVE-26885: Iceberg: Parquet Vectorized V2 reads fails with NPE. (#3892). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
 .../iceberg/mr/hive/vector/HiveBatchIterator.java       |  2 +-
 .../mr/hive/vector/TestHiveIcebergVectorization.java    | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
index 22a42f2953e..3b543b22aca 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
@@ -65,7 +65,7 @@ public final class HiveBatchIterator implements CloseableIterator<HiveBatchConte
           batch.size = 0;
         }
 
-        if (recordReader instanceof RowPositionAwareVectorizedRecordReader) {
+        if (batch.size != 0 && recordReader instanceof RowPositionAwareVectorizedRecordReader) {
           rowOffset = ((RowPositionAwareVectorizedRecordReader) recordReader).getRowNumber();
         }
 
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
index c35fa22568c..18059d41838 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
@@ -19,6 +19,7 @@
 
 package org.apache.iceberg.mr.hive.vector;
 
+import java.util.Collections;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -136,8 +137,20 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE
   /**
    * Tests HiveDeleteFilter implementation correctly filtering rows from VRBs.
    */
+  @Test
+  public void testHiveDeleteFilterWithEmptyBatches() {
+    Map<String, String> props = Maps.newHashMap();
+    props.put("parquet.block.size", "8192");
+    props.put("parquet.page.row.count.limit", "20");
+    testVectorizedReadWithDeleteFilter(props);
+  }
+
   @Test
   public void testHiveDeleteFilter() {
+    testVectorizedReadWithDeleteFilter(Collections.emptyMap());
+  }
+
+  private void testVectorizedReadWithDeleteFilter(Map<String, String> props) {
     // The Avro "vectorized" case should actually serve as compareTo scenario to non-vectorized reading, because
     // there's no vectorization for Avro and it falls back to the non-vectorized implementation
     Assume.assumeTrue(isVectorized && testTableType == TestTables.TestTableType.HIVE_CATALOG);
@@ -158,8 +171,8 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE
     for (int i = 0; i < records.size(); ++i) {
       records.get(i).setField("customer_id", (long) i);
     }
-    testTables.createTable(shell, "vectordelete", schema,
-        PartitionSpec.unpartitioned(), fileFormat, records, 2);
+
+    testTables.createTable(shell, "vectordelete", schema, PartitionSpec.unpartitioned(), fileFormat, records, 2, props);
 
     // Delete every odd row until 6000
     shell.executeStatement("DELETE FROM vectordelete WHERE customer_id % 2 = 1 and customer_id < 6000");