You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ay...@apache.org on 2022/12/23 12:59:00 UTC
[hive] branch master updated: HIVE-26885: Iceberg: Parquet Vectorized V2 reads fail with NPE. (#3892). (Ayush Saxena, reviewed by Denys Kuzmenko)
This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new da42736483c HIVE-26885: Iceberg: Parquet Vectorized V2 reads fail with NPE. (#3892). (Ayush Saxena, reviewed by Denys Kuzmenko)
da42736483c is described below
commit da42736483ce110997f924a746c63b4bc8a8d5b7
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Fri Dec 23 18:28:48 2022 +0530
HIVE-26885: Iceberg: Parquet Vectorized V2 reads fail with NPE. (#3892). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
.../iceberg/mr/hive/vector/HiveBatchIterator.java | 2 +-
.../mr/hive/vector/TestHiveIcebergVectorization.java | 17 +++++++++++++++--
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
index 22a42f2953e..3b543b22aca 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/vector/HiveBatchIterator.java
@@ -65,7 +65,7 @@ public final class HiveBatchIterator implements CloseableIterator<HiveBatchConte
batch.size = 0;
}
- if (recordReader instanceof RowPositionAwareVectorizedRecordReader) {
+ if (batch.size != 0 && recordReader instanceof RowPositionAwareVectorizedRecordReader) {
rowOffset = ((RowPositionAwareVectorizedRecordReader) recordReader).getRowNumber();
}
diff --git a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
index c35fa22568c..18059d41838 100644
--- a/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
+++ b/iceberg/iceberg-handler/src/test/java/org/apache/iceberg/mr/hive/vector/TestHiveIcebergVectorization.java
@@ -19,6 +19,7 @@
package org.apache.iceberg.mr.hive.vector;
+import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -136,8 +137,20 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE
/**
* Tests HiveDeleteFilter implementation correctly filtering rows from VRBs.
*/
+ @Test
+ public void testHiveDeleteFilterWithEmptyBatches() {
+ Map<String, String> props = Maps.newHashMap();
+ props.put("parquet.block.size", "8192");
+ props.put("parquet.page.row.count.limit", "20");
+ testVectorizedReadWithDeleteFilter(props);
+ }
+
@Test
public void testHiveDeleteFilter() {
+ testVectorizedReadWithDeleteFilter(Collections.emptyMap());
+ }
+
+ private void testVectorizedReadWithDeleteFilter(Map<String, String> props) {
// The Avro "vectorized" case should actually serve as compareTo scenario to non-vectorized reading, because
// there's no vectorization for Avro and it falls back to the non-vectorized implementation
Assume.assumeTrue(isVectorized && testTableType == TestTables.TestTableType.HIVE_CATALOG);
@@ -158,8 +171,8 @@ public class TestHiveIcebergVectorization extends HiveIcebergStorageHandlerWithE
for (int i = 0; i < records.size(); ++i) {
records.get(i).setField("customer_id", (long) i);
}
- testTables.createTable(shell, "vectordelete", schema,
- PartitionSpec.unpartitioned(), fileFormat, records, 2);
+
+ testTables.createTable(shell, "vectordelete", schema, PartitionSpec.unpartitioned(), fileFormat, records, 2, props);
// Delete every odd row until 6000
shell.executeStatement("DELETE FROM vectordelete WHERE customer_id % 2 = 1 and customer_id < 6000");