You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hudi.apache.org by me...@apache.org on 2022/06/06 04:07:32 UTC

[hudi] branch master updated: [HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw NullPointerException (#5755)

This is an automated email from the ASF dual-hosted git repository.

mengtao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 73b0be3c96 [HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw NullPointerException (#5755)
73b0be3c96 is described below

commit 73b0be3c962112efe541ae04fe0ea6f298558f17
Author: marchpure <ma...@126.com>
AuthorDate: Mon Jun 6 12:07:26 2022 +0800

    [HUDI-4192] HoodieHFileReader scan top cells after bottom cells throw NullPointerException (#5755)
    
    Seek to the top cell of the HFile first to avoid the NullPointerException
---
 .../io/storage/TestHoodieHFileReaderWriter.java    | 32 ++++++++++++++++++++++
 .../apache/hudi/io/storage/HoodieHFileReader.java  |  6 ++++
 2 files changed, 38 insertions(+)

diff --git a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
index da6f717258..baede154c9 100644
--- a/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
+++ b/hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/storage/TestHoodieHFileReaderWriter.java
@@ -294,6 +294,38 @@ public class TestHoodieHFileReaderWriter extends TestHoodieReaderWriterBase {
         StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
             .collect(Collectors.toList());
     assertEquals(Collections.emptyList(), recordsByPrefix);
+
+    // filter for "key50" and "key1" : entries from key50 and 'key10 to key19' should be matched.
+    List<GenericRecord> expectedKey50and1s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key1")
+        || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
+    iterator =
+        hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key1"), avroSchema);
+    recordsByPrefix =
+        StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+            .collect(Collectors.toList());
+    assertEquals(expectedKey50and1s, recordsByPrefix);
+
+    // filter for "key50" and "key0" : entries from key50 and 'key00 to key09' should be matched.
+    List<GenericRecord> expectedKey50and0s = allRecords.stream().filter(entry -> (entry.get("_row_key").toString()).contains("key0")
+        || (entry.get("_row_key").toString()).contains("key50")).collect(Collectors.toList());
+    iterator =
+        hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key50", "key0"), avroSchema);
+    recordsByPrefix =
+        StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+            .collect(Collectors.toList());
+    assertEquals(expectedKey50and0s, recordsByPrefix);
+
+    // filter for "key1" and "key0" : entries from 'key10 to key19' and 'key00 to key09' should be matched.
+    List<GenericRecord> expectedKey1sand0s = expectedKey1s;
+    expectedKey1sand0s.addAll(allRecords.stream()
+        .filter(entry -> (entry.get("_row_key").toString()).contains("key0"))
+        .collect(Collectors.toList()));
+    iterator =
+        hfileReader.getRecordsByKeyPrefixIterator(Arrays.asList("key1", "key0"), avroSchema);
+    recordsByPrefix =
+        StreamSupport.stream(Spliterators.spliteratorUnknownSize(iterator, Spliterator.ORDERED), false)
+            .collect(Collectors.toList());
+    assertEquals(expectedKey1sand0s, recordsByPrefix);
   }
 
   @ParameterizedTest
diff --git a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
index 899c2475da..0bf31d2a25 100644
--- a/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
+++ b/hudi-common/src/main/java/org/apache/hudi/io/storage/HoodieHFileReader.java
@@ -258,6 +258,12 @@ public class HoodieHFileReader<R extends IndexedRecord> implements HoodieFileRea
       if (!scanner.next()) {
         return Collections.emptyIterator();
       }
+    } else if (val == -1) {
+      // If the scanner is already at the top of the HFile, avoid triggering seekTo again.
+      Option<Cell> headerCell = Option.fromJavaOptional(scanner.getReader().getFirstKey());
+      if (headerCell.isPresent() && !headerCell.get().equals(scanner.getCell())) {
+        scanner.seekTo();
+      }
     }
 
     class KeyPrefixIterator implements Iterator<GenericRecord> {