You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/12/28 20:00:27 UTC

[orc] branch branch-1.7 updated: ORC-1065: Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail #979

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new 546f72a  ORC-1065: Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail #979
546f72a is described below

commit 546f72a1111d36d35ab89c8798f7866f8b404570
Author: cxzl25 <cx...@users.noreply.github.com>
AuthorDate: Wed Dec 29 03:55:21 2021 +0800

    ORC-1065: Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail #979
    
    ### What changes were proposed in this pull request?
    Use buffer limit as `readSize` to avoid `IndexOutOfBoundsException`.
    
    **main**
    https://github.com/apache/orc/blob/3a2cb60e4ab6af6305c351fbdb51b98f460f64a0/java/core/src/java/org/apache/orc/impl/ReaderImpl.java#L720-L725
    
    **branch-1.5**
    https://github.com/apache/orc/blob/5f88704d9bd36fc55b57a60c2fbbd35980b1b7e5/java/core/src/java/org/apache/orc/impl/ReaderImpl.java#L487-L490
    
    ### Why are the changes needed?
    ORC-251 removed `ReaderImpl.extractFileTail`.
    ORC-685 added `ReaderImpl.extractFileTail` back.
    
    In ORC-685, the file length is used as `readSize`. When the buffer is read from the cache, it can be shorter than the file, so indexing it by the file length is incorrect and results in an IndexOutOfBoundsException.
    ```
    long readSize = fileLen != -1? fileLen: buffer.limit();
    int psLen = buffer.get((int) (readSize-1)) & 0xff;
    ```
    ```
    Caused by: java.lang.IndexOutOfBoundsException
        at java.nio.Buffer.checkIndex(Buffer.java:540)
        at java.nio.HeapByteBuffer.get(HeapByteBuffer.java:139)
        at org.apache.orc.impl.ReaderImpl.extractFileTail(ReaderImpl.java:726)
        at org.apache.hadoop.hive.ql.io.orc.LocalCache.getAndValidate(LocalCache.java:103)
        at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$ETLSplitStrategy.getSplits(OrcInputFormat.java:798)
        at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$ETLSplitStrategy.runGetSplitsSync(OrcInputFormat.java:916)
        at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$ETLSplitStrategy.generateSplitWork(OrcInputFormat.java:885)
        at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.scheduleSplits(OrcInputFormat.java:1759)
        at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1703)
    ```
    
    ### How was this patch tested?
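    Tested locally; this commit also adds the unit test `TestReaderImpl.testExtractFileTailIndexOutOfBoundsException`.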
    
    (cherry picked from commit f53b1491ba8895fa40edd4c3002f51273495831a)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../core/src/java/org/apache/orc/impl/ReaderImpl.java |  4 ++--
 .../src/test/org/apache/orc/impl/TestReaderImpl.java  | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 402152e..2ddd1ae 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -720,9 +720,9 @@ public class ReaderImpl implements Reader {
   public static OrcTail extractFileTail(ByteBuffer buffer, long fileLen, long modificationTime)
       throws IOException {
     OrcProto.PostScript ps;
-    long readSize = fileLen != -1 ? fileLen : buffer.limit();
+    long readSize = buffer.limit();
     OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
-    fileTailBuilder.setFileLength(readSize);
+    fileTailBuilder.setFileLength(fileLen != -1 ? fileLen : readSize);
 
     int psLen = buffer.get((int) (readSize - 1)) & 0xff;
     int psOffset = (int) (readSize - 1 - psLen);
diff --git a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
index 900d5e0..9d23d72 100644
--- a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
@@ -445,4 +445,23 @@ public class TestReaderImpl {
     CheckFileWithSargs("bad_bloom_filter_1.6.11.orc", "ORC C++ 1.6.11");
     CheckFileWithSargs("bad_bloom_filter_1.6.0.orc", "ORC C++ ");
   }
+
+  @Test
+  public void testExtractFileTailIndexOutOfBoundsException() throws Exception {
+    Configuration conf = new Configuration();
+    Path path = new Path(workDir, "demo-11-none.orc");
+    FileSystem fs = path.getFileSystem(conf);
+    FileStatus fileStatus = fs.getFileStatus(path);
+    try (ReaderImpl reader = (ReaderImpl) OrcFile.createReader(path,
+            OrcFile.readerOptions(conf).filesystem(fs))) {
+      OrcTail tail = reader.extractFileTail(fs, path, Long.MAX_VALUE);
+      ByteBuffer tailBuffer = tail.getSerializedTail();
+
+      OrcTail extractedTail = ReaderImpl.extractFileTail(tailBuffer, fileStatus.getLen(), fileStatus.getModificationTime());
+
+      assertEquals(tail.getFileLength(), extractedTail.getFileLength());
+      assertEquals(tail.getFooter().getMetadataList(), extractedTail.getFooter().getMetadataList());
+      assertEquals(tail.getFooter().getStripesList(), extractedTail.getFooter().getStripesList());
+    }
+  }
 }