You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/12/28 20:00:27 UTC
[orc] branch branch-1.7 updated: ORC-1065: Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail #979
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.7 by this push:
new 546f72a ORC-1065: Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail #979
546f72a is described below
commit 546f72a1111d36d35ab89c8798f7866f8b404570
Author: cxzl25 <cx...@users.noreply.github.com>
AuthorDate: Wed Dec 29 03:55:21 2021 +0800
ORC-1065: Fix IndexOutOfBoundsException in ReaderImpl.extractFileTail #979
### What changes were proposed in this pull request?
Use buffer limit as `readSize` to avoid `IndexOutOfBoundsException`.
**main**
https://github.com/apache/orc/blob/3a2cb60e4ab6af6305c351fbdb51b98f460f64a0/java/core/src/java/org/apache/orc/impl/ReaderImpl.java#L720-L725
**branch-1.5**
https://github.com/apache/orc/blob/5f88704d9bd36fc55b57a60c2fbbd35980b1b7e5/java/core/src/java/org/apache/orc/impl/ReaderImpl.java#L487-L490
### Why are the changes needed?
ORC-251 removed `ReaderImpl.extractFileTail`.
ORC-685 added `ReaderImpl.extractFileTail` back.
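ORC-685 used the file length as `readSize`. When the buffer is read from a cache, it may contain only the serialized file tail rather than the whole file, so its limit is smaller than the file length; indexing the buffer with the file length then throws `IndexOutOfBoundsException`.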
```
long readSize = fileLen != -1? fileLen: buffer.limit();
int psLen = buffer.get((int) (readSize-1)) & 0xff;
```
```
Caused by: java.lang.IndexOutOfBoundsException
at java.nio.Buffer.checkIndex(Buffer.java:540)
at java.nio.HeapByteBuffer.get(HeapByteBuffer.java:139)
at org.apache.orc.impl.ReaderImpl.extractFileTail(ReaderImpl.java:726)
at org.apache.hadoop.hive.ql.io.orc.LocalCache.getAndValidate(LocalCache.java:103)
at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$ETLSplitStrategy.getSplits(OrcInputFormat.java:798)
at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$ETLSplitStrategy.runGetSplitsSync(OrcInputFormat.java:916)
at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat$ETLSplitStrategy.generateSplitWork(OrcInputFormat.java:885)
at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.scheduleSplits(OrcInputFormat.java:1759)
at org.apache.hadoop.hive.ql.io.orc.OrcInputFormat.generateSplitsInfo(OrcInputFormat.java:1703)
```
### How was this patch tested?
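Tested locally; this commit also adds the unit test `TestReaderImpl.testExtractFileTailIndexOutOfBoundsException`.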
(cherry picked from commit f53b1491ba8895fa40edd4c3002f51273495831a)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../core/src/java/org/apache/orc/impl/ReaderImpl.java | 4 ++--
.../src/test/org/apache/orc/impl/TestReaderImpl.java | 19 +++++++++++++++++++
2 files changed, 21 insertions(+), 2 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 402152e..2ddd1ae 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -720,9 +720,9 @@ public class ReaderImpl implements Reader {
public static OrcTail extractFileTail(ByteBuffer buffer, long fileLen, long modificationTime)
throws IOException {
OrcProto.PostScript ps;
- long readSize = fileLen != -1 ? fileLen : buffer.limit();
+ long readSize = buffer.limit();
OrcProto.FileTail.Builder fileTailBuilder = OrcProto.FileTail.newBuilder();
- fileTailBuilder.setFileLength(readSize);
+ fileTailBuilder.setFileLength(fileLen != -1 ? fileLen : readSize);
int psLen = buffer.get((int) (readSize - 1)) & 0xff;
int psOffset = (int) (readSize - 1 - psLen);
diff --git a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
index 900d5e0..9d23d72 100644
--- a/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestReaderImpl.java
@@ -445,4 +445,23 @@ public class TestReaderImpl {
CheckFileWithSargs("bad_bloom_filter_1.6.11.orc", "ORC C++ 1.6.11");
CheckFileWithSargs("bad_bloom_filter_1.6.0.orc", "ORC C++ ");
}
+
+ @Test
+ public void testExtractFileTailIndexOutOfBoundsException() throws Exception {
+ Configuration conf = new Configuration();
+ Path path = new Path(workDir, "demo-11-none.orc");
+ FileSystem fs = path.getFileSystem(conf);
+ FileStatus fileStatus = fs.getFileStatus(path);
+ try (ReaderImpl reader = (ReaderImpl) OrcFile.createReader(path,
+ OrcFile.readerOptions(conf).filesystem(fs))) {
+ OrcTail tail = reader.extractFileTail(fs, path, Long.MAX_VALUE);
+ ByteBuffer tailBuffer = tail.getSerializedTail();
+
+ OrcTail extractedTail = ReaderImpl.extractFileTail(tailBuffer, fileStatus.getLen(), fileStatus.getModificationTime());
+
+ assertEquals(tail.getFileLength(), extractedTail.getFileLength());
+ assertEquals(tail.getFooter().getMetadataList(), extractedTail.getFooter().getMetadataList());
+ assertEquals(tail.getFooter().getStripesList(), extractedTail.getFooter().getStripesList());
+ }
+ }
}