You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/11/27 20:46:28 UTC
(orc) branch branch-1.9 updated: ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create
This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.9
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.9 by this push:
new 5df38259f ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create
5df38259f is described below
commit 5df38259ffa06aee318346472f966f505ca40ef7
Author: yebukong <ye...@qq.com>
AuthorDate: Mon Nov 27 12:37:46 2023 -0800
ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create
- I have adjusted the calculation of `reqBytes` and `readBytes` ranges in the `org.apache.orc.impl.RecordReaderUtils.ChunkReader#create` method from `Integer` to `Long`, and added validation for `Integer` overflow
- Adds more test cases
This PR aims to fix [ORC-1528](https://issues.apache.org/jira/browse/ORC-1528)
org.apache.orc.impl.TestRecordReaderUtils#testBufferChunkOffsetExceedsMaxInt
Closes #1662 from yebukong/main.
Lead-authored-by: yebukong <ye...@qq.com>
Co-authored-by: yebukong <ye...@live.com>
Signed-off-by: Dongjoon Hyun <do...@apache.org>
(cherry picked from commit e554765801c8cc0f25713aaee09701ca47250d01)
Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
.../core/src/java/org/apache/orc/impl/IOUtils.java | 5 ++
.../org/apache/orc/impl/RecordReaderUtils.java | 21 ++++---
.../org/apache/orc/impl/TestRecordReaderUtils.java | 68 ++++++++++++++++++++++
3 files changed, 87 insertions(+), 7 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/IOUtils.java b/java/core/src/java/org/apache/orc/impl/IOUtils.java
index ff13bdaec..e2554f008 100644
--- a/java/core/src/java/org/apache/orc/impl/IOUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/IOUtils.java
@@ -30,6 +30,11 @@ public final class IOUtils {
public static final int DEFAULT_BUFFER_SIZE = 8192;
+ /**
+ * The maximum size of array to allocate, value being the same as {@link java.util.Hashtable}
+ */
+ public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
/**
* Returns a new byte array of size {@link #DEFAULT_BUFFER_SIZE}.
*
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
index ede844276..141cc7aa1 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
@@ -760,18 +760,18 @@ public class RecordReaderUtils {
}
static ChunkReader create(BufferChunk from, BufferChunk to) {
- long f = Integer.MAX_VALUE;
- long e = Integer.MIN_VALUE;
+ long f = Long.MAX_VALUE;
+ long e = Long.MIN_VALUE;
- long cf = Integer.MAX_VALUE;
- long ef = Integer.MIN_VALUE;
- int reqBytes = 0;
+ long cf = Long.MAX_VALUE;
+ long ef = Long.MIN_VALUE;
+ long reqBytes = 0L;
BufferChunk current = from;
while (current != to.next) {
f = Math.min(f, current.getOffset());
e = Math.max(e, current.getEnd());
- if (ef == Integer.MIN_VALUE || current.getOffset() <= ef) {
+ if (ef == Long.MIN_VALUE || current.getOffset() <= ef) {
cf = Math.min(cf, current.getOffset());
ef = Math.max(ef, current.getEnd());
} else {
@@ -782,7 +782,14 @@ public class RecordReaderUtils {
current = (BufferChunk) current.next;
}
reqBytes += ef - cf;
- return new ChunkReader(from, to, (int) (e - f), reqBytes);
+ if (reqBytes > IOUtils.MAX_ARRAY_SIZE) {
+ throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
+ }
+ long readBytes = e - f;
+ if (readBytes > IOUtils.MAX_ARRAY_SIZE) {
+ throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
+ }
+ return new ChunkReader(from, to, (int) readBytes, (int) reqBytes);
}
static ChunkReader create(BufferChunk from, int minSeekSize) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
index 08890b37d..617fc740e 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
@@ -21,10 +21,13 @@ package org.apache.orc.impl;
import org.junit.jupiter.api.Test;
import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
import java.util.Objects;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
import static org.junit.jupiter.api.Assertions.assertTrue;
class TestRecordReaderUtils {
@@ -140,6 +143,71 @@ class TestRecordReaderUtils {
assertEquals(chunkReader.getReadBytes(), chunkReader.getFrom().getData().array().length);
}
+ @Test
+ public void testChunkReaderCreateOffsetExceedsMaxInt() {
+ List<long[]> mockData = Arrays.asList(
+ new long[]{15032282586L, 15032298848L},
+ new long[]{15032298848L, 15032299844L},
+ new long[]{15032299844L, 15032377804L},
+ new long[]{15058260587L, 15058261632L},
+ new long[]{15058261632L, 15058288409L},
+ new long[]{15058288409L, 15058288862L},
+ new long[]{15058339730L, 15058340775L},
+ new long[]{15058340775L, 15058342439L},
+ new long[]{15058449794L, 15058449982L},
+ new long[]{15058449982L, 15058451700L},
+ new long[]{15058451700L, 15058451749L},
+ new long[]{15058484358L, 15058484422L},
+ new long[]{15058484422L, 15058484862L},
+ new long[]{15058484862L, 15058484878L}
+ );
+ TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder();
+ mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0])));
+ BufferChunkList rangeList = rangeBuilder.build();
+
+ RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728);
+ long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0];
+ long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum();
+ assertEquals(chunkReader.getReadBytes(), readBytes);
+ assertEquals(chunkReader.getReqBytes(), reqBytes);
+ }
+
+ @Test
+ public void testChunkReaderCreateReqBytesAndReadBytesValidation() {
+ BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder()
+ .range(0, IOUtils.MAX_ARRAY_SIZE)
+ .range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1)
+ .range(2L * IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE - 4)
+ .range(3L * IOUtils.MAX_ARRAY_SIZE, 2)
+ .build();
+
+ // reqBytes,readBytes boundary value
+ RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(0), 0);
+ assertEquals(chunkReader.getReadBytes(), IOUtils.MAX_ARRAY_SIZE);
+ assertEquals(chunkReader.getReqBytes(), IOUtils.MAX_ARRAY_SIZE);
+
+ // reqBytes > IOUtils.MAX_ARRAY_SIZE validation
+ assertThrowsExactly(IllegalArgumentException.class,
+ () -> RecordReaderUtils.ChunkReader.create(rangeList.get(1), 0),
+ () -> String.format("invalid reqBytes value %d,out of bounds %d",
+ rangeList.get(1).getLength(), IOUtils.MAX_ARRAY_SIZE)
+ );
+
+ // readBytes > IOUtils.MAX_ARRAY_SIZE validation
+ assertThrowsExactly(IllegalArgumentException.class,
+ () -> RecordReaderUtils.ChunkReader.create(rangeList.get(2), 100),
+ () -> String.format("invalid readBytes value %d,out of bounds %d",
+ rangeList.get(3).getEnd() - rangeList.get(2).getOffset(), IOUtils.MAX_ARRAY_SIZE)
+ );
+ }
+
+ private static byte[] byteBufferToArray(ByteBuffer buf) {
+ byte[] resultArray = new byte[buf.remaining()];
+ ByteBuffer buffer = buf.slice();
+ buffer.get(resultArray);
+ return resultArray;
+ }
+
private ByteBuffer makeByteBuffer(int length, long offset) {
byte[] readBytes = new byte[length];
for (int i = 0; i < readBytes.length; i++) {