You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/11/27 20:46:28 UTC

(orc) branch branch-1.9 updated: ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.9
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.9 by this push:
     new 5df38259f ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create
5df38259f is described below

commit 5df38259ffa06aee318346472f966f505ca40ef7
Author: yebukong <ye...@qq.com>
AuthorDate: Mon Nov 27 12:37:46 2023 -0800

    ORC-1528: Fix readBytes potential overflow in RecordReaderUtils.ChunkReader#create
    
    - I have adjusted the calculation of `reqBytes` and `readBytes` ranges in the `org.apache.orc.impl.RecordReaderUtils.ChunkReader#create` method from `Integer` to `Long`, and added validation for `Integer` overflow
    
    - Adds more test cases
    
    This PR aims to fix [ORC-1528](https://issues.apache.org/jira/browse/ORC-1528)
    
    org.apache.orc.impl.TestRecordReaderUtils#testBufferChunkOffsetExceedsMaxInt
    
    Closes #1662 from yebukong/main.
    
    Lead-authored-by: yebukong <ye...@qq.com>
    Co-authored-by: yebukong <ye...@live.com>
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit e554765801c8cc0f25713aaee09701ca47250d01)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../core/src/java/org/apache/orc/impl/IOUtils.java |  5 ++
 .../org/apache/orc/impl/RecordReaderUtils.java     | 21 ++++---
 .../org/apache/orc/impl/TestRecordReaderUtils.java | 68 ++++++++++++++++++++++
 3 files changed, 87 insertions(+), 7 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/IOUtils.java b/java/core/src/java/org/apache/orc/impl/IOUtils.java
index ff13bdaec..e2554f008 100644
--- a/java/core/src/java/org/apache/orc/impl/IOUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/IOUtils.java
@@ -30,6 +30,11 @@ public final class IOUtils {
 
   public static final int DEFAULT_BUFFER_SIZE = 8192;
 
+  /**
+   * The maximum size of array to allocate, value being the same as {@link java.util.Hashtable}
+   */
+  public static final int MAX_ARRAY_SIZE = Integer.MAX_VALUE - 8;
+
   /**
    * Returns a new byte array of size {@link #DEFAULT_BUFFER_SIZE}.
    *
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
index ede844276..141cc7aa1 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderUtils.java
@@ -760,18 +760,18 @@ public class RecordReaderUtils {
     }
 
     static ChunkReader create(BufferChunk from, BufferChunk to) {
-      long f = Integer.MAX_VALUE;
-      long e = Integer.MIN_VALUE;
+      long f = Long.MAX_VALUE;
+      long e = Long.MIN_VALUE;
 
-      long cf = Integer.MAX_VALUE;
-      long ef = Integer.MIN_VALUE;
-      int reqBytes = 0;
+      long cf = Long.MAX_VALUE;
+      long ef = Long.MIN_VALUE;
+      long reqBytes = 0L;
 
       BufferChunk current = from;
       while (current != to.next) {
         f = Math.min(f, current.getOffset());
         e = Math.max(e, current.getEnd());
-        if (ef == Integer.MIN_VALUE || current.getOffset() <= ef) {
+        if (ef == Long.MIN_VALUE || current.getOffset() <= ef) {
           cf = Math.min(cf, current.getOffset());
           ef = Math.max(ef, current.getEnd());
         } else {
@@ -782,7 +782,14 @@ public class RecordReaderUtils {
         current = (BufferChunk) current.next;
       }
       reqBytes += ef - cf;
-      return new ChunkReader(from, to, (int) (e - f), reqBytes);
+      if (reqBytes > IOUtils.MAX_ARRAY_SIZE) {
+        throw new IllegalArgumentException("invalid reqBytes value " + reqBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
+      }
+      long readBytes = e - f;
+      if (readBytes > IOUtils.MAX_ARRAY_SIZE) {
+        throw new IllegalArgumentException("invalid readBytes value " + readBytes + ",out of bounds " + IOUtils.MAX_ARRAY_SIZE);
+      }
+      return new ChunkReader(from, to, (int) readBytes, (int) reqBytes);
     }
 
     static ChunkReader create(BufferChunk from, int minSeekSize) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
index 08890b37d..617fc740e 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderUtils.java
@@ -21,10 +21,13 @@ package org.apache.orc.impl;
 import org.junit.jupiter.api.Test;
 
 import java.nio.ByteBuffer;
+import java.util.Arrays;
+import java.util.List;
 import java.util.Objects;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
+import static org.junit.jupiter.api.Assertions.assertThrowsExactly;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 class TestRecordReaderUtils {
@@ -140,6 +143,71 @@ class TestRecordReaderUtils {
     assertEquals(chunkReader.getReadBytes(), chunkReader.getFrom().getData().array().length);
   }
 
+  @Test
+  public void testChunkReaderCreateOffsetExceedsMaxInt() {
+    List<long[]> mockData = Arrays.asList(
+      new long[]{15032282586L, 15032298848L},
+      new long[]{15032298848L, 15032299844L},
+      new long[]{15032299844L, 15032377804L},
+      new long[]{15058260587L, 15058261632L},
+      new long[]{15058261632L, 15058288409L},
+      new long[]{15058288409L, 15058288862L},
+      new long[]{15058339730L, 15058340775L},
+      new long[]{15058340775L, 15058342439L},
+      new long[]{15058449794L, 15058449982L},
+      new long[]{15058449982L, 15058451700L},
+      new long[]{15058451700L, 15058451749L},
+      new long[]{15058484358L, 15058484422L},
+      new long[]{15058484422L, 15058484862L},
+      new long[]{15058484862L, 15058484878L}
+    );
+    TestOrcLargeStripe.RangeBuilder rangeBuilder = new TestOrcLargeStripe.RangeBuilder();
+    mockData.forEach(e -> rangeBuilder.range(e[0], (int) (e[1] - e[0])));
+    BufferChunkList rangeList = rangeBuilder.build();
+
+    RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(), 134217728);
+    long readBytes = mockData.get(mockData.size() - 1)[1] - mockData.get(0)[0];
+    long reqBytes = mockData.stream().mapToLong(e -> (int) (e[1] - e[0])).sum();
+    assertEquals(chunkReader.getReadBytes(), readBytes);
+    assertEquals(chunkReader.getReqBytes(), reqBytes);
+  }
+
+  @Test
+  public void testChunkReaderCreateReqBytesAndReadBytesValidation() {
+    BufferChunkList rangeList = new TestOrcLargeStripe.RangeBuilder()
+      .range(0, IOUtils.MAX_ARRAY_SIZE)
+      .range(1L + IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE + 1)
+      .range(2L * IOUtils.MAX_ARRAY_SIZE, IOUtils.MAX_ARRAY_SIZE - 4)
+      .range(3L * IOUtils.MAX_ARRAY_SIZE, 2)
+      .build();
+
+    // reqBytes,readBytes boundary value
+    RecordReaderUtils.ChunkReader chunkReader = RecordReaderUtils.ChunkReader.create(rangeList.get(0), 0);
+    assertEquals(chunkReader.getReadBytes(), IOUtils.MAX_ARRAY_SIZE);
+    assertEquals(chunkReader.getReqBytes(), IOUtils.MAX_ARRAY_SIZE);
+
+    // reqBytes > IOUtils.MAX_ARRAY_SIZE validation
+    assertThrowsExactly(IllegalArgumentException.class,
+      () -> RecordReaderUtils.ChunkReader.create(rangeList.get(1), 0),
+      () -> String.format("invalid reqBytes value %d,out of bounds %d",
+                          rangeList.get(1).getLength(), IOUtils.MAX_ARRAY_SIZE)
+    );
+
+    // readBytes > IOUtils.MAX_ARRAY_SIZE validation
+    assertThrowsExactly(IllegalArgumentException.class,
+      () -> RecordReaderUtils.ChunkReader.create(rangeList.get(2), 100),
+      () -> String.format("invalid readBytes value %d,out of bounds %d",
+                          rangeList.get(3).getEnd() - rangeList.get(2).getOffset(), IOUtils.MAX_ARRAY_SIZE)
+    );
+  }
+
+  private static byte[] byteBufferToArray(ByteBuffer buf) {
+    byte[] resultArray = new byte[buf.remaining()];
+    ByteBuffer buffer = buf.slice();
+    buffer.get(resultArray);
+    return resultArray;
+  }
+
   private ByteBuffer makeByteBuffer(int length, long offset) {
     byte[] readBytes = new byte[length];
     for (int i = 0; i < readBytes.length; i++) {