You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2023/01/06 18:27:49 UTC
[orc] branch branch-1.7 updated: ORC-1332: Avoid NegativeArraySizeException when using searchArgument

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.7
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.7 by this push:
     new c0c7ab755 ORC-1332: Avoid NegativeArraySizeException when using searchArgument
c0c7ab755 is described below

commit c0c7ab755f2af0a72bf3ec2332fe425566efda23
Author: sychen <sy...@ctrip.com>
AuthorDate: Sun Dec 25 20:12:49 2022 -0800

    ORC-1332: Avoid NegativeArraySizeException when using searchArgument
    
    ### What changes were proposed in this pull request?
    searchArgument takes effect only when rowIndexStride is greater than 0.
    
    ### Why are the changes needed?
    When `orc.row.index.stride` is set to a negative number, using searchArgument will throw NegativeArraySizeException.
    
    ```java
    Caused by: java.lang.NegativeArraySizeException
            at org.apache.orc.impl.RecordReaderImpl$SargApplier.pickRowGroups(RecordReaderImpl.java:1164)
            at org.apache.orc.impl.RecordReaderImpl.pickRowGroups(RecordReaderImpl.java:1273)
            at org.apache.orc.impl.RecordReaderImpl.readStripe(RecordReaderImpl.java:1293)
            at org.apache.orc.impl.RecordReaderImpl.advanceStripe(RecordReaderImpl.java:1345)
            at org.apache.orc.impl.RecordReaderImpl.advanceToNextRow(RecordReaderImpl.java:1388)
            at org.apache.orc.impl.RecordReaderImpl.<init>(RecordReaderImpl.java:367)
    ```
    
    ### How was this patch tested?
    add UT
    
    Closes #1340 from cxzl25/ORC-1332.
    
    Authored-by: sychen <sy...@ctrip.com>
    Signed-off-by: William Hyun <wi...@apache.org>
    (cherry picked from commit ae84ad06e1f34b0c8d523fa3a5a10573ac354c54)
    Signed-off-by: William Hyun <wi...@apache.org>
    (cherry picked from commit 38aef7309c948f818e464a5afea2cd220af661fe)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 .../java/org/apache/orc/impl/RecordReaderImpl.java |  2 +-
 .../src/java/org/apache/orc/impl/WriterImpl.java   |  6 ++-
 .../org/apache/orc/impl/TestRecordReaderImpl.java  | 46 ++++++++++++++++++++++
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index 9116323b4..333cf5b9e 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -231,7 +231,7 @@ public class RecordReaderImpl implements RecordReader {
     this.fileIncluded = evolution.getFileIncluded();
     SearchArgument sarg = options.getSearchArgument();
     boolean[] rowIndexCols = new boolean[evolution.getFileIncluded().length];
-    if (sarg != null && rowIndexStride != 0) {
+    if (sarg != null && rowIndexStride > 0) {
       sargApp = new SargApplier(sarg,
           rowIndexStride,
           evolution,
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 82376ef8c..329b64ccc 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -182,7 +182,11 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
     this.encodingStrategy = opts.getEncodingStrategy();
     this.compressionStrategy = opts.getCompressionStrategy();
 
-    this.rowIndexStride = opts.getRowIndexStride();
+    if (opts.getRowIndexStride() >= 0) {
+      this.rowIndexStride = opts.getRowIndexStride();
+    } else {
+      this.rowIndexStride = 0;
+    }
 
     this.buildIndex = opts.isBuildIndex() && (rowIndexStride > 0);
     if (buildIndex && rowIndexStride < MIN_ROW_INDEX_STRIDE) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index e1ce5c881..85e502afb 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.PositionedReadable;
 import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.hive.common.io.DiskRangeList;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
@@ -65,6 +66,7 @@ import java.io.InputStream;
 import java.lang.reflect.Field;
 import java.nio.ByteBuffer;
 import java.nio.IntBuffer;
+import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.text.DateFormat;
@@ -2504,4 +2506,48 @@ public class TestRecordReaderImpl {
       }
     }
   }
+
+  @Test
+  public void testRowIndexStrideNegativeFilter() throws Exception {
+    Path testFilePath = new Path(workDir, "rowIndexStrideNegative.orc");
+    Configuration conf = new Configuration();
+    FileSystem fs = FileSystem.get(conf);
+    fs.delete(testFilePath, true);
+
+    TypeDescription schema =
+        TypeDescription.fromString("struct<str:string>");
+    Writer writer = OrcFile.createWriter(
+        testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema).rowIndexStride(-1));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    BytesColumnVector strVector = (BytesColumnVector) batch.cols[0];
+    for (int i = 0; i < 32 * 1024; i++) {
+      if (batch.size == batch.getMaxSize()) {
+        writer.addRowBatch(batch);
+        batch.reset();
+      }
+      byte[] value = String.format("row %06d", i).getBytes(StandardCharsets.UTF_8);
+      strVector.setRef(batch.size, value, 0, value.length);
+      ++batch.size;
+    }
+    writer.addRowBatch(batch);
+    batch.reset();
+    writer.close();
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(fs));
+    SearchArgument sarg = SearchArgumentFactory.newBuilder(conf)
+        .startNot().isNull("str", PredicateLeaf.Type.STRING).end()
+        .build();
+    RecordReader recordReader = reader.rows(reader.options().searchArgument(sarg, null));
+    batch = reader.getSchema().createRowBatch();
+    strVector = (BytesColumnVector) batch.cols[0];
+    long base = 0;
+    while (recordReader.nextBatch(batch)) {
+      for (int r = 0; r < batch.size; ++r) {
+        String value = String.format("row %06d", r + base);
+        assertEquals(value, strVector.toString(r), "row " + (r + base));
+      }
+      base += batch.size;
+    }
+  }
 }