Posted to dev@parquet.apache.org by "Yuming Wang (Jira)" <ji...@apache.org> on 2020/01/09 04:43:00 UTC

[jira] [Updated] (PARQUET-1744) Some filter throws ArrayIndexOutOfBoundsException

     [ https://issues.apache.org/jira/browse/PARQUET-1744?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Yuming Wang updated PARQUET-1744:
---------------------------------
    Description: 
How to reproduce:
* Build Spark
{code:sh}
git clone https://github.com/apache/spark.git && cd spark
git fetch origin pull/26804/head:PARQUET-1744
git checkout PARQUET-1744
build/sbt package
bin/spark-shell
{code}
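
A quick sanity check (not part of the original repro steps) that the build actually picked up parquet-mr 1.11.0, the affected version; Version.FULL_VERSION is the version string from parquet-common:
{code:scala}
// Prints the parquet-mr version string the Spark build loaded;
// it is expected to start with "parquet-mr version 1.11.0".
println(org.apache.parquet.Version.FULL_VERSION)
{code}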
* Prepare data:
{code:scala}
spark.sql("create table t1(a int, b int, c int) using parquet")
spark.sql("insert into t1 values(1,0,0)")
spark.sql("insert into t1 values(2,0,1)")
spark.sql("insert into t1 values(3,1,0)")
spark.sql("insert into t1 values(4,1,1)")
spark.sql("insert into t1 values(5,null,0)")
spark.sql("insert into t1 values(6,null,1)")
spark.sql("insert into t1 values(7,null,null)")
{code}
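
Each INSERT above writes its own single-row file, so the files for rows 5, 6 and 7 contain pages where b and/or c are entirely null. The "length 0" in the exceptions below looks consistent with empty column-index min/max lists for such all-null pages (my reading of the traces, not verified against the parquet-mr source). To see the per-row files from spark-shell:
{code:scala}
// input_file_name() is a built-in Spark SQL function that returns the
// physical file each row was read from; show(false) disables truncation.
spark.sql("select input_file_name(), a, b, c from t1").show(false)
{code}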
* Run test 1
{code:scala}
scala> spark.sql("select a+120 from t1 where b<10 OR c=1").show
java.lang.reflect.InvocationTargetException
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
	at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index -1 out of bounds for length 0
	at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMin(IntColumnIndexBuilder.java:74)
	at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.lt(BoundaryOrder.java:123)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:262)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$2(ColumnIndexFilter.java:131)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:131)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$Lt.accept(Operators.java:209)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
	at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
	at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
	at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
	... 29 more
+---------+
|(a + 120)|
+---------+
|      124|
|      121|
|      122|
|      123|
+---------+
{code}
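
For reference, this is the pushed-down predicate of test 1 spelled out with parquet-mr's FilterApi (a sketch matching the Operators$Or / Operators$Lt frames above; the boxing and backticks are Scala-side details, not part of the repro):
{code:scala}
import org.apache.parquet.filter2.predicate.FilterApi
import org.apache.parquet.filter2.predicate.FilterApi.{intColumn, lt}

// b < 10 OR c = 1, as parquet-mr sees it. FilterApi.eq needs backticks
// in Scala because eq is also a method on AnyRef.
val pred = FilterApi.or(
  lt(intColumn("b"), Int.box(10)),
  FilterApi.`eq`(intColumn("c"), Int.box(1)))
{code}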

* Run test 2

{code:scala}
scala> spark.sql("select a+140 from t1 where not (b<10 AND c=1)").show
java.lang.reflect.InvocationTargetException
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
	at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
	at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMax(IntColumnIndexBuilder.java:79)
	at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.gtEq(BoundaryOrder.java:107)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:257)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$5(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$GtEq.accept(Operators.java:249)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
	at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
	at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
	at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
	... 29 more
java.lang.reflect.InvocationTargetException
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
	at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
	at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMax(IntColumnIndexBuilder.java:79)
	at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.gtEq(BoundaryOrder.java:107)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:257)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$5(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$GtEq.accept(Operators.java:249)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
	at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
	at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
	at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
	... 29 more
+---------+
|(a + 140)|
+---------+
|      141|
|      143|
+---------+
{code}
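
Even though test 2 uses NOT/AND/<, the trace shows Operators$GtEq under Operators$Or. That is presumably parquet-mr's De Morgan rewrite of not() nodes; a sketch of the same rewrite using LogicalInverseRewriter (the class FilterCompat.get applies before filtering):
{code:scala}
import org.apache.parquet.filter2.predicate.{FilterApi, LogicalInverseRewriter}
import org.apache.parquet.filter2.predicate.FilterApi.{and, intColumn, lt, not}

// not(b < 10 and c = 1) is rewritten to (b >= 10 or c != 1), which is
// what the column-index filter then evaluates.
val rewritten = LogicalInverseRewriter.rewrite(
  not(and(lt(intColumn("b"), Int.box(10)),
          FilterApi.`eq`(intColumn("c"), Int.box(1)))))
println(rewritten) // roughly: or(gteq(b, 10), noteq(c, 1))
{code}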

* Run test 3

{code:scala}
scala> spark.sql("select a+150 from t1 where not (c=1 AND b<10)").show
java.lang.reflect.InvocationTargetException
	at jdk.internal.reflect.GeneratedMethodAccessor59.invoke(Unknown Source)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
	at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
	at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMax(IntColumnIndexBuilder.java:79)
	at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.gtEq(BoundaryOrder.java:107)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:257)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$5(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$GtEq.accept(Operators.java:249)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
	at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
	at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
	at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
	... 28 more
java.lang.reflect.InvocationTargetException
	at jdk.internal.reflect.GeneratedMethodAccessor59.invoke(Unknown Source)
	at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.base/java.lang.reflect.Method.invoke(Method.java:566)
	at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:155)
	at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:131)
	at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$2(ParquetFileFormat.scala:319)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:116)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:169)
	at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
	at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:486)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.columnartorow_nextBatch_0$(Unknown Source)
	at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
	at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
	at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:726)
	at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:339)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:872)
	at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:872)
	at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
	at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:349)
	at org.apache.spark.rdd.RDD.iterator(RDD.scala:313)
	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
	at org.apache.spark.scheduler.Task.run(Task.scala:127)
	at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:441)
	at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:444)
	at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
	at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
	at java.base/java.lang.Thread.run(Thread.java:834)
Caused by: java.lang.ArrayIndexOutOfBoundsException: Index 0 out of bounds for length 0
	at org.apache.parquet.internal.column.columnindex.IntColumnIndexBuilder$IntColumnIndex$1.compareValueToMax(IntColumnIndexBuilder.java:79)
	at org.apache.parquet.internal.column.columnindex.BoundaryOrder$2.gtEq(BoundaryOrder.java:107)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:257)
	at org.apache.parquet.internal.column.columnindex.ColumnIndexBuilder$ColumnIndexBase.visit(ColumnIndexBuilder.java:64)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.lambda$visit$5(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.applyPredicate(ColumnIndexFilter.java:176)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:146)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$GtEq.accept(Operators.java:249)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:186)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.visit(ColumnIndexFilter.java:56)
	at org.apache.parquet.filter2.predicate.Operators$Or.accept(Operators.java:321)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:86)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter$1.visit(ColumnIndexFilter.java:81)
	at org.apache.parquet.filter2.compat.FilterCompat$FilterPredicateCompat.accept(FilterCompat.java:137)
	at org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges(ColumnIndexFilter.java:81)
	at org.apache.parquet.hadoop.ParquetFileReader.getRowRanges(ParquetFileReader.java:961)
	at org.apache.parquet.hadoop.ParquetFileReader.getFilteredRecordCount(ParquetFileReader.java:766)
	... 28 more
+---------+
|(a + 150)|
+---------+
|      151|
|      153|
+---------+
{code}
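
Every failing frame goes through ColumnIndexFilter, so disabling column-index filtering should dodge the exception until a proper fix lands (a workaround sketch, untested against this exact build; parquet.filter.columnindex.enabled is the parquet-mr 1.11.0 property behind ParquetInputFormat.COLUMN_INDEX_FILTERING_ENABLED):
{code:scala}
// Turn off the column-index filter for this session and re-run one of
// the failing queries; statistics-based row-group filtering still applies.
spark.sparkContext.hadoopConfiguration
  .setBoolean("parquet.filter.columnindex.enabled", false)
spark.sql("select a+120 from t1 where b<10 OR c=1").show
{code}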




  was:
How to reproduce:
{noformat}

{noformat}



> Some filter throws ArrayIndexOutOfBoundsException
> -------------------------------------------------
>
>                 Key: PARQUET-1744
>                 URL: https://issues.apache.org/jira/browse/PARQUET-1744
>             Project: Parquet
>          Issue Type: Sub-task
>          Components: parquet-mr
>    Affects Versions: 1.11.0
>            Reporter: Yuming Wang
>            Priority: Major
>



--
This message was sent by Atlassian Jira
(v8.3.4#803005)