You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@parquet.apache.org by ga...@apache.org on 2020/01/22 12:09:18 UTC
[parquet-mr] 03/04: PARQUET-1744: Some filters throws
ArrayIndexOutOfBoundsException (#732)
This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch parquet-1.11.x
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
commit 49de5b475c21c241dff9b5020c17401cc6707b87
Author: Gabor Szadovszky <ga...@apache.org>
AuthorDate: Fri Jan 10 07:08:01 2020 +0100
PARQUET-1744: Some filters throws ArrayIndexOutOfBoundsException (#732)
(cherry picked from commit f0fc29fd3341046f7d46a4a02a7c9ec3d7cd3e46)
---
.../internal/column/columnindex/BoundaryOrder.java | 32 +++++++
.../column/columnindex/TestBoundaryOrder.java | 56 +++++++++++
.../filter2/columnindex/TestColumnIndexFilter.java | 106 ++++++++++++++-------
3 files changed, 159 insertions(+), 35 deletions(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java
index e47b5b3..8a7cee0 100644
--- a/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java
+++ b/parquet-column/src/main/java/org/apache/parquet/internal/column/columnindex/BoundaryOrder.java
@@ -84,6 +84,10 @@ public enum BoundaryOrder {
@Override
OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = 0;
int right = length;
do {
@@ -100,6 +104,10 @@ public enum BoundaryOrder {
@Override
OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = 0;
int right = length;
do {
@@ -116,6 +124,10 @@ public enum BoundaryOrder {
@Override
OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = -1;
int right = length - 1;
do {
@@ -132,6 +144,10 @@ public enum BoundaryOrder {
@Override
OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = -1;
int right = length - 1;
do {
@@ -207,6 +223,10 @@ public enum BoundaryOrder {
@Override
OfInt gt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = -1;
int right = length - 1;
do {
@@ -223,6 +243,10 @@ public enum BoundaryOrder {
@Override
OfInt gtEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = -1;
int right = length - 1;
do {
@@ -239,6 +263,10 @@ public enum BoundaryOrder {
@Override
OfInt lt(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = 0;
int right = length;
do {
@@ -255,6 +283,10 @@ public enum BoundaryOrder {
@Override
OfInt ltEq(ColumnIndexBase<?>.ValueComparator comparator) {
int length = comparator.arrayLength();
+ if (length == 0) {
+ // No matching rows if the column index contains null pages only
+ return IndexIterator.EMPTY;
+ }
int left = 0;
int right = length;
do {
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java
index 3d2a924..97cbf07 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/column/columnindex/TestBoundaryOrder.java
@@ -183,6 +183,7 @@ public class TestBoundaryOrder {
private static final int RAND_COUNT = 2000;
private static final ColumnIndexBase<?> RAND_ASCENDING;
private static final ColumnIndexBase<?> RAND_DESCENDING;
+ private static final ColumnIndexBase<?> ALL_NULL_PAGES;
private static final SpyValueComparatorBuilder SPY_COMPARATOR_BUILDER = new SpyValueComparatorBuilder();
static {
ColumnIndexBuilder builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
@@ -233,6 +234,13 @@ public class TestBoundaryOrder {
builder.add(stats(it.next(), it.next()));
}
RAND_DESCENDING = (ColumnIndexBase<?>) builder.build();
+
+ builder = ColumnIndexBuilder.getBuilder(TYPE, Integer.MAX_VALUE);
+ // Add a couple of empty stats so column index will contain null pages only
+ for (int i = 0; i < 10; ++i) {
+ builder.add(Statistics.createStats(TYPE));
+ }
+ ALL_NULL_PAGES = (ColumnIndexBase<?>) builder.build();
}
private static Statistics<?> stats(int min, int max) {
@@ -267,6 +275,14 @@ public class TestBoundaryOrder {
BoundaryOrder.UNORDERED::eq,
BoundaryOrder.DESCENDING::eq,
DESCENDING.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
+ BoundaryOrder.UNORDERED::eq,
+ BoundaryOrder.ASCENDING::eq,
+ ALL_NULL_PAGES.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
+ BoundaryOrder.UNORDERED::eq,
+ BoundaryOrder.DESCENDING::eq,
+ ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
@@ -305,6 +321,14 @@ public class TestBoundaryOrder {
BoundaryOrder.UNORDERED::gt,
BoundaryOrder.DESCENDING::gt,
DESCENDING.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
+ BoundaryOrder.UNORDERED::gt,
+ BoundaryOrder.ASCENDING::gt,
+ ALL_NULL_PAGES.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
+ BoundaryOrder.UNORDERED::gt,
+ BoundaryOrder.DESCENDING::gt,
+ ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
@@ -343,6 +367,14 @@ public class TestBoundaryOrder {
BoundaryOrder.UNORDERED::gtEq,
BoundaryOrder.DESCENDING::gtEq,
DESCENDING.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
+ BoundaryOrder.UNORDERED::gtEq,
+ BoundaryOrder.ASCENDING::gtEq,
+ ALL_NULL_PAGES.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
+ BoundaryOrder.UNORDERED::gtEq,
+ BoundaryOrder.DESCENDING::gtEq,
+ ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
@@ -381,6 +413,14 @@ public class TestBoundaryOrder {
BoundaryOrder.UNORDERED::lt,
BoundaryOrder.DESCENDING::lt,
DESCENDING.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
+ BoundaryOrder.UNORDERED::lt,
+ BoundaryOrder.ASCENDING::lt,
+ ALL_NULL_PAGES.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
+ BoundaryOrder.UNORDERED::lt,
+ BoundaryOrder.DESCENDING::lt,
+ ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
@@ -419,6 +459,14 @@ public class TestBoundaryOrder {
BoundaryOrder.UNORDERED::ltEq,
BoundaryOrder.DESCENDING::ltEq,
DESCENDING.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
+ BoundaryOrder.UNORDERED::ltEq,
+ BoundaryOrder.ASCENDING::ltEq,
+ ALL_NULL_PAGES.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
+ BoundaryOrder.UNORDERED::ltEq,
+ BoundaryOrder.DESCENDING::ltEq,
+ ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = SINGLE_FROM - 1; i <= SINGLE_TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
@@ -457,6 +505,14 @@ public class TestBoundaryOrder {
BoundaryOrder.UNORDERED::notEq,
BoundaryOrder.DESCENDING::notEq,
DESCENDING.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with ASCENDING order in ",
+ BoundaryOrder.UNORDERED::notEq,
+ BoundaryOrder.ASCENDING::notEq,
+ ALL_NULL_PAGES.createValueComparator(i));
+ validateOperator("Mismatching page indexes for all null pages and value " + i + " with DESCENDING order in ",
+ BoundaryOrder.UNORDERED::notEq,
+ BoundaryOrder.DESCENDING::notEq,
+ ALL_NULL_PAGES.createValueComparator(i));
}
for (int i = FROM - 1; i <= TO + 1; ++i) {
ColumnIndexBase<?>.ValueComparator singleComparator = SINGLE.createValueComparator(i);
diff --git a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
index ae27214..f37a343 100644
--- a/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
+++ b/parquet-column/src/test/java/org/apache/parquet/internal/filter2/columnindex/TestColumnIndexFilter.java
@@ -27,6 +27,7 @@ import static org.apache.parquet.filter2.predicate.FilterApi.eq;
import static org.apache.parquet.filter2.predicate.FilterApi.gt;
import static org.apache.parquet.filter2.predicate.FilterApi.gtEq;
import static org.apache.parquet.filter2.predicate.FilterApi.intColumn;
+import static org.apache.parquet.filter2.predicate.FilterApi.longColumn;
import static org.apache.parquet.filter2.predicate.FilterApi.lt;
import static org.apache.parquet.filter2.predicate.FilterApi.ltEq;
import static org.apache.parquet.filter2.predicate.FilterApi.notEq;
@@ -38,10 +39,11 @@ import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.DESCE
import static org.apache.parquet.internal.column.columnindex.BoundaryOrder.UNORDERED;
import static org.apache.parquet.internal.filter2.columnindex.ColumnIndexFilter.calculateRowRanges;
import static org.apache.parquet.io.api.Binary.fromString;
-import static org.apache.parquet.schema.OriginalType.UTF8;
+import static org.apache.parquet.schema.LogicalTypeAnnotation.stringType;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.DOUBLE;
import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT32;
+import static org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName.INT64;
import static org.apache.parquet.schema.Types.optional;
import static org.junit.Assert.assertArrayEquals;
@@ -161,55 +163,56 @@ public class TestColumnIndexFilter {
/**
* <pre>
- * row column1 column2 column3 column4 (no column index)
- * ------0------ ------0------ ------0------ ------0------
- * 0. 1 Zulu 2.03
- * ------1------ ------1------ ------1------ ------1------
- * 1. 2 Yankee 4.67
- * 2. 3 Xray 3.42
- * 3. 4 Whiskey 8.71
+ * row column1 column2 column3 column4 column5
+ * (no column index)
+ * ------0------ ------0------ ------0------ ------0------ ------0------
+ * 0. 1 Zulu 2.03 null
+ * ------1------ ------1------ ------1------ ------1------ ------1------
+ * 1. 2 Yankee 4.67 null
+ * 2. 3 Xray 3.42 null
+ * 3. 4 Whiskey 8.71 null
* ------2------ ------2------
- * 4. 5 Victor 0.56
- * 5. 6 Uniform 4.30
+ * 4. 5 Victor 0.56 null
+ * 5. 6 Uniform 4.30 null
* ------2------ ------3------
- * 6. null null null
+ * 6. null null null null
* ------2------ ------4------
- * 7. 7 Tango 3.50
+ * 7. 7 Tango 3.50 null
* ------3------
- * 8. 7 null 3.14
+ * 8. 7 null 3.14 null
* ------3------
- * 9. 7 null null
+ * 9. 7 null null null
* ------3------
- * 10. null null 9.99
+ * 10. null null 9.99 null
* ------4------
- * 11. 8 Sierra 8.78
+ * 11. 8 Sierra 8.78 null
* ------5------
- * 12. 9 Romeo 9.56
- * 13. 10 Quebec 2.71
+ * 12. 9 Romeo 9.56 null
+ * 13. 10 Quebec 2.71 null
* ------4------
- * 14. 11 Papa 5.71
- * 15. 12 Oscar 4.09
+ * 14. 11 Papa 5.71 null
+ * 15. 12 Oscar 4.09 null
* ------5------ ------4------ ------6------
- * 16. 13 November null
- * 17. 14 Mike null
- * 18. 15 Lima 0.36
- * 19. 16 Kilo 2.94
- * 20. 17 Juliett 4.23
+ * 16. 13 November null null
+ * 17. 14 Mike null null
+ * 18. 15 Lima 0.36 null
+ * 19. 16 Kilo 2.94 null
+ * 20. 17 Juliett 4.23 null
* ------5------ ------6------ ------7------
- * 21. 18 India null
- * 22. 19 Hotel 5.32
+ * 21. 18 India null null
+ * 22. 19 Hotel 5.32 null
* ------5------
- * 23. 20 Golf 4.17
- * 24. 21 Foxtrot 7.92
- * 25. 22 Echo 7.95
+ * 23. 20 Golf 4.17 null
+ * 24. 21 Foxtrot 7.92 null
+ * 25. 22 Echo 7.95 null
* ------6------
- * 26. 23 Delta null
+ * 26. 23 Delta null null
* ------6------
- * 27. 24 Charlie null
+ * 27. 24 Charlie null null
* ------8------
- * 28. 25 Bravo null
+ * 28. 25 Bravo null null
* ------7------
- * 29. 26 Alfa null
+ * 29. 26 Alfa null null
* </pre>
*/
private static final long TOTAL_ROW_COUNT = 30;
@@ -231,7 +234,7 @@ public class TestColumnIndexFilter {
.addPage(6)
.addPage(3)
.build();
- private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(UTF8).named("column2"), DESCENDING)
+ private static final ColumnIndex COLUMN2_CI = new CIBuilder(optional(BINARY).as(stringType()).named("column2"), DESCENDING)
.addPage(0, "Zulu", "Zulu")
.addPage(0, "Whiskey", "Yankee")
.addPage(1, "Tango", "Victor")
@@ -281,6 +284,14 @@ public class TestColumnIndexFilter {
.addPage(7)
.addPage(2)
.build();
+ private static final ColumnIndex COLUMN5_CI = new CIBuilder(optional(INT64).named("column5"), ASCENDING)
+ .addNullPage(1)
+ .addNullPage(29)
+ .build();
+ private static final OffsetIndex COLUMN5_OI = new OIBuilder()
+ .addPage(1)
+ .addPage(29)
+ .build();
private static final ColumnIndexStore STORE = new ColumnIndexStore() {
@Override
public ColumnIndex getColumnIndex(ColumnPath column) {
@@ -293,6 +304,8 @@ public class TestColumnIndexFilter {
return COLUMN3_CI;
case "column4":
return COLUMN4_CI;
+ case "column5":
+ return COLUMN5_CI;
default:
return null;
}
@@ -309,6 +322,8 @@ public class TestColumnIndexFilter {
return COLUMN3_OI;
case "column4":
return COLUMN4_OI;
+ case "column5":
+ return COLUMN5_OI;
default:
throw new MissingOffsetIndexException(column);
}
@@ -461,4 +476,25 @@ public class TestColumnIndexFilter {
TOTAL_ROW_COUNT);
}
+ @Test
+ public void testFilteringWithAllNullPages() {
+ Set<ColumnPath> paths = paths("column1", "column5");
+
+ assertAllRows(calculateRowRanges(FilterCompat.get(
+ notEq(longColumn("column5"), 1234567L)),
+ STORE, paths, TOTAL_ROW_COUNT),
+ TOTAL_ROW_COUNT);
+ assertAllRows(calculateRowRanges(FilterCompat.get(
+ or(gtEq(intColumn("column1"), 10),
+ notEq(longColumn("column5"), 1234567L))),
+ STORE, paths, TOTAL_ROW_COUNT),
+ TOTAL_ROW_COUNT);
+ assertRows(calculateRowRanges(FilterCompat.get(
+ eq(longColumn("column5"), 1234567L)),
+ STORE, paths, TOTAL_ROW_COUNT));
+ assertRows(calculateRowRanges(FilterCompat.get(
+ and(lt(intColumn("column1"), 20),
+ gtEq(longColumn("column5"), 1234567L))),
+ STORE, paths, TOTAL_ROW_COUNT));
+ }
}