You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ti...@apache.org on 2018/08/01 18:36:14 UTC
[drill] 03/03: DRILL-5796 : implement ROWS_MATCH enum to keep
inside rowgroup the filter result information,
used to prune the filter if all rows match.
This is an automated email from the ASF dual-hosted git repository.
timothyfarkas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit efd6d29910d155cd84692ee8aafff3eb06c6e391
Author: jbimbert <je...@amadeus.com>
AuthorDate: Tue Jun 12 19:22:20 2018 +0200
DRILL-5796 : implement ROWS_MATCH enum to keep inside rowgroup the filter result information, used to prune the filter if all rows match.
closes #1298
---
.../exec/expr/stat/ParquetBooleanPredicate.java | 48 ++-
.../exec/expr/stat/ParquetComparisonPredicate.java | 78 ++---
.../exec/expr/stat/ParquetFilterPredicate.java | 13 +-
.../drill/exec/expr/stat/ParquetIsPredicate.java | 125 +++++---
.../drill/exec/expr/stat/RangeExprEvaluator.java | 33 +-
.../store/parquet/AbstractParquetGroupScan.java | 8 +-
.../exec/store/parquet/ParquetPushDownFilter.java | 18 +-
.../store/parquet/ParquetRGFilterEvaluator.java | 52 +++-
.../drill/exec/store/parquet/RowGroupInfo.java | 5 +
.../parquet/stat/ParquetFooterStatCollector.java | 2 +-
.../parquet/stat/ParquetMetaStatCollector.java | 2 +-
.../store/parquet/TestParquetFilterPushDown.java | 335 +++++++++++++++------
.../test/resources/parquet/multirowgroup2.parquet | Bin 0 -> 598 bytes
.../parquet/multirowgroupwithNulls.parquet | Bin 0 -> 2063 bytes
.../resources/parquetFilterPush/tfTbl/ff1.parquet | Bin 0 -> 251 bytes
.../resources/parquetFilterPush/tfTbl/ft0.parquet | Bin 0 -> 251 bytes
.../resources/parquetFilterPush/tfTbl/tt1.parquet | Bin 0 -> 251 bytes
17 files changed, 510 insertions(+), 209 deletions(-)
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicate.java
index fa5c467..f427dc6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicate.java
@@ -46,15 +46,29 @@ public abstract class ParquetBooleanPredicate<C extends Comparable<C>> extends B
ExpressionPosition pos
) {
return new ParquetBooleanPredicate<C>(name, args, pos) {
+ /**
+ * Evaluates a compound "AND" filter on the statistics of a RowGroup (the filter reads "filterA and filterB").
+ * Return value :<ul>
+ * <li>ALL : only if all filters return ALL
+ * <li>NONE : if one filter at least returns NONE
+ * <li>SOME : all other cases
+ * </ul>
+ */
@Override
- public boolean canDrop(RangeExprEvaluator<C> evaluator) {
- // "and" : as long as one branch is OK to drop, we can drop it.
+ public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
+ RowsMatch resultMatch = RowsMatch.ALL;
for (LogicalExpression child : this) {
- if (child instanceof ParquetFilterPredicate && ((ParquetFilterPredicate)child).canDrop(evaluator)) {
- return true;
+ if (child instanceof ParquetFilterPredicate) {
+ switch (((ParquetFilterPredicate) child).matches(evaluator)) {
+ case NONE:
+              return RowsMatch.NONE; // No rows comply with one filter part => can drop RG
+ case SOME:
+ resultMatch = RowsMatch.SOME;
+ default: // Do nothing
+ }
}
}
- return false;
+ return resultMatch;
}
};
}
@@ -66,15 +80,29 @@ public abstract class ParquetBooleanPredicate<C extends Comparable<C>> extends B
ExpressionPosition pos
) {
return new ParquetBooleanPredicate<C>(name, args, pos) {
+ /**
+ * Evaluates a compound "OR" filter on the statistics of a RowGroup (the filter reads "filterA or filterB").
+ * Return value :<ul>
+ * <li>NONE : only if all filters return NONE
+ * <li>ALL : if one filter at least returns ALL
+ * <li>SOME : all other cases
+ * </ul>
+ */
@Override
- public boolean canDrop(RangeExprEvaluator<C> evaluator) {
+ public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
+ RowsMatch resultMatch = RowsMatch.NONE;
for (LogicalExpression child : this) {
- // "or" : as long as one branch is NOT ok to drop, we can NOT drop it.
- if (!(child instanceof ParquetFilterPredicate) || !((ParquetFilterPredicate)child).canDrop(evaluator)) {
- return false;
+ if (child instanceof ParquetFilterPredicate) {
+ switch (((ParquetFilterPredicate) child).matches(evaluator)) {
+ case ALL:
+ return RowsMatch.ALL; // One at least is ALL => can drop filter but not RG
+ case SOME:
+ resultMatch = RowsMatch.SOME;
+ default: // Do nothing
+ }
}
}
- return true;
+ return resultMatch;
}
};
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicate.java
index ebceefb..531cbab 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicate.java
@@ -26,8 +26,9 @@ import org.apache.parquet.column.statistics.Statistics;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-import java.util.function.BiPredicate;
+import java.util.function.BiFunction;
+import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.hasNoNulls;
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isNullOrEmpty;
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isAllNulls;
@@ -38,12 +39,13 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
implements ParquetFilterPredicate<C> {
private final LogicalExpression left;
private final LogicalExpression right;
- private final BiPredicate<Statistics<C>, Statistics<C>> predicate;
+
+ private final BiFunction<Statistics<C>, Statistics<C>, RowsMatch> predicate;
private ParquetComparisonPredicate(
LogicalExpression left,
LogicalExpression right,
- BiPredicate<Statistics<C>, Statistics<C>> predicate
+ BiFunction<Statistics<C>, Statistics<C>, RowsMatch> predicate
) {
super(left.getPosition());
this.left = left;
@@ -65,7 +67,7 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
}
/**
- * Semantics of canDrop() is very similar to what is implemented in Parquet library's
+ * Semantics of matches() is very similar to what is implemented in Parquet library's
* {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and
* {@link org.apache.parquet.filter2.predicate.FilterPredicate}
*
@@ -83,23 +85,29 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
* where Column1 and Column2 are from same parquet table.
*/
@Override
- public boolean canDrop(RangeExprEvaluator<C> evaluator) {
+ public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
Statistics<C> leftStat = left.accept(evaluator, null);
if (isNullOrEmpty(leftStat)) {
- return false;
+ return RowsMatch.SOME;
}
-
Statistics<C> rightStat = right.accept(evaluator, null);
if (isNullOrEmpty(rightStat)) {
- return false;
+ return RowsMatch.SOME;
}
-
- // if either side is ALL null, = is evaluated to UNKNOWN -> canDrop
if (isAllNulls(leftStat, evaluator.getRowCount()) || isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
+ return RowsMatch.NONE;
+ }
+ if (!leftStat.hasNonNullValue() || !rightStat.hasNonNullValue()) {
+ return RowsMatch.SOME;
}
+ return predicate.apply(leftStat, rightStat);
+ }
- return (leftStat.hasNonNullValue() && rightStat.hasNonNullValue()) && predicate.test(leftStat, rightStat);
+ /**
+ * If one rowgroup contains some null values, change the RowsMatch.ALL into RowsMatch.SOME (null values should be discarded by filter)
+ */
+ private static RowsMatch checkNull(Statistics leftStat, Statistics rightStat) {
+ return !hasNoNulls(leftStat) || !hasNoNulls(rightStat) ? RowsMatch.SOME : RowsMatch.ALL;
}
/**
@@ -109,12 +117,9 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
LogicalExpression left,
LogicalExpression right
) {
- return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
- // can drop when left's max < right's min, or right's max < left's min
- final C leftMin = leftStat.genericGetMin();
- final C rightMin = rightStat.genericGetMin();
- return (leftStat.compareMaxToValue(rightMin) < 0) || (rightStat.compareMaxToValue(leftMin) < 0);
- }) {
+ return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) ->
+ leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 || rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0 ? RowsMatch.NONE : RowsMatch.SOME
+ ) {
@Override
public String toString() {
return left + " = " + right;
@@ -130,9 +135,10 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
LogicalExpression right
) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
- // can drop when left's max <= right's min.
- final C rightMin = rightStat.genericGetMin();
- return leftStat.compareMaxToValue(rightMin) <= 0;
+ if (leftStat.compareMaxToValue(rightStat.genericGetMin()) <= 0) {
+ return RowsMatch.NONE;
+ }
+ return leftStat.compareMinToValue(rightStat.genericGetMax()) > 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
});
}
@@ -144,9 +150,10 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
LogicalExpression right
) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
- // can drop when left's max < right's min.
- final C rightMin = rightStat.genericGetMin();
- return leftStat.compareMaxToValue(rightMin) < 0;
+ if (leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0) {
+ return RowsMatch.NONE;
+ }
+ return leftStat.compareMinToValue(rightStat.genericGetMax()) >= 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
});
}
@@ -158,9 +165,10 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
LogicalExpression right
) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
- // can drop when right's max <= left's min.
- final C leftMin = leftStat.genericGetMin();
- return rightStat.compareMaxToValue(leftMin) <= 0;
+ if (rightStat.compareMaxToValue(leftStat.genericGetMin()) <= 0) {
+ return RowsMatch.NONE;
+ }
+ return leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
});
}
@@ -171,9 +179,10 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
LogicalExpression left, LogicalExpression right
) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
- // can drop when right's max < left's min.
- final C leftMin = leftStat.genericGetMin();
- return rightStat.compareMaxToValue(leftMin) < 0;
+ if (rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0) {
+ return RowsMatch.NONE;
+ }
+ return leftStat.compareMaxToValue(rightStat.genericGetMin()) <= 0 ? checkNull(leftStat, rightStat) : RowsMatch.SOME;
});
}
@@ -185,11 +194,10 @@ public class ParquetComparisonPredicate<C extends Comparable<C>> extends Logical
LogicalExpression right
) {
return new ParquetComparisonPredicate<C>(left, right, (leftStat, rightStat) -> {
- // can drop when there is only one unique value.
- final C leftMax = leftStat.genericGetMax();
- final C rightMax = rightStat.genericGetMax();
- return leftStat.compareMinToValue(leftMax) == 0 && rightStat.compareMinToValue(rightMax) == 0 &&
- leftStat.compareMaxToValue(rightMax) == 0;
+ if (leftStat.compareMaxToValue(rightStat.genericGetMin()) < 0 || rightStat.compareMaxToValue(leftStat.genericGetMin()) < 0) {
+ return checkNull(leftStat, rightStat);
+ }
+ return leftStat.compareMaxToValue(rightStat.genericGetMax()) == 0 && leftStat.compareMinToValue(rightStat.genericGetMin()) == 0 ? RowsMatch.NONE : RowsMatch.SOME;
});
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java
index 1b7e9e5..c472d48 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetFilterPredicate.java
@@ -18,5 +18,16 @@
package org.apache.drill.exec.expr.stat;
public interface ParquetFilterPredicate<T extends Comparable<T>> {
- boolean canDrop(RangeExprEvaluator<T> evaluator);
+
+ /**
+ * Define the validity of a row group against a filter
+ * <ul>
+ * <li>ALL : all rows match the filter (can not drop the row group and can prune the filter)
+ * <li>NONE : no row matches the filter (can drop the row group)
+   * <li>SOME : only some rows match the filter, or the filter can not be applied (can not drop the row group nor the filter)
+ * </ul>
+ */
+ enum RowsMatch {ALL, NONE, SOME}
+
+ RowsMatch matches(RangeExprEvaluator<T> evaluator);
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
index 42e6e0b..e69dd8b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicate.java
@@ -19,7 +19,6 @@ package org.apache.drill.exec.expr.stat;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.LogicalExpressionBase;
-import org.apache.drill.common.expression.SchemaPath;
import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.visitors.ExprVisitor;
import org.apache.drill.exec.expr.fn.FunctionGenerationHelper;
@@ -29,7 +28,7 @@ import org.apache.parquet.column.statistics.Statistics;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-import java.util.function.BiPredicate;
+import java.util.function.BiFunction;
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.hasNoNulls;
import static org.apache.drill.exec.expr.stat.ParquetPredicatesHelper.isAllNulls;
@@ -42,9 +41,10 @@ public class ParquetIsPredicate<C extends Comparable<C>> extends LogicalExpressi
implements ParquetFilterPredicate<C> {
private final LogicalExpression expr;
- private final BiPredicate<Statistics<C>, RangeExprEvaluator<C>> predicate;
- private ParquetIsPredicate(LogicalExpression expr, BiPredicate<Statistics<C>, RangeExprEvaluator<C>> predicate) {
+ private final BiFunction<Statistics<C>, RangeExprEvaluator<C>, RowsMatch> predicate;
+
+ private ParquetIsPredicate(LogicalExpression expr, BiFunction<Statistics<C>, RangeExprEvaluator<C>, RowsMatch> predicate) {
super(expr.getPosition());
this.expr = expr;
this.predicate = predicate;
@@ -62,14 +62,22 @@ public class ParquetIsPredicate<C extends Comparable<C>> extends LogicalExpressi
return visitor.visitUnknown(this, value);
}
- @Override
- public boolean canDrop(RangeExprEvaluator<C> evaluator) {
+ /**
+ * Apply the filter condition against the meta of the rowgroup.
+ */
+ public RowsMatch matches(RangeExprEvaluator<C> evaluator) {
Statistics<C> exprStat = expr.accept(evaluator, null);
- if (isNullOrEmpty(exprStat)) {
- return false;
- }
+ return isNullOrEmpty(exprStat) ? RowsMatch.SOME : predicate.apply(exprStat, evaluator);
+ }
- return predicate.test(exprStat, evaluator);
+ /**
+ * After the applying of the filter against the statistics of the rowgroup, if the result is RowsMatch.ALL,
+ * then we still must know if the rowgroup contains some null values, because it can change the filter result.
+   * If it contains some null values, then we change the RowsMatch.ALL into RowsMatch.SOME, which says that maybe
+   * some values (the null ones) should be discarded.
+ */
+ private static RowsMatch checkNull(Statistics exprStat) {
+ return hasNoNulls(exprStat) ? RowsMatch.ALL : RowsMatch.SOME;
}
/**
@@ -77,26 +85,20 @@ public class ParquetIsPredicate<C extends Comparable<C>> extends LogicalExpressi
*/
private static <C extends Comparable<C>> LogicalExpression createIsNullPredicate(LogicalExpression expr) {
return new ParquetIsPredicate<C>(expr,
- //if there are no nulls -> canDrop
- (exprStat, evaluator) -> hasNoNulls(exprStat)) {
- private final boolean isArray = isArray(expr);
-
- private boolean isArray(LogicalExpression expression) {
- if (expression instanceof TypedFieldExpr) {
- TypedFieldExpr typedFieldExpr = (TypedFieldExpr) expression;
- SchemaPath schemaPath = typedFieldExpr.getPath();
- return schemaPath.isArray();
- }
- return false;
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator<C> evaluator) {
+ (exprStat, evaluator) -> {
// for arrays we are not able to define exact number of nulls
// [1,2,3] vs [1,2] -> in second case 3 is absent and thus it's null but statistics shows no nulls
- return !isArray && super.canDrop(evaluator);
- }
- };
+ if (expr instanceof TypedFieldExpr) {
+ TypedFieldExpr typedFieldExpr = (TypedFieldExpr) expr;
+ if (typedFieldExpr.getPath().isArray()) {
+ return RowsMatch.SOME;
+ }
+ }
+ if (hasNoNulls(exprStat)) {
+ return RowsMatch.NONE;
+ }
+ return isAllNulls(exprStat, evaluator.getRowCount()) ? RowsMatch.ALL : RowsMatch.SOME;
+ });
}
/**
@@ -104,8 +106,7 @@ public class ParquetIsPredicate<C extends Comparable<C>> extends LogicalExpressi
*/
private static <C extends Comparable<C>> LogicalExpression createIsNotNullPredicate(LogicalExpression expr) {
return new ParquetIsPredicate<C>(expr,
- //if there are all nulls -> canDrop
- (exprStat, evaluator) -> isAllNulls(exprStat, evaluator.getRowCount())
+ (exprStat, evaluator) -> isAllNulls(exprStat, evaluator.getRowCount()) ? RowsMatch.NONE : checkNull(exprStat)
);
}
@@ -113,40 +114,72 @@ public class ParquetIsPredicate<C extends Comparable<C>> extends LogicalExpressi
* IS TRUE predicate.
*/
private static LogicalExpression createIsTruePredicate(LogicalExpression expr) {
- return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
- //if max value is not true or if there are all nulls -> canDrop
- isAllNulls(exprStat, evaluator.getRowCount()) || exprStat.hasNonNullValue() && !((BooleanStatistics) exprStat).getMax()
- );
+ return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) -> {
+ if (isAllNulls(exprStat, evaluator.getRowCount())) {
+ return RowsMatch.NONE;
+ }
+ if (!exprStat.hasNonNullValue()) {
+ return RowsMatch.SOME;
+ }
+ if (!((BooleanStatistics) exprStat).getMax()) {
+ return RowsMatch.NONE;
+ }
+ return ((BooleanStatistics) exprStat).getMin() ? checkNull(exprStat) : RowsMatch.SOME;
+ });
}
/**
* IS FALSE predicate.
*/
private static LogicalExpression createIsFalsePredicate(LogicalExpression expr) {
- return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
- //if min value is not false or if there are all nulls -> canDrop
- isAllNulls(exprStat, evaluator.getRowCount()) || exprStat.hasNonNullValue() && ((BooleanStatistics) exprStat).getMin()
- );
+ return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) -> {
+ if (isAllNulls(exprStat, evaluator.getRowCount())) {
+ return RowsMatch.NONE;
+ }
+ if (!exprStat.hasNonNullValue()) {
+ return RowsMatch.SOME;
+ }
+ if (((BooleanStatistics) exprStat).getMin()) {
+ return RowsMatch.NONE;
+ }
+ return ((BooleanStatistics) exprStat).getMax() ? RowsMatch.SOME : checkNull(exprStat);
+ });
}
/**
* IS NOT TRUE predicate.
*/
private static LogicalExpression createIsNotTruePredicate(LogicalExpression expr) {
- return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
- //if min value is not false or if there are no nulls -> canDrop
- hasNoNulls(exprStat) && exprStat.hasNonNullValue() && ((BooleanStatistics) exprStat).getMin()
- );
+ return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) -> {
+ if (isAllNulls(exprStat, evaluator.getRowCount())) {
+ return RowsMatch.ALL;
+ }
+ if (!exprStat.hasNonNullValue()) {
+ return RowsMatch.SOME;
+ }
+ if (((BooleanStatistics) exprStat).getMin()) {
+ return hasNoNulls(exprStat) ? RowsMatch.NONE : RowsMatch.SOME;
+ }
+ return ((BooleanStatistics) exprStat).getMax() ? RowsMatch.SOME : RowsMatch.ALL;
+ });
}
/**
* IS NOT FALSE predicate.
*/
private static LogicalExpression createIsNotFalsePredicate(LogicalExpression expr) {
- return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) ->
- //if max value is not true or if there are no nulls -> canDrop
- hasNoNulls(exprStat) && exprStat.hasNonNullValue() && !((BooleanStatistics) exprStat).getMax()
- );
+ return new ParquetIsPredicate<Boolean>(expr, (exprStat, evaluator) -> {
+ if (isAllNulls(exprStat, evaluator.getRowCount())) {
+ return RowsMatch.ALL;
+ }
+ if (!exprStat.hasNonNullValue()) {
+ return RowsMatch.SOME;
+ }
+ if (!((BooleanStatistics) exprStat).getMax()) {
+ return hasNoNulls(exprStat) ? RowsMatch.NONE : RowsMatch.SOME;
+ }
+ return ((BooleanStatistics) exprStat).getMin() ? RowsMatch.ALL : RowsMatch.SOME;
+ });
}
public static <C extends Comparable<C>> LogicalExpression createIsPredicate(String function, LogicalExpression expr) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
index f127f0b..2b55e3d 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
@@ -258,21 +258,28 @@ public class RangeExprEvaluator<T extends Comparable<T>> extends AbstractExprVis
final ValueHolder minFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args1, holderExpr.getName());
final ValueHolder maxFuncHolder = InterpreterEvaluator.evaluateFunction(interpreter, args2, holderExpr.getName());
+ Statistics<T> statistics;
switch (destType) {
- //TODO : need handle # of nulls.
- case INT:
- return getStatistics( ((IntHolder)minFuncHolder).value, ((IntHolder)maxFuncHolder).value);
- case BIGINT:
- return getStatistics( ((BigIntHolder)minFuncHolder).value, ((BigIntHolder)maxFuncHolder).value);
- case FLOAT4:
- return getStatistics( ((Float4Holder)minFuncHolder).value, ((Float4Holder)maxFuncHolder).value);
- case FLOAT8:
- return getStatistics( ((Float8Holder)minFuncHolder).value, ((Float8Holder)maxFuncHolder).value);
- case TIMESTAMP:
- return getStatistics(((TimeStampHolder) minFuncHolder).value, ((TimeStampHolder) maxFuncHolder).value);
- default:
- return null;
+ case INT:
+ statistics = getStatistics(((IntHolder) minFuncHolder).value, ((IntHolder) maxFuncHolder).value);
+ break;
+ case BIGINT:
+ statistics = getStatistics(((BigIntHolder) minFuncHolder).value, ((BigIntHolder) maxFuncHolder).value);
+ break;
+ case FLOAT4:
+ statistics = getStatistics(((Float4Holder) minFuncHolder).value, ((Float4Holder) maxFuncHolder).value);
+ break;
+ case FLOAT8:
+ statistics = getStatistics(((Float8Holder) minFuncHolder).value, ((Float8Holder) maxFuncHolder).value);
+ break;
+ case TIMESTAMP:
+ statistics = getStatistics(((TimeStampHolder) minFuncHolder).value, ((TimeStampHolder) maxFuncHolder).value);
+ break;
+ default:
+ return null;
}
+ statistics.setNumNulls(input.getNumNulls());
+ return statistics;
} catch (Exception e) {
throw new DrillRuntimeException("Error in evaluating function of " + holderExpr.getName() );
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
index 33472bb..bf292be 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
@@ -262,12 +262,14 @@ public abstract class AbstractParquetGroupScan extends AbstractFileGroupScan {
}
}
- if (ParquetRGFilterEvaluator.canDrop(filterPredicate, columnStatisticsMap, rowGroup.getRowCount())) {
- continue;
+ ParquetFilterPredicate.RowsMatch match = ParquetRGFilterEvaluator.matches(filterPredicate, columnStatisticsMap, rowGroup.getRowCount(), parquetTableMetadata, rowGroup.getColumns(), schemaPathsInExpr);
+ if (match == ParquetFilterPredicate.RowsMatch.NONE) {
+        continue; // No rows comply with the filter => drop the row group
}
+ rowGroup.setRowsMatch(match);
qualifiedRGs.add(rowGroup);
- qualifiedFilePath.add(rowGroup.getPath()); // TODO : optimize when 1 file contains m row groups.
+ qualifiedFilePath.add(rowGroup.getPath());
}
if (qualifiedRGs.size() == rowGroupInfos.size() ) {
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
index 83ce4d2..b5f0ca4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
@@ -29,6 +29,8 @@ import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.ValueExpressions;
+import org.apache.drill.exec.expr.stat.ParquetFilterPredicate;
+import org.apache.drill.exec.expr.stat.ParquetFilterPredicate.RowsMatch;
import org.apache.drill.exec.ops.OptimizerRulesContext;
import org.apache.drill.exec.physical.base.GroupScan;
import org.apache.drill.exec.planner.common.DrillRelOptUtil;
@@ -165,12 +167,26 @@ public abstract class ParquetPushDownFilter extends StoragePluginOptimizerRule {
return;
}
-
RelNode newScan = ScanPrel.create(scan, scan.getTraitSet(), newGroupScan, scan.getRowType());;
if (project != null) {
newScan = project.copy(project.getTraitSet(), ImmutableList.of(newScan));
}
+
+ if (newGroupScan instanceof AbstractParquetGroupScan) {
+ RowsMatch matchAll = RowsMatch.ALL;
+ List<RowGroupInfo> rowGroupInfos = ((AbstractParquetGroupScan) newGroupScan).rowGroupInfos;
+ for (RowGroupInfo rowGroup : rowGroupInfos) {
+ if (rowGroup.getRowsMatch() != RowsMatch.ALL) {
+ matchAll = RowsMatch.SOME;
+ break;
+ }
+ }
+ if (matchAll == ParquetFilterPredicate.RowsMatch.ALL) {
+ call.transformTo(newScan);
+ }
+ }
+
final RelNode newFilter = filter.copy(filter.getTraitSet(), ImmutableList.<RelNode>of(newScan));
call.transformTo(newFilter);
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRGFilterEvaluator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRGFilterEvaluator.java
index 370988b..3e7bc65 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRGFilterEvaluator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetRGFilterEvaluator.java
@@ -27,6 +27,7 @@ import org.apache.drill.exec.compile.sig.ConstantExpressionIdentifier;
import org.apache.drill.exec.expr.ExpressionTreeMaterializer;
import org.apache.drill.exec.expr.fn.FunctionLookupContext;
import org.apache.drill.exec.expr.stat.ParquetFilterPredicate;
+import org.apache.drill.exec.expr.stat.ParquetFilterPredicate.RowsMatch;
import org.apache.drill.exec.expr.stat.RangeExprEvaluator;
import org.apache.drill.exec.ops.FragmentContext;
import org.apache.drill.exec.ops.UdfUtilities;
@@ -37,19 +38,23 @@ import org.apache.drill.exec.store.parquet.stat.ParquetFooterStatCollector;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
import java.util.Set;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.ColumnMetadata;
+import static org.apache.drill.exec.store.parquet.metadata.MetadataBase.ParquetTableMetadataBase;
+
public class ParquetRGFilterEvaluator {
static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetRGFilterEvaluator.class);
- public static boolean evalFilter(LogicalExpression expr, ParquetMetadata footer, int rowGroupIndex,
+ public static RowsMatch evalFilter(LogicalExpression expr, ParquetMetadata footer, int rowGroupIndex,
OptionManager options, FragmentContext fragmentContext) {
final HashMap<String, String> emptyMap = new HashMap<String, String>();
return evalFilter(expr, footer, rowGroupIndex, options, fragmentContext, emptyMap);
}
- public static boolean evalFilter(LogicalExpression expr, ParquetMetadata footer, int rowGroupIndex,
+ public static RowsMatch evalFilter(LogicalExpression expr, ParquetMetadata footer, int rowGroupIndex,
OptionManager options, FragmentContext fragmentContext, Map<String, String> implicitColValues) {
// figure out the set of columns referenced in expression.
final Set<SchemaPath> schemaPathsInExpr = expr.accept(new FieldReferenceFinder(), null);
@@ -57,23 +62,19 @@ public class ParquetRGFilterEvaluator {
Map<SchemaPath, ColumnStatistics> columnStatisticsMap = columnStatCollector.collectColStat(schemaPathsInExpr);
- boolean canDrop = canDrop(expr, columnStatisticsMap, footer.getBlocks().get(rowGroupIndex).getRowCount(), fragmentContext, fragmentContext.getFunctionRegistry());
- return canDrop;
+ return matches(expr, columnStatisticsMap, footer.getBlocks().get(rowGroupIndex).getRowCount(), fragmentContext, fragmentContext.getFunctionRegistry());
}
-
- public static boolean canDrop(ParquetFilterPredicate parquetPredicate, Map<SchemaPath,
+ public static RowsMatch matches(ParquetFilterPredicate parquetPredicate, Map<SchemaPath,
ColumnStatistics> columnStatisticsMap, long rowCount) {
- boolean canDrop = false;
if (parquetPredicate != null) {
RangeExprEvaluator rangeExprEvaluator = new RangeExprEvaluator(columnStatisticsMap, rowCount);
- canDrop = parquetPredicate.canDrop(rangeExprEvaluator);
+ return parquetPredicate.matches(rangeExprEvaluator);
}
- return canDrop;
+ return RowsMatch.SOME;
}
-
- public static boolean canDrop(LogicalExpression expr, Map<SchemaPath, ColumnStatistics> columnStatisticsMap,
+ public static RowsMatch matches(LogicalExpression expr, Map<SchemaPath, ColumnStatistics> columnStatisticsMap,
long rowCount, UdfUtilities udfUtilities, FunctionLookupContext functionImplementationRegistry) {
ErrorCollector errorCollector = new ErrorCollectorImpl();
LogicalExpression materializedFilter = ExpressionTreeMaterializer.materializeFilterExpr(
@@ -82,14 +83,39 @@ public class ParquetRGFilterEvaluator {
if (errorCollector.hasErrors()) {
logger.error("{} error(s) encountered when materialize filter expression : {}",
errorCollector.getErrorCount(), errorCollector.toErrorString());
- return false;
+ return RowsMatch.SOME;
}
Set<LogicalExpression> constantBoundaries = ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter);
ParquetFilterPredicate parquetPredicate = (ParquetFilterPredicate) ParquetFilterBuilder.buildParquetFilterPredicate(
materializedFilter, constantBoundaries, udfUtilities);
- return canDrop(parquetPredicate, columnStatisticsMap, rowCount);
+ return matches(parquetPredicate, columnStatisticsMap, rowCount);
+ }
+
+ public static RowsMatch matches(ParquetFilterPredicate parquetPredicate, Map<SchemaPath, ColumnStatistics> columnStatisticsMap, long rowCount, ParquetTableMetadataBase parquetTableMetadata, List<? extends ColumnMetadata> columnMetadataList, Set<SchemaPath> schemaPathsInExpr) {
+ RowsMatch temp = matches(parquetPredicate, columnStatisticsMap, rowCount);
+ return temp == RowsMatch.ALL && isRepeated(schemaPathsInExpr, parquetTableMetadata, columnMetadataList) ? RowsMatch.SOME : temp;
+ }
+
+ /**
+ * Check if one of the fields involved in the filter is an array (used in DRILL_6259_test_data).
+ *
+ * @return true if one at least is an array, false otherwise.
+ */
+ private static boolean isRepeated(Set<SchemaPath> fields, ParquetTableMetadataBase parquetTableMetadata, List<? extends ColumnMetadata> columnMetadataList) {
+ final Map<SchemaPath, ColumnMetadata> columnMetadataMap = new HashMap<>();
+ for (final ColumnMetadata columnMetadata : columnMetadataList) {
+ SchemaPath schemaPath = SchemaPath.getCompoundPath(columnMetadata.getName());
+ columnMetadataMap.put(schemaPath, columnMetadata);
+ }
+ for (final SchemaPath field : fields) {
+ ColumnMetadata columnMetadata = columnMetadataMap.get(field.getUnIndexed());
+ if (columnMetadata != null && parquetTableMetadata.getRepetitionLevel(columnMetadata.getName()) >= 1) {
+ return true;
+ }
+ }
+ return false;
}
/**
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/RowGroupInfo.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/RowGroupInfo.java
index af436d8..7d2143c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/RowGroupInfo.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/RowGroupInfo.java
@@ -19,6 +19,7 @@ package org.apache.drill.exec.store.parquet;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.drill.exec.expr.stat.ParquetFilterPredicate.RowsMatch;
import org.apache.drill.exec.store.dfs.ReadEntryFromHDFS;
import org.apache.drill.exec.store.dfs.easy.FileWork;
import org.apache.drill.exec.store.schedule.CompleteWork;
@@ -35,6 +36,7 @@ public class RowGroupInfo extends ReadEntryFromHDFS implements CompleteWork, Fil
private List<? extends ColumnMetadata> columns;
private long rowCount; // rowCount = -1 indicates to include all rows.
private long numRecordsToRead;
+ private RowsMatch rowsMatch = RowsMatch.SOME;
@JsonCreator
public RowGroupInfo(@JsonProperty("path") String path,
@@ -95,4 +97,7 @@ public class RowGroupInfo extends ReadEntryFromHDFS implements CompleteWork, Fil
this.columns = columns;
}
+ public RowsMatch getRowsMatch() { return rowsMatch; }
+
+ public void setRowsMatch(RowsMatch rowsMatch) { this.rowsMatch = rowsMatch; }
}
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetFooterStatCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetFooterStatCollector.java
index ac63bda..4e73d6b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetFooterStatCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetFooterStatCollector.java
@@ -59,7 +59,7 @@ public class ParquetFooterStatCollector implements ColumnStatCollector {
// Reasons to pass implicit columns and their values:
// 1. Differentiate implicit columns from regular non-exist columns. Implicit columns do not
// exist in parquet metadata. Without such knowledge, implicit columns is treated as non-exist
- // column. A condition on non-exist column would lead to canDrop = true, which is not the
+ // column. A condition on non-exist column would lead to matches = NONE, which is not the
// right behavior for condition on implicit columns.
// 2. Pass in the implicit column name with corresponding values, and wrap them in Statistics with
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
index 437074e..a46191b 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
@@ -59,7 +59,7 @@ public class ParquetMetaStatCollector implements ColumnStatCollector {
// Reasons to pass implicit columns and their values:
// 1. Differentiate implicit columns from regular non-exist columns. Implicit columns do not
// exist in parquet metadata. Without such knowledge, implicit columns is treated as non-exist
- // column. A condition on non-exist column would lead to canDrop = true, which is not the
+ // column. A condition on non-exist column would lead to matches = NONE, which is not the
// right behavior for condition on implicit columns.
// 2. Pass in the implicit column name with corresponding values, and wrap them in Statistics with
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
index c871ccc..ea12f40 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
@@ -20,12 +20,17 @@ package org.apache.drill.exec.store.parquet;
import org.apache.commons.io.FileUtils;
import org.apache.drill.PlanTestBase;
import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.exec.expr.fn.FunctionGenerationHelper;
+import org.apache.drill.exec.expr.stat.ParquetFilterPredicate.RowsMatch;
+import org.apache.drill.exec.expr.stat.ParquetIsPredicate;
+import org.apache.drill.exec.expr.stat.RangeExprEvaluator;
import org.apache.drill.exec.ops.FragmentContextImpl;
import org.apache.drill.exec.planner.physical.PlannerSettings;
import org.apache.drill.exec.proto.BitControl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.format.converter.ParquetMetadataConverter;
import org.apache.parquet.hadoop.ParquetFileReader;
import org.apache.parquet.hadoop.metadata.ParquetMetadata;
@@ -36,6 +41,8 @@ import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestWatcher;
import org.junit.runner.Description;
+import org.mockito.ArgumentMatchers;
+import org.mockito.Mockito;
import java.io.File;
import java.io.IOException;
@@ -57,6 +64,8 @@ public class TestParquetFilterPushDown extends PlanTestBase {
dirTestWatcher.copyResourceToRoot(Paths.get("parquetFilterPush"));
dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "multirowgroup.parquet"));
+ dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "multirowgroup2.parquet"));
+ dirTestWatcher.copyResourceToRoot(Paths.get("parquet", "multirowgroupwithNulls.parquet"));
}
@AfterClass
@@ -97,73 +106,75 @@ public class TestParquetFilterPushDown extends PlanTestBase {
.toFile();
ParquetMetadata footer = getParquetMetaData(file);
- testParquetRowGroupFilterEval(footer, "intCol = 100", false);
- testParquetRowGroupFilterEval(footer, "intCol = 0", false);
- testParquetRowGroupFilterEval(footer, "intCol = 50", false);
+ testParquetRowGroupFilterEval(footer, "intCol = 100", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol = 0", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol = 50", RowsMatch.SOME);
- testParquetRowGroupFilterEval(footer, "intCol = -1", true);
- testParquetRowGroupFilterEval(footer, "intCol = 101", true);
+ testParquetRowGroupFilterEval(footer, "intCol = -1", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol = 101", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "intCol > 100", true);
- testParquetRowGroupFilterEval(footer, "intCol > 99", false);
+ testParquetRowGroupFilterEval(footer, "intCol > 100", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol > 99", RowsMatch.SOME);
- testParquetRowGroupFilterEval(footer, "intCol >= 100", false);
- testParquetRowGroupFilterEval(footer, "intCol >= 101", true);
+ testParquetRowGroupFilterEval(footer, "intCol >= 100", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol >= 101", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "intCol < 100", false);
- testParquetRowGroupFilterEval(footer, "intCol < 1", false);
- testParquetRowGroupFilterEval(footer, "intCol < 0", true);
+ testParquetRowGroupFilterEval(footer, "intCol < 100", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol < 1", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol < 0", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "intCol <= 100", false);
- testParquetRowGroupFilterEval(footer, "intCol <= 1", false);
- testParquetRowGroupFilterEval(footer, "intCol <= 0", false);
- testParquetRowGroupFilterEval(footer, "intCol <= -1", true);
+ testParquetRowGroupFilterEval(footer, "intCol <= 100", RowsMatch.ALL);
+ testParquetRowGroupFilterEval(footer, "intCol <= 1", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol <= 0", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol <= -1", RowsMatch.NONE);
// "and"
- testParquetRowGroupFilterEval(footer, "intCol > 100 and intCol < 200", true);
- testParquetRowGroupFilterEval(footer, "intCol > 50 and intCol < 200", false);
- testParquetRowGroupFilterEval(footer, "intCol > 50 and intCol > 200", true); // essentially, intCol > 200
+ testParquetRowGroupFilterEval(footer, "intCol > 100 and intCol < 200", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol > 50 and intCol < 200", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol > 50 and intCol > 200", RowsMatch.NONE); // essentially, intCol > 200
// "or"
- testParquetRowGroupFilterEval(footer, "intCol = 150 or intCol = 160", true);
- testParquetRowGroupFilterEval(footer, "intCol = 50 or intCol = 160", false);
+ testParquetRowGroupFilterEval(footer, "intCol = 150 or intCol = 160", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol = 50 or intCol = 160", RowsMatch.SOME);
//"nonExistCol" does not exist in the table. "AND" with a filter on exist column
- testParquetRowGroupFilterEval(footer, "intCol > 100 and nonExistCol = 100", true);
- testParquetRowGroupFilterEval(footer, "intCol > 50 and nonExistCol = 100", true); // since nonExistCol = 100 -> Unknown -> could drop.
- testParquetRowGroupFilterEval(footer, "nonExistCol = 100 and intCol > 50", true); // since nonExistCol = 100 -> Unknown -> could drop.
- testParquetRowGroupFilterEval(footer, "intCol > 100 and nonExistCol < 'abc'", true);
- testParquetRowGroupFilterEval(footer, "nonExistCol < 'abc' and intCol > 100", true); // nonExistCol < 'abc' hit NumberException and is ignored, but intCol >100 will say "drop".
- testParquetRowGroupFilterEval(footer, "intCol > 50 and nonExistCol < 'abc'", false); // because nonExistCol < 'abc' hit NumberException and is ignored.
+ testParquetRowGroupFilterEval(footer, "intCol > 100 and nonExistCol = 100", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol > 50 and nonExistCol = 100", RowsMatch.NONE); // since nonExistCol = 100 -> Unknown -> could drop.
+ testParquetRowGroupFilterEval(footer, "nonExistCol = 100 and intCol > 50", RowsMatch.NONE); // since nonExistCol = 100 -> Unknown -> could drop.
+ testParquetRowGroupFilterEval(footer, "intCol > 100 and nonExistCol < 'abc'", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "nonExistCol < 'abc' and intCol > 100", RowsMatch.NONE); // nonExistCol < 'abc' hit NumberException and is ignored, but intCol >100 will
+ // say "drop".
+ testParquetRowGroupFilterEval(footer, "intCol > 50 and nonExistCol < 'abc'", RowsMatch.SOME); // because nonExistCol < 'abc' hit NumberException and
+ // is ignored.
//"nonExistCol" does not exist in the table. "OR" with a filter on exist column
- testParquetRowGroupFilterEval(footer, "intCol > 100 or nonExistCol = 100", true); // nonExistCol = 100 -> could drop.
- testParquetRowGroupFilterEval(footer, "nonExistCol = 100 or intCol > 100", true); // nonExistCol = 100 -> could drop.
- testParquetRowGroupFilterEval(footer, "intCol > 50 or nonExistCol < 100", false);
- testParquetRowGroupFilterEval(footer, "nonExistCol < 100 or intCol > 50", false);
+ testParquetRowGroupFilterEval(footer, "intCol > 100 or nonExistCol = 100", RowsMatch.NONE); // nonExistCol = 100 -> could drop.
+ testParquetRowGroupFilterEval(footer, "nonExistCol = 100 or intCol > 100", RowsMatch.NONE); // nonExistCol = 100 -> could drop.
+ testParquetRowGroupFilterEval(footer, "intCol > 50 or nonExistCol < 100", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "nonExistCol < 100 or intCol > 50", RowsMatch.SOME);
// cast function on column side (LHS)
- testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 100", false);
- testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 0", false);
- testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 50", false);
- testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 101", true);
- testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = -1", true);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 100", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 0", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 50", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = 101", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as bigint) = -1", RowsMatch.NONE);
// cast function on constant side (RHS)
- testParquetRowGroupFilterEval(footer, "intCol = cast(100 as bigint)", false);
- testParquetRowGroupFilterEval(footer, "intCol = cast(0 as bigint)", false);
- testParquetRowGroupFilterEval(footer, "intCol = cast(50 as bigint)", false);
- testParquetRowGroupFilterEval(footer, "intCol = cast(101 as bigint)", true);
- testParquetRowGroupFilterEval(footer, "intCol = cast(-1 as bigint)", true);
+ testParquetRowGroupFilterEval(footer, "intCol = cast(100 as bigint)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol = cast(0 as bigint)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol = cast(50 as bigint)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "intCol = cast(101 as bigint)", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol = cast(-1 as bigint)", RowsMatch.NONE);
// cast into float4/float8
- testParquetRowGroupFilterEval(footer, "cast(intCol as float4) = cast(101.0 as float4)", true);
- testParquetRowGroupFilterEval(footer, "cast(intCol as float4) = cast(-1.0 as float4)", true);
- testParquetRowGroupFilterEval(footer, "cast(intCol as float4) = cast(1.0 as float4)", false);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as float4) = cast(101.0 as float4)", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as float4) = cast(-1.0 as float4)", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as float4) = cast(1.0 as float4)", RowsMatch.SOME);
- testParquetRowGroupFilterEval(footer, "cast(intCol as float8) = 101.0", true);
- testParquetRowGroupFilterEval(footer, "cast(intCol as float8) = -1.0", true);
- testParquetRowGroupFilterEval(footer, "cast(intCol as float8) = 1.0", false);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as float8) = 101.0", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as float8) = -1.0", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "cast(intCol as float8) = 1.0", RowsMatch.SOME);
}
@Test
@@ -176,15 +187,15 @@ public class TestParquetFilterPushDown extends PlanTestBase {
.toFile();
ParquetMetadata footer = getParquetMetaData(file);
- testParquetRowGroupFilterEval(footer, "intCol = 100", true);
- testParquetRowGroupFilterEval(footer, "intCol = 0", true);
- testParquetRowGroupFilterEval(footer, "intCol = -100", true);
+ testParquetRowGroupFilterEval(footer, "intCol = 100", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol = 0", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol = -100", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "intCol > 10", true);
- testParquetRowGroupFilterEval(footer, "intCol >= 10", true);
+ testParquetRowGroupFilterEval(footer, "intCol > 10", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol >= 10", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "intCol < 10", true);
- testParquetRowGroupFilterEval(footer, "intCol <= 10", true);
+ testParquetRowGroupFilterEval(footer, "intCol < 10", RowsMatch.NONE);
+ testParquetRowGroupFilterEval(footer, "intCol <= 10", RowsMatch.NONE);
}
@Test
@@ -216,21 +227,21 @@ public class TestParquetFilterPushDown extends PlanTestBase {
}
private void testDatePredicateAgainstDrillCTASHelper(ParquetMetadata footer) throws Exception{
- testParquetRowGroupFilterEval(footer, "o_orderdate = cast('1992-01-01' as date)", false);
- testParquetRowGroupFilterEval(footer, "o_orderdate = cast('1991-12-31' as date)", true);
+ testParquetRowGroupFilterEval(footer, "o_orderdate = cast('1992-01-01' as date)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_orderdate = cast('1991-12-31' as date)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_orderdate >= cast('1991-12-31' as date)", false);
- testParquetRowGroupFilterEval(footer, "o_orderdate >= cast('1992-01-03' as date)", false);
- testParquetRowGroupFilterEval(footer, "o_orderdate >= cast('1992-01-04' as date)", true);
+ testParquetRowGroupFilterEval(footer, "o_orderdate >= cast('1991-12-31' as date)", RowsMatch.ALL);
+ testParquetRowGroupFilterEval(footer, "o_orderdate >= cast('1992-01-03' as date)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_orderdate >= cast('1992-01-04' as date)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_orderdate > cast('1992-01-01' as date)", false);
- testParquetRowGroupFilterEval(footer, "o_orderdate > cast('1992-01-03' as date)", true);
+ testParquetRowGroupFilterEval(footer, "o_orderdate > cast('1992-01-01' as date)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_orderdate > cast('1992-01-03' as date)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_orderdate <= cast('1992-01-01' as date)", false);
- testParquetRowGroupFilterEval(footer, "o_orderdate <= cast('1991-12-31' as date)", true);
+ testParquetRowGroupFilterEval(footer, "o_orderdate <= cast('1992-01-01' as date)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_orderdate <= cast('1991-12-31' as date)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_orderdate < cast('1992-01-02' as date)", false);
- testParquetRowGroupFilterEval(footer, "o_orderdate < cast('1992-01-01' as date)", true);
+ testParquetRowGroupFilterEval(footer, "o_orderdate < cast('1992-01-02' as date)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_orderdate < cast('1992-01-01' as date)", RowsMatch.NONE);
}
@Test
@@ -243,25 +254,99 @@ public class TestParquetFilterPushDown extends PlanTestBase {
.toFile();
ParquetMetadata footer = getParquetMetaData(file);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp = cast('1992-01-01 10:20:30' as timestamp)", false);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp = cast('1992-01-01 10:20:29' as timestamp)", true);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp = cast('1992-01-01 10:20:30' as timestamp)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp = cast('1992-01-01 10:20:29' as timestamp)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp >= cast('1992-01-01 10:20:29' as timestamp)", false);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp >= cast('1992-01-03 10:20:30' as timestamp)", false);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp >= cast('1992-01-03 10:20:31' as timestamp)", true);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp >= cast('1992-01-01 10:20:29' as timestamp)", RowsMatch.ALL);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp >= cast('1992-01-03 10:20:30' as timestamp)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp >= cast('1992-01-03 10:20:31' as timestamp)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp > cast('1992-01-03 10:20:29' as timestamp)", false);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp > cast('1992-01-03 10:20:30' as timestamp)", true);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp > cast('1992-01-03 10:20:29' as timestamp)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp > cast('1992-01-03 10:20:30' as timestamp)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp <= cast('1992-01-01 10:20:30' as timestamp)", false);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp <= cast('1992-01-01 10:20:29' as timestamp)", true);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp <= cast('1992-01-01 10:20:30' as timestamp)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp <= cast('1992-01-01 10:20:29' as timestamp)", RowsMatch.NONE);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp < cast('1992-01-01 10:20:31' as timestamp)", false);
- testParquetRowGroupFilterEval(footer, "o_ordertimestamp < cast('1992-01-01 10:20:30' as timestamp)", true);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp < cast('1992-01-01 10:20:31' as timestamp)", RowsMatch.SOME);
+ testParquetRowGroupFilterEval(footer, "o_ordertimestamp < cast('1992-01-01 10:20:30' as timestamp)", RowsMatch.NONE);
}
@Test
+ public void testFilterPruning() throws Exception {
+ // multirowgroup2 is a parquet file with 3 rowgroups inside. One with a=0, another with a=1 and a=2, and the last with a=3 and a=4;
+ // FilterPushDown should be able to prune the filter from the scan operator according to the rowgroup statistics.
+ final String sql = "select * from dfs.`parquet/multirowgroup2.parquet` where ";
+ PlanTestBase.testPlanMatchingPatterns(sql + "a > 1", new String[]{"numRowGroups=2"}); //No filter pruning
+ PlanTestBase.testPlanMatchingPatterns(sql + "a > 2", new String[]{"numRowGroups=1"}, new String[]{"Filter\\("}); // Filter pruning
+
+ PlanTestBase.testPlanMatchingPatterns(sql + "a < 2", new String[]{"numRowGroups=2"}); // No filter pruning
+ PlanTestBase.testPlanMatchingPatterns(sql + "a < 1", new String[]{"numRowGroups=1"}, new String[]{"Filter\\("}); // Filter pruning
+
+ PlanTestBase.testPlanMatchingPatterns(sql + "a >= 2", new String[]{"numRowGroups=2"}); // No filter pruning
+ PlanTestBase.testPlanMatchingPatterns(sql + "a >= 1", new String[]{"numRowGroups=2"}, new String[]{"Filter\\("}); // Filter pruning
+
+ PlanTestBase.testPlanMatchingPatterns(sql + "a <= 1", new String[]{"numRowGroups=2"}); // No filter pruning
+ PlanTestBase.testPlanMatchingPatterns(sql + "a <= 2", new String[]{"numRowGroups=2"}, new String[]{"Filter\\("}); // Filter pruning
+
+ PlanTestBase.testPlanMatchingPatterns(sql + "a > 0 and a < 2", new String[]{"numRowGroups=1"}); // No filter pruning
+ PlanTestBase.testPlanMatchingPatterns(sql + "a > 0 and a < 3", new String[]{"numRowGroups=1"}, new String[]{"Filter\\("}); //Filter pruning
+
+ PlanTestBase.testPlanMatchingPatterns(sql + "a < 1 or a > 1", new String[]{"numRowGroups=3"}); // No filter pruning
+ PlanTestBase.testPlanMatchingPatterns(sql + "a < 1 or a > 2", new String[]{"numRowGroups=2"}, new String[]{"Filter\\("}); //Filter pruning
+ }
+
+ @Test
+ public void testFilterPruningWithNulls() throws Exception {
+ // multirowgroupwithNulls is a parquet file with 4 rowgroups inside and some groups contain null values.
+ // RG1 : [min: 20, max: 29, num_nulls: 0]
+ // RG2 : [min: 31, max: 39, num_nulls: 1]
+ // RG3 : [min: 40, max: 49, num_nulls: 1]
+ // RG4 : [min: 50, max: 59, num_nulls: 0]
+ final String sql = "select a from dfs.`parquet/multirowgroupwithNulls.parquet` where ";
+ // "<" "and" ">" with filter
+ testParquetFilterPruning(sql + "30 < a and 40 > a", 9, 1, null);
+ testParquetFilterPruning(sql + "30 < a and a < 40", 9, 1, null);
+ testParquetFilterPruning(sql + "a > 30 and 40 > a", 9, 1, null);
+ testParquetFilterPruning(sql + "a > 30 and a < 40", 9, 1, null);
+ // "<" "and" ">" with no filter
+ testParquetFilterPruning(sql + "19 < a and 30 > a", 10, 1, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "19 < a and a < 30", 10, 1, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "a > 19 and 30 > a", 10, 1, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "a > 19 and a < 30", 10, 1, new String[]{"Filter\\("});
+ // "<=" "and" ">=" with filter
+ testParquetFilterPruning(sql + "a >= 30 and 39 >= a", 9, 1, null);
+ testParquetFilterPruning(sql + "a >= 30 and a <= 39", 9, 1, null);
+ testParquetFilterPruning(sql + "30 <= a and 39 >= a", 9, 1, null);
+ testParquetFilterPruning(sql + "30 <= a and a <= 39", 9, 1, null);
+ // "<=" "and" ">=" with no filter
+ testParquetFilterPruning(sql + "a >= 20 and a <= 29", 10, 1, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "a >= 20 and 29 >= a", 10, 1, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "20 <= a and a <= 29", 10, 1, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "20 <= a and 29 >= a", 10, 1, new String[]{"Filter\\("});
+ // "<" "or" ">" with filter
+ testParquetFilterPruning(sql + "a < 40 or a > 49", 29, 3, null);
+ testParquetFilterPruning(sql + "a < 40 or 49 < a", 29, 3, null);
+ testParquetFilterPruning(sql + "40 > a or a > 49", 29, 3, null);
+ testParquetFilterPruning(sql + "40 > a or 49 < a", 29, 3, null);
+ // "<" "or" ">" with no filter
+ testParquetFilterPruning(sql + "a < 30 or a > 49", 20, 2, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "a < 30 or 49 < a", 20, 2, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "30 > a or a > 49", 20, 2, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "30 > a or 49 < a", 20, 2, new String[]{"Filter\\("});
+ // "<=" "or" ">=" with filter
+ testParquetFilterPruning(sql + "a <= 39 or a >= 50", 29, 3, null);
+ testParquetFilterPruning(sql + "a <= 39 or 50 <= a", 29, 3, null);
+ testParquetFilterPruning(sql + "39 >= a or a >= 50", 29, 3, null);
+ testParquetFilterPruning(sql + "39 >= a or 50 <= a", 29, 3, null);
+ // "<=" "or" ">=" with no filter
+ testParquetFilterPruning(sql + "a <= 29 or a >= 50", 20, 2, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "a <= 29 or 50 <= a", 20, 2, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "29 >= a or a >= 50", 20, 2, new String[]{"Filter\\("});
+ testParquetFilterPruning(sql + "29 >= a or 50 <= a", 20, 2, new String[]{"Filter\\("});
+ }
+
+ @Test
// Test against parquet files from Drill CTAS post 1.8.0 release.
public void testDatePredicateAgaistDrillCTASPost1_8() throws Exception {
test("use dfs.tmp");
@@ -428,6 +513,15 @@ public class TestParquetFilterPushDown extends PlanTestBase {
final String queryEqualTrueWithAnd = "select col_bln from dfs.`parquetFilterPush/blnTbl` where col_bln = true and unk_col = 'a'";
testParquetFilterPD(queryEqualTrueWithAnd, 0, 2, false);
+
+ // File ff1.parquet has column with the values: false, null, false.
+ // File tt1.parquet has column with the values: true, null, true.
+ // File ft0.parquet has column with the values: false, true.
+ final String query = "select a from dfs.`parquetFilterPush/tfTbl` where ";
+ testParquetFilterPD(query + "a is true", 3, 2, false);
+ testParquetFilterPD(query + "a is false", 3, 2, false);
+ testParquetFilterPD(query + "a is not true", 5, 1, false);
+ testParquetFilterPD(query + "a is not false", 5, 1, false);
}
@Test // DRILL-5359
@@ -478,18 +572,89 @@ public class TestParquetFilterPushDown extends PlanTestBase {
String[] expectedPlan = {"numRowGroups=2"};
PlanTestBase.testPlanMatchingPatterns(query, expectedPlan);
- testBuilder()
- .sqlQuery(query)
- .unOrdered()
- .baselineColumns("cnt")
- .baselineValues(2L)
- .go();
+ testBuilder().sqlQuery(query).unOrdered().baselineColumns("cnt").baselineValues(2L).go();
+ }
+
+ @Test // testing min=false, max=true, min/max set, no nulls
+ public void testMinFalseMaxTrue() throws Exception {
+ LogicalExpression le = Mockito.mock(LogicalExpression.class);
+ BooleanStatistics booleanStatistics = Mockito.mock(BooleanStatistics.class);
+ Mockito.doReturn(booleanStatistics).when(le).accept(ArgumentMatchers.any(), ArgumentMatchers.any());
+ RangeExprEvaluator<Boolean> re = Mockito.mock(RangeExprEvaluator.class);
+ Mockito.when(re.getRowCount()).thenReturn(Long.valueOf(2)); // 2 rows
+ Mockito.when(booleanStatistics.isEmpty()).thenReturn(false); // stat is not empty
+ Mockito.when(booleanStatistics.isNumNullsSet()).thenReturn(true); // num_nulls set
+ Mockito. when(booleanStatistics.getNumNulls()).thenReturn(Long.valueOf(0)); // no nulls
+ Mockito. when(booleanStatistics.hasNonNullValue()).thenReturn(true); // min/max set
+ Mockito.when(booleanStatistics.getMin()).thenReturn(false); // min false
+ Mockito.when(booleanStatistics.getMax()).thenReturn(true); // max true
+ ParquetIsPredicate isTrue = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_TRUE, le);
+ assertEquals(RowsMatch.SOME, isTrue.matches(re));
+ ParquetIsPredicate isFalse = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_FALSE, le);
+ assertEquals(RowsMatch.SOME, isFalse.matches(re));
+ ParquetIsPredicate isNotTrue = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_NOT_TRUE, le);
+ assertEquals(RowsMatch.SOME, isNotTrue.matches(re));
+ ParquetIsPredicate isNotFalse = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_NOT_FALSE, le);
+ assertEquals(RowsMatch.SOME, isNotFalse.matches(re));
+ }
+
+ @Test // testing min=false, max=false, min/max set, no nulls
+ public void testMinFalseMaxFalse() throws Exception {
+ LogicalExpression le = Mockito.mock(LogicalExpression.class);
+ BooleanStatistics booleanStatistics = Mockito.mock(BooleanStatistics.class);
+ Mockito.doReturn(booleanStatistics).when(le).accept(ArgumentMatchers.any(), ArgumentMatchers.any());
+ RangeExprEvaluator<Boolean> re = Mockito.mock(RangeExprEvaluator.class);
+ Mockito.when(re.getRowCount()).thenReturn(Long.valueOf(2)); // 2 rows
+ Mockito.when(booleanStatistics.isEmpty()).thenReturn(false); // stat is not empty
+ Mockito.when(booleanStatistics.isNumNullsSet()).thenReturn(true); // num_nulls set
+ Mockito. when(booleanStatistics.getNumNulls()).thenReturn(Long.valueOf(0)); // no nulls
+ Mockito. when(booleanStatistics.hasNonNullValue()).thenReturn(true); // min/max set
+ Mockito.when(booleanStatistics.getMin()).thenReturn(false); // min false
+ Mockito.when(booleanStatistics.getMax()).thenReturn(false); // max false
+ ParquetIsPredicate isTrue = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_TRUE, le);
+ assertEquals(RowsMatch.NONE, isTrue.matches(re));
+ ParquetIsPredicate isFalse = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_FALSE, le);
+ assertEquals(RowsMatch.ALL, isFalse.matches(re));
+ ParquetIsPredicate isNotTrue = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_NOT_TRUE, le);
+ assertEquals(RowsMatch.ALL, isNotTrue.matches(re));
+ ParquetIsPredicate isNotFalse = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_NOT_FALSE, le);
+ assertEquals(RowsMatch.NONE, isNotFalse.matches(re));
+ }
+
+ @Test // testing min=true, max=true, min/max set, no nulls
+ public void testMinTrueMaxTrue() throws Exception {
+ LogicalExpression le = Mockito.mock(LogicalExpression.class);
+ BooleanStatistics booleanStatistics = Mockito.mock(BooleanStatistics.class);
+ Mockito.doReturn(booleanStatistics).when(le).accept(ArgumentMatchers.any(), ArgumentMatchers.any());
+ RangeExprEvaluator<Boolean> re = Mockito.mock(RangeExprEvaluator.class);
+ Mockito.when(re.getRowCount()).thenReturn(Long.valueOf(2)); // 2 rows
+ Mockito.when(booleanStatistics.isEmpty()).thenReturn(false); // stat is not empty
+ Mockito.when(booleanStatistics.isNumNullsSet()).thenReturn(true); // num_nulls set
+ Mockito. when(booleanStatistics.getNumNulls()).thenReturn(Long.valueOf(0)); // no nulls
+ Mockito. when(booleanStatistics.hasNonNullValue()).thenReturn(true); // min/max set
+ Mockito.when(booleanStatistics.getMin()).thenReturn(true); // min true
+ Mockito.when(booleanStatistics.getMax()).thenReturn(true); // max true
+ ParquetIsPredicate isTrue = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_TRUE, le);
+ assertEquals(RowsMatch.ALL, isTrue.matches(re));
+ ParquetIsPredicate isFalse = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_FALSE, le);
+ assertEquals(RowsMatch.NONE, isFalse.matches(re));
+ ParquetIsPredicate isNotTrue = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_NOT_TRUE, le);
+ assertEquals(RowsMatch.NONE, isNotTrue.matches(re));
+ ParquetIsPredicate isNotFalse = (ParquetIsPredicate) ParquetIsPredicate.createIsPredicate(FunctionGenerationHelper.IS_NOT_FALSE, le);
+ assertEquals(RowsMatch.ALL, isNotFalse.matches(re));
}
//////////////////////////////////////////////////////////////////////////////////////////////////
// Some test helper functions.
//////////////////////////////////////////////////////////////////////////////////////////////////
+ /**
+  * Runs {@code query}, asserts it returns {@code expectedRowCount} rows, and then
+  * verifies the physical plan contains "numRowGroups=" {@code expectedRowgroups}
+  * while matching none of {@code excludedPattern} (e.g. the filter operator when
+  * the filter is expected to have been pruned away).
+  */
+ private void testParquetFilterPruning(final String query, int expectedRowCount, int expectedRowgroups, String[] excludedPattern) throws Exception{
+ int actualRowCount = testSql(query);
+ assertEquals(expectedRowCount, actualRowCount);
+ String numRowGroupPattern = "numRowGroups=" + expectedRowgroups;
+ testPlanMatchingPatterns(query, new String[]{numRowGroupPattern}, excludedPattern);
+ }
+
private void testParquetFilterPD(final String query, int expectedRowCount, int expectedNumFiles, boolean usedMetadataFile) throws Exception{
int actualRowCount = testSql(query);
assertEquals(expectedRowCount, actualRowCount);
@@ -499,13 +664,13 @@ public class TestParquetFilterPushDown extends PlanTestBase {
testPlanMatchingPatterns(query, new String[]{numFilesPattern, usedMetaPattern});
}
- private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final String exprStr, boolean canDropExpected) throws Exception{
+ // Parses exprStr and delegates to the row-group overload, evaluating against row group 0.
+ // NOTE(review): after the boolean -> RowsMatch type change, "canDropExpected" is a
+ // misnomer - it now carries the expected RowsMatch outcome (e.g. ALL/NONE), not a
+ // drop flag; consider renaming to expectedMatch.
+ private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final String exprStr, RowsMatch canDropExpected) throws Exception{
final LogicalExpression filterExpr = parseExpr(exprStr);
testParquetRowGroupFilterEval(footer, 0, filterExpr, canDropExpected);
}
- private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final int rowGroupIndex, final LogicalExpression filterExpr, boolean canDropExpected) {
- boolean canDrop = ParquetRGFilterEvaluator.evalFilter(filterExpr, footer, rowGroupIndex, fragContext.getOptions(), fragContext);
+ // Evaluates filterExpr against the given row group's statistics and asserts the
+ // RowsMatch result equals the expectation. Same naming caveat as above: "canDrop"
+ // now holds a RowsMatch value, not a boolean.
+ private void testParquetRowGroupFilterEval(final ParquetMetadata footer, final int rowGroupIndex, final LogicalExpression filterExpr, RowsMatch canDropExpected) {
+ RowsMatch canDrop = ParquetRGFilterEvaluator.evalFilter(filterExpr, footer, rowGroupIndex, fragContext.getOptions(), fragContext);
Assert.assertEquals(canDropExpected, canDrop);
}
diff --git a/exec/java-exec/src/test/resources/parquet/multirowgroup2.parquet b/exec/java-exec/src/test/resources/parquet/multirowgroup2.parquet
new file mode 100644
index 0000000..5139802
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/multirowgroup2.parquet differ
diff --git a/exec/java-exec/src/test/resources/parquet/multirowgroupwithNulls.parquet b/exec/java-exec/src/test/resources/parquet/multirowgroupwithNulls.parquet
new file mode 100644
index 0000000..084b315
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/multirowgroupwithNulls.parquet differ
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/ff1.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/ff1.parquet
new file mode 100644
index 0000000..79c2362
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/ff1.parquet differ
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/ft0.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/ft0.parquet
new file mode 100644
index 0000000..c0c51c4
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/ft0.parquet differ
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/tt1.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/tt1.parquet
new file mode 100644
index 0000000..35ca274
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/tfTbl/tt1.parquet differ