You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ar...@apache.org on 2018/03/03 18:47:11 UTC
[05/17] drill git commit: DRILL-6174: Parquet filter pushdown
improvements.
DRILL-6174: Parquet filter pushdown improvements.
Added support for the IS [NOT] NULL/TRUE/FALSE operators to the parquet filter pushdown.
Added timestamp/date/time implicit/explicit casts.
closes #1131
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/3bc4e319
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/3bc4e319
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/3bc4e319
Branch: refs/heads/master
Commit: 3bc4e31916fe3abddda8ab1c2504b0e67645a602
Parents: 1697e53
Author: Roman Kulyk <ro...@gmail.com>
Authored: Thu Feb 15 15:37:43 2018 +0000
Committer: Arina Ielchiieva <ar...@gmail.com>
Committed: Sat Mar 3 19:47:38 2018 +0200
----------------------------------------------------------------------
.../exec/expr/fn/FunctionGenerationHelper.java | 9 +-
.../exec/expr/fn/impl/DateTypeFunctions.java | 17 +-
.../expr/stat/ParquetBooleanPredicates.java | 75 ++++
.../expr/stat/ParquetComparisonPredicates.java | 292 +++++++++++++++
.../exec/expr/stat/ParquetIsPredicates.java | 209 +++++++++++
.../drill/exec/expr/stat/ParquetPredicates.java | 352 -------------------
.../exec/expr/stat/ParquetPredicatesHelper.java | 50 +++
.../exec/expr/stat/RangeExprEvaluator.java | 34 +-
.../drill/exec/resolver/TypeCastRules.java | 3 +-
.../store/parquet/ParquetFilterBuilder.java | 83 ++++-
.../parquet/stat/ParquetMetaStatCollector.java | 4 +
.../parquet/TestParquetFilterPushDown.java | 39 ++
...stParquetFilterPushDownForDateTimeCasts.java | 127 +++++++
.../parquetFilterPush/blnTbl/0_0_0.parquet | Bin 0 -> 272 bytes
.../parquetFilterPush/blnTbl/0_0_1.parquet | Bin 0 -> 272 bytes
.../parquetFilterPush/blnTbl/0_0_2.parquet | Bin 0 -> 272 bytes
.../parquetFilterPush/blnTbl/0_0_3.parquet | Bin 0 -> 259 bytes
17 files changed, 925 insertions(+), 369 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionGenerationHelper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionGenerationHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionGenerationHelper.java
index 435b451..7db862f 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionGenerationHelper.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/FunctionGenerationHelper.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -47,6 +47,13 @@ public class FunctionGenerationHelper {
public static final String LT = "less_than";
public static final String LE = "less_than_or_equal_to";
+ public static final String IS_NULL = "isnull";
+ public static final String IS_NOT_NULL = "isnotnull";
+ public static final String IS_TRUE = "istrue";
+ public static final String IS_NOT_TRUE = "isnottrue";
+ public static final String IS_FALSE = "isfalse";
+ public static final String IS_NOT_FALSE = "isnotfalse";
+
/**
* Finds ordering comparator ("compare_to...") FunctionHolderExpression with
* a specified ordering for NULL (and considering NULLS <i>equal</i>).
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
index a719ec0..25db420 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -524,6 +524,21 @@ public class DateTypeFunctions {
}
}
+ @FunctionTemplate(name = "castTIME", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+ public static class CastDateToTime implements DrillSimpleFunc {
+ @Param DateHolder in;
+ @Output TimeHolder out;
+
+ @Override
+ public void setup() { // nothing to initialise for this cast
+ }
+
+ @Override
+ public void eval() {
+ out.value = 0; // the input is never read: every DATE yields TIME value 0 (presumably midnight - confirm against TimeHolder)
+ }
+ }
+
@FunctionTemplate(name = "unix_timestamp", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
public static class UnixTimeStamp implements DrillSimpleFunc {
@Output BigIntHolder out;
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java
new file mode 100644
index 0000000..9db629d
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetBooleanPredicates.java
@@ -0,0 +1,75 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.stat;
+
+import org.apache.drill.common.expression.BooleanOperator;
+import org.apache.drill.common.expression.ExpressionPosition;
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.visitors.ExprVisitor;
+
+import java.util.List;
+
+/**
+ * Boolean (AND / OR) predicates for parquet filter pushdown.
+ */
+public class ParquetBooleanPredicates {
+ public static abstract class ParquetBooleanPredicate extends BooleanOperator implements ParquetFilterPredicate {
+ public ParquetBooleanPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
+ super(name, args, pos);
+ }
+
+ @Override
+ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
+ return visitor.visitBooleanOperator(this, value); // visited like any other boolean operator
+ }
+ }
+
+ public static class AndPredicate extends ParquetBooleanPredicate {
+ public AndPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
+ super(name, args, pos);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ // "and" : as long as one branch is OK to drop, we can drop it.
+ for (LogicalExpression child : this) {
+ if (child instanceof ParquetFilterPredicate && ((ParquetFilterPredicate) child).canDrop(evaluator)) {
+ return true;
+ }
+ }
+ return false;
+ }
+ }
+
+ public static class OrPredicate extends ParquetBooleanPredicate {
+ public OrPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
+ super(name, args, pos);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ for (LogicalExpression child : this) {
+ // "or" : as long as one branch is NOT ok to drop (or cannot be evaluated at all), we can NOT drop it.
+ if (!(child instanceof ParquetFilterPredicate) || !((ParquetFilterPredicate) child).canDrop(evaluator)) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java
new file mode 100644
index 0000000..5657215
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetComparisonPredicates.java
@@ -0,0 +1,292 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.stat;
+
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.LogicalExpressionBase;
+import org.apache.drill.common.expression.visitors.ExprVisitor;
+import org.apache.parquet.column.statistics.Statistics;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Comparison predicates for parquet filter pushdown.
+ */
+public class ParquetComparisonPredicates {
+ public static abstract class ParquetCompPredicate extends LogicalExpressionBase implements ParquetFilterPredicate {
+ protected final LogicalExpression left; // left-hand operand of the comparison
+ protected final LogicalExpression right; // right-hand operand of the comparison
+
+ public ParquetCompPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left.getPosition()); // the predicate takes the position of its left operand
+ this.left = left;
+ this.right = right;
+ }
+
+ @Override
+ public Iterator<LogicalExpression> iterator() {
+ final List<LogicalExpression> args = new ArrayList<>(); // fresh list per call: left first, then right
+ args.add(left);
+ args.add(right);
+ return args.iterator();
+ }
+
+ @Override
+ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
+ return visitor.visitUnknown(this, value); // dispatched through the visitor's generic visitUnknown hook
+ }
+
+ }
+
+ /**
+ * EQ (=) predicate: left = right.
+ */
+ public static class EqualPredicate extends ParquetCompPredicate {
+ public EqualPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left, right);
+ }
+
+ /**
+ The semantics of canDrop() are very similar to what is implemented in the Parquet library's
+ {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and
+ {@link org.apache.parquet.filter2.predicate.FilterPredicate}
+
+ Main differences:
+ 1. A RangeExprEvaluator is used to compute the min/max of an expression, such as a CAST function
+ of a column. A CAST function could be explicitly added by the Drill user (it's recommended to use CAST
+ functions after DRILL-4372, if the user wants to reduce planning time for limit 0 queries), or implicitly
+ inserted by Drill, when the types of the compared operands are not identical. Therefore, it's important
+ to allow CAST functions to appear in the filter predicate.
+ 2. We do not require a list of ColumnChunkMetaData to do the evaluation, while the Parquet library's
+ StatisticsFilter has such a requirement. Drill's ParquetTableMetaData does not maintain ColumnChunkMetaData,
+ making it impossible to directly use the Parquet library's StatisticsFilter at query planning time.
+ 3. We allow both sides of the comparison operator to be a min/max range. As such, we support
+ expression_of(Column1) < expression_of(Column2),
+ where Column1 and Column2 are from the same parquet table.
+ */
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics leftStat = left.accept(evaluator, null);
+ Statistics rightStat = right.accept(evaluator, null);
+
+ if (leftStat == null ||
+ rightStat == null ||
+ leftStat.isEmpty() ||
+ rightStat.isEmpty()) {
+ return false; // no usable statistics on one side: nothing can be proven, so do not drop
+ }
+
+ // if either side is ALL null, = is evaluated to UNKNOWN -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) ||
+ ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) {
+ return true;
+ }
+
+ // can drop when the two ranges do not overlap: left's max < right's min, or right's max < left's min
+ if ( ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) < 0
+ || rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) < 0)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return left.toString() + " = " + right.toString();
+ }
+ }
+
+ /**
+ * GT (>) predicate: left > right.
+ */
+ public static class GTPredicate extends ParquetCompPredicate {
+ public GTPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left, right);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics leftStat = left.accept(evaluator, null);
+ Statistics rightStat = right.accept(evaluator, null);
+
+ if (leftStat == null ||
+ rightStat == null ||
+ leftStat.isEmpty() ||
+ rightStat.isEmpty()) {
+ return false; // no usable statistics on one side: nothing can be proven, so do not drop
+ }
+
+ // if either side is ALL null, > is evaluated to UNKNOWN -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) ||
+ ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) {
+ return true;
+ }
+
+ // can drop when left's max <= right's min: no left value can be greater than any right value.
+ if ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) <= 0 ) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * GE (>=) predicate: left >= right.
+ */
+ public static class GEPredicate extends ParquetCompPredicate {
+ public GEPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left, right);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics leftStat = left.accept(evaluator, null);
+ Statistics rightStat = right.accept(evaluator, null);
+
+ if (leftStat == null ||
+ rightStat == null ||
+ leftStat.isEmpty() ||
+ rightStat.isEmpty()) {
+ return false; // no usable statistics on one side: nothing can be proven, so do not drop
+ }
+
+ // if either side is ALL null, >= is evaluated to UNKNOWN -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) ||
+ ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) {
+ return true;
+ }
+
+ // can drop when left's max < right's min: every left value is strictly below every right value.
+ if ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) < 0 ) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * LT (<) predicate: left < right.
+ */
+ public static class LTPredicate extends ParquetCompPredicate {
+ public LTPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left, right);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics leftStat = left.accept(evaluator, null);
+ Statistics rightStat = right.accept(evaluator, null);
+
+ if (leftStat == null ||
+ rightStat == null ||
+ leftStat.isEmpty() ||
+ rightStat.isEmpty()) {
+ return false; // no usable statistics on one side: nothing can be proven, so do not drop
+ }
+
+ // if either side is ALL null, < is evaluated to UNKNOWN -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) ||
+ ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) {
+ return true;
+ }
+
+ // can drop when right's max <= left's min: no left value can be smaller than any right value.
+ if ( rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) <= 0 ) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * LE (<=) predicate: left <= right.
+ */
+ public static class LEPredicate extends ParquetCompPredicate {
+ public LEPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left, right);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics leftStat = left.accept(evaluator, null);
+ Statistics rightStat = right.accept(evaluator, null);
+
+ if (leftStat == null ||
+ rightStat == null ||
+ leftStat.isEmpty() ||
+ rightStat.isEmpty()) {
+ return false; // no usable statistics on one side: nothing can be proven, so do not drop
+ }
+
+ // if either side is ALL null, <= is evaluated to UNKNOWN -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) ||
+ ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) {
+ return true;
+ }
+
+ // can drop when right's max < left's min: every left value is strictly above every right value.
+ if ( rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) < 0 ) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * NE (!=) predicate: left != right.
+ */
+ public static class NEPredicate extends ParquetCompPredicate {
+ public NEPredicate(LogicalExpression left, LogicalExpression right) {
+ super(left, right);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics leftStat = left.accept(evaluator, null);
+ Statistics rightStat = right.accept(evaluator, null);
+
+ if (leftStat == null ||
+ rightStat == null ||
+ leftStat.isEmpty() ||
+ rightStat.isEmpty()) {
+ return false; // no usable statistics on one side: nothing can be proven, so do not drop
+ }
+
+ // if either side is ALL null, the comparison is evaluated to UNKNOWN -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(leftStat, evaluator.getRowCount()) ||
+ ParquetPredicatesHelper.isAllNulls(rightStat, evaluator.getRowCount())) {
+ return true;
+ }
+
+ // can drop when each side holds a single value (min == max) and both sides hold the same value.
+ if ( leftStat.genericGetMin().compareTo(leftStat.genericGetMax()) == 0 &&
+ rightStat.genericGetMin().compareTo(rightStat.genericGetMax()) ==0 &&
+ leftStat.genericGetMax().compareTo(rightStat.genericGetMax()) == 0) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
new file mode 100644
index 0000000..c6f9b2f
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
@@ -0,0 +1,209 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.stat;
+
+import org.apache.drill.common.expression.LogicalExpression;
+import org.apache.drill.common.expression.LogicalExpressionBase;
+import org.apache.drill.common.expression.visitors.ExprVisitor;
+import org.apache.parquet.column.statistics.Statistics;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * IS predicates for parquet filter pushdown.
+ */
+public class ParquetIsPredicates {
+ public static abstract class ParquetIsPredicate extends LogicalExpressionBase implements ParquetFilterPredicate {
+ protected final LogicalExpression expr; // the single operand the IS ... test is applied to
+
+ public ParquetIsPredicate(LogicalExpression expr) {
+ super(expr.getPosition()); // the predicate takes the position of its operand
+ this.expr = expr;
+ }
+
+ @Override
+ public Iterator<LogicalExpression> iterator() {
+ final List<LogicalExpression> args = new ArrayList<>(); // fresh list per call holding the only child
+ args.add(expr);
+ return args.iterator();
+ }
+
+ @Override
+ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
+ return visitor.visitUnknown(this, value); // dispatched through the visitor's generic visitUnknown hook
+ }
+ }
+
+ /**
+ * IS NULL predicate.
+ * The data can be dropped only when statistics are available and record no nulls.
+ */
+ public static class IsNullPredicate extends ParquetIsPredicate {
+ public IsNullPredicate(LogicalExpression expr) {
+ super(expr);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics exprStat = expr.accept(evaluator, null);
+
+ // Missing or empty statistics carry no trustworthy null count (an empty
+ // Statistics object reports zero nulls), so nothing can be proven: keep the data.
+ if (exprStat == null || exprStat.isEmpty()) {
+ return false;
+ }
+
+ // No nulls recorded -> no row can satisfy IS NULL -> canDrop.
+ // Otherwise at least one row may match and the data must be kept.
+ return !ParquetPredicatesHelper.hasNulls(exprStat);
+ }
+ }
+
+ /**
+ * IS NOT NULL predicate.
+ */
+ public static class IsNotNullPredicate extends ParquetIsPredicate {
+ public IsNotNullPredicate(LogicalExpression expr) {
+ super(expr);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics exprStat = expr.accept(evaluator, null);
+
+ if (exprStat == null ||
+ exprStat.isEmpty()) {
+ return false; // no usable statistics: nothing can be proven, so do not drop
+ }
+
+ // every row is null -> no row can satisfy IS NOT NULL -> canDrop
+ if (ParquetPredicatesHelper.isAllNulls(exprStat, evaluator.getRowCount())) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * IS TRUE predicate.
+ */
+ public static class IsTruePredicate extends ParquetIsPredicate {
+ public IsTruePredicate(LogicalExpression expr) {
+ super(expr);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics exprStat = expr.accept(evaluator, null);
+
+ if (exprStat == null ||
+ exprStat.isEmpty()) {
+ return false; // no usable statistics: nothing can be proven, so do not drop
+ }
+
+ // max value is not true, or every row is null -> no row can be TRUE -> canDrop
+ if (exprStat.genericGetMax().compareTo(true) != 0 ||
+ ParquetPredicatesHelper.isAllNulls(exprStat, evaluator.getRowCount())) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * IS FALSE predicate.
+ */
+ public static class IsFalsePredicate extends ParquetIsPredicate {
+ public IsFalsePredicate(LogicalExpression expr) {
+ super(expr);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics exprStat = expr.accept(evaluator, null);
+
+ if (exprStat == null ||
+ exprStat.isEmpty()) {
+ return false; // no usable statistics: nothing can be proven, so do not drop
+ }
+
+ // min value is not false, or every row is null -> no row can be FALSE -> canDrop
+ if (exprStat.genericGetMin().compareTo(false) != 0 ||
+ ParquetPredicatesHelper.isAllNulls(exprStat, evaluator.getRowCount())) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * IS NOT TRUE predicate.
+ */
+ public static class IsNotTruePredicate extends ParquetIsPredicate {
+ public IsNotTruePredicate(LogicalExpression expr) {
+ super(expr);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics exprStat = expr.accept(evaluator, null);
+
+ if (exprStat == null ||
+ exprStat.isEmpty()) {
+ return false; // no usable statistics: nothing can be proven, so do not drop
+ }
+
+ // min value is not false AND there are no nulls -> every row is TRUE -> canDrop
+ if (exprStat.genericGetMin().compareTo(false) != 0 && !ParquetPredicatesHelper.hasNulls(exprStat)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+
+ /**
+ * IS NOT FALSE predicate.
+ */
+ public static class IsNotFalsePredicate extends ParquetIsPredicate {
+ public IsNotFalsePredicate(LogicalExpression expr) {
+ super(expr);
+ }
+
+ @Override
+ public boolean canDrop(RangeExprEvaluator evaluator) {
+ Statistics exprStat = expr.accept(evaluator, null);
+
+ if (exprStat == null ||
+ exprStat.isEmpty()) {
+ return false; // no usable statistics: nothing can be proven, so do not drop
+ }
+
+ // max value is not true AND there are no nulls -> every row is FALSE -> canDrop
+ if (exprStat.genericGetMax().compareTo(true) != 0 && !ParquetPredicatesHelper.hasNulls(exprStat)) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicates.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicates.java
deleted file mode 100644
index 54f703a..0000000
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicates.java
+++ /dev/null
@@ -1,352 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- * <p/>
- * http://www.apache.org/licenses/LICENSE-2.0
- * <p/>
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.expr.stat;
-
-import org.apache.drill.common.expression.BooleanOperator;
-import org.apache.drill.common.expression.ExpressionPosition;
-import org.apache.drill.common.expression.LogicalExpression;
-import org.apache.drill.common.expression.LogicalExpressionBase;
-import org.apache.drill.common.expression.visitors.ExprVisitor;
-import org.apache.parquet.column.statistics.Statistics;
-import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-public abstract class ParquetPredicates {
- public static abstract class ParquetCompPredicate extends LogicalExpressionBase implements ParquetFilterPredicate {
- protected final LogicalExpression left;
- protected final LogicalExpression right;
-
- public ParquetCompPredicate(LogicalExpression left, LogicalExpression right) {
- super(left.getPosition());
- this.left = left;
- this.right = right;
- }
-
- @Override
- public Iterator<LogicalExpression> iterator() {
- final List<LogicalExpression> args = new ArrayList<>();
- args.add(left);
- args.add(right);
- return args.iterator();
- }
-
- @Override
- public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
- return visitor.visitUnknown(this, value);
- }
-
- }
-
- public static abstract class ParquetBooleanPredicate extends BooleanOperator implements ParquetFilterPredicate {
- public ParquetBooleanPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
- super(name, args, pos);
- }
-
- @Override
- public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
- return visitor.visitBooleanOperator(this, value);
- }
- }
-
- public static class AndPredicate extends ParquetBooleanPredicate {
- public AndPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
- super(name, args, pos);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- // "and" : as long as one branch is OK to drop, we can drop it.
- for (LogicalExpression child : this) {
- if (((ParquetFilterPredicate) child).canDrop(evaluator)) {
- return true;
- }
- }
- return false;
- }
- }
-
- public static class OrPredicate extends ParquetBooleanPredicate {
- public OrPredicate(String name, List<LogicalExpression> args, ExpressionPosition pos) {
- super(name, args, pos);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- for (LogicalExpression child : this) {
- // "long" : as long as one branch is NOT ok to drop, we can NOT drop it.
- if (! ((ParquetFilterPredicate) child).canDrop(evaluator)) {
- return false;
- }
- }
-
- return true;
- }
- }
-
- // is this column chunk composed entirely of nulls?
- // assumes the column chunk's statistics is not empty
- protected static boolean isAllNulls(Statistics stat, long rowCount) {
- return stat.getNumNulls() == rowCount;
- }
-
- // are there any nulls in this column chunk?
- // assumes the column chunk's statistics is not empty
- protected static boolean hasNulls(Statistics stat) {
- return stat.getNumNulls() > 0;
- }
-
- /**
- * EQ (=) predicate
- */
- public static class EqualPredicate extends ParquetCompPredicate {
- public EqualPredicate(LogicalExpression left, LogicalExpression right) {
- super(left, right);
- }
-
- /**
- Semantics of canDrop() is very similar to what is implemented in Parquet library's
- {@link org.apache.parquet.filter2.statisticslevel.StatisticsFilter} and
- {@link org.apache.parquet.filter2.predicate.FilterPredicate}
-
- Main difference :
- 1. A RangeExprEvaluator is used to compute the min/max of an expression, such as CAST function
- of a column. CAST function could be explicitly added by Drill user (It's recommended to use CAST
- function after DRILL-4372, if user wants to reduce planning time for limit 0 query), or implicitly
- inserted by Drill, when the types of compare operands are not identical. Therefore, it's important
- to allow CAST function to appear in the filter predicate.
- 2. We do not require list of ColumnChunkMetaData to do the evaluation, while Parquet library's
- StatisticsFilter has such requirement. Drill's ParquetTableMetaData does not maintain ColumnChunkMetaData,
- making it impossible to directly use Parquet library's StatisticFilter in query planning time.
- 3. We allows both sides of comparison operator to be a min/max range. As such, we support
- expression_of(Column1) < expression_of(Column2),
- where Column1 and Column2 are from same parquet table.
- */
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- Statistics leftStat = left.accept(evaluator, null);
- Statistics rightStat = right.accept(evaluator, null);
-
- if (leftStat == null ||
- rightStat == null ||
- leftStat.isEmpty() ||
- rightStat.isEmpty()) {
- return false;
- }
-
- // if either side is ALL null, = is evaluated to UNKNOW -> canDrop
- if (isAllNulls(leftStat, evaluator.getRowCount()) ||
- isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
- }
-
- // can drop when left's max < right's min, or right's max < left's min
- if ( ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) < 0
- || rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) < 0)) {
- return true;
- } else {
- return false;
- }
- }
-
- @Override
- public String toString() {
- return left.toString() + " = " + right.toString();
- }
- }
-
- /**
- * GT (>) predicate.
- */
- public static class GTPredicate extends ParquetCompPredicate {
- public GTPredicate(LogicalExpression left, LogicalExpression right) {
- super(left, right);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- Statistics leftStat = left.accept(evaluator, null);
- Statistics rightStat = right.accept(evaluator, null);
-
- if (leftStat == null ||
- rightStat == null ||
- leftStat.isEmpty() ||
- rightStat.isEmpty()) {
- return false;
- }
-
- // if either side is ALL null, = is evaluated to UNKNOW -> canDrop
- if (isAllNulls(leftStat, evaluator.getRowCount()) ||
- isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
- }
-
- // can drop when left's max <= right's min.
- if ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) <= 0 ) {
- return true;
- } else {
- return false;
- }
- }
- }
-
- /**
- * GE (>=) predicate.
- */
- public static class GEPredicate extends ParquetCompPredicate {
- public GEPredicate(LogicalExpression left, LogicalExpression right) {
- super(left, right);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- Statistics leftStat = left.accept(evaluator, null);
- Statistics rightStat = right.accept(evaluator, null);
-
- if (leftStat == null ||
- rightStat == null ||
- leftStat.isEmpty() ||
- rightStat.isEmpty()) {
- return false;
- }
-
- // if either side is ALL null, = is evaluated to UNKNOW -> canDrop
- if (isAllNulls(leftStat, evaluator.getRowCount()) ||
- isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
- }
-
- // can drop when left's max < right's min.
- if ( leftStat.genericGetMax().compareTo(rightStat.genericGetMin()) < 0 ) {
- return true;
- } else {
- return false;
- }
- }
- }
-
- /**
- * LT (<) predicate.
- */
- public static class LTPredicate extends ParquetCompPredicate {
- public LTPredicate(LogicalExpression left, LogicalExpression right) {
- super(left, right);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- Statistics leftStat = left.accept(evaluator, null);
- Statistics rightStat = right.accept(evaluator, null);
-
- if (leftStat == null ||
- rightStat == null ||
- leftStat.isEmpty() ||
- rightStat.isEmpty()) {
- return false;
- }
-
- // if either side is ALL null, = is evaluated to UNKNOW -> canDrop
- if (isAllNulls(leftStat, evaluator.getRowCount()) ||
- isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
- }
-
- // can drop when right's max <= left's min.
- if ( rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) <= 0 ) {
- return true;
- } else {
- return false;
- }
- }
- }
-
- /**
- * LE (<=) predicate.
- */
- public static class LEPredicate extends ParquetCompPredicate {
- public LEPredicate(LogicalExpression left, LogicalExpression right) {
- super(left, right);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- Statistics leftStat = left.accept(evaluator, null);
- Statistics rightStat = right.accept(evaluator, null);
-
- if (leftStat == null ||
- rightStat == null ||
- leftStat.isEmpty() ||
- rightStat.isEmpty()) {
- return false;
- }
-
- // if either side is ALL null, = is evaluated to UNKNOW -> canDrop
- if (isAllNulls(leftStat, evaluator.getRowCount()) ||
- isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
- }
-
- // can drop when right's max < left's min.
- if ( rightStat.genericGetMax().compareTo(leftStat.genericGetMin()) < 0 ) {
- return true;
- } else {
- return false;
- }
- }
- }
-
- /**
- * NE (!=) predicate.
- */
- public static class NEPredicate extends ParquetCompPredicate {
- public NEPredicate(LogicalExpression left, LogicalExpression right) {
- super(left, right);
- }
-
- @Override
- public boolean canDrop(RangeExprEvaluator evaluator) {
- Statistics leftStat = left.accept(evaluator, null);
- Statistics rightStat = right.accept(evaluator, null);
-
- if (leftStat == null ||
- rightStat == null ||
- leftStat.isEmpty() ||
- rightStat.isEmpty()) {
- return false;
- }
-
- // if either side is ALL null, comparison is evaluated to UNKNOW -> canDrop
- if (isAllNulls(leftStat, evaluator.getRowCount()) ||
- isAllNulls(rightStat, evaluator.getRowCount())) {
- return true;
- }
-
- // can drop when there is only one unique value.
- if ( leftStat.genericGetMin().compareTo(leftStat.genericGetMax()) == 0 &&
- rightStat.genericGetMin().compareTo(rightStat.genericGetMax()) ==0 &&
- leftStat.genericGetMax().compareTo(rightStat.genericGetMax()) == 0) {
- return true;
- } else {
- return false;
- }
- }
- }
-
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
new file mode 100644
index 0000000..ac82d65
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetPredicatesHelper.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.stat;
+
+import org.apache.parquet.column.statistics.Statistics;
+
+/**
+ * Null-count checks over parquet column statistics, shared by the filter pushdown predicates.
+ */
+public class ParquetPredicatesHelper {
+
+  /**
+   * Tells whether the null count recorded in the statistics equals the given row count.
+   *
+   * @param stat parquet column statistics
+   * @param rowCount number of rows the null count is compared against
+   * @return true if the statistics' null count equals {@code rowCount},
+   *         false otherwise
+   */
+  public static boolean isAllNulls(Statistics stat, long rowCount) {
+    return rowCount == stat.getNumNulls();
+  }
+
+  /**
+   * Tells whether the statistics record at least one null.
+   *
+   * @param stat parquet column statistics
+   * @return true if the statistics' null count is greater than zero,
+   *         false otherwise
+   */
+  public static boolean hasNulls(Statistics stat) {
+    final long nullCount = stat.getNumNulls();
+    return nullCount > 0;
+  }
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
index 8f77070..2d241dc 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -35,9 +35,11 @@ import org.apache.drill.exec.expr.holders.BigIntHolder;
import org.apache.drill.exec.expr.holders.Float4Holder;
import org.apache.drill.exec.expr.holders.Float8Holder;
import org.apache.drill.exec.expr.holders.IntHolder;
+import org.apache.drill.exec.expr.holders.TimeStampHolder;
import org.apache.drill.exec.expr.holders.ValueHolder;
import org.apache.drill.exec.store.parquet.stat.ColumnStatistics;
import org.apache.drill.exec.vector.ValueHolderHelper;
+import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.column.statistics.DoubleStatistics;
import org.apache.parquet.column.statistics.FloatStatistics;
import org.apache.parquet.column.statistics.IntStatistics;
@@ -73,9 +75,8 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
final ColumnStatistics columnStatistics = columnStatMap.get(fieldExpr.getPath());
if (columnStatistics != null) {
return columnStatistics.getStatistics();
- } else {
+ } else if (fieldExpr.getMajorType().equals(Types.OPTIONAL_INT)) {
// field does not exist.
- Preconditions.checkArgument(fieldExpr.getMajorType().equals(Types.OPTIONAL_INT));
IntStatistics intStatistics = new IntStatistics();
intStatistics.setNumNulls(rowCount); // all values are nulls
return intStatistics;
@@ -90,6 +91,11 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
}
@Override
+ public Statistics visitBooleanConstant(ValueExpressions.BooleanExpression expr, Void value) throws RuntimeException {
+   // The literal's value is all that matters here; the boolean getStatistics overload turns it into statistics.
+   final boolean constant = expr.getBoolean();
+   return getStatistics(constant);
+ }
+
+ @Override
public Statistics visitLongConstant(ValueExpressions.LongExpression expr, Void value) throws RuntimeException {
return getStatistics(expr.getLong());
}
@@ -152,6 +158,16 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
return intStatistics;
}
+ /** Statistics for a single boolean value: the value serves as both the minimum and the maximum. */
+ private BooleanStatistics getStatistics(boolean value) {
+   return getStatistics(value, value);
+ }
+
+ /** Wraps the given bounds into a newly created {@link BooleanStatistics}. */
+ private BooleanStatistics getStatistics(boolean min, boolean max) {
+   final BooleanStatistics stats = new BooleanStatistics();
+   stats.setMinMax(min, max);
+   return stats;
+ }
+
private LongStatistics getStatistics(long value) {
return getStatistics(value, value);
}
@@ -217,6 +233,10 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
minHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics)input).getMin());
maxHolder = ValueHolderHelper.getFloat8Holder(((DoubleStatistics)input).getMax());
break;
+ case DATE:
+ minHolder = ValueHolderHelper.getDateHolder(((LongStatistics)input).getMin());
+ maxHolder = ValueHolderHelper.getDateHolder(((LongStatistics)input).getMax());
+ break;
default:
return null;
}
@@ -237,6 +257,8 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
return getStatistics( ((Float4Holder)minFuncHolder).value, ((Float4Holder)maxFuncHolder).value);
case FLOAT8:
return getStatistics( ((Float8Holder)minFuncHolder).value, ((Float8Holder)maxFuncHolder).value);
+ case TIMESTAMP:
+ return getStatistics(((TimeStampHolder) minFuncHolder).value, ((TimeStampHolder) maxFuncHolder).value);
default:
return null;
}
@@ -245,7 +267,7 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
}
}
- static Map<TypeProtos.MinorType, Set<TypeProtos.MinorType>> CAST_FUNC = new HashMap<>();
+ private static final Map<TypeProtos.MinorType, Set<TypeProtos.MinorType>> CAST_FUNC = new HashMap<>();
static {
// float -> double , int, bigint
CAST_FUNC.put(TypeProtos.MinorType.FLOAT4, new HashSet<TypeProtos.MinorType>());
@@ -270,6 +292,10 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
CAST_FUNC.get(TypeProtos.MinorType.BIGINT).add(TypeProtos.MinorType.INT);
CAST_FUNC.get(TypeProtos.MinorType.BIGINT).add(TypeProtos.MinorType.FLOAT4);
CAST_FUNC.get(TypeProtos.MinorType.BIGINT).add(TypeProtos.MinorType.FLOAT8);
+
+ // date -> timestamp
+ CAST_FUNC.put(TypeProtos.MinorType.DATE, new HashSet<TypeProtos.MinorType>());
+ CAST_FUNC.get(TypeProtos.MinorType.DATE).add(TypeProtos.MinorType.TIMESTAMP);
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
index 78a4509..f0e6602 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/resolver/TypeCastRules.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -405,6 +405,7 @@ public class TypeCastRules {
/** TIME cast able from **/
rule = new HashSet<>();
rule.add(MinorType.TIME);
+ rule.add(MinorType.DATE);
rule.add(MinorType.TIMESTAMP);
rule.add(MinorType.TIMESTAMPTZ);
rule.add(MinorType.FIXEDCHAR);
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
index 37a57dc..a9e55dd 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -30,6 +30,7 @@ import org.apache.drill.exec.expr.fn.DrillSimpleFuncHolder;
import org.apache.drill.exec.expr.fn.FunctionGenerationHelper;
import org.apache.drill.exec.expr.fn.interpreter.InterpreterEvaluator;
import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.BitHolder;
import org.apache.drill.exec.expr.holders.DateHolder;
import org.apache.drill.exec.expr.holders.Float4Holder;
import org.apache.drill.exec.expr.holders.Float8Holder;
@@ -37,7 +38,9 @@ import org.apache.drill.exec.expr.holders.IntHolder;
import org.apache.drill.exec.expr.holders.TimeHolder;
import org.apache.drill.exec.expr.holders.TimeStampHolder;
import org.apache.drill.exec.expr.holders.ValueHolder;
-import org.apache.drill.exec.expr.stat.ParquetPredicates;
+import org.apache.drill.exec.expr.stat.ParquetBooleanPredicates;
+import org.apache.drill.exec.expr.stat.ParquetComparisonPredicates;
+import org.apache.drill.exec.expr.stat.ParquetIsPredicates;
import org.apache.drill.exec.expr.stat.TypedFieldExpr;
import org.apache.drill.exec.ops.UdfUtilities;
import org.slf4j.Logger;
@@ -124,6 +127,11 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
}
@Override
+ public LogicalExpression visitBooleanConstant(ValueExpressions.BooleanExpression booleanExpression, Set<LogicalExpression> value) throws RuntimeException {
+ return booleanExpression; // a boolean literal is handed back unchanged
+ }
+
+ @Override
public LogicalExpression visitBooleanOperator(BooleanOperator op, Set<LogicalExpression> value) {
List<LogicalExpression> childPredicates = new ArrayList<>();
String functionName = op.getName();
@@ -146,9 +154,9 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
return childPredicates.get(0); // only one leg is qualified, remove boolean op.
} else {
if (functionName.equals("booleanOr")) {
- return new ParquetPredicates.OrPredicate(op.getName(), childPredicates, op.getPosition());
+ return new ParquetBooleanPredicates.OrPredicate(op.getName(), childPredicates, op.getPosition());
} else {
- return new ParquetPredicates.AndPredicate(op.getName(), childPredicates, op.getPosition());
+ return new ParquetBooleanPredicates.AndPredicate(op.getName(), childPredicates, op.getPosition());
}
}
}
@@ -181,6 +189,8 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
return ValueExpressions.getTimeStamp(((TimeStampHolder) holder).value);
case TIME:
return ValueExpressions.getTime(((TimeHolder) holder).value);
+ case BIT:
+ return ValueExpressions.getBit(((BitHolder) holder).value == 1);
default:
return null;
}
@@ -214,6 +224,10 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
return handleCompareFunction(funcHolderExpr, value);
}
+ if (isIsFunction(funcName)) {
+ return handleIsFunction(funcHolderExpr, value);
+ }
+
if (CastFunctions.isCastFunction(funcName)) {
List<LogicalExpression> newArgs = new ArrayList();
for (LogicalExpression arg : funcHolderExpr.args) {
@@ -245,22 +259,53 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
switch (funcName) {
case FunctionGenerationHelper.EQ :
- return new ParquetPredicates.EqualPredicate(newArgs.get(0), newArgs.get(1));
+ return new ParquetComparisonPredicates.EqualPredicate(newArgs.get(0), newArgs.get(1));
case FunctionGenerationHelper.GT :
- return new ParquetPredicates.GTPredicate(newArgs.get(0), newArgs.get(1));
+ return new ParquetComparisonPredicates.GTPredicate(newArgs.get(0), newArgs.get(1));
case FunctionGenerationHelper.GE :
- return new ParquetPredicates.GEPredicate(newArgs.get(0), newArgs.get(1));
+ return new ParquetComparisonPredicates.GEPredicate(newArgs.get(0), newArgs.get(1));
case FunctionGenerationHelper.LT :
- return new ParquetPredicates.LTPredicate(newArgs.get(0), newArgs.get(1));
+ return new ParquetComparisonPredicates.LTPredicate(newArgs.get(0), newArgs.get(1));
case FunctionGenerationHelper.LE :
- return new ParquetPredicates.LEPredicate(newArgs.get(0), newArgs.get(1));
+ return new ParquetComparisonPredicates.LEPredicate(newArgs.get(0), newArgs.get(1));
case FunctionGenerationHelper.NE :
- return new ParquetPredicates.NEPredicate(newArgs.get(0), newArgs.get(1));
+ return new ParquetComparisonPredicates.NEPredicate(newArgs.get(0), newArgs.get(1));
default:
return null;
}
}
+ /**
+  * Converts an IS [NOT] NULL / TRUE / FALSE function call into the matching parquet IS predicate.
+  *
+  * @param functionHolderExpression IS function call; its first argument is the tested expression
+  * @param value visitor context, forwarded unchanged when the argument is visited
+  * @return the matching predicate, or null when the function holder is not a DrillSimpleFuncHolder,
+  *         the argument can not be converted, or the function name is not a handled IS function
+  */
+ private LogicalExpression handleIsFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) {
+   String funcName;
+
+   if (functionHolderExpression.getHolder() instanceof DrillSimpleFuncHolder) {
+     funcName = ((DrillSimpleFuncHolder) functionHolderExpression.getHolder()).getRegisteredNames()[0];
+   } else {
+     logger.warn("Can not cast {} to DrillSimpleFuncHolder. Parquet filter pushdown can not handle function.",
+         functionHolderExpression.getHolder());
+     return null;
+   }
+
+   // This visitor answers null for expressions it can not convert. Such an argument must not be
+   // wrapped into a predicate, so the whole IS function is reported as not convertible instead.
+   LogicalExpression arg = functionHolderExpression.args.get(0).accept(this, value);
+   if (arg == null) {
+     return null;
+   }
+
+   switch (funcName) {
+     case FunctionGenerationHelper.IS_NULL:
+       return new ParquetIsPredicates.IsNullPredicate(arg);
+     case FunctionGenerationHelper.IS_NOT_NULL:
+       return new ParquetIsPredicates.IsNotNullPredicate(arg);
+     case FunctionGenerationHelper.IS_TRUE:
+       return new ParquetIsPredicates.IsTruePredicate(arg);
+     case FunctionGenerationHelper.IS_NOT_TRUE:
+       return new ParquetIsPredicates.IsNotTruePredicate(arg);
+     case FunctionGenerationHelper.IS_FALSE:
+       return new ParquetIsPredicates.IsFalsePredicate(arg);
+     case FunctionGenerationHelper.IS_NOT_FALSE:
+       return new ParquetIsPredicates.IsNotFalsePredicate(arg);
+     default:
+       logger.warn("Unhandled IS function. Function name: {}", funcName);
+       return null;
+   }
+ }
+
private LogicalExpression handleCastFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) {
for (LogicalExpression arg : functionHolderExpression.args) {
LogicalExpression newArg = arg.accept(this, value);
@@ -278,6 +323,10 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
return COMPARE_FUNCTIONS_SET.contains(funcName);
}
+ private static boolean isIsFunction(String funcName) {
+ return IS_FUNCTIONS_SET.contains(funcName); // true only for the IS [NOT] NULL/TRUE/FALSE names held in the set
+ }
+
private static final ImmutableSet<String> COMPARE_FUNCTIONS_SET;
static {
@@ -292,4 +341,18 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
.build();
}
+ /** Names of the IS [NOT] NULL / TRUE / FALSE functions that {@link #isIsFunction} recognizes. */
+ private static final ImmutableSet<String> IS_FUNCTIONS_SET = ImmutableSet.of(
+     FunctionGenerationHelper.IS_NULL,
+     FunctionGenerationHelper.IS_NOT_NULL,
+     FunctionGenerationHelper.IS_TRUE,
+     FunctionGenerationHelper.IS_NOT_TRUE,
+     FunctionGenerationHelper.IS_FALSE,
+     FunctionGenerationHelper.IS_NOT_FALSE);
+
}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
index 4501cb8..4991a22 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
@@ -24,6 +24,7 @@ import org.apache.drill.common.types.Types;
import org.apache.drill.exec.store.parquet.Metadata;
import org.apache.drill.exec.store.parquet.ParquetGroupScan;
import org.apache.parquet.column.statistics.BinaryStatistics;
+import org.apache.parquet.column.statistics.BooleanStatistics;
import org.apache.parquet.column.statistics.DoubleStatistics;
import org.apache.parquet.column.statistics.FloatStatistics;
import org.apache.parquet.column.statistics.IntStatistics;
@@ -180,6 +181,9 @@ public class ParquetMetaStatCollector implements ColumnStatCollector{
final long maxMS = convertToDrillDateValue(Integer.parseInt(max.toString()));
((LongStatistics) convertedStat ).setMinMax(minMS, maxMS);
break;
+ case BIT:
+ ((BooleanStatistics) stat).setMinMax(Boolean.parseBoolean(min.toString()), Boolean.parseBoolean(max.toString()));
+ break;
default:
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
index 98e7ef2..606d409 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
@@ -388,6 +388,45 @@ public class TestParquetFilterPushDown extends PlanTestBase {
testParquetFilterPD(query3, 49, 2, false);
}
+ @Test
+ public void testBooleanPredicate() throws Exception {
+   // Table blnTbl was created by CTAS in Drill 1.12.0 and consists of 4 files with the following data:
+   // File 0_0_0.parquet has a col_bln column with the values: true, true, true.
+   // File 0_0_1.parquet has a col_bln column with the values: false, false, false.
+   // File 0_0_2.parquet has a col_bln column with the values: true, null, false.
+   // File 0_0_3.parquet has a col_bln column with the values: null, null, null.
+   final String select = "select col_bln from dfs.`parquetFilterPush/blnTbl` where ";
+
+   // IS [NOT] NULL
+   testParquetFilterPD(select + "col_bln is null", 4, 2, false);
+   testParquetFilterPD(select + "col_bln is not null", 8, 3, false);
+
+   // IS [NOT] TRUE
+   testParquetFilterPD(select + "col_bln is true", 4, 2, false);
+   testParquetFilterPD(select + "col_bln is not true", 8, 3, false);
+
+   // IS [NOT] FALSE
+   testParquetFilterPD(select + "col_bln is false", 4, 2, false);
+   testParquetFilterPD(select + "col_bln is not false", 8, 3, false);
+
+   // Comparison with a boolean literal, plain and negated
+   testParquetFilterPD(select + "col_bln = true", 4, 2, false);
+   testParquetFilterPD(select + "not col_bln = true", 4, 2, false);
+   testParquetFilterPD(select + "col_bln = false", 4, 2, false);
+   testParquetFilterPD(select + "not col_bln = false", 4, 2, false);
+ }
+
@Test // DRILL-5359
public void testFilterWithItemFlatten() throws Exception {
final String sql = "select n_regionkey\n"
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForDateTimeCasts.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForDateTimeCasts.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForDateTimeCasts.java
new file mode 100644
index 0000000..ae3bac0
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForDateTimeCasts.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import org.apache.drill.PlanTestBase;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+
+/**
+ * Runs filters that compare timestamp, date and time columns with literals of another type
+ * (with and without an explicit cast) and checks both the returned row count and the
+ * {@code numFiles} value reported in the plan.
+ */
+public class TestParquetFilterPushDownForDateTimeCasts extends PlanTestBase {
+
+  private static final String TABLE_NAME = "dateTimeCasts";
+
+  /**
+   * Creates the tables {@code p1}, {@code p2} and {@code p3} under {@link #TABLE_NAME} in dfs.tmp.
+   * Each table holds col_timestamp, col_date and col_time; the last row of {@code p3} is all nulls.
+   */
+  @BeforeClass
+  public static void init() throws Exception {
+    test("use dfs.tmp");
+    test("create table `%s/p1` as\n" +
+        "select timestamp '2017-01-01 00:00:00' as col_timestamp, date '2017-01-01' as col_date, time '00:00:00' as col_time from (values(1)) union all\n" +
+        "select timestamp '2017-01-02 00:00:00' as col_timestamp, date '2017-01-02' as col_date, time '00:00:00' as col_time from (values(1)) union all\n" +
+        "select timestamp '2017-01-02 21:01:15' as col_timestamp, date '2017-01-02' as col_date, time '21:01:15' as col_time from (values(1))", TABLE_NAME);
+
+    test("create table `%s/p2` as\n" +
+        "select timestamp '2017-01-03 08:50:00' as col_timestamp, date '2017-01-03' as col_date, time '08:50:00' as col_time from (values(1)) union all\n" +
+        "select timestamp '2017-01-04 15:25:00' as col_timestamp, date '2017-01-04' as col_date, time '15:25:00' as col_time from (values(1)) union all\n" +
+        "select timestamp '2017-01-04 22:14:29' as col_timestamp, date '2017-01-04' as col_date, time '22:14:29' as col_time from (values(1))", TABLE_NAME);
+
+    test("create table `%s/p3` as\n" +
+        "select timestamp '2017-01-05 05:46:11' as col_timestamp, date '2017-01-05' as col_date, time '05:46:11' as col_time from (values(1)) union all\n" +
+        "select timestamp '2017-01-06 06:17:59' as col_timestamp, date '2017-01-06' as col_date, time '06:17:59' as col_time from (values(1)) union all\n" +
+        "select timestamp '2017-01-06 06:17:59' as col_timestamp, date '2017-01-06' as col_date, time '06:17:59' as col_time from (values(1)) union all\n" +
+        "select cast(null as timestamp) as col_timestamp, cast(null as date) as col_date, cast(null as time) as col_time from (values(1))", TABLE_NAME);
+  }
+
+  /** Drops everything created by {@link #init()}. */
+  @AfterClass
+  public static void tearDown() throws Exception {
+    test("drop table if exists `%s`", TABLE_NAME);
+  }
+
+  /** Timestamp column against string literals, bare or cast to varchar / timestamp. */
+  @Test
+  public void testCastTimestampVarchar() throws Exception {
+    testParquetFilterPushDown("col_timestamp = '2017-01-05 05:46:11'", 1, 1);
+    testParquetFilterPushDown("col_timestamp = cast('2017-01-05 05:46:11' as varchar)", 1, 1);
+    testParquetFilterPushDown("col_timestamp = cast('2017-01-05 05:46:11' as timestamp)", 1, 1);
+    testParquetFilterPushDown("col_timestamp > '2017-01-02 00:00:00'", 7, 3);
+    testParquetFilterPushDown("col_timestamp between '2017-01-03 21:01:15' and '2017-01-06 05:46:11'", 3, 2);
+    testParquetFilterPushDown("col_timestamp between '2017-01-03' and '2017-01-06'", 4, 2);
+  }
+
+  /** Timestamp column against date literals, bare or cast to timestamp. */
+  @Test
+  public void testCastTimestampDate() throws Exception {
+    testParquetFilterPushDown("col_timestamp = date '2017-01-02'", 1, 1);
+    testParquetFilterPushDown("col_timestamp = cast(date '2017-01-02' as timestamp)", 1, 1);
+    testParquetFilterPushDown("col_timestamp > date '2017-01-02'", 7, 3);
+    testParquetFilterPushDown("col_timestamp between date '2017-01-03' and date '2017-01-06'", 4, 2);
+  }
+
+  /** Date column against string literals, bare or cast to varchar / date. */
+  @Test
+  public void testCastDateVarchar() throws Exception {
+    testParquetFilterPushDown("col_date = '2017-01-02'", 2, 1);
+    testParquetFilterPushDown("col_date = cast('2017-01-02' as varchar)", 2, 1);
+    testParquetFilterPushDown("col_date = cast('2017-01-02' as date)", 2, 1);
+    testParquetFilterPushDown("col_date > '2017-01-02'", 6, 2);
+    testParquetFilterPushDown("col_date between '2017-01-02' and '2017-01-04'", 5, 2);
+  }
+
+  /** Date column against timestamp literals, bare or cast to date. */
+  @Test
+  public void testCastDateTimestamp() throws Exception {
+    testParquetFilterPushDown("col_date = timestamp '2017-01-02 00:00:00'", 2, 1);
+    testParquetFilterPushDown("col_date = cast(timestamp '2017-01-02 00:00:00' as date)", 2, 1);
+    testParquetFilterPushDown("col_date > timestamp '2017-01-02 21:01:15'", 6, 2);
+    testParquetFilterPushDown("col_date between timestamp '2017-01-03 08:50:00' and timestamp '2017-01-06 06:17:59'", 5, 2);
+  }
+
+  /** Time column against string literals, bare or cast to varchar / time. */
+  @Test
+  public void testCastTimeVarchar() throws Exception {
+    testParquetFilterPushDown("col_time = '00:00:00'", 2, 1);
+    testParquetFilterPushDown("col_time = cast('00:00:00' as varchar)", 2, 1);
+    testParquetFilterPushDown("col_time = cast('00:00:00' as time)", 2, 1);
+    testParquetFilterPushDown("col_time > '15:25:00'", 2, 2);
+    testParquetFilterPushDown("col_time between '08:00:00' and '23:00:00'", 4, 2);
+  }
+
+  /** Time column against timestamp literals, bare or cast to time. */
+  @Test
+  public void testCastTimeTimestamp() throws Exception {
+    testParquetFilterPushDown("col_time = timestamp '2017-01-01 05:46:11'", 1, 2);
+    testParquetFilterPushDown("col_time = cast(timestamp '2017-01-01 05:46:11' as time)", 1, 2);
+    testParquetFilterPushDown("col_time = timestamp '2017-01-01 00:00:00'", 2, 1);
+    testParquetFilterPushDown("col_time > timestamp '2017-01-01 15:25:00'", 2, 2);
+    testParquetFilterPushDown("col_time between timestamp '2017-01-01 08:00:00' and timestamp '2017-01-01 23:00:00'", 4, 2);
+  }
+
+  /** Time column against date literals, bare or cast to time. */
+  @Test
+  public void testCastTimeDate() throws Exception {
+    testParquetFilterPushDown("col_time = date '2017-01-01'", 2, 1);
+    testParquetFilterPushDown("col_time = cast(date '2017-01-01' as time)", 2, 1);
+    testParquetFilterPushDown("col_time > date '2017-01-01'", 7, 3);
+    testParquetFilterPushDown("col_time between date '2017-01-01' and date '2017-01-02'", 2, 1);
+  }
+
+  /**
+   * Selects everything from {@link #TABLE_NAME} with the given predicate, asserts the number of
+   * returned rows and then checks that the plan matches {@code numFiles=<expectedFilesNumber>}.
+   *
+   * @param predicate text placed after {@code where} in the query
+   * @param expectedRowCount number of rows the query must return
+   * @param expectedFilesNumber value expected after {@code numFiles=} in the plan
+   */
+  private void testParquetFilterPushDown(String predicate, int expectedRowCount, int expectedFilesNumber) throws Exception {
+    final String sql = String.format("select * from `%s` where %s", TABLE_NAME, predicate);
+
+    assertEquals("Expected and actual row count should match", expectedRowCount, testSql(sql));
+
+    final String[] expectedPatterns = {"numFiles=" + expectedFilesNumber};
+    final String[] emptyPatterns = {};
+    testPlanMatchingPatterns(sql, expectedPatterns, emptyPatterns);
+  }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_0.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_0.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_0.parquet
new file mode 100644
index 0000000..d3fb9ad
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_0.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_1.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_1.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_1.parquet
new file mode 100644
index 0000000..bca65a2
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_1.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_2.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_2.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_2.parquet
new file mode 100644
index 0000000..942947b
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_2.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/3bc4e319/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_3.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_3.parquet b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_3.parquet
new file mode 100644
index 0000000..48c717b
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquetFilterPush/blnTbl/0_0_3.parquet differ