You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2018/03/31 19:47:39 UTC
[1/3] drill git commit: DRILL-6259: Support parquet filter push down for complex types
Repository: drill
Updated Branches:
refs/heads/master a264e7feb -> 9a6cb59b9
DRILL-6259: Support parquet filter push down for complex types
close apache/drill#1173
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/4ee5625d
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/4ee5625d
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/4ee5625d
Branch: refs/heads/master
Commit: 4ee5625d57bd73d3d82b45f687a8574ea6660f8e
Parents: a264e7f
Author: Arina Ielchiieva <ar...@gmail.com>
Authored: Tue Mar 13 19:54:25 2018 +0200
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Mar 30 22:45:33 2018 -0700
----------------------------------------------------------------------
.../sig/ConstantExpressionIdentifier.java | 6 +
.../exec/expr/ExpressionTreeMaterializer.java | 3 +-
.../exec/expr/stat/ParquetIsPredicates.java | 23 ++++
.../exec/expr/stat/RangeExprEvaluator.java | 27 ++--
.../drill/exec/expr/stat/TypedFieldExpr.java | 63 ----------
.../exec/planner/common/DrillRelOptUtil.java | 14 +--
.../logical/DrillPushFilterPastProjectRule.java | 14 ++-
.../store/parquet/ParquetFilterBuilder.java | 80 ++++--------
.../store/parquet/ParquetPushDownFilter.java | 13 +-
.../parquet/stat/ParquetMetaStatCollector.java | 29 ++---
...estParquetFilterPushDownForComplexTypes.java | 124 +++++++++++++++++++
.../resources/parquet/users/users_1.parquet | Bin 0 -> 657 bytes
.../resources/parquet/users/users_2.parquet | Bin 0 -> 641 bytes
.../resources/parquet/users/users_3.parquet | Bin 0 -> 588 bytes
.../resources/parquet/users/users_4.parquet | Bin 0 -> 653 bytes
.../resources/parquet/users/users_5.parquet | Bin 0 -> 583 bytes
.../resources/parquet/users/users_6.parquet | Bin 0 -> 627 bytes
.../resources/parquet/users/users_7.parquet | Bin 0 -> 662 bytes
.../drill/common/expression/SchemaPath.java | 29 +++--
.../drill/common/expression/TypedFieldExpr.java | 60 +++++++++
.../visitors/AbstractExprVisitor.java | 6 +-
.../expression/visitors/AggregateChecker.java | 5 +
.../expression/visitors/ConstantChecker.java | 5 +
.../common/expression/visitors/ExprVisitor.java | 54 ++++----
.../visitors/ExpressionValidator.java | 5 +
25 files changed, 365 insertions(+), 195 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/compile/sig/ConstantExpressionIdentifier.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/compile/sig/ConstantExpressionIdentifier.java b/exec/java-exec/src/main/java/org/apache/drill/exec/compile/sig/ConstantExpressionIdentifier.java
index 1e71773..1a0b7d5 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/compile/sig/ConstantExpressionIdentifier.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/compile/sig/ConstantExpressionIdentifier.java
@@ -31,6 +31,7 @@ import org.apache.drill.common.expression.IfExpression;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
@@ -231,4 +232,9 @@ public class ConstantExpressionIdentifier implements ExprVisitor<Boolean, Identi
public Boolean visitParameter(ValueExpressions.ParameterExpression e, IdentityHashMap<LogicalExpression, Object> value) throws RuntimeException {
return false;
}
+
+ @Override
+ public Boolean visitTypedFieldExpr(TypedFieldExpr e, IdentityHashMap<LogicalExpression, Object> value) throws RuntimeException {
+ return false;
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
index 23df262..f1b50c9 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/ExpressionTreeMaterializer.java
@@ -44,6 +44,7 @@ import org.apache.drill.common.expression.IfExpression.IfCondition;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
@@ -78,7 +79,6 @@ import org.apache.drill.exec.expr.fn.DrillComplexWriterFuncHolder;
import org.apache.drill.exec.expr.fn.DrillFuncHolder;
import org.apache.drill.exec.expr.fn.ExceptionFunction;
import org.apache.drill.exec.expr.fn.FunctionLookupContext;
-import org.apache.drill.exec.expr.stat.TypedFieldExpr;
import org.apache.drill.exec.record.TypedFieldId;
import org.apache.drill.exec.record.VectorAccessible;
import org.apache.drill.exec.resolver.FunctionResolver;
@@ -323,7 +323,6 @@ public class ExpressionTreeMaterializer {
} else {
logger.warn("Unable to find value vector of path {}, returning null-int instance.", path);
return new TypedFieldExpr(path, Types.OPTIONAL_INT);
- // return NullExpression.INSTANCE;
}
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
index a58ce7c..bb8f3ac 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/ParquetIsPredicates.java
@@ -18,6 +18,8 @@ package org.apache.drill.exec.expr.stat;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.LogicalExpressionBase;
+import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.visitors.ExprVisitor;
import org.apache.parquet.column.statistics.Statistics;
@@ -29,6 +31,7 @@ import java.util.List;
* IS predicates for parquet filter pushdown.
*/
public class ParquetIsPredicates {
+
public static abstract class ParquetIsPredicate extends LogicalExpressionBase implements ParquetFilterPredicate {
protected final LogicalExpression expr;
@@ -54,12 +57,22 @@ public class ParquetIsPredicates {
* IS NULL predicate.
*/
public static class IsNullPredicate extends ParquetIsPredicate {
+ private final boolean isArray;
+
public IsNullPredicate(LogicalExpression expr) {
super(expr);
+ this.isArray = isArray(expr);
}
@Override
public boolean canDrop(RangeExprEvaluator evaluator) {
+
+ // for arrays we cannot determine the exact number of nulls from the statistics
+ // e.g. [1,2,3] vs [1,2] -> in the second case 3 is absent and thus null, but the statistics show no nulls
+ if (isArray) {
+ return false;
+ }
+
Statistics exprStat = expr.accept(evaluator, null);
if (!ParquetPredicatesHelper.hasStats(exprStat)) {
@@ -73,6 +86,16 @@ public class ParquetIsPredicates {
return false;
}
}
+
+ private boolean isArray(LogicalExpression expression) {
+ if (expression instanceof TypedFieldExpr) {
+ TypedFieldExpr typedFieldExpr = (TypedFieldExpr) expression;
+ SchemaPath schemaPath = typedFieldExpr.getPath();
+ return schemaPath.isArray();
+ }
+ return false;
+ }
+
}
/**
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
index 2d241dc..2cc6a70 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/RangeExprEvaluator.java
@@ -17,11 +17,11 @@
*/
package org.apache.drill.exec.expr.stat;
-import com.google.common.base.Preconditions;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.expression.FunctionHolderExpression;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.fn.CastFunctions;
import org.apache.drill.common.expression.fn.FuncHolder;
@@ -70,17 +70,20 @@ public class RangeExprEvaluator extends AbstractExprVisitor<Statistics, Void, Ru
@Override
public Statistics visitUnknown(LogicalExpression e, Void value) throws RuntimeException {
- if (e instanceof TypedFieldExpr) {
- TypedFieldExpr fieldExpr = (TypedFieldExpr) e;
- final ColumnStatistics columnStatistics = columnStatMap.get(fieldExpr.getPath());
- if (columnStatistics != null) {
- return columnStatistics.getStatistics();
- } else if (fieldExpr.getMajorType().equals(Types.OPTIONAL_INT)) {
- // field does not exist.
- IntStatistics intStatistics = new IntStatistics();
- intStatistics.setNumNulls(rowCount); // all values are nulls
- return intStatistics;
- }
+ // do nothing for the unknown expression
+ return null;
+ }
+
+ @Override
+ public Statistics visitTypedFieldExpr(TypedFieldExpr typedFieldExpr, Void value) throws RuntimeException {
+ final ColumnStatistics columnStatistics = columnStatMap.get(typedFieldExpr.getPath());
+ if (columnStatistics != null) {
+ return columnStatistics.getStatistics();
+ } else if (typedFieldExpr.getMajorType().equals(Types.OPTIONAL_INT)) {
+ // field does not exist.
+ IntStatistics intStatistics = new IntStatistics();
+ intStatistics.setNumNulls(rowCount); // all values are nulls
+ return intStatistics;
}
return null;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/TypedFieldExpr.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/TypedFieldExpr.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/TypedFieldExpr.java
deleted file mode 100644
index 4287929..0000000
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/stat/TypedFieldExpr.java
+++ /dev/null
@@ -1,63 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.drill.exec.expr.stat;
-
-import com.google.common.collect.Iterators;
-import org.apache.drill.common.expression.LogicalExpression;
-import org.apache.drill.common.expression.LogicalExpressionBase;
-import org.apache.drill.common.expression.SchemaPath;
-import org.apache.drill.common.expression.visitors.ExprVisitor;
-import org.apache.drill.common.types.TypeProtos;
-
-import java.util.Iterator;
-
-public class TypedFieldExpr extends LogicalExpressionBase {
- TypeProtos.MajorType type;
- SchemaPath path;
-
- public TypedFieldExpr(SchemaPath path, TypeProtos.MajorType type) {
- super(path.getPosition());
- this.path = path;
- this.type = type;
- }
-
- @Override
- public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
- return visitor.visitUnknown(this, value);
- }
-
- @Override
- public Iterator<LogicalExpression> iterator() {
- return Iterators.emptyIterator();
- }
-
- @Override
- public TypeProtos.MajorType getMajorType() {
- return this.type;
- }
-
- @Override
- public String toString() {
- return this.path.getRootSegment().getPath() + "(" + type.getMinorType() + "_" + type.getMode() +")";
- }
-
- public SchemaPath getPath() {
- return this.path;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
index d5c8d94..dff83f6 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillRelOptUtil.java
@@ -18,6 +18,7 @@
package org.apache.drill.exec.planner.common;
import java.util.AbstractList;
+import java.util.Collection;
import java.util.List;
import com.google.common.collect.Lists;
@@ -178,22 +179,20 @@ public abstract class DrillRelOptUtil {
}
/**
- * Travesal RexNode to find the item/flattern operator. Continue search if RexNode has a
+ * Traverses RexNode to find at least one operator in the given collection. Continues the search if RexNode has a
* RexInputRef which refers to a RexNode in project expressions.
*
* @param node : RexNode to search
* @param projExprs : the list of project expressions. Empty list means there is No project operator underneath.
+ * @param operators collection of operators to find
* @return : Return null if there is NONE; return the first appearance of item/flatten RexCall.
*/
- public static RexCall findItemOrFlatten(
- final RexNode node,
- final List<RexNode> projExprs) {
+ public static RexCall findOperators(final RexNode node, final List<RexNode> projExprs, final Collection<String> operators) {
try {
RexVisitor<Void> visitor =
new RexVisitorImpl<Void>(true) {
public Void visitCall(RexCall call) {
- if ("item".equals(call.getOperator().getName().toLowerCase()) ||
- "flatten".equals(call.getOperator().getName().toLowerCase())) {
+ if (operators.contains(call.getOperator().getName().toLowerCase())) {
throw new Util.FoundOne(call); /* throw exception to interrupt tree walk (this is similar to
other utility methods in RexUtil.java */
}
@@ -208,8 +207,7 @@ public abstract class DrillRelOptUtil {
RexNode n = projExprs.get(index);
if (n instanceof RexCall) {
RexCall r = (RexCall) n;
- if ("item".equals(r.getOperator().getName().toLowerCase()) ||
- "flatten".equals(r.getOperator().getName().toLowerCase())) {
+ if (operators.contains(r.getOperator().getName().toLowerCase())) {
throw new Util.FoundOne(r);
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushFilterPastProjectRule.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushFilterPastProjectRule.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushFilterPastProjectRule.java
index d24abcc..7b978be 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushFilterPastProjectRule.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/logical/DrillPushFilterPastProjectRule.java
@@ -30,13 +30,23 @@ import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.util.Pair;
import org.apache.drill.exec.planner.common.DrillRelOptUtil;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
public class DrillPushFilterPastProjectRule extends RelOptRule {
public final static RelOptRule INSTANCE = new DrillPushFilterPastProjectRule();
- protected DrillPushFilterPastProjectRule() {
+ private static final Collection<String> BANNED_OPERATORS;
+
+ static {
+ BANNED_OPERATORS = new ArrayList<>(2);
+ BANNED_OPERATORS.add("flatten");
+ BANNED_OPERATORS.add("item");
+ }
+
+ private DrillPushFilterPastProjectRule() {
super(
operand(
LogicalFilter.class,
@@ -60,7 +70,7 @@ public class DrillPushFilterPastProjectRule extends RelOptRule {
for (final RexNode pred : predList) {
- if (DrillRelOptUtil.findItemOrFlatten(pred, projRel.getProjects()) == null) {
+ if (DrillRelOptUtil.findOperators(pred, projRel.getProjects(), BANNED_OPERATORS) == null) {
qualifiedPredList.add(pred);
} else {
unqualifiedPredList.add(pred);
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
index a9e55dd..2d245a1 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
@@ -19,8 +19,7 @@ import com.google.common.collect.ImmutableSet;
import org.apache.drill.common.expression.BooleanOperator;
import org.apache.drill.common.expression.FunctionHolderExpression;
import org.apache.drill.common.expression.LogicalExpression;
-import org.apache.drill.common.expression.PathSegment;
-import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.fn.CastFunctions;
import org.apache.drill.common.expression.fn.FuncHolder;
@@ -41,7 +40,6 @@ import org.apache.drill.exec.expr.holders.ValueHolder;
import org.apache.drill.exec.expr.stat.ParquetBooleanPredicates;
import org.apache.drill.exec.expr.stat.ParquetComparisonPredicates;
import org.apache.drill.exec.expr.stat.ParquetIsPredicates;
-import org.apache.drill.exec.expr.stat.TypedFieldExpr;
import org.apache.drill.exec.ops.UdfUtilities;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -62,11 +60,12 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
/**
* @param expr materialized filter expression
* @param constantBoundaries set of constant expressions
- * @param udfUtilities
+ * @param udfUtilities udf utilities
+ *
+ * @return logical expression
*/
public static LogicalExpression buildParquetFilterPredicate(LogicalExpression expr, final Set<LogicalExpression> constantBoundaries, UdfUtilities udfUtilities) {
- final LogicalExpression predicate = expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries);
- return predicate;
+ return expr.accept(new ParquetFilterBuilder(udfUtilities), constantBoundaries);
}
private ParquetFilterBuilder(UdfUtilities udfUtilities) {
@@ -75,19 +74,16 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
@Override
public LogicalExpression visitUnknown(LogicalExpression e, Set<LogicalExpression> value) {
- if (e instanceof TypedFieldExpr &&
- ! containsArraySeg(((TypedFieldExpr) e).getPath()) &&
- e.getMajorType().getMode() != TypeProtos.DataMode.REPEATED) {
- // A filter is not qualified for push down, if
- // 1. it contains an array segment : a.b[1], a.b[1].c.d
- // 2. it's repeated type.
- return e;
- }
-
+ // for the unknown expression, do nothing
return null;
}
@Override
+ public LogicalExpression visitTypedFieldExpr(TypedFieldExpr typedFieldExpr, Set<LogicalExpression> value) throws RuntimeException {
+ return typedFieldExpr;
+ }
+
+ @Override
public LogicalExpression visitIntConstant(ValueExpressions.IntExpression intExpr, Set<LogicalExpression> value)
throws RuntimeException {
return intExpr;
@@ -161,18 +157,6 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
}
}
- private boolean containsArraySeg(final SchemaPath schemaPath) {
- PathSegment seg = schemaPath.getRootSegment();
-
- while (seg != null) {
- if (seg.isArray()) {
- return true;
- }
- seg = seg.getChild();
- }
- return false;
- }
-
private LogicalExpression getValueExpressionFromConst(ValueHolder holder, TypeProtos.MinorType type) {
switch (type) {
case INT:
@@ -229,13 +213,9 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
}
if (CastFunctions.isCastFunction(funcName)) {
- List<LogicalExpression> newArgs = new ArrayList();
- for (LogicalExpression arg : funcHolderExpr.args) {
- final LogicalExpression newArg = arg.accept(this, value);
- if (newArg == null) {
- return null;
- }
- newArgs.add(newArg);
+ List<LogicalExpression> newArgs = generateNewExpressions(funcHolderExpr.args, value);
+ if (newArgs == null) {
+ return null;
}
return funcHolderExpr.copy(newArgs);
@@ -244,15 +224,22 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
}
}
- private LogicalExpression handleCompareFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) {
- List<LogicalExpression> newArgs = new ArrayList();
-
- for (LogicalExpression arg : functionHolderExpression.args) {
- LogicalExpression newArg = arg.accept(this, value);
+ private List<LogicalExpression> generateNewExpressions(List<LogicalExpression> expressions, Set<LogicalExpression> value) {
+ List<LogicalExpression> newExpressions = new ArrayList<>();
+ for (LogicalExpression arg : expressions) {
+ final LogicalExpression newArg = arg.accept(this, value);
if (newArg == null) {
return null;
}
- newArgs.add(newArg);
+ newExpressions.add(newArg);
+ }
+ return newExpressions;
+ }
+
+ private LogicalExpression handleCompareFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) {
+ List<LogicalExpression> newArgs = generateNewExpressions(functionHolderExpression.args, value);
+ if (newArgs == null) {
+ return null;
}
String funcName = ((DrillSimpleFuncHolder) functionHolderExpression.getHolder()).getRegisteredNames()[0];
@@ -306,19 +293,6 @@ public class ParquetFilterBuilder extends AbstractExprVisitor<LogicalExpression,
}
}
- private LogicalExpression handleCastFunction(FunctionHolderExpression functionHolderExpression, Set<LogicalExpression> value) {
- for (LogicalExpression arg : functionHolderExpression.args) {
- LogicalExpression newArg = arg.accept(this, value);
- if (newArg == null) {
- return null;
- }
- }
-
- String funcName = ((DrillSimpleFuncHolder) functionHolderExpression.getHolder()).getRegisteredNames()[0];
-
- return null;
- }
-
private static boolean isCompareFunction(String funcName) {
return COMPARE_FUNCTIONS_SET.contains(funcName);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
index 1ec10d8..4257150 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetPushDownFilter.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -39,6 +39,8 @@ import org.apache.drill.exec.planner.physical.ProjectPrel;
import org.apache.drill.exec.planner.physical.ScanPrel;
import org.apache.drill.exec.store.StoragePluginOptimizerRule;
+import java.util.ArrayList;
+import java.util.Collection;
import java.util.List;
import java.util.concurrent.TimeUnit;
@@ -46,6 +48,13 @@ public abstract class ParquetPushDownFilter extends StoragePluginOptimizerRule {
private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetPushDownFilter.class);
+ private static final Collection<String> BANNED_OPERATORS;
+
+ static {
+ BANNED_OPERATORS = new ArrayList<>(1);
+ BANNED_OPERATORS.add("flatten");
+ }
+
public static RelOptRule getFilterOnProject(OptimizerRulesContext optimizerRulesContext) {
return new ParquetPushDownFilter(
RelOptHelper.some(FilterPrel.class, RelOptHelper.some(ProjectPrel.class, RelOptHelper.any(ScanPrel.class))),
@@ -127,7 +136,7 @@ public abstract class ParquetPushDownFilter extends StoragePluginOptimizerRule {
final List<RexNode> qualifiedPredList = Lists.newArrayList();
for (final RexNode pred : predList) {
- if (DrillRelOptUtil.findItemOrFlatten(pred, ImmutableList.<RexNode>of()) == null) {
+ if (DrillRelOptUtil.findOperators(pred, ImmutableList.of(), BANNED_OPERATORS) == null) {
qualifiedPredList.add(pred);
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
index 4991a22..a8c1218 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/stat/ParquetMetaStatCollector.java
@@ -41,11 +41,11 @@ import java.util.Set;
import java.util.concurrent.TimeUnit;
public class ParquetMetaStatCollector implements ColumnStatCollector{
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetMetaStatCollector.class);
+ private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ParquetMetaStatCollector.class);
- private final Metadata.ParquetTableMetadataBase parquetTableMetadata;
- private final List<? extends Metadata.ColumnMetadata> columnMetadataList;
- final Map<String, String> implicitColValues;
+ private final Metadata.ParquetTableMetadataBase parquetTableMetadata;
+ private final List<? extends Metadata.ColumnMetadata> columnMetadataList;
+ private final Map<String, String> implicitColValues;
public ParquetMetaStatCollector(Metadata.ParquetTableMetadataBase parquetTableMetadata,
List<? extends Metadata.ColumnMetadata> columnMetadataList, Map<String, String> implicitColValues) {
@@ -82,11 +82,11 @@ public class ParquetMetaStatCollector implements ColumnStatCollector{
columnMetadataMap.put(schemaPath, columnMetadata);
}
- for (final SchemaPath schemaPath : fields) {
+ for (final SchemaPath field : fields) {
final PrimitiveType.PrimitiveTypeName primitiveType;
final OriginalType originalType;
- final Metadata.ColumnMetadata columnMetadata = columnMetadataMap.get(schemaPath);
+ final Metadata.ColumnMetadata columnMetadata = columnMetadataMap.get(field.getUnIndexed());
if (columnMetadata != null) {
final Object min = columnMetadata.getMinValue();
@@ -95,7 +95,6 @@ public class ParquetMetaStatCollector implements ColumnStatCollector{
primitiveType = this.parquetTableMetadata.getPrimitiveType(columnMetadata.getName());
originalType = this.parquetTableMetadata.getOriginalType(columnMetadata.getName());
- final Integer repetitionLevel = this.parquetTableMetadata.getRepetitionLevel(columnMetadata.getName());
int precision = 0;
int scale = 0;
// ColumnTypeMetadata_v3 stores information about scale and precision
@@ -106,16 +105,16 @@ public class ParquetMetaStatCollector implements ColumnStatCollector{
precision = columnTypeInfo.precision;
}
- statMap.put(schemaPath, getStat(min, max, numNull, primitiveType, originalType, repetitionLevel, scale, precision));
+ statMap.put(field, getStat(min, max, numNull, primitiveType, originalType, scale, precision));
} else {
- final String columnName = schemaPath.getRootSegment().getPath();
+ final String columnName = field.getRootSegment().getPath();
if (implicitColValues.containsKey(columnName)) {
TypeProtos.MajorType type = Types.required(TypeProtos.MinorType.VARCHAR);
Statistics stat = new BinaryStatistics();
stat.setNumNulls(0);
byte[] val = implicitColValues.get(columnName).getBytes();
stat.setMinMaxFromBytes(val, val);
- statMap.put(schemaPath, new ColumnStatistics(stat, type));
+ statMap.put(field, new ColumnStatistics(stat, type));
}
}
}
@@ -128,7 +127,7 @@ public class ParquetMetaStatCollector implements ColumnStatCollector{
}
/**
- * Builds column statistics using given primitiveType, originalType, repetitionLevel, scale,
+ * Builds column statistics using given primitiveType, originalType, scale,
* precision, numNull, min and max values.
*
* @param min min value for statistics
@@ -136,24 +135,18 @@ public class ParquetMetaStatCollector implements ColumnStatCollector{
* @param numNull num_nulls for statistics
* @param primitiveType type that determines statistics class
* @param originalType type that determines statistics class
- * @param repetitionLevel field repetition level
* @param scale scale value (used for DECIMAL type)
* @param precision precision value (used for DECIMAL type)
* @return column statistics
*/
private ColumnStatistics getStat(Object min, Object max, Long numNull,
PrimitiveType.PrimitiveTypeName primitiveType, OriginalType originalType,
- Integer repetitionLevel, int scale, int precision) {
+ int scale, int precision) {
Statistics stat = Statistics.getStatsBasedOnType(primitiveType);
Statistics convertedStat = stat;
TypeProtos.MajorType type = ParquetGroupScan.getType(primitiveType, originalType, scale, precision);
- // Change to repeated if repetitionLevel > 0
- if (repetitionLevel != null && repetitionLevel > 0) {
- type = Types.withScaleAndPrecision(type.getMinorType(), TypeProtos.DataMode.REPEATED, scale, precision);
- }
-
if (numNull != null) {
stat.setNumNulls(numNull);
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForComplexTypes.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForComplexTypes.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForComplexTypes.java
new file mode 100644
index 0000000..5cbe5cf
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDownForComplexTypes.java
@@ -0,0 +1,124 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.store.parquet;
+
+import org.apache.drill.PlanTestBase;
+import org.apache.drill.exec.util.StoragePluginTestUtils;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.nio.file.Paths;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestParquetFilterPushDownForComplexTypes extends PlanTestBase {
+
+ private static final String TABLE_PATH = "parquet/users";
+ private static final String TABLE_NAME = String.format("%s.`%s`", StoragePluginTestUtils.DFS_PLUGIN_NAME, TABLE_PATH);
+
+ @BeforeClass
+ public static void copyData() {
+ /*
+ Parquet schema:
+ message complex_users {
+ required group user {
+ required int32 id;
+ optional int32 age;
+ repeated int32 hobby_ids;
+ optional boolean active;
+ }
+ }
+
+ Data set:
+ users_1.parquet
+ {"id":1,"age":25,"hobby_ids":[1,2,3],"active":true}
+ {"id":2,"age":28,"hobby_ids":[1,2,5],"active":true}
+
+ users_2.parquet
+ {"id":3,"age":31,"hobby_ids":[1,2,3],"active":true}
+ {"id":4,"age":32,"hobby_ids":[4,10,18],"active":false}
+
+ users_3.parquet
+ {"id":5,"hobby_ids":[11,12,13,14,15]}
+
+ users_4.parquet
+ {"id":6,"age":41,"hobby_ids":[20,21,22],"active":true}
+ {"id":7,"hobby_ids":[20,21,22,24]}
+
+ users_5.parquet
+ {"id":8,"age":41,"hobby_ids":[],"active":false}
+
+ users_6.parquet
+ {"id":9,"age":20,"hobby_ids":[],"active":false}
+ {"id":10,"age":21,"hobby_ids":[26,28,29]}
+
+ users_7.parquet
+ {"id":11,"age":23,"hobby_ids":[10,11,12],"active":true}
+ {"id":12,"age":35,"hobby_ids":[22,23,24],"active":false}
+ {"id":13,"age":25,"hobby_ids":[14,22,26]}
+
+ */
+ dirTestWatcher.copyResourceToRoot(Paths.get(TABLE_PATH));
+ }
+
+ @Test
+ public void testPushDownArray() throws Exception {
+ testParquetFilterPushDown("t.`user`.hobby_ids[0] = 1", 3, 2);
+ testParquetFilterPushDown("t.`user`.hobby_ids[0] = 100", 0, 1);
+ testParquetFilterPushDown("t.`user`.hobby_ids[0] <> 1", 8, 6);
+ testParquetFilterPushDown("t.`user`.hobby_ids[2] > 20", 5, 3);
+ testParquetFilterPushDown("t.`user`.hobby_ids[0] between 10 and 20", 5, 4);
+ testParquetFilterPushDown("t.`user`.hobby_ids[4] = 15", 1, 3);
+ testParquetFilterPushDown("t.`user`.hobby_ids[2] is not null", 11, 6);
+ testParquetFilterPushDown("t.`user`.hobby_ids[3] is null", 11, 7);
+ }
+
+ @Test
+ public void testPushDownComplexIntColumn() throws Exception {
+ testParquetFilterPushDown("t.`user`.age = 31", 1, 2);
+ testParquetFilterPushDown("t.`user`.age = 1", 0, 1);
+ testParquetFilterPushDown("t.`user`.age <> 20", 10, 6);
+ testParquetFilterPushDown("t.`user`.age > 30", 5, 4);
+ testParquetFilterPushDown("t.`user`.age between 20 and 30", 6, 3);
+ testParquetFilterPushDown("t.`user`.age is not null", 11, 6);
+ testParquetFilterPushDown("t.`user`.age is null", 2, 2);
+ }
+
+ @Test
+ public void testPushDownComplexBooleanColumn() throws Exception {
+ testParquetFilterPushDown("t.`user`.active is true", 5, 4);
+ testParquetFilterPushDown("t.`user`.active is not true", 8, 6);
+ testParquetFilterPushDown("t.`user`.active is false", 4, 4);
+ testParquetFilterPushDown("t.`user`.active is not false", 9, 6);
+ testParquetFilterPushDown("t.`user`.active is not null", 9, 6);
+ testParquetFilterPushDown("t.`user`.active is null", 4, 4);
+ }
+
+
+ private void testParquetFilterPushDown(String predicate, int expectedRowCount, int expectRowGroupsNumber) throws Exception {
+ String query = String.format("select * from %s t where %s", TABLE_NAME, predicate);
+
+ int actualRowCount = testSql(query);
+ assertEquals("Expected and actual row count should match", expectedRowCount, actualRowCount);
+
+ String expectRowGroupsNumberPattern = "numRowGroups=" + expectRowGroupsNumber;
+ testPlanMatchingPatterns(query, new String[] {expectRowGroupsNumberPattern}, new String[] {});
+ }
+
+}
+
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_1.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_1.parquet b/exec/java-exec/src/test/resources/parquet/users/users_1.parquet
new file mode 100644
index 0000000..9d9d511
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_1.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_2.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_2.parquet b/exec/java-exec/src/test/resources/parquet/users/users_2.parquet
new file mode 100644
index 0000000..f48c43a
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_2.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_3.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_3.parquet b/exec/java-exec/src/test/resources/parquet/users/users_3.parquet
new file mode 100644
index 0000000..1c1aa8e
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_3.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_4.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_4.parquet b/exec/java-exec/src/test/resources/parquet/users/users_4.parquet
new file mode 100644
index 0000000..66a7288
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_4.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_5.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_5.parquet b/exec/java-exec/src/test/resources/parquet/users/users_5.parquet
new file mode 100644
index 0000000..01aafd1
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_5.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_6.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_6.parquet b/exec/java-exec/src/test/resources/parquet/users/users_6.parquet
new file mode 100644
index 0000000..c12105a
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_6.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/exec/java-exec/src/test/resources/parquet/users/users_7.parquet
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/parquet/users/users_7.parquet b/exec/java-exec/src/test/resources/parquet/users/users_7.parquet
new file mode 100644
index 0000000..00f8016
Binary files /dev/null and b/exec/java-exec/src/test/resources/parquet/users/users_7.parquet differ
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java b/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
index 583046a..9ea57b1 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/SchemaPath.java
@@ -90,19 +90,25 @@ public class SchemaPath extends LogicalExpressionBase {
}
public NamePart getAsNamePart() {
- return getNamePart(rootSegment);
+ return getNamePart(rootSegment, false);
}
- private static NamePart getNamePart(PathSegment s) {
+ private static NamePart getNamePart(PathSegment s, boolean skipArraySegment) {
if (s == null) {
return null;
}
NamePart.Builder b = NamePart.newBuilder();
if (s.getChild() != null) {
- b.setChild(getNamePart(s.getChild()));
+ NamePart namePart = getNamePart(s.getChild(), skipArraySegment);
+ if (namePart != null) {
+ b.setChild(namePart);
+ }
}
if (s.isArray()) {
+ if (skipArraySegment) {
+ return null;
+ }
if (s.getArraySegment().hasIndex()) {
throw new IllegalStateException("You cannot convert a indexed schema path to a NamePart. NameParts can only reference Vectors, not individual records or values.");
}
@@ -129,6 +135,18 @@ public class SchemaPath extends LogicalExpressionBase {
}
/**
+ * Returns schema path with array indexes removed.
+ * Is used to find column statistics in parquet metadata.
+ * Example: a.b.c[0] -> a.b.c
+ *
+ * @return un-indexed schema path
+ */
+ public SchemaPath getUnIndexed() {
+ NamePart namePart = getNamePart(rootSegment, true);
+ return create(namePart);
+ }
+
+ /**
* Parses input string using the same rules which are used for the field in the query.
* If a string contains dot outside back-ticks, or there are no backticks in the string,
* will be created {@link SchemaPath} with the {@link NameSegment}
@@ -255,11 +273,6 @@ public class SchemaPath extends LogicalExpressionBase {
return new SchemaPath(newRoot);
}
- public SchemaPath getUnindexedArrayChild() {
- NameSegment newRoot = rootSegment.cloneWithNewChild(new ArraySegment(null));
- return new SchemaPath(newRoot);
- }
-
public SchemaPath getChild(int index) {
NameSegment newRoot = rootSegment.cloneWithNewChild(new ArraySegment(index));
return new SchemaPath(newRoot);
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/TypedFieldExpr.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/TypedFieldExpr.java b/logical/src/main/java/org/apache/drill/common/expression/TypedFieldExpr.java
new file mode 100644
index 0000000..93c7a3c
--- /dev/null
+++ b/logical/src/main/java/org/apache/drill/common/expression/TypedFieldExpr.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.common.expression;
+
+import org.apache.drill.common.expression.visitors.ExprVisitor;
+import org.apache.drill.common.types.TypeProtos;
+
+import java.util.Collections;
+import java.util.Iterator;
+
+public class TypedFieldExpr extends LogicalExpressionBase {
+ private final TypeProtos.MajorType type;
+ private final SchemaPath path;
+
+ public TypedFieldExpr(SchemaPath path, TypeProtos.MajorType type) {
+ super(path.getPosition());
+ this.path = path;
+ this.type = type;
+ }
+
+ @Override
+ public <T, V, E extends Exception> T accept(ExprVisitor<T, V, E> visitor, V value) throws E {
+ return visitor.visitTypedFieldExpr(this, value);
+ }
+
+ @Override
+ public Iterator<LogicalExpression> iterator() {
+ return Collections.emptyIterator();
+ }
+
+ @Override
+ public TypeProtos.MajorType getMajorType() {
+ return this.type;
+ }
+
+ @Override
+ public String toString() {
+ return this.path.getRootSegment().getPath() + "(" + type.getMinorType() + "_" + type.getMode() +")";
+ }
+
+ public SchemaPath getPath() {
+ return this.path;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/visitors/AbstractExprVisitor.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/visitors/AbstractExprVisitor.java b/logical/src/main/java/org/apache/drill/common/expression/visitors/AbstractExprVisitor.java
index 189e33d..5356813 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/visitors/AbstractExprVisitor.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/visitors/AbstractExprVisitor.java
@@ -26,6 +26,7 @@ import org.apache.drill.common.expression.IfExpression;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
import org.apache.drill.common.expression.ValueExpressions.DateExpression;
@@ -45,7 +46,6 @@ import org.apache.drill.common.expression.ValueExpressions.TimeExpression;
import org.apache.drill.common.expression.ValueExpressions.TimeStampExpression;
public abstract class AbstractExprVisitor<T, VAL, EXCEP extends Exception> implements ExprVisitor<T, VAL, EXCEP> {
- static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractExprVisitor.class);
@Override
public T visitFunctionCall(FunctionCall call, VAL value) throws EXCEP {
@@ -178,4 +178,8 @@ public abstract class AbstractExprVisitor<T, VAL, EXCEP extends Exception> imple
return visitUnknown(e, value);
}
+ @Override
+ public T visitTypedFieldExpr(TypedFieldExpr e, VAL value) throws EXCEP {
+ return visitUnknown(e, value);
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/visitors/AggregateChecker.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/visitors/AggregateChecker.java b/logical/src/main/java/org/apache/drill/common/expression/visitors/AggregateChecker.java
index 9a3cdcc..f6fe89d 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/visitors/AggregateChecker.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/visitors/AggregateChecker.java
@@ -28,6 +28,7 @@ import org.apache.drill.common.expression.IfExpression.IfCondition;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
@@ -209,4 +210,8 @@ public final class AggregateChecker implements ExprVisitor<Boolean, ErrorCollect
return false;
}
+ @Override
+ public Boolean visitTypedFieldExpr(TypedFieldExpr e, ErrorCollector value) throws RuntimeException {
+ return false;
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/visitors/ConstantChecker.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/visitors/ConstantChecker.java b/logical/src/main/java/org/apache/drill/common/expression/visitors/ConstantChecker.java
index 67fe12f..0468bc2 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/visitors/ConstantChecker.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/visitors/ConstantChecker.java
@@ -28,6 +28,7 @@ import org.apache.drill.common.expression.IfExpression.IfCondition;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
@@ -213,4 +214,8 @@ final class ConstantChecker implements ExprVisitor<Boolean, ErrorCollector, Runt
return false;
}
+ @Override
+ public Boolean visitTypedFieldExpr(TypedFieldExpr e, ErrorCollector value) throws RuntimeException {
+ return false;
+ }
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/visitors/ExprVisitor.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/visitors/ExprVisitor.java b/logical/src/main/java/org/apache/drill/common/expression/visitors/ExprVisitor.java
index 7c59f3c..e6198ae 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/visitors/ExprVisitor.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/visitors/ExprVisitor.java
@@ -26,6 +26,7 @@ import org.apache.drill.common.expression.IfExpression;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
import org.apache.drill.common.expression.ValueExpressions.DateExpression;
@@ -45,30 +46,31 @@ import org.apache.drill.common.expression.ValueExpressions.TimeExpression;
import org.apache.drill.common.expression.ValueExpressions.TimeStampExpression;
public interface ExprVisitor<T, VAL, EXCEP extends Exception> {
- public T visitFunctionCall(FunctionCall call, VAL value) throws EXCEP;
- public T visitFunctionHolderExpression(FunctionHolderExpression holder, VAL value) throws EXCEP;
- public T visitIfExpression(IfExpression ifExpr, VAL value) throws EXCEP;
- public T visitBooleanOperator(BooleanOperator call, VAL value) throws EXCEP;
- public T visitSchemaPath(SchemaPath path, VAL value) throws EXCEP;
- public T visitIntConstant(IntExpression intExpr, VAL value) throws EXCEP;
- public T visitFloatConstant(FloatExpression fExpr, VAL value) throws EXCEP;
- public T visitLongConstant(LongExpression intExpr, VAL value) throws EXCEP;
- public T visitDateConstant(DateExpression intExpr, VAL value) throws EXCEP;
- public T visitTimeConstant(TimeExpression intExpr, VAL value) throws EXCEP;
- public T visitTimeStampConstant(TimeStampExpression intExpr, VAL value) throws EXCEP;
- public T visitIntervalYearConstant(IntervalYearExpression intExpr, VAL value) throws EXCEP;
- public T visitIntervalDayConstant(IntervalDayExpression intExpr, VAL value) throws EXCEP;
- public T visitDecimal9Constant(Decimal9Expression decExpr, VAL value) throws EXCEP;
- public T visitDecimal18Constant(Decimal18Expression decExpr, VAL value) throws EXCEP;
- public T visitDecimal28Constant(Decimal28Expression decExpr, VAL value) throws EXCEP;
- public T visitDecimal38Constant(Decimal38Expression decExpr, VAL value) throws EXCEP;
- public T visitDoubleConstant(DoubleExpression dExpr, VAL value) throws EXCEP;
- public T visitBooleanConstant(BooleanExpression e, VAL value) throws EXCEP;
- public T visitQuotedStringConstant(QuotedString e, VAL value) throws EXCEP;
- public T visitNullConstant(TypedNullConstant e, VAL value) throws EXCEP;
- public T visitNullExpression(NullExpression e, VAL value) throws EXCEP;
- public T visitUnknown(LogicalExpression e, VAL value) throws EXCEP;
- public T visitCastExpression(CastExpression e, VAL value) throws EXCEP;
- public T visitConvertExpression(ConvertExpression e, VAL value) throws EXCEP;
- public T visitParameter(ParameterExpression e, VAL value) throws EXCEP;
+ T visitFunctionCall(FunctionCall call, VAL value) throws EXCEP;
+ T visitFunctionHolderExpression(FunctionHolderExpression holder, VAL value) throws EXCEP;
+ T visitIfExpression(IfExpression ifExpr, VAL value) throws EXCEP;
+ T visitBooleanOperator(BooleanOperator call, VAL value) throws EXCEP;
+ T visitSchemaPath(SchemaPath path, VAL value) throws EXCEP;
+ T visitIntConstant(IntExpression intExpr, VAL value) throws EXCEP;
+ T visitFloatConstant(FloatExpression fExpr, VAL value) throws EXCEP;
+ T visitLongConstant(LongExpression intExpr, VAL value) throws EXCEP;
+ T visitDateConstant(DateExpression intExpr, VAL value) throws EXCEP;
+ T visitTimeConstant(TimeExpression intExpr, VAL value) throws EXCEP;
+ T visitTimeStampConstant(TimeStampExpression intExpr, VAL value) throws EXCEP;
+ T visitIntervalYearConstant(IntervalYearExpression intExpr, VAL value) throws EXCEP;
+ T visitIntervalDayConstant(IntervalDayExpression intExpr, VAL value) throws EXCEP;
+ T visitDecimal9Constant(Decimal9Expression decExpr, VAL value) throws EXCEP;
+ T visitDecimal18Constant(Decimal18Expression decExpr, VAL value) throws EXCEP;
+ T visitDecimal28Constant(Decimal28Expression decExpr, VAL value) throws EXCEP;
+ T visitDecimal38Constant(Decimal38Expression decExpr, VAL value) throws EXCEP;
+ T visitDoubleConstant(DoubleExpression dExpr, VAL value) throws EXCEP;
+ T visitBooleanConstant(BooleanExpression e, VAL value) throws EXCEP;
+ T visitQuotedStringConstant(QuotedString e, VAL value) throws EXCEP;
+ T visitNullConstant(TypedNullConstant e, VAL value) throws EXCEP;
+ T visitNullExpression(NullExpression e, VAL value) throws EXCEP;
+ T visitUnknown(LogicalExpression e, VAL value) throws EXCEP;
+ T visitCastExpression(CastExpression e, VAL value) throws EXCEP;
+ T visitConvertExpression(ConvertExpression e, VAL value) throws EXCEP;
+ T visitParameter(ParameterExpression e, VAL value) throws EXCEP;
+ T visitTypedFieldExpr(TypedFieldExpr e, VAL value) throws EXCEP;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/4ee5625d/logical/src/main/java/org/apache/drill/common/expression/visitors/ExpressionValidator.java
----------------------------------------------------------------------
diff --git a/logical/src/main/java/org/apache/drill/common/expression/visitors/ExpressionValidator.java b/logical/src/main/java/org/apache/drill/common/expression/visitors/ExpressionValidator.java
index e8cbc2b..c32825a 100644
--- a/logical/src/main/java/org/apache/drill/common/expression/visitors/ExpressionValidator.java
+++ b/logical/src/main/java/org/apache/drill/common/expression/visitors/ExpressionValidator.java
@@ -28,6 +28,7 @@ import org.apache.drill.common.expression.IfExpression.IfCondition;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.NullExpression;
import org.apache.drill.common.expression.SchemaPath;
+import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.TypedNullConstant;
import org.apache.drill.common.expression.ValueExpressions;
import org.apache.drill.common.expression.ValueExpressions.BooleanExpression;
@@ -236,4 +237,8 @@ public class ExpressionValidator implements ExprVisitor<Void, ErrorCollector, Ru
return null;
}
+ @Override
+ public Void visitTypedFieldExpr(TypedFieldExpr e, ErrorCollector value) throws RuntimeException {
+ return null;
+ }
}
[2/3] drill git commit: DRILL-6103: lsb_release: command not found
Posted by am...@apache.org.
DRILL-6103: lsb_release: command not found
close apache/drill#1191
Thanks to Sanel Zukan for providing a small patch that checks for /etc/fedora-release path. This is more common, than lsb_release command on Linux distros.
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/f5b82239
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/f5b82239
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/f5b82239
Branch: refs/heads/master
Commit: f5b82239ad823a972e32ea732237a2e8c4346db3
Parents: 4ee5625
Author: Kunal Khatua <kk...@maprtech.com>
Authored: Mon Mar 26 23:30:31 2018 -0700
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Mar 30 22:46:36 2018 -0700
----------------------------------------------------------------------
distribution/src/resources/drill-config.sh | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/f5b82239/distribution/src/resources/drill-config.sh
----------------------------------------------------------------------
diff --git a/distribution/src/resources/drill-config.sh b/distribution/src/resources/drill-config.sh
index e3eaa64..c2f3d70 100644
--- a/distribution/src/resources/drill-config.sh
+++ b/distribution/src/resources/drill-config.sh
@@ -421,8 +421,7 @@ CP="$CP:$DRILL_HOME/jars/classb/*"
if [[ "$OSTYPE" == "linux-gnu" ]]; then
# Linux
# check for Fedora. netty-tcnative has a Fedora variant
- linuxvariant=$(lsb_release -i | cut -d: -f2 | sed s/'^\t'//)
- if [[ "$linuxvariant" == "Fedora" ]]; then
+ if [[ -f "/etc/fedora-release" ]]; then
CP="$CP:$DRILL_HOME/jars/3rdparty/fedora/*"
else
CP="$CP:$DRILL_HOME/jars/3rdparty/linux/*"
[3/3] drill git commit: DRILL-6234: Improved documentation for
VariableWidthVector mutators,
and added simple unit tests demonstrating mutator behavior.
Posted by am...@apache.org.
DRILL-6234: Improved documentation for VariableWidthVector mutators, and added simple unit tests demonstrating mutator behavior.
close apache/drill#1164
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/9a6cb59b
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/9a6cb59b
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/9a6cb59b
Branch: refs/heads/master
Commit: 9a6cb59b9b7a5b127e5f60309ce2f506ede9652a
Parents: f5b8223
Author: Timothy Farkas <ti...@apache.org>
Authored: Tue Mar 13 17:24:28 2018 -0700
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri Mar 30 22:47:31 2018 -0700
----------------------------------------------------------------------
exec/vector/pom.xml | 7 +-
.../templates/VariableLengthVectors.java | 61 ++++++++
.../apache/drill/exec/vector/ValueVector.java | 3 +-
.../exec/vector/VariableLengthVectorTest.java | 141 +++++++++++++++++++
4 files changed, 210 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/pom.xml
----------------------------------------------------------------------
diff --git a/exec/vector/pom.xml b/exec/vector/pom.xml
index 0184305..21e138d 100644
--- a/exec/vector/pom.xml
+++ b/exec/vector/pom.xml
@@ -65,7 +65,12 @@
<version>0.7.1</version>
</dependency>
-
+ <dependency>
+ <groupId>org.apache.drill</groupId>
+ <artifactId>drill-common</artifactId>
+ <version>${project.version}</version>
+ <classifier>tests</classifier>
+ </dependency>
</dependencies>
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
index 516eb52..ab995cd 100644
--- a/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
+++ b/exec/vector/src/main/codegen/templates/VariableLengthVectors.java
@@ -512,6 +512,8 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
}
/**
+ * <h4>Overview</h4>
+ * <p>
* Mutable${minor.class} implements a vector of variable width values. Elements in the vector
* are accessed by position from the logical start of the vector. A fixed width offsetVector
* is used to convert an element's position to its offset from the start of the (0-based)
@@ -520,6 +522,46 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
* The equivalent Java primitive is '${minor.javaType!type.javaType}'
*
* NB: this class is automatically generated from ValueVectorTypes.tdd using FreeMarker.
+ * </p>
+ * <h4>Contract</h4>
+ * <p>
+ * <ol>
+ * <li>
+ * <b>Supported Writes:</b> {@link VariableWidthVector}s do not support random writes. In contrast {@link org.apache.drill.exec.vector.FixedWidthVector}s do
+ * allow random writes but special care is needed.
+ * </li>
+ * <li>
+ * <b>Writing Values:</b> All set methods must be called with a consecutive sequence of indices, with a few exceptions:
+ * <ol>
+ * <li>You can update the last index you just set.</li>
+ * <li>You can reset a previous index (call it Idx), but you must assume all the data after Idx is corrupt. Also
+ * note that the memory consumed by data that came after Idx is not released.</li>
+ * </ol>
+ * </li>
+ * <li>
+ * <b>Setting Value Count:</b> Vectors aren't explicitly aware of how many values they contain. So you must keep track of the
+ * number of values you've written to the vector and once you are done writing to the vector you must call {@link Mutator#setValueCount(int)}.
+ * It is possible to trim the vector by setting the value count to be less than the number of values currently contained in the vector. Note the extra memory consumed in
+ * the data buffer is not freed when this is done.
+ * </li>
+ * <li>
+ * <b>Memory Allocation:</b> When setting a value at an index you must do one of the following to ensure you do not get an {@link IndexOutOfBoundsException}.
+ * <ol>
+ * <li>
+ * Allocate the exact amount of memory you need when using the {@link Mutator#set(int, byte[])} methods. If you do not
+ * manually allocate sufficient memory an {@link IndexOutOfBoundsException} can be thrown when the data buffer runs out of space.
+ * </li>
+ * <li>
+ * Or you can use the {@link Mutator#setSafe(int, byte[])} methods, which will automatically grow your data buffer to
+ * fit your data.
+ * </li>
+ * </ol>
+ * </li>
+ * <li>
+ * <b>Immutability:</b> Once a vector has been populated with data and {@link #setValueCount(int)} has been called, it should be considered immutable.
+ * </li>
+ * </ol>
+ * </p>
*/
public final class Mutator extends BaseValueVector.BaseMutator implements VariableWidthVector.VariableWidthMutator {
@@ -703,6 +745,25 @@ public final class ${minor.class}Vector extends BaseDataValueVector implements V
data.setBytes(currentOffset, holder.buffer, holder.start, length);
}
+ /**
+ * <h4>Notes on Usage</h4>
+ * <p>
+ * For {@link VariableWidthVector}s this method can be used in the following cases:
+ * <ul>
+ * <li>Setting the actual number of elements currently contained in the vector.</li>
+ * <li>Trimming the vector to have fewer elements than it currently does.</li>
+ * </ul>
+ * </p>
+ * <h4>Caveats</h4>
+ * <p>
+ * It is important to note that for {@link org.apache.drill.exec.vector.FixedWidthVector}s this method can also be used to expand the vector.
+ * However, {@link VariableWidthVector}s do not support this usage and this method will throw an {@link IndexOutOfBoundsException} if you attempt
+ * to use it in this way. Expansion of valueCounts is not supported mainly because there is no benefit, since you would still have to rely on the setSafe
+ * methods to appropriately expand the data buffer and populate the vector anyway (since by definition we do not know the width of elements). See DRILL-6234 for details.
+ * </p>
+ * <h4>Method Documentation</h4>
+ * {@inheritDoc}
+ */
@Override
public void setValueCount(int valueCount) {
final int currentByteCapacity = getByteCapacity();
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
index f873cc6..2659810 100644
--- a/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
+++ b/exec/vector/src/main/java/org/apache/drill/exec/vector/ValueVector.java
@@ -293,7 +293,8 @@ public interface ValueVector extends Closeable, Iterable<ValueVector> {
*/
interface Mutator {
/**
- * Sets the number of values that is stored in this vector to the given value count.
+ * Sets the number of values that is stored in this vector to the given value count. <b>WARNING!</b> Once the
+ * valueCount is set, the vector should be considered immutable.
*
* @param valueCount value count to set.
*/
http://git-wip-us.apache.org/repos/asf/drill/blob/9a6cb59b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
----------------------------------------------------------------------
diff --git a/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
new file mode 100644
index 0000000..eaee597
--- /dev/null
+++ b/exec/vector/src/test/java/org/apache/drill/exec/vector/VariableLengthVectorTest.java
@@ -0,0 +1,141 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.vector;
+
+import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.Types;
+import org.apache.drill.exec.memory.RootAllocator;
+import org.apache.drill.exec.record.MaterializedField;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * This test uses {@link VarCharVector} to test the template code in VariableLengthVector.
+ */
+public class VariableLengthVectorTest
+{
+ /**
+ * If the vector contains 1000 records, setting a value count of 1000 should work.
+ */
+ @Test
+ public void testSettingSameValueCount()
+ {
+ try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+ final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+ final VarCharVector vector = new VarCharVector(field, allocator);
+
+ vector.allocateNew();
+
+ try {
+ final int size = 1000;
+ final VarCharVector.Mutator mutator = vector.getMutator();
+ final VarCharVector.Accessor accessor = vector.getAccessor();
+
+ setSafeIndexStrings("", 0, size, mutator);
+
+ mutator.setValueCount(size);
+ Assert.assertEquals(size, accessor.getValueCount());
+ checkIndexStrings("", 0, size, accessor);
+ } finally {
+ vector.clear();
+ }
+ }
+ }
+
+ /**
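+ * Test truncating data. If you have 10000 records, reduce the vector to 1000 records. NOTE(review): the body sets the value count to fluffSize (10000), not size (1000), so no truncation actually happens; confirm the intended count.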
+ */
+ @Test
+ public void testTrunicateVectorSetValueCount()
+ {
+ try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+ final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+ final VarCharVector vector = new VarCharVector(field, allocator);
+
+ vector.allocateNew();
+
+ try {
+ final int size = 1000;
+ final int fluffSize = 10000;
+ final VarCharVector.Mutator mutator = vector.getMutator();
+ final VarCharVector.Accessor accessor = vector.getAccessor();
+
+ setSafeIndexStrings("", 0, size, mutator);
+ setSafeIndexStrings("first cut ", size, fluffSize, mutator);
+
+ mutator.setValueCount(fluffSize);
+ Assert.assertEquals(fluffSize, accessor.getValueCount());
+
+ checkIndexStrings("", 0, size, accessor);
+
+ } finally {
+ vector.clear();
+ }
+ }
+ }
+
+ /**
+ * Set 10000 values. Then go back and set new values starting at the 1001st record.
+ */
+ @Test
+ public void testSetBackTracking()
+ {
+ try (RootAllocator allocator = new RootAllocator(10_000_000)) {
+ final MaterializedField field = MaterializedField.create("stringCol", Types.required(TypeProtos.MinorType.VARCHAR));
+ final VarCharVector vector = new VarCharVector(field, allocator);
+
+ vector.allocateNew();
+
+ try {
+ final int size = 1000;
+ final int fluffSize = 10000;
+ final VarCharVector.Mutator mutator = vector.getMutator();
+ final VarCharVector.Accessor accessor = vector.getAccessor();
+
+ setSafeIndexStrings("", 0, size, mutator);
+ setSafeIndexStrings("first cut ", size, fluffSize, mutator);
+ setSafeIndexStrings("redone cut ", size, fluffSize, mutator);
+
+ mutator.setValueCount(fluffSize);
+ Assert.assertEquals(fluffSize, accessor.getValueCount());
+
+ checkIndexStrings("", 0, size, accessor);
+ checkIndexStrings("redone cut ", size, fluffSize, accessor);
+
+ } finally {
+ vector.clear();
+ }
+ }
+ }
+
+ public static void setSafeIndexStrings(String prefix, int offset, int size, VarCharVector.Mutator mutator)
+ {
+ for (int index = offset; index < size; index++) {
+ final String indexString = prefix + "String num " + index;
+ mutator.setSafe(index, indexString.getBytes());
+ }
+ }
+
+ public static void checkIndexStrings(String prefix, int offset, int size, VarCharVector.Accessor accessor)
+ {
+ for (int index = offset; index < size; index++) {
+ final String indexString = prefix + "String num " + index;
+ Assert.assertArrayEquals(indexString.getBytes(), accessor.get(index));
+ }
+ }
+}