You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/04/04 17:51:43 UTC
[orc] branch master updated: ORC-422: Fix issue with Predicate push
down when lower/upper bounds are set
This is an automated email from the ASF dual-hosted git repository.
omalley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/master by this push:
new 6e825ee ORC-422: Fix issue with Predicate push down when lower/upper bounds are set
6e825ee is described below
commit 6e825eec6439dae329f73c2be1841ee71c6874fc
Author: Sandeep More <mo...@apache.org>
AuthorDate: Wed Dec 12 14:51:26 2018 -0500
ORC-422: Fix issue with Predicate push down when lower/upper bounds are set
Fixes #348
Signed-off-by: Owen O'Malley <om...@apache.org>
---
.../org/apache/orc/impl/ColumnStatisticsImpl.java | 2 +-
.../java/org/apache/orc/impl/RecordReaderImpl.java | 148 +++++----
.../orc/impl/TestPredicatePushDownBounds.java | 331 +++++++++++++++++++++
.../org/apache/orc/impl/TestRecordReaderImpl.java | 51 ++--
4 files changed, 457 insertions(+), 75 deletions(-)
diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index e983f04..ae9b276 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -677,7 +677,7 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
@Override
public String getMaximum() {
- /* if we have upper bound is set (in case of truncation)
+ /* if we have upper bound set (in case of truncation)
getMaximum will be null */
if(isUpperBoundSet) {
return null;
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index c077f24..0bedfc5 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -17,22 +17,18 @@
*/
package org.apache.orc.impl;
-import org.apache.orc.CompressionKind;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-import org.apache.orc.OrcFile;
-import org.apache.orc.util.BloomFilter;
-import org.apache.orc.util.BloomFilterIO;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.Text;
import org.apache.orc.BooleanColumnStatistics;
import org.apache.orc.ColumnStatistics;
import org.apache.orc.CompressionCodec;
@@ -42,6 +38,7 @@ import org.apache.orc.DecimalColumnStatistics;
import org.apache.orc.DoubleColumnStatistics;
import org.apache.orc.IntegerColumnStatistics;
import org.apache.orc.OrcConf;
+import org.apache.orc.OrcFile;
import org.apache.orc.OrcProto;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
@@ -49,21 +46,21 @@ import org.apache.orc.StringColumnStatistics;
import org.apache.orc.StripeInformation;
import org.apache.orc.TimestampColumnStatistics;
import org.apache.orc.TypeDescription;
+import org.apache.orc.util.BloomFilter;
+import org.apache.orc.util.BloomFilterIO;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.ql.util.TimestampUtils;
-import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
public class RecordReaderImpl implements RecordReader {
static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class);
@@ -318,16 +315,30 @@ public class RecordReaderImpl implements RecordReader {
* @param <T> the type of the comparision
* @return the location of the point
*/
- static <T> Location compareToRange(Comparable<T> point, T min, T max) {
- int minCompare = point.compareTo(min);
+ static <T> Location compareToRange(Comparable<T> point, T min, T max, T lowerBound, T upperBound) {
+
+ final boolean isLowerBoundSet = (min == null && lowerBound != null) ? true : false;
+ final boolean isUpperBoundSet = (max == null && upperBound != null) ? true : false;
+
+ final int minCompare = isLowerBoundSet ? point.compareTo(lowerBound) : point.compareTo(min);
if (minCompare < 0) {
return Location.BEFORE;
+ }
+
+ /* the lower bound is a truncated prefix of the real minimum, so a point equal to the bound (compare == 0) is still BEFORE the min value */
+ else if (minCompare == 0 && isLowerBoundSet) {
+ return Location.BEFORE;
} else if (minCompare == 0) {
return Location.MIN;
}
- int maxCompare = point.compareTo(max);
+
+ int maxCompare = isUpperBoundSet ? point.compareTo(upperBound) : point.compareTo(max);
if (maxCompare > 0) {
return Location.AFTER;
+ }
+ /* the truncated upper bound sorts after the real maximum, so a point equal to the bound (compare == 0) is AFTER the max value */
+ else if (maxCompare == 0 && isUpperBoundSet) {
+ return Location.AFTER;
} else if (maxCompare == 0) {
return Location.MAX;
}
@@ -359,7 +370,7 @@ public class RecordReaderImpl implements RecordReader {
} else if (index instanceof DoubleColumnStatistics) {
return ((DoubleColumnStatistics) index).getMaximum();
} else if (index instanceof StringColumnStatistics) {
- return ((StringColumnStatistics) index).getMaximum();
+ return ((StringColumnStatistics) index).getUpperBound();
} else if (index instanceof DateColumnStatistics) {
return ((DateColumnStatistics) index).getMaximum();
} else if (index instanceof DecimalColumnStatistics) {
@@ -406,7 +417,7 @@ public class RecordReaderImpl implements RecordReader {
} else if (index instanceof DoubleColumnStatistics) {
return ((DoubleColumnStatistics) index).getMinimum();
} else if (index instanceof StringColumnStatistics) {
- return ((StringColumnStatistics) index).getMinimum();
+ return ((StringColumnStatistics) index).getLowerBound();
} else if (index instanceof DateColumnStatistics) {
return ((DateColumnStatistics) index).getMinimum();
} else if (index instanceof DecimalColumnStatistics) {
@@ -464,6 +475,7 @@ public class RecordReaderImpl implements RecordReader {
* @return the set of truth values that may be returned for the given
* predicate.
*/
+
static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
PredicateLeaf predicate,
OrcProto.Stream.Kind kind,
@@ -490,9 +502,21 @@ public class RecordReaderImpl implements RecordReader {
return TruthValue.YES_NO_NULL;
}
}
+
+ String lowerBound = null;
+ String upperBound = null;
+
+ if(cs instanceof StringColumnStatistics) {
+ lowerBound = ((StringColumnStatistics) cs).getLowerBound();
+ minValue = ((StringColumnStatistics) cs).getMinimum();
+
+ upperBound = ((StringColumnStatistics) cs).getUpperBound();
+ maxValue = ((StringColumnStatistics) cs).getMaximum();
+ }
+
return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(),
BloomFilterIO.deserialize(kind, encoding, writerVersion, type, bloomFilter),
- useUTCTimestamp);
+ useUTCTimestamp, lowerBound, upperBound);
}
/**
@@ -527,13 +551,26 @@ public class RecordReaderImpl implements RecordReader {
boolean useUTCTimestamp) {
Object minValue = getMin(stats, useUTCTimestamp);
Object maxValue = getMax(stats, useUTCTimestamp);
- return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp);
+
+ String lowerBound = null;
+ String upperBound = null;
+
+ if(stats instanceof StringColumnStatistics) {
+ lowerBound = ((StringColumnStatistics) stats).getLowerBound();
+ minValue = ((StringColumnStatistics) stats).getMinimum();
+
+ upperBound = ((StringColumnStatistics) stats).getUpperBound();
+ maxValue = ((StringColumnStatistics) stats).getMaximum();
+ }
+
+ return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp, lowerBound, upperBound);
}
static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
- Object max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp) {
+ Object max, boolean hasNull, BloomFilter bloomFilter,
+ boolean useUTCTimestamp, Object lowerBound, Object upperBound) {
// if we didn't have any values, everything must have been null
- if (min == null) {
+ if (min == null && lowerBound == null) {
if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
return TruthValue.YES;
} else {
@@ -543,6 +580,10 @@ public class RecordReaderImpl implements RecordReader {
return TruthValue.YES_NO_NULL;
}
+ if(max == UNKNOWN_VALUE) {
+ return TruthValue.YES_NO;
+ }
+
TruthValue result;
Object baseObj = predicate.getLiteral();
// Predicate object and stats objects are converted to the type of the predicate object.
@@ -550,7 +591,7 @@ public class RecordReaderImpl implements RecordReader {
Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);
- result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
+ result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull, lowerBound, upperBound);
if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) {
return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull, useUTCTimestamp);
} else {
@@ -577,20 +618,22 @@ public class RecordReaderImpl implements RecordReader {
private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj,
Object minValue,
Object maxValue,
- boolean hasNull) {
+ boolean hasNull,
+ Object lowerBound,
+ Object upperBound) {
Location loc;
switch (predicate.getOperator()) {
case NULL_SAFE_EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
if (loc == Location.BEFORE || loc == Location.AFTER) {
return TruthValue.NO;
} else {
return TruthValue.YES_NO;
}
case EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
- if (minValue.equals(maxValue) && loc == Location.MIN) {
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
+ if (minValue != null && minValue.equals(maxValue) && loc == Location.MIN) {
return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.AFTER) {
return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
@@ -598,7 +641,7 @@ public class RecordReaderImpl implements RecordReader {
return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
if (loc == Location.AFTER) {
return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE || loc == Location.MIN) {
@@ -607,7 +650,7 @@ public class RecordReaderImpl implements RecordReader {
return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case LESS_THAN_EQUALS:
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
if (loc == Location.AFTER || loc == Location.MAX) {
return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc == Location.BEFORE) {
@@ -616,12 +659,17 @@ public class RecordReaderImpl implements RecordReader {
return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
}
case IN:
- if (minValue.equals(maxValue)) {
+ boolean minEqualsMax = predicate.getType()
+ .equals(PredicateLeaf.Type.STRING) ?
+ lowerBound.equals(upperBound) :
+ minValue.equals(maxValue);
+
+ if (minEqualsMax) {
// for a single value, look through to see if that value is in the
// set
for (Object arg : predicate.getLiteralList()) {
predObj = getBaseObjectForComparison(predicate.getType(), arg);
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
if (loc == Location.MIN) {
return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
}
@@ -631,7 +679,7 @@ public class RecordReaderImpl implements RecordReader {
// are all of the values outside of the range?
for (Object arg : predicate.getLiteralList()) {
predObj = getBaseObjectForComparison(predicate.getType(), arg);
- loc = compareToRange((Comparable) predObj, minValue, maxValue);
+ loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
if (loc == Location.MIN || loc == Location.MIDDLE ||
loc == Location.MAX) {
return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
@@ -646,10 +694,10 @@ public class RecordReaderImpl implements RecordReader {
}
Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0));
- loc = compareToRange((Comparable) predObj1, minValue, maxValue);
+ loc = compareToRange((Comparable) predObj1, minValue, maxValue, lowerBound, upperBound);
if (loc == Location.BEFORE || loc == Location.MIN) {
Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1));
- Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
+ Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue, lowerBound, upperBound);
if (loc2 == Location.AFTER || loc2 == Location.MAX) {
return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
} else if (loc2 == Location.BEFORE) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
new file mode 100644
index 0000000..d018efa
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.util.BloomFilter;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.orc.impl.TestRecordReaderImpl.createPredicateLeaf;
+
+public class TestPredicatePushDownBounds {
+
+ /**
+ * This test case handles the EQUALS, LESS_THAN and LESS_THAN_EQUALS corner cases
+ * where the predicate literal is equal to a truncated upper or lower bound.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testCornerCases() {
+
+ int stringLength = 1100;
+ byte[] utf8F;
+ byte[] utf8P;
+
+ final TypeDescription schema = TypeDescription.createString();
+ final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+ BloomFilter bf = new BloomFilter(100);
+ // FFF... to PPP...
+ for (int i = 70; i <= 80; i++) {
+ final String inputString = StringUtils
+ .repeat(Character.toString((char) i), stringLength);
+ bf.addString(inputString);
+ }
+
+ final String longStringF = StringUtils
+ .repeat(Character.toString('F'), stringLength);
+ final String longStringP = StringUtils
+ .repeat(Character.toString('P'), stringLength);
+
+ /* String that matches the upperbound value after truncation */
+ final String upperboundString =
+ StringUtils.repeat(Character.toString('P'), 1023) + "Q";
+ /* String that matches the lowerbound value after truncation */
+ final String lowerboundString = StringUtils
+ .repeat(Character.toString('F'), 1024);
+
+ final String shortStringF = StringUtils.repeat(Character.toString('F'), 50);
+ final String shortStringP =
+ StringUtils.repeat(Character.toString('P'), 50) + "Q";
+
+ /* Test for a case EQUALS where only upperbound is set */
+ final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.EQUALS,
+ PredicateLeaf.Type.STRING, "x", upperboundString, null);
+
+ /* Test for a case LESS_THAN where only upperbound is set */
+ final PredicateLeaf predicateUpperBoundLessThan = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN,
+ PredicateLeaf.Type.STRING, "x", upperboundString, null);
+
+ /* Test for a case LESS_THAN_EQUALS where only upperbound is set */
+ final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+ PredicateLeaf.Type.STRING, "x", upperboundString, null);
+
+ utf8F = shortStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+ .evaluatePredicate(stat, predicateUpperBoundEquals, null));
+
+ assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl
+ .evaluatePredicate(stat, predicateUpperBoundLessThan, null));
+
+ assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl
+ .evaluatePredicate(stat, predicateUpperBoundLessThanEquals, null));
+
+ stat.reset();
+
+ utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ utf8P = shortStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ /* Test for a case Equals where only lowerbound is set */
+ final PredicateLeaf predicateLowerBoundEquals = createPredicateLeaf(
+ PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x",
+ lowerboundString, null);
+
+ /* Test for a case LESS_THAN where only lowerbound is set */
+ final PredicateLeaf predicateLowerBoundLessThan = createPredicateLeaf(
+ PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x",
+ lowerboundString, null);
+
+ /* Test for a case LESS_THAN_EQUALS where only lowerbound is set */
+ final PredicateLeaf predicateLowerBoundLessThanEquals = createPredicateLeaf(
+ PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x",
+ lowerboundString, null);
+
+ assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+ .evaluatePredicate(stat, predicateLowerBoundEquals, null));
+
+ assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+ .evaluatePredicate(stat, predicateLowerBoundLessThan, bf));
+
+ assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+ .evaluatePredicate(stat, predicateLowerBoundLessThanEquals, null));
+
+ }
+
+ /**
+ * A case where the search values fall within the upperbound and lower bound
+ * range.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testNormalCase() throws Exception {
+
+ int stringLength = 1100;
+ /* length of string in BF */
+ int bfStringLength = 50;
+ //int stringLength = 11;
+ byte[] utf8F;
+ byte[] utf8P;
+
+ final TypeDescription schema = TypeDescription.createString();
+ final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+ BloomFilter bf = new BloomFilter(100);
+ // FFF... to PPP...
+ for (int i = 70; i <= 80; i++) {
+ final String inputString = StringUtils
+ .repeat(Character.toString((char) i), bfStringLength);
+ bf.addString(inputString);
+ }
+
+ final String longStringF = StringUtils
+ .repeat(Character.toString('F'), stringLength);
+ final String longStringP = StringUtils
+ .repeat(Character.toString('P'), stringLength);
+ final String predicateString = StringUtils
+ .repeat(Character.toString('I'), 50);
+
+
+ /* Test EQUALS where both bounds are set and the literal falls between them */
+ final PredicateLeaf predicateEquals = createPredicateLeaf(
+ PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x",
+ predicateString, null);
+
+ /* trigger lower bound */
+ utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ /* trigger upper bound */
+ utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ assertEquals(SearchArgument.TruthValue.YES_NO,
+ RecordReaderImpl.evaluatePredicate(stat, predicateEquals, bf));
+
+ }
+
+ /**
+ * Test for IN search arg when upper and lower bounds are set.
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testIN() throws Exception {
+ int stringLength = 1100;
+ byte[] utf8F;
+ byte[] utf8P;
+
+ final TypeDescription schema = TypeDescription.createString();
+ final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+ final BloomFilter bf = new BloomFilter(100);
+ // FFF... to PPP...
+ for (int i = 70; i <= 80; i++) {
+ final String inputString = StringUtils
+ .repeat(Character.toString((char) i), stringLength);
+ bf.addString(inputString);
+ }
+
+ final String longStringF = StringUtils
+ .repeat(Character.toString('F'), stringLength);
+ final String longStringP = StringUtils
+ .repeat(Character.toString('P'), stringLength);
+
+ /* String that matches the upperbound value after truncation */
+ final String upperboundString =
+ StringUtils.repeat(Character.toString('P'), 1023) + "Q";
+ /* String that matches the lowerbound value after truncation */
+ final String lowerboundString = StringUtils
+ .repeat(Character.toString('F'), 1024);
+
+ final String shortStringF = StringUtils.repeat(Character.toString('F'), 50);
+ final String shortStringP =
+ StringUtils.repeat(Character.toString('P'), 50) + "Q";
+
+ final List<Object> args = new ArrayList<Object>();
+ args.add(upperboundString);
+
+ /* set upper bound */
+ utf8F = shortStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ /* Test for a case IN where only upper bound is set and test literal is equal to upperbound */
+ final PredicateLeaf predicateUpperBoundSet = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.IN,
+ PredicateLeaf.Type.STRING, "x", null, args);
+
+ assertEquals(SearchArgument.TruthValue.NO,
+ RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundSet, null));
+
+ /* Test for lower bound set only */
+ args.clear();
+ args.add(lowerboundString);
+
+ stat.reset();
+ /* set lower bound */
+ utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ utf8P = shortStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ /* Test for a case IN where only lower bound is set and the test literal is lowerbound string */
+ final PredicateLeaf predicateLowerBoundSet = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.IN,
+ PredicateLeaf.Type.STRING, "x", null, args);
+
+ assertEquals(SearchArgument.TruthValue.NO,
+ RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundSet, null));
+
+ /* NOTE: despite its name, this leaf is built with the IN operator and is never evaluated in this test */
+ final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.IN,
+ PredicateLeaf.Type.STRING, "x", null, args);
+
+
+ /* Test the case where both upper and lower bounds are set */
+ args.clear();
+ args.add(lowerboundString);
+ args.add(upperboundString);
+
+ stat.reset();
+ /* set upper and lower bound */
+ utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ final PredicateLeaf predicateUpperLowerBoundSet = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.IN,
+ PredicateLeaf.Type.STRING, "x", null, args);
+
+ assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+ .evaluatePredicate(stat, predicateUpperLowerBoundSet, null));
+
+ /* test the boundary condition */
+ args.clear();
+ args.add(longStringF);
+ args.add(longStringP);
+
+ stat.reset();
+ /* set upper and lower bound */
+ utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8F, 0, utf8F.length, 1);
+
+ utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+ stat.increment();
+ stat.updateString(utf8P, 0, utf8P.length, 1);
+
+ final PredicateLeaf predicateUpperLowerBoundSetBoundary = TestRecordReaderImpl
+ .createPredicateLeaf(PredicateLeaf.Operator.IN,
+ PredicateLeaf.Type.STRING, "x", null, args);
+
+ assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl
+ .evaluatePredicate(stat, predicateUpperLowerBoundSetBoundary, null));
+
+ }
+
+}
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 529a08b..37083ee 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -36,6 +36,7 @@ import static org.mockito.Mockito.when;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import java.sql.Date;
import java.sql.Timestamp;
import java.text.DateFormat;
@@ -47,6 +48,7 @@ import java.util.Arrays;
import java.util.List;
import java.util.TimeZone;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
@@ -221,61 +223,61 @@ public class TestRecordReaderImpl {
@Test
public void testCompareToRangeInt() throws Exception {
assertEquals(Location.BEFORE,
- RecordReaderImpl.compareToRange(19L, 20L, 40L));
+ RecordReaderImpl.compareToRange(19L, 20L, 40L, null, null));
assertEquals(Location.AFTER,
- RecordReaderImpl.compareToRange(41L, 20L, 40L));
+ RecordReaderImpl.compareToRange(41L, 20L, 40L, null, null));
assertEquals(Location.MIN,
- RecordReaderImpl.compareToRange(20L, 20L, 40L));
+ RecordReaderImpl.compareToRange(20L, 20L, 40L, null, null));
assertEquals(Location.MIDDLE,
- RecordReaderImpl.compareToRange(21L, 20L, 40L));
+ RecordReaderImpl.compareToRange(21L, 20L, 40L, null, null));
assertEquals(Location.MAX,
- RecordReaderImpl.compareToRange(40L, 20L, 40L));
+ RecordReaderImpl.compareToRange(40L, 20L, 40L, null, null));
assertEquals(Location.BEFORE,
- RecordReaderImpl.compareToRange(0L, 1L, 1L));
+ RecordReaderImpl.compareToRange(0L, 1L, 1L, null, null));
assertEquals(Location.MIN,
- RecordReaderImpl.compareToRange(1L, 1L, 1L));
+ RecordReaderImpl.compareToRange(1L, 1L, 1L, null, null));
assertEquals(Location.AFTER,
- RecordReaderImpl.compareToRange(2L, 1L, 1L));
+ RecordReaderImpl.compareToRange(2L, 1L, 1L, null, null));
}
@Test
public void testCompareToRangeString() throws Exception {
assertEquals(Location.BEFORE,
- RecordReaderImpl.compareToRange("a", "b", "c"));
+ RecordReaderImpl.compareToRange("a", "b", "c", null, null));
assertEquals(Location.AFTER,
- RecordReaderImpl.compareToRange("d", "b", "c"));
+ RecordReaderImpl.compareToRange("d", "b", "c", null, null));
assertEquals(Location.MIN,
- RecordReaderImpl.compareToRange("b", "b", "c"));
+ RecordReaderImpl.compareToRange("b", "b", "c", null, null));
assertEquals(Location.MIDDLE,
- RecordReaderImpl.compareToRange("bb", "b", "c"));
+ RecordReaderImpl.compareToRange("bb", "b", "c", null, null));
assertEquals(Location.MAX,
- RecordReaderImpl.compareToRange("c", "b", "c"));
+ RecordReaderImpl.compareToRange("c", "b", "c", null, null));
assertEquals(Location.BEFORE,
- RecordReaderImpl.compareToRange("a", "b", "b"));
+ RecordReaderImpl.compareToRange("a", "b", "b", null, null));
assertEquals(Location.MIN,
- RecordReaderImpl.compareToRange("b", "b", "b"));
+ RecordReaderImpl.compareToRange("b", "b", "b", null, null));
assertEquals(Location.AFTER,
- RecordReaderImpl.compareToRange("c", "b", "b"));
+ RecordReaderImpl.compareToRange("c", "b", "b", null, null));
}
@Test
public void testCompareToCharNeedConvert() throws Exception {
assertEquals(Location.BEFORE,
- RecordReaderImpl.compareToRange("apple", "hello", "world"));
+ RecordReaderImpl.compareToRange("apple", "hello", "world", null, null));
assertEquals(Location.AFTER,
- RecordReaderImpl.compareToRange("zombie", "hello", "world"));
+ RecordReaderImpl.compareToRange("zombie", "hello", "world", null, null));
assertEquals(Location.MIN,
- RecordReaderImpl.compareToRange("hello", "hello", "world"));
+ RecordReaderImpl.compareToRange("hello", "hello", "world", null, null));
assertEquals(Location.MIDDLE,
- RecordReaderImpl.compareToRange("pilot", "hello", "world"));
+ RecordReaderImpl.compareToRange("pilot", "hello", "world", null, null));
assertEquals(Location.MAX,
- RecordReaderImpl.compareToRange("world", "hello", "world"));
+ RecordReaderImpl.compareToRange("world", "hello", "world", null, null));
assertEquals(Location.BEFORE,
- RecordReaderImpl.compareToRange("apple", "hello", "hello"));
+ RecordReaderImpl.compareToRange("apple", "hello", "hello", null, null));
assertEquals(Location.MIN,
- RecordReaderImpl.compareToRange("hello", "hello", "hello"));
+ RecordReaderImpl.compareToRange("hello", "hello", "hello", null, null));
assertEquals(Location.AFTER,
- RecordReaderImpl.compareToRange("zombie", "hello", "hello"));
+ RecordReaderImpl.compareToRange("zombie", "hello", "hello", null, null));
}
@Test
@@ -338,6 +340,7 @@ public class TestRecordReaderImpl {
return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build();
}
+ //fixme
private static OrcProto.ColumnStatistics createStringStats(String min, String max,
boolean hasNull) {
OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();