You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2019/04/04 17:51:43 UTC

[orc] branch master updated: ORC-422: Fix issue with Predicate push down when lower/upper bounds are set

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/master by this push:
     new 6e825ee  ORC-422: Fix issue with Predicate push down when lower/upper bounds are set
6e825ee is described below

commit 6e825eec6439dae329f73c2be1841ee71c6874fc
Author: Sandeep More <mo...@apache.org>
AuthorDate: Wed Dec 12 14:51:26 2018 -0500

    ORC-422: Fix issue with Predicate push down when lower/upper bounds are set
    
    Fixes #348
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 .../org/apache/orc/impl/ColumnStatisticsImpl.java  |   2 +-
 .../java/org/apache/orc/impl/RecordReaderImpl.java | 148 +++++----
 .../orc/impl/TestPredicatePushDownBounds.java      | 331 +++++++++++++++++++++
 .../org/apache/orc/impl/TestRecordReaderImpl.java  |  51 ++--
 4 files changed, 457 insertions(+), 75 deletions(-)

diff --git a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
index e983f04..ae9b276 100644
--- a/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ColumnStatisticsImpl.java
@@ -677,7 +677,7 @@ public class ColumnStatisticsImpl implements ColumnStatistics {
 
     @Override
     public String getMaximum() {
-      /* if we have upper bound is set (in case of truncation)
+      /* if we have upper bound set (in case of truncation)
       getMaximum will be null */
       if(isUpperBoundSet) {
         return null;
diff --git a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
index c077f24..0bedfc5 100644
--- a/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/RecordReaderImpl.java
@@ -17,22 +17,18 @@
  */
 package org.apache.orc.impl;
 
-import org.apache.orc.CompressionKind;
-
-import java.io.IOException;
-import java.math.BigDecimal;
-import java.sql.Date;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.TimeZone;
-
-import org.apache.orc.OrcFile;
-import org.apache.orc.util.BloomFilter;
-import org.apache.orc.util.BloomFilterIO;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRangeList;
+import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
+import org.apache.hadoop.hive.ql.util.TimestampUtils;
+import org.apache.hadoop.hive.serde2.io.DateWritable;
+import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
+import org.apache.hadoop.io.Text;
 import org.apache.orc.BooleanColumnStatistics;
 import org.apache.orc.ColumnStatistics;
 import org.apache.orc.CompressionCodec;
@@ -42,6 +38,7 @@ import org.apache.orc.DecimalColumnStatistics;
 import org.apache.orc.DoubleColumnStatistics;
 import org.apache.orc.IntegerColumnStatistics;
 import org.apache.orc.OrcConf;
+import org.apache.orc.OrcFile;
 import org.apache.orc.OrcProto;
 import org.apache.orc.Reader;
 import org.apache.orc.RecordReader;
@@ -49,21 +46,21 @@ import org.apache.orc.StringColumnStatistics;
 import org.apache.orc.StripeInformation;
 import org.apache.orc.TimestampColumnStatistics;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.util.BloomFilter;
+import org.apache.orc.util.BloomFilterIO;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.common.io.DiskRange;
-import org.apache.hadoop.hive.common.io.DiskRangeList;
-import org.apache.hadoop.hive.common.io.DiskRangeList.CreateHelper;
-import org.apache.hadoop.hive.common.type.HiveDecimal;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument.TruthValue;
-import org.apache.hadoop.hive.serde2.io.DateWritable;
-import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.ql.util.TimestampUtils;
-import org.apache.hadoop.io.Text;
+
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.sql.Date;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TimeZone;
 
 public class RecordReaderImpl implements RecordReader {
   static final Logger LOG = LoggerFactory.getLogger(RecordReaderImpl.class);
@@ -318,16 +315,30 @@ public class RecordReaderImpl implements RecordReader {
    * @param <T> the type of the comparision
    * @return the location of the point
    */
-  static <T> Location compareToRange(Comparable<T> point, T min, T max) {
-    int minCompare = point.compareTo(min);
+  static <T> Location compareToRange(Comparable<T> point, T min, T max, T lowerBound, T upperBound) {
+
+    final boolean isLowerBoundSet = (min == null && lowerBound != null) ? true : false;
+    final boolean isUpperBoundSet = (max == null && upperBound != null) ? true : false;
+
+    final int minCompare = isLowerBoundSet ? point.compareTo(lowerBound) : point.compareTo(min);
     if (minCompare < 0) {
       return Location.BEFORE;
+    }
+
+    /* the lower bound is a truncated form of the real minimum, so a point that compares equal to the bound is treated as BEFORE the actual min value */
+    else if (minCompare == 0 && isLowerBoundSet) {
+      return Location.BEFORE;
     } else if (minCompare == 0) {
       return Location.MIN;
     }
-    int maxCompare = point.compareTo(max);
+
+    int maxCompare = isUpperBoundSet ? point.compareTo(upperBound) : point.compareTo(max);
     if (maxCompare > 0) {
       return Location.AFTER;
+    }
+    /* when only the (truncated) upper bound is available, a point that compares equal to it is classified as AFTER rather than MAX */
+    else if (maxCompare == 0 && isUpperBoundSet) {
+      return Location.AFTER;
     } else if (maxCompare == 0) {
       return Location.MAX;
     }
@@ -359,7 +370,7 @@ public class RecordReaderImpl implements RecordReader {
     } else if (index instanceof DoubleColumnStatistics) {
       return ((DoubleColumnStatistics) index).getMaximum();
     } else if (index instanceof StringColumnStatistics) {
-      return ((StringColumnStatistics) index).getMaximum();
+      return ((StringColumnStatistics) index).getUpperBound();
     } else if (index instanceof DateColumnStatistics) {
       return ((DateColumnStatistics) index).getMaximum();
     } else if (index instanceof DecimalColumnStatistics) {
@@ -406,7 +417,7 @@ public class RecordReaderImpl implements RecordReader {
     } else if (index instanceof DoubleColumnStatistics) {
       return ((DoubleColumnStatistics) index).getMinimum();
     } else if (index instanceof StringColumnStatistics) {
-      return ((StringColumnStatistics) index).getMinimum();
+      return ((StringColumnStatistics) index).getLowerBound();
     } else if (index instanceof DateColumnStatistics) {
       return ((DateColumnStatistics) index).getMinimum();
     } else if (index instanceof DecimalColumnStatistics) {
@@ -464,6 +475,7 @@ public class RecordReaderImpl implements RecordReader {
    * @return the set of truth values that may be returned for the given
    *   predicate.
    */
+
   static TruthValue evaluatePredicateProto(OrcProto.ColumnStatistics statsProto,
                                            PredicateLeaf predicate,
                                            OrcProto.Stream.Kind kind,
@@ -490,9 +502,21 @@ public class RecordReaderImpl implements RecordReader {
         return TruthValue.YES_NO_NULL;
       }
     }
+
+    String lowerBound = null;
+    String upperBound = null;
+
+    if(cs instanceof StringColumnStatistics) {
+      lowerBound = ((StringColumnStatistics) cs).getLowerBound();
+      minValue = ((StringColumnStatistics) cs).getMinimum();
+
+      upperBound = ((StringColumnStatistics) cs).getUpperBound();
+      maxValue = ((StringColumnStatistics) cs).getMaximum();
+    }
+
     return evaluatePredicateRange(predicate, minValue, maxValue, cs.hasNull(),
         BloomFilterIO.deserialize(kind, encoding, writerVersion, type, bloomFilter),
-        useUTCTimestamp);
+        useUTCTimestamp, lowerBound, upperBound);
   }
 
   /**
@@ -527,13 +551,26 @@ public class RecordReaderImpl implements RecordReader {
                                              boolean useUTCTimestamp) {
     Object minValue = getMin(stats, useUTCTimestamp);
     Object maxValue = getMax(stats, useUTCTimestamp);
-    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp);
+
+    String lowerBound = null;
+    String upperBound = null;
+
+    if(stats instanceof StringColumnStatistics) {
+      lowerBound = ((StringColumnStatistics) stats).getLowerBound();
+      minValue = ((StringColumnStatistics) stats).getMinimum();
+
+      upperBound = ((StringColumnStatistics) stats).getUpperBound();
+      maxValue = ((StringColumnStatistics) stats).getMaximum();
+    }
+
+    return evaluatePredicateRange(predicate, minValue, maxValue, stats.hasNull(), bloomFilter, useUTCTimestamp, lowerBound, upperBound);
   }
 
   static TruthValue evaluatePredicateRange(PredicateLeaf predicate, Object min,
-      Object max, boolean hasNull, BloomFilter bloomFilter, boolean useUTCTimestamp) {
+      Object max, boolean hasNull, BloomFilter bloomFilter,
+      boolean useUTCTimestamp, Object lowerBound, Object upperBound) {
     // if we didn't have any values, everything must have been null
-    if (min == null) {
+    if (min == null && lowerBound == null) {
       if (predicate.getOperator() == PredicateLeaf.Operator.IS_NULL) {
         return TruthValue.YES;
       } else {
@@ -543,6 +580,10 @@ public class RecordReaderImpl implements RecordReader {
       return TruthValue.YES_NO_NULL;
     }
 
+    if(max == UNKNOWN_VALUE) {
+      return TruthValue.YES_NO;
+    }
+
     TruthValue result;
     Object baseObj = predicate.getLiteral();
     // Predicate object and stats objects are converted to the type of the predicate object.
@@ -550,7 +591,7 @@ public class RecordReaderImpl implements RecordReader {
     Object maxValue = getBaseObjectForComparison(predicate.getType(), max);
     Object predObj = getBaseObjectForComparison(predicate.getType(), baseObj);
 
-    result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull);
+    result = evaluatePredicateMinMax(predicate, predObj, minValue, maxValue, hasNull, lowerBound, upperBound);
     if (shouldEvaluateBloomFilter(predicate, result, bloomFilter)) {
       return evaluatePredicateBloomFilter(predicate, predObj, bloomFilter, hasNull, useUTCTimestamp);
     } else {
@@ -577,20 +618,22 @@ public class RecordReaderImpl implements RecordReader {
   private static TruthValue evaluatePredicateMinMax(PredicateLeaf predicate, Object predObj,
       Object minValue,
       Object maxValue,
-      boolean hasNull) {
+      boolean hasNull,
+      Object lowerBound,
+      Object upperBound) {
     Location loc;
 
     switch (predicate.getOperator()) {
       case NULL_SAFE_EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
         if (loc == Location.BEFORE || loc == Location.AFTER) {
           return TruthValue.NO;
         } else {
           return TruthValue.YES_NO;
         }
       case EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
-        if (minValue.equals(maxValue) && loc == Location.MIN) {
+        loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
+        if (minValue != null && minValue.equals(maxValue) && loc == Location.MIN) {
           return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE || loc == Location.AFTER) {
           return hasNull ? TruthValue.NO_NULL : TruthValue.NO;
@@ -598,7 +641,7 @@ public class RecordReaderImpl implements RecordReader {
           return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case LESS_THAN:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
         if (loc == Location.AFTER) {
           return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE || loc == Location.MIN) {
@@ -607,7 +650,7 @@ public class RecordReaderImpl implements RecordReader {
           return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case LESS_THAN_EQUALS:
-        loc = compareToRange((Comparable) predObj, minValue, maxValue);
+        loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
         if (loc == Location.AFTER || loc == Location.MAX) {
           return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
         } else if (loc == Location.BEFORE) {
@@ -616,12 +659,17 @@ public class RecordReaderImpl implements RecordReader {
           return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
         }
       case IN:
-        if (minValue.equals(maxValue)) {
+        boolean minEqualsMax = predicate.getType()
+            .equals(PredicateLeaf.Type.STRING) ?
+            lowerBound.equals(upperBound) :
+            minValue.equals(maxValue);
+
+        if (minEqualsMax) {
           // for a single value, look through to see if that value is in the
           // set
           for (Object arg : predicate.getLiteralList()) {
             predObj = getBaseObjectForComparison(predicate.getType(), arg);
-            loc = compareToRange((Comparable) predObj, minValue, maxValue);
+            loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
             if (loc == Location.MIN) {
               return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
             }
@@ -631,7 +679,7 @@ public class RecordReaderImpl implements RecordReader {
           // are all of the values outside of the range?
           for (Object arg : predicate.getLiteralList()) {
             predObj = getBaseObjectForComparison(predicate.getType(), arg);
-            loc = compareToRange((Comparable) predObj, minValue, maxValue);
+            loc = compareToRange((Comparable) predObj, minValue, maxValue, lowerBound, upperBound);
             if (loc == Location.MIN || loc == Location.MIDDLE ||
                 loc == Location.MAX) {
               return hasNull ? TruthValue.YES_NO_NULL : TruthValue.YES_NO;
@@ -646,10 +694,10 @@ public class RecordReaderImpl implements RecordReader {
         }
         Object predObj1 = getBaseObjectForComparison(predicate.getType(), args.get(0));
 
-        loc = compareToRange((Comparable) predObj1, minValue, maxValue);
+        loc = compareToRange((Comparable) predObj1, minValue, maxValue, lowerBound, upperBound);
         if (loc == Location.BEFORE || loc == Location.MIN) {
           Object predObj2 = getBaseObjectForComparison(predicate.getType(), args.get(1));
-          Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue);
+          Location loc2 = compareToRange((Comparable) predObj2, minValue, maxValue, lowerBound, upperBound);
           if (loc2 == Location.AFTER || loc2 == Location.MAX) {
             return hasNull ? TruthValue.YES_NULL : TruthValue.YES;
           } else if (loc2 == Location.BEFORE) {
diff --git a/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
new file mode 100644
index 0000000..d018efa
--- /dev/null
+++ b/java/core/src/test/org/apache/orc/impl/TestPredicatePushDownBounds.java
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.orc.impl;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.hive.ql.io.sarg.PredicateLeaf;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.orc.TypeDescription;
+import org.apache.orc.util.BloomFilter;
+import org.junit.Test;
+
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+
+import static junit.framework.Assert.assertEquals;
+import static org.apache.orc.impl.TestRecordReaderImpl.createPredicateLeaf;
+
+public class TestPredicatePushDownBounds {
+
+  /**
+   * This test case handles the EQUALS, LESS_THAN and LESS_THAN_EQUALS corner
+   * cases where the predicate literal equals the truncated upper or lower bound.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testCornerCases() {
+
+    int stringLength = 1100;
+    byte[] utf8F;
+    byte[] utf8P;
+
+    final TypeDescription schema = TypeDescription.createString();
+    final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+    BloomFilter bf = new BloomFilter(100);
+    // FFF... to PPP...
+    for (int i = 70; i <= 80; i++) {
+      final String inputString = StringUtils
+          .repeat(Character.toString((char) i), stringLength);
+      bf.addString(inputString);
+    }
+
+    final String longStringF = StringUtils
+        .repeat(Character.toString('F'), stringLength);
+    final String longStringP = StringUtils
+        .repeat(Character.toString('P'), stringLength);
+
+    /* String that matches the upperbound value after truncation */
+    final String upperboundString =
+        StringUtils.repeat(Character.toString('P'), 1023) + "Q";
+    /* String that matches the lowerbound value after truncation */
+    final String lowerboundString = StringUtils
+        .repeat(Character.toString('F'), 1024);
+
+    final String shortStringF = StringUtils.repeat(Character.toString('F'), 50);
+    final String shortStringP =
+        StringUtils.repeat(Character.toString('P'), 50) + "Q";
+
+    /* Test for a case EQUALS where only upperbound is set */
+    final PredicateLeaf predicateUpperBoundEquals = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.EQUALS,
+            PredicateLeaf.Type.STRING, "x", upperboundString, null);
+
+    /* Test for a case LESS_THAN where only upperbound is set */
+    final PredicateLeaf predicateUpperBoundLessThan = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN,
+            PredicateLeaf.Type.STRING, "x", upperboundString, null);
+
+    /* Test for a case LESS_THAN_EQUALS where only upperbound is set */
+    final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.LESS_THAN_EQUALS,
+            PredicateLeaf.Type.STRING, "x", upperboundString, null);
+
+    utf8F = shortStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+        .evaluatePredicate(stat, predicateUpperBoundEquals, null));
+
+    assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl
+        .evaluatePredicate(stat, predicateUpperBoundLessThan, null));
+
+    assertEquals(SearchArgument.TruthValue.YES, RecordReaderImpl
+        .evaluatePredicate(stat, predicateUpperBoundLessThanEquals, null));
+
+    stat.reset();
+
+    utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    utf8P = shortStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    /* Test for a case Equals where only lowerbound is set */
+    final PredicateLeaf predicateLowerBoundEquals = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x",
+        lowerboundString, null);
+
+    /* Test for a case LESS_THAN where only lowerbound is set */
+    final PredicateLeaf predicateLowerBoundLessThan = createPredicateLeaf(
+        PredicateLeaf.Operator.LESS_THAN, PredicateLeaf.Type.STRING, "x",
+        lowerboundString, null);
+
+    /* Test for a case LESS_THAN_EQUALS where only lowerbound is set */
+    final PredicateLeaf predicateLowerBoundLessThanEquals = createPredicateLeaf(
+        PredicateLeaf.Operator.LESS_THAN_EQUALS, PredicateLeaf.Type.STRING, "x",
+        lowerboundString, null);
+
+    assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+        .evaluatePredicate(stat, predicateLowerBoundEquals, null));
+
+    assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+        .evaluatePredicate(stat, predicateLowerBoundLessThan, bf));
+
+    assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+        .evaluatePredicate(stat, predicateLowerBoundLessThanEquals, null));
+
+  }
+
+  /**
+   * A case where the search values fall within the upperbound and lower bound
+   * range.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testNormalCase() throws Exception {
+
+    int stringLength = 1100;
+    /* length of string in BF */
+    int bfStringLength = 50;
+    //int stringLength = 11;
+    byte[] utf8F;
+    byte[] utf8P;
+
+    final TypeDescription schema = TypeDescription.createString();
+    final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+    BloomFilter bf = new BloomFilter(100);
+    // FFF... to PPP...
+    for (int i = 70; i <= 80; i++) {
+      final String inputString = StringUtils
+          .repeat(Character.toString((char) i), bfStringLength);
+      bf.addString(inputString);
+    }
+
+    final String longStringF = StringUtils
+        .repeat(Character.toString('F'), stringLength);
+    final String longStringP = StringUtils
+        .repeat(Character.toString('P'), stringLength);
+    final String predicateString = StringUtils
+        .repeat(Character.toString('I'), 50);
+
+
+    /* Test for EQUALS where both bounds are set and the literal lies between them */
+    final PredicateLeaf predicateEquals = createPredicateLeaf(
+        PredicateLeaf.Operator.EQUALS, PredicateLeaf.Type.STRING, "x",
+        predicateString, null);
+
+    /* trigger lower bound */
+    utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    /* trigger upper bound */
+    utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    assertEquals(SearchArgument.TruthValue.YES_NO,
+        RecordReaderImpl.evaluatePredicate(stat, predicateEquals, bf));
+
+  }
+
+  /**
+   * Test for IN search arg when upper and lower bounds are set.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testIN() throws Exception {
+    int stringLength = 1100;
+    byte[] utf8F;
+    byte[] utf8P;
+
+    final TypeDescription schema = TypeDescription.createString();
+    final ColumnStatisticsImpl stat = ColumnStatisticsImpl.create(schema);
+
+    final BloomFilter bf = new BloomFilter(100);
+    // FFF... to PPP...
+    for (int i = 70; i <= 80; i++) {
+      final String inputString = StringUtils
+          .repeat(Character.toString((char) i), stringLength);
+      bf.addString(inputString);
+    }
+
+    final String longStringF = StringUtils
+        .repeat(Character.toString('F'), stringLength);
+    final String longStringP = StringUtils
+        .repeat(Character.toString('P'), stringLength);
+
+    /* String that matches the upperbound value after truncation */
+    final String upperboundString =
+        StringUtils.repeat(Character.toString('P'), 1023) + "Q";
+    /* String that matches the lowerbound value after truncation */
+    final String lowerboundString = StringUtils
+        .repeat(Character.toString('F'), 1024);
+
+    final String shortStringF = StringUtils.repeat(Character.toString('F'), 50);
+    final String shortStringP =
+        StringUtils.repeat(Character.toString('P'), 50) + "Q";
+
+    final List<Object> args = new ArrayList<Object>();
+    args.add(upperboundString);
+
+    /* set upper bound */
+    utf8F = shortStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    /* Test for a case IN where only upper bound is set and test literal is equal to upperbound */
+    final PredicateLeaf predicateUpperBoundSet = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.IN,
+            PredicateLeaf.Type.STRING, "x", null, args);
+
+    assertEquals(SearchArgument.TruthValue.NO,
+        RecordReaderImpl.evaluatePredicate(stat, predicateUpperBoundSet, null));
+
+    /* Test for lower bound set only */
+    args.clear();
+    args.add(lowerboundString);
+
+    stat.reset();
+    /* set lower bound */
+    utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    utf8P = shortStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    /* Test for a case IN where only lower bound is set and the test literal is lowerbound string */
+    final PredicateLeaf predicateLowerBoundSet = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.IN,
+            PredicateLeaf.Type.STRING, "x", null, args);
+
+    assertEquals(SearchArgument.TruthValue.NO,
+        RecordReaderImpl.evaluatePredicate(stat, predicateLowerBoundSet, null));
+
+    /* Note: despite its name, this is an IN predicate over the same args, and it is never evaluated in this test */
+    final PredicateLeaf predicateUpperBoundLessThanEquals = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.IN,
+            PredicateLeaf.Type.STRING, "x", null, args);
+
+
+    /* Test the case where both upper and lower bounds are set */
+    args.clear();
+    args.add(lowerboundString);
+    args.add(upperboundString);
+
+    stat.reset();
+    /* set upper and lower bound */
+    utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    final PredicateLeaf predicateUpperLowerBoundSet = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.IN,
+            PredicateLeaf.Type.STRING, "x", null, args);
+
+    assertEquals(SearchArgument.TruthValue.NO, RecordReaderImpl
+        .evaluatePredicate(stat, predicateUpperLowerBoundSet, null));
+
+    /* test the boundary condition */
+    args.clear();
+    args.add(longStringF);
+    args.add(longStringP);
+
+    stat.reset();
+    /* set upper and lower bound */
+    utf8F = longStringF.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8F, 0, utf8F.length, 1);
+
+    utf8P = longStringP.getBytes(StandardCharsets.UTF_8);
+    stat.increment();
+    stat.updateString(utf8P, 0, utf8P.length, 1);
+
+    final PredicateLeaf predicateUpperLowerBoundSetBoundary = TestRecordReaderImpl
+        .createPredicateLeaf(PredicateLeaf.Operator.IN,
+            PredicateLeaf.Type.STRING, "x", null, args);
+
+    assertEquals(SearchArgument.TruthValue.YES_NO, RecordReaderImpl
+        .evaluatePredicate(stat, predicateUpperLowerBoundSetBoundary, null));
+
+  }
+
+}
diff --git a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
index 529a08b..37083ee 100644
--- a/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
+++ b/java/core/src/test/org/apache/orc/impl/TestRecordReaderImpl.java
@@ -36,6 +36,7 @@ import static org.mockito.Mockito.when;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.text.DateFormat;
@@ -47,6 +48,7 @@ import java.util.Arrays;
 import java.util.List;
 import java.util.TimeZone;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileStatus;
@@ -221,61 +223,61 @@ public class TestRecordReaderImpl {
   @Test
   public void testCompareToRangeInt() throws Exception {
     assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange(19L, 20L, 40L));
+      RecordReaderImpl.compareToRange(19L, 20L, 40L, null, null));
     assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange(41L, 20L, 40L));
+      RecordReaderImpl.compareToRange(41L, 20L, 40L, null, null));
     assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange(20L, 20L, 40L));
+        RecordReaderImpl.compareToRange(20L, 20L, 40L, null, null));
     assertEquals(Location.MIDDLE,
-        RecordReaderImpl.compareToRange(21L, 20L, 40L));
+        RecordReaderImpl.compareToRange(21L, 20L, 40L, null, null));
     assertEquals(Location.MAX,
-      RecordReaderImpl.compareToRange(40L, 20L, 40L));
+      RecordReaderImpl.compareToRange(40L, 20L, 40L, null, null));
     assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange(0L, 1L, 1L));
+      RecordReaderImpl.compareToRange(0L, 1L, 1L, null, null));
     assertEquals(Location.MIN,
-      RecordReaderImpl.compareToRange(1L, 1L, 1L));
+      RecordReaderImpl.compareToRange(1L, 1L, 1L, null, null));
     assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange(2L, 1L, 1L));
+      RecordReaderImpl.compareToRange(2L, 1L, 1L, null, null));
   }
 
   @Test
   public void testCompareToRangeString() throws Exception {
     assertEquals(Location.BEFORE,
-        RecordReaderImpl.compareToRange("a", "b", "c"));
+        RecordReaderImpl.compareToRange("a", "b", "c", null, null));
     assertEquals(Location.AFTER,
-        RecordReaderImpl.compareToRange("d", "b", "c"));
+        RecordReaderImpl.compareToRange("d", "b", "c", null, null));
     assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange("b", "b", "c"));
+        RecordReaderImpl.compareToRange("b", "b", "c", null, null));
     assertEquals(Location.MIDDLE,
-        RecordReaderImpl.compareToRange("bb", "b", "c"));
+        RecordReaderImpl.compareToRange("bb", "b", "c", null, null));
     assertEquals(Location.MAX,
-        RecordReaderImpl.compareToRange("c", "b", "c"));
+        RecordReaderImpl.compareToRange("c", "b", "c", null, null));
     assertEquals(Location.BEFORE,
-        RecordReaderImpl.compareToRange("a", "b", "b"));
+        RecordReaderImpl.compareToRange("a", "b", "b", null, null));
     assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange("b", "b", "b"));
+        RecordReaderImpl.compareToRange("b", "b", "b", null, null));
     assertEquals(Location.AFTER,
-        RecordReaderImpl.compareToRange("c", "b", "b"));
+        RecordReaderImpl.compareToRange("c", "b", "b", null, null));
   }
 
   @Test
   public void testCompareToCharNeedConvert() throws Exception {
     assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange("apple", "hello", "world"));
+      RecordReaderImpl.compareToRange("apple", "hello", "world", null, null));
     assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange("zombie", "hello", "world"));
+      RecordReaderImpl.compareToRange("zombie", "hello", "world", null, null));
     assertEquals(Location.MIN,
-        RecordReaderImpl.compareToRange("hello", "hello", "world"));
+        RecordReaderImpl.compareToRange("hello", "hello", "world", null, null));
     assertEquals(Location.MIDDLE,
-        RecordReaderImpl.compareToRange("pilot", "hello", "world"));
+        RecordReaderImpl.compareToRange("pilot", "hello", "world", null, null));
     assertEquals(Location.MAX,
-      RecordReaderImpl.compareToRange("world", "hello", "world"));
+      RecordReaderImpl.compareToRange("world", "hello", "world", null, null));
     assertEquals(Location.BEFORE,
-      RecordReaderImpl.compareToRange("apple", "hello", "hello"));
+      RecordReaderImpl.compareToRange("apple", "hello", "hello", null, null));
     assertEquals(Location.MIN,
-      RecordReaderImpl.compareToRange("hello", "hello", "hello"));
+      RecordReaderImpl.compareToRange("hello", "hello", "hello", null, null));
     assertEquals(Location.AFTER,
-      RecordReaderImpl.compareToRange("zombie", "hello", "hello"));
+      RecordReaderImpl.compareToRange("zombie", "hello", "hello", null, null));
   }
 
   @Test
@@ -338,6 +340,7 @@ public class TestRecordReaderImpl {
     return OrcProto.ColumnStatistics.newBuilder().setDoubleStatistics(dblStats.build()).build();
   }
 
+  //fixme
   private static OrcProto.ColumnStatistics createStringStats(String min, String max,
       boolean hasNull) {
     OrcProto.StringStatistics.Builder strStats = OrcProto.StringStatistics.newBuilder();